author    | Joerg Roedel <joerg.roedel@amd.com> | 2010-06-01 03:57:49 -0400
committer | Joerg Roedel <joerg.roedel@amd.com> | 2010-06-01 03:57:49 -0400
commit    | 1d61e73ab4c7470833241af888939a7aab2b0354 (patch)
tree      | dd714c2428070a7ea2bf807c2821ac75ff13ec55 /arch/x86
parent    | 84fe6c19e4a598e8071e3bd1b2c923454eae1268 (diff)
parent    | 67a3e12b05e055c0415c556a315a3d3eb637e29e (diff)
Merge commit 'v2.6.35-rc1' into amd-iommu/2.6.35
Diffstat (limited to 'arch/x86')
186 files changed, 11259 insertions, 8175 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9458685902bd..dcb0593b4a66 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -53,11 +53,15 @@ config X86 | |||
53 | select HAVE_KERNEL_LZMA | 53 | select HAVE_KERNEL_LZMA |
54 | select HAVE_KERNEL_LZO | 54 | select HAVE_KERNEL_LZO |
55 | select HAVE_HW_BREAKPOINT | 55 | select HAVE_HW_BREAKPOINT |
56 | select HAVE_MIXED_BREAKPOINTS_REGS | ||
56 | select PERF_EVENTS | 57 | select PERF_EVENTS |
57 | select ANON_INODES | 58 | select ANON_INODES |
58 | select HAVE_ARCH_KMEMCHECK | 59 | select HAVE_ARCH_KMEMCHECK |
59 | select HAVE_USER_RETURN_NOTIFIER | 60 | select HAVE_USER_RETURN_NOTIFIER |
60 | 61 | ||
62 | config INSTRUCTION_DECODER | ||
63 | def_bool (KPROBES || PERF_EVENTS) | ||
64 | |||
61 | config OUTPUT_FORMAT | 65 | config OUTPUT_FORMAT |
62 | string | 66 | string |
63 | default "elf32-i386" if X86_32 | 67 | default "elf32-i386" if X86_32 |
@@ -105,6 +109,9 @@ config SBUS | |||
105 | config NEED_DMA_MAP_STATE | 109 | config NEED_DMA_MAP_STATE |
106 | def_bool (X86_64 || DMAR || DMA_API_DEBUG) | 110 | def_bool (X86_64 || DMAR || DMA_API_DEBUG) |
107 | 111 | ||
112 | config NEED_SG_DMA_LENGTH | ||
113 | def_bool y | ||
114 | |||
108 | config GENERIC_ISA_DMA | 115 | config GENERIC_ISA_DMA |
109 | def_bool y | 116 | def_bool y |
110 | 117 | ||
@@ -197,20 +204,17 @@ config HAVE_INTEL_TXT | |||
197 | 204 | ||
198 | # Use the generic interrupt handling code in kernel/irq/: | 205 | # Use the generic interrupt handling code in kernel/irq/: |
199 | config GENERIC_HARDIRQS | 206 | config GENERIC_HARDIRQS |
200 | bool | 207 | def_bool y |
201 | default y | ||
202 | 208 | ||
203 | config GENERIC_HARDIRQS_NO__DO_IRQ | 209 | config GENERIC_HARDIRQS_NO__DO_IRQ |
204 | def_bool y | 210 | def_bool y |
205 | 211 | ||
206 | config GENERIC_IRQ_PROBE | 212 | config GENERIC_IRQ_PROBE |
207 | bool | 213 | def_bool y |
208 | default y | ||
209 | 214 | ||
210 | config GENERIC_PENDING_IRQ | 215 | config GENERIC_PENDING_IRQ |
211 | bool | 216 | def_bool y |
212 | depends on GENERIC_HARDIRQS && SMP | 217 | depends on GENERIC_HARDIRQS && SMP |
213 | default y | ||
214 | 218 | ||
215 | config USE_GENERIC_SMP_HELPERS | 219 | config USE_GENERIC_SMP_HELPERS |
216 | def_bool y | 220 | def_bool y |
@@ -225,19 +229,22 @@ config X86_64_SMP | |||
225 | depends on X86_64 && SMP | 229 | depends on X86_64 && SMP |
226 | 230 | ||
227 | config X86_HT | 231 | config X86_HT |
228 | bool | 232 | def_bool y |
229 | depends on SMP | 233 | depends on SMP |
230 | default y | ||
231 | 234 | ||
232 | config X86_TRAMPOLINE | 235 | config X86_TRAMPOLINE |
233 | bool | 236 | def_bool y |
234 | depends on SMP || (64BIT && ACPI_SLEEP) | 237 | depends on SMP || (64BIT && ACPI_SLEEP) |
235 | default y | ||
236 | 238 | ||
237 | config X86_32_LAZY_GS | 239 | config X86_32_LAZY_GS |
238 | def_bool y | 240 | def_bool y |
239 | depends on X86_32 && !CC_STACKPROTECTOR | 241 | depends on X86_32 && !CC_STACKPROTECTOR |
240 | 242 | ||
243 | config ARCH_HWEIGHT_CFLAGS | ||
244 | string | ||
245 | default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 | ||
246 | default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 | ||
247 | |||
241 | config KTIME_SCALAR | 248 | config KTIME_SCALAR |
242 | def_bool X86_32 | 249 | def_bool X86_32 |
243 | source "init/Kconfig" | 250 | source "init/Kconfig" |
@@ -447,7 +454,7 @@ config X86_NUMAQ | |||
447 | firmware with - send email to <Martin.Bligh@us.ibm.com>. | 454 | firmware with - send email to <Martin.Bligh@us.ibm.com>. |
448 | 455 | ||
449 | config X86_SUPPORTS_MEMORY_FAILURE | 456 | config X86_SUPPORTS_MEMORY_FAILURE |
450 | bool | 457 | def_bool y |
451 | # MCE code calls memory_failure(): | 458 | # MCE code calls memory_failure(): |
452 | depends on X86_MCE | 459 | depends on X86_MCE |
453 | # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags: | 460 | # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags: |
@@ -455,7 +462,6 @@ config X86_SUPPORTS_MEMORY_FAILURE | |||
455 | # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH: | 462 | # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH: |
456 | depends on X86_64 || !SPARSEMEM | 463 | depends on X86_64 || !SPARSEMEM |
457 | select ARCH_SUPPORTS_MEMORY_FAILURE | 464 | select ARCH_SUPPORTS_MEMORY_FAILURE |
458 | default y | ||
459 | 465 | ||
460 | config X86_VISWS | 466 | config X86_VISWS |
461 | bool "SGI 320/540 (Visual Workstation)" | 467 | bool "SGI 320/540 (Visual Workstation)" |
@@ -570,7 +576,6 @@ config PARAVIRT_SPINLOCKS | |||
570 | 576 | ||
571 | config PARAVIRT_CLOCK | 577 | config PARAVIRT_CLOCK |
572 | bool | 578 | bool |
573 | default n | ||
574 | 579 | ||
575 | endif | 580 | endif |
576 | 581 | ||
@@ -749,7 +754,6 @@ config MAXSMP | |||
749 | bool "Configure Maximum number of SMP Processors and NUMA Nodes" | 754 | bool "Configure Maximum number of SMP Processors and NUMA Nodes" |
750 | depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL | 755 | depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL |
751 | select CPUMASK_OFFSTACK | 756 | select CPUMASK_OFFSTACK |
752 | default n | ||
753 | ---help--- | 757 | ---help--- |
754 | Configure maximum number of CPUS and NUMA Nodes for this architecture. | 758 | Configure maximum number of CPUS and NUMA Nodes for this architecture. |
755 | If unsure, say N. | 759 | If unsure, say N. |
@@ -829,7 +833,6 @@ config X86_VISWS_APIC | |||
829 | 833 | ||
830 | config X86_REROUTE_FOR_BROKEN_BOOT_IRQS | 834 | config X86_REROUTE_FOR_BROKEN_BOOT_IRQS |
831 | bool "Reroute for broken boot IRQs" | 835 | bool "Reroute for broken boot IRQs" |
832 | default n | ||
833 | depends on X86_IO_APIC | 836 | depends on X86_IO_APIC |
834 | ---help--- | 837 | ---help--- |
835 | This option enables a workaround that fixes a source of | 838 | This option enables a workaround that fixes a source of |
@@ -876,9 +879,8 @@ config X86_MCE_AMD | |||
876 | the DRAM Error Threshold. | 879 | the DRAM Error Threshold. |
877 | 880 | ||
878 | config X86_ANCIENT_MCE | 881 | config X86_ANCIENT_MCE |
879 | def_bool n | 882 | bool "Support for old Pentium 5 / WinChip machine checks" |
880 | depends on X86_32 && X86_MCE | 883 | depends on X86_32 && X86_MCE |
881 | prompt "Support for old Pentium 5 / WinChip machine checks" | ||
882 | ---help--- | 884 | ---help--- |
883 | Include support for machine check handling on old Pentium 5 or WinChip | 885 | Include support for machine check handling on old Pentium 5 or WinChip |
884 | systems. These typically need to be enabled explicitly on the command | 886 | systems. These typically need to be enabled explicitly on the command |
@@ -886,8 +888,7 @@ config X86_ANCIENT_MCE | |||
886 | 888 | ||
887 | config X86_MCE_THRESHOLD | 889 | config X86_MCE_THRESHOLD |
888 | depends on X86_MCE_AMD || X86_MCE_INTEL | 890 | depends on X86_MCE_AMD || X86_MCE_INTEL |
889 | bool | 891 | def_bool y |
890 | default y | ||
891 | 892 | ||
892 | config X86_MCE_INJECT | 893 | config X86_MCE_INJECT |
893 | depends on X86_MCE | 894 | depends on X86_MCE |
@@ -1026,8 +1027,8 @@ config X86_CPUID | |||
1026 | 1027 | ||
1027 | choice | 1028 | choice |
1028 | prompt "High Memory Support" | 1029 | prompt "High Memory Support" |
1029 | default HIGHMEM4G if !X86_NUMAQ | ||
1030 | default HIGHMEM64G if X86_NUMAQ | 1030 | default HIGHMEM64G if X86_NUMAQ |
1031 | default HIGHMEM4G | ||
1031 | depends on X86_32 | 1032 | depends on X86_32 |
1032 | 1033 | ||
1033 | config NOHIGHMEM | 1034 | config NOHIGHMEM |
@@ -1285,7 +1286,7 @@ source "mm/Kconfig" | |||
1285 | 1286 | ||
1286 | config HIGHPTE | 1287 | config HIGHPTE |
1287 | bool "Allocate 3rd-level pagetables from highmem" | 1288 | bool "Allocate 3rd-level pagetables from highmem" |
1288 | depends on X86_32 && (HIGHMEM4G || HIGHMEM64G) | 1289 | depends on HIGHMEM |
1289 | ---help--- | 1290 | ---help--- |
1290 | The VM uses one page table entry for each page of physical memory. | 1291 | The VM uses one page table entry for each page of physical memory. |
1291 | For systems with a lot of RAM, this can be wasteful of precious | 1292 | For systems with a lot of RAM, this can be wasteful of precious |
@@ -1369,8 +1370,7 @@ config MATH_EMULATION | |||
1369 | kernel, it won't hurt. | 1370 | kernel, it won't hurt. |
1370 | 1371 | ||
1371 | config MTRR | 1372 | config MTRR |
1372 | bool | 1373 | def_bool y |
1373 | default y | ||
1374 | prompt "MTRR (Memory Type Range Register) support" if EMBEDDED | 1374 | prompt "MTRR (Memory Type Range Register) support" if EMBEDDED |
1375 | ---help--- | 1375 | ---help--- |
1376 | On Intel P6 family processors (Pentium Pro, Pentium II and later) | 1376 | On Intel P6 family processors (Pentium Pro, Pentium II and later) |
@@ -1436,8 +1436,7 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT | |||
1436 | mtrr_spare_reg_nr=N on the kernel command line. | 1436 | mtrr_spare_reg_nr=N on the kernel command line. |
1437 | 1437 | ||
1438 | config X86_PAT | 1438 | config X86_PAT |
1439 | bool | 1439 | def_bool y |
1440 | default y | ||
1441 | prompt "x86 PAT support" if EMBEDDED | 1440 | prompt "x86 PAT support" if EMBEDDED |
1442 | depends on MTRR | 1441 | depends on MTRR |
1443 | ---help--- | 1442 | ---help--- |
@@ -1605,8 +1604,7 @@ config X86_NEED_RELOCS | |||
1605 | depends on X86_32 && RELOCATABLE | 1604 | depends on X86_32 && RELOCATABLE |
1606 | 1605 | ||
1607 | config PHYSICAL_ALIGN | 1606 | config PHYSICAL_ALIGN |
1608 | hex | 1607 | hex "Alignment value to which kernel should be aligned" if X86_32 |
1609 | prompt "Alignment value to which kernel should be aligned" if X86_32 | ||
1610 | default "0x1000000" | 1608 | default "0x1000000" |
1611 | range 0x2000 0x1000000 | 1609 | range 0x2000 0x1000000 |
1612 | ---help--- | 1610 | ---help--- |
@@ -1653,7 +1651,6 @@ config COMPAT_VDSO | |||
1653 | 1651 | ||
1654 | config CMDLINE_BOOL | 1652 | config CMDLINE_BOOL |
1655 | bool "Built-in kernel command line" | 1653 | bool "Built-in kernel command line" |
1656 | default n | ||
1657 | ---help--- | 1654 | ---help--- |
1658 | Allow for specifying boot arguments to the kernel at | 1655 | Allow for specifying boot arguments to the kernel at |
1659 | build time. On some systems (e.g. embedded ones), it is | 1656 | build time. On some systems (e.g. embedded ones), it is |
@@ -1687,7 +1684,6 @@ config CMDLINE | |||
1687 | 1684 | ||
1688 | config CMDLINE_OVERRIDE | 1685 | config CMDLINE_OVERRIDE |
1689 | bool "Built-in command line overrides boot loader arguments" | 1686 | bool "Built-in command line overrides boot loader arguments" |
1690 | default n | ||
1691 | depends on CMDLINE_BOOL | 1687 | depends on CMDLINE_BOOL |
1692 | ---help--- | 1688 | ---help--- |
1693 | Set this option to 'Y' to have the kernel ignore the boot loader | 1689 | Set this option to 'Y' to have the kernel ignore the boot loader |
@@ -1710,6 +1706,10 @@ config HAVE_ARCH_EARLY_PFN_TO_NID | |||
1710 | def_bool X86_64 | 1706 | def_bool X86_64 |
1711 | depends on NUMA | 1707 | depends on NUMA |
1712 | 1708 | ||
1709 | config USE_PERCPU_NUMA_NODE_ID | ||
1710 | def_bool X86_64 | ||
1711 | depends on NUMA | ||
1712 | |||
1713 | menu "Power management and ACPI options" | 1713 | menu "Power management and ACPI options" |
1714 | 1714 | ||
1715 | config ARCH_HIBERNATION_HEADER | 1715 | config ARCH_HIBERNATION_HEADER |
@@ -1723,8 +1723,7 @@ source "drivers/acpi/Kconfig" | |||
1723 | source "drivers/sfi/Kconfig" | 1723 | source "drivers/sfi/Kconfig" |
1724 | 1724 | ||
1725 | config X86_APM_BOOT | 1725 | config X86_APM_BOOT |
1726 | bool | 1726 | def_bool y |
1727 | default y | ||
1728 | depends on APM || APM_MODULE | 1727 | depends on APM || APM_MODULE |
1729 | 1728 | ||
1730 | menuconfig APM | 1729 | menuconfig APM |
@@ -1931,6 +1930,14 @@ config PCI_MMCONFIG | |||
1931 | bool "Support mmconfig PCI config space access" | 1930 | bool "Support mmconfig PCI config space access" |
1932 | depends on X86_64 && PCI && ACPI | 1931 | depends on X86_64 && PCI && ACPI |
1933 | 1932 | ||
1933 | config PCI_CNB20LE_QUIRK | ||
1934 | bool "Read CNB20LE Host Bridge Windows" | ||
1935 | depends on PCI | ||
1936 | help | ||
1937 | Read the PCI windows out of the CNB20LE host bridge. This allows | ||
1938 | PCI hotplug to work on systems with the CNB20LE chipset which do | ||
1939 | not have ACPI. | ||
1940 | |||
1934 | config DMAR | 1941 | config DMAR |
1935 | bool "Support for DMA Remapping Devices (EXPERIMENTAL)" | 1942 | bool "Support for DMA Remapping Devices (EXPERIMENTAL)" |
1936 | depends on PCI_MSI && ACPI && EXPERIMENTAL | 1943 | depends on PCI_MSI && ACPI && EXPERIMENTAL |
@@ -1953,8 +1960,7 @@ config DMAR_DEFAULT_ON | |||
1953 | experimental. | 1960 | experimental. |
1954 | 1961 | ||
1955 | config DMAR_BROKEN_GFX_WA | 1962 | config DMAR_BROKEN_GFX_WA |
1956 | def_bool n | 1963 | bool "Workaround broken graphics drivers (going away soon)" |
1957 | prompt "Workaround broken graphics drivers (going away soon)" | ||
1958 | depends on DMAR && BROKEN | 1964 | depends on DMAR && BROKEN |
1959 | ---help--- | 1965 | ---help--- |
1960 | Current Graphics drivers tend to use physical address | 1966 | Current Graphics drivers tend to use physical address |
@@ -2052,7 +2058,6 @@ config SCx200HR_TIMER | |||
2052 | config OLPC | 2058 | config OLPC |
2053 | bool "One Laptop Per Child support" | 2059 | bool "One Laptop Per Child support" |
2054 | select GPIOLIB | 2060 | select GPIOLIB |
2055 | default n | ||
2056 | ---help--- | 2061 | ---help--- |
2057 | Add support for detecting the unique features of the OLPC | 2062 | Add support for detecting the unique features of the OLPC |
2058 | XO hardware. | 2063 | XO hardware. |
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index a19829374e6a..2ac9069890cd 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -338,6 +338,10 @@ config X86_F00F_BUG | |||
338 | def_bool y | 338 | def_bool y |
339 | depends on M586MMX || M586TSC || M586 || M486 || M386 | 339 | depends on M586MMX || M586TSC || M586 || M486 || M386 |
340 | 340 | ||
341 | config X86_INVD_BUG | ||
342 | def_bool y | ||
343 | depends on M486 || M386 | ||
344 | |||
341 | config X86_WP_WORKS_OK | 345 | config X86_WP_WORKS_OK |
342 | def_bool y | 346 | def_bool y |
343 | depends on !M386 | 347 | depends on !M386 |
@@ -502,23 +506,3 @@ config CPU_SUP_UMC_32 | |||
502 | CPU might render the kernel unbootable. | 506 | CPU might render the kernel unbootable. |
503 | 507 | ||
504 | If unsure, say N. | 508 | If unsure, say N. |
505 | |||
506 | config X86_DS | ||
507 | def_bool X86_PTRACE_BTS | ||
508 | depends on X86_DEBUGCTLMSR | ||
509 | select HAVE_HW_BRANCH_TRACER | ||
510 | |||
511 | config X86_PTRACE_BTS | ||
512 | bool "Branch Trace Store" | ||
513 | default y | ||
514 | depends on X86_DEBUGCTLMSR | ||
515 | depends on BROKEN | ||
516 | ---help--- | ||
517 | This adds a ptrace interface to the hardware's branch trace store. | ||
518 | |||
519 | Debuggers may use it to collect an execution trace of the debugged | ||
520 | application in order to answer the question 'how did I get here?'. | ||
521 | Debuggers may trace user mode as well as kernel mode. | ||
522 | |||
523 | Say Y unless there is no application development on this machine | ||
524 | and you want to save a small amount of code size. | ||
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index bc01e3ebfeb2..75085080b63e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -45,7 +45,6 @@ config EARLY_PRINTK | |||
45 | 45 | ||
46 | config EARLY_PRINTK_DBGP | 46 | config EARLY_PRINTK_DBGP |
47 | bool "Early printk via EHCI debug port" | 47 | bool "Early printk via EHCI debug port" |
48 | default n | ||
49 | depends on EARLY_PRINTK && PCI | 48 | depends on EARLY_PRINTK && PCI |
50 | ---help--- | 49 | ---help--- |
51 | Write kernel log output directly into the EHCI debug port. | 50 | Write kernel log output directly into the EHCI debug port. |
@@ -76,7 +75,6 @@ config DEBUG_PER_CPU_MAPS | |||
76 | bool "Debug access to per_cpu maps" | 75 | bool "Debug access to per_cpu maps" |
77 | depends on DEBUG_KERNEL | 76 | depends on DEBUG_KERNEL |
78 | depends on SMP | 77 | depends on SMP |
79 | default n | ||
80 | ---help--- | 78 | ---help--- |
81 | Say Y to verify that the per_cpu map being accessed has | 79 | Say Y to verify that the per_cpu map being accessed has |
82 | been setup. Adds a fair amount of code to kernel memory | 80 | been setup. Adds a fair amount of code to kernel memory |
@@ -174,15 +172,6 @@ config IOMMU_LEAK | |||
174 | Add a simple leak tracer to the IOMMU code. This is useful when you | 172 | Add a simple leak tracer to the IOMMU code. This is useful when you |
175 | are debugging a buggy device driver that leaks IOMMU mappings. | 173 | are debugging a buggy device driver that leaks IOMMU mappings. |
176 | 174 | ||
177 | config X86_DS_SELFTEST | ||
178 | bool "DS selftest" | ||
179 | default y | ||
180 | depends on DEBUG_KERNEL | ||
181 | depends on X86_DS | ||
182 | ---help--- | ||
183 | Perform Debug Store selftests at boot time. | ||
184 | If in doubt, say "N". | ||
185 | |||
186 | config HAVE_MMIOTRACE_SUPPORT | 175 | config HAVE_MMIOTRACE_SUPPORT |
187 | def_bool y | 176 | def_bool y |
188 | 177 | ||
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 0a43dc515e4c..8aa1b59b9074 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -95,8 +95,9 @@ sp-$(CONFIG_X86_64) := rsp | |||
95 | cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1) | 95 | cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1) |
96 | # is .cfi_signal_frame supported too? | 96 | # is .cfi_signal_frame supported too? |
97 | cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1) | 97 | cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1) |
98 | KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) | 98 | cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1) |
99 | KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) | 99 | KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) |
100 | KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) | ||
100 | 101 | ||
101 | LDFLAGS := -m elf_$(UTS_MACHINE) | 102 | LDFLAGS := -m elf_$(UTS_MACHINE) |
102 | 103 | ||
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 20bb0e1ac681..ff16756a51c1 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -32,6 +32,9 @@ | |||
32 | #define IN IN1 | 32 | #define IN IN1 |
33 | #define KEY %xmm2 | 33 | #define KEY %xmm2 |
34 | #define IV %xmm3 | 34 | #define IV %xmm3 |
35 | #define BSWAP_MASK %xmm10 | ||
36 | #define CTR %xmm11 | ||
37 | #define INC %xmm12 | ||
35 | 38 | ||
36 | #define KEYP %rdi | 39 | #define KEYP %rdi |
37 | #define OUTP %rsi | 40 | #define OUTP %rsi |
@@ -42,6 +45,7 @@ | |||
42 | #define T1 %r10 | 45 | #define T1 %r10 |
43 | #define TKEYP T1 | 46 | #define TKEYP T1 |
44 | #define T2 %r11 | 47 | #define T2 %r11 |
48 | #define TCTR_LOW T2 | ||
45 | 49 | ||
46 | _key_expansion_128: | 50 | _key_expansion_128: |
47 | _key_expansion_256a: | 51 | _key_expansion_256a: |
@@ -724,3 +728,114 @@ ENTRY(aesni_cbc_dec) | |||
724 | movups IV, (IVP) | 728 | movups IV, (IVP) |
725 | .Lcbc_dec_just_ret: | 729 | .Lcbc_dec_just_ret: |
726 | ret | 730 | ret |
731 | |||
732 | .align 16 | ||
733 | .Lbswap_mask: | ||
734 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | ||
735 | |||
736 | /* | ||
737 | * _aesni_inc_init: internal ABI | ||
738 | * setup registers used by _aesni_inc | ||
739 | * input: | ||
740 | * IV | ||
741 | * output: | ||
742 | * CTR: == IV, in little endian | ||
743 | * TCTR_LOW: == lower qword of CTR | ||
744 | * INC: == 1, in little endian | ||
745 | * BSWAP_MASK == endian swapping mask | ||
746 | */ | ||
747 | _aesni_inc_init: | ||
748 | movaps .Lbswap_mask, BSWAP_MASK | ||
749 | movaps IV, CTR | ||
750 | PSHUFB_XMM BSWAP_MASK CTR | ||
751 | mov $1, TCTR_LOW | ||
752 | MOVQ_R64_XMM TCTR_LOW INC | ||
753 | MOVQ_R64_XMM CTR TCTR_LOW | ||
754 | ret | ||
755 | |||
756 | /* | ||
757 | * _aesni_inc: internal ABI | ||
758 | * Increase IV by 1, IV is in big endian | ||
759 | * input: | ||
760 | * IV | ||
761 | * CTR: == IV, in little endian | ||
762 | * TCTR_LOW: == lower qword of CTR | ||
763 | * INC: == 1, in little endian | ||
764 | * BSWAP_MASK == endian swapping mask | ||
765 | * output: | ||
766 | * IV: Increase by 1 | ||
767 | * changed: | ||
768 | * CTR: == output IV, in little endian | ||
769 | * TCTR_LOW: == lower qword of CTR | ||
770 | */ | ||
771 | _aesni_inc: | ||
772 | paddq INC, CTR | ||
773 | add $1, TCTR_LOW | ||
774 | jnc .Linc_low | ||
775 | pslldq $8, INC | ||
776 | paddq INC, CTR | ||
777 | psrldq $8, INC | ||
778 | .Linc_low: | ||
779 | movaps CTR, IV | ||
780 | PSHUFB_XMM BSWAP_MASK IV | ||
781 | ret | ||
782 | |||
783 | /* | ||
784 | * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, | ||
785 | * size_t len, u8 *iv) | ||
786 | */ | ||
787 | ENTRY(aesni_ctr_enc) | ||
788 | cmp $16, LEN | ||
789 | jb .Lctr_enc_just_ret | ||
790 | mov 480(KEYP), KLEN | ||
791 | movups (IVP), IV | ||
792 | call _aesni_inc_init | ||
793 | cmp $64, LEN | ||
794 | jb .Lctr_enc_loop1 | ||
795 | .align 4 | ||
796 | .Lctr_enc_loop4: | ||
797 | movaps IV, STATE1 | ||
798 | call _aesni_inc | ||
799 | movups (INP), IN1 | ||
800 | movaps IV, STATE2 | ||
801 | call _aesni_inc | ||
802 | movups 0x10(INP), IN2 | ||
803 | movaps IV, STATE3 | ||
804 | call _aesni_inc | ||
805 | movups 0x20(INP), IN3 | ||
806 | movaps IV, STATE4 | ||
807 | call _aesni_inc | ||
808 | movups 0x30(INP), IN4 | ||
809 | call _aesni_enc4 | ||
810 | pxor IN1, STATE1 | ||
811 | movups STATE1, (OUTP) | ||
812 | pxor IN2, STATE2 | ||
813 | movups STATE2, 0x10(OUTP) | ||
814 | pxor IN3, STATE3 | ||
815 | movups STATE3, 0x20(OUTP) | ||
816 | pxor IN4, STATE4 | ||
817 | movups STATE4, 0x30(OUTP) | ||
818 | sub $64, LEN | ||
819 | add $64, INP | ||
820 | add $64, OUTP | ||
821 | cmp $64, LEN | ||
822 | jge .Lctr_enc_loop4 | ||
823 | cmp $16, LEN | ||
824 | jb .Lctr_enc_ret | ||
825 | .align 4 | ||
826 | .Lctr_enc_loop1: | ||
827 | movaps IV, STATE | ||
828 | call _aesni_inc | ||
829 | movups (INP), IN | ||
830 | call _aesni_enc1 | ||
831 | pxor IN, STATE | ||
832 | movups STATE, (OUTP) | ||
833 | sub $16, LEN | ||
834 | add $16, INP | ||
835 | add $16, OUTP | ||
836 | cmp $16, LEN | ||
837 | jge .Lctr_enc_loop1 | ||
838 | .Lctr_enc_ret: | ||
839 | movups IV, (IVP) | ||
840 | .Lctr_enc_just_ret: | ||
841 | ret | ||
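
The _aesni_inc helper above keeps a little-endian copy of the counter (CTR) and only byte-swaps it back into the big-endian IV after each increment, handling the carry out of the low quadword explicitly. For reference only, a minimal C sketch of the same 16-byte big-endian counter increment (the function name is made up for illustration and is not part of this patch):

#include <stdint.h>

/* Big-endian increment of a 16-byte counter block, propagating the carry
 * from the least-significant byte upward -- the effect _aesni_inc achieves
 * with paddq plus an explicit carry step into the high quadword. */
static void ctr_block_inc(uint8_t ctr[16])
{
        int i;

        for (i = 15; i >= 0; i--)
                if (++ctr[i] != 0)      /* no carry out of this byte */
                        break;
}
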
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 49c552c060e9..2cb3dcc4490a 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -18,6 +18,7 @@ | |||
18 | #include <crypto/algapi.h> | 18 | #include <crypto/algapi.h> |
19 | #include <crypto/aes.h> | 19 | #include <crypto/aes.h> |
20 | #include <crypto/cryptd.h> | 20 | #include <crypto/cryptd.h> |
21 | #include <crypto/ctr.h> | ||
21 | #include <asm/i387.h> | 22 | #include <asm/i387.h> |
22 | #include <asm/aes.h> | 23 | #include <asm/aes.h> |
23 | 24 | ||
@@ -58,6 +59,8 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, | |||
58 | const u8 *in, unsigned int len, u8 *iv); | 59 | const u8 *in, unsigned int len, u8 *iv); |
59 | asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, | 60 | asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, |
60 | const u8 *in, unsigned int len, u8 *iv); | 61 | const u8 *in, unsigned int len, u8 *iv); |
62 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, | ||
63 | const u8 *in, unsigned int len, u8 *iv); | ||
61 | 64 | ||
62 | static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) | 65 | static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) |
63 | { | 66 | { |
@@ -321,6 +324,72 @@ static struct crypto_alg blk_cbc_alg = { | |||
321 | }, | 324 | }, |
322 | }; | 325 | }; |
323 | 326 | ||
327 | static void ctr_crypt_final(struct crypto_aes_ctx *ctx, | ||
328 | struct blkcipher_walk *walk) | ||
329 | { | ||
330 | u8 *ctrblk = walk->iv; | ||
331 | u8 keystream[AES_BLOCK_SIZE]; | ||
332 | u8 *src = walk->src.virt.addr; | ||
333 | u8 *dst = walk->dst.virt.addr; | ||
334 | unsigned int nbytes = walk->nbytes; | ||
335 | |||
336 | aesni_enc(ctx, keystream, ctrblk); | ||
337 | crypto_xor(keystream, src, nbytes); | ||
338 | memcpy(dst, keystream, nbytes); | ||
339 | crypto_inc(ctrblk, AES_BLOCK_SIZE); | ||
340 | } | ||
341 | |||
342 | static int ctr_crypt(struct blkcipher_desc *desc, | ||
343 | struct scatterlist *dst, struct scatterlist *src, | ||
344 | unsigned int nbytes) | ||
345 | { | ||
346 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); | ||
347 | struct blkcipher_walk walk; | ||
348 | int err; | ||
349 | |||
350 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
351 | err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); | ||
352 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
353 | |||
354 | kernel_fpu_begin(); | ||
355 | while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { | ||
356 | aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, | ||
357 | nbytes & AES_BLOCK_MASK, walk.iv); | ||
358 | nbytes &= AES_BLOCK_SIZE - 1; | ||
359 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
360 | } | ||
361 | if (walk.nbytes) { | ||
362 | ctr_crypt_final(ctx, &walk); | ||
363 | err = blkcipher_walk_done(desc, &walk, 0); | ||
364 | } | ||
365 | kernel_fpu_end(); | ||
366 | |||
367 | return err; | ||
368 | } | ||
369 | |||
370 | static struct crypto_alg blk_ctr_alg = { | ||
371 | .cra_name = "__ctr-aes-aesni", | ||
372 | .cra_driver_name = "__driver-ctr-aes-aesni", | ||
373 | .cra_priority = 0, | ||
374 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
375 | .cra_blocksize = 1, | ||
376 | .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, | ||
377 | .cra_alignmask = 0, | ||
378 | .cra_type = &crypto_blkcipher_type, | ||
379 | .cra_module = THIS_MODULE, | ||
380 | .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), | ||
381 | .cra_u = { | ||
382 | .blkcipher = { | ||
383 | .min_keysize = AES_MIN_KEY_SIZE, | ||
384 | .max_keysize = AES_MAX_KEY_SIZE, | ||
385 | .ivsize = AES_BLOCK_SIZE, | ||
386 | .setkey = aes_set_key, | ||
387 | .encrypt = ctr_crypt, | ||
388 | .decrypt = ctr_crypt, | ||
389 | }, | ||
390 | }, | ||
391 | }; | ||
392 | |||
324 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | 393 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, |
325 | unsigned int key_len) | 394 | unsigned int key_len) |
326 | { | 395 | { |
@@ -467,13 +536,11 @@ static struct crypto_alg ablk_cbc_alg = { | |||
467 | }, | 536 | }, |
468 | }; | 537 | }; |
469 | 538 | ||
470 | #ifdef HAS_CTR | ||
471 | static int ablk_ctr_init(struct crypto_tfm *tfm) | 539 | static int ablk_ctr_init(struct crypto_tfm *tfm) |
472 | { | 540 | { |
473 | struct cryptd_ablkcipher *cryptd_tfm; | 541 | struct cryptd_ablkcipher *cryptd_tfm; |
474 | 542 | ||
475 | cryptd_tfm = cryptd_alloc_ablkcipher("fpu(ctr(__driver-aes-aesni))", | 543 | cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-aes-aesni", 0, 0); |
476 | 0, 0); | ||
477 | if (IS_ERR(cryptd_tfm)) | 544 | if (IS_ERR(cryptd_tfm)) |
478 | return PTR_ERR(cryptd_tfm); | 545 | return PTR_ERR(cryptd_tfm); |
479 | ablk_init_common(tfm, cryptd_tfm); | 546 | ablk_init_common(tfm, cryptd_tfm); |
@@ -500,11 +567,50 @@ static struct crypto_alg ablk_ctr_alg = { | |||
500 | .ivsize = AES_BLOCK_SIZE, | 567 | .ivsize = AES_BLOCK_SIZE, |
501 | .setkey = ablk_set_key, | 568 | .setkey = ablk_set_key, |
502 | .encrypt = ablk_encrypt, | 569 | .encrypt = ablk_encrypt, |
503 | .decrypt = ablk_decrypt, | 570 | .decrypt = ablk_encrypt, |
504 | .geniv = "chainiv", | 571 | .geniv = "chainiv", |
505 | }, | 572 | }, |
506 | }, | 573 | }, |
507 | }; | 574 | }; |
575 | |||
576 | #ifdef HAS_CTR | ||
577 | static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm) | ||
578 | { | ||
579 | struct cryptd_ablkcipher *cryptd_tfm; | ||
580 | |||
581 | cryptd_tfm = cryptd_alloc_ablkcipher( | ||
582 | "rfc3686(__driver-ctr-aes-aesni)", 0, 0); | ||
583 | if (IS_ERR(cryptd_tfm)) | ||
584 | return PTR_ERR(cryptd_tfm); | ||
585 | ablk_init_common(tfm, cryptd_tfm); | ||
586 | return 0; | ||
587 | } | ||
588 | |||
589 | static struct crypto_alg ablk_rfc3686_ctr_alg = { | ||
590 | .cra_name = "rfc3686(ctr(aes))", | ||
591 | .cra_driver_name = "rfc3686-ctr-aes-aesni", | ||
592 | .cra_priority = 400, | ||
593 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
594 | .cra_blocksize = 1, | ||
595 | .cra_ctxsize = sizeof(struct async_aes_ctx), | ||
596 | .cra_alignmask = 0, | ||
597 | .cra_type = &crypto_ablkcipher_type, | ||
598 | .cra_module = THIS_MODULE, | ||
599 | .cra_list = LIST_HEAD_INIT(ablk_rfc3686_ctr_alg.cra_list), | ||
600 | .cra_init = ablk_rfc3686_ctr_init, | ||
601 | .cra_exit = ablk_exit, | ||
602 | .cra_u = { | ||
603 | .ablkcipher = { | ||
604 | .min_keysize = AES_MIN_KEY_SIZE+CTR_RFC3686_NONCE_SIZE, | ||
605 | .max_keysize = AES_MAX_KEY_SIZE+CTR_RFC3686_NONCE_SIZE, | ||
606 | .ivsize = CTR_RFC3686_IV_SIZE, | ||
607 | .setkey = ablk_set_key, | ||
608 | .encrypt = ablk_encrypt, | ||
609 | .decrypt = ablk_decrypt, | ||
610 | .geniv = "seqiv", | ||
611 | }, | ||
612 | }, | ||
613 | }; | ||
508 | #endif | 614 | #endif |
509 | 615 | ||
510 | #ifdef HAS_LRW | 616 | #ifdef HAS_LRW |
@@ -640,13 +746,17 @@ static int __init aesni_init(void) | |||
640 | goto blk_ecb_err; | 746 | goto blk_ecb_err; |
641 | if ((err = crypto_register_alg(&blk_cbc_alg))) | 747 | if ((err = crypto_register_alg(&blk_cbc_alg))) |
642 | goto blk_cbc_err; | 748 | goto blk_cbc_err; |
749 | if ((err = crypto_register_alg(&blk_ctr_alg))) | ||
750 | goto blk_ctr_err; | ||
643 | if ((err = crypto_register_alg(&ablk_ecb_alg))) | 751 | if ((err = crypto_register_alg(&ablk_ecb_alg))) |
644 | goto ablk_ecb_err; | 752 | goto ablk_ecb_err; |
645 | if ((err = crypto_register_alg(&ablk_cbc_alg))) | 753 | if ((err = crypto_register_alg(&ablk_cbc_alg))) |
646 | goto ablk_cbc_err; | 754 | goto ablk_cbc_err; |
647 | #ifdef HAS_CTR | ||
648 | if ((err = crypto_register_alg(&ablk_ctr_alg))) | 755 | if ((err = crypto_register_alg(&ablk_ctr_alg))) |
649 | goto ablk_ctr_err; | 756 | goto ablk_ctr_err; |
757 | #ifdef HAS_CTR | ||
758 | if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg))) | ||
759 | goto ablk_rfc3686_ctr_err; | ||
650 | #endif | 760 | #endif |
651 | #ifdef HAS_LRW | 761 | #ifdef HAS_LRW |
652 | if ((err = crypto_register_alg(&ablk_lrw_alg))) | 762 | if ((err = crypto_register_alg(&ablk_lrw_alg))) |
@@ -675,13 +785,17 @@ ablk_pcbc_err: | |||
675 | ablk_lrw_err: | 785 | ablk_lrw_err: |
676 | #endif | 786 | #endif |
677 | #ifdef HAS_CTR | 787 | #ifdef HAS_CTR |
788 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); | ||
789 | ablk_rfc3686_ctr_err: | ||
790 | #endif | ||
678 | crypto_unregister_alg(&ablk_ctr_alg); | 791 | crypto_unregister_alg(&ablk_ctr_alg); |
679 | ablk_ctr_err: | 792 | ablk_ctr_err: |
680 | #endif | ||
681 | crypto_unregister_alg(&ablk_cbc_alg); | 793 | crypto_unregister_alg(&ablk_cbc_alg); |
682 | ablk_cbc_err: | 794 | ablk_cbc_err: |
683 | crypto_unregister_alg(&ablk_ecb_alg); | 795 | crypto_unregister_alg(&ablk_ecb_alg); |
684 | ablk_ecb_err: | 796 | ablk_ecb_err: |
797 | crypto_unregister_alg(&blk_ctr_alg); | ||
798 | blk_ctr_err: | ||
685 | crypto_unregister_alg(&blk_cbc_alg); | 799 | crypto_unregister_alg(&blk_cbc_alg); |
686 | blk_cbc_err: | 800 | blk_cbc_err: |
687 | crypto_unregister_alg(&blk_ecb_alg); | 801 | crypto_unregister_alg(&blk_ecb_alg); |
@@ -705,10 +819,12 @@ static void __exit aesni_exit(void) | |||
705 | crypto_unregister_alg(&ablk_lrw_alg); | 819 | crypto_unregister_alg(&ablk_lrw_alg); |
706 | #endif | 820 | #endif |
707 | #ifdef HAS_CTR | 821 | #ifdef HAS_CTR |
708 | crypto_unregister_alg(&ablk_ctr_alg); | 822 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); |
709 | #endif | 823 | #endif |
824 | crypto_unregister_alg(&ablk_ctr_alg); | ||
710 | crypto_unregister_alg(&ablk_cbc_alg); | 825 | crypto_unregister_alg(&ablk_cbc_alg); |
711 | crypto_unregister_alg(&ablk_ecb_alg); | 826 | crypto_unregister_alg(&ablk_ecb_alg); |
827 | crypto_unregister_alg(&blk_ctr_alg); | ||
712 | crypto_unregister_alg(&blk_cbc_alg); | 828 | crypto_unregister_alg(&blk_cbc_alg); |
713 | crypto_unregister_alg(&blk_ecb_alg); | 829 | crypto_unregister_alg(&blk_ecb_alg); |
714 | crypto_unregister_alg(&__aesni_alg); | 830 | crypto_unregister_alg(&__aesni_alg); |
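
ctr_crypt() above hands whole 16-byte blocks to the aesni_ctr_enc() assembly routine and lets ctr_crypt_final() deal with a trailing partial block: the counter is encrypted once, only the remaining bytes are XORed, and the counter is then advanced. A self-contained sketch of that final-block path, with block_encrypt() standing in for aesni_enc() and the increment mirroring crypto_inc() (names are illustrative, not kernel API):

#include <stddef.h>
#include <stdint.h>

/* Stand-alone sketch of the partial-block path in ctr_crypt_final():
 * encrypt the counter once to produce a keystream block, XOR only the
 * bytes that remain, then advance the counter. */
static void ctr_final_sketch(void (*block_encrypt)(uint8_t out[16], const uint8_t in[16]),
                             uint8_t ctr[16], uint8_t *dst, const uint8_t *src,
                             size_t nbytes)     /* nbytes < 16 */
{
        uint8_t keystream[16];
        size_t i;
        int b;

        block_encrypt(keystream, ctr);
        for (i = 0; i < nbytes; i++)
                dst[i] = src[i] ^ keystream[i];

        /* big-endian increment of the counter block */
        for (b = 15; b >= 0; b--)
                if (++ctr[b] != 0)
                        break;
}
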
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 56f462cf22d2..aa2c39d968fc 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -85,7 +85,6 @@ extern int acpi_ioapic; | |||
85 | extern int acpi_noirq; | 85 | extern int acpi_noirq; |
86 | extern int acpi_strict; | 86 | extern int acpi_strict; |
87 | extern int acpi_disabled; | 87 | extern int acpi_disabled; |
88 | extern int acpi_ht; | ||
89 | extern int acpi_pci_disabled; | 88 | extern int acpi_pci_disabled; |
90 | extern int acpi_skip_timer_override; | 89 | extern int acpi_skip_timer_override; |
91 | extern int acpi_use_timer_override; | 90 | extern int acpi_use_timer_override; |
@@ -97,7 +96,6 @@ void acpi_pic_sci_set_trigger(unsigned int, u16); | |||
97 | static inline void disable_acpi(void) | 96 | static inline void disable_acpi(void) |
98 | { | 97 | { |
99 | acpi_disabled = 1; | 98 | acpi_disabled = 1; |
100 | acpi_ht = 0; | ||
101 | acpi_pci_disabled = 1; | 99 | acpi_pci_disabled = 1; |
102 | acpi_noirq = 1; | 100 | acpi_noirq = 1; |
103 | } | 101 | } |
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index b97f786a48d5..a63a68be1cce 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -6,8 +6,8 @@ | |||
6 | .macro LOCK_PREFIX | 6 | .macro LOCK_PREFIX |
7 | 1: lock | 7 | 1: lock |
8 | .section .smp_locks,"a" | 8 | .section .smp_locks,"a" |
9 | _ASM_ALIGN | 9 | .balign 4 |
10 | _ASM_PTR 1b | 10 | .long 1b - . |
11 | .previous | 11 | .previous |
12 | .endm | 12 | .endm |
13 | #else | 13 | #else |
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index b09ec55650b3..03b6bb5394a0 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -28,20 +28,20 @@ | |||
28 | */ | 28 | */ |
29 | 29 | ||
30 | #ifdef CONFIG_SMP | 30 | #ifdef CONFIG_SMP |
31 | #define LOCK_PREFIX \ | 31 | #define LOCK_PREFIX_HERE \ |
32 | ".section .smp_locks,\"a\"\n" \ | 32 | ".section .smp_locks,\"a\"\n" \ |
33 | _ASM_ALIGN "\n" \ | 33 | ".balign 4\n" \ |
34 | _ASM_PTR "661f\n" /* address */ \ | 34 | ".long 671f - .\n" /* offset */ \ |
35 | ".previous\n" \ | 35 | ".previous\n" \ |
36 | "661:\n\tlock; " | 36 | "671:" |
37 | |||
38 | #define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " | ||
37 | 39 | ||
38 | #else /* ! CONFIG_SMP */ | 40 | #else /* ! CONFIG_SMP */ |
41 | #define LOCK_PREFIX_HERE "" | ||
39 | #define LOCK_PREFIX "" | 42 | #define LOCK_PREFIX "" |
40 | #endif | 43 | #endif |
41 | 44 | ||
42 | /* This must be included *after* the definition of LOCK_PREFIX */ | ||
43 | #include <asm/cpufeature.h> | ||
44 | |||
45 | struct alt_instr { | 45 | struct alt_instr { |
46 | u8 *instr; /* original instruction */ | 46 | u8 *instr; /* original instruction */ |
47 | u8 *replacement; | 47 | u8 *replacement; |
@@ -96,6 +96,12 @@ static inline int alternatives_text_reserved(void *start, void *end) | |||
96 | ".previous" | 96 | ".previous" |
97 | 97 | ||
98 | /* | 98 | /* |
99 | * This must be included *after* the definition of ALTERNATIVE due to | ||
100 | * <asm/arch_hweight.h> | ||
101 | */ | ||
102 | #include <asm/cpufeature.h> | ||
103 | |||
104 | /* | ||
99 | * Alternative instructions for different CPU types or capabilities. | 105 | * Alternative instructions for different CPU types or capabilities. |
100 | * | 106 | * |
101 | * This allows to use optimized instructions even on generic binary | 107 | * This allows to use optimized instructions even on generic binary |
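
With this change the .smp_locks section stores a 32-bit offset relative to each entry (".long 671f - .") rather than an absolute pointer, which shrinks the table on 64-bit. Recovering the address of the lock prefix from such an entry is plain pointer arithmetic; a hedged sketch of what the patching side then has to do (not the kernel's actual code):

#include <stdint.h>

/* A relative .smp_locks entry records "target - &entry" as a 32-bit
 * value, so the lock prefix byte is found by adding the stored offset
 * back to the entry's own address (sketch only). */
static inline const uint8_t *smp_lock_target(const int32_t *entry)
{
        return (const uint8_t *)entry + *entry;
}
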
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index b4ac2cdcb64f..1fa03e04ae44 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -373,6 +373,7 @@ extern atomic_t init_deasserted; | |||
373 | extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); | 373 | extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); |
374 | #endif | 374 | #endif |
375 | 375 | ||
376 | #ifdef CONFIG_X86_LOCAL_APIC | ||
376 | static inline u32 apic_read(u32 reg) | 377 | static inline u32 apic_read(u32 reg) |
377 | { | 378 | { |
378 | return apic->read(reg); | 379 | return apic->read(reg); |
@@ -403,10 +404,19 @@ static inline u32 safe_apic_wait_icr_idle(void) | |||
403 | return apic->safe_wait_icr_idle(); | 404 | return apic->safe_wait_icr_idle(); |
404 | } | 405 | } |
405 | 406 | ||
407 | #else /* CONFIG_X86_LOCAL_APIC */ | ||
408 | |||
409 | static inline u32 apic_read(u32 reg) { return 0; } | ||
410 | static inline void apic_write(u32 reg, u32 val) { } | ||
411 | static inline u64 apic_icr_read(void) { return 0; } | ||
412 | static inline void apic_icr_write(u32 low, u32 high) { } | ||
413 | static inline void apic_wait_icr_idle(void) { } | ||
414 | static inline u32 safe_apic_wait_icr_idle(void) { return 0; } | ||
415 | |||
416 | #endif /* CONFIG_X86_LOCAL_APIC */ | ||
406 | 417 | ||
407 | static inline void ack_APIC_irq(void) | 418 | static inline void ack_APIC_irq(void) |
408 | { | 419 | { |
409 | #ifdef CONFIG_X86_LOCAL_APIC | ||
410 | /* | 420 | /* |
411 | * ack_APIC_irq() actually gets compiled as a single instruction | 421 | * ack_APIC_irq() actually gets compiled as a single instruction |
412 | * ... yummie. | 422 | * ... yummie. |
@@ -414,7 +424,6 @@ static inline void ack_APIC_irq(void) | |||
414 | 424 | ||
415 | /* Docs say use 0 for future compatibility */ | 425 | /* Docs say use 0 for future compatibility */ |
416 | apic_write(APIC_EOI, 0); | 426 | apic_write(APIC_EOI, 0); |
417 | #endif | ||
418 | } | 427 | } |
419 | 428 | ||
420 | static inline unsigned default_get_apic_id(unsigned long x) | 429 | static inline unsigned default_get_apic_id(unsigned long x) |
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
new file mode 100644
index 000000000000..9686c3d9ff73
--- /dev/null
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -0,0 +1,61 @@ | |||
1 | #ifndef _ASM_X86_HWEIGHT_H | ||
2 | #define _ASM_X86_HWEIGHT_H | ||
3 | |||
4 | #ifdef CONFIG_64BIT | ||
5 | /* popcnt %edi, %eax -- redundant REX prefix for alignment */ | ||
6 | #define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" | ||
7 | /* popcnt %rdi, %rax */ | ||
8 | #define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7" | ||
9 | #define REG_IN "D" | ||
10 | #define REG_OUT "a" | ||
11 | #else | ||
12 | /* popcnt %eax, %eax */ | ||
13 | #define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0" | ||
14 | #define REG_IN "a" | ||
15 | #define REG_OUT "a" | ||
16 | #endif | ||
17 | |||
18 | /* | ||
19 | * __sw_hweightXX are called from within the alternatives below | ||
20 | * and callee-clobbered registers need to be taken care of. See | ||
21 | * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective | ||
22 | * compiler switches. | ||
23 | */ | ||
24 | static inline unsigned int __arch_hweight32(unsigned int w) | ||
25 | { | ||
26 | unsigned int res = 0; | ||
27 | |||
28 | asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT) | ||
29 | : "="REG_OUT (res) | ||
30 | : REG_IN (w)); | ||
31 | |||
32 | return res; | ||
33 | } | ||
34 | |||
35 | static inline unsigned int __arch_hweight16(unsigned int w) | ||
36 | { | ||
37 | return __arch_hweight32(w & 0xffff); | ||
38 | } | ||
39 | |||
40 | static inline unsigned int __arch_hweight8(unsigned int w) | ||
41 | { | ||
42 | return __arch_hweight32(w & 0xff); | ||
43 | } | ||
44 | |||
45 | static inline unsigned long __arch_hweight64(__u64 w) | ||
46 | { | ||
47 | unsigned long res = 0; | ||
48 | |||
49 | #ifdef CONFIG_X86_32 | ||
50 | return __arch_hweight32((u32)w) + | ||
51 | __arch_hweight32((u32)(w >> 32)); | ||
52 | #else | ||
53 | asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) | ||
54 | : "="REG_OUT (res) | ||
55 | : REG_IN (w)); | ||
56 | #endif /* CONFIG_X86_32 */ | ||
57 | |||
58 | return res; | ||
59 | } | ||
60 | |||
61 | #endif | ||
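
__arch_hweight32() relies on ALTERNATIVE to patch the "call __sw_hweight32" into a bare POPCNT instruction on CPUs that advertise X86_FEATURE_POPCNT; the ARCH_HWEIGHT_CFLAGS entry added to the Kconfig earlier makes the software fallback preserve the registers a real POPCNT would leave untouched. For orientation, a classic SWAR population count of the kind such a fallback computes (illustrative only, not the kernel's actual __sw_hweight32 body):

#include <stdint.h>

/* Parallel (SWAR) bit count: fold pairs, nibbles, then sum the bytes
 * with a multiply -- what a software popcount fallback boils down to. */
static unsigned int sw_hweight32_sketch(uint32_t w)
{
        w -= (w >> 1) & 0x55555555;
        w  = (w & 0x33333333) + ((w >> 2) & 0x33333333);
        w  = (w + (w >> 4)) & 0x0f0f0f0f;
        return (w * 0x01010101) >> 24;
}
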
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 8f8217b9bdac..952a826ac4e5 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -22,7 +22,7 @@ | |||
22 | */ | 22 | */ |
23 | static inline int atomic_read(const atomic_t *v) | 23 | static inline int atomic_read(const atomic_t *v) |
24 | { | 24 | { |
25 | return v->counter; | 25 | return (*(volatile int *)&(v)->counter); |
26 | } | 26 | } |
27 | 27 | ||
28 | /** | 28 | /** |
@@ -246,6 +246,29 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) | |||
246 | 246 | ||
247 | #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) | 247 | #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) |
248 | 248 | ||
249 | /* | ||
250 | * atomic_dec_if_positive - decrement by 1 if old value positive | ||
251 | * @v: pointer of type atomic_t | ||
252 | * | ||
253 | * The function returns the old value of *v minus 1, even if | ||
254 | * the atomic variable, v, was not decremented. | ||
255 | */ | ||
256 | static inline int atomic_dec_if_positive(atomic_t *v) | ||
257 | { | ||
258 | int c, old, dec; | ||
259 | c = atomic_read(v); | ||
260 | for (;;) { | ||
261 | dec = c - 1; | ||
262 | if (unlikely(dec < 0)) | ||
263 | break; | ||
264 | old = atomic_cmpxchg((v), c, dec); | ||
265 | if (likely(old == c)) | ||
266 | break; | ||
267 | c = old; | ||
268 | } | ||
269 | return dec; | ||
270 | } | ||
271 | |||
249 | /** | 272 | /** |
250 | * atomic_inc_short - increment of a short integer | 273 | * atomic_inc_short - increment of a short integer |
251 | * @v: pointer to type int | 274 | * @v: pointer to type int |
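
The new atomic_dec_if_positive() is a compare-and-exchange retry loop: it rereads the counter, refuses to drop below zero, and retries whenever another CPU changed the value in between. A hypothetical caller (not part of the patch) that takes one token from a counted pool could use it like this:

#include <asm/atomic.h>

/* Hypothetical helper: take one token without ever letting the count go
 * negative.  atomic_dec_if_positive() returns the old value minus one
 * even when it declines to decrement, so a negative return means the
 * pool was already empty. */
static int try_take_token(atomic_t *tokens)
{
        return atomic_dec_if_positive(tokens) >= 0;
}
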
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 03027bf28de5..2a934aa19a43 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -14,109 +14,193 @@ typedef struct { | |||
14 | 14 | ||
15 | #define ATOMIC64_INIT(val) { (val) } | 15 | #define ATOMIC64_INIT(val) { (val) } |
16 | 16 | ||
17 | extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val); | 17 | #ifdef CONFIG_X86_CMPXCHG64 |
18 | #define ATOMIC64_ALTERNATIVE_(f, g) "call atomic64_" #g "_cx8" | ||
19 | #else | ||
20 | #define ATOMIC64_ALTERNATIVE_(f, g) ALTERNATIVE("call atomic64_" #f "_386", "call atomic64_" #g "_cx8", X86_FEATURE_CX8) | ||
21 | #endif | ||
22 | |||
23 | #define ATOMIC64_ALTERNATIVE(f) ATOMIC64_ALTERNATIVE_(f, f) | ||
24 | |||
25 | /** | ||
26 | * atomic64_cmpxchg - cmpxchg atomic64 variable | ||
27 | * @p: pointer to type atomic64_t | ||
28 | * @o: expected value | ||
29 | * @n: new value | ||
30 | * | ||
31 | * Atomically sets @v to @n if it was equal to @o and returns | ||
32 | * the old value. | ||
33 | */ | ||
34 | |||
35 | static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) | ||
36 | { | ||
37 | return cmpxchg64(&v->counter, o, n); | ||
38 | } | ||
18 | 39 | ||
19 | /** | 40 | /** |
20 | * atomic64_xchg - xchg atomic64 variable | 41 | * atomic64_xchg - xchg atomic64 variable |
21 | * @ptr: pointer to type atomic64_t | 42 | * @v: pointer to type atomic64_t |
22 | * @new_val: value to assign | 43 | * @n: value to assign |
23 | * | 44 | * |
24 | * Atomically xchgs the value of @ptr to @new_val and returns | 45 | * Atomically xchgs the value of @v to @n and returns |
25 | * the old value. | 46 | * the old value. |
26 | */ | 47 | */ |
27 | extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val); | 48 | static inline long long atomic64_xchg(atomic64_t *v, long long n) |
49 | { | ||
50 | long long o; | ||
51 | unsigned high = (unsigned)(n >> 32); | ||
52 | unsigned low = (unsigned)n; | ||
53 | asm volatile(ATOMIC64_ALTERNATIVE(xchg) | ||
54 | : "=A" (o), "+b" (low), "+c" (high) | ||
55 | : "S" (v) | ||
56 | : "memory" | ||
57 | ); | ||
58 | return o; | ||
59 | } | ||
28 | 60 | ||
29 | /** | 61 | /** |
30 | * atomic64_set - set atomic64 variable | 62 | * atomic64_set - set atomic64 variable |
31 | * @ptr: pointer to type atomic64_t | 63 | * @v: pointer to type atomic64_t |
32 | * @new_val: value to assign | 64 | * @n: value to assign |
33 | * | 65 | * |
34 | * Atomically sets the value of @ptr to @new_val. | 66 | * Atomically sets the value of @v to @n. |
35 | */ | 67 | */ |
36 | extern void atomic64_set(atomic64_t *ptr, u64 new_val); | 68 | static inline void atomic64_set(atomic64_t *v, long long i) |
69 | { | ||
70 | unsigned high = (unsigned)(i >> 32); | ||
71 | unsigned low = (unsigned)i; | ||
72 | asm volatile(ATOMIC64_ALTERNATIVE(set) | ||
73 | : "+b" (low), "+c" (high) | ||
74 | : "S" (v) | ||
75 | : "eax", "edx", "memory" | ||
76 | ); | ||
77 | } | ||
37 | 78 | ||
38 | /** | 79 | /** |
39 | * atomic64_read - read atomic64 variable | 80 | * atomic64_read - read atomic64 variable |
40 | * @ptr: pointer to type atomic64_t | 81 | * @v: pointer to type atomic64_t |
41 | * | 82 | * |
42 | * Atomically reads the value of @ptr and returns it. | 83 | * Atomically reads the value of @v and returns it. |
43 | */ | 84 | */ |
44 | static inline u64 atomic64_read(atomic64_t *ptr) | 85 | static inline long long atomic64_read(atomic64_t *v) |
45 | { | 86 | { |
46 | u64 res; | 87 | long long r; |
47 | 88 | asm volatile(ATOMIC64_ALTERNATIVE(read) | |
48 | /* | 89 | : "=A" (r), "+c" (v) |
49 | * Note, we inline this atomic64_t primitive because | 90 | : : "memory" |
50 | * it only clobbers EAX/EDX and leaves the others | 91 | ); |
51 | * untouched. We also (somewhat subtly) rely on the | 92 | return r; |
52 | * fact that cmpxchg8b returns the current 64-bit value | 93 | } |
53 | * of the memory location we are touching: | ||
54 | */ | ||
55 | asm volatile( | ||
56 | "mov %%ebx, %%eax\n\t" | ||
57 | "mov %%ecx, %%edx\n\t" | ||
58 | LOCK_PREFIX "cmpxchg8b %1\n" | ||
59 | : "=&A" (res) | ||
60 | : "m" (*ptr) | ||
61 | ); | ||
62 | |||
63 | return res; | ||
64 | } | ||
65 | |||
66 | extern u64 atomic64_read(atomic64_t *ptr); | ||
67 | 94 | ||
68 | /** | 95 | /** |
69 | * atomic64_add_return - add and return | 96 | * atomic64_add_return - add and return |
70 | * @delta: integer value to add | 97 | * @i: integer value to add |
71 | * @ptr: pointer to type atomic64_t | 98 | * @v: pointer to type atomic64_t |
72 | * | 99 | * |
73 | * Atomically adds @delta to @ptr and returns @delta + *@ptr | 100 | * Atomically adds @i to @v and returns @i + *@v |
74 | */ | 101 | */ |
75 | extern u64 atomic64_add_return(u64 delta, atomic64_t *ptr); | 102 | static inline long long atomic64_add_return(long long i, atomic64_t *v) |
103 | { | ||
104 | asm volatile(ATOMIC64_ALTERNATIVE(add_return) | ||
105 | : "+A" (i), "+c" (v) | ||
106 | : : "memory" | ||
107 | ); | ||
108 | return i; | ||
109 | } | ||
76 | 110 | ||
77 | /* | 111 | /* |
78 | * Other variants with different arithmetic operators: | 112 | * Other variants with different arithmetic operators: |
79 | */ | 113 | */ |
80 | extern u64 atomic64_sub_return(u64 delta, atomic64_t *ptr); | 114 | static inline long long atomic64_sub_return(long long i, atomic64_t *v) |
81 | extern u64 atomic64_inc_return(atomic64_t *ptr); | 115 | { |
82 | extern u64 atomic64_dec_return(atomic64_t *ptr); | 116 | asm volatile(ATOMIC64_ALTERNATIVE(sub_return) |
117 | : "+A" (i), "+c" (v) | ||
118 | : : "memory" | ||
119 | ); | ||
120 | return i; | ||
121 | } | ||
122 | |||
123 | static inline long long atomic64_inc_return(atomic64_t *v) | ||
124 | { | ||
125 | long long a; | ||
126 | asm volatile(ATOMIC64_ALTERNATIVE(inc_return) | ||
127 | : "=A" (a) | ||
128 | : "S" (v) | ||
129 | : "memory", "ecx" | ||
130 | ); | ||
131 | return a; | ||
132 | } | ||
133 | |||
134 | static inline long long atomic64_dec_return(atomic64_t *v) | ||
135 | { | ||
136 | long long a; | ||
137 | asm volatile(ATOMIC64_ALTERNATIVE(dec_return) | ||
138 | : "=A" (a) | ||
139 | : "S" (v) | ||
140 | : "memory", "ecx" | ||
141 | ); | ||
142 | return a; | ||
143 | } | ||
83 | 144 | ||
84 | /** | 145 | /** |
85 | * atomic64_add - add integer to atomic64 variable | 146 | * atomic64_add - add integer to atomic64 variable |
86 | * @delta: integer value to add | 147 | * @i: integer value to add |
87 | * @ptr: pointer to type atomic64_t | 148 | * @v: pointer to type atomic64_t |
88 | * | 149 | * |
89 | * Atomically adds @delta to @ptr. | 150 | * Atomically adds @i to @v. |
90 | */ | 151 | */ |
91 | extern void atomic64_add(u64 delta, atomic64_t *ptr); | 152 | static inline long long atomic64_add(long long i, atomic64_t *v) |
153 | { | ||
154 | asm volatile(ATOMIC64_ALTERNATIVE_(add, add_return) | ||
155 | : "+A" (i), "+c" (v) | ||
156 | : : "memory" | ||
157 | ); | ||
158 | return i; | ||
159 | } | ||
92 | 160 | ||
93 | /** | 161 | /** |
94 | * atomic64_sub - subtract the atomic64 variable | 162 | * atomic64_sub - subtract the atomic64 variable |
95 | * @delta: integer value to subtract | 163 | * @i: integer value to subtract |
96 | * @ptr: pointer to type atomic64_t | 164 | * @v: pointer to type atomic64_t |
97 | * | 165 | * |
98 | * Atomically subtracts @delta from @ptr. | 166 | * Atomically subtracts @i from @v. |
99 | */ | 167 | */ |
100 | extern void atomic64_sub(u64 delta, atomic64_t *ptr); | 168 | static inline long long atomic64_sub(long long i, atomic64_t *v) |
169 | { | ||
170 | asm volatile(ATOMIC64_ALTERNATIVE_(sub, sub_return) | ||
171 | : "+A" (i), "+c" (v) | ||
172 | : : "memory" | ||
173 | ); | ||
174 | return i; | ||
175 | } | ||
101 | 176 | ||
102 | /** | 177 | /** |
103 | * atomic64_sub_and_test - subtract value from variable and test result | 178 | * atomic64_sub_and_test - subtract value from variable and test result |
104 | * @delta: integer value to subtract | 179 | * @i: integer value to subtract |
105 | * @ptr: pointer to type atomic64_t | 180 | * @v: pointer to type atomic64_t |
106 | * | 181 | * |
107 | * Atomically subtracts @delta from @ptr and returns | 182 | * Atomically subtracts @i from @v and returns |
108 | * true if the result is zero, or false for all | 183 | * true if the result is zero, or false for all |
109 | * other cases. | 184 | * other cases. |
110 | */ | 185 | */ |
111 | extern int atomic64_sub_and_test(u64 delta, atomic64_t *ptr); | 186 | static inline int atomic64_sub_and_test(long long i, atomic64_t *v) |
187 | { | ||
188 | return atomic64_sub_return(i, v) == 0; | ||
189 | } | ||
112 | 190 | ||
113 | /** | 191 | /** |
114 | * atomic64_inc - increment atomic64 variable | 192 | * atomic64_inc - increment atomic64 variable |
115 | * @ptr: pointer to type atomic64_t | 193 | * @v: pointer to type atomic64_t |
116 | * | 194 | * |
117 | * Atomically increments @ptr by 1. | 195 | * Atomically increments @v by 1. |
118 | */ | 196 | */ |
119 | extern void atomic64_inc(atomic64_t *ptr); | 197 | static inline void atomic64_inc(atomic64_t *v) |
198 | { | ||
199 | asm volatile(ATOMIC64_ALTERNATIVE_(inc, inc_return) | ||
200 | : : "S" (v) | ||
201 | : "memory", "eax", "ecx", "edx" | ||
202 | ); | ||
203 | } | ||
120 | 204 | ||
121 | /** | 205 | /** |
122 | * atomic64_dec - decrement atomic64 variable | 206 | * atomic64_dec - decrement atomic64 variable |
@@ -124,37 +208,97 @@ extern void atomic64_inc(atomic64_t *ptr); | |||
124 | * | 208 | * |
125 | * Atomically decrements @ptr by 1. | 209 | * Atomically decrements @ptr by 1. |
126 | */ | 210 | */ |
127 | extern void atomic64_dec(atomic64_t *ptr); | 211 | static inline void atomic64_dec(atomic64_t *v) |
212 | { | ||
213 | asm volatile(ATOMIC64_ALTERNATIVE_(dec, dec_return) | ||
214 | : : "S" (v) | ||
215 | : "memory", "eax", "ecx", "edx" | ||
216 | ); | ||
217 | } | ||
128 | 218 | ||
129 | /** | 219 | /** |
130 | * atomic64_dec_and_test - decrement and test | 220 | * atomic64_dec_and_test - decrement and test |
131 | * @ptr: pointer to type atomic64_t | 221 | * @v: pointer to type atomic64_t |
132 | * | 222 | * |
133 | * Atomically decrements @ptr by 1 and | 223 | * Atomically decrements @v by 1 and |
134 | * returns true if the result is 0, or false for all other | 224 | * returns true if the result is 0, or false for all other |
135 | * cases. | 225 | * cases. |
136 | */ | 226 | */ |
137 | extern int atomic64_dec_and_test(atomic64_t *ptr); | 227 | static inline int atomic64_dec_and_test(atomic64_t *v) |
228 | { | ||
229 | return atomic64_dec_return(v) == 0; | ||
230 | } | ||
138 | 231 | ||
139 | /** | 232 | /** |
140 | * atomic64_inc_and_test - increment and test | 233 | * atomic64_inc_and_test - increment and test |
141 | * @ptr: pointer to type atomic64_t | 234 | * @v: pointer to type atomic64_t |
142 | * | 235 | * |
143 | * Atomically increments @ptr by 1 | 236 | * Atomically increments @v by 1 |
144 | * and returns true if the result is zero, or false for all | 237 | * and returns true if the result is zero, or false for all |
145 | * other cases. | 238 | * other cases. |
146 | */ | 239 | */ |
147 | extern int atomic64_inc_and_test(atomic64_t *ptr); | 240 | static inline int atomic64_inc_and_test(atomic64_t *v) |
241 | { | ||
242 | return atomic64_inc_return(v) == 0; | ||
243 | } | ||
148 | 244 | ||
149 | /** | 245 | /** |
150 | * atomic64_add_negative - add and test if negative | 246 | * atomic64_add_negative - add and test if negative |
151 | * @delta: integer value to add | 247 | * @i: integer value to add |
152 | * @ptr: pointer to type atomic64_t | 248 | * @v: pointer to type atomic64_t |
153 | * | 249 | * |
154 | * Atomically adds @delta to @ptr and returns true | 250 | * Atomically adds @i to @v and returns true |
155 | * if the result is negative, or false when | 251 | * if the result is negative, or false when |
156 | * result is greater than or equal to zero. | 252 | * result is greater than or equal to zero. |
157 | */ | 253 | */ |
158 | extern int atomic64_add_negative(u64 delta, atomic64_t *ptr); | 254 | static inline int atomic64_add_negative(long long i, atomic64_t *v) |
255 | { | ||
256 | return atomic64_add_return(i, v) < 0; | ||
257 | } | ||
258 | |||
259 | /** | ||
260 | * atomic64_add_unless - add unless the number is a given value | ||
261 | * @v: pointer of type atomic64_t | ||
262 | * @a: the amount to add to v... | ||
263 | * @u: ...unless v is equal to u. | ||
264 | * | ||
265 | * Atomically adds @a to @v, so long as it was not @u. | ||
266 | * Returns non-zero if @v was not @u, and zero otherwise. | ||
267 | */ | ||
268 | static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) | ||
269 | { | ||
270 | unsigned low = (unsigned)u; | ||
271 | unsigned high = (unsigned)(u >> 32); | ||
272 | asm volatile(ATOMIC64_ALTERNATIVE(add_unless) "\n\t" | ||
273 | : "+A" (a), "+c" (v), "+S" (low), "+D" (high) | ||
274 | : : "memory"); | ||
275 | return (int)a; | ||
276 | } | ||
277 | |||
278 | |||
279 | static inline int atomic64_inc_not_zero(atomic64_t *v) | ||
280 | { | ||
281 | int r; | ||
282 | asm volatile(ATOMIC64_ALTERNATIVE(inc_not_zero) | ||
283 | : "=a" (r) | ||
284 | : "S" (v) | ||
285 | : "ecx", "edx", "memory" | ||
286 | ); | ||
287 | return r; | ||
288 | } | ||
289 | |||
290 | static inline long long atomic64_dec_if_positive(atomic64_t *v) | ||
291 | { | ||
292 | long long r; | ||
293 | asm volatile(ATOMIC64_ALTERNATIVE(dec_if_positive) | ||
294 | : "=A" (r) | ||
295 | : "S" (v) | ||
296 | : "ecx", "memory" | ||
297 | ); | ||
298 | return r; | ||
299 | } | ||
300 | |||
301 | #undef ATOMIC64_ALTERNATIVE | ||
302 | #undef ATOMIC64_ALTERNATIVE_ | ||
159 | 303 | ||
160 | #endif /* _ASM_X86_ATOMIC64_32_H */ | 304 | #endif /* _ASM_X86_ATOMIC64_32_H */ |
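
On 32-bit x86 the atomic64 operations are now thin inline wrappers that use ALTERNATIVE to dispatch at boot to either cmpxchg8b-based helpers (the *_cx8 variants) or a 386-safe fallback, replacing the old mix of inline cmpxchg8b code and extern helpers. The CX8 helpers are essentially compare-and-exchange retry loops over the 64-bit value; expressed against the atomic64_read()/atomic64_cmpxchg() wrappers from this header, the idea looks roughly like this (sketch, not the actual out-of-line implementation):

/* Retry until no other CPU raced with the update. */
static inline long long atomic64_add_return_sketch(long long i, atomic64_t *v)
{
        long long old, val;

        do {
                old = atomic64_read(v);
                val = old + i;
        } while (atomic64_cmpxchg(v, old, val) != old);

        return val;
}
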
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 51c5b4056929..49fd1ea22951 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -18,7 +18,7 @@ | |||
18 | */ | 18 | */ |
19 | static inline long atomic64_read(const atomic64_t *v) | 19 | static inline long atomic64_read(const atomic64_t *v) |
20 | { | 20 | { |
21 | return v->counter; | 21 | return (*(volatile long *)&(v)->counter); |
22 | } | 22 | } |
23 | 23 | ||
24 | /** | 24 | /** |
@@ -221,4 +221,27 @@ static inline int atomic64_add_unless(atomic64_t *v, long a, long u) | |||
221 | 221 | ||
222 | #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) | 222 | #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) |
223 | 223 | ||
224 | /* | ||
225 | * atomic64_dec_if_positive - decrement by 1 if old value positive | ||
226 | * @v: pointer of type atomic_t | ||
227 | * | ||
228 | * The function returns the old value of *v minus 1, even if | ||
229 | * the atomic variable, v, was not decremented. | ||
230 | */ | ||
231 | static inline long atomic64_dec_if_positive(atomic64_t *v) | ||
232 | { | ||
233 | long c, old, dec; | ||
234 | c = atomic64_read(v); | ||
235 | for (;;) { | ||
236 | dec = c - 1; | ||
237 | if (unlikely(dec < 0)) | ||
238 | break; | ||
239 | old = atomic64_cmpxchg((v), c, dec); | ||
240 | if (likely(old == c)) | ||
241 | break; | ||
242 | c = old; | ||
243 | } | ||
244 | return dec; | ||
245 | } | ||
246 | |||
224 | #endif /* _ASM_X86_ATOMIC64_64_H */ | 247 | #endif /* _ASM_X86_ATOMIC64_64_H */ |
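Editor's note: the new atomic64_dec_if_positive() above is a textbook compare-and-swap retry loop: read the counter, compute the tentative new value, and let atomic64_cmpxchg() detect whether another CPU got in between. A minimal sketch of the same idiom follows; take_token() consumes the new primitive, and atomic64_add_below() is a hypothetical variant invented here only to show the loop shape reused for a different policy:

    /* take one token from a 64-bit pool; fails once the pool is empty */
    static int take_token(atomic64_t *pool)
    {
            /* returns the old value minus one; a negative result means
             * the pool was already empty and nothing was decremented */
            return atomic64_dec_if_positive(pool) >= 0;
    }

    /* hypothetical: add to a counter only while it stays at or below a
     * ceiling, built from the same cmpxchg retry loop as above */
    static long atomic64_add_below(atomic64_t *v, long a, long ceiling)
    {
            long c, old;

            c = atomic64_read(v);
            for (;;) {
                    if (c + a > ceiling)
                            break;                  /* would overshoot: give up */
                    old = atomic64_cmpxchg(v, c, c + a);
                    if (old == c)
                            break;                  /* our swap won */
                    c = old;                        /* lost the race: retry */
            }
            return c;
    }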
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 02b47a603fc8..545776efeb16 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h | |||
@@ -444,7 +444,9 @@ static inline int fls(int x) | |||
444 | 444 | ||
445 | #define ARCH_HAS_FAST_MULTIPLIER 1 | 445 | #define ARCH_HAS_FAST_MULTIPLIER 1 |
446 | 446 | ||
447 | #include <asm-generic/bitops/hweight.h> | 447 | #include <asm/arch_hweight.h> |
448 | |||
449 | #include <asm-generic/bitops/const_hweight.h> | ||
448 | 450 | ||
449 | #endif /* __KERNEL__ */ | 451 | #endif /* __KERNEL__ */ |
450 | 452 | ||
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 7a1065958ba9..3b62ab56c7a0 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h | |||
@@ -24,7 +24,7 @@ | |||
24 | #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) | 24 | #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) |
25 | 25 | ||
26 | #if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ | 26 | #if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ |
27 | (CONFIG_PHYSICAL_ALIGN < (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)) | 27 | (CONFIG_PHYSICAL_ALIGN < MIN_KERNEL_ALIGN) |
28 | #error "Invalid value for CONFIG_PHYSICAL_ALIGN" | 28 | #error "Invalid value for CONFIG_PHYSICAL_ALIGN" |
29 | #endif | 29 | #endif |
30 | 30 | ||
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 634c40a739a6..63e35ec9075c 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h | |||
@@ -44,9 +44,6 @@ static inline void copy_from_user_page(struct vm_area_struct *vma, | |||
44 | memcpy(dst, src, len); | 44 | memcpy(dst, src, len); |
45 | } | 45 | } |
46 | 46 | ||
47 | #define PG_WC PG_arch_1 | ||
48 | PAGEFLAG(WC, WC) | ||
49 | |||
50 | #ifdef CONFIG_X86_PAT | 47 | #ifdef CONFIG_X86_PAT |
51 | /* | 48 | /* |
52 | * X86 PAT uses page flags WC and Uncached together to keep track of | 49 | * X86 PAT uses page flags WC and Uncached together to keep track of |
@@ -55,16 +52,24 @@ PAGEFLAG(WC, WC) | |||
55 | * _PAGE_CACHE_UC_MINUS and a fourth state where the page's memory type has not | 52 | * _PAGE_CACHE_UC_MINUS and a fourth state where the page's memory type has not
56 | * been changed from its default (value of -1 used to denote this). | 53 | * been changed from its default (value of -1 used to denote this). |
57 | * Note we do not support _PAGE_CACHE_UC here. | 54 | * Note we do not support _PAGE_CACHE_UC here. |
58 | * | ||
59 | * Caller must hold memtype_lock for atomicity. | ||
60 | */ | 55 | */ |
56 | |||
57 | #define _PGMT_DEFAULT 0 | ||
58 | #define _PGMT_WC (1UL << PG_arch_1) | ||
59 | #define _PGMT_UC_MINUS (1UL << PG_uncached) | ||
60 | #define _PGMT_WB (1UL << PG_uncached | 1UL << PG_arch_1) | ||
61 | #define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1) | ||
62 | #define _PGMT_CLEAR_MASK (~_PGMT_MASK) | ||
63 | |||
61 | static inline unsigned long get_page_memtype(struct page *pg) | 64 | static inline unsigned long get_page_memtype(struct page *pg) |
62 | { | 65 | { |
63 | if (!PageUncached(pg) && !PageWC(pg)) | 66 | unsigned long pg_flags = pg->flags & _PGMT_MASK; |
67 | |||
68 | if (pg_flags == _PGMT_DEFAULT) | ||
64 | return -1; | 69 | return -1; |
65 | else if (!PageUncached(pg) && PageWC(pg)) | 70 | else if (pg_flags == _PGMT_WC) |
66 | return _PAGE_CACHE_WC; | 71 | return _PAGE_CACHE_WC; |
67 | else if (PageUncached(pg) && !PageWC(pg)) | 72 | else if (pg_flags == _PGMT_UC_MINUS) |
68 | return _PAGE_CACHE_UC_MINUS; | 73 | return _PAGE_CACHE_UC_MINUS; |
69 | else | 74 | else |
70 | return _PAGE_CACHE_WB; | 75 | return _PAGE_CACHE_WB; |
@@ -72,25 +77,26 @@ static inline unsigned long get_page_memtype(struct page *pg) | |||
72 | 77 | ||
73 | static inline void set_page_memtype(struct page *pg, unsigned long memtype) | 78 | static inline void set_page_memtype(struct page *pg, unsigned long memtype) |
74 | { | 79 | { |
80 | unsigned long memtype_flags = _PGMT_DEFAULT; | ||
81 | unsigned long old_flags; | ||
82 | unsigned long new_flags; | ||
83 | |||
75 | switch (memtype) { | 84 | switch (memtype) { |
76 | case _PAGE_CACHE_WC: | 85 | case _PAGE_CACHE_WC: |
77 | ClearPageUncached(pg); | 86 | memtype_flags = _PGMT_WC; |
78 | SetPageWC(pg); | ||
79 | break; | 87 | break; |
80 | case _PAGE_CACHE_UC_MINUS: | 88 | case _PAGE_CACHE_UC_MINUS: |
81 | SetPageUncached(pg); | 89 | memtype_flags = _PGMT_UC_MINUS; |
82 | ClearPageWC(pg); | ||
83 | break; | 90 | break; |
84 | case _PAGE_CACHE_WB: | 91 | case _PAGE_CACHE_WB: |
85 | SetPageUncached(pg); | 92 | memtype_flags = _PGMT_WB; |
86 | SetPageWC(pg); | ||
87 | break; | ||
88 | default: | ||
89 | case -1: | ||
90 | ClearPageUncached(pg); | ||
91 | ClearPageWC(pg); | ||
92 | break; | 93 | break; |
93 | } | 94 | } |
95 | |||
96 | do { | ||
97 | old_flags = pg->flags; | ||
98 | new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags; | ||
99 | } while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags); | ||
94 | } | 100 | } |
95 | #else | 101 | #else |
96 | static inline unsigned long get_page_memtype(struct page *pg) { return -1; } | 102 | static inline unsigned long get_page_memtype(struct page *pg) { return -1; } |
@@ -139,9 +145,11 @@ int set_memory_np(unsigned long addr, int numpages); | |||
139 | int set_memory_4k(unsigned long addr, int numpages); | 145 | int set_memory_4k(unsigned long addr, int numpages); |
140 | 146 | ||
141 | int set_memory_array_uc(unsigned long *addr, int addrinarray); | 147 | int set_memory_array_uc(unsigned long *addr, int addrinarray); |
148 | int set_memory_array_wc(unsigned long *addr, int addrinarray); | ||
142 | int set_memory_array_wb(unsigned long *addr, int addrinarray); | 149 | int set_memory_array_wb(unsigned long *addr, int addrinarray); |
143 | 150 | ||
144 | int set_pages_array_uc(struct page **pages, int addrinarray); | 151 | int set_pages_array_uc(struct page **pages, int addrinarray); |
152 | int set_pages_array_wc(struct page **pages, int addrinarray); | ||
145 | int set_pages_array_wb(struct page **pages, int addrinarray); | 153 | int set_pages_array_wb(struct page **pages, int addrinarray); |
146 | 154 | ||
147 | /* | 155 | /* |
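Editor's note: the cacheflush.h hunk above replaces the old PageWC/PageUncached accessors with a two-bit encoding packed into page->flags (neither bit set means "default", PG_arch_1 alone means WC, PG_uncached alone means UC-, both bits together mean WB) and updates it with a lock-free cmpxchg loop instead of requiring memtype_lock. A minimal sketch of that update pattern in isolation; set_flag_bits() is a hypothetical helper, not anything in this patch:

    #include <asm/system.h>         /* provides cmpxchg() on these kernels */

    /* atomically replace the masked bits of *word with set_bits, retrying
     * until no other CPU modified the word in between */
    static void set_flag_bits(unsigned long *word, unsigned long clear_mask,
                              unsigned long set_bits)
    {
            unsigned long old, new;

            do {
                    old = *word;                            /* snapshot */
                    new = (old & ~clear_mask) | set_bits;
            } while (cmpxchg(word, old, new) != old);       /* raced: retry */
    }

set_page_memtype() above is exactly this pattern with clear_mask equal to _PGMT_MASK and set_bits chosen from the _PGMT_* encodings.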
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index ffb9bb6b6c37..8859e12dd3cf 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h | |||
@@ -271,7 +271,8 @@ extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); | |||
271 | __typeof__(*(ptr)) __ret; \ | 271 | __typeof__(*(ptr)) __ret; \ |
272 | __typeof__(*(ptr)) __old = (o); \ | 272 | __typeof__(*(ptr)) __old = (o); \ |
273 | __typeof__(*(ptr)) __new = (n); \ | 273 | __typeof__(*(ptr)) __new = (n); \ |
274 | alternative_io("call cmpxchg8b_emu", \ | 274 | alternative_io(LOCK_PREFIX_HERE \ |
275 | "call cmpxchg8b_emu", \ | ||
275 | "lock; cmpxchg8b (%%esi)" , \ | 276 | "lock; cmpxchg8b (%%esi)" , \ |
276 | X86_FEATURE_CX8, \ | 277 | X86_FEATURE_CX8, \ |
277 | "=A" (__ret), \ | 278 | "=A" (__ret), \ |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 0cd82d068613..468145914389 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -161,6 +161,7 @@ | |||
161 | */ | 161 | */ |
162 | #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ | 162 | #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ |
163 | #define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ | 163 | #define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ |
164 | #define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */ | ||
164 | 165 | ||
165 | /* Virtualization flags: Linux defined */ | 166 | /* Virtualization flags: Linux defined */ |
166 | #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ | 167 | #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ |
@@ -175,6 +176,7 @@ | |||
175 | 176 | ||
176 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) | 177 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) |
177 | 178 | ||
179 | #include <asm/asm.h> | ||
178 | #include <linux/bitops.h> | 180 | #include <linux/bitops.h> |
179 | 181 | ||
180 | extern const char * const x86_cap_flags[NCAPINTS*32]; | 182 | extern const char * const x86_cap_flags[NCAPINTS*32]; |
@@ -283,6 +285,69 @@ extern const char * const x86_power_flags[32]; | |||
283 | 285 | ||
284 | #endif /* CONFIG_X86_64 */ | 286 | #endif /* CONFIG_X86_64 */ |
285 | 287 | ||
288 | /* | ||
289 | * Static testing of CPU features. Used the same as boot_cpu_has(). | ||
290 | * These are only valid after alternatives have run, but will statically | ||
291 | * patch the target code for additional performance. | ||
292 | * | ||
293 | */ | ||
294 | static __always_inline __pure bool __static_cpu_has(u8 bit) | ||
295 | { | ||
296 | #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) | ||
297 | asm goto("1: jmp %l[t_no]\n" | ||
298 | "2:\n" | ||
299 | ".section .altinstructions,\"a\"\n" | ||
300 | _ASM_ALIGN "\n" | ||
301 | _ASM_PTR "1b\n" | ||
302 | _ASM_PTR "0\n" /* no replacement */ | ||
303 | " .byte %P0\n" /* feature bit */ | ||
304 | " .byte 2b - 1b\n" /* source len */ | ||
305 | " .byte 0\n" /* replacement len */ | ||
306 | " .byte 0xff + 0 - (2b-1b)\n" /* padding */ | ||
307 | ".previous\n" | ||
308 | : : "i" (bit) : : t_no); | ||
309 | return true; | ||
310 | t_no: | ||
311 | return false; | ||
312 | #else | ||
313 | u8 flag; | ||
314 | /* Open-coded due to __stringify() in ALTERNATIVE() */ | ||
315 | asm volatile("1: movb $0,%0\n" | ||
316 | "2:\n" | ||
317 | ".section .altinstructions,\"a\"\n" | ||
318 | _ASM_ALIGN "\n" | ||
319 | _ASM_PTR "1b\n" | ||
320 | _ASM_PTR "3f\n" | ||
321 | " .byte %P1\n" /* feature bit */ | ||
322 | " .byte 2b - 1b\n" /* source len */ | ||
323 | " .byte 4f - 3f\n" /* replacement len */ | ||
324 | " .byte 0xff + (4f-3f) - (2b-1b)\n" /* padding */ | ||
325 | ".previous\n" | ||
326 | ".section .altinstr_replacement,\"ax\"\n" | ||
327 | "3: movb $1,%0\n" | ||
328 | "4:\n" | ||
329 | ".previous\n" | ||
330 | : "=qm" (flag) : "i" (bit)); | ||
331 | return flag; | ||
332 | #endif | ||
333 | } | ||
334 | |||
335 | #if __GNUC__ >= 4 | ||
336 | #define static_cpu_has(bit) \ | ||
337 | ( \ | ||
338 | __builtin_constant_p(boot_cpu_has(bit)) ? \ | ||
339 | boot_cpu_has(bit) : \ | ||
340 | (__builtin_constant_p(bit) && !((bit) & ~0xff)) ? \ | ||
341 | __static_cpu_has(bit) : \ | ||
342 | boot_cpu_has(bit) \ | ||
343 | ) | ||
344 | #else | ||
345 | /* | ||
346 | * gcc 3.x is too stupid to do the static test; fall back to dynamic. | ||
347 | */ | ||
348 | #define static_cpu_has(bit) boot_cpu_has(bit) | ||
349 | #endif | ||
350 | |||
286 | #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ | 351 | #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ |
287 | 352 | ||
288 | #endif /* _ASM_X86_CPUFEATURE_H */ | 353 | #endif /* _ASM_X86_CPUFEATURE_H */ |
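Editor's note: static_cpu_has() above trades a runtime test of the capability bitmap for code that the alternatives pass patches into a straight-line branch, which is why the comment stresses it is only valid after alternatives have run. A hypothetical use is sketched below; X86_FEATURE_XSAVE is a real feature bit, while do_xsave()/do_fxsave() are placeholder helpers invented for the sketch (the in-tree consumer added by this series is use_xsave() in asm/i387.h further down):

    #include <asm/cpufeature.h>

    extern void do_xsave(void *buf);        /* hypothetical helpers, not a real API */
    extern void do_fxsave(void *buf);

    static void save_ext_state(void *buf)
    {
            /* after alternatives run, this compiles down to a single
             * unconditional jump to whichever branch applies */
            if (static_cpu_has(X86_FEATURE_XSAVE))
                    do_xsave(buf);
            else
                    do_fxsave(buf);
    }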
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h deleted file mode 100644 index 70dac199b093..000000000000 --- a/arch/x86/include/asm/ds.h +++ /dev/null | |||
@@ -1,302 +0,0 @@ | |||
1 | /* | ||
2 | * Debug Store (DS) support | ||
3 | * | ||
4 | * This provides a low-level interface to the hardware's Debug Store | ||
5 | * feature that is used for branch trace store (BTS) and | ||
6 | * precise-event based sampling (PEBS). | ||
7 | * | ||
8 | * It manages: | ||
9 | * - DS and BTS hardware configuration | ||
10 | * - buffer overflow handling (to be done) | ||
11 | * - buffer access | ||
12 | * | ||
13 | * It does not do: | ||
14 | * - security checking (is the caller allowed to trace the task) | ||
15 | * - buffer allocation (memory accounting) | ||
16 | * | ||
17 | * | ||
18 | * Copyright (C) 2007-2009 Intel Corporation. | ||
19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 | ||
20 | */ | ||
21 | |||
22 | #ifndef _ASM_X86_DS_H | ||
23 | #define _ASM_X86_DS_H | ||
24 | |||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/init.h> | ||
28 | #include <linux/err.h> | ||
29 | |||
30 | |||
31 | #ifdef CONFIG_X86_DS | ||
32 | |||
33 | struct task_struct; | ||
34 | struct ds_context; | ||
35 | struct ds_tracer; | ||
36 | struct bts_tracer; | ||
37 | struct pebs_tracer; | ||
38 | |||
39 | typedef void (*bts_ovfl_callback_t)(struct bts_tracer *); | ||
40 | typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); | ||
41 | |||
42 | |||
43 | /* | ||
44 | * A list of features plus corresponding macros to talk about them in | ||
45 | * the ds_request function's flags parameter. | ||
46 | * | ||
47 | * We use the enum to index an array of corresponding control bits; | ||
48 | * we use the macro to index a flags bit-vector. | ||
49 | */ | ||
50 | enum ds_feature { | ||
51 | dsf_bts = 0, | ||
52 | dsf_bts_kernel, | ||
53 | #define BTS_KERNEL (1 << dsf_bts_kernel) | ||
54 | /* trace kernel-mode branches */ | ||
55 | |||
56 | dsf_bts_user, | ||
57 | #define BTS_USER (1 << dsf_bts_user) | ||
58 | /* trace user-mode branches */ | ||
59 | |||
60 | dsf_bts_overflow, | ||
61 | dsf_bts_max, | ||
62 | dsf_pebs = dsf_bts_max, | ||
63 | |||
64 | dsf_pebs_max, | ||
65 | dsf_ctl_max = dsf_pebs_max, | ||
66 | dsf_bts_timestamps = dsf_ctl_max, | ||
67 | #define BTS_TIMESTAMPS (1 << dsf_bts_timestamps) | ||
68 | /* add timestamps into BTS trace */ | ||
69 | |||
70 | #define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS) | ||
71 | }; | ||
72 | |||
73 | |||
74 | /* | ||
75 | * Request BTS or PEBS | ||
76 | * | ||
77 | * Due to alignment constraints, the actual buffer may be slightly | ||
78 | * smaller than the requested or provided buffer. | ||
79 | * | ||
80 | * Returns a pointer to a tracer structure on success, or | ||
81 | * ERR_PTR(errcode) on failure. | ||
82 | * | ||
83 | * The interrupt threshold is independent from the overflow callback | ||
84 | * to allow users to use their own overflow interrupt handling mechanism. | ||
85 | * | ||
86 | * The function might sleep. | ||
87 | * | ||
88 | * task: the task to request recording for | ||
89 | * cpu: the cpu to request recording for | ||
90 | * base: the base pointer for the (non-pageable) buffer; | ||
91 | * size: the size of the provided buffer in bytes | ||
92 | * ovfl: pointer to a function to be called on buffer overflow; | ||
93 | * NULL if cyclic buffer requested | ||
94 | * th: the interrupt threshold in records from the end of the buffer; | ||
95 | * -1 if no interrupt threshold is requested. | ||
96 | * flags: a bit-mask of the above flags | ||
97 | */ | ||
98 | extern struct bts_tracer *ds_request_bts_task(struct task_struct *task, | ||
99 | void *base, size_t size, | ||
100 | bts_ovfl_callback_t ovfl, | ||
101 | size_t th, unsigned int flags); | ||
102 | extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, | ||
103 | bts_ovfl_callback_t ovfl, | ||
104 | size_t th, unsigned int flags); | ||
105 | extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, | ||
106 | void *base, size_t size, | ||
107 | pebs_ovfl_callback_t ovfl, | ||
108 | size_t th, unsigned int flags); | ||
109 | extern struct pebs_tracer *ds_request_pebs_cpu(int cpu, | ||
110 | void *base, size_t size, | ||
111 | pebs_ovfl_callback_t ovfl, | ||
112 | size_t th, unsigned int flags); | ||
113 | |||
114 | /* | ||
115 | * Release BTS or PEBS resources | ||
116 | * Suspend and resume BTS or PEBS tracing | ||
117 | * | ||
118 | * Must be called with irq's enabled. | ||
119 | * | ||
120 | * tracer: the tracer handle returned from ds_request_~() | ||
121 | */ | ||
122 | extern void ds_release_bts(struct bts_tracer *tracer); | ||
123 | extern void ds_suspend_bts(struct bts_tracer *tracer); | ||
124 | extern void ds_resume_bts(struct bts_tracer *tracer); | ||
125 | extern void ds_release_pebs(struct pebs_tracer *tracer); | ||
126 | extern void ds_suspend_pebs(struct pebs_tracer *tracer); | ||
127 | extern void ds_resume_pebs(struct pebs_tracer *tracer); | ||
128 | |||
129 | /* | ||
130 | * Release BTS or PEBS resources | ||
131 | * Suspend and resume BTS or PEBS tracing | ||
132 | * | ||
133 | * Cpu tracers must call this on the traced cpu. | ||
134 | * Task tracers must call ds_release_~_noirq() for themselves. | ||
135 | * | ||
136 | * May be called with irq's disabled. | ||
137 | * | ||
138 | * Returns 0 if successful; | ||
139 | * -EPERM if the cpu tracer does not trace the current cpu. | ||
140 | * -EPERM if the task tracer does not trace itself. | ||
141 | * | ||
142 | * tracer: the tracer handle returned from ds_request_~() | ||
143 | */ | ||
144 | extern int ds_release_bts_noirq(struct bts_tracer *tracer); | ||
145 | extern int ds_suspend_bts_noirq(struct bts_tracer *tracer); | ||
146 | extern int ds_resume_bts_noirq(struct bts_tracer *tracer); | ||
147 | extern int ds_release_pebs_noirq(struct pebs_tracer *tracer); | ||
148 | extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer); | ||
149 | extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer); | ||
150 | |||
151 | |||
152 | /* | ||
153 | * The raw DS buffer state as it is used for BTS and PEBS recording. | ||
154 | * | ||
155 | * This is the low-level, arch-dependent interface for working | ||
156 | * directly on the raw trace data. | ||
157 | */ | ||
158 | struct ds_trace { | ||
159 | /* the number of bts/pebs records */ | ||
160 | size_t n; | ||
161 | /* the size of a bts/pebs record in bytes */ | ||
162 | size_t size; | ||
163 | /* pointers into the raw buffer: | ||
164 | - to the first entry */ | ||
165 | void *begin; | ||
166 | /* - one beyond the last entry */ | ||
167 | void *end; | ||
168 | /* - one beyond the newest entry */ | ||
169 | void *top; | ||
170 | /* - the interrupt threshold */ | ||
171 | void *ith; | ||
172 | /* flags given on ds_request() */ | ||
173 | unsigned int flags; | ||
174 | }; | ||
175 | |||
176 | /* | ||
177 | * An arch-independent view on branch trace data. | ||
178 | */ | ||
179 | enum bts_qualifier { | ||
180 | bts_invalid, | ||
181 | #define BTS_INVALID bts_invalid | ||
182 | |||
183 | bts_branch, | ||
184 | #define BTS_BRANCH bts_branch | ||
185 | |||
186 | bts_task_arrives, | ||
187 | #define BTS_TASK_ARRIVES bts_task_arrives | ||
188 | |||
189 | bts_task_departs, | ||
190 | #define BTS_TASK_DEPARTS bts_task_departs | ||
191 | |||
192 | bts_qual_bit_size = 4, | ||
193 | bts_qual_max = (1 << bts_qual_bit_size), | ||
194 | }; | ||
195 | |||
196 | struct bts_struct { | ||
197 | __u64 qualifier; | ||
198 | union { | ||
199 | /* BTS_BRANCH */ | ||
200 | struct { | ||
201 | __u64 from; | ||
202 | __u64 to; | ||
203 | } lbr; | ||
204 | /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ | ||
205 | struct { | ||
206 | __u64 clock; | ||
207 | pid_t pid; | ||
208 | } event; | ||
209 | } variant; | ||
210 | }; | ||
211 | |||
212 | |||
213 | /* | ||
214 | * The BTS state. | ||
215 | * | ||
216 | * This gives access to the raw DS state and adds functions to provide | ||
217 | * an arch-independent view of the BTS data. | ||
218 | */ | ||
219 | struct bts_trace { | ||
220 | struct ds_trace ds; | ||
221 | |||
222 | int (*read)(struct bts_tracer *tracer, const void *at, | ||
223 | struct bts_struct *out); | ||
224 | int (*write)(struct bts_tracer *tracer, const struct bts_struct *in); | ||
225 | }; | ||
226 | |||
227 | |||
228 | /* | ||
229 | * The PEBS state. | ||
230 | * | ||
231 | * This gives access to the raw DS state and the PEBS-specific counter | ||
232 | * reset value. | ||
233 | */ | ||
234 | struct pebs_trace { | ||
235 | struct ds_trace ds; | ||
236 | |||
237 | /* the number of valid counters in the below array */ | ||
238 | unsigned int counters; | ||
239 | |||
240 | #define MAX_PEBS_COUNTERS 4 | ||
241 | /* the counter reset value */ | ||
242 | unsigned long long counter_reset[MAX_PEBS_COUNTERS]; | ||
243 | }; | ||
244 | |||
245 | |||
246 | /* | ||
247 | * Read the BTS or PEBS trace. | ||
248 | * | ||
249 | * Returns a view on the trace collected for the parameter tracer. | ||
250 | * | ||
251 | * The view remains valid as long as the traced task is not running or | ||
252 | * the tracer is suspended. | ||
253 | * Writes into the trace buffer are not reflected. | ||
254 | * | ||
255 | * tracer: the tracer handle returned from ds_request_~() | ||
256 | */ | ||
257 | extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer); | ||
258 | extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer); | ||
259 | |||
260 | |||
261 | /* | ||
262 | * Reset the write pointer of the BTS/PEBS buffer. | ||
263 | * | ||
264 | * Returns 0 on success; -Eerrno on error | ||
265 | * | ||
266 | * tracer: the tracer handle returned from ds_request_~() | ||
267 | */ | ||
268 | extern int ds_reset_bts(struct bts_tracer *tracer); | ||
269 | extern int ds_reset_pebs(struct pebs_tracer *tracer); | ||
270 | |||
271 | /* | ||
272 | * Set the PEBS counter reset value. | ||
273 | * | ||
274 | * Returns 0 on success; -Eerrno on error | ||
275 | * | ||
276 | * tracer: the tracer handle returned from ds_request_pebs() | ||
277 | * counter: the index of the counter | ||
278 | * value: the new counter reset value | ||
279 | */ | ||
280 | extern int ds_set_pebs_reset(struct pebs_tracer *tracer, | ||
281 | unsigned int counter, u64 value); | ||
282 | |||
283 | /* | ||
284 | * Initialization | ||
285 | */ | ||
286 | struct cpuinfo_x86; | ||
287 | extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); | ||
288 | |||
289 | /* | ||
290 | * Context switch work | ||
291 | */ | ||
292 | extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); | ||
293 | |||
294 | #else /* CONFIG_X86_DS */ | ||
295 | |||
296 | struct cpuinfo_x86; | ||
297 | static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} | ||
298 | static inline void ds_switch_to(struct task_struct *prev, | ||
299 | struct task_struct *next) {} | ||
300 | |||
301 | #endif /* CONFIG_X86_DS */ | ||
302 | #endif /* _ASM_X86_DS_H */ | ||
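Editor's note: the header removed above documented a request/suspend/release life cycle for BTS and PEBS tracers. For readers following what is going away, a minimal sketch of that life cycle is shown below; it is only meaningful against a tree that still ships asm/ds.h, since this merge deletes the interface (presumably superseded by the perf_events-based BTS/PEBS support):

    #include <linux/smp.h>
    #include <asm/ds.h>             /* no longer exists after this merge */

    /* trace user and kernel branches on the current CPU into a cyclic
     * buffer: no overflow callback, no interrupt threshold */
    static struct bts_tracer *trace_this_cpu(void *buf, size_t size)
    {
            return ds_request_bts_cpu(smp_processor_id(), buf, size,
                                      NULL, (size_t)-1, BTS_USER_FLAGS);
    }

    static void stop_tracing(struct bts_tracer *tracer)
    {
            ds_release_bts(tracer);         /* must be called with irqs enabled */
    }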
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index ae6253ab9029..733f7e91e7a9 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h | |||
@@ -34,6 +34,18 @@ | |||
34 | #define CFI_SIGNAL_FRAME | 34 | #define CFI_SIGNAL_FRAME |
35 | #endif | 35 | #endif |
36 | 36 | ||
37 | #if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__) | ||
38 | /* | ||
39 | * Emit CFI data in .debug_frame sections, not .eh_frame sections. | ||
40 | * The latter we currently just discard since we don't do DWARF | ||
41 | * unwinding at runtime. So only the offline DWARF information is | ||
42 | * useful to anyone. Note we should not use this directive if this | ||
43 | * file is used in the vDSO assembly, or if vmlinux.lds.S gets | ||
44 | * changed so it doesn't discard .eh_frame. | ||
45 | */ | ||
46 | .cfi_sections .debug_frame | ||
47 | #endif | ||
48 | |||
37 | #else | 49 | #else |
38 | 50 | ||
39 | /* | 51 | /* |
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 0e22296790d3..ec8a52d14ab1 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h | |||
@@ -45,7 +45,12 @@ | |||
45 | #define E820_NVS 4 | 45 | #define E820_NVS 4 |
46 | #define E820_UNUSABLE 5 | 46 | #define E820_UNUSABLE 5 |
47 | 47 | ||
48 | /* reserved RAM used by kernel itself */ | 48 | /* |
49 | * reserved RAM used by kernel itself | ||
50 | * if CONFIG_INTEL_TXT is enabled, memory of this type will be | ||
51 | * included in the S3 integrity calculation and so should not include | ||
52 | * any memory that BIOS might alter over the S3 transition | ||
53 | */ | ||
49 | #define E820_RESERVED_KERN 128 | 54 | #define E820_RESERVED_KERN 128 |
50 | 55 | ||
51 | #ifndef __ASSEMBLY__ | 56 | #ifndef __ASSEMBLY__ |
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 0f8576427cfe..aeab29aee617 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
@@ -35,7 +35,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); | |||
35 | 35 | ||
36 | #define __ARCH_IRQ_STAT | 36 | #define __ARCH_IRQ_STAT |
37 | 37 | ||
38 | #define inc_irq_stat(member) percpu_add(irq_stat.member, 1) | 38 | #define inc_irq_stat(member) percpu_inc(irq_stat.member) |
39 | 39 | ||
40 | #define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) | 40 | #define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) |
41 | 41 | ||
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 1d5c08a1bdfd..004e6e25e913 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h | |||
@@ -68,6 +68,7 @@ extern unsigned long force_hpet_address; | |||
68 | extern u8 hpet_blockid; | 68 | extern u8 hpet_blockid; |
69 | extern int hpet_force_user; | 69 | extern int hpet_force_user; |
70 | extern u8 hpet_msi_disable; | 70 | extern u8 hpet_msi_disable; |
71 | extern u8 hpet_readback_cmp; | ||
71 | extern int is_hpet_enabled(void); | 72 | extern int is_hpet_enabled(void); |
72 | extern int hpet_enable(void); | 73 | extern int hpet_enable(void); |
73 | extern void hpet_disable(void); | 74 | extern void hpet_disable(void); |
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 2a1bd8f4f23a..942255310e6a 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h | |||
@@ -41,12 +41,16 @@ struct arch_hw_breakpoint { | |||
41 | /* Total number of available HW breakpoint registers */ | 41 | /* Total number of available HW breakpoint registers */ |
42 | #define HBP_NUM 4 | 42 | #define HBP_NUM 4 |
43 | 43 | ||
44 | static inline int hw_breakpoint_slots(int type) | ||
45 | { | ||
46 | return HBP_NUM; | ||
47 | } | ||
48 | |||
44 | struct perf_event; | 49 | struct perf_event; |
45 | struct pmu; | 50 | struct pmu; |
46 | 51 | ||
47 | extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); | 52 | extern int arch_check_bp_in_kernelspace(struct perf_event *bp); |
48 | extern int arch_validate_hwbkpt_settings(struct perf_event *bp, | 53 | extern int arch_validate_hwbkpt_settings(struct perf_event *bp); |
49 | struct task_struct *tsk); | ||
50 | extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, | 54 | extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, |
51 | unsigned long val, void *data); | 55 | unsigned long val, void *data); |
52 | 56 | ||
diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/asm/hyperv.h index e153a2b3889a..5df477ac3af7 100644 --- a/arch/x86/include/asm/hyperv.h +++ b/arch/x86/include/asm/hyperv.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _ASM_X86_KVM_HYPERV_H | 1 | #ifndef _ASM_X86_HYPERV_H |
2 | #define _ASM_X86_KVM_HYPERV_H | 2 | #define _ASM_X86_HYPERV_H |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | 5 | ||
@@ -14,6 +14,10 @@ | |||
14 | #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 | 14 | #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 |
15 | #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 | 15 | #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 |
16 | 16 | ||
17 | #define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 | ||
18 | #define HYPERV_CPUID_MIN 0x40000005 | ||
19 | #define HYPERV_CPUID_MAX 0x4000ffff | ||
20 | |||
17 | /* | 21 | /* |
18 | * Feature identification. EAX indicates which features are available | 22 | * Feature identification. EAX indicates which features are available |
19 | * to the partition based upon the current partition privileges. | 23 | * to the partition based upon the current partition privileges. |
@@ -129,6 +133,9 @@ | |||
129 | /* MSR used to provide vcpu index */ | 133 | /* MSR used to provide vcpu index */ |
130 | #define HV_X64_MSR_VP_INDEX 0x40000002 | 134 | #define HV_X64_MSR_VP_INDEX 0x40000002 |
131 | 135 | ||
136 | /* MSR used to read the per-partition time reference counter */ | ||
137 | #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 | ||
138 | |||
132 | /* Define the virtual APIC registers */ | 139 | /* Define the virtual APIC registers */ |
133 | #define HV_X64_MSR_EOI 0x40000070 | 140 | #define HV_X64_MSR_EOI 0x40000070 |
134 | #define HV_X64_MSR_ICR 0x40000071 | 141 | #define HV_X64_MSR_ICR 0x40000071 |
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index b78c0941e422..70abda7058c8 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h | |||
@@ -17,10 +17,33 @@ | |||
17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | 17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | * | 18 | * |
19 | */ | 19 | */ |
20 | #ifndef ASM_X86__HYPERVISOR_H | 20 | #ifndef _ASM_X86_HYPERVISOR_H |
21 | #define ASM_X86__HYPERVISOR_H | 21 | #define _ASM_X86_HYPERVISOR_H |
22 | 22 | ||
23 | extern void init_hypervisor(struct cpuinfo_x86 *c); | 23 | extern void init_hypervisor(struct cpuinfo_x86 *c); |
24 | extern void init_hypervisor_platform(void); | 24 | extern void init_hypervisor_platform(void); |
25 | 25 | ||
26 | /* | ||
27 | * x86 hypervisor information | ||
28 | */ | ||
29 | struct hypervisor_x86 { | ||
30 | /* Hypervisor name */ | ||
31 | const char *name; | ||
32 | |||
33 | /* Detection routine */ | ||
34 | bool (*detect)(void); | ||
35 | |||
36 | /* Adjust CPU feature bits (run once per CPU) */ | ||
37 | void (*set_cpu_features)(struct cpuinfo_x86 *); | ||
38 | |||
39 | /* Platform setup (run once per boot) */ | ||
40 | void (*init_platform)(void); | ||
41 | }; | ||
42 | |||
43 | extern const struct hypervisor_x86 *x86_hyper; | ||
44 | |||
45 | /* Recognized hypervisors */ | ||
46 | extern const struct hypervisor_x86 x86_hyper_vmware; | ||
47 | extern const struct hypervisor_x86 x86_hyper_ms_hyperv; | ||
48 | |||
26 | #endif | 49 | #endif |
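Editor's note: struct hypervisor_x86 above turns hypervisor detection into a small ops table: a detect() hook, an optional per-CPU set_cpu_features() hook, and a one-shot init_platform() hook, with x86_hyper pointing at whichever recognized descriptor matched. A minimal sketch of such a descriptor; the my_hv_* names are invented for illustration, the real instances being x86_hyper_vmware and x86_hyper_ms_hyperv:

    #include <linux/types.h>
    #include <asm/hypervisor.h>

    static bool my_hv_detect(void)
    {
            /* e.g. probe a vendor signature in the 0x40000000 CPUID leaf */
            return false;                   /* hypothetical: never detected */
    }

    static void my_hv_init_platform(void)
    {
            /* one-time platform setup, e.g. registering a clocksource */
    }

    const struct hypervisor_x86 x86_hyper_my_hv = {
            .name           = "MyHypervisor",
            .detect         = my_hv_detect,
            .init_platform  = my_hv_init_platform,
            /* .set_cpu_features left NULL: nothing per-CPU to adjust */
    };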
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index da2930924501..c991b3a7b904 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
@@ -16,7 +16,9 @@ | |||
16 | #include <linux/kernel_stat.h> | 16 | #include <linux/kernel_stat.h> |
17 | #include <linux/regset.h> | 17 | #include <linux/regset.h> |
18 | #include <linux/hardirq.h> | 18 | #include <linux/hardirq.h> |
19 | #include <linux/slab.h> | ||
19 | #include <asm/asm.h> | 20 | #include <asm/asm.h> |
21 | #include <asm/cpufeature.h> | ||
20 | #include <asm/processor.h> | 22 | #include <asm/processor.h> |
21 | #include <asm/sigcontext.h> | 23 | #include <asm/sigcontext.h> |
22 | #include <asm/user.h> | 24 | #include <asm/user.h> |
@@ -56,6 +58,11 @@ extern int restore_i387_xstate_ia32(void __user *buf); | |||
56 | 58 | ||
57 | #define X87_FSW_ES (1 << 7) /* Exception Summary */ | 59 | #define X87_FSW_ES (1 << 7) /* Exception Summary */ |
58 | 60 | ||
61 | static __always_inline __pure bool use_xsave(void) | ||
62 | { | ||
63 | return static_cpu_has(X86_FEATURE_XSAVE); | ||
64 | } | ||
65 | |||
59 | #ifdef CONFIG_X86_64 | 66 | #ifdef CONFIG_X86_64 |
60 | 67 | ||
61 | /* Ignore delayed exceptions from user space */ | 68 | /* Ignore delayed exceptions from user space */ |
@@ -91,15 +98,15 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | |||
91 | values. The kernel data segment can be sometimes 0 and sometimes | 98 | values. The kernel data segment can be sometimes 0 and sometimes |
92 | new user value. Both should be ok. | 99 | new user value. Both should be ok. |
93 | Use the PDA as safe address because it should be already in L1. */ | 100 | Use the PDA as safe address because it should be already in L1. */ |
94 | static inline void clear_fpu_state(struct task_struct *tsk) | 101 | static inline void fpu_clear(struct fpu *fpu) |
95 | { | 102 | { |
96 | struct xsave_struct *xstate = &tsk->thread.xstate->xsave; | 103 | struct xsave_struct *xstate = &fpu->state->xsave; |
97 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 104 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; |
98 | 105 | ||
99 | /* | 106 | /* |
100 | * xsave header may indicate the init state of the FP. | 107 | * xsave header may indicate the init state of the FP. |
101 | */ | 108 | */ |
102 | if ((task_thread_info(tsk)->status & TS_XSAVE) && | 109 | if (use_xsave() && |
103 | !(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) | 110 | !(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) |
104 | return; | 111 | return; |
105 | 112 | ||
@@ -111,6 +118,11 @@ static inline void clear_fpu_state(struct task_struct *tsk) | |||
111 | X86_FEATURE_FXSAVE_LEAK); | 118 | X86_FEATURE_FXSAVE_LEAK); |
112 | } | 119 | } |
113 | 120 | ||
121 | static inline void clear_fpu_state(struct task_struct *tsk) | ||
122 | { | ||
123 | fpu_clear(&tsk->thread.fpu); | ||
124 | } | ||
125 | |||
114 | static inline int fxsave_user(struct i387_fxsave_struct __user *fx) | 126 | static inline int fxsave_user(struct i387_fxsave_struct __user *fx) |
115 | { | 127 | { |
116 | int err; | 128 | int err; |
@@ -135,7 +147,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) | |||
135 | return err; | 147 | return err; |
136 | } | 148 | } |
137 | 149 | ||
138 | static inline void fxsave(struct task_struct *tsk) | 150 | static inline void fpu_fxsave(struct fpu *fpu) |
139 | { | 151 | { |
140 | /* Using "rex64; fxsave %0" is broken because, if the memory operand | 152 | /* Using "rex64; fxsave %0" is broken because, if the memory operand |
141 | uses any extended registers for addressing, a second REX prefix | 153 | uses any extended registers for addressing, a second REX prefix |
@@ -145,42 +157,45 @@ static inline void fxsave(struct task_struct *tsk) | |||
145 | /* Using "fxsaveq %0" would be the ideal choice, but is only supported | 157 | /* Using "fxsaveq %0" would be the ideal choice, but is only supported |
146 | starting with gas 2.16. */ | 158 | starting with gas 2.16. */ |
147 | __asm__ __volatile__("fxsaveq %0" | 159 | __asm__ __volatile__("fxsaveq %0" |
148 | : "=m" (tsk->thread.xstate->fxsave)); | 160 | : "=m" (fpu->state->fxsave)); |
149 | #elif 0 | 161 | #elif 0 |
150 | /* Using, as a workaround, the properly prefixed form below isn't | 162 | /* Using, as a workaround, the properly prefixed form below isn't |
151 | accepted by any binutils version so far released, complaining that | 163 | accepted by any binutils version so far released, complaining that |
152 | the same type of prefix is used twice if an extended register is | 164 | the same type of prefix is used twice if an extended register is |
153 | needed for addressing (fix submitted to mainline 2005-11-21). */ | 165 | needed for addressing (fix submitted to mainline 2005-11-21). */ |
154 | __asm__ __volatile__("rex64/fxsave %0" | 166 | __asm__ __volatile__("rex64/fxsave %0" |
155 | : "=m" (tsk->thread.xstate->fxsave)); | 167 | : "=m" (fpu->state->fxsave)); |
156 | #else | 168 | #else |
157 | /* This, however, we can work around by forcing the compiler to select | 169 | /* This, however, we can work around by forcing the compiler to select |
158 | an addressing mode that doesn't require extended registers. */ | 170 | an addressing mode that doesn't require extended registers. */ |
159 | __asm__ __volatile__("rex64/fxsave (%1)" | 171 | __asm__ __volatile__("rex64/fxsave (%1)" |
160 | : "=m" (tsk->thread.xstate->fxsave) | 172 | : "=m" (fpu->state->fxsave) |
161 | : "cdaSDb" (&tsk->thread.xstate->fxsave)); | 173 | : "cdaSDb" (&fpu->state->fxsave)); |
162 | #endif | 174 | #endif |
163 | } | 175 | } |
164 | 176 | ||
165 | static inline void __save_init_fpu(struct task_struct *tsk) | 177 | static inline void fpu_save_init(struct fpu *fpu) |
166 | { | 178 | { |
167 | if (task_thread_info(tsk)->status & TS_XSAVE) | 179 | if (use_xsave()) |
168 | xsave(tsk); | 180 | fpu_xsave(fpu); |
169 | else | 181 | else |
170 | fxsave(tsk); | 182 | fpu_fxsave(fpu); |
183 | |||
184 | fpu_clear(fpu); | ||
185 | } | ||
171 | 186 | ||
172 | clear_fpu_state(tsk); | 187 | static inline void __save_init_fpu(struct task_struct *tsk) |
188 | { | ||
189 | fpu_save_init(&tsk->thread.fpu); | ||
173 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 190 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
174 | } | 191 | } |
175 | 192 | ||
176 | #else /* CONFIG_X86_32 */ | 193 | #else /* CONFIG_X86_32 */ |
177 | 194 | ||
178 | #ifdef CONFIG_MATH_EMULATION | 195 | #ifdef CONFIG_MATH_EMULATION |
179 | extern void finit_task(struct task_struct *tsk); | 196 | extern void finit_soft_fpu(struct i387_soft_struct *soft); |
180 | #else | 197 | #else |
181 | static inline void finit_task(struct task_struct *tsk) | 198 | static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} |
182 | { | ||
183 | } | ||
184 | #endif | 199 | #endif |
185 | 200 | ||
186 | static inline void tolerant_fwait(void) | 201 | static inline void tolerant_fwait(void) |
@@ -216,13 +231,13 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | |||
216 | /* | 231 | /* |
217 | * These must be called with preempt disabled | 232 | * These must be called with preempt disabled |
218 | */ | 233 | */ |
219 | static inline void __save_init_fpu(struct task_struct *tsk) | 234 | static inline void fpu_save_init(struct fpu *fpu) |
220 | { | 235 | { |
221 | if (task_thread_info(tsk)->status & TS_XSAVE) { | 236 | if (use_xsave()) { |
222 | struct xsave_struct *xstate = &tsk->thread.xstate->xsave; | 237 | struct xsave_struct *xstate = &fpu->state->xsave; |
223 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 238 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; |
224 | 239 | ||
225 | xsave(tsk); | 240 | fpu_xsave(fpu); |
226 | 241 | ||
227 | /* | 242 | /* |
228 | * xsave header may indicate the init state of the FP. | 243 | * xsave header may indicate the init state of the FP. |
@@ -246,8 +261,8 @@ static inline void __save_init_fpu(struct task_struct *tsk) | |||
246 | "fxsave %[fx]\n" | 261 | "fxsave %[fx]\n" |
247 | "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", | 262 | "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", |
248 | X86_FEATURE_FXSR, | 263 | X86_FEATURE_FXSR, |
249 | [fx] "m" (tsk->thread.xstate->fxsave), | 264 | [fx] "m" (fpu->state->fxsave), |
250 | [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); | 265 | [fsw] "m" (fpu->state->fxsave.swd) : "memory"); |
251 | clear_state: | 266 | clear_state: |
252 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception | 267 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception |
253 | is pending. Clear the x87 state here by setting it to fixed | 268 | is pending. Clear the x87 state here by setting it to fixed |
@@ -259,17 +274,34 @@ clear_state: | |||
259 | X86_FEATURE_FXSAVE_LEAK, | 274 | X86_FEATURE_FXSAVE_LEAK, |
260 | [addr] "m" (safe_address)); | 275 | [addr] "m" (safe_address)); |
261 | end: | 276 | end: |
277 | ; | ||
278 | } | ||
279 | |||
280 | static inline void __save_init_fpu(struct task_struct *tsk) | ||
281 | { | ||
282 | fpu_save_init(&tsk->thread.fpu); | ||
262 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 283 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
263 | } | 284 | } |
264 | 285 | ||
286 | |||
265 | #endif /* CONFIG_X86_64 */ | 287 | #endif /* CONFIG_X86_64 */ |
266 | 288 | ||
267 | static inline int restore_fpu_checking(struct task_struct *tsk) | 289 | static inline int fpu_fxrstor_checking(struct fpu *fpu) |
268 | { | 290 | { |
269 | if (task_thread_info(tsk)->status & TS_XSAVE) | 291 | return fxrstor_checking(&fpu->state->fxsave); |
270 | return xrstor_checking(&tsk->thread.xstate->xsave); | 292 | } |
293 | |||
294 | static inline int fpu_restore_checking(struct fpu *fpu) | ||
295 | { | ||
296 | if (use_xsave()) | ||
297 | return fpu_xrstor_checking(fpu); | ||
271 | else | 298 | else |
272 | return fxrstor_checking(&tsk->thread.xstate->fxsave); | 299 | return fpu_fxrstor_checking(fpu); |
300 | } | ||
301 | |||
302 | static inline int restore_fpu_checking(struct task_struct *tsk) | ||
303 | { | ||
304 | return fpu_restore_checking(&tsk->thread.fpu); | ||
273 | } | 305 | } |
274 | 306 | ||
275 | /* | 307 | /* |
@@ -397,30 +429,59 @@ static inline void clear_fpu(struct task_struct *tsk) | |||
397 | static inline unsigned short get_fpu_cwd(struct task_struct *tsk) | 429 | static inline unsigned short get_fpu_cwd(struct task_struct *tsk) |
398 | { | 430 | { |
399 | if (cpu_has_fxsr) { | 431 | if (cpu_has_fxsr) { |
400 | return tsk->thread.xstate->fxsave.cwd; | 432 | return tsk->thread.fpu.state->fxsave.cwd; |
401 | } else { | 433 | } else { |
402 | return (unsigned short)tsk->thread.xstate->fsave.cwd; | 434 | return (unsigned short)tsk->thread.fpu.state->fsave.cwd; |
403 | } | 435 | } |
404 | } | 436 | } |
405 | 437 | ||
406 | static inline unsigned short get_fpu_swd(struct task_struct *tsk) | 438 | static inline unsigned short get_fpu_swd(struct task_struct *tsk) |
407 | { | 439 | { |
408 | if (cpu_has_fxsr) { | 440 | if (cpu_has_fxsr) { |
409 | return tsk->thread.xstate->fxsave.swd; | 441 | return tsk->thread.fpu.state->fxsave.swd; |
410 | } else { | 442 | } else { |
411 | return (unsigned short)tsk->thread.xstate->fsave.swd; | 443 | return (unsigned short)tsk->thread.fpu.state->fsave.swd; |
412 | } | 444 | } |
413 | } | 445 | } |
414 | 446 | ||
415 | static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) | 447 | static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) |
416 | { | 448 | { |
417 | if (cpu_has_xmm) { | 449 | if (cpu_has_xmm) { |
418 | return tsk->thread.xstate->fxsave.mxcsr; | 450 | return tsk->thread.fpu.state->fxsave.mxcsr; |
419 | } else { | 451 | } else { |
420 | return MXCSR_DEFAULT; | 452 | return MXCSR_DEFAULT; |
421 | } | 453 | } |
422 | } | 454 | } |
423 | 455 | ||
456 | static bool fpu_allocated(struct fpu *fpu) | ||
457 | { | ||
458 | return fpu->state != NULL; | ||
459 | } | ||
460 | |||
461 | static inline int fpu_alloc(struct fpu *fpu) | ||
462 | { | ||
463 | if (fpu_allocated(fpu)) | ||
464 | return 0; | ||
465 | fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); | ||
466 | if (!fpu->state) | ||
467 | return -ENOMEM; | ||
468 | WARN_ON((unsigned long)fpu->state & 15); | ||
469 | return 0; | ||
470 | } | ||
471 | |||
472 | static inline void fpu_free(struct fpu *fpu) | ||
473 | { | ||
474 | if (fpu->state) { | ||
475 | kmem_cache_free(task_xstate_cachep, fpu->state); | ||
476 | fpu->state = NULL; | ||
477 | } | ||
478 | } | ||
479 | |||
480 | static inline void fpu_copy(struct fpu *dst, struct fpu *src) | ||
481 | { | ||
482 | memcpy(dst->state, src->state, xstate_size); | ||
483 | } | ||
484 | |||
424 | #endif /* __ASSEMBLY__ */ | 485 | #endif /* __ASSEMBLY__ */ |
425 | 486 | ||
426 | #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 | 487 | #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 |
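Editor's note: the i387.h rework above moves the FPU/xstate buffer behind a small struct fpu API: fpu_alloc()/fpu_free() manage the kmem_cache-backed state, fpu_copy() duplicates it, and fpu_save_init()/fpu_restore_checking() operate on it via use_xsave(). A minimal sketch of how these helpers compose when duplicating a task's FPU state, roughly what arch_dup_task_struct() in process.c ends up doing; copy_fpu_state() itself is a hypothetical wrapper:

    #include <linux/sched.h>
    #include <asm/i387.h>

    static int copy_fpu_state(struct task_struct *dst, struct task_struct *src)
    {
            int err;

            dst->thread.fpu.state = NULL;           /* start unallocated */

            err = fpu_alloc(&dst->thread.fpu);      /* task_xstate_cachep backed */
            if (err)
                    return err;                     /* -ENOMEM */

            fpu_copy(&dst->thread.fpu, &src->thread.fpu);  /* memcpy of xstate_size */
            return 0;
    }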
diff --git a/arch/x86/include/asm/i8253.h b/arch/x86/include/asm/i8253.h index 1edbf89680fd..fc1f579fb965 100644 --- a/arch/x86/include/asm/i8253.h +++ b/arch/x86/include/asm/i8253.h | |||
@@ -6,7 +6,7 @@ | |||
6 | #define PIT_CH0 0x40 | 6 | #define PIT_CH0 0x40 |
7 | #define PIT_CH2 0x42 | 7 | #define PIT_CH2 0x42 |
8 | 8 | ||
9 | extern spinlock_t i8253_lock; | 9 | extern raw_spinlock_t i8253_lock; |
10 | 10 | ||
11 | extern struct clock_event_device *global_clock_event; | 11 | extern struct clock_event_device *global_clock_event; |
12 | 12 | ||
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 96c2e0ad04ca..88c765e16410 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h | |||
@@ -68,6 +68,8 @@ struct insn { | |||
68 | const insn_byte_t *next_byte; | 68 | const insn_byte_t *next_byte; |
69 | }; | 69 | }; |
70 | 70 | ||
71 | #define MAX_INSN_SIZE 16 | ||
72 | |||
71 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) | 73 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) |
72 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) | 74 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) |
73 | #define X86_MODRM_RM(modrm) ((modrm) & 0x07) | 75 | #define X86_MODRM_RM(modrm) ((modrm) & 0x07) |
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h index 14cf526091f9..280bf7fb6aba 100644 --- a/arch/x86/include/asm/inst.h +++ b/arch/x86/include/asm/inst.h | |||
@@ -7,7 +7,66 @@ | |||
7 | 7 | ||
8 | #ifdef __ASSEMBLY__ | 8 | #ifdef __ASSEMBLY__ |
9 | 9 | ||
10 | #define REG_NUM_INVALID 100 | ||
11 | |||
12 | #define REG_TYPE_R64 0 | ||
13 | #define REG_TYPE_XMM 1 | ||
14 | #define REG_TYPE_INVALID 100 | ||
15 | |||
16 | .macro R64_NUM opd r64 | ||
17 | \opd = REG_NUM_INVALID | ||
18 | .ifc \r64,%rax | ||
19 | \opd = 0 | ||
20 | .endif | ||
21 | .ifc \r64,%rcx | ||
22 | \opd = 1 | ||
23 | .endif | ||
24 | .ifc \r64,%rdx | ||
25 | \opd = 2 | ||
26 | .endif | ||
27 | .ifc \r64,%rbx | ||
28 | \opd = 3 | ||
29 | .endif | ||
30 | .ifc \r64,%rsp | ||
31 | \opd = 4 | ||
32 | .endif | ||
33 | .ifc \r64,%rbp | ||
34 | \opd = 5 | ||
35 | .endif | ||
36 | .ifc \r64,%rsi | ||
37 | \opd = 6 | ||
38 | .endif | ||
39 | .ifc \r64,%rdi | ||
40 | \opd = 7 | ||
41 | .endif | ||
42 | .ifc \r64,%r8 | ||
43 | \opd = 8 | ||
44 | .endif | ||
45 | .ifc \r64,%r9 | ||
46 | \opd = 9 | ||
47 | .endif | ||
48 | .ifc \r64,%r10 | ||
49 | \opd = 10 | ||
50 | .endif | ||
51 | .ifc \r64,%r11 | ||
52 | \opd = 11 | ||
53 | .endif | ||
54 | .ifc \r64,%r12 | ||
55 | \opd = 12 | ||
56 | .endif | ||
57 | .ifc \r64,%r13 | ||
58 | \opd = 13 | ||
59 | .endif | ||
60 | .ifc \r64,%r14 | ||
61 | \opd = 14 | ||
62 | .endif | ||
63 | .ifc \r64,%r15 | ||
64 | \opd = 15 | ||
65 | .endif | ||
66 | .endm | ||
67 | |||
10 | .macro XMM_NUM opd xmm | 68 | .macro XMM_NUM opd xmm |
69 | \opd = REG_NUM_INVALID | ||
11 | .ifc \xmm,%xmm0 | 70 | .ifc \xmm,%xmm0 |
12 | \opd = 0 | 71 | \opd = 0 |
13 | .endif | 72 | .endif |
@@ -58,13 +117,25 @@ | |||
58 | .endif | 117 | .endif |
59 | .endm | 118 | .endm |
60 | 119 | ||
120 | .macro REG_TYPE type reg | ||
121 | R64_NUM reg_type_r64 \reg | ||
122 | XMM_NUM reg_type_xmm \reg | ||
123 | .if reg_type_r64 <> REG_NUM_INVALID | ||
124 | \type = REG_TYPE_R64 | ||
125 | .elseif reg_type_xmm <> REG_NUM_INVALID | ||
126 | \type = REG_TYPE_XMM | ||
127 | .else | ||
128 | \type = REG_TYPE_INVALID | ||
129 | .endif | ||
130 | .endm | ||
131 | |||
61 | .macro PFX_OPD_SIZE | 132 | .macro PFX_OPD_SIZE |
62 | .byte 0x66 | 133 | .byte 0x66 |
63 | .endm | 134 | .endm |
64 | 135 | ||
65 | .macro PFX_REX opd1 opd2 | 136 | .macro PFX_REX opd1 opd2 W=0 |
66 | .if (\opd1 | \opd2) & 8 | 137 | .if ((\opd1 | \opd2) & 8) || \W |
67 | .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | 138 | .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3) |
68 | .endif | 139 | .endif |
69 | .endm | 140 | .endm |
70 | 141 | ||
@@ -145,6 +216,25 @@ | |||
145 | .byte 0x0f, 0x38, 0xdf | 216 | .byte 0x0f, 0x38, 0xdf |
146 | MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2 | 217 | MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2 |
147 | .endm | 218 | .endm |
219 | |||
220 | .macro MOVQ_R64_XMM opd1 opd2 | ||
221 | REG_TYPE movq_r64_xmm_opd1_type \opd1 | ||
222 | .if movq_r64_xmm_opd1_type == REG_TYPE_XMM | ||
223 | XMM_NUM movq_r64_xmm_opd1 \opd1 | ||
224 | R64_NUM movq_r64_xmm_opd2 \opd2 | ||
225 | .else | ||
226 | R64_NUM movq_r64_xmm_opd1 \opd1 | ||
227 | XMM_NUM movq_r64_xmm_opd2 \opd2 | ||
228 | .endif | ||
229 | PFX_OPD_SIZE | ||
230 | PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1 | ||
231 | .if movq_r64_xmm_opd1_type == REG_TYPE_XMM | ||
232 | .byte 0x0f, 0x7e | ||
233 | .else | ||
234 | .byte 0x0f, 0x6e | ||
235 | .endif | ||
236 | MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2 | ||
237 | .endm | ||
148 | #endif | 238 | #endif |
149 | 239 | ||
150 | #endif | 240 | #endif |
diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h new file mode 100644 index 000000000000..4470c9ad4a3e --- /dev/null +++ b/arch/x86/include/asm/intel_scu_ipc.h | |||
@@ -0,0 +1,55 @@ | |||
1 | #ifndef _ASM_X86_INTEL_SCU_IPC_H_ | ||
2 | #define _ASM_X86_INTEL_SCU_IPC_H_ | ||
3 | |||
4 | /* Read single register */ | ||
5 | int intel_scu_ipc_ioread8(u16 addr, u8 *data); | ||
6 | |||
7 | /* Read two sequential registers */ | ||
8 | int intel_scu_ipc_ioread16(u16 addr, u16 *data); | ||
9 | |||
10 | /* Read four sequential registers */ | ||
11 | int intel_scu_ipc_ioread32(u16 addr, u32 *data); | ||
12 | |||
13 | /* Read a vector */ | ||
14 | int intel_scu_ipc_readv(u16 *addr, u8 *data, int len); | ||
15 | |||
16 | /* Write single register */ | ||
17 | int intel_scu_ipc_iowrite8(u16 addr, u8 data); | ||
18 | |||
19 | /* Write two sequential registers */ | ||
20 | int intel_scu_ipc_iowrite16(u16 addr, u16 data); | ||
21 | |||
22 | /* Write four sequential registers */ | ||
23 | int intel_scu_ipc_iowrite32(u16 addr, u32 data); | ||
24 | |||
25 | /* Write a vector */ | ||
26 | int intel_scu_ipc_writev(u16 *addr, u8 *data, int len); | ||
27 | |||
28 | /* Update single register based on the mask */ | ||
29 | int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask); | ||
30 | |||
31 | /* | ||
32 | * Indirect register read | ||
33 | * Can be used when SCCB(System Controller Configuration Block) register | ||
34 | * HRIM(Honor Restricted IPC Messages) is set (bit 23) | ||
35 | */ | ||
36 | int intel_scu_ipc_register_read(u32 addr, u32 *data); | ||
37 | |||
38 | /* | ||
39 | * Indirect register write | ||
40 | * Can be used when SCCB(System Controller Configuration Block) register | ||
41 | * HRIM(Honor Restricted IPC Messages) is set (bit 23) | ||
42 | */ | ||
43 | int intel_scu_ipc_register_write(u32 addr, u32 data); | ||
44 | |||
45 | /* Issue commands to the SCU with or without data */ | ||
46 | int intel_scu_ipc_simple_command(int cmd, int sub); | ||
47 | int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen, | ||
48 | u32 *out, int outlen); | ||
49 | /* I2C control api */ | ||
50 | int intel_scu_ipc_i2c_cntrl(u32 addr, u32 *data); | ||
51 | |||
52 | /* Update FW version */ | ||
53 | int intel_scu_ipc_fw_update(u8 *buffer, u32 length); | ||
54 | |||
55 | #endif | ||
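Editor's note: the new intel_scu_ipc.h interface is a thin mailbox API to the System Controller Unit: byte/word/dword register reads and writes, vector transfers, masked updates, and raw commands. A hypothetical driver snippet built on it; the register number and bit value would come from the platform's PMIC documentation, not from this header:

    #include <linux/types.h>
    #include <asm/intel_scu_ipc.h>

    /* set a single bit in an SCU-managed register; the mask argument of
     * the masked-update call tells the SCU which bits of data to apply,
     * so the remaining bits are left untouched */
    static int scu_set_bit(u16 reg, u8 bit)
    {
            return intel_scu_ipc_update_register(reg, bit, bit);
    }

    /* read the register back to confirm the change */
    static int scu_read_reg(u16 reg, u8 *val)
    {
            return intel_scu_ipc_ioread8(reg, val);
    }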
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 35832a03a515..63cb4096c3dc 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h | |||
@@ -159,7 +159,6 @@ struct io_apic_irq_attr; | |||
159 | extern int io_apic_set_pci_routing(struct device *dev, int irq, | 159 | extern int io_apic_set_pci_routing(struct device *dev, int irq, |
160 | struct io_apic_irq_attr *irq_attr); | 160 | struct io_apic_irq_attr *irq_attr); |
161 | void setup_IO_APIC_irq_extra(u32 gsi); | 161 | void setup_IO_APIC_irq_extra(u32 gsi); |
162 | extern int (*ioapic_renumber_irq)(int ioapic, int irq); | ||
163 | extern void ioapic_init_mappings(void); | 162 | extern void ioapic_init_mappings(void); |
164 | extern void ioapic_insert_resources(void); | 163 | extern void ioapic_insert_resources(void); |
165 | 164 | ||
@@ -180,12 +179,13 @@ extern void ioapic_write_entry(int apic, int pin, | |||
180 | extern void setup_ioapic_ids_from_mpc(void); | 179 | extern void setup_ioapic_ids_from_mpc(void); |
181 | 180 | ||
182 | struct mp_ioapic_gsi{ | 181 | struct mp_ioapic_gsi{ |
183 | int gsi_base; | 182 | u32 gsi_base; |
184 | int gsi_end; | 183 | u32 gsi_end; |
185 | }; | 184 | }; |
186 | extern struct mp_ioapic_gsi mp_gsi_routing[]; | 185 | extern struct mp_ioapic_gsi mp_gsi_routing[]; |
187 | int mp_find_ioapic(int gsi); | 186 | extern u32 gsi_end; |
188 | int mp_find_ioapic_pin(int ioapic, int gsi); | 187 | int mp_find_ioapic(u32 gsi); |
188 | int mp_find_ioapic_pin(int ioapic, u32 gsi); | ||
189 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); | 189 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); |
190 | extern void __init pre_init_apic_IRQ0(void); | 190 | extern void __init pre_init_apic_IRQ0(void); |
191 | 191 | ||
@@ -197,7 +197,8 @@ static const int timer_through_8259 = 0; | |||
197 | static inline void ioapic_init_mappings(void) { } | 197 | static inline void ioapic_init_mappings(void) { } |
198 | static inline void ioapic_insert_resources(void) { } | 198 | static inline void ioapic_insert_resources(void) { } |
199 | static inline void probe_nr_irqs_gsi(void) { } | 199 | static inline void probe_nr_irqs_gsi(void) { } |
200 | static inline int mp_find_ioapic(int gsi) { return 0; } | 200 | #define gsi_end (NR_IRQS_LEGACY - 1) |
201 | static inline int mp_find_ioapic(u32 gsi) { return 0; } | ||
201 | 202 | ||
202 | struct io_apic_irq_attr; | 203 | struct io_apic_irq_attr; |
203 | static inline int io_apic_set_pci_routing(struct device *dev, int irq, | 204 | static inline int io_apic_set_pci_routing(struct device *dev, int irq, |
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index f70e60071fe8..af00bd1d2089 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h | |||
@@ -16,11 +16,16 @@ extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); | |||
16 | extern int k8_scan_nodes(void); | 16 | extern int k8_scan_nodes(void); |
17 | 17 | ||
18 | #ifdef CONFIG_K8_NB | 18 | #ifdef CONFIG_K8_NB |
19 | extern int num_k8_northbridges; | ||
20 | |||
19 | static inline struct pci_dev *node_to_k8_nb_misc(int node) | 21 | static inline struct pci_dev *node_to_k8_nb_misc(int node) |
20 | { | 22 | { |
21 | return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; | 23 | return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; |
22 | } | 24 | } |
25 | |||
23 | #else | 26 | #else |
27 | #define num_k8_northbridges 0 | ||
28 | |||
24 | static inline struct pci_dev *node_to_k8_nb_misc(int node) | 29 | static inline struct pci_dev *node_to_k8_nb_misc(int node) |
25 | { | 30 | { |
26 | return NULL; | 31 | return NULL; |
diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h index e6c6c808489f..006da3687cdc 100644 --- a/arch/x86/include/asm/kgdb.h +++ b/arch/x86/include/asm/kgdb.h | |||
@@ -76,4 +76,7 @@ static inline void arch_kgdb_breakpoint(void) | |||
76 | #define BREAK_INSTR_SIZE 1 | 76 | #define BREAK_INSTR_SIZE 1 |
77 | #define CACHE_FLUSH_IS_SAFE 1 | 77 | #define CACHE_FLUSH_IS_SAFE 1 |
78 | 78 | ||
79 | extern int kgdb_ll_trap(int cmd, const char *str, | ||
80 | struct pt_regs *regs, long err, int trap, int sig); | ||
81 | |||
79 | #endif /* _ASM_X86_KGDB_H */ | 82 | #endif /* _ASM_X86_KGDB_H */ |
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 4ffa345a8ccb..547882539157 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/types.h> | 24 | #include <linux/types.h> |
25 | #include <linux/ptrace.h> | 25 | #include <linux/ptrace.h> |
26 | #include <linux/percpu.h> | 26 | #include <linux/percpu.h> |
27 | #include <asm/insn.h> | ||
27 | 28 | ||
28 | #define __ARCH_WANT_KPROBES_INSN_SLOT | 29 | #define __ARCH_WANT_KPROBES_INSN_SLOT |
29 | 30 | ||
@@ -36,7 +37,6 @@ typedef u8 kprobe_opcode_t; | |||
36 | #define RELATIVEJUMP_SIZE 5 | 37 | #define RELATIVEJUMP_SIZE 5 |
37 | #define RELATIVECALL_OPCODE 0xe8 | 38 | #define RELATIVECALL_OPCODE 0xe8 |
38 | #define RELATIVE_ADDR_SIZE 4 | 39 | #define RELATIVE_ADDR_SIZE 4 |
39 | #define MAX_INSN_SIZE 16 | ||
40 | #define MAX_STACK_SIZE 64 | 40 | #define MAX_STACK_SIZE 64 |
41 | #define MIN_STACK_SIZE(ADDR) \ | 41 | #define MIN_STACK_SIZE(ADDR) \ |
42 | (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ | 42 | (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ |
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index f46b79f6c16c..ff90055c7f0b 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h | |||
@@ -21,6 +21,7 @@ | |||
21 | #define __KVM_HAVE_PIT_STATE2 | 21 | #define __KVM_HAVE_PIT_STATE2 |
22 | #define __KVM_HAVE_XEN_HVM | 22 | #define __KVM_HAVE_XEN_HVM |
23 | #define __KVM_HAVE_VCPU_EVENTS | 23 | #define __KVM_HAVE_VCPU_EVENTS |
24 | #define __KVM_HAVE_DEBUGREGS | ||
24 | 25 | ||
25 | /* Architectural interrupt line count. */ | 26 | /* Architectural interrupt line count. */ |
26 | #define KVM_NR_INTERRUPTS 256 | 27 | #define KVM_NR_INTERRUPTS 256 |
@@ -257,6 +258,11 @@ struct kvm_reinject_control { | |||
257 | /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ | 258 | /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ |
258 | #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 | 259 | #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 |
259 | #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 | 260 | #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 |
261 | #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 | ||
262 | |||
263 | /* Interrupt shadow states */ | ||
264 | #define KVM_X86_SHADOW_INT_MOV_SS 0x01 | ||
265 | #define KVM_X86_SHADOW_INT_STI 0x02 | ||
260 | 266 | ||
261 | /* for KVM_GET/SET_VCPU_EVENTS */ | 267 | /* for KVM_GET/SET_VCPU_EVENTS */ |
262 | struct kvm_vcpu_events { | 268 | struct kvm_vcpu_events { |
@@ -271,7 +277,7 @@ struct kvm_vcpu_events { | |||
271 | __u8 injected; | 277 | __u8 injected; |
272 | __u8 nr; | 278 | __u8 nr; |
273 | __u8 soft; | 279 | __u8 soft; |
274 | __u8 pad; | 280 | __u8 shadow; |
275 | } interrupt; | 281 | } interrupt; |
276 | struct { | 282 | struct { |
277 | __u8 injected; | 283 | __u8 injected; |
@@ -284,4 +290,13 @@ struct kvm_vcpu_events { | |||
284 | __u32 reserved[10]; | 290 | __u32 reserved[10]; |
285 | }; | 291 | }; |
286 | 292 | ||
293 | /* for KVM_GET/SET_DEBUGREGS */ | ||
294 | struct kvm_debugregs { | ||
295 | __u64 db[4]; | ||
296 | __u64 dr6; | ||
297 | __u64 dr7; | ||
298 | __u64 flags; | ||
299 | __u64 reserved[9]; | ||
300 | }; | ||
301 | |||
287 | #endif /* _ASM_X86_KVM_H */ | 302 | #endif /* _ASM_X86_KVM_H */ |
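The kvm_debugregs layout added above only defines the data format; the matching vCPU ioctls live in the generic KVM UAPI. A minimal userspace sketch of reading a guest's debug registers, assuming KVM_GET_DEBUGREGS is exposed through <linux/kvm.h> and that vcpu_fd is an already-created vCPU file descriptor (none of this helper code is part of the patch):

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Dump the guest debug registers through the structure defined above. */
static void dump_guest_debugregs(int vcpu_fd)
{
	struct kvm_debugregs dbg;
	int i;

	if (ioctl(vcpu_fd, KVM_GET_DEBUGREGS, &dbg) < 0) {
		perror("KVM_GET_DEBUGREGS");
		return;
	}
	for (i = 0; i < 4; i++)
		printf("db%d = 0x%llx\n", i, (unsigned long long)dbg.db[i]);
	printf("dr6 = 0x%llx  dr7 = 0x%llx\n",
	       (unsigned long long)dbg.dr6, (unsigned long long)dbg.dr7);
}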
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 7a6f54fa13ba..0b2729bf2070 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -11,6 +11,8 @@ | |||
11 | #ifndef _ASM_X86_KVM_X86_EMULATE_H | 11 | #ifndef _ASM_X86_KVM_X86_EMULATE_H |
12 | #define _ASM_X86_KVM_X86_EMULATE_H | 12 | #define _ASM_X86_KVM_X86_EMULATE_H |
13 | 13 | ||
14 | #include <asm/desc_defs.h> | ||
15 | |||
14 | struct x86_emulate_ctxt; | 16 | struct x86_emulate_ctxt; |
15 | 17 | ||
16 | /* | 18 | /* |
@@ -63,6 +65,15 @@ struct x86_emulate_ops { | |||
63 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); | 65 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); |
64 | 66 | ||
65 | /* | 67 | /* |
68 | * write_std: Write bytes of standard (non-emulated/special) memory. | ||
69 | * Used for descriptor writing. | ||
70 | * @addr: [IN ] Linear address to which to write. | ||
71 | * @val: [IN ] Value to write to memory, zero-extended to 'u_long'. | ||
72 | * @bytes: [IN ] Number of bytes to write to memory. | ||
73 | */ | ||
74 | int (*write_std)(unsigned long addr, void *val, | ||
75 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); | ||
76 | /* | ||
66 | * fetch: Read bytes of standard (non-emulated/special) memory. | 77 | * fetch: Read bytes of standard (non-emulated/special) memory. |
67 | * Used for instruction fetch. | 78 | * Used for instruction fetch. |
68 | * @addr: [IN ] Linear address from which to read. | 79 | * @addr: [IN ] Linear address from which to read. |
@@ -109,6 +120,23 @@ struct x86_emulate_ops { | |||
109 | unsigned int bytes, | 120 | unsigned int bytes, |
110 | struct kvm_vcpu *vcpu); | 121 | struct kvm_vcpu *vcpu); |
111 | 122 | ||
123 | int (*pio_in_emulated)(int size, unsigned short port, void *val, | ||
124 | unsigned int count, struct kvm_vcpu *vcpu); | ||
125 | |||
126 | int (*pio_out_emulated)(int size, unsigned short port, const void *val, | ||
127 | unsigned int count, struct kvm_vcpu *vcpu); | ||
128 | |||
129 | bool (*get_cached_descriptor)(struct desc_struct *desc, | ||
130 | int seg, struct kvm_vcpu *vcpu); | ||
131 | void (*set_cached_descriptor)(struct desc_struct *desc, | ||
132 | int seg, struct kvm_vcpu *vcpu); | ||
133 | u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); | ||
134 | void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); | ||
135 | void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); | ||
136 | ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); | ||
137 | void (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); | ||
138 | int (*cpl)(struct kvm_vcpu *vcpu); | ||
139 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | ||
112 | }; | 140 | }; |
113 | 141 | ||
114 | /* Type, address-of, and value of an instruction's operand. */ | 142 | /* Type, address-of, and value of an instruction's operand. */ |
@@ -124,6 +152,12 @@ struct fetch_cache { | |||
124 | unsigned long end; | 152 | unsigned long end; |
125 | }; | 153 | }; |
126 | 154 | ||
155 | struct read_cache { | ||
156 | u8 data[1024]; | ||
157 | unsigned long pos; | ||
158 | unsigned long end; | ||
159 | }; | ||
160 | |||
127 | struct decode_cache { | 161 | struct decode_cache { |
128 | u8 twobyte; | 162 | u8 twobyte; |
129 | u8 b; | 163 | u8 b; |
@@ -139,7 +173,7 @@ struct decode_cache { | |||
139 | u8 seg_override; | 173 | u8 seg_override; |
140 | unsigned int d; | 174 | unsigned int d; |
141 | unsigned long regs[NR_VCPU_REGS]; | 175 | unsigned long regs[NR_VCPU_REGS]; |
142 | unsigned long eip, eip_orig; | 176 | unsigned long eip; |
143 | /* modrm */ | 177 | /* modrm */ |
144 | u8 modrm; | 178 | u8 modrm; |
145 | u8 modrm_mod; | 179 | u8 modrm_mod; |
@@ -151,16 +185,15 @@ struct decode_cache { | |||
151 | void *modrm_ptr; | 185 | void *modrm_ptr; |
152 | unsigned long modrm_val; | 186 | unsigned long modrm_val; |
153 | struct fetch_cache fetch; | 187 | struct fetch_cache fetch; |
188 | struct read_cache io_read; | ||
154 | }; | 189 | }; |
155 | 190 | ||
156 | #define X86_SHADOW_INT_MOV_SS 1 | ||
157 | #define X86_SHADOW_INT_STI 2 | ||
158 | |||
159 | struct x86_emulate_ctxt { | 191 | struct x86_emulate_ctxt { |
160 | /* Register state before/after emulation. */ | 192 | /* Register state before/after emulation. */ |
161 | struct kvm_vcpu *vcpu; | 193 | struct kvm_vcpu *vcpu; |
162 | 194 | ||
163 | unsigned long eflags; | 195 | unsigned long eflags; |
196 | unsigned long eip; /* eip before instruction emulation */ | ||
164 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ | 197 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ |
165 | int mode; | 198 | int mode; |
166 | u32 cs_base; | 199 | u32 cs_base; |
@@ -168,6 +201,7 @@ struct x86_emulate_ctxt { | |||
168 | /* interruptibility state, as a result of execution of STI or MOV SS */ | 201 | /* interruptibility state, as a result of execution of STI or MOV SS */ |
169 | int interruptibility; | 202 | int interruptibility; |
170 | 203 | ||
204 | bool restart; /* restart string instruction after writeback */ | ||
171 | /* decode cache */ | 205 | /* decode cache */ |
172 | struct decode_cache decode; | 206 | struct decode_cache decode; |
173 | }; | 207 | }; |
@@ -194,5 +228,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, | |||
194 | struct x86_emulate_ops *ops); | 228 | struct x86_emulate_ops *ops); |
195 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, | 229 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, |
196 | struct x86_emulate_ops *ops); | 230 | struct x86_emulate_ops *ops); |
231 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | ||
232 | struct x86_emulate_ops *ops, | ||
233 | u16 tss_selector, int reason, | ||
234 | bool has_error_code, u32 error_code); | ||
197 | 235 | ||
198 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ | 236 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ |
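For orientation, this is roughly how the emulator would hand a single (non-string) OUT instruction to the new pio_out_emulated callback. The call shape follows the prototypes above; the helper name, the use of VCPU_REGS_RAX from kvm_host.h, and the exact call site are illustrative assumptions, not code from this patch:

#include <linux/types.h>
#include <asm/kvm_host.h>
#include <asm/kvm_emulate.h>

static int demo_emulate_out(struct x86_emulate_ctxt *ctxt,
			    struct x86_emulate_ops *ops,
			    int size, u16 port)
{
	struct decode_cache *c = &ctxt->decode;

	/* one item of 'size' bytes taken from AL/AX/EAX */
	return ops->pio_out_emulated(size, port, &c->regs[VCPU_REGS_RAX],
				     1, ctxt->vcpu);
}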
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 06d9e79ca37d..76f5483cffec 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -171,15 +171,15 @@ struct kvm_pte_chain { | |||
171 | union kvm_mmu_page_role { | 171 | union kvm_mmu_page_role { |
172 | unsigned word; | 172 | unsigned word; |
173 | struct { | 173 | struct { |
174 | unsigned glevels:4; | ||
175 | unsigned level:4; | 174 | unsigned level:4; |
175 | unsigned cr4_pae:1; | ||
176 | unsigned quadrant:2; | 176 | unsigned quadrant:2; |
177 | unsigned pad_for_nice_hex_output:6; | 177 | unsigned pad_for_nice_hex_output:6; |
178 | unsigned direct:1; | 178 | unsigned direct:1; |
179 | unsigned access:3; | 179 | unsigned access:3; |
180 | unsigned invalid:1; | 180 | unsigned invalid:1; |
181 | unsigned cr4_pge:1; | ||
182 | unsigned nxe:1; | 181 | unsigned nxe:1; |
182 | unsigned cr0_wp:1; | ||
183 | }; | 183 | }; |
184 | }; | 184 | }; |
185 | 185 | ||
@@ -187,8 +187,6 @@ struct kvm_mmu_page { | |||
187 | struct list_head link; | 187 | struct list_head link; |
188 | struct hlist_node hash_link; | 188 | struct hlist_node hash_link; |
189 | 189 | ||
190 | struct list_head oos_link; | ||
191 | |||
192 | /* | 190 | /* |
193 | * The following two entries are used to key the shadow page in the | 191 | * The following two entries are used to key the shadow page in the |
194 | * hash table. | 192 | * hash table. |
@@ -204,9 +202,9 @@ struct kvm_mmu_page { | |||
204 | * in this shadow page. | 202 | * in this shadow page. |
205 | */ | 203 | */ |
206 | DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 204 | DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
207 | int multimapped; /* More than one parent_pte? */ | 205 | bool multimapped; /* More than one parent_pte? */ |
208 | int root_count; /* Currently serving as active root */ | ||
209 | bool unsync; | 206 | bool unsync; |
207 | int root_count; /* Currently serving as active root */ | ||
210 | unsigned int unsync_children; | 208 | unsigned int unsync_children; |
211 | union { | 209 | union { |
212 | u64 *parent_pte; /* !multimapped */ | 210 | u64 *parent_pte; /* !multimapped */ |
@@ -224,14 +222,9 @@ struct kvm_pv_mmu_op_buffer { | |||
224 | 222 | ||
225 | struct kvm_pio_request { | 223 | struct kvm_pio_request { |
226 | unsigned long count; | 224 | unsigned long count; |
227 | int cur_count; | ||
228 | gva_t guest_gva; | ||
229 | int in; | 225 | int in; |
230 | int port; | 226 | int port; |
231 | int size; | 227 | int size; |
232 | int string; | ||
233 | int down; | ||
234 | int rep; | ||
235 | }; | 228 | }; |
236 | 229 | ||
237 | /* | 230 | /* |
@@ -320,6 +313,7 @@ struct kvm_vcpu_arch { | |||
320 | struct kvm_queued_exception { | 313 | struct kvm_queued_exception { |
321 | bool pending; | 314 | bool pending; |
322 | bool has_error_code; | 315 | bool has_error_code; |
316 | bool reinject; | ||
323 | u8 nr; | 317 | u8 nr; |
324 | u32 error_code; | 318 | u32 error_code; |
325 | } exception; | 319 | } exception; |
@@ -362,8 +356,8 @@ struct kvm_vcpu_arch { | |||
362 | u64 *mce_banks; | 356 | u64 *mce_banks; |
363 | 357 | ||
364 | /* used for guest single stepping over the given code position */ | 358 | /* used for guest single stepping over the given code position */ |
365 | u16 singlestep_cs; | ||
366 | unsigned long singlestep_rip; | 359 | unsigned long singlestep_rip; |
360 | |||
367 | /* fields used by HYPER-V emulation */ | 361 | /* fields used by HYPER-V emulation */ |
368 | u64 hv_vapic; | 362 | u64 hv_vapic; |
369 | }; | 363 | }; |
@@ -389,6 +383,7 @@ struct kvm_arch { | |||
389 | unsigned int n_free_mmu_pages; | 383 | unsigned int n_free_mmu_pages; |
390 | unsigned int n_requested_mmu_pages; | 384 | unsigned int n_requested_mmu_pages; |
391 | unsigned int n_alloc_mmu_pages; | 385 | unsigned int n_alloc_mmu_pages; |
386 | atomic_t invlpg_counter; | ||
392 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; | 387 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; |
393 | /* | 388 | /* |
394 | * Hash table of struct kvm_mmu_page. | 389 | * Hash table of struct kvm_mmu_page. |
@@ -461,11 +456,6 @@ struct kvm_vcpu_stat { | |||
461 | u32 nmi_injections; | 456 | u32 nmi_injections; |
462 | }; | 457 | }; |
463 | 458 | ||
464 | struct descriptor_table { | ||
465 | u16 limit; | ||
466 | unsigned long base; | ||
467 | } __attribute__((packed)); | ||
468 | |||
469 | struct kvm_x86_ops { | 459 | struct kvm_x86_ops { |
470 | int (*cpu_has_kvm_support)(void); /* __init */ | 460 | int (*cpu_has_kvm_support)(void); /* __init */ |
471 | int (*disabled_by_bios)(void); /* __init */ | 461 | int (*disabled_by_bios)(void); /* __init */ |
@@ -503,12 +493,11 @@ struct kvm_x86_ops { | |||
503 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); | 493 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); |
504 | void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); | 494 | void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); |
505 | void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); | 495 | void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); |
506 | void (*get_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 496 | void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
507 | void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 497 | void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
508 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 498 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
509 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 499 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
510 | int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest); | 500 | void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); |
511 | int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value); | ||
512 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); | 501 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); |
513 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); | 502 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); |
514 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | 503 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); |
@@ -527,7 +516,8 @@ struct kvm_x86_ops { | |||
527 | void (*set_irq)(struct kvm_vcpu *vcpu); | 516 | void (*set_irq)(struct kvm_vcpu *vcpu); |
528 | void (*set_nmi)(struct kvm_vcpu *vcpu); | 517 | void (*set_nmi)(struct kvm_vcpu *vcpu); |
529 | void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, | 518 | void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, |
530 | bool has_error_code, u32 error_code); | 519 | bool has_error_code, u32 error_code, |
520 | bool reinject); | ||
531 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu); | 521 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu); |
532 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); | 522 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); |
533 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); | 523 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); |
@@ -541,6 +531,8 @@ struct kvm_x86_ops { | |||
541 | int (*get_lpage_level)(void); | 531 | int (*get_lpage_level)(void); |
542 | bool (*rdtscp_supported)(void); | 532 | bool (*rdtscp_supported)(void); |
543 | 533 | ||
534 | void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); | ||
535 | |||
544 | const struct trace_print_flags *exit_reasons_str; | 536 | const struct trace_print_flags *exit_reasons_str; |
545 | }; | 537 | }; |
546 | 538 | ||
@@ -587,23 +579,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
587 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); | 579 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); |
588 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 580 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
589 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 581 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
590 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | ||
591 | unsigned long *rflags); | ||
592 | 582 | ||
593 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr); | ||
594 | void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, | ||
595 | unsigned long *rflags); | ||
596 | void kvm_enable_efer_bits(u64); | 583 | void kvm_enable_efer_bits(u64); |
597 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); | 584 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); |
598 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | 585 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); |
599 | 586 | ||
600 | struct x86_emulate_ctxt; | 587 | struct x86_emulate_ctxt; |
601 | 588 | ||
602 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, | 589 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); |
603 | int size, unsigned port); | ||
604 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | ||
605 | int size, unsigned long count, int down, | ||
606 | gva_t address, int rep, unsigned port); | ||
607 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); | 590 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); |
608 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); | 591 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); |
609 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); | 592 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); |
@@ -616,12 +599,15 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, | |||
616 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); | 599 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
617 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); | 600 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); |
618 | 601 | ||
619 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); | 602 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, |
603 | bool has_error_code, u32 error_code); | ||
620 | 604 | ||
621 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); | 605 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
622 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | 606 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
623 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); | 607 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); |
624 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); | 608 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); |
609 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); | ||
610 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); | ||
625 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); | 611 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); |
626 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); | 612 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); |
627 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); | 613 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); |
@@ -634,6 +620,8 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); | |||
634 | 620 | ||
635 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); | 621 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); |
636 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | 622 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); |
623 | void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); | ||
624 | void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | ||
637 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, | 625 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, |
638 | u32 error_code); | 626 | u32 error_code); |
639 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); | 627 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); |
@@ -649,8 +637,6 @@ int emulator_write_emulated(unsigned long addr, | |||
649 | unsigned int bytes, | 637 | unsigned int bytes, |
650 | struct kvm_vcpu *vcpu); | 638 | struct kvm_vcpu *vcpu); |
651 | 639 | ||
652 | unsigned long segment_base(u16 selector); | ||
653 | |||
654 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); | 640 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); |
655 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 641 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
656 | const u8 *new, int bytes, | 642 | const u8 *new, int bytes, |
@@ -675,7 +661,6 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); | |||
675 | void kvm_enable_tdp(void); | 661 | void kvm_enable_tdp(void); |
676 | void kvm_disable_tdp(void); | 662 | void kvm_disable_tdp(void); |
677 | 663 | ||
678 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); | ||
679 | int complete_pio(struct kvm_vcpu *vcpu); | 664 | int complete_pio(struct kvm_vcpu *vcpu); |
680 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); | 665 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); |
681 | 666 | ||
@@ -724,23 +709,6 @@ static inline void kvm_load_ldt(u16 sel) | |||
724 | asm("lldt %0" : : "rm"(sel)); | 709 | asm("lldt %0" : : "rm"(sel)); |
725 | } | 710 | } |
726 | 711 | ||
727 | static inline void kvm_get_idt(struct descriptor_table *table) | ||
728 | { | ||
729 | asm("sidt %0" : "=m"(*table)); | ||
730 | } | ||
731 | |||
732 | static inline void kvm_get_gdt(struct descriptor_table *table) | ||
733 | { | ||
734 | asm("sgdt %0" : "=m"(*table)); | ||
735 | } | ||
736 | |||
737 | static inline unsigned long kvm_read_tr_base(void) | ||
738 | { | ||
739 | u16 tr; | ||
740 | asm("str %0" : "=g"(tr)); | ||
741 | return segment_base(tr); | ||
742 | } | ||
743 | |||
744 | #ifdef CONFIG_X86_64 | 712 | #ifdef CONFIG_X86_64 |
745 | static inline unsigned long read_msr(unsigned long msr) | 713 | static inline unsigned long read_msr(unsigned long msr) |
746 | { | 714 | { |
@@ -826,4 +794,6 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v); | |||
826 | void kvm_define_shared_msr(unsigned index, u32 msr); | 794 | void kvm_define_shared_msr(unsigned index, u32 msr); |
827 | void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); | 795 | void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); |
828 | 796 | ||
797 | bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); | ||
798 | |||
829 | #endif /* _ASM_X86_KVM_HOST_H */ | 799 | #endif /* _ASM_X86_KVM_HOST_H */ |
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index ffae1420e7d7..05eba5e9a8e8 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -16,10 +16,23 @@ | |||
16 | #define KVM_FEATURE_CLOCKSOURCE 0 | 16 | #define KVM_FEATURE_CLOCKSOURCE 0 |
17 | #define KVM_FEATURE_NOP_IO_DELAY 1 | 17 | #define KVM_FEATURE_NOP_IO_DELAY 1 |
18 | #define KVM_FEATURE_MMU_OP 2 | 18 | #define KVM_FEATURE_MMU_OP 2 |
19 | /* This indicates that the new set of kvmclock MSRs | ||
20 | * are available. The use of 0x11 and 0x12 is deprecated. | ||
21 | */ | ||
22 | #define KVM_FEATURE_CLOCKSOURCE2 3 | ||
23 | |||
24 | /* The last 8 bits are used to indicate how to interpret the flags field | ||
25 | * in the pvclock structure. If no bits are set, all flags are ignored. | ||
26 | */ | ||
27 | #define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24 | ||
19 | 28 | ||
20 | #define MSR_KVM_WALL_CLOCK 0x11 | 29 | #define MSR_KVM_WALL_CLOCK 0x11 |
21 | #define MSR_KVM_SYSTEM_TIME 0x12 | 30 | #define MSR_KVM_SYSTEM_TIME 0x12 |
22 | 31 | ||
32 | /* Custom MSRs fall in the range 0x4b564d00-0x4b564dff */ | ||
33 | #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 | ||
34 | #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 | ||
35 | |||
23 | #define KVM_MAX_MMU_OP_BATCH 32 | 36 | #define KVM_MAX_MMU_OP_BATCH 32 |
24 | 37 | ||
25 | /* Operations for KVM_HC_MMU_OP */ | 38 | /* Operations for KVM_HC_MMU_OP */ |
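A guest detects the new clocksource MSRs above through the KVM features CPUID leaf and then registers its pvclock area via the new MSR. A minimal guest-side sketch, assuming the usual KVM_CPUID_FEATURES leaf (0x40000001, feature bits returned in EAX) and the existing "guest-physical address | enable bit" convention of the kvmclock MSRs; neither of those is introduced by this diff:

#include <linux/types.h>
#include <linux/errno.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/kvm_para.h>

static int demo_register_kvmclock_new(phys_addr_t time_info_pa)
{
	unsigned int eax, ebx, ecx, edx;

	cpuid(KVM_CPUID_FEATURES, &eax, &ebx, &ecx, &edx);
	if (!(eax & (1 << KVM_FEATURE_CLOCKSOURCE2)))
		return -ENODEV;		/* fall back to MSRs 0x11/0x12 */

	/* bit 0 enables the clock, the rest is the guest-physical address */
	wrmsrl(MSR_KVM_SYSTEM_TIME_NEW, (u64)time_info_pa | 1);
	return 0;
}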
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 6c3fdd631ed3..f32a4301c4d4 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -225,5 +225,13 @@ extern void mcheck_intel_therm_init(void); | |||
225 | static inline void mcheck_intel_therm_init(void) { } | 225 | static inline void mcheck_intel_therm_init(void) { } |
226 | #endif | 226 | #endif |
227 | 227 | ||
228 | /* | ||
229 | * Used by APEI to report memory error via /dev/mcelog | ||
230 | */ | ||
231 | |||
232 | struct cper_sec_mem_err; | ||
233 | extern void apei_mce_report_mem_error(int corrected, | ||
234 | struct cper_sec_mem_err *mem_err); | ||
235 | |||
228 | #endif /* __KERNEL__ */ | 236 | #endif /* __KERNEL__ */ |
229 | #endif /* _ASM_X86_MCE_H */ | 237 | #endif /* _ASM_X86_MCE_H */ |
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index d8bf23a88d05..c82868e9f905 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h | |||
@@ -105,16 +105,6 @@ extern void mp_config_acpi_legacy_irqs(void); | |||
105 | struct device; | 105 | struct device; |
106 | extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level, | 106 | extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level, |
107 | int active_high_low); | 107 | int active_high_low); |
108 | extern int acpi_probe_gsi(void); | ||
109 | #ifdef CONFIG_X86_IO_APIC | ||
110 | extern int mp_find_ioapic(int gsi); | ||
111 | extern int mp_find_ioapic_pin(int ioapic, int gsi); | ||
112 | #endif | ||
113 | #else /* !CONFIG_ACPI: */ | ||
114 | static inline int acpi_probe_gsi(void) | ||
115 | { | ||
116 | return 0; | ||
117 | } | ||
118 | #endif /* CONFIG_ACPI */ | 108 | #endif /* CONFIG_ACPI */ |
119 | 109 | ||
120 | #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) | 110 | #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) |
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h new file mode 100644 index 000000000000..79ce5685ab64 --- /dev/null +++ b/arch/x86/include/asm/mshyperv.h | |||
@@ -0,0 +1,14 @@ | |||
1 | #ifndef _ASM_X86_MSHYPER_H | ||
2 | #define _ASM_X86_MSHYPER_H | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | #include <asm/hyperv.h> | ||
6 | |||
7 | struct ms_hyperv_info { | ||
8 | u32 features; | ||
9 | u32 hints; | ||
10 | }; | ||
11 | |||
12 | extern struct ms_hyperv_info ms_hyperv; | ||
13 | |||
14 | #endif | ||
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 4604e6a54d36..b49d8ca228f6 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -71,11 +71,14 @@ | |||
71 | #define MSR_IA32_LASTINTTOIP 0x000001de | 71 | #define MSR_IA32_LASTINTTOIP 0x000001de |
72 | 72 | ||
73 | /* DEBUGCTLMSR bits (others vary by model): */ | 73 | /* DEBUGCTLMSR bits (others vary by model): */ |
74 | #define _DEBUGCTLMSR_LBR 0 /* last branch recording */ | 74 | #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ |
75 | #define _DEBUGCTLMSR_BTF 1 /* single-step on branches */ | 75 | #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ |
76 | 76 | #define DEBUGCTLMSR_TR (1UL << 6) | |
77 | #define DEBUGCTLMSR_LBR (1UL << _DEBUGCTLMSR_LBR) | 77 | #define DEBUGCTLMSR_BTS (1UL << 7) |
78 | #define DEBUGCTLMSR_BTF (1UL << _DEBUGCTLMSR_BTF) | 78 | #define DEBUGCTLMSR_BTINT (1UL << 8) |
79 | #define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) | ||
80 | #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) | ||
81 | #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) | ||
79 | 82 | ||
80 | #define MSR_IA32_MC0_CTL 0x00000400 | 83 | #define MSR_IA32_MC0_CTL 0x00000400 |
81 | #define MSR_IA32_MC0_STATUS 0x00000401 | 84 | #define MSR_IA32_MC0_STATUS 0x00000401 |
@@ -199,8 +202,9 @@ | |||
199 | #define MSR_IA32_EBL_CR_POWERON 0x0000002a | 202 | #define MSR_IA32_EBL_CR_POWERON 0x0000002a |
200 | #define MSR_IA32_FEATURE_CONTROL 0x0000003a | 203 | #define MSR_IA32_FEATURE_CONTROL 0x0000003a |
201 | 204 | ||
202 | #define FEATURE_CONTROL_LOCKED (1<<0) | 205 | #define FEATURE_CONTROL_LOCKED (1<<0) |
203 | #define FEATURE_CONTROL_VMXON_ENABLED (1<<2) | 206 | #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) |
207 | #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) | ||
204 | 208 | ||
205 | #define MSR_IA32_APICBASE 0x0000001b | 209 | #define MSR_IA32_APICBASE 0x0000001b |
206 | #define MSR_IA32_APICBASE_BSP (1<<8) | 210 | #define MSR_IA32_APICBASE_BSP (1<<8) |
@@ -232,6 +236,8 @@ | |||
232 | 236 | ||
233 | #define MSR_IA32_MISC_ENABLE 0x000001a0 | 237 | #define MSR_IA32_MISC_ENABLE 0x000001a0 |
234 | 238 | ||
239 | #define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 | ||
240 | |||
235 | /* MISC_ENABLE bits: architectural */ | 241 | /* MISC_ENABLE bits: architectural */ |
236 | #define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) | 242 | #define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) |
237 | #define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) | 243 | #define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) |
@@ -359,6 +365,8 @@ | |||
359 | #define MSR_P4_U2L_ESCR0 0x000003b0 | 365 | #define MSR_P4_U2L_ESCR0 0x000003b0 |
360 | #define MSR_P4_U2L_ESCR1 0x000003b1 | 366 | #define MSR_P4_U2L_ESCR1 0x000003b1 |
361 | 367 | ||
368 | #define MSR_P4_PEBS_MATRIX_VERT 0x000003f2 | ||
369 | |||
362 | /* Intel Core-based CPU performance counters */ | 370 | /* Intel Core-based CPU performance counters */ |
363 | #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 | 371 | #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 |
364 | #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a | 372 | #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a |
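The FEATURE_CONTROL rename above distinguishes VMXON permission inside and outside SMX operation. A sketch of how the outside-SMX bit is typically consulted before VMXON, using the usual rdmsrl()/wrmsrl() kernel accessors; the policy of programming the MSR while it is still unlocked is an assumption of this example, not something the diff introduces:

#include <linux/types.h>
#include <asm/msr.h>

static bool vmx_allowed_outside_smx(void)
{
	u64 fc;

	rdmsrl(MSR_IA32_FEATURE_CONTROL, fc);
	if (fc & FEATURE_CONTROL_LOCKED)
		return fc & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;

	/* unlocked: firmware left it open, enable and lock it ourselves */
	fc |= FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
	wrmsrl(MSR_IA32_FEATURE_CONTROL, fc);
	return true;
}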
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 1a0422348d6d..8d8797eae5d7 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h | |||
@@ -83,7 +83,7 @@ struct irq_routing_table { | |||
83 | 83 | ||
84 | extern unsigned int pcibios_irq_mask; | 84 | extern unsigned int pcibios_irq_mask; |
85 | 85 | ||
86 | extern spinlock_t pci_config_lock; | 86 | extern raw_spinlock_t pci_config_lock; |
87 | 87 | ||
88 | extern int (*pcibios_enable_irq)(struct pci_dev *dev); | 88 | extern int (*pcibios_enable_irq)(struct pci_dev *dev); |
89 | extern void (*pcibios_disable_irq)(struct pci_dev *dev); | 89 | extern void (*pcibios_disable_irq)(struct pci_dev *dev); |
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 66a272dfd8b8..0797e748d280 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -105,7 +105,7 @@ do { \ | |||
105 | 105 | ||
106 | /* | 106 | /* |
107 | * Generate a percpu add to memory instruction and optimize code | 107 | * Generate a percpu add to memory instruction and optimize code |
108 | * if a one is added or subtracted. | 108 | * if one is added or subtracted. |
109 | */ | 109 | */ |
110 | #define percpu_add_op(var, val) \ | 110 | #define percpu_add_op(var, val) \ |
111 | do { \ | 111 | do { \ |
@@ -190,6 +190,29 @@ do { \ | |||
190 | pfo_ret__; \ | 190 | pfo_ret__; \ |
191 | }) | 191 | }) |
192 | 192 | ||
193 | #define percpu_unary_op(op, var) \ | ||
194 | ({ \ | ||
195 | switch (sizeof(var)) { \ | ||
196 | case 1: \ | ||
197 | asm(op "b "__percpu_arg(0) \ | ||
198 | : "+m" (var)); \ | ||
199 | break; \ | ||
200 | case 2: \ | ||
201 | asm(op "w "__percpu_arg(0) \ | ||
202 | : "+m" (var)); \ | ||
203 | break; \ | ||
204 | case 4: \ | ||
205 | asm(op "l "__percpu_arg(0) \ | ||
206 | : "+m" (var)); \ | ||
207 | break; \ | ||
208 | case 8: \ | ||
209 | asm(op "q "__percpu_arg(0) \ | ||
210 | : "+m" (var)); \ | ||
211 | break; \ | ||
212 | default: __bad_percpu_size(); \ | ||
213 | } \ | ||
214 | }) | ||
215 | |||
193 | /* | 216 | /* |
194 | * percpu_read() makes gcc load the percpu variable every time it is | 217 | * percpu_read() makes gcc load the percpu variable every time it is |
195 | * accessed while percpu_read_stable() allows the value to be cached. | 218 | * accessed while percpu_read_stable() allows the value to be cached. |
@@ -207,6 +230,7 @@ do { \ | |||
207 | #define percpu_and(var, val) percpu_to_op("and", var, val) | 230 | #define percpu_and(var, val) percpu_to_op("and", var, val) |
208 | #define percpu_or(var, val) percpu_to_op("or", var, val) | 231 | #define percpu_or(var, val) percpu_to_op("or", var, val) |
209 | #define percpu_xor(var, val) percpu_to_op("xor", var, val) | 232 | #define percpu_xor(var, val) percpu_to_op("xor", var, val) |
233 | #define percpu_inc(var) percpu_unary_op("inc", var) | ||
210 | 234 | ||
211 | #define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 235 | #define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
212 | #define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 236 | #define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
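The new percpu_inc() wrapper is used like the existing percpu_add()/percpu_or() helpers: it emits a single size-matched inc on the current CPU's instance of the variable. A small illustrative sketch (the counter name below is made up for the example, not an existing kernel symbol):

#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned int, demo_irq_count);

static void demo_count_irq(void)
{
	/* a single "incl %gs:..." on this CPU's copy, no lock prefix needed */
	percpu_inc(demo_irq_count);
}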
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index db6109a885a7..254883d0c7e0 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -5,7 +5,7 @@ | |||
5 | * Performance event hw details: | 5 | * Performance event hw details: |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #define X86_PMC_MAX_GENERIC 8 | 8 | #define X86_PMC_MAX_GENERIC 32 |
9 | #define X86_PMC_MAX_FIXED 3 | 9 | #define X86_PMC_MAX_FIXED 3 |
10 | 10 | ||
11 | #define X86_PMC_IDX_GENERIC 0 | 11 | #define X86_PMC_IDX_GENERIC 0 |
@@ -18,39 +18,31 @@ | |||
18 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 | 18 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 |
19 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 | 19 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 |
20 | 20 | ||
21 | #define ARCH_PERFMON_EVENTSEL_ENABLE (1 << 22) | 21 | #define ARCH_PERFMON_EVENTSEL_EVENT 0x000000FFULL |
22 | #define ARCH_PERFMON_EVENTSEL_ANY (1 << 21) | 22 | #define ARCH_PERFMON_EVENTSEL_UMASK 0x0000FF00ULL |
23 | #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) | 23 | #define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16) |
24 | #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) | 24 | #define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) |
25 | #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) | 25 | #define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18) |
26 | 26 | #define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20) | |
27 | /* | 27 | #define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21) |
28 | * Includes eventsel and unit mask as well: | 28 | #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) |
29 | */ | 29 | #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) |
30 | 30 | #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL | |
31 | 31 | ||
32 | #define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL | 32 | #define AMD64_EVENTSEL_EVENT \ |
33 | #define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL | 33 | (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) |
34 | #define INTEL_ARCH_EDGE_MASK 0x00040000ULL | 34 | #define INTEL_ARCH_EVENT_MASK \ |
35 | #define INTEL_ARCH_INV_MASK 0x00800000ULL | 35 | (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT) |
36 | #define INTEL_ARCH_CNT_MASK 0xFF000000ULL | 36 | |
37 | #define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK) | 37 | #define X86_RAW_EVENT_MASK \ |
38 | 38 | (ARCH_PERFMON_EVENTSEL_EVENT | \ | |
39 | /* | 39 | ARCH_PERFMON_EVENTSEL_UMASK | \ |
40 | * filter mask to validate fixed counter events. | 40 | ARCH_PERFMON_EVENTSEL_EDGE | \ |
41 | * the following filters disqualify for fixed counters: | 41 | ARCH_PERFMON_EVENTSEL_INV | \ |
42 | * - inv | 42 | ARCH_PERFMON_EVENTSEL_CMASK) |
43 | * - edge | 43 | #define AMD64_RAW_EVENT_MASK \ |
44 | * - cnt-mask | 44 | (X86_RAW_EVENT_MASK | \ |
45 | * The other filters are supported by fixed counters. | 45 | AMD64_EVENTSEL_EVENT) |
46 | * The any-thread option is supported starting with v3. | ||
47 | */ | ||
48 | #define INTEL_ARCH_FIXED_MASK \ | ||
49 | (INTEL_ARCH_CNT_MASK| \ | ||
50 | INTEL_ARCH_INV_MASK| \ | ||
51 | INTEL_ARCH_EDGE_MASK|\ | ||
52 | INTEL_ARCH_UNIT_MASK|\ | ||
53 | INTEL_ARCH_EVENT_MASK) | ||
54 | 46 | ||
55 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c | 47 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c |
56 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | 48 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) |
@@ -67,7 +59,7 @@ | |||
67 | union cpuid10_eax { | 59 | union cpuid10_eax { |
68 | struct { | 60 | struct { |
69 | unsigned int version_id:8; | 61 | unsigned int version_id:8; |
70 | unsigned int num_events:8; | 62 | unsigned int num_counters:8; |
71 | unsigned int bit_width:8; | 63 | unsigned int bit_width:8; |
72 | unsigned int mask_length:8; | 64 | unsigned int mask_length:8; |
73 | } split; | 65 | } split; |
@@ -76,7 +68,7 @@ union cpuid10_eax { | |||
76 | 68 | ||
77 | union cpuid10_edx { | 69 | union cpuid10_edx { |
78 | struct { | 70 | struct { |
79 | unsigned int num_events_fixed:4; | 71 | unsigned int num_counters_fixed:4; |
80 | unsigned int reserved:28; | 72 | unsigned int reserved:28; |
81 | } split; | 73 | } split; |
82 | unsigned int full; | 74 | unsigned int full; |
@@ -136,6 +128,18 @@ extern void perf_events_lapic_init(void); | |||
136 | 128 | ||
137 | #define PERF_EVENT_INDEX_OFFSET 0 | 129 | #define PERF_EVENT_INDEX_OFFSET 0 |
138 | 130 | ||
131 | /* | ||
132 | * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. | ||
133 | * This flag is otherwise unused and ABI specified to be 0, so nobody should | ||
134 | * care what we do with it. | ||
135 | */ | ||
136 | #define PERF_EFLAGS_EXACT (1UL << 3) | ||
137 | |||
138 | struct pt_regs; | ||
139 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); | ||
140 | extern unsigned long perf_misc_flags(struct pt_regs *regs); | ||
141 | #define perf_misc_flags(regs) perf_misc_flags(regs) | ||
142 | |||
139 | #else | 143 | #else |
140 | static inline void init_hw_perf_events(void) { } | 144 | static inline void init_hw_perf_events(void) { } |
141 | static inline void perf_events_lapic_init(void) { } | 145 | static inline void perf_events_lapic_init(void) { } |
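As a quick illustration of the consolidated ARCH_PERFMON_EVENTSEL_* bits above, this is how a raw counter configuration for the architectural unhalted-core-cycles event could be composed; the helper name is made up and nothing here writes an actual MSR:

#include <linux/types.h>
#include <asm/perf_event.h>

static u64 demo_cycles_eventsel(void)
{
	/* event 0x3c, umask 0x00, count in user and kernel mode, enabled */
	return ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL |
	       ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK |
	       ARCH_PERFMON_EVENTSEL_USR |
	       ARCH_PERFMON_EVENTSEL_OS |
	       ARCH_PERFMON_EVENTSEL_ENABLE;
}

A raw event supplied from userspace would typically be sanitized with X86_RAW_EVENT_MASK (or AMD64_RAW_EVENT_MASK) before being programmed.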
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h new file mode 100644 index 000000000000..64a8ebff06fc --- /dev/null +++ b/arch/x86/include/asm/perf_event_p4.h | |||
@@ -0,0 +1,795 @@ | |||
1 | /* | ||
2 | * Netburst Performance Events (P4, old Xeon) | ||
3 | */ | ||
4 | |||
5 | #ifndef PERF_EVENT_P4_H | ||
6 | #define PERF_EVENT_P4_H | ||
7 | |||
8 | #include <linux/cpu.h> | ||
9 | #include <linux/bitops.h> | ||
10 | |||
11 | /* | ||
12 | * NetBurst has performance MSRs shared between | ||
13 | * threads if HT is turned on, i.e. for both logical | ||
14 | * processors (note: on Atom with HT support, in contrast, | ||
15 | * perf MSRs are not shared and every thread has its | ||
16 | * own set of perf MSRs) | ||
17 | */ | ||
18 | #define ARCH_P4_TOTAL_ESCR (46) | ||
19 | #define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ | ||
20 | #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) | ||
21 | #define ARCH_P4_MAX_CCCR (18) | ||
22 | #define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2) | ||
23 | |||
24 | #define P4_ESCR_EVENT_MASK 0x7e000000U | ||
25 | #define P4_ESCR_EVENT_SHIFT 25 | ||
26 | #define P4_ESCR_EVENTMASK_MASK 0x01fffe00U | ||
27 | #define P4_ESCR_EVENTMASK_SHIFT 9 | ||
28 | #define P4_ESCR_TAG_MASK 0x000001e0U | ||
29 | #define P4_ESCR_TAG_SHIFT 5 | ||
30 | #define P4_ESCR_TAG_ENABLE 0x00000010U | ||
31 | #define P4_ESCR_T0_OS 0x00000008U | ||
32 | #define P4_ESCR_T0_USR 0x00000004U | ||
33 | #define P4_ESCR_T1_OS 0x00000002U | ||
34 | #define P4_ESCR_T1_USR 0x00000001U | ||
35 | |||
36 | #define P4_ESCR_EVENT(v) ((v) << P4_ESCR_EVENT_SHIFT) | ||
37 | #define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) | ||
38 | #define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) | ||
39 | |||
40 | /* Non HT mask */ | ||
41 | #define P4_ESCR_MASK \ | ||
42 | (P4_ESCR_EVENT_MASK | \ | ||
43 | P4_ESCR_EVENTMASK_MASK | \ | ||
44 | P4_ESCR_TAG_MASK | \ | ||
45 | P4_ESCR_TAG_ENABLE | \ | ||
46 | P4_ESCR_T0_OS | \ | ||
47 | P4_ESCR_T0_USR) | ||
48 | |||
49 | /* HT mask */ | ||
50 | #define P4_ESCR_MASK_HT \ | ||
51 | (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR) | ||
52 | |||
53 | #define P4_CCCR_OVF 0x80000000U | ||
54 | #define P4_CCCR_CASCADE 0x40000000U | ||
55 | #define P4_CCCR_OVF_PMI_T0 0x04000000U | ||
56 | #define P4_CCCR_OVF_PMI_T1 0x08000000U | ||
57 | #define P4_CCCR_FORCE_OVF 0x02000000U | ||
58 | #define P4_CCCR_EDGE 0x01000000U | ||
59 | #define P4_CCCR_THRESHOLD_MASK 0x00f00000U | ||
60 | #define P4_CCCR_THRESHOLD_SHIFT 20 | ||
61 | #define P4_CCCR_COMPLEMENT 0x00080000U | ||
62 | #define P4_CCCR_COMPARE 0x00040000U | ||
63 | #define P4_CCCR_ESCR_SELECT_MASK 0x0000e000U | ||
64 | #define P4_CCCR_ESCR_SELECT_SHIFT 13 | ||
65 | #define P4_CCCR_ENABLE 0x00001000U | ||
66 | #define P4_CCCR_THREAD_SINGLE 0x00010000U | ||
67 | #define P4_CCCR_THREAD_BOTH 0x00020000U | ||
68 | #define P4_CCCR_THREAD_ANY 0x00030000U | ||
69 | #define P4_CCCR_RESERVED 0x00000fffU | ||
70 | |||
71 | #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) | ||
72 | #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) | ||
73 | |||
74 | /* Custom bits in reserved CCCR area */ | ||
75 | #define P4_CCCR_CACHE_OPS_MASK 0x0000003fU | ||
76 | |||
77 | |||
78 | /* Non HT mask */ | ||
79 | #define P4_CCCR_MASK \ | ||
80 | (P4_CCCR_OVF | \ | ||
81 | P4_CCCR_CASCADE | \ | ||
82 | P4_CCCR_OVF_PMI_T0 | \ | ||
83 | P4_CCCR_FORCE_OVF | \ | ||
84 | P4_CCCR_EDGE | \ | ||
85 | P4_CCCR_THRESHOLD_MASK | \ | ||
86 | P4_CCCR_COMPLEMENT | \ | ||
87 | P4_CCCR_COMPARE | \ | ||
88 | P4_CCCR_ESCR_SELECT_MASK | \ | ||
89 | P4_CCCR_ENABLE) | ||
90 | |||
91 | /* HT mask */ | ||
92 | #define P4_CCCR_MASK_HT \ | ||
93 | (P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY) | ||
94 | |||
95 | #define P4_GEN_ESCR_EMASK(class, name, bit) \ | ||
96 | class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) | ||
97 | #define P4_ESCR_EMASK_BIT(class, name) class##__##name | ||
98 | |||
99 | /* | ||
100 | * The config field is 64 bits wide and consists of | ||
101 | * HT << 63 | ESCR << 32 | CCCR | ||
102 | * where HT is the HyperThreading bit (ESCR has this bit | ||
103 | * reserved, so we may use it for our own purposes). | ||
104 | * | ||
105 | * Note that these are NOT the addresses of the respective | ||
106 | * ESCR and CCCR, but a packed value that should be | ||
107 | * unpacked and written to the proper addresses. | ||
108 | * | ||
109 | * The basic idea is to pack as much info as | ||
110 | * possible. | ||
111 | */ | ||
112 | #define p4_config_pack_escr(v) (((u64)(v)) << 32) | ||
113 | #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) | ||
114 | #define p4_config_unpack_escr(v) (((u64)(v)) >> 32) | ||
115 | #define p4_config_unpack_cccr(v) (((u64)(v)) & 0xffffffffULL) | ||
116 | |||
117 | #define p4_config_unpack_emask(v) \ | ||
118 | ({ \ | ||
119 | u32 t = p4_config_unpack_escr((v)); \ | ||
120 | t = t & P4_ESCR_EVENTMASK_MASK; \ | ||
121 | t = t >> P4_ESCR_EVENTMASK_SHIFT; \ | ||
122 | t; \ | ||
123 | }) | ||
124 | |||
125 | #define p4_config_unpack_event(v) \ | ||
126 | ({ \ | ||
127 | u32 t = p4_config_unpack_escr((v)); \ | ||
128 | t = t & P4_ESCR_EVENT_MASK; \ | ||
129 | t = t >> P4_ESCR_EVENT_SHIFT; \ | ||
130 | t; \ | ||
131 | }) | ||
132 | |||
133 | #define p4_config_unpack_cache_event(v) (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK) | ||
134 | |||
135 | #define P4_CONFIG_HT_SHIFT 63 | ||
136 | #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) | ||
137 | |||
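/*
 * Illustrative sketch, not part of this file: packing one event's ESCR and
 * CCCR halves into the 64-bit config described above and reading the event
 * code back out.  The field values are arbitrary example numbers, not a
 * real event description.
 */
static inline u64 p4_demo_pack_config(void)
{
	u32 escr = P4_ESCR_EVENT(0x02) |		/* example event code */
		   P4_ESCR_EMASK(0x01) |		/* example event mask */
		   P4_ESCR_T0_OS | P4_ESCR_T0_USR;
	u32 cccr = P4_CCCR_ESEL(0x04) |			/* example ESCR select */
		   P4_CCCR_ENABLE;

	/* ESCR goes to the upper 32 bits, CCCR to the lower 32 bits */
	return p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr);
}

/*
 * p4_config_unpack_event() on the value above yields 0x02 again, and
 * p4_config_unpack_emask() yields 0x01.
 */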
138 | static inline bool p4_is_event_cascaded(u64 config) | ||
139 | { | ||
140 | u32 cccr = p4_config_unpack_cccr(config); | ||
141 | return !!(cccr & P4_CCCR_CASCADE); | ||
142 | } | ||
143 | |||
144 | static inline int p4_ht_config_thread(u64 config) | ||
145 | { | ||
146 | return !!(config & P4_CONFIG_HT); | ||
147 | } | ||
148 | |||
149 | static inline u64 p4_set_ht_bit(u64 config) | ||
150 | { | ||
151 | return config | P4_CONFIG_HT; | ||
152 | } | ||
153 | |||
154 | static inline u64 p4_clear_ht_bit(u64 config) | ||
155 | { | ||
156 | return config & ~P4_CONFIG_HT; | ||
157 | } | ||
158 | |||
159 | static inline int p4_ht_active(void) | ||
160 | { | ||
161 | #ifdef CONFIG_SMP | ||
162 | return smp_num_siblings > 1; | ||
163 | #endif | ||
164 | return 0; | ||
165 | } | ||
166 | |||
167 | static inline int p4_ht_thread(int cpu) | ||
168 | { | ||
169 | #ifdef CONFIG_SMP | ||
170 | if (smp_num_siblings == 2) | ||
171 | return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); | ||
172 | #endif | ||
173 | return 0; | ||
174 | } | ||
175 | |||
176 | static inline int p4_should_swap_ts(u64 config, int cpu) | ||
177 | { | ||
178 | return p4_ht_config_thread(config) ^ p4_ht_thread(cpu); | ||
179 | } | ||
180 | |||
181 | static inline u32 p4_default_cccr_conf(int cpu) | ||
182 | { | ||
183 | /* | ||
184 | * Note that P4_CCCR_THREAD_ANY is "required" on | ||
185 | * non-HT machines (on HT machines we count TS events | ||
186 | * regardless of the state of the second logical processor) | ||
187 | */ | ||
188 | u32 cccr = P4_CCCR_THREAD_ANY; | ||
189 | |||
190 | if (!p4_ht_thread(cpu)) | ||
191 | cccr |= P4_CCCR_OVF_PMI_T0; | ||
192 | else | ||
193 | cccr |= P4_CCCR_OVF_PMI_T1; | ||
194 | |||
195 | return cccr; | ||
196 | } | ||
197 | |||
198 | static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) | ||
199 | { | ||
200 | u32 escr = 0; | ||
201 | |||
202 | if (!p4_ht_thread(cpu)) { | ||
203 | if (!exclude_os) | ||
204 | escr |= P4_ESCR_T0_OS; | ||
205 | if (!exclude_usr) | ||
206 | escr |= P4_ESCR_T0_USR; | ||
207 | } else { | ||
208 | if (!exclude_os) | ||
209 | escr |= P4_ESCR_T1_OS; | ||
210 | if (!exclude_usr) | ||
211 | escr |= P4_ESCR_T1_USR; | ||
212 | } | ||
213 | |||
214 | return escr; | ||
215 | } | ||
216 | |||
217 | enum P4_EVENTS { | ||
218 | P4_EVENT_TC_DELIVER_MODE, | ||
219 | P4_EVENT_BPU_FETCH_REQUEST, | ||
220 | P4_EVENT_ITLB_REFERENCE, | ||
221 | P4_EVENT_MEMORY_CANCEL, | ||
222 | P4_EVENT_MEMORY_COMPLETE, | ||
223 | P4_EVENT_LOAD_PORT_REPLAY, | ||
224 | P4_EVENT_STORE_PORT_REPLAY, | ||
225 | P4_EVENT_MOB_LOAD_REPLAY, | ||
226 | P4_EVENT_PAGE_WALK_TYPE, | ||
227 | P4_EVENT_BSQ_CACHE_REFERENCE, | ||
228 | P4_EVENT_IOQ_ALLOCATION, | ||
229 | P4_EVENT_IOQ_ACTIVE_ENTRIES, | ||
230 | P4_EVENT_FSB_DATA_ACTIVITY, | ||
231 | P4_EVENT_BSQ_ALLOCATION, | ||
232 | P4_EVENT_BSQ_ACTIVE_ENTRIES, | ||
233 | P4_EVENT_SSE_INPUT_ASSIST, | ||
234 | P4_EVENT_PACKED_SP_UOP, | ||
235 | P4_EVENT_PACKED_DP_UOP, | ||
236 | P4_EVENT_SCALAR_SP_UOP, | ||
237 | P4_EVENT_SCALAR_DP_UOP, | ||
238 | P4_EVENT_64BIT_MMX_UOP, | ||
239 | P4_EVENT_128BIT_MMX_UOP, | ||
240 | P4_EVENT_X87_FP_UOP, | ||
241 | P4_EVENT_TC_MISC, | ||
242 | P4_EVENT_GLOBAL_POWER_EVENTS, | ||
243 | P4_EVENT_TC_MS_XFER, | ||
244 | P4_EVENT_UOP_QUEUE_WRITES, | ||
245 | P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, | ||
246 | P4_EVENT_RETIRED_BRANCH_TYPE, | ||
247 | P4_EVENT_RESOURCE_STALL, | ||
248 | P4_EVENT_WC_BUFFER, | ||
249 | P4_EVENT_B2B_CYCLES, | ||
250 | P4_EVENT_BNR, | ||
251 | P4_EVENT_SNOOP, | ||
252 | P4_EVENT_RESPONSE, | ||
253 | P4_EVENT_FRONT_END_EVENT, | ||
254 | P4_EVENT_EXECUTION_EVENT, | ||
255 | P4_EVENT_REPLAY_EVENT, | ||
256 | P4_EVENT_INSTR_RETIRED, | ||
257 | P4_EVENT_UOPS_RETIRED, | ||
258 | P4_EVENT_UOP_TYPE, | ||
259 | P4_EVENT_BRANCH_RETIRED, | ||
260 | P4_EVENT_MISPRED_BRANCH_RETIRED, | ||
261 | P4_EVENT_X87_ASSIST, | ||
262 | P4_EVENT_MACHINE_CLEAR, | ||
263 | P4_EVENT_INSTR_COMPLETED, | ||
264 | }; | ||
265 | |||
266 | #define P4_OPCODE(event) event##_OPCODE | ||
267 | #define P4_OPCODE_ESEL(opcode) ((opcode & 0x00ff) >> 0) | ||
268 | #define P4_OPCODE_EVNT(opcode) ((opcode & 0xff00) >> 8) | ||
269 | #define P4_OPCODE_PACK(event, sel) (((event) << 8) | sel) | ||
270 | |||
271 | /* | ||
272 | * Comments below the event represent ESCR restriction | ||
273 | * for this event and counter index per ESCR | ||
274 | * | ||
275 | * MSR_P4_IQ_ESCR0 and MSR_P4_IQ_ESCR1 are available only on early | ||
276 | * processor builds (family 0FH, models 01H-02H). These MSRs | ||
277 | * are not available on later versions, so we don't use | ||
278 | * them at all. | ||
279 | * | ||
280 | * Also note that CCCR1 does not have the P4_CCCR_ENABLE bit | ||
281 | * working properly, so we should not use this CCCR and its | ||
282 | * respective counter as a result. | ||
283 | */ | ||
284 | enum P4_EVENT_OPCODES { | ||
285 | P4_OPCODE(P4_EVENT_TC_DELIVER_MODE) = P4_OPCODE_PACK(0x01, 0x01), | ||
286 | /* | ||
287 | * MSR_P4_TC_ESCR0: 4, 5 | ||
288 | * MSR_P4_TC_ESCR1: 6, 7 | ||
289 | */ | ||
290 | |||
291 | P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST) = P4_OPCODE_PACK(0x03, 0x00), | ||
292 | /* | ||
293 | * MSR_P4_BPU_ESCR0: 0, 1 | ||
294 | * MSR_P4_BPU_ESCR1: 2, 3 | ||
295 | */ | ||
296 | |||
297 | P4_OPCODE(P4_EVENT_ITLB_REFERENCE) = P4_OPCODE_PACK(0x18, 0x03), | ||
298 | /* | ||
299 | * MSR_P4_ITLB_ESCR0: 0, 1 | ||
300 | * MSR_P4_ITLB_ESCR1: 2, 3 | ||
301 | */ | ||
302 | |||
303 | P4_OPCODE(P4_EVENT_MEMORY_CANCEL) = P4_OPCODE_PACK(0x02, 0x05), | ||
304 | /* | ||
305 | * MSR_P4_DAC_ESCR0: 8, 9 | ||
306 | * MSR_P4_DAC_ESCR1: 10, 11 | ||
307 | */ | ||
308 | |||
309 | P4_OPCODE(P4_EVENT_MEMORY_COMPLETE) = P4_OPCODE_PACK(0x08, 0x02), | ||
310 | /* | ||
311 | * MSR_P4_SAAT_ESCR0: 8, 9 | ||
312 | * MSR_P4_SAAT_ESCR1: 10, 11 | ||
313 | */ | ||
314 | |||
315 | P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY) = P4_OPCODE_PACK(0x04, 0x02), | ||
316 | /* | ||
317 | * MSR_P4_SAAT_ESCR0: 8, 9 | ||
318 | * MSR_P4_SAAT_ESCR1: 10, 11 | ||
319 | */ | ||
320 | |||
321 | P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY) = P4_OPCODE_PACK(0x05, 0x02), | ||
322 | /* | ||
323 | * MSR_P4_SAAT_ESCR0: 8, 9 | ||
324 | * MSR_P4_SAAT_ESCR1: 10, 11 | ||
325 | */ | ||
326 | |||
327 | P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY) = P4_OPCODE_PACK(0x03, 0x02), | ||
328 | /* | ||
329 | * MSR_P4_MOB_ESCR0: 0, 1 | ||
330 | * MSR_P4_MOB_ESCR1: 2, 3 | ||
331 | */ | ||
332 | |||
333 | P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE) = P4_OPCODE_PACK(0x01, 0x04), | ||
334 | /* | ||
335 | * MSR_P4_PMH_ESCR0: 0, 1 | ||
336 | * MSR_P4_PMH_ESCR1: 2, 3 | ||
337 | */ | ||
338 | |||
339 | P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE) = P4_OPCODE_PACK(0x0c, 0x07), | ||
340 | /* | ||
341 | * MSR_P4_BSU_ESCR0: 0, 1 | ||
342 | * MSR_P4_BSU_ESCR1: 2, 3 | ||
343 | */ | ||
344 | |||
345 | P4_OPCODE(P4_EVENT_IOQ_ALLOCATION) = P4_OPCODE_PACK(0x03, 0x06), | ||
346 | /* | ||
347 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
348 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
349 | */ | ||
350 | |||
351 | P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x1a, 0x06), | ||
352 | /* | ||
353 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
354 | */ | ||
355 | |||
356 | P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY) = P4_OPCODE_PACK(0x17, 0x06), | ||
357 | /* | ||
358 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
359 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
360 | */ | ||
361 | |||
362 | P4_OPCODE(P4_EVENT_BSQ_ALLOCATION) = P4_OPCODE_PACK(0x05, 0x07), | ||
363 | /* | ||
364 | * MSR_P4_BSU_ESCR0: 0, 1 | ||
365 | */ | ||
366 | |||
367 | P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x06, 0x07), | ||
368 | /* | ||
369 | * NOTE: no ESCR name in docs, it's guessed | ||
370 | * MSR_P4_BSU_ESCR1: 2, 3 | ||
371 | */ | ||
372 | |||
373 | P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST) = P4_OPCODE_PACK(0x34, 0x01), | ||
374 | /* | ||
375 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
376 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
377 | */ | ||
378 | |||
379 | P4_OPCODE(P4_EVENT_PACKED_SP_UOP) = P4_OPCODE_PACK(0x08, 0x01), | ||
380 | /* | ||
381 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
382 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
383 | */ | ||
384 | |||
385 | P4_OPCODE(P4_EVENT_PACKED_DP_UOP) = P4_OPCODE_PACK(0x0c, 0x01), | ||
386 | /* | ||
387 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
388 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
389 | */ | ||
390 | |||
391 | P4_OPCODE(P4_EVENT_SCALAR_SP_UOP) = P4_OPCODE_PACK(0x0a, 0x01), | ||
392 | /* | ||
393 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
394 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
395 | */ | ||
396 | |||
397 | P4_OPCODE(P4_EVENT_SCALAR_DP_UOP) = P4_OPCODE_PACK(0x0e, 0x01), | ||
398 | /* | ||
399 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
400 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
401 | */ | ||
402 | |||
403 | P4_OPCODE(P4_EVENT_64BIT_MMX_UOP) = P4_OPCODE_PACK(0x02, 0x01), | ||
404 | /* | ||
405 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
406 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
407 | */ | ||
408 | |||
409 | P4_OPCODE(P4_EVENT_128BIT_MMX_UOP) = P4_OPCODE_PACK(0x1a, 0x01), | ||
410 | /* | ||
411 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
412 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
413 | */ | ||
414 | |||
415 | P4_OPCODE(P4_EVENT_X87_FP_UOP) = P4_OPCODE_PACK(0x04, 0x01), | ||
416 | /* | ||
417 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
418 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
419 | */ | ||
420 | |||
421 | P4_OPCODE(P4_EVENT_TC_MISC) = P4_OPCODE_PACK(0x06, 0x01), | ||
422 | /* | ||
423 | * MSR_P4_TC_ESCR0: 4, 5 | ||
424 | * MSR_P4_TC_ESCR1: 6, 7 | ||
425 | */ | ||
426 | |||
427 | P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS) = P4_OPCODE_PACK(0x13, 0x06), | ||
428 | /* | ||
429 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
430 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
431 | */ | ||
432 | |||
433 | P4_OPCODE(P4_EVENT_TC_MS_XFER) = P4_OPCODE_PACK(0x05, 0x00), | ||
434 | /* | ||
435 | * MSR_P4_MS_ESCR0: 4, 5 | ||
436 | * MSR_P4_MS_ESCR1: 6, 7 | ||
437 | */ | ||
438 | |||
439 | P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES) = P4_OPCODE_PACK(0x09, 0x00), | ||
440 | /* | ||
441 | * MSR_P4_MS_ESCR0: 4, 5 | ||
442 | * MSR_P4_MS_ESCR1: 6, 7 | ||
443 | */ | ||
444 | |||
445 | P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x05, 0x02), | ||
446 | /* | ||
447 | * MSR_P4_TBPU_ESCR0: 4, 5 | ||
448 | * MSR_P4_TBPU_ESCR1: 6, 7 | ||
449 | */ | ||
450 | |||
451 | P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x04, 0x02), | ||
452 | /* | ||
453 | * MSR_P4_TBPU_ESCR0: 4, 5 | ||
454 | * MSR_P4_TBPU_ESCR1: 6, 7 | ||
455 | */ | ||
456 | |||
457 | P4_OPCODE(P4_EVENT_RESOURCE_STALL) = P4_OPCODE_PACK(0x01, 0x01), | ||
458 | /* | ||
459 | * MSR_P4_ALF_ESCR0: 12, 13, 16 | ||
460 | * MSR_P4_ALF_ESCR1: 14, 15, 17 | ||
461 | */ | ||
462 | |||
463 | P4_OPCODE(P4_EVENT_WC_BUFFER) = P4_OPCODE_PACK(0x05, 0x05), | ||
464 | /* | ||
465 | * MSR_P4_DAC_ESCR0: 8, 9 | ||
466 | * MSR_P4_DAC_ESCR1: 10, 11 | ||
467 | */ | ||
468 | |||
469 | P4_OPCODE(P4_EVENT_B2B_CYCLES) = P4_OPCODE_PACK(0x16, 0x03), | ||
470 | /* | ||
471 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
472 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
473 | */ | ||
474 | |||
475 | P4_OPCODE(P4_EVENT_BNR) = P4_OPCODE_PACK(0x08, 0x03), | ||
476 | /* | ||
477 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
478 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
479 | */ | ||
480 | |||
481 | P4_OPCODE(P4_EVENT_SNOOP) = P4_OPCODE_PACK(0x06, 0x03), | ||
482 | /* | ||
483 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
484 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
485 | */ | ||
486 | |||
487 | P4_OPCODE(P4_EVENT_RESPONSE) = P4_OPCODE_PACK(0x04, 0x03), | ||
488 | /* | ||
489 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
490 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
491 | */ | ||
492 | |||
493 | P4_OPCODE(P4_EVENT_FRONT_END_EVENT) = P4_OPCODE_PACK(0x08, 0x05), | ||
494 | /* | ||
495 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
496 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
497 | */ | ||
498 | |||
499 | P4_OPCODE(P4_EVENT_EXECUTION_EVENT) = P4_OPCODE_PACK(0x0c, 0x05), | ||
500 | /* | ||
501 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
502 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
503 | */ | ||
504 | |||
505 | P4_OPCODE(P4_EVENT_REPLAY_EVENT) = P4_OPCODE_PACK(0x09, 0x05), | ||
506 | /* | ||
507 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
508 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
509 | */ | ||
510 | |||
511 | P4_OPCODE(P4_EVENT_INSTR_RETIRED) = P4_OPCODE_PACK(0x02, 0x04), | ||
512 | /* | ||
513 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
514 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
515 | */ | ||
516 | |||
517 | P4_OPCODE(P4_EVENT_UOPS_RETIRED) = P4_OPCODE_PACK(0x01, 0x04), | ||
518 | /* | ||
519 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
520 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
521 | */ | ||
522 | |||
523 | P4_OPCODE(P4_EVENT_UOP_TYPE) = P4_OPCODE_PACK(0x02, 0x02), | ||
524 | /* | ||
525 | * MSR_P4_RAT_ESCR0: 12, 13, 16 | ||
526 | * MSR_P4_RAT_ESCR1: 14, 15, 17 | ||
527 | */ | ||
528 | |||
529 | P4_OPCODE(P4_EVENT_BRANCH_RETIRED) = P4_OPCODE_PACK(0x06, 0x05), | ||
530 | /* | ||
531 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
532 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
533 | */ | ||
534 | |||
535 | P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED) = P4_OPCODE_PACK(0x03, 0x04), | ||
536 | /* | ||
537 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
538 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
539 | */ | ||
540 | |||
541 | P4_OPCODE(P4_EVENT_X87_ASSIST) = P4_OPCODE_PACK(0x03, 0x05), | ||
542 | /* | ||
543 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
544 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
545 | */ | ||
546 | |||
547 | P4_OPCODE(P4_EVENT_MACHINE_CLEAR) = P4_OPCODE_PACK(0x02, 0x05), | ||
548 | /* | ||
549 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
550 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
551 | */ | ||
552 | |||
553 | P4_OPCODE(P4_EVENT_INSTR_COMPLETED) = P4_OPCODE_PACK(0x07, 0x04), | ||
554 | /* | ||
555 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
556 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
557 | */ | ||
558 | }; | ||
559 | |||
560 | /* | ||
561 | * a caller should use P4_ESCR_EMASK_NAME helper to | ||
562 | * pick the EventMask needed, for example | ||
563 | * | ||
564 | * P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD) | ||
565 | */ | ||
566 | enum P4_ESCR_EMASKS { | ||
567 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0), | ||
568 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DB, 1), | ||
569 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DI, 2), | ||
570 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BD, 3), | ||
571 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BB, 4), | ||
572 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BI, 5), | ||
573 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, ID, 6), | ||
574 | |||
575 | P4_GEN_ESCR_EMASK(P4_EVENT_BPU_FETCH_REQUEST, TCMISS, 0), | ||
576 | |||
577 | P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT, 0), | ||
578 | P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, MISS, 1), | ||
579 | P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT_UK, 2), | ||
580 | |||
581 | P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL, 2), | ||
582 | P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, 64K_CONF, 3), | ||
583 | |||
584 | P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, LSC, 0), | ||
585 | P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, SSC, 1), | ||
586 | |||
587 | P4_GEN_ESCR_EMASK(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD, 1), | ||
588 | |||
589 | P4_GEN_ESCR_EMASK(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST, 1), | ||
590 | |||
591 | P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STA, 1), | ||
592 | P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STD, 3), | ||
593 | P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA, 4), | ||
594 | P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR, 5), | ||
595 | |||
596 | P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, DTMISS, 0), | ||
597 | P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, ITMISS, 1), | ||
598 | |||
599 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS, 0), | ||
600 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE, 1), | ||
601 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM, 2), | ||
602 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS, 3), | ||
603 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE, 4), | ||
604 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM, 5), | ||
605 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS, 8), | ||
606 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS, 9), | ||
607 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS, 10), | ||
608 | |||
609 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, DEFAULT, 0), | ||
610 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_READ, 5), | ||
611 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE, 6), | ||
612 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_UC, 7), | ||
613 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WC, 8), | ||
614 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WT, 9), | ||
615 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WP, 10), | ||
616 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WB, 11), | ||
617 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OWN, 13), | ||
618 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OTHER, 14), | ||
619 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, PREFETCH, 15), | ||
620 | |||
621 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT, 0), | ||
622 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ, 5), | ||
623 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE, 6), | ||
624 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC, 7), | ||
625 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC, 8), | ||
626 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT, 9), | ||
627 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP, 10), | ||
628 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB, 11), | ||
629 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN, 13), | ||
630 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER, 14), | ||
631 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH, 15), | ||
632 | |||
633 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV, 0), | ||
634 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN, 1), | ||
635 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER, 2), | ||
636 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV, 3), | ||
637 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN, 4), | ||
638 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER, 5), | ||
639 | |||
640 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0, 0), | ||
641 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1, 1), | ||
642 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0, 2), | ||
643 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1, 3), | ||
644 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE, 5), | ||
645 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE, 6), | ||
646 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE, 7), | ||
647 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE, 8), | ||
648 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE, 9), | ||
649 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE, 10), | ||
650 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0, 11), | ||
651 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1, 12), | ||
652 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2, 13), | ||
653 | |||
654 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0, 0), | ||
655 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1, 1), | ||
656 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0, 2), | ||
657 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1, 3), | ||
658 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE, 5), | ||
659 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE, 6), | ||
660 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE, 7), | ||
661 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE, 8), | ||
662 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE, 9), | ||
663 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE, 10), | ||
664 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0, 11), | ||
665 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1, 12), | ||
666 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2, 13), | ||
667 | |||
668 | P4_GEN_ESCR_EMASK(P4_EVENT_SSE_INPUT_ASSIST, ALL, 15), | ||
669 | |||
670 | P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_SP_UOP, ALL, 15), | ||
671 | |||
672 | P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_DP_UOP, ALL, 15), | ||
673 | |||
674 | P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_SP_UOP, ALL, 15), | ||
675 | |||
676 | P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_DP_UOP, ALL, 15), | ||
677 | |||
678 | P4_GEN_ESCR_EMASK(P4_EVENT_64BIT_MMX_UOP, ALL, 15), | ||
679 | |||
680 | P4_GEN_ESCR_EMASK(P4_EVENT_128BIT_MMX_UOP, ALL, 15), | ||
681 | |||
682 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_FP_UOP, ALL, 15), | ||
683 | |||
684 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_MISC, FLUSH, 4), | ||
685 | |||
686 | P4_GEN_ESCR_EMASK(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING, 0), | ||
687 | |||
688 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_MS_XFER, CISC, 0), | ||
689 | |||
690 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD, 0), | ||
691 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER, 1), | ||
692 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM, 2), | ||
693 | |||
694 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL, 1), | ||
695 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL, 2), | ||
696 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN, 3), | ||
697 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT, 4), | ||
698 | |||
699 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL, 1), | ||
700 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CALL, 2), | ||
701 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN, 3), | ||
702 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT, 4), | ||
703 | |||
704 | P4_GEN_ESCR_EMASK(P4_EVENT_RESOURCE_STALL, SBFULL, 5), | ||
705 | |||
706 | P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_EVICTS, 0), | ||
707 | P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS, 1), | ||
708 | |||
709 | P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, NBOGUS, 0), | ||
710 | P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, BOGUS, 1), | ||
711 | |||
712 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS0, 0), | ||
713 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS1, 1), | ||
714 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS2, 2), | ||
715 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS3, 3), | ||
716 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS0, 4), | ||
717 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS1, 5), | ||
718 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS2, 6), | ||
719 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS3, 7), | ||
720 | |||
721 | P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, NBOGUS, 0), | ||
722 | P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, BOGUS, 1), | ||
723 | |||
724 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG, 0), | ||
725 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSTAG, 1), | ||
726 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSNTAG, 2), | ||
727 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSTAG, 3), | ||
728 | |||
729 | P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, NBOGUS, 0), | ||
730 | P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, BOGUS, 1), | ||
731 | |||
732 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGLOADS, 1), | ||
733 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGSTORES, 2), | ||
734 | |||
735 | P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNP, 0), | ||
736 | P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNM, 1), | ||
737 | P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTP, 2), | ||
738 | P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTM, 3), | ||
739 | |||
740 | P4_GEN_ESCR_EMASK(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS, 0), | ||
741 | |||
742 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSU, 0), | ||
743 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSO, 1), | ||
744 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAO, 2), | ||
745 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAU, 3), | ||
746 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, PREA, 4), | ||
747 | |||
748 | P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, CLEAR, 0), | ||
749 | P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, MOCLEAR, 1), | ||
750 | P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, SMCLEAR, 2), | ||
751 | |||
752 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, NBOGUS, 0), | ||
753 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1), | ||
754 | }; | ||
755 | |||
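A minimal sketch of the helper usage the enum's introductory comment describes, assuming the enumerators generated by P4_GEN_ESCR_EMASK() are plain bit values that can be OR-ed into an event configuration; the choice of the DD and BB sub-events and the function name are purely illustrative:

#include <asm/perf_event_p4.h>

static u64 example_tc_deliver_mode_bits(void)
{
	u64 emask = 0;

	/* pick two of the TC_DELIVER_MODE sub-event bits by name */
	emask |= P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD);
	emask |= P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, BB);

	return emask;
}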
756 | /* P4 PEBS: stale for a while */ | ||
757 | #define P4_PEBS_METRIC_MASK 0x00001fffU | ||
758 | #define P4_PEBS_UOB_TAG 0x01000000U | ||
759 | #define P4_PEBS_ENABLE 0x02000000U | ||
760 | |||
761 | /* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */ | ||
762 | #define P4_PEBS__1stl_cache_load_miss_retired 0x3000001 | ||
763 | #define P4_PEBS__2ndl_cache_load_miss_retired 0x3000002 | ||
764 | #define P4_PEBS__dtlb_load_miss_retired 0x3000004 | ||
765 | #define P4_PEBS__dtlb_store_miss_retired 0x3000004 | ||
766 | #define P4_PEBS__dtlb_all_miss_retired 0x3000004 | ||
767 | #define P4_PEBS__tagged_mispred_branch 0x3018000 | ||
768 | #define P4_PEBS__mob_load_replay_retired 0x3000200 | ||
769 | #define P4_PEBS__split_load_retired 0x3000400 | ||
770 | #define P4_PEBS__split_store_retired 0x3000400 | ||
771 | |||
772 | #define P4_VERT__1stl_cache_load_miss_retired 0x0000001 | ||
773 | #define P4_VERT__2ndl_cache_load_miss_retired 0x0000001 | ||
774 | #define P4_VERT__dtlb_load_miss_retired 0x0000001 | ||
775 | #define P4_VERT__dtlb_store_miss_retired 0x0000002 | ||
776 | #define P4_VERT__dtlb_all_miss_retired 0x0000003 | ||
777 | #define P4_VERT__tagged_mispred_branch 0x0000010 | ||
778 | #define P4_VERT__mob_load_replay_retired 0x0000001 | ||
779 | #define P4_VERT__split_load_retired 0x0000001 | ||
780 | #define P4_VERT__split_store_retired 0x0000002 | ||
781 | |||
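The replay metrics come in pairs: the P4_PEBS__* value is written to MSR_IA32_PEBS_ENABLE (it already carries the tag/enable bits 24-25 defined above) and the matching P4_VERT__* value to MSR_P4_PEBS_MATRIX_VERT. A hedged sketch of programming one such metric, assuming the wrmsrl() helper and the MSR definitions from <asm/msr-index.h>; the function name is made up:

#include <asm/msr.h>
#include <asm/perf_event_p4.h>

static void example_select_1stl_load_miss_metric(void)
{
	/* metric bit plus the UOB tag/enable bits for the PEBS enable MSR */
	wrmsrl(MSR_IA32_PEBS_ENABLE, P4_PEBS__1stl_cache_load_miss_retired);
	/* and the matching vertical matrix bits */
	wrmsrl(MSR_P4_PEBS_MATRIX_VERT, P4_VERT__1stl_cache_load_miss_retired);
}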
782 | enum P4_CACHE_EVENTS { | ||
783 | P4_CACHE__NONE, | ||
784 | |||
785 | P4_CACHE__1stl_cache_load_miss_retired, | ||
786 | P4_CACHE__2ndl_cache_load_miss_retired, | ||
787 | P4_CACHE__dtlb_load_miss_retired, | ||
788 | P4_CACHE__dtlb_store_miss_retired, | ||
789 | P4_CACHE__itlb_reference_hit, | ||
790 | P4_CACHE__itlb_reference_miss, | ||
791 | |||
792 | P4_CACHE__MAX | ||
793 | }; | ||
794 | |||
795 | #endif /* PERF_EVENT_P4_H */ | ||
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b753ea59703a..7e5c6a60b8ee 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -21,7 +21,6 @@ struct mm_struct; | |||
21 | #include <asm/msr.h> | 21 | #include <asm/msr.h> |
22 | #include <asm/desc_defs.h> | 22 | #include <asm/desc_defs.h> |
23 | #include <asm/nops.h> | 23 | #include <asm/nops.h> |
24 | #include <asm/ds.h> | ||
25 | 24 | ||
26 | #include <linux/personality.h> | 25 | #include <linux/personality.h> |
27 | #include <linux/cpumask.h> | 26 | #include <linux/cpumask.h> |
@@ -29,6 +28,7 @@ struct mm_struct; | |||
29 | #include <linux/threads.h> | 28 | #include <linux/threads.h> |
30 | #include <linux/math64.h> | 29 | #include <linux/math64.h> |
31 | #include <linux/init.h> | 30 | #include <linux/init.h> |
31 | #include <linux/err.h> | ||
32 | 32 | ||
33 | #define HBP_NUM 4 | 33 | #define HBP_NUM 4 |
34 | /* | 34 | /* |
@@ -113,7 +113,6 @@ struct cpuinfo_x86 { | |||
113 | /* Index into per_cpu list: */ | 113 | /* Index into per_cpu list: */ |
114 | u16 cpu_index; | 114 | u16 cpu_index; |
115 | #endif | 115 | #endif |
116 | unsigned int x86_hyper_vendor; | ||
117 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); | 116 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); |
118 | 117 | ||
119 | #define X86_VENDOR_INTEL 0 | 118 | #define X86_VENDOR_INTEL 0 |
@@ -127,9 +126,6 @@ struct cpuinfo_x86 { | |||
127 | 126 | ||
128 | #define X86_VENDOR_UNKNOWN 0xff | 127 | #define X86_VENDOR_UNKNOWN 0xff |
129 | 128 | ||
130 | #define X86_HYPER_VENDOR_NONE 0 | ||
131 | #define X86_HYPER_VENDOR_VMWARE 1 | ||
132 | |||
133 | /* | 129 | /* |
134 | * capabilities of CPUs | 130 | * capabilities of CPUs |
135 | */ | 131 | */ |
@@ -380,6 +376,10 @@ union thread_xstate { | |||
380 | struct xsave_struct xsave; | 376 | struct xsave_struct xsave; |
381 | }; | 377 | }; |
382 | 378 | ||
379 | struct fpu { | ||
380 | union thread_xstate *state; | ||
381 | }; | ||
382 | |||
383 | #ifdef CONFIG_X86_64 | 383 | #ifdef CONFIG_X86_64 |
384 | DECLARE_PER_CPU(struct orig_ist, orig_ist); | 384 | DECLARE_PER_CPU(struct orig_ist, orig_ist); |
385 | 385 | ||
@@ -457,7 +457,7 @@ struct thread_struct { | |||
457 | unsigned long trap_no; | 457 | unsigned long trap_no; |
458 | unsigned long error_code; | 458 | unsigned long error_code; |
459 | /* floating point and extended processor state */ | 459 | /* floating point and extended processor state */ |
460 | union thread_xstate *xstate; | 460 | struct fpu fpu; |
461 | #ifdef CONFIG_X86_32 | 461 | #ifdef CONFIG_X86_32 |
462 | /* Virtual 86 mode info */ | 462 | /* Virtual 86 mode info */ |
463 | struct vm86_struct __user *vm86_info; | 463 | struct vm86_struct __user *vm86_info; |
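The net effect of this hunk is one extra level of indirection: the saved extended state now hangs off thread.fpu.state instead of the old thread.xstate pointer. A small sketch of what an access looks like after the change (the wrapper function is hypothetical; the member names come from the hunks above):

#include <linux/sched.h>
#include <asm/processor.h>

static struct xsave_struct *example_task_xsave(struct task_struct *tsk)
{
	/* previously: &tsk->thread.xstate->xsave */
	return &tsk->thread.fpu.state->xsave;
}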
@@ -473,10 +473,6 @@ struct thread_struct { | |||
473 | unsigned long iopl; | 473 | unsigned long iopl; |
474 | /* Max allowed port in the bitmap, in bytes: */ | 474 | /* Max allowed port in the bitmap, in bytes: */ |
475 | unsigned io_bitmap_max; | 475 | unsigned io_bitmap_max; |
476 | /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ | ||
477 | unsigned long debugctlmsr; | ||
478 | /* Debug Store context; see asm/ds.h */ | ||
479 | struct ds_context *ds_ctx; | ||
480 | }; | 476 | }; |
481 | 477 | ||
482 | static inline unsigned long native_get_debugreg(int regno) | 478 | static inline unsigned long native_get_debugreg(int regno) |
@@ -793,6 +789,8 @@ static inline void wbinvd_halt(void) | |||
793 | extern void enable_sep_cpu(void); | 789 | extern void enable_sep_cpu(void); |
794 | extern int sysenter_setup(void); | 790 | extern int sysenter_setup(void); |
795 | 791 | ||
792 | extern void early_trap_init(void); | ||
793 | |||
796 | /* Defined in head.S */ | 794 | /* Defined in head.S */ |
797 | extern struct desc_ptr early_gdt_descr; | 795 | extern struct desc_ptr early_gdt_descr; |
798 | 796 | ||
@@ -803,7 +801,7 @@ extern void cpu_init(void); | |||
803 | 801 | ||
804 | static inline unsigned long get_debugctlmsr(void) | 802 | static inline unsigned long get_debugctlmsr(void) |
805 | { | 803 | { |
806 | unsigned long debugctlmsr = 0; | 804 | unsigned long debugctlmsr = 0; |
807 | 805 | ||
808 | #ifndef CONFIG_X86_DEBUGCTLMSR | 806 | #ifndef CONFIG_X86_DEBUGCTLMSR |
809 | if (boot_cpu_data.x86 < 6) | 807 | if (boot_cpu_data.x86 < 6) |
@@ -811,21 +809,6 @@ static inline unsigned long get_debugctlmsr(void) | |||
811 | #endif | 809 | #endif |
812 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); | 810 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); |
813 | 811 | ||
814 | return debugctlmsr; | ||
815 | } | ||
816 | |||
817 | static inline unsigned long get_debugctlmsr_on_cpu(int cpu) | ||
818 | { | ||
819 | u64 debugctlmsr = 0; | ||
820 | u32 val1, val2; | ||
821 | |||
822 | #ifndef CONFIG_X86_DEBUGCTLMSR | ||
823 | if (boot_cpu_data.x86 < 6) | ||
824 | return 0; | ||
825 | #endif | ||
826 | rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2); | ||
827 | debugctlmsr = val1 | ((u64)val2 << 32); | ||
828 | |||
829 | return debugctlmsr; | 812 | return debugctlmsr; |
830 | } | 813 | } |
831 | 814 | ||
@@ -838,18 +821,6 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) | |||
838 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); | 821 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); |
839 | } | 822 | } |
840 | 823 | ||
841 | static inline void update_debugctlmsr_on_cpu(int cpu, | ||
842 | unsigned long debugctlmsr) | ||
843 | { | ||
844 | #ifndef CONFIG_X86_DEBUGCTLMSR | ||
845 | if (boot_cpu_data.x86 < 6) | ||
846 | return; | ||
847 | #endif | ||
848 | wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, | ||
849 | (u32)((u64)debugctlmsr), | ||
850 | (u32)((u64)debugctlmsr >> 32)); | ||
851 | } | ||
852 | |||
853 | /* | 824 | /* |
854 | * from system description table in BIOS. Mostly for MCA use, but | 825 | * from system description table in BIOS. Mostly for MCA use, but |
855 | * others may find it useful: | 826 | * others may find it useful: |
diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h index 86723035a515..52b098a6eebb 100644 --- a/arch/x86/include/asm/ptrace-abi.h +++ b/arch/x86/include/asm/ptrace-abi.h | |||
@@ -82,61 +82,6 @@ | |||
82 | 82 | ||
83 | #ifndef __ASSEMBLY__ | 83 | #ifndef __ASSEMBLY__ |
84 | #include <linux/types.h> | 84 | #include <linux/types.h> |
85 | 85 | #endif | |
86 | /* configuration/status structure used in PTRACE_BTS_CONFIG and | ||
87 | PTRACE_BTS_STATUS commands. | ||
88 | */ | ||
89 | struct ptrace_bts_config { | ||
90 | /* requested or actual size of BTS buffer in bytes */ | ||
91 | __u32 size; | ||
92 | /* bitmask of below flags */ | ||
93 | __u32 flags; | ||
94 | /* buffer overflow signal */ | ||
95 | __u32 signal; | ||
96 | /* actual size of bts_struct in bytes */ | ||
97 | __u32 bts_size; | ||
98 | }; | ||
99 | #endif /* __ASSEMBLY__ */ | ||
100 | |||
101 | #define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ | ||
102 | #define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ | ||
103 | #define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow | ||
104 | instead of wrapping around */ | ||
105 | #define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */ | ||
106 | |||
107 | #define PTRACE_BTS_CONFIG 40 | ||
108 | /* Configure branch trace recording. | ||
109 | ADDR points to a struct ptrace_bts_config. | ||
110 | DATA gives the size of that buffer. | ||
111 | A new buffer is allocated, if requested in the flags. | ||
112 | An overflow signal may only be requested for new buffers. | ||
113 | Returns the number of bytes read. | ||
114 | */ | ||
115 | #define PTRACE_BTS_STATUS 41 | ||
116 | /* Return the current configuration in a struct ptrace_bts_config | ||
117 | pointed to by ADDR; DATA gives the size of that buffer. | ||
118 | Returns the number of bytes written. | ||
119 | */ | ||
120 | #define PTRACE_BTS_SIZE 42 | ||
121 | /* Return the number of available BTS records for draining. | ||
122 | DATA and ADDR are ignored. | ||
123 | */ | ||
124 | #define PTRACE_BTS_GET 43 | ||
125 | /* Get a single BTS record. | ||
126 | DATA defines the index into the BTS array, where 0 is the newest | ||
127 | entry, and higher indices refer to older entries. | ||
128 | ADDR is pointing to struct bts_struct (see asm/ds.h). | ||
129 | */ | ||
130 | #define PTRACE_BTS_CLEAR 44 | ||
131 | /* Clear the BTS buffer. | ||
132 | DATA and ADDR are ignored. | ||
133 | */ | ||
134 | #define PTRACE_BTS_DRAIN 45 | ||
135 | /* Read all available BTS records and clear the buffer. | ||
136 | ADDR points to an array of struct bts_struct. | ||
137 | DATA gives the size of that buffer. | ||
138 | BTS records are read from oldest to newest. | ||
139 | Returns number of BTS records drained. | ||
140 | */ | ||
141 | 86 | ||
142 | #endif /* _ASM_X86_PTRACE_ABI_H */ | 87 | #endif /* _ASM_X86_PTRACE_ABI_H */ |
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 69a686a7dff0..78cd1ea94500 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -289,12 +289,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx, | |||
289 | extern int do_set_thread_area(struct task_struct *p, int idx, | 289 | extern int do_set_thread_area(struct task_struct *p, int idx, |
290 | struct user_desc __user *info, int can_allocate); | 290 | struct user_desc __user *info, int can_allocate); |
291 | 291 | ||
292 | #ifdef CONFIG_X86_PTRACE_BTS | ||
293 | extern void ptrace_bts_untrace(struct task_struct *tsk); | ||
294 | |||
295 | #define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk) | ||
296 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
297 | |||
298 | #endif /* __KERNEL__ */ | 292 | #endif /* __KERNEL__ */ |
299 | 293 | ||
300 | #endif /* !__ASSEMBLY__ */ | 294 | #endif /* !__ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h index 6d93508f2626..35f2d1948ada 100644 --- a/arch/x86/include/asm/pvclock-abi.h +++ b/arch/x86/include/asm/pvclock-abi.h | |||
@@ -29,7 +29,8 @@ struct pvclock_vcpu_time_info { | |||
29 | u64 system_time; | 29 | u64 system_time; |
30 | u32 tsc_to_system_mul; | 30 | u32 tsc_to_system_mul; |
31 | s8 tsc_shift; | 31 | s8 tsc_shift; |
32 | u8 pad[3]; | 32 | u8 flags; |
33 | u8 pad[2]; | ||
33 | } __attribute__((__packed__)); /* 32 bytes */ | 34 | } __attribute__((__packed__)); /* 32 bytes */ |
34 | 35 | ||
35 | struct pvclock_wall_clock { | 36 | struct pvclock_wall_clock { |
@@ -38,5 +39,6 @@ struct pvclock_wall_clock { | |||
38 | u32 nsec; | 39 | u32 nsec; |
39 | } __attribute__((__packed__)); | 40 | } __attribute__((__packed__)); |
40 | 41 | ||
42 | #define PVCLOCK_TSC_STABLE_BIT (1 << 0) | ||
41 | #endif /* __ASSEMBLY__ */ | 43 | #endif /* __ASSEMBLY__ */ |
42 | #endif /* _ASM_X86_PVCLOCK_ABI_H */ | 44 | #endif /* _ASM_X86_PVCLOCK_ABI_H */ |
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 53235fd5f8ce..cd02f324aa6b 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h | |||
@@ -6,6 +6,7 @@ | |||
6 | 6 | ||
7 | /* some helper functions for xen and kvm pv clock sources */ | 7 | /* some helper functions for xen and kvm pv clock sources */ |
8 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); | 8 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); |
9 | void pvclock_set_flags(u8 flags); | ||
9 | unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); | 10 | unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); |
10 | void pvclock_read_wallclock(struct pvclock_wall_clock *wall, | 11 | void pvclock_read_wallclock(struct pvclock_wall_clock *wall, |
11 | struct pvclock_vcpu_time_info *vcpu, | 12 | struct pvclock_vcpu_time_info *vcpu, |
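A hedged usage sketch: a paravirtual clock driver that knows the per-vcpu TSC is stable could advertise the new flag through pvclock_set_flags(); the wrapper function and its condition are hypothetical, only the helper and the PVCLOCK_TSC_STABLE_BIT flag come from this patch:

#include <asm/pvclock.h>
#include <asm/pvclock-abi.h>

static void example_advertise_stable_tsc(bool tsc_stable)
{
	/* tell the pvclock core this guest may trust the stable-TSC flag */
	if (tsc_stable)
		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
}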
diff --git a/arch/x86/include/asm/rdc321x_defs.h b/arch/x86/include/asm/rdc321x_defs.h deleted file mode 100644 index c8e9c8bed3d0..000000000000 --- a/arch/x86/include/asm/rdc321x_defs.h +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | #define PFX "rdc321x: " | ||
2 | |||
3 | /* General purpose configuration and data registers */ | ||
4 | #define RDC3210_CFGREG_ADDR 0x0CF8 | ||
5 | #define RDC3210_CFGREG_DATA 0x0CFC | ||
6 | |||
7 | #define RDC321X_GPIO_CTRL_REG1 0x48 | ||
8 | #define RDC321X_GPIO_CTRL_REG2 0x84 | ||
9 | #define RDC321X_GPIO_DATA_REG1 0x4c | ||
10 | #define RDC321X_GPIO_DATA_REG2 0x88 | ||
11 | |||
12 | #define RDC321X_MAX_GPIO 58 | ||
diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h index 75af592677ec..fb0b1874396f 100644 --- a/arch/x86/include/asm/scatterlist.h +++ b/arch/x86/include/asm/scatterlist.h | |||
@@ -1,8 +1,9 @@ | |||
1 | #ifndef _ASM_X86_SCATTERLIST_H | 1 | #ifndef _ASM_X86_SCATTERLIST_H |
2 | #define _ASM_X86_SCATTERLIST_H | 2 | #define _ASM_X86_SCATTERLIST_H |
3 | 3 | ||
4 | #define ISA_DMA_THRESHOLD (0x00ffffff) | ||
5 | |||
6 | #include <asm-generic/scatterlist.h> | 4 | #include <asm-generic/scatterlist.h> |
7 | 5 | ||
6 | #define ISA_DMA_THRESHOLD (0x00ffffff) | ||
7 | #define ARCH_HAS_SG_CHAIN | ||
8 | |||
8 | #endif /* _ASM_X86_SCATTERLIST_H */ | 9 | #endif /* _ASM_X86_SCATTERLIST_H */ |
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 38638cd2fa4c..0e831059ac5a 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h | |||
@@ -81,7 +81,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { | |||
81 | u32 event_inj_err; | 81 | u32 event_inj_err; |
82 | u64 nested_cr3; | 82 | u64 nested_cr3; |
83 | u64 lbr_ctl; | 83 | u64 lbr_ctl; |
84 | u8 reserved_5[832]; | 84 | u64 reserved_5; |
85 | u64 next_rip; | ||
86 | u8 reserved_6[816]; | ||
85 | }; | 87 | }; |
86 | 88 | ||
87 | 89 | ||
@@ -115,6 +117,10 @@ struct __attribute__ ((__packed__)) vmcb_control_area { | |||
115 | #define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) | 117 | #define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) |
116 | #define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) | 118 | #define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) |
117 | 119 | ||
120 | #define SVM_VM_CR_VALID_MASK 0x001fULL | ||
121 | #define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL | ||
122 | #define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL | ||
123 | |||
118 | struct __attribute__ ((__packed__)) vmcb_seg { | 124 | struct __attribute__ ((__packed__)) vmcb_seg { |
119 | u16 selector; | 125 | u16 selector; |
120 | u16 attrib; | 126 | u16 attrib; |
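One plausible use of the new VM_CR bit masks is probing whether SVM has been disabled or locked, e.g. by firmware. A sketch under that assumption, relying on the existing MSR_VM_CR definition and the rdmsrl() helper; the function name is illustrative:

#include <asm/msr.h>
#include <asm/svm.h>

static bool example_svm_disabled(void)
{
	u64 vm_cr;

	rdmsrl(MSR_VM_CR, vm_cr);
	return (vm_cr & SVM_VM_CR_SVM_DIS_MASK) != 0;
}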
@@ -238,6 +244,7 @@ struct __attribute__ ((__packed__)) vmcb { | |||
238 | 244 | ||
239 | #define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 | 245 | #define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 |
240 | #define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 | 246 | #define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 |
247 | #define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44 | ||
241 | 248 | ||
242 | #define SVM_EXIT_READ_CR0 0x000 | 249 | #define SVM_EXIT_READ_CR0 0x000 |
243 | #define SVM_EXIT_READ_CR3 0x003 | 250 | #define SVM_EXIT_READ_CR3 0x003 |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e0d28901e969..f0b6e5dbc5a0 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -87,13 +87,12 @@ struct thread_info { | |||
87 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ | 87 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ |
88 | #define TIF_IA32 17 /* 32bit process */ | 88 | #define TIF_IA32 17 /* 32bit process */ |
89 | #define TIF_FORK 18 /* ret_from_fork */ | 89 | #define TIF_FORK 18 /* ret_from_fork */ |
90 | #define TIF_MEMDIE 20 | 90 | #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ |
91 | #define TIF_DEBUG 21 /* uses debug registers */ | 91 | #define TIF_DEBUG 21 /* uses debug registers */ |
92 | #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ | 92 | #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ |
93 | #define TIF_FREEZE 23 /* is freezing for suspend */ | 93 | #define TIF_FREEZE 23 /* is freezing for suspend */ |
94 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ | 94 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ |
95 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ | 95 | #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ |
96 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ | ||
97 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ | 96 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ |
98 | #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ | 97 | #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ |
99 | 98 | ||
@@ -115,8 +114,7 @@ struct thread_info { | |||
115 | #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) | 114 | #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) |
116 | #define _TIF_FREEZE (1 << TIF_FREEZE) | 115 | #define _TIF_FREEZE (1 << TIF_FREEZE) |
117 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) | 116 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) |
118 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) | 117 | #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) |
119 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) | ||
120 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) | 118 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) |
121 | #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) | 119 | #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) |
122 | 120 | ||
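TIF_BLOCKSTEP replaces the old per-task debugctl shadow with a single flag that tracks the BTF (single-step-on-branches) bit. A sketch of how a context switch might propagate it, assuming the existing get_debugctlmsr(), update_debugctlmsr() and DEBUGCTLMSR_BTF definitions; the function itself is hypothetical:

#include <linux/sched.h>
#include <asm/processor.h>
#include <asm/msr-index.h>

static void example_sync_blockstep(struct task_struct *next)
{
	unsigned long debugctl = get_debugctlmsr();

	if (test_tsk_thread_flag(next, TIF_BLOCKSTEP))
		debugctl |= DEBUGCTLMSR_BTF;
	else
		debugctl &= ~DEBUGCTLMSR_BTF;

	update_debugctlmsr(debugctl);
}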
@@ -147,7 +145,7 @@ struct thread_info { | |||
147 | 145 | ||
148 | /* flags to check in __switch_to() */ | 146 | /* flags to check in __switch_to() */ |
149 | #define _TIF_WORK_CTXSW \ | 147 | #define _TIF_WORK_CTXSW \ |
150 | (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) | 148 | (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) |
151 | 149 | ||
152 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) | 150 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) |
153 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) | 151 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) |
@@ -241,10 +239,9 @@ static inline struct thread_info *current_thread_info(void) | |||
241 | #define TS_USEDFPU 0x0001 /* FPU was used by this task | 239 | #define TS_USEDFPU 0x0001 /* FPU was used by this task |
242 | this quantum (SMP) */ | 240 | this quantum (SMP) */ |
243 | #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ | 241 | #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ |
244 | #define TS_POLLING 0x0004 /* true if in idle loop | 242 | #define TS_POLLING 0x0004 /* idle task polling need_resched, |
245 | and not sleeping */ | 243 | skip sending interrupt */ |
246 | #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ | 244 | #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ |
247 | #define TS_XSAVE 0x0010 /* Use xsave/xrstor */ | ||
248 | 245 | ||
249 | #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) | 246 | #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) |
250 | 247 | ||
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index c5087d796587..21899cc31e52 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
@@ -53,33 +53,29 @@ | |||
53 | extern int cpu_to_node_map[]; | 53 | extern int cpu_to_node_map[]; |
54 | 54 | ||
55 | /* Returns the number of the node containing CPU 'cpu' */ | 55 | /* Returns the number of the node containing CPU 'cpu' */ |
56 | static inline int cpu_to_node(int cpu) | 56 | static inline int __cpu_to_node(int cpu) |
57 | { | 57 | { |
58 | return cpu_to_node_map[cpu]; | 58 | return cpu_to_node_map[cpu]; |
59 | } | 59 | } |
60 | #define early_cpu_to_node(cpu) cpu_to_node(cpu) | 60 | #define early_cpu_to_node __cpu_to_node |
61 | #define cpu_to_node __cpu_to_node | ||
61 | 62 | ||
62 | #else /* CONFIG_X86_64 */ | 63 | #else /* CONFIG_X86_64 */ |
63 | 64 | ||
64 | /* Mappings between logical cpu number and node number */ | 65 | /* Mappings between logical cpu number and node number */ |
65 | DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); | 66 | DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); |
66 | 67 | ||
67 | /* Returns the number of the current Node. */ | ||
68 | DECLARE_PER_CPU(int, node_number); | ||
69 | #define numa_node_id() percpu_read(node_number) | ||
70 | |||
71 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | 68 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS |
72 | extern int cpu_to_node(int cpu); | 69 | /* |
70 | * override generic percpu implementation of cpu_to_node | ||
71 | */ | ||
72 | extern int __cpu_to_node(int cpu); | ||
73 | #define cpu_to_node __cpu_to_node | ||
74 | |||
73 | extern int early_cpu_to_node(int cpu); | 75 | extern int early_cpu_to_node(int cpu); |
74 | 76 | ||
75 | #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ | 77 | #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ |
76 | 78 | ||
77 | /* Returns the number of the node containing CPU 'cpu' */ | ||
78 | static inline int cpu_to_node(int cpu) | ||
79 | { | ||
80 | return per_cpu(x86_cpu_to_node_map, cpu); | ||
81 | } | ||
82 | |||
83 | /* Same function but used if called before per_cpu areas are setup */ | 79 | /* Same function but used if called before per_cpu areas are setup */ |
84 | static inline int early_cpu_to_node(int cpu) | 80 | static inline int early_cpu_to_node(int cpu) |
85 | { | 81 | { |
@@ -170,6 +166,10 @@ static inline int numa_node_id(void) | |||
170 | { | 166 | { |
171 | return 0; | 167 | return 0; |
172 | } | 168 | } |
169 | /* | ||
170 | * indicate override: | ||
171 | */ | ||
172 | #define numa_node_id numa_node_id | ||
173 | 173 | ||
174 | static inline int early_cpu_to_node(int cpu) | 174 | static inline int early_cpu_to_node(int cpu) |
175 | { | 175 | { |
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 4da91ad69e0d..f66cda56781d 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -79,7 +79,7 @@ static inline int get_si_code(unsigned long condition) | |||
79 | 79 | ||
80 | extern int panic_on_unrecovered_nmi; | 80 | extern int panic_on_unrecovered_nmi; |
81 | 81 | ||
82 | void math_error(void __user *); | 82 | void math_error(struct pt_regs *, int, int); |
83 | void math_emulate(struct math_emu_info *); | 83 | void math_emulate(struct math_emu_info *); |
84 | #ifndef CONFIG_X86_32 | 84 | #ifndef CONFIG_X86_32 |
85 | asmlinkage void smp_thermal_interrupt(void); | 85 | asmlinkage void smp_thermal_interrupt(void); |
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index b414d2b401f6..aa558ac0306e 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h | |||
@@ -27,13 +27,14 @@ | |||
27 | * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. | 27 | * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. |
28 | * | 28 | * |
29 | * We will use 31 sets, one for sending BAU messages from each of the 32 | 29 | * We will use 31 sets, one for sending BAU messages from each of the 32 |
30 | * cpu's on the node. | 30 | * cpu's on the uvhub. |
31 | * | 31 | * |
32 | * TLB shootdown will use the first of the 8 descriptors of each set. | 32 | * TLB shootdown will use the first of the 8 descriptors of each set. |
33 | * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). | 33 | * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). |
34 | */ | 34 | */ |
35 | 35 | ||
36 | #define UV_ITEMS_PER_DESCRIPTOR 8 | 36 | #define UV_ITEMS_PER_DESCRIPTOR 8 |
37 | #define MAX_BAU_CONCURRENT 3 | ||
37 | #define UV_CPUS_PER_ACT_STATUS 32 | 38 | #define UV_CPUS_PER_ACT_STATUS 32 |
38 | #define UV_ACT_STATUS_MASK 0x3 | 39 | #define UV_ACT_STATUS_MASK 0x3 |
39 | #define UV_ACT_STATUS_SIZE 2 | 40 | #define UV_ACT_STATUS_SIZE 2 |
@@ -45,6 +46,9 @@ | |||
45 | #define UV_PAYLOADQ_PNODE_SHIFT 49 | 46 | #define UV_PAYLOADQ_PNODE_SHIFT 49 |
46 | #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" | 47 | #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" |
47 | #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) | 48 | #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) |
49 | #define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15 | ||
50 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16 | ||
51 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL | ||
48 | 52 | ||
49 | /* | 53 | /* |
50 | * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 | 54 | * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 |
@@ -55,15 +59,29 @@ | |||
55 | #define DESC_STATUS_SOURCE_TIMEOUT 3 | 59 | #define DESC_STATUS_SOURCE_TIMEOUT 3 |
56 | 60 | ||
57 | /* | 61 | /* |
58 | * source side thresholds at which message retries print a warning | 62 | * source side threshholds at which message retries print a warning |
59 | */ | 63 | */ |
60 | #define SOURCE_TIMEOUT_LIMIT 20 | 64 | #define SOURCE_TIMEOUT_LIMIT 20 |
61 | #define DESTINATION_TIMEOUT_LIMIT 20 | 65 | #define DESTINATION_TIMEOUT_LIMIT 20 |
62 | 66 | ||
63 | /* | 67 | /* |
68 | * misc. delays, in microseconds | ||
69 | */ | ||
70 | #define THROTTLE_DELAY 10 | ||
71 | #define TIMEOUT_DELAY 10 | ||
72 | #define BIOS_TO 1000 | ||
73 | /* BIOS is assumed to set the destination timeout to 1003520 nanoseconds */ | ||
74 | |||
75 | /* | ||
76 | * threshholds at which to use IPI to free resources | ||
77 | */ | ||
78 | #define PLUGSB4RESET 100 | ||
79 | #define TIMEOUTSB4RESET 100 | ||
80 | |||
81 | /* | ||
64 | * number of entries in the destination side payload queue | 82 | * number of entries in the destination side payload queue |
65 | */ | 83 | */ |
66 | #define DEST_Q_SIZE 17 | 84 | #define DEST_Q_SIZE 20 |
67 | /* | 85 | /* |
68 | * number of destination side software ack resources | 86 | * number of destination side software ack resources |
69 | */ | 87 | */ |
@@ -72,9 +90,10 @@ | |||
72 | /* | 90 | /* |
73 | * completion statuses for sending a TLB flush message | 91 | * completion statuses for sending a TLB flush message |
74 | */ | 92 | */ |
75 | #define FLUSH_RETRY 1 | 93 | #define FLUSH_RETRY_PLUGGED 1 |
76 | #define FLUSH_GIVEUP 2 | 94 | #define FLUSH_RETRY_TIMEOUT 2 |
77 | #define FLUSH_COMPLETE 3 | 95 | #define FLUSH_GIVEUP 3 |
96 | #define FLUSH_COMPLETE 4 | ||
78 | 97 | ||
79 | /* | 98 | /* |
80 | * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) | 99 | * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) |
@@ -86,14 +105,14 @@ | |||
86 | * 'base_dest_nodeid' field of the header corresponds to the | 105 | * 'base_dest_nodeid' field of the header corresponds to the |
87 | * destination nodeID associated with that specified bit. | 106 | * destination nodeID associated with that specified bit. |
88 | */ | 107 | */ |
89 | struct bau_target_nodemask { | 108 | struct bau_target_uvhubmask { |
90 | unsigned long bits[BITS_TO_LONGS(256)]; | 109 | unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)]; |
91 | }; | 110 | }; |
92 | 111 | ||
93 | /* | 112 | /* |
94 | * mask of cpu's on a node | 113 | * mask of cpu's on a uvhub |
95 | * (during initialization we need to check that unsigned long has | 114 | * (during initialization we need to check that unsigned long has |
96 | * enough bits for max. cpu's per node) | 115 | * enough bits for max. cpu's per uvhub) |
97 | */ | 116 | */ |
98 | struct bau_local_cpumask { | 117 | struct bau_local_cpumask { |
99 | unsigned long bits; | 118 | unsigned long bits; |
@@ -135,8 +154,8 @@ struct bau_msg_payload { | |||
135 | struct bau_msg_header { | 154 | struct bau_msg_header { |
136 | unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ | 155 | unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ |
137 | /* bits 5:0 */ | 156 | /* bits 5:0 */ |
138 | unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */ | 157 | unsigned int base_dest_nodeid:15; /* nasid (pnode<<1) of */ |
139 | /* bits 20:6 */ /* first bit in node_map */ | 158 | /* bits 20:6 */ /* first bit in uvhub map */ |
140 | unsigned int command:8; /* message type */ | 159 | unsigned int command:8; /* message type */ |
141 | /* bits 28:21 */ | 160 | /* bits 28:21 */ |
142 | /* 0x38: SN3net EndPoint Message */ | 161 | /* 0x38: SN3net EndPoint Message */ |
@@ -146,26 +165,38 @@ struct bau_msg_header { | |||
146 | unsigned int rsvd_2:9; /* must be zero */ | 165 | unsigned int rsvd_2:9; /* must be zero */ |
147 | /* bits 40:32 */ | 166 | /* bits 40:32 */ |
148 | /* Suppl_A is 56-41 */ | 167 | /* Suppl_A is 56-41 */ |
149 | unsigned int payload_2a:8;/* becomes byte 16 of msg */ | 168 | unsigned int sequence:16;/* message sequence number */ |
150 | /* bits 48:41 */ /* not currently using */ | 169 | /* bits 56:41 */ /* becomes bytes 16-17 of msg */ |
151 | unsigned int payload_2b:8;/* becomes byte 17 of msg */ | ||
152 | /* bits 56:49 */ /* not currently using */ | ||
153 | /* Address field (96:57) is never used as an | 170 | /* Address field (96:57) is never used as an |
154 | address (these are address bits 42:3) */ | 171 | address (these are address bits 42:3) */ |
172 | |||
155 | unsigned int rsvd_3:1; /* must be zero */ | 173 | unsigned int rsvd_3:1; /* must be zero */ |
156 | /* bit 57 */ | 174 | /* bit 57 */ |
157 | /* address bits 27:4 are payload */ | 175 | /* address bits 27:4 are payload */ |
158 | /* these 24 bits become bytes 12-14 of msg */ | 176 | /* these next 24 (58-81) bits become bytes 12-14 of msg */ |
177 | |||
178 | /* bits 65:58 land in byte 12 */ | ||
159 | unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ | 179 | unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ |
160 | /* bit 58 */ | 180 | /* bit 58 */ |
161 | 181 | unsigned int msg_type:3; /* software type of the message*/ | |
162 | unsigned int payload_1a:5;/* not currently used */ | 182 | /* bits 61:59 */ |
163 | /* bits 63:59 */ | 183 | unsigned int canceled:1; /* message canceled, resource to be freed*/ |
164 | unsigned int payload_1b:8;/* not currently used */ | 184 | /* bit 62 */ |
165 | /* bits 71:64 */ | 185 | unsigned int payload_1a:1;/* not currently used */ |
166 | unsigned int payload_1c:8;/* not currently used */ | 186 | /* bit 63 */ |
167 | /* bits 79:72 */ | 187 | unsigned int payload_1b:2;/* not currently used */ |
168 | unsigned int payload_1d:2;/* not currently used */ | 188 | /* bits 65:64 */ |
189 | |||
190 | /* bits 73:66 land in byte 13 */ | ||
191 | unsigned int payload_1ca:6;/* not currently used */ | ||
192 | /* bits 71:66 */ | ||
193 | unsigned int payload_1c:2;/* not currently used */ | ||
194 | /* bits 73:72 */ | ||
195 | |||
196 | /* bits 81:74 land in byte 14 */ | ||
197 | unsigned int payload_1d:6;/* not currently used */ | ||
198 | /* bits 79:74 */ | ||
199 | unsigned int payload_1e:2;/* not currently used */ | ||
169 | /* bits 81:80 */ | 200 | /* bits 81:80 */ |
170 | 201 | ||
171 | unsigned int rsvd_4:7; /* must be zero */ | 202 | unsigned int rsvd_4:7; /* must be zero */ |
@@ -178,7 +209,7 @@ struct bau_msg_header { | |||
178 | /* bits 95:90 */ | 209 | /* bits 95:90 */ |
179 | unsigned int rsvd_6:5; /* must be zero */ | 210 | unsigned int rsvd_6:5; /* must be zero */ |
180 | /* bits 100:96 */ | 211 | /* bits 100:96 */ |
181 | unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */ | 212 | unsigned int int_both:1;/* if 1, interrupt both sockets on the uvhub */ |
182 | /* bit 101*/ | 213 | /* bit 101*/ |
183 | unsigned int fairness:3;/* usually zero */ | 214 | unsigned int fairness:3;/* usually zero */ |
184 | /* bits 104:102 */ | 215 | /* bits 104:102 */ |
@@ -191,13 +222,18 @@ struct bau_msg_header { | |||
191 | /* bits 127:107 */ | 222 | /* bits 127:107 */ |
192 | }; | 223 | }; |
193 | 224 | ||
225 | /* see msg_type: */ | ||
226 | #define MSG_NOOP 0 | ||
227 | #define MSG_REGULAR 1 | ||
228 | #define MSG_RETRY 2 | ||
229 | |||
194 | /* | 230 | /* |
195 | * The activation descriptor: | 231 | * The activation descriptor: |
196 | * The format of the message to send, plus all accompanying control | 232 | * The format of the message to send, plus all accompanying control |
197 | * Should be 64 bytes | 233 | * Should be 64 bytes |
198 | */ | 234 | */ |
199 | struct bau_desc { | 235 | struct bau_desc { |
200 | struct bau_target_nodemask distribution; | 236 | struct bau_target_uvhubmask distribution; |
201 | /* | 237 | /* |
202 | * message template, consisting of header and payload: | 238 | * message template, consisting of header and payload: |
203 | */ | 239 | */ |
@@ -237,19 +273,25 @@ struct bau_payload_queue_entry { | |||
237 | unsigned short acknowledge_count; /* filled in by destination */ | 273 | unsigned short acknowledge_count; /* filled in by destination */ |
238 | /* 16 bits, bytes 10-11 */ | 274 | /* 16 bits, bytes 10-11 */ |
239 | 275 | ||
240 | unsigned short replied_to:1; /* sent as 0 by the source */ | 276 | /* these next 3 bytes come from bits 58-81 of the message header */ |
241 | /* 1 bit */ | 277 | unsigned short replied_to:1; /* sent as 0 by the source */ |
242 | unsigned short unused1:7; /* not currently using */ | 278 | unsigned short msg_type:3; /* software message type */ |
243 | /* 7 bits: byte 12) */ | 279 | unsigned short canceled:1; /* sent as 0 by the source */ |
280 | unsigned short unused1:3; /* not currently using */ | ||
281 | /* byte 12 */ | ||
244 | 282 | ||
245 | unsigned char unused2[2]; /* not currently using */ | 283 | unsigned char unused2a; /* not currently using */ |
246 | /* bytes 13-14 */ | 284 | /* byte 13 */ |
285 | unsigned char unused2; /* not currently using */ | ||
286 | /* byte 14 */ | ||
247 | 287 | ||
248 | unsigned char sw_ack_vector; /* filled in by the hardware */ | 288 | unsigned char sw_ack_vector; /* filled in by the hardware */ |
249 | /* byte 15 (bits 127:120) */ | 289 | /* byte 15 (bits 127:120) */ |
250 | 290 | ||
251 | unsigned char unused4[3]; /* not currently using bytes 17-19 */ | 291 | unsigned short sequence; /* message sequence number */ |
252 | /* bytes 17-19 */ | 292 | /* bytes 16-17 */ |
293 | unsigned char unused4[2]; /* not currently using bytes 18-19 */ | ||
294 | /* bytes 18-19 */ | ||
253 | 295 | ||
254 | int number_of_cpus; /* filled in at destination */ | 296 | int number_of_cpus; /* filled in at destination */ |
255 | /* 32 bits, bytes 20-23 (aligned) */ | 297 | /* 32 bits, bytes 20-23 (aligned) */ |
@@ -259,63 +301,93 @@ struct bau_payload_queue_entry { | |||
259 | }; | 301 | }; |
260 | 302 | ||
261 | /* | 303 | /* |
262 | * one for every slot in the destination payload queue | 304 | * one per-cpu; to locate the software tables |
263 | */ | ||
264 | struct bau_msg_status { | ||
265 | struct bau_local_cpumask seen_by; /* map of cpu's */ | ||
266 | }; | ||
267 | |||
268 | /* | ||
269 | * one for every slot in the destination software ack resources | ||
270 | */ | ||
271 | struct bau_sw_ack_status { | ||
272 | struct bau_payload_queue_entry *msg; /* associated message */ | ||
273 | int watcher; /* cpu monitoring, or -1 */ | ||
274 | }; | ||
275 | |||
276 | /* | ||
277 | * one on every node and per-cpu; to locate the software tables | ||
278 | */ | 305 | */ |
279 | struct bau_control { | 306 | struct bau_control { |
280 | struct bau_desc *descriptor_base; | 307 | struct bau_desc *descriptor_base; |
281 | struct bau_payload_queue_entry *bau_msg_head; | ||
282 | struct bau_payload_queue_entry *va_queue_first; | 308 | struct bau_payload_queue_entry *va_queue_first; |
283 | struct bau_payload_queue_entry *va_queue_last; | 309 | struct bau_payload_queue_entry *va_queue_last; |
284 | struct bau_msg_status *msg_statuses; | 310 | struct bau_payload_queue_entry *bau_msg_head; |
285 | int *watching; /* pointer to array */ | 311 | struct bau_control *uvhub_master; |
312 | struct bau_control *socket_master; | ||
313 | unsigned long timeout_interval; | ||
314 | atomic_t active_descriptor_count; | ||
315 | int max_concurrent; | ||
316 | int max_concurrent_constant; | ||
317 | int retry_message_scans; | ||
318 | int plugged_tries; | ||
319 | int timeout_tries; | ||
320 | int ipi_attempts; | ||
321 | int conseccompletes; | ||
322 | short cpu; | ||
323 | short uvhub_cpu; | ||
324 | short uvhub; | ||
325 | short cpus_in_socket; | ||
326 | short cpus_in_uvhub; | ||
327 | unsigned short message_number; | ||
328 | unsigned short uvhub_quiesce; | ||
329 | short socket_acknowledge_count[DEST_Q_SIZE]; | ||
330 | cycles_t send_message; | ||
331 | spinlock_t masks_lock; | ||
332 | spinlock_t uvhub_lock; | ||
333 | spinlock_t queue_lock; | ||
286 | }; | 334 | }; |
287 | 335 | ||
288 | /* | 336 | /* |
289 | * This structure is allocated per_cpu for UV TLB shootdown statistics. | 337 | * This structure is allocated per_cpu for UV TLB shootdown statistics. |
290 | */ | 338 | */ |
291 | struct ptc_stats { | 339 | struct ptc_stats { |
292 | unsigned long ptc_i; /* number of IPI-style flushes */ | 340 | /* sender statistics */ |
293 | unsigned long requestor; /* number of nodes this cpu sent to */ | 341 | unsigned long s_giveup; /* number of fall backs to IPI-style flushes */ |
294 | unsigned long requestee; /* times cpu was remotely requested */ | 342 | unsigned long s_requestor; /* number of shootdown requests */ |
295 | unsigned long alltlb; /* times all tlb's on this cpu were flushed */ | 343 | unsigned long s_stimeout; /* source side timeouts */ |
296 | unsigned long onetlb; /* times just one tlb on this cpu was flushed */ | 344 | unsigned long s_dtimeout; /* destination side timeouts */ |
297 | unsigned long s_retry; /* retries on source side timeouts */ | 345 | unsigned long s_time; /* time spent in sending side */ |
298 | unsigned long d_retry; /* retries on destination side timeouts */ | 346 | unsigned long s_retriesok; /* successful retries */ |
299 | unsigned long sflush; /* cycles spent in uv_flush_tlb_others */ | 347 | unsigned long s_ntargcpu; /* number of cpus targeted */ |
300 | unsigned long dflush; /* cycles spent on destination side */ | 348 | unsigned long s_ntarguvhub; /* number of uvhubs targeted */ |
301 | unsigned long retriesok; /* successes on retries */ | 349 | unsigned long s_ntarguvhub16; /* number of times >= 16 target hubs */ |
302 | unsigned long nomsg; /* interrupts with no message */ | 350 | unsigned long s_ntarguvhub8; /* number of times >= 8 target hubs */ |
303 | unsigned long multmsg; /* interrupts with multiple messages */ | 351 | unsigned long s_ntarguvhub4; /* number of times >= 4 target hubs */ |
304 | unsigned long ntargeted;/* nodes targeted */ | 352 | unsigned long s_ntarguvhub2; /* number of times >= 2 target hubs */ |
353 | unsigned long s_ntarguvhub1; /* number of times == 1 target hub */ | ||
354 | unsigned long s_resets_plug; /* ipi-style resets from plug state */ | ||
355 | unsigned long s_resets_timeout; /* ipi-style resets from timeouts */ | ||
356 | unsigned long s_busy; /* status stayed busy past s/w timer */ | ||
357 | unsigned long s_throttles; /* waits in throttle */ | ||
358 | unsigned long s_retry_messages; /* retry broadcasts */ | ||
359 | /* destination statistics */ | ||
360 | unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */ | ||
361 | unsigned long d_onetlb; /* times just one tlb on this cpu was flushed */ | ||
362 | unsigned long d_multmsg; /* interrupts with multiple messages */ | ||
363 | unsigned long d_nomsg; /* interrupts with no message */ | ||
364 | unsigned long d_time; /* time spent on destination side */ | ||
365 | unsigned long d_requestee; /* number of messages processed */ | ||
366 | unsigned long d_retries; /* number of retry messages processed */ | ||
367 | unsigned long d_canceled; /* number of messages canceled by retries */ | ||
368 | unsigned long d_nocanceled; /* retries that found nothing to cancel */ | ||
369 | unsigned long d_resets; /* number of ipi-style requests processed */ | ||
370 | unsigned long d_rcanceled; /* number of messages canceled by resets */ | ||
305 | }; | 371 | }; |
306 | 372 | ||
307 | static inline int bau_node_isset(int node, struct bau_target_nodemask *dstp) | 373 | static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) |
308 | { | 374 | { |
309 | return constant_test_bit(node, &dstp->bits[0]); | 375 | return constant_test_bit(uvhub, &dstp->bits[0]); |
310 | } | 376 | } |
311 | static inline void bau_node_set(int node, struct bau_target_nodemask *dstp) | 377 | static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp) |
312 | { | 378 | { |
313 | __set_bit(node, &dstp->bits[0]); | 379 | __set_bit(uvhub, &dstp->bits[0]); |
314 | } | 380 | } |
315 | static inline void bau_nodes_clear(struct bau_target_nodemask *dstp, int nbits) | 381 | static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, |
382 | int nbits) | ||
316 | { | 383 | { |
317 | bitmap_zero(&dstp->bits[0], nbits); | 384 | bitmap_zero(&dstp->bits[0], nbits); |
318 | } | 385 | } |
386 | static inline int bau_uvhub_weight(struct bau_target_uvhubmask *dstp) | ||
387 | { | ||
388 | return bitmap_weight((unsigned long *)&dstp->bits[0], | ||
389 | UV_DISTRIBUTION_SIZE); | ||
390 | } | ||
319 | 391 | ||
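A short sketch of the renamed uvhub-mask helpers in use; the uvhub numbers are invented and UV_DISTRIBUTION_SIZE is defined elsewhere in this header:

static int example_build_distribution(struct bau_target_uvhubmask *mask)
{
	bau_uvhubs_clear(mask, UV_DISTRIBUTION_SIZE);
	bau_uvhub_set(0, mask);			/* target uvhub 0 */
	bau_uvhub_set(3, mask);			/* and uvhub 3 */

	return bau_uvhub_weight(mask);		/* 2 uvhubs selected */
}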
320 | static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) | 392 | static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) |
321 | { | 393 | { |
@@ -328,4 +400,35 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) | |||
328 | extern void uv_bau_message_intr1(void); | 400 | extern void uv_bau_message_intr1(void); |
329 | extern void uv_bau_timeout_intr1(void); | 401 | extern void uv_bau_timeout_intr1(void); |
330 | 402 | ||
403 | struct atomic_short { | ||
404 | short counter; | ||
405 | }; | ||
406 | |||
407 | /** | ||
408 | * atomic_read_short - read a short atomic variable | ||
409 | * @v: pointer of type atomic_short | ||
410 | * | ||
411 | * Atomically reads the value of @v. | ||
412 | */ | ||
413 | static inline int atomic_read_short(const struct atomic_short *v) | ||
414 | { | ||
415 | return v->counter; | ||
416 | } | ||
417 | |||
418 | /** | ||
419 | * atomic_add_short_return - add and return a short int | ||
420 | * @i: short value to add | ||
421 | * @v: pointer of type atomic_short | ||
422 | * | ||
423 | * Atomically adds @i to @v and returns @i + @v | ||
424 | */ | ||
425 | static inline int atomic_add_short_return(short i, struct atomic_short *v) | ||
426 | { | ||
427 | short __i = i; | ||
428 | asm volatile(LOCK_PREFIX "xaddw %0, %1" | ||
429 | : "+r" (i), "+m" (v->counter) | ||
430 | : : "memory"); | ||
431 | return i + __i; | ||
432 | } | ||
433 | |||
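A hedged sketch of the new short atomic in use, e.g. to account in-flight descriptors against a limit; the counter and limit are hypothetical, only the two helpers above come from the patch:

static int example_try_reserve(struct atomic_short *inflight, int limit)
{
	if (atomic_read_short(inflight) >= limit)
		return 0;				/* over the limit, retry later */

	if (atomic_add_short_return(1, inflight) > limit) {
		atomic_add_short_return(-1, inflight);	/* lost the race, back off */
		return 0;
	}
	return 1;					/* slot reserved */
}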
331 | #endif /* _ASM_X86_UV_UV_BAU_H */ | 434 | #endif /* _ASM_X86_UV_UV_BAU_H */ |
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 14cc74ba5d23..bf6b88ef8eeb 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h | |||
@@ -307,7 +307,7 @@ static inline unsigned long uv_read_global_mmr32(int pnode, unsigned long offset | |||
307 | * Access Global MMR space using the MMR space located at the top of physical | 307 | * Access Global MMR space using the MMR space located at the top of physical |
308 | * memory. | 308 | * memory. |
309 | */ | 309 | */ |
310 | static inline unsigned long *uv_global_mmr64_address(int pnode, unsigned long offset) | 310 | static inline volatile void __iomem *uv_global_mmr64_address(int pnode, unsigned long offset) |
311 | { | 311 | { |
312 | return __va(UV_GLOBAL_MMR64_BASE | | 312 | return __va(UV_GLOBAL_MMR64_BASE | |
313 | UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); | 313 | UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); |
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 2cae46c7c8a2..b2f2d2e05cec 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h | |||
@@ -1,4 +1,3 @@ | |||
1 | |||
2 | /* | 1 | /* |
3 | * This file is subject to the terms and conditions of the GNU General Public | 2 | * This file is subject to the terms and conditions of the GNU General Public |
4 | * License. See the file "COPYING" in the main directory of this archive | 3 | * License. See the file "COPYING" in the main directory of this archive |
@@ -15,13 +14,25 @@ | |||
15 | #define UV_MMR_ENABLE (1UL << 63) | 14 | #define UV_MMR_ENABLE (1UL << 63) |
16 | 15 | ||
17 | /* ========================================================================= */ | 16 | /* ========================================================================= */ |
17 | /* UVH_BAU_DATA_BROADCAST */ | ||
18 | /* ========================================================================= */ | ||
19 | #define UVH_BAU_DATA_BROADCAST 0x61688UL | ||
20 | #define UVH_BAU_DATA_BROADCAST_32 0x0440 | ||
21 | |||
22 | #define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0 | ||
23 | #define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL | ||
24 | |||
25 | union uvh_bau_data_broadcast_u { | ||
26 | unsigned long v; | ||
27 | struct uvh_bau_data_broadcast_s { | ||
28 | unsigned long enable : 1; /* RW */ | ||
29 | unsigned long rsvd_1_63: 63; /* */ | ||
30 | } s; | ||
31 | }; | ||
32 | |||
33 | /* ========================================================================= */ | ||
18 | /* UVH_BAU_DATA_CONFIG */ | 34 | /* UVH_BAU_DATA_CONFIG */ |
19 | /* ========================================================================= */ | 35 | /* ========================================================================= */ |
20 | #define UVH_LB_BAU_MISC_CONTROL 0x320170UL | ||
21 | #define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15 | ||
22 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16 | ||
23 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL | ||
24 | /* 1011 timebase 7 (168millisec) * 3 ticks -> 500ms */ | ||
25 | #define UVH_BAU_DATA_CONFIG 0x61680UL | 36 | #define UVH_BAU_DATA_CONFIG 0x61680UL |
26 | #define UVH_BAU_DATA_CONFIG_32 0x0438 | 37 | #define UVH_BAU_DATA_CONFIG_32 0x0438 |
27 | 38 | ||
@@ -604,6 +615,68 @@ union uvh_lb_bau_intd_software_acknowledge_u { | |||
604 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0a70 | 615 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0a70 |
605 | 616 | ||
606 | /* ========================================================================= */ | 617 | /* ========================================================================= */ |
618 | /* UVH_LB_BAU_MISC_CONTROL */ | ||
619 | /* ========================================================================= */ | ||
620 | #define UVH_LB_BAU_MISC_CONTROL 0x320170UL | ||
621 | #define UVH_LB_BAU_MISC_CONTROL_32 0x00a10 | ||
622 | |||
623 | #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 | ||
624 | #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL | ||
625 | #define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 | ||
626 | #define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL | ||
627 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 | ||
628 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL | ||
629 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 | ||
630 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL | ||
631 | #define UVH_LB_BAU_MISC_CONTROL_CSI_AGENT_PRESENCE_VECTOR_SHFT 11 | ||
632 | #define UVH_LB_BAU_MISC_CONTROL_CSI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL | ||
633 | #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 | ||
634 | #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL | ||
635 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 | ||
636 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL | ||
637 | #define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 | ||
638 | #define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL | ||
639 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 | ||
640 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL | ||
641 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 | ||
642 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL | ||
643 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 | ||
644 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL | ||
645 | #define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 | ||
646 | #define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL | ||
647 | #define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 | ||
648 | #define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL | ||
649 | #define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 | ||
650 | #define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL | ||
651 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 | ||
652 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL | ||
653 | #define UVH_LB_BAU_MISC_CONTROL_FUN_SHFT 48 | ||
654 | #define UVH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL | ||
655 | |||
656 | union uvh_lb_bau_misc_control_u { | ||
657 | unsigned long v; | ||
658 | struct uvh_lb_bau_misc_control_s { | ||
659 | unsigned long rejection_delay : 8; /* RW */ | ||
660 | unsigned long apic_mode : 1; /* RW */ | ||
661 | unsigned long force_broadcast : 1; /* RW */ | ||
662 | unsigned long force_lock_nop : 1; /* RW */ | ||
663 | unsigned long csi_agent_presence_vector : 3; /* RW */ | ||
664 | unsigned long descriptor_fetch_mode : 1; /* RW */ | ||
665 | unsigned long enable_intd_soft_ack_mode : 1; /* RW */ | ||
666 | unsigned long intd_soft_ack_timeout_period : 4; /* RW */ | ||
667 | unsigned long enable_dual_mapping_mode : 1; /* RW */ | ||
668 | unsigned long vga_io_port_decode_enable : 1; /* RW */ | ||
669 | unsigned long vga_io_port_16_bit_decode : 1; /* RW */ | ||
670 | unsigned long suppress_dest_registration : 1; /* RW */ | ||
671 | unsigned long programmed_initial_priority : 3; /* RW */ | ||
672 | unsigned long use_incoming_priority : 1; /* RW */ | ||
673 | unsigned long enable_programmed_initial_priority : 1; /* RW */ | ||
674 | unsigned long rsvd_29_47 : 19; /* */ | ||
675 | unsigned long fun : 16; /* RW */ | ||
676 | } s; | ||
677 | }; | ||
678 | |||
679 | /* ========================================================================= */ | ||
607 | /* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ | 680 | /* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ |
608 | /* ========================================================================= */ | 681 | /* ========================================================================= */ |
609 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL | 682 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL |
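The uvh_lb_bau_misc_control_u union added in the hunk above follows the pattern used throughout this header: the raw 64-bit MMR value (v) is overlaid with a bit-field view (s), so a read-modify-write needs no hand-rolled shifting. A trimmed user-space sketch of that pattern, keeping only the first few fields and assuming the kernel's LP64, little-endian GCC bit-field layout (the shortened union name is illustrative):

#include <stdio.h>

/* Trimmed copy of union uvh_lb_bau_misc_control_u: raw value plus a
 * bit-field view; the real union carries many more fields. */
union misc_control {
	unsigned long v;
	struct {
		unsigned long rejection_delay : 8;	/* bits 0..7 */
		unsigned long apic_mode       : 1;	/* bit  8    */
		unsigned long force_broadcast : 1;	/* bit  9    */
		unsigned long rsvd            : 54;
	} s;
};

int main(void)
{
	union misc_control mc;

	mc.v = 0;			/* in the kernel: read the MMR  */
	mc.s.rejection_delay = 0x20;	/* update individual fields ... */
	mc.s.apic_mode = 1;
					/* ... then write mc.v back     */
	printf("0x%lx\n", mc.v);	/* prints 0x120 */
	return 0;
}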
@@ -681,334 +754,6 @@ union uvh_lb_bau_sb_descriptor_base_u { | |||
681 | }; | 754 | }; |
682 | 755 | ||
683 | /* ========================================================================= */ | 756 | /* ========================================================================= */ |
684 | /* UVH_LB_MCAST_AOERR0_RPT_ENABLE */ | ||
685 | /* ========================================================================= */ | ||
686 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE 0x50b20UL | ||
687 | |||
688 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_SHFT 0 | ||
689 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_MASK 0x0000000000000001UL | ||
690 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_SHFT 1 | ||
691 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_MASK 0x0000000000000002UL | ||
692 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_SHFT 2 | ||
693 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_MASK 0x0000000000000004UL | ||
694 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_SHFT 3 | ||
695 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_MASK 0x0000000000000008UL | ||
696 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_SHFT 4 | ||
697 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_MASK 0x0000000000000010UL | ||
698 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_SHFT 5 | ||
699 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_MASK 0x0000000000000020UL | ||
700 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_SHFT 6 | ||
701 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_MASK 0x0000000000000040UL | ||
702 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_SHFT 7 | ||
703 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_MASK 0x0000000000000080UL | ||
704 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_SHFT 8 | ||
705 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_MASK 0x0000000000000100UL | ||
706 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_SHFT 9 | ||
707 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_MASK 0x0000000000000200UL | ||
708 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_SHFT 10 | ||
709 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_MASK 0x0000000000000400UL | ||
710 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_SHFT 11 | ||
711 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_MASK 0x0000000000000800UL | ||
712 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_SHFT 12 | ||
713 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_MASK 0x0000000000001000UL | ||
714 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_SHFT 13 | ||
715 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_MASK 0x0000000000002000UL | ||
716 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_SHFT 14 | ||
717 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_MASK 0x0000000000004000UL | ||
718 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_SHFT 15 | ||
719 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_MASK 0x0000000000008000UL | ||
720 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_SHFT 16 | ||
721 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_MASK 0x0000000000010000UL | ||
722 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_SHFT 17 | ||
723 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_MASK 0x0000000000020000UL | ||
724 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_SHFT 18 | ||
725 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_MASK 0x0000000000040000UL | ||
726 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_SHFT 19 | ||
727 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_MASK 0x0000000000080000UL | ||
728 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_SHFT 20 | ||
729 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_MASK 0x0000000000100000UL | ||
730 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_SHFT 21 | ||
731 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_MASK 0x0000000000200000UL | ||
732 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_AMO_TIMEOUT_SHFT 22 | ||
733 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_AMO_TIMEOUT_MASK 0x0000000000400000UL | ||
734 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_PUT_TIMEOUT_SHFT 23 | ||
735 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_PUT_TIMEOUT_MASK 0x0000000000800000UL | ||
736 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_SHFT 24 | ||
737 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_MASK 0x0000000001000000UL | ||
738 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_SHFT 25 | ||
739 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_MASK 0x0000000002000000UL | ||
740 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_SHFT 26 | ||
741 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_MASK 0x0000000004000000UL | ||
742 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_SHFT 27 | ||
743 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_MASK 0x0000000008000000UL | ||
744 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_SHFT 28 | ||
745 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_MASK 0x0000000010000000UL | ||
746 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_SHFT 29 | ||
747 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_MASK 0x0000000020000000UL | ||
748 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_SHFT 30 | ||
749 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_MASK 0x0000000040000000UL | ||
750 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_SHFT 31 | ||
751 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_MASK 0x0000000080000000UL | ||
752 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_SHFT 32 | ||
753 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_MASK 0x0000000100000000UL | ||
754 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_SHFT 33 | ||
755 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_MASK 0x0000000200000000UL | ||
756 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_SHFT 34 | ||
757 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_MASK 0x0000000400000000UL | ||
758 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_SHFT 35 | ||
759 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_MASK 0x0000000800000000UL | ||
760 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_SHFT 36 | ||
761 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000001000000000UL | ||
762 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_SHFT 37 | ||
763 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_MASK 0x0000002000000000UL | ||
764 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_SHFT 38 | ||
765 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_MASK 0x0000004000000000UL | ||
766 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_SHFT 39 | ||
767 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_MASK 0x0000008000000000UL | ||
768 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_SHFT 40 | ||
769 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_MASK 0x0000010000000000UL | ||
770 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_SHFT 41 | ||
771 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_MASK 0x0000020000000000UL | ||
772 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_SHFT 42 | ||
773 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_MASK 0x0000040000000000UL | ||
774 | |||
775 | union uvh_lb_mcast_aoerr0_rpt_enable_u { | ||
776 | unsigned long v; | ||
777 | struct uvh_lb_mcast_aoerr0_rpt_enable_s { | ||
778 | unsigned long mcast_obese_msg : 1; /* RW */ | ||
779 | unsigned long mcast_data_sb_err : 1; /* RW */ | ||
780 | unsigned long mcast_nack_buff_parity : 1; /* RW */ | ||
781 | unsigned long mcast_timeout : 1; /* RW */ | ||
782 | unsigned long mcast_inactive_reply : 1; /* RW */ | ||
783 | unsigned long mcast_upgrade_error : 1; /* RW */ | ||
784 | unsigned long mcast_reg_count_underflow : 1; /* RW */ | ||
785 | unsigned long mcast_rep_obese_msg : 1; /* RW */ | ||
786 | unsigned long ucache_req_runt_msg : 1; /* RW */ | ||
787 | unsigned long ucache_req_obese_msg : 1; /* RW */ | ||
788 | unsigned long ucache_req_data_sb_err : 1; /* RW */ | ||
789 | unsigned long ucache_rep_runt_msg : 1; /* RW */ | ||
790 | unsigned long ucache_rep_obese_msg : 1; /* RW */ | ||
791 | unsigned long ucache_rep_data_sb_err : 1; /* RW */ | ||
792 | unsigned long ucache_rep_command_err : 1; /* RW */ | ||
793 | unsigned long ucache_pend_timeout : 1; /* RW */ | ||
794 | unsigned long macc_req_runt_msg : 1; /* RW */ | ||
795 | unsigned long macc_req_obese_msg : 1; /* RW */ | ||
796 | unsigned long macc_req_data_sb_err : 1; /* RW */ | ||
797 | unsigned long macc_rep_runt_msg : 1; /* RW */ | ||
798 | unsigned long macc_rep_obese_msg : 1; /* RW */ | ||
799 | unsigned long macc_rep_data_sb_err : 1; /* RW */ | ||
800 | unsigned long macc_amo_timeout : 1; /* RW */ | ||
801 | unsigned long macc_put_timeout : 1; /* RW */ | ||
802 | unsigned long macc_spurious_event : 1; /* RW */ | ||
803 | unsigned long ioh_destination_table_parity : 1; /* RW */ | ||
804 | unsigned long get_had_error_reply : 1; /* RW */ | ||
805 | unsigned long get_timeout : 1; /* RW */ | ||
806 | unsigned long lock_manager_had_error_reply : 1; /* RW */ | ||
807 | unsigned long put_had_error_reply : 1; /* RW */ | ||
808 | unsigned long put_timeout : 1; /* RW */ | ||
809 | unsigned long sb_activation_overrun : 1; /* RW */ | ||
810 | unsigned long completed_gb_activation_had_error_reply : 1; /* RW */ | ||
811 | unsigned long completed_gb_activation_timeout : 1; /* RW */ | ||
812 | unsigned long descriptor_buffer_0_parity : 1; /* RW */ | ||
813 | unsigned long descriptor_buffer_1_parity : 1; /* RW */ | ||
814 | unsigned long socket_destination_table_parity : 1; /* RW */ | ||
815 | unsigned long bau_reply_payload_corruption : 1; /* RW */ | ||
816 | unsigned long io_port_destination_table_parity : 1; /* RW */ | ||
817 | unsigned long intd_soft_ack_timeout : 1; /* RW */ | ||
818 | unsigned long int_rep_obese_msg : 1; /* RW */ | ||
819 | unsigned long int_rep_command_err : 1; /* RW */ | ||
820 | unsigned long int_timeout : 1; /* RW */ | ||
821 | unsigned long rsvd_43_63 : 21; /* */ | ||
822 | } s; | ||
823 | }; | ||
824 | |||
825 | /* ========================================================================= */ | ||
826 | /* UVH_LOCAL_INT0_CONFIG */ | ||
827 | /* ========================================================================= */ | ||
828 | #define UVH_LOCAL_INT0_CONFIG 0x61000UL | ||
829 | |||
830 | #define UVH_LOCAL_INT0_CONFIG_VECTOR_SHFT 0 | ||
831 | #define UVH_LOCAL_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL | ||
832 | #define UVH_LOCAL_INT0_CONFIG_DM_SHFT 8 | ||
833 | #define UVH_LOCAL_INT0_CONFIG_DM_MASK 0x0000000000000700UL | ||
834 | #define UVH_LOCAL_INT0_CONFIG_DESTMODE_SHFT 11 | ||
835 | #define UVH_LOCAL_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL | ||
836 | #define UVH_LOCAL_INT0_CONFIG_STATUS_SHFT 12 | ||
837 | #define UVH_LOCAL_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL | ||
838 | #define UVH_LOCAL_INT0_CONFIG_P_SHFT 13 | ||
839 | #define UVH_LOCAL_INT0_CONFIG_P_MASK 0x0000000000002000UL | ||
840 | #define UVH_LOCAL_INT0_CONFIG_T_SHFT 15 | ||
841 | #define UVH_LOCAL_INT0_CONFIG_T_MASK 0x0000000000008000UL | ||
842 | #define UVH_LOCAL_INT0_CONFIG_M_SHFT 16 | ||
843 | #define UVH_LOCAL_INT0_CONFIG_M_MASK 0x0000000000010000UL | ||
844 | #define UVH_LOCAL_INT0_CONFIG_APIC_ID_SHFT 32 | ||
845 | #define UVH_LOCAL_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL | ||
846 | |||
847 | union uvh_local_int0_config_u { | ||
848 | unsigned long v; | ||
849 | struct uvh_local_int0_config_s { | ||
850 | unsigned long vector_ : 8; /* RW */ | ||
851 | unsigned long dm : 3; /* RW */ | ||
852 | unsigned long destmode : 1; /* RW */ | ||
853 | unsigned long status : 1; /* RO */ | ||
854 | unsigned long p : 1; /* RO */ | ||
855 | unsigned long rsvd_14 : 1; /* */ | ||
856 | unsigned long t : 1; /* RO */ | ||
857 | unsigned long m : 1; /* RW */ | ||
858 | unsigned long rsvd_17_31: 15; /* */ | ||
859 | unsigned long apic_id : 32; /* RW */ | ||
860 | } s; | ||
861 | }; | ||
862 | |||
863 | /* ========================================================================= */ | ||
864 | /* UVH_LOCAL_INT0_ENABLE */ | ||
865 | /* ========================================================================= */ | ||
866 | #define UVH_LOCAL_INT0_ENABLE 0x65000UL | ||
867 | |||
868 | #define UVH_LOCAL_INT0_ENABLE_LB_HCERR_SHFT 0 | ||
869 | #define UVH_LOCAL_INT0_ENABLE_LB_HCERR_MASK 0x0000000000000001UL | ||
870 | #define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_SHFT 1 | ||
871 | #define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_MASK 0x0000000000000002UL | ||
872 | #define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_SHFT 2 | ||
873 | #define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_MASK 0x0000000000000004UL | ||
874 | #define UVH_LOCAL_INT0_ENABLE_LH_HCERR_SHFT 3 | ||
875 | #define UVH_LOCAL_INT0_ENABLE_LH_HCERR_MASK 0x0000000000000008UL | ||
876 | #define UVH_LOCAL_INT0_ENABLE_RH_HCERR_SHFT 4 | ||
877 | #define UVH_LOCAL_INT0_ENABLE_RH_HCERR_MASK 0x0000000000000010UL | ||
878 | #define UVH_LOCAL_INT0_ENABLE_XN_HCERR_SHFT 5 | ||
879 | #define UVH_LOCAL_INT0_ENABLE_XN_HCERR_MASK 0x0000000000000020UL | ||
880 | #define UVH_LOCAL_INT0_ENABLE_SI_HCERR_SHFT 6 | ||
881 | #define UVH_LOCAL_INT0_ENABLE_SI_HCERR_MASK 0x0000000000000040UL | ||
882 | #define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_SHFT 7 | ||
883 | #define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_MASK 0x0000000000000080UL | ||
884 | #define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_SHFT 8 | ||
885 | #define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_MASK 0x0000000000000100UL | ||
886 | #define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_SHFT 9 | ||
887 | #define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_MASK 0x0000000000000200UL | ||
888 | #define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_SHFT 10 | ||
889 | #define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_MASK 0x0000000000000400UL | ||
890 | #define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_SHFT 11 | ||
891 | #define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_MASK 0x0000000000000800UL | ||
892 | #define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_SHFT 12 | ||
893 | #define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_MASK 0x0000000000001000UL | ||
894 | #define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_SHFT 13 | ||
895 | #define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_MASK 0x0000000000002000UL | ||
896 | #define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_SHFT 14 | ||
897 | #define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_MASK 0x0000000000004000UL | ||
898 | #define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_SHFT 15 | ||
899 | #define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_MASK 0x0000000000008000UL | ||
900 | #define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_SHFT 16 | ||
901 | #define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_MASK 0x0000000000010000UL | ||
902 | #define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_SHFT 17 | ||
903 | #define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_MASK 0x0000000000020000UL | ||
904 | #define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_SHFT 18 | ||
905 | #define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_MASK 0x0000000000040000UL | ||
906 | #define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_SHFT 19 | ||
907 | #define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_MASK 0x0000000000080000UL | ||
908 | #define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_SHFT 20 | ||
909 | #define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_MASK 0x0000000000100000UL | ||
910 | #define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_SHFT 21 | ||
911 | #define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_MASK 0x0000000000200000UL | ||
912 | #define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_SHFT 22 | ||
913 | #define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL | ||
914 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_SHFT 23 | ||
915 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_MASK 0x0000000000800000UL | ||
916 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_SHFT 24 | ||
917 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_MASK 0x0000000001000000UL | ||
918 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_SHFT 25 | ||
919 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_MASK 0x0000000002000000UL | ||
920 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_SHFT 26 | ||
921 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_MASK 0x0000000004000000UL | ||
922 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_SHFT 27 | ||
923 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_MASK 0x0000000008000000UL | ||
924 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_SHFT 28 | ||
925 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_MASK 0x0000000010000000UL | ||
926 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_SHFT 29 | ||
927 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_MASK 0x0000000020000000UL | ||
928 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_SHFT 30 | ||
929 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_MASK 0x0000000040000000UL | ||
930 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_SHFT 31 | ||
931 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_MASK 0x0000000080000000UL | ||
932 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_SHFT 32 | ||
933 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_MASK 0x0000000100000000UL | ||
934 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_SHFT 33 | ||
935 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_MASK 0x0000000200000000UL | ||
936 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_SHFT 34 | ||
937 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_MASK 0x0000000400000000UL | ||
938 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_SHFT 35 | ||
939 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_MASK 0x0000000800000000UL | ||
940 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_SHFT 36 | ||
941 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_MASK 0x0000001000000000UL | ||
942 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_SHFT 37 | ||
943 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_MASK 0x0000002000000000UL | ||
944 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_SHFT 38 | ||
945 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_MASK 0x0000004000000000UL | ||
946 | #define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_SHFT 39 | ||
947 | #define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_MASK 0x0000008000000000UL | ||
948 | #define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_SHFT 40 | ||
949 | #define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_MASK 0x0000010000000000UL | ||
950 | #define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_SHFT 41 | ||
951 | #define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_MASK 0x0000020000000000UL | ||
952 | #define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_SHFT 42 | ||
953 | #define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_MASK 0x0000040000000000UL | ||
954 | #define UVH_LOCAL_INT0_ENABLE_LTC_INT_SHFT 43 | ||
955 | #define UVH_LOCAL_INT0_ENABLE_LTC_INT_MASK 0x0000080000000000UL | ||
956 | #define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_SHFT 44 | ||
957 | #define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL | ||
958 | |||
959 | union uvh_local_int0_enable_u { | ||
960 | unsigned long v; | ||
961 | struct uvh_local_int0_enable_s { | ||
962 | unsigned long lb_hcerr : 1; /* RW */ | ||
963 | unsigned long gr0_hcerr : 1; /* RW */ | ||
964 | unsigned long gr1_hcerr : 1; /* RW */ | ||
965 | unsigned long lh_hcerr : 1; /* RW */ | ||
966 | unsigned long rh_hcerr : 1; /* RW */ | ||
967 | unsigned long xn_hcerr : 1; /* RW */ | ||
968 | unsigned long si_hcerr : 1; /* RW */ | ||
969 | unsigned long lb_aoerr0 : 1; /* RW */ | ||
970 | unsigned long gr0_aoerr0 : 1; /* RW */ | ||
971 | unsigned long gr1_aoerr0 : 1; /* RW */ | ||
972 | unsigned long lh_aoerr0 : 1; /* RW */ | ||
973 | unsigned long rh_aoerr0 : 1; /* RW */ | ||
974 | unsigned long xn_aoerr0 : 1; /* RW */ | ||
975 | unsigned long si_aoerr0 : 1; /* RW */ | ||
976 | unsigned long lb_aoerr1 : 1; /* RW */ | ||
977 | unsigned long gr0_aoerr1 : 1; /* RW */ | ||
978 | unsigned long gr1_aoerr1 : 1; /* RW */ | ||
979 | unsigned long lh_aoerr1 : 1; /* RW */ | ||
980 | unsigned long rh_aoerr1 : 1; /* RW */ | ||
981 | unsigned long xn_aoerr1 : 1; /* RW */ | ||
982 | unsigned long si_aoerr1 : 1; /* RW */ | ||
983 | unsigned long rh_vpi_int : 1; /* RW */ | ||
984 | unsigned long system_shutdown_int : 1; /* RW */ | ||
985 | unsigned long lb_irq_int_0 : 1; /* RW */ | ||
986 | unsigned long lb_irq_int_1 : 1; /* RW */ | ||
987 | unsigned long lb_irq_int_2 : 1; /* RW */ | ||
988 | unsigned long lb_irq_int_3 : 1; /* RW */ | ||
989 | unsigned long lb_irq_int_4 : 1; /* RW */ | ||
990 | unsigned long lb_irq_int_5 : 1; /* RW */ | ||
991 | unsigned long lb_irq_int_6 : 1; /* RW */ | ||
992 | unsigned long lb_irq_int_7 : 1; /* RW */ | ||
993 | unsigned long lb_irq_int_8 : 1; /* RW */ | ||
994 | unsigned long lb_irq_int_9 : 1; /* RW */ | ||
995 | unsigned long lb_irq_int_10 : 1; /* RW */ | ||
996 | unsigned long lb_irq_int_11 : 1; /* RW */ | ||
997 | unsigned long lb_irq_int_12 : 1; /* RW */ | ||
998 | unsigned long lb_irq_int_13 : 1; /* RW */ | ||
999 | unsigned long lb_irq_int_14 : 1; /* RW */ | ||
1000 | unsigned long lb_irq_int_15 : 1; /* RW */ | ||
1001 | unsigned long l1_nmi_int : 1; /* RW */ | ||
1002 | unsigned long stop_clock : 1; /* RW */ | ||
1003 | unsigned long asic_to_l1 : 1; /* RW */ | ||
1004 | unsigned long l1_to_asic : 1; /* RW */ | ||
1005 | unsigned long ltc_int : 1; /* RW */ | ||
1006 | unsigned long la_seq_trigger : 1; /* RW */ | ||
1007 | unsigned long rsvd_45_63 : 19; /* */ | ||
1008 | } s; | ||
1009 | }; | ||
1010 | |||
1011 | /* ========================================================================= */ | ||
1012 | /* UVH_NODE_ID */ | 757 | /* UVH_NODE_ID */ |
1013 | /* ========================================================================= */ | 758 | /* ========================================================================= */ |
1014 | #define UVH_NODE_ID 0x0UL | 759 | #define UVH_NODE_ID 0x0UL |
@@ -1112,26 +857,6 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u { | |||
1112 | }; | 857 | }; |
1113 | 858 | ||
1114 | /* ========================================================================= */ | 859 | /* ========================================================================= */ |
1115 | /* UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR */ | ||
1116 | /* ========================================================================= */ | ||
1117 | #define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR 0x1600020UL | ||
1118 | |||
1119 | #define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT 26 | ||
1120 | #define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL | ||
1121 | #define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 | ||
1122 | #define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL | ||
1123 | |||
1124 | union uvh_rh_gam_cfg_overlay_config_mmr_u { | ||
1125 | unsigned long v; | ||
1126 | struct uvh_rh_gam_cfg_overlay_config_mmr_s { | ||
1127 | unsigned long rsvd_0_25: 26; /* */ | ||
1128 | unsigned long base : 20; /* RW */ | ||
1129 | unsigned long rsvd_46_62: 17; /* */ | ||
1130 | unsigned long enable : 1; /* RW */ | ||
1131 | } s; | ||
1132 | }; | ||
1133 | |||
1134 | /* ========================================================================= */ | ||
1135 | /* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ | 860 | /* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ |
1136 | /* ========================================================================= */ | 861 | /* ========================================================================= */ |
1137 | #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL | 862 | #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL |
@@ -1263,101 +988,6 @@ union uvh_rtc1_int_config_u { | |||
1263 | }; | 988 | }; |
1264 | 989 | ||
1265 | /* ========================================================================= */ | 990 | /* ========================================================================= */ |
1266 | /* UVH_RTC2_INT_CONFIG */ | ||
1267 | /* ========================================================================= */ | ||
1268 | #define UVH_RTC2_INT_CONFIG 0x61600UL | ||
1269 | |||
1270 | #define UVH_RTC2_INT_CONFIG_VECTOR_SHFT 0 | ||
1271 | #define UVH_RTC2_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL | ||
1272 | #define UVH_RTC2_INT_CONFIG_DM_SHFT 8 | ||
1273 | #define UVH_RTC2_INT_CONFIG_DM_MASK 0x0000000000000700UL | ||
1274 | #define UVH_RTC2_INT_CONFIG_DESTMODE_SHFT 11 | ||
1275 | #define UVH_RTC2_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL | ||
1276 | #define UVH_RTC2_INT_CONFIG_STATUS_SHFT 12 | ||
1277 | #define UVH_RTC2_INT_CONFIG_STATUS_MASK 0x0000000000001000UL | ||
1278 | #define UVH_RTC2_INT_CONFIG_P_SHFT 13 | ||
1279 | #define UVH_RTC2_INT_CONFIG_P_MASK 0x0000000000002000UL | ||
1280 | #define UVH_RTC2_INT_CONFIG_T_SHFT 15 | ||
1281 | #define UVH_RTC2_INT_CONFIG_T_MASK 0x0000000000008000UL | ||
1282 | #define UVH_RTC2_INT_CONFIG_M_SHFT 16 | ||
1283 | #define UVH_RTC2_INT_CONFIG_M_MASK 0x0000000000010000UL | ||
1284 | #define UVH_RTC2_INT_CONFIG_APIC_ID_SHFT 32 | ||
1285 | #define UVH_RTC2_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL | ||
1286 | |||
1287 | union uvh_rtc2_int_config_u { | ||
1288 | unsigned long v; | ||
1289 | struct uvh_rtc2_int_config_s { | ||
1290 | unsigned long vector_ : 8; /* RW */ | ||
1291 | unsigned long dm : 3; /* RW */ | ||
1292 | unsigned long destmode : 1; /* RW */ | ||
1293 | unsigned long status : 1; /* RO */ | ||
1294 | unsigned long p : 1; /* RO */ | ||
1295 | unsigned long rsvd_14 : 1; /* */ | ||
1296 | unsigned long t : 1; /* RO */ | ||
1297 | unsigned long m : 1; /* RW */ | ||
1298 | unsigned long rsvd_17_31: 15; /* */ | ||
1299 | unsigned long apic_id : 32; /* RW */ | ||
1300 | } s; | ||
1301 | }; | ||
1302 | |||
1303 | /* ========================================================================= */ | ||
1304 | /* UVH_RTC3_INT_CONFIG */ | ||
1305 | /* ========================================================================= */ | ||
1306 | #define UVH_RTC3_INT_CONFIG 0x61640UL | ||
1307 | |||
1308 | #define UVH_RTC3_INT_CONFIG_VECTOR_SHFT 0 | ||
1309 | #define UVH_RTC3_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL | ||
1310 | #define UVH_RTC3_INT_CONFIG_DM_SHFT 8 | ||
1311 | #define UVH_RTC3_INT_CONFIG_DM_MASK 0x0000000000000700UL | ||
1312 | #define UVH_RTC3_INT_CONFIG_DESTMODE_SHFT 11 | ||
1313 | #define UVH_RTC3_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL | ||
1314 | #define UVH_RTC3_INT_CONFIG_STATUS_SHFT 12 | ||
1315 | #define UVH_RTC3_INT_CONFIG_STATUS_MASK 0x0000000000001000UL | ||
1316 | #define UVH_RTC3_INT_CONFIG_P_SHFT 13 | ||
1317 | #define UVH_RTC3_INT_CONFIG_P_MASK 0x0000000000002000UL | ||
1318 | #define UVH_RTC3_INT_CONFIG_T_SHFT 15 | ||
1319 | #define UVH_RTC3_INT_CONFIG_T_MASK 0x0000000000008000UL | ||
1320 | #define UVH_RTC3_INT_CONFIG_M_SHFT 16 | ||
1321 | #define UVH_RTC3_INT_CONFIG_M_MASK 0x0000000000010000UL | ||
1322 | #define UVH_RTC3_INT_CONFIG_APIC_ID_SHFT 32 | ||
1323 | #define UVH_RTC3_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL | ||
1324 | |||
1325 | union uvh_rtc3_int_config_u { | ||
1326 | unsigned long v; | ||
1327 | struct uvh_rtc3_int_config_s { | ||
1328 | unsigned long vector_ : 8; /* RW */ | ||
1329 | unsigned long dm : 3; /* RW */ | ||
1330 | unsigned long destmode : 1; /* RW */ | ||
1331 | unsigned long status : 1; /* RO */ | ||
1332 | unsigned long p : 1; /* RO */ | ||
1333 | unsigned long rsvd_14 : 1; /* */ | ||
1334 | unsigned long t : 1; /* RO */ | ||
1335 | unsigned long m : 1; /* RW */ | ||
1336 | unsigned long rsvd_17_31: 15; /* */ | ||
1337 | unsigned long apic_id : 32; /* RW */ | ||
1338 | } s; | ||
1339 | }; | ||
1340 | |||
1341 | /* ========================================================================= */ | ||
1342 | /* UVH_RTC_INC_RATIO */ | ||
1343 | /* ========================================================================= */ | ||
1344 | #define UVH_RTC_INC_RATIO 0x350000UL | ||
1345 | |||
1346 | #define UVH_RTC_INC_RATIO_FRACTION_SHFT 0 | ||
1347 | #define UVH_RTC_INC_RATIO_FRACTION_MASK 0x00000000000fffffUL | ||
1348 | #define UVH_RTC_INC_RATIO_RATIO_SHFT 20 | ||
1349 | #define UVH_RTC_INC_RATIO_RATIO_MASK 0x0000000000700000UL | ||
1350 | |||
1351 | union uvh_rtc_inc_ratio_u { | ||
1352 | unsigned long v; | ||
1353 | struct uvh_rtc_inc_ratio_s { | ||
1354 | unsigned long fraction : 20; /* RW */ | ||
1355 | unsigned long ratio : 3; /* RW */ | ||
1356 | unsigned long rsvd_23_63: 41; /* */ | ||
1357 | } s; | ||
1358 | }; | ||
1359 | |||
1360 | /* ========================================================================= */ | ||
1361 | /* UVH_SI_ADDR_MAP_CONFIG */ | 991 | /* UVH_SI_ADDR_MAP_CONFIG */ |
1362 | /* ========================================================================= */ | 992 | /* ========================================================================= */ |
1363 | #define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL | 993 | #define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL |
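Besides the unions, every register in this header also carries _SHFT/_MASK macros, so a field can be read or written on the raw value with plain mask-and-shift. A standalone sketch of that second access style, reusing the two INTD_SOFT_ACK_TIMEOUT_PERIOD constants from the UVH_LB_BAU_MISC_CONTROL hunk above (the helper names are illustrative):

#include <stdio.h>

#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL

/* Extract a field: mask first, then shift down. */
static unsigned long get_soft_ack_timeout(unsigned long mmr)
{
	return (mmr & UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK)
		>> UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT;
}

/* Insert a field: clear the old bits, then OR in the shifted new value. */
static unsigned long set_soft_ack_timeout(unsigned long mmr, unsigned long period)
{
	mmr &= ~UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK;
	mmr |= period << UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT;
	return mmr;
}

int main(void)
{
	unsigned long mmr = set_soft_ack_timeout(0, 0xb);

	printf("0x%lx -> period %lu\n", mmr, get_soft_ack_timeout(mmr));
	/* prints: 0xb0000 -> period 11 */
	return 0;
}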
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h deleted file mode 100644 index e49ed6d2fd4e..000000000000 --- a/arch/x86/include/asm/vmware.h +++ /dev/null | |||
@@ -1,27 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008, VMware, Inc. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
12 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
13 | * details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
18 | * | ||
19 | */ | ||
20 | #ifndef ASM_X86__VMWARE_H | ||
21 | #define ASM_X86__VMWARE_H | ||
22 | |||
23 | extern void vmware_platform_setup(void); | ||
24 | extern int vmware_platform(void); | ||
25 | extern void vmware_set_feature_bits(struct cpuinfo_x86 *c); | ||
26 | |||
27 | #endif | ||
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index fb9a080740ec..9e6779f7cf2d 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -25,6 +25,8 @@ | |||
25 | * | 25 | * |
26 | */ | 26 | */ |
27 | 27 | ||
28 | #include <linux/types.h> | ||
29 | |||
28 | /* | 30 | /* |
29 | * Definitions of Primary Processor-Based VM-Execution Controls. | 31 | * Definitions of Primary Processor-Based VM-Execution Controls. |
30 | */ | 32 | */ |
@@ -120,6 +122,8 @@ enum vmcs_field { | |||
120 | GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, | 122 | GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, |
121 | GUEST_IA32_PAT = 0x00002804, | 123 | GUEST_IA32_PAT = 0x00002804, |
122 | GUEST_IA32_PAT_HIGH = 0x00002805, | 124 | GUEST_IA32_PAT_HIGH = 0x00002805, |
125 | GUEST_IA32_EFER = 0x00002806, | ||
126 | GUEST_IA32_EFER_HIGH = 0x00002807, | ||
123 | GUEST_PDPTR0 = 0x0000280a, | 127 | GUEST_PDPTR0 = 0x0000280a, |
124 | GUEST_PDPTR0_HIGH = 0x0000280b, | 128 | GUEST_PDPTR0_HIGH = 0x0000280b, |
125 | GUEST_PDPTR1 = 0x0000280c, | 129 | GUEST_PDPTR1 = 0x0000280c, |
@@ -130,6 +134,8 @@ enum vmcs_field { | |||
130 | GUEST_PDPTR3_HIGH = 0x00002811, | 134 | GUEST_PDPTR3_HIGH = 0x00002811, |
131 | HOST_IA32_PAT = 0x00002c00, | 135 | HOST_IA32_PAT = 0x00002c00, |
132 | HOST_IA32_PAT_HIGH = 0x00002c01, | 136 | HOST_IA32_PAT_HIGH = 0x00002c01, |
137 | HOST_IA32_EFER = 0x00002c02, | ||
138 | HOST_IA32_EFER_HIGH = 0x00002c03, | ||
133 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, | 139 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, |
134 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, | 140 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, |
135 | EXCEPTION_BITMAP = 0x00004004, | 141 | EXCEPTION_BITMAP = 0x00004004, |
@@ -394,6 +400,10 @@ enum vmcs_field { | |||
394 | #define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" | 400 | #define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" |
395 | #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" | 401 | #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" |
396 | 402 | ||
397 | 403 | struct vmx_msr_entry { | |
404 | u32 index; | ||
405 | u32 reserved; | ||
406 | u64 value; | ||
407 | } __aligned(16); | ||
398 | 408 | ||
399 | #endif | 409 | #endif |
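The vmx_msr_entry structure added above is laid out as one slot of the VMX MSR-load/store areas: a 32-bit MSR index, a reserved word, then the 64-bit value, packed into a 16-byte, 16-byte-aligned record. A user-space rebuild of the layout that checks those properties at compile time (stdint types replace the kernel's u32/u64, and __attribute__((aligned(16))) spells out what the kernel's __aligned(16) expands to):

#include <stdint.h>
#include <assert.h>

struct vmx_msr_entry {
	uint32_t index;		/* MSR number                 */
	uint32_t reserved;	/* must be zero               */
	uint64_t value;		/* MSR contents to load/store */
} __attribute__((aligned(16)));

static_assert(sizeof(struct vmx_msr_entry) == 16, "one 16-byte slot per MSR");
static_assert(_Alignof(struct vmx_msr_entry) == 16, "16-byte aligned");

int main(void)
{
	/* An MSR-load area is simply an array of these entries. */
	struct vmx_msr_entry area[2] = {
		{ .index = 0xc0000080u, .value = 0 },	/* e.g. IA32_EFER */
	};

	return area[0].reserved;	/* 0 */
}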
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index ddc04ccad03b..2c4390cae228 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h | |||
@@ -37,8 +37,9 @@ extern int check_for_xstate(struct i387_fxsave_struct __user *buf, | |||
37 | void __user *fpstate, | 37 | void __user *fpstate, |
38 | struct _fpx_sw_bytes *sw); | 38 | struct _fpx_sw_bytes *sw); |
39 | 39 | ||
40 | static inline int xrstor_checking(struct xsave_struct *fx) | 40 | static inline int fpu_xrstor_checking(struct fpu *fpu) |
41 | { | 41 | { |
42 | struct xsave_struct *fx = &fpu->state->xsave; | ||
42 | int err; | 43 | int err; |
43 | 44 | ||
44 | asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" | 45 | asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" |
@@ -110,12 +111,12 @@ static inline void xrstor_state(struct xsave_struct *fx, u64 mask) | |||
110 | : "memory"); | 111 | : "memory"); |
111 | } | 112 | } |
112 | 113 | ||
113 | static inline void xsave(struct task_struct *tsk) | 114 | static inline void fpu_xsave(struct fpu *fpu) |
114 | { | 115 | { |
115 | /* This, however, we can work around by forcing the compiler to select | 116 | /* This, however, we can work around by forcing the compiler to select |
116 | an addressing mode that doesn't require extended registers. */ | 117 | an addressing mode that doesn't require extended registers. */ |
117 | __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27" | 118 | __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27" |
118 | : : "D" (&(tsk->thread.xstate->xsave)), | 119 | : : "D" (&(fpu->state->xsave)), |
119 | "a" (-1), "d"(-1) : "memory"); | 120 | "a" (-1), "d"(-1) : "memory"); |
120 | } | 121 | } |
121 | #endif | 122 | #endif |
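The xsave.h hunk is part of the FPU rework in this merge: the per-task extended state now lives behind a struct fpu container (fpu->state), so the low-level helpers take the container rather than a task_struct or a raw buffer. A rough, compile-only sketch of just that indirection, with all kernel types reduced to stand-ins and the XSAVE asm itself left out (the names below are placeholders, not the real definitions):

#include <stddef.h>

/* Stand-ins for the kernel types; only the pointer chain is of interest. */
struct xsave_struct { unsigned char area[512]; };
union thread_xstate { struct xsave_struct xsave; };
struct fpu          { union thread_xstate *state; };

/* New style: the helper takes the fpu container ... */
static void *fpu_xsave_buffer(struct fpu *fpu)
{
	return &fpu->state->xsave;	/* what fpu_xsave() hands to XSAVE */
}

int main(void)
{
	union thread_xstate xstate;
	struct fpu fpu = { .state = &xstate };

	/* ... so callers pass the task's struct fpu instead of the task. */
	return fpu_xsave_buffer(&fpu) == NULL;	/* 0 */
}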
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4c58352209e0..e77b22083721 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -47,8 +47,6 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | |||
47 | obj-y += process.o | 47 | obj-y += process.o |
48 | obj-y += i387.o xsave.o | 48 | obj-y += i387.o xsave.o |
49 | obj-y += ptrace.o | 49 | obj-y += ptrace.o |
50 | obj-$(CONFIG_X86_DS) += ds.o | ||
51 | obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o | ||
52 | obj-$(CONFIG_X86_32) += tls.o | 50 | obj-$(CONFIG_X86_32) += tls.o |
53 | obj-$(CONFIG_IA32_EMULATION) += tls.o | 51 | obj-$(CONFIG_IA32_EMULATION) += tls.o |
54 | obj-y += step.o | 52 | obj-y += step.o |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index cd40aba6aa95..60cc4058ed5f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -63,7 +63,6 @@ EXPORT_SYMBOL(acpi_disabled); | |||
63 | int acpi_noirq; /* skip ACPI IRQ initialization */ | 63 | int acpi_noirq; /* skip ACPI IRQ initialization */ |
64 | int acpi_pci_disabled; /* skip ACPI PCI scan and IRQ initialization */ | 64 | int acpi_pci_disabled; /* skip ACPI PCI scan and IRQ initialization */ |
65 | EXPORT_SYMBOL(acpi_pci_disabled); | 65 | EXPORT_SYMBOL(acpi_pci_disabled); |
66 | int acpi_ht __initdata = 1; /* enable HT */ | ||
67 | 66 | ||
68 | int acpi_lapic; | 67 | int acpi_lapic; |
69 | int acpi_ioapic; | 68 | int acpi_ioapic; |
@@ -94,6 +93,53 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; | |||
94 | 93 | ||
95 | 94 | ||
96 | /* | 95 | /* |
96 | * ISA irqs by default are the first 16 gsis but can be | ||
97 | * any gsi as specified by an interrupt source override. | ||
98 | */ | ||
99 | static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = { | ||
100 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 | ||
101 | }; | ||
102 | |||
103 | static unsigned int gsi_to_irq(unsigned int gsi) | ||
104 | { | ||
105 | unsigned int irq = gsi + NR_IRQS_LEGACY; | ||
106 | unsigned int i; | ||
107 | |||
108 | for (i = 0; i < NR_IRQS_LEGACY; i++) { | ||
109 | if (isa_irq_to_gsi[i] == gsi) { | ||
110 | return i; | ||
111 | } | ||
112 | } | ||
113 | |||
114 | /* Provide an identity mapping of gsi == irq | ||
115 | * except on truly weird platforms that have | ||
116 | * non-ISA irqs in the first 16 gsis. | ||
117 | */ | ||
118 | if (gsi >= NR_IRQS_LEGACY) | ||
119 | irq = gsi; | ||
120 | else | ||
121 | irq = gsi_end + 1 + gsi; | ||
122 | |||
123 | return irq; | ||
124 | } | ||
125 | |||
126 | static u32 irq_to_gsi(int irq) | ||
127 | { | ||
128 | unsigned int gsi; | ||
129 | |||
130 | if (irq < NR_IRQS_LEGACY) | ||
131 | gsi = isa_irq_to_gsi[irq]; | ||
132 | else if (irq <= gsi_end) | ||
133 | gsi = irq; | ||
134 | else if (irq <= (gsi_end + NR_IRQS_LEGACY)) | ||
135 | gsi = irq - gsi_end; | ||
136 | else | ||
137 | gsi = 0xffffffff; | ||
138 | |||
139 | return gsi; | ||
140 | } | ||
141 | |||
142 | /* | ||
97 | * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, | 143 | * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, |
98 | * to map the target physical address. The problem is that set_fixmap() | 144 | * to map the target physical address. The problem is that set_fixmap() |
99 | * provides a single page, and it is possible that the page is not | 145 | * provides a single page, and it is possible that the page is not |
@@ -313,7 +359,7 @@ acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) | |||
313 | /* | 359 | /* |
314 | * Parse Interrupt Source Override for the ACPI SCI | 360 | * Parse Interrupt Source Override for the ACPI SCI |
315 | */ | 361 | */ |
316 | static void __init acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) | 362 | static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, u32 gsi) |
317 | { | 363 | { |
318 | if (trigger == 0) /* compatible SCI trigger is level */ | 364 | if (trigger == 0) /* compatible SCI trigger is level */ |
319 | trigger = 3; | 365 | trigger = 3; |
@@ -333,7 +379,7 @@ static void __init acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) | |||
333 | * If GSI is < 16, this will update its flags, | 379 | * If GSI is < 16, this will update its flags, |
334 | * else it will create a new mp_irqs[] entry. | 380 | * else it will create a new mp_irqs[] entry. |
335 | */ | 381 | */ |
336 | mp_override_legacy_irq(gsi, polarity, trigger, gsi); | 382 | mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); |
337 | 383 | ||
338 | /* | 384 | /* |
339 | * stash over-ride to indicate we've been here | 385 | * stash over-ride to indicate we've been here |
@@ -357,9 +403,10 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, | |||
357 | acpi_table_print_madt_entry(header); | 403 | acpi_table_print_madt_entry(header); |
358 | 404 | ||
359 | if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { | 405 | if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { |
360 | acpi_sci_ioapic_setup(intsrc->global_irq, | 406 | acpi_sci_ioapic_setup(intsrc->source_irq, |
361 | intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, | 407 | intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, |
362 | (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2); | 408 | (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2, |
409 | intsrc->global_irq); | ||
363 | return 0; | 410 | return 0; |
364 | } | 411 | } |
365 | 412 | ||
@@ -448,7 +495,7 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) | |||
448 | 495 | ||
449 | int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) | 496 | int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) |
450 | { | 497 | { |
451 | *irq = gsi; | 498 | *irq = gsi_to_irq(gsi); |
452 | 499 | ||
453 | #ifdef CONFIG_X86_IO_APIC | 500 | #ifdef CONFIG_X86_IO_APIC |
454 | if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) | 501 | if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) |
@@ -458,6 +505,14 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) | |||
458 | return 0; | 505 | return 0; |
459 | } | 506 | } |
460 | 507 | ||
508 | int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) | ||
509 | { | ||
510 | if (isa_irq >= 16) | ||
511 | return -1; | ||
512 | *gsi = irq_to_gsi(isa_irq); | ||
513 | return 0; | ||
514 | } | ||
515 | |||
461 | /* | 516 | /* |
462 | * success: return IRQ number (>=0) | 517 | * success: return IRQ number (>=0) |
463 | * failure: return < 0 | 518 | * failure: return < 0 |
@@ -482,7 +537,7 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
482 | plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); | 537 | plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); |
483 | } | 538 | } |
484 | #endif | 539 | #endif |
485 | irq = plat_gsi; | 540 | irq = gsi_to_irq(plat_gsi); |
486 | 541 | ||
487 | return irq; | 542 | return irq; |
488 | } | 543 | } |
@@ -867,29 +922,6 @@ static int __init acpi_parse_madt_lapic_entries(void) | |||
867 | extern int es7000_plat; | 922 | extern int es7000_plat; |
868 | #endif | 923 | #endif |
869 | 924 | ||
870 | int __init acpi_probe_gsi(void) | ||
871 | { | ||
872 | int idx; | ||
873 | int gsi; | ||
874 | int max_gsi = 0; | ||
875 | |||
876 | if (acpi_disabled) | ||
877 | return 0; | ||
878 | |||
879 | if (!acpi_ioapic) | ||
880 | return 0; | ||
881 | |||
882 | max_gsi = 0; | ||
883 | for (idx = 0; idx < nr_ioapics; idx++) { | ||
884 | gsi = mp_gsi_routing[idx].gsi_end; | ||
885 | |||
886 | if (gsi > max_gsi) | ||
887 | max_gsi = gsi; | ||
888 | } | ||
889 | |||
890 | return max_gsi + 1; | ||
891 | } | ||
892 | |||
893 | static void assign_to_mp_irq(struct mpc_intsrc *m, | 925 | static void assign_to_mp_irq(struct mpc_intsrc *m, |
894 | struct mpc_intsrc *mp_irq) | 926 | struct mpc_intsrc *mp_irq) |
895 | { | 927 | { |
@@ -947,13 +979,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) | |||
947 | mp_irq.dstirq = pin; /* INTIN# */ | 979 | mp_irq.dstirq = pin; /* INTIN# */ |
948 | 980 | ||
949 | save_mp_irq(&mp_irq); | 981 | save_mp_irq(&mp_irq); |
982 | |||
983 | isa_irq_to_gsi[bus_irq] = gsi; | ||
950 | } | 984 | } |
951 | 985 | ||
952 | void __init mp_config_acpi_legacy_irqs(void) | 986 | void __init mp_config_acpi_legacy_irqs(void) |
953 | { | 987 | { |
954 | int i; | 988 | int i; |
955 | int ioapic; | ||
956 | unsigned int dstapic; | ||
957 | struct mpc_intsrc mp_irq; | 989 | struct mpc_intsrc mp_irq; |
958 | 990 | ||
959 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | 991 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) |
@@ -974,19 +1006,27 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
974 | #endif | 1006 | #endif |
975 | 1007 | ||
976 | /* | 1008 | /* |
977 | * Locate the IOAPIC that manages the ISA IRQs (0-15). | ||
978 | */ | ||
979 | ioapic = mp_find_ioapic(0); | ||
980 | if (ioapic < 0) | ||
981 | return; | ||
982 | dstapic = mp_ioapics[ioapic].apicid; | ||
983 | |||
984 | /* | ||
985 | * Use the default configuration for the IRQs 0-15. Unless | 1009 | * Use the default configuration for the IRQs 0-15. Unless |
986 | * overridden by (MADT) interrupt source override entries. | 1010 | * overridden by (MADT) interrupt source override entries. |
987 | */ | 1011 | */ |
988 | for (i = 0; i < 16; i++) { | 1012 | for (i = 0; i < 16; i++) { |
1013 | int ioapic, pin; | ||
1014 | unsigned int dstapic; | ||
989 | int idx; | 1015 | int idx; |
1016 | u32 gsi; | ||
1017 | |||
1018 | /* Locate the gsi that irq i maps to. */ | ||
1019 | if (acpi_isa_irq_to_gsi(i, &gsi)) | ||
1020 | continue; | ||
1021 | |||
1022 | /* | ||
1023 | * Locate the IOAPIC that manages the ISA IRQ. | ||
1024 | */ | ||
1025 | ioapic = mp_find_ioapic(gsi); | ||
1026 | if (ioapic < 0) | ||
1027 | continue; | ||
1028 | pin = mp_find_ioapic_pin(ioapic, gsi); | ||
1029 | dstapic = mp_ioapics[ioapic].apicid; | ||
990 | 1030 | ||
991 | for (idx = 0; idx < mp_irq_entries; idx++) { | 1031 | for (idx = 0; idx < mp_irq_entries; idx++) { |
992 | struct mpc_intsrc *irq = mp_irqs + idx; | 1032 | struct mpc_intsrc *irq = mp_irqs + idx; |
@@ -996,7 +1036,7 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
996 | break; | 1036 | break; |
997 | 1037 | ||
998 | /* Do we already have a mapping for this IOAPIC pin */ | 1038 | /* Do we already have a mapping for this IOAPIC pin */ |
999 | if (irq->dstapic == dstapic && irq->dstirq == i) | 1039 | if (irq->dstapic == dstapic && irq->dstirq == pin) |
1000 | break; | 1040 | break; |
1001 | } | 1041 | } |
1002 | 1042 | ||
@@ -1011,7 +1051,7 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
1011 | mp_irq.dstapic = dstapic; | 1051 | mp_irq.dstapic = dstapic; |
1012 | mp_irq.irqtype = mp_INT; | 1052 | mp_irq.irqtype = mp_INT; |
1013 | mp_irq.srcbusirq = i; /* Identity mapped */ | 1053 | mp_irq.srcbusirq = i; /* Identity mapped */ |
1014 | mp_irq.dstirq = i; | 1054 | mp_irq.dstirq = pin; |
1015 | 1055 | ||
1016 | save_mp_irq(&mp_irq); | 1056 | save_mp_irq(&mp_irq); |
1017 | } | 1057 | } |
@@ -1076,11 +1116,6 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
1076 | 1116 | ||
1077 | ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); | 1117 | ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); |
1078 | 1118 | ||
1079 | #ifdef CONFIG_X86_32 | ||
1080 | if (ioapic_renumber_irq) | ||
1081 | gsi = ioapic_renumber_irq(ioapic, gsi); | ||
1082 | #endif | ||
1083 | |||
1084 | if (ioapic_pin > MP_MAX_IOAPIC_PIN) { | 1119 | if (ioapic_pin > MP_MAX_IOAPIC_PIN) { |
1085 | printk(KERN_ERR "Invalid reference to IOAPIC pin " | 1120 | printk(KERN_ERR "Invalid reference to IOAPIC pin " |
1086 | "%d-%d\n", mp_ioapics[ioapic].apicid, | 1121 | "%d-%d\n", mp_ioapics[ioapic].apicid, |
@@ -1094,7 +1129,7 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
1094 | set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, | 1129 | set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, |
1095 | trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, | 1130 | trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, |
1096 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); | 1131 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); |
1097 | io_apic_set_pci_routing(dev, gsi, &irq_attr); | 1132 | io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr); |
1098 | 1133 | ||
1099 | return gsi; | 1134 | return gsi; |
1100 | } | 1135 | } |
@@ -1154,7 +1189,8 @@ static int __init acpi_parse_madt_ioapic_entries(void) | |||
1154 | * pretend we got one so we can set the SCI flags. | 1189 | * pretend we got one so we can set the SCI flags. |
1155 | */ | 1190 | */ |
1156 | if (!acpi_sci_override_gsi) | 1191 | if (!acpi_sci_override_gsi) |
1157 | acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0); | 1192 | acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0, |
1193 | acpi_gbl_FADT.sci_interrupt); | ||
1158 | 1194 | ||
1159 | /* Fill in identity legacy mappings where no override */ | 1195 | /* Fill in identity legacy mappings where no override */ |
1160 | mp_config_acpi_legacy_irqs(); | 1196 | mp_config_acpi_legacy_irqs(); |
@@ -1464,9 +1500,8 @@ void __init acpi_boot_table_init(void) | |||
1464 | 1500 | ||
1465 | /* | 1501 | /* |
1466 | * If acpi_disabled, bail out | 1502 | * If acpi_disabled, bail out |
1467 | * One exception: acpi=ht continues far enough to enumerate LAPICs | ||
1468 | */ | 1503 | */ |
1469 | if (acpi_disabled && !acpi_ht) | 1504 | if (acpi_disabled) |
1470 | return; | 1505 | return; |
1471 | 1506 | ||
1472 | /* | 1507 | /* |
@@ -1497,9 +1532,8 @@ int __init early_acpi_boot_init(void) | |||
1497 | { | 1532 | { |
1498 | /* | 1533 | /* |
1499 | * If acpi_disabled, bail out | 1534 | * If acpi_disabled, bail out |
1500 | * One exception: acpi=ht continues far enough to enumerate LAPICs | ||
1501 | */ | 1535 | */ |
1502 | if (acpi_disabled && !acpi_ht) | 1536 | if (acpi_disabled) |
1503 | return 1; | 1537 | return 1; |
1504 | 1538 | ||
1505 | /* | 1539 | /* |
@@ -1517,9 +1551,8 @@ int __init acpi_boot_init(void) | |||
1517 | 1551 | ||
1518 | /* | 1552 | /* |
1519 | * If acpi_disabled, bail out | 1553 | * If acpi_disabled, bail out |
1520 | * One exception: acpi=ht continues far enough to enumerate LAPICs | ||
1521 | */ | 1554 | */ |
1522 | if (acpi_disabled && !acpi_ht) | 1555 | if (acpi_disabled) |
1523 | return 1; | 1556 | return 1; |
1524 | 1557 | ||
1525 | acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); | 1558 | acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); |
@@ -1554,21 +1587,12 @@ static int __init parse_acpi(char *arg) | |||
1554 | /* acpi=force to over-ride black-list */ | 1587 | /* acpi=force to over-ride black-list */ |
1555 | else if (strcmp(arg, "force") == 0) { | 1588 | else if (strcmp(arg, "force") == 0) { |
1556 | acpi_force = 1; | 1589 | acpi_force = 1; |
1557 | acpi_ht = 1; | ||
1558 | acpi_disabled = 0; | 1590 | acpi_disabled = 0; |
1559 | } | 1591 | } |
1560 | /* acpi=strict disables out-of-spec workarounds */ | 1592 | /* acpi=strict disables out-of-spec workarounds */ |
1561 | else if (strcmp(arg, "strict") == 0) { | 1593 | else if (strcmp(arg, "strict") == 0) { |
1562 | acpi_strict = 1; | 1594 | acpi_strict = 1; |
1563 | } | 1595 | } |
1564 | /* Limit ACPI just to boot-time to enable HT */ | ||
1565 | else if (strcmp(arg, "ht") == 0) { | ||
1566 | if (!acpi_force) { | ||
1567 | printk(KERN_WARNING "acpi=ht will be removed in Linux-2.6.35\n"); | ||
1568 | disable_acpi(); | ||
1569 | } | ||
1570 | acpi_ht = 1; | ||
1571 | } | ||
1572 | /* acpi=rsdt use RSDT instead of XSDT */ | 1596 | /* acpi=rsdt use RSDT instead of XSDT */ |
1573 | else if (strcmp(arg, "rsdt") == 0) { | 1597 | else if (strcmp(arg, "rsdt") == 0) { |
1574 | acpi_rsdt_forced = 1; | 1598 | acpi_rsdt_forced = 1; |
@@ -1576,6 +1600,10 @@ static int __init parse_acpi(char *arg) | |||
1576 | /* "acpi=noirq" disables ACPI interrupt routing */ | 1600 | /* "acpi=noirq" disables ACPI interrupt routing */ |
1577 | else if (strcmp(arg, "noirq") == 0) { | 1601 | else if (strcmp(arg, "noirq") == 0) { |
1578 | acpi_noirq_set(); | 1602 | acpi_noirq_set(); |
1603 | } | ||
1604 | /* "acpi=copy_dsdt" copys DSDT */ | ||
1605 | else if (strcmp(arg, "copy_dsdt") == 0) { | ||
1606 | acpi_gbl_copy_dsdt_locally = 1; | ||
1579 | } else { | 1607 | } else { |
1580 | /* Core will printk when we return error. */ | 1608 | /* Core will printk when we return error. */ |
1581 | return -EINVAL; | 1609 | return -EINVAL; |
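The hunk above drops the deprecated acpi=ht branch from parse_acpi() and adds acpi=copy_dsdt; the handler is a plain strcmp chain over the value passed to the acpi= boot parameter. Below is a stand-alone sketch of that dispatch shape — the option strings mirror the patch, but the handler bodies are illustrative placeholders, not kernel code.

#include <stdio.h>
#include <string.h>

static int parse_acpi_example(const char *arg)
{
        if (!arg)
                return -1;
        if (strcmp(arg, "force") == 0)
                puts("override the ACPI blacklist");
        else if (strcmp(arg, "strict") == 0)
                puts("disable out-of-spec workarounds");
        else if (strcmp(arg, "copy_dsdt") == 0)
                puts("copy the DSDT to local memory");
        else
                return -1;      /* unknown option: let the caller report it */
        return 0;
}

int main(void)
{
        return parse_acpi_example("copy_dsdt");
}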
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index f9961034e557..82e508677b91 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -162,8 +162,6 @@ static int __init acpi_sleep_setup(char *str) | |||
162 | #endif | 162 | #endif |
163 | if (strncmp(str, "old_ordering", 12) == 0) | 163 | if (strncmp(str, "old_ordering", 12) == 0) |
164 | acpi_old_suspend_ordering(); | 164 | acpi_old_suspend_ordering(); |
165 | if (strncmp(str, "sci_force_enable", 16) == 0) | ||
166 | acpi_set_sci_en_on_resume(); | ||
167 | str = strchr(str, ','); | 165 | str = strchr(str, ','); |
168 | if (str != NULL) | 166 | if (str != NULL) |
169 | str += strspn(str, ", \t"); | 167 | str += strspn(str, ", \t"); |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 1a160d5d44d0..70237732a6c7 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -194,7 +194,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) | |||
194 | } | 194 | } |
195 | 195 | ||
196 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | 196 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; |
197 | extern u8 *__smp_locks[], *__smp_locks_end[]; | 197 | extern s32 __smp_locks[], __smp_locks_end[]; |
198 | static void *text_poke_early(void *addr, const void *opcode, size_t len); | 198 | static void *text_poke_early(void *addr, const void *opcode, size_t len); |
199 | 199 | ||
200 | /* Replace instructions with better alternatives for this CPU type. | 200 | /* Replace instructions with better alternatives for this CPU type. |
@@ -235,37 +235,41 @@ void __init_or_module apply_alternatives(struct alt_instr *start, | |||
235 | 235 | ||
236 | #ifdef CONFIG_SMP | 236 | #ifdef CONFIG_SMP |
237 | 237 | ||
238 | static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) | 238 | static void alternatives_smp_lock(const s32 *start, const s32 *end, |
239 | u8 *text, u8 *text_end) | ||
239 | { | 240 | { |
240 | u8 **ptr; | 241 | const s32 *poff; |
241 | 242 | ||
242 | mutex_lock(&text_mutex); | 243 | mutex_lock(&text_mutex); |
243 | for (ptr = start; ptr < end; ptr++) { | 244 | for (poff = start; poff < end; poff++) { |
244 | if (*ptr < text) | 245 | u8 *ptr = (u8 *)poff + *poff; |
245 | continue; | 246 | |
246 | if (*ptr > text_end) | 247 | if (!*poff || ptr < text || ptr >= text_end) |
247 | continue; | 248 | continue; |
248 | /* turn DS segment override prefix into lock prefix */ | 249 | /* turn DS segment override prefix into lock prefix */ |
249 | text_poke(*ptr, ((unsigned char []){0xf0}), 1); | 250 | if (*ptr == 0x3e) |
251 | text_poke(ptr, ((unsigned char []){0xf0}), 1); | ||
250 | }; | 252 | }; |
251 | mutex_unlock(&text_mutex); | 253 | mutex_unlock(&text_mutex); |
252 | } | 254 | } |
253 | 255 | ||
254 | static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) | 256 | static void alternatives_smp_unlock(const s32 *start, const s32 *end, |
257 | u8 *text, u8 *text_end) | ||
255 | { | 258 | { |
256 | u8 **ptr; | 259 | const s32 *poff; |
257 | 260 | ||
258 | if (noreplace_smp) | 261 | if (noreplace_smp) |
259 | return; | 262 | return; |
260 | 263 | ||
261 | mutex_lock(&text_mutex); | 264 | mutex_lock(&text_mutex); |
262 | for (ptr = start; ptr < end; ptr++) { | 265 | for (poff = start; poff < end; poff++) { |
263 | if (*ptr < text) | 266 | u8 *ptr = (u8 *)poff + *poff; |
264 | continue; | 267 | |
265 | if (*ptr > text_end) | 268 | if (!*poff || ptr < text || ptr >= text_end) |
266 | continue; | 269 | continue; |
267 | /* turn lock prefix into DS segment override prefix */ | 270 | /* turn lock prefix into DS segment override prefix */ |
268 | text_poke(*ptr, ((unsigned char []){0x3E}), 1); | 271 | if (*ptr == 0xf0) |
272 | text_poke(ptr, ((unsigned char []){0x3E}), 1); | ||
269 | }; | 273 | }; |
270 | mutex_unlock(&text_mutex); | 274 | mutex_unlock(&text_mutex); |
271 | } | 275 | } |
@@ -276,8 +280,8 @@ struct smp_alt_module { | |||
276 | char *name; | 280 | char *name; |
277 | 281 | ||
278 | /* ptrs to lock prefixes */ | 282 | /* ptrs to lock prefixes */ |
279 | u8 **locks; | 283 | const s32 *locks; |
280 | u8 **locks_end; | 284 | const s32 *locks_end; |
281 | 285 | ||
282 | /* .text segment, needed to avoid patching init code ;) */ | 286 | /* .text segment, needed to avoid patching init code ;) */ |
283 | u8 *text; | 287 | u8 *text; |
@@ -398,16 +402,19 @@ void alternatives_smp_switch(int smp) | |||
398 | int alternatives_text_reserved(void *start, void *end) | 402 | int alternatives_text_reserved(void *start, void *end) |
399 | { | 403 | { |
400 | struct smp_alt_module *mod; | 404 | struct smp_alt_module *mod; |
401 | u8 **ptr; | 405 | const s32 *poff; |
402 | u8 *text_start = start; | 406 | u8 *text_start = start; |
403 | u8 *text_end = end; | 407 | u8 *text_end = end; |
404 | 408 | ||
405 | list_for_each_entry(mod, &smp_alt_modules, next) { | 409 | list_for_each_entry(mod, &smp_alt_modules, next) { |
406 | if (mod->text > text_end || mod->text_end < text_start) | 410 | if (mod->text > text_end || mod->text_end < text_start) |
407 | continue; | 411 | continue; |
408 | for (ptr = mod->locks; ptr < mod->locks_end; ptr++) | 412 | for (poff = mod->locks; poff < mod->locks_end; poff++) { |
409 | if (text_start <= *ptr && text_end >= *ptr) | 413 | const u8 *ptr = (const u8 *)poff + *poff; |
414 | |||
415 | if (text_start <= ptr && text_end > ptr) | ||
410 | return 1; | 416 | return 1; |
417 | } | ||
411 | } | 418 | } |
412 | 419 | ||
413 | return 0; | 420 | return 0; |
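The alternatives.c changes above convert the .smp_locks section from an array of absolute u8 * pointers into an array of 32-bit self-relative offsets: each s32 entry holds the distance from the entry itself to the byte it describes, which halves the table on 64-bit kernels and keeps entries valid wherever the code ends up. A minimal sketch of storing and resolving such an offset, assuming a flat address space as the kernel does (names are illustrative, not from the patch):

#include <stdint.h>
#include <stdio.h>

/* Resolve a self-relative entry: target = address of entry + stored delta. */
static uint8_t *resolve(const int32_t *entry)
{
        return (uint8_t *)entry + *entry;
}

static uint8_t text[32];        /* stand-in for a .text byte being tracked */
static int32_t table[1];        /* stand-in for one .smp_locks entry */

int main(void)
{
        /* The linker normally emits ".long target - ." for each entry;
         * here the delta is computed by hand for demonstration. */
        table[0] = (int32_t)((intptr_t)&text[5] - (intptr_t)&table[0]);

        printf("resolved %p, expected %p\n",
               (void *)resolve(&table[0]), (void *)&text[5]);
        return 0;
}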
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index e5a4a1e01618..c02cc692985c 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <asm/smp.h> | 51 | #include <asm/smp.h> |
52 | #include <asm/mce.h> | 52 | #include <asm/mce.h> |
53 | #include <asm/kvm_para.h> | 53 | #include <asm/kvm_para.h> |
54 | #include <asm/tsc.h> | ||
54 | 55 | ||
55 | unsigned int num_processors; | 56 | unsigned int num_processors; |
56 | 57 | ||
@@ -1151,8 +1152,13 @@ static void __cpuinit lapic_setup_esr(void) | |||
1151 | */ | 1152 | */ |
1152 | void __cpuinit setup_local_APIC(void) | 1153 | void __cpuinit setup_local_APIC(void) |
1153 | { | 1154 | { |
1154 | unsigned int value; | 1155 | unsigned int value, queued; |
1155 | int i, j; | 1156 | int i, j, acked = 0; |
1157 | unsigned long long tsc = 0, ntsc; | ||
1158 | long long max_loops = cpu_khz; | ||
1159 | |||
1160 | if (cpu_has_tsc) | ||
1161 | rdtscll(tsc); | ||
1156 | 1162 | ||
1157 | if (disable_apic) { | 1163 | if (disable_apic) { |
1158 | arch_disable_smp_support(); | 1164 | arch_disable_smp_support(); |
@@ -1204,13 +1210,32 @@ void __cpuinit setup_local_APIC(void) | |||
1204 | * the interrupt. Hence a vector might get locked. It was noticed | 1210 | * the interrupt. Hence a vector might get locked. It was noticed |
1205 | * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. | 1211 | * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. |
1206 | */ | 1212 | */ |
1207 | for (i = APIC_ISR_NR - 1; i >= 0; i--) { | 1213 | do { |
1208 | value = apic_read(APIC_ISR + i*0x10); | 1214 | queued = 0; |
1209 | for (j = 31; j >= 0; j--) { | 1215 | for (i = APIC_ISR_NR - 1; i >= 0; i--) |
1210 | if (value & (1<<j)) | 1216 | queued |= apic_read(APIC_IRR + i*0x10); |
1211 | ack_APIC_irq(); | 1217 | |
1218 | for (i = APIC_ISR_NR - 1; i >= 0; i--) { | ||
1219 | value = apic_read(APIC_ISR + i*0x10); | ||
1220 | for (j = 31; j >= 0; j--) { | ||
1221 | if (value & (1<<j)) { | ||
1222 | ack_APIC_irq(); | ||
1223 | acked++; | ||
1224 | } | ||
1225 | } | ||
1212 | } | 1226 | } |
1213 | } | 1227 | if (acked > 256) { |
1228 | printk(KERN_ERR "LAPIC pending interrupts after %d EOI\n", | ||
1229 | acked); | ||
1230 | break; | ||
1231 | } | ||
1232 | if (cpu_has_tsc) { | ||
1233 | rdtscll(ntsc); | ||
1234 | max_loops = (cpu_khz << 10) - (ntsc - tsc); | ||
1235 | } else | ||
1236 | max_loops--; | ||
1237 | } while (queued && max_loops > 0); | ||
1238 | WARN_ON(max_loops <= 0); | ||
1214 | 1239 | ||
1215 | /* | 1240 | /* |
1216 | * Now that we are all set up, enable the APIC | 1241 | * Now that we are all set up, enable the APIC |
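setup_local_APIC() now keeps issuing EOIs while the IRR still shows pending vectors, instead of making a single pass over the ISR, and bounds the loop with a TSC-derived budget (or a simple countdown without TSC) so a wedged interrupt cannot hang boot forever. The user-space analogue below shows only the loop structure; the APIC register reads are stubbed out and the budget handling is simplified.

#include <stdio.h>

#define ISR_BANKS 8     /* matches APIC_ISR_NR: 8 banks of 32 vectors */

/* Stubs standing in for apic_read(APIC_IRR/ISR + i*0x10) and ack_APIC_irq(). */
static unsigned read_irr(int bank) { (void)bank; return 0; }
static unsigned read_isr(int bank) { (void)bank; return 0; }
static void ack_irq(void) { }

static void drain_pending(long long budget)
{
        unsigned queued;
        int acked = 0;

        do {
                queued = 0;
                for (int i = ISR_BANKS - 1; i >= 0; i--)
                        queued |= read_irr(i);

                for (int i = ISR_BANKS - 1; i >= 0; i--) {
                        unsigned value = read_isr(i);

                        for (int j = 31; j >= 0; j--)
                                if (value & (1u << j)) {
                                        ack_irq();
                                        acked++;
                                }
                }
                if (acked > 256) {      /* give up instead of spinning forever */
                        fprintf(stderr, "still pending after %d EOIs\n", acked);
                        break;
                }
                budget--;               /* the patch recomputes this from the TSC */
        } while (queued && budget > 0);
}

int main(void)
{
        drain_pending(1000);
        return 0;
}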
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 03ba1b895f5e..425e53a87feb 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
@@ -131,24 +131,6 @@ int es7000_plat; | |||
131 | 131 | ||
132 | static unsigned int base; | 132 | static unsigned int base; |
133 | 133 | ||
134 | static int | ||
135 | es7000_rename_gsi(int ioapic, int gsi) | ||
136 | { | ||
137 | if (es7000_plat == ES7000_ZORRO) | ||
138 | return gsi; | ||
139 | |||
140 | if (!base) { | ||
141 | int i; | ||
142 | for (i = 0; i < nr_ioapics; i++) | ||
143 | base += nr_ioapic_registers[i]; | ||
144 | } | ||
145 | |||
146 | if (!ioapic && (gsi < 16)) | ||
147 | gsi += base; | ||
148 | |||
149 | return gsi; | ||
150 | } | ||
151 | |||
152 | static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) | 134 | static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) |
153 | { | 135 | { |
154 | unsigned long vect = 0, psaival = 0; | 136 | unsigned long vect = 0, psaival = 0; |
@@ -190,7 +172,6 @@ static void setup_unisys(void) | |||
190 | es7000_plat = ES7000_ZORRO; | 172 | es7000_plat = ES7000_ZORRO; |
191 | else | 173 | else |
192 | es7000_plat = ES7000_CLASSIC; | 174 | es7000_plat = ES7000_CLASSIC; |
193 | ioapic_renumber_irq = es7000_rename_gsi; | ||
194 | } | 175 | } |
195 | 176 | ||
196 | /* | 177 | /* |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index eb2789c3f721..33f3563a2a52 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -89,6 +89,9 @@ int nr_ioapics; | |||
89 | /* IO APIC gsi routing info */ | 89 | /* IO APIC gsi routing info */ |
90 | struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; | 90 | struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; |
91 | 91 | ||
92 | /* The last gsi number used */ | ||
93 | u32 gsi_end; | ||
94 | |||
92 | /* MP IRQ source entries */ | 95 | /* MP IRQ source entries */ |
93 | struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; | 96 | struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; |
94 | 97 | ||
@@ -1013,10 +1016,9 @@ static inline int irq_trigger(int idx) | |||
1013 | return MPBIOS_trigger(idx); | 1016 | return MPBIOS_trigger(idx); |
1014 | } | 1017 | } |
1015 | 1018 | ||
1016 | int (*ioapic_renumber_irq)(int ioapic, int irq); | ||
1017 | static int pin_2_irq(int idx, int apic, int pin) | 1019 | static int pin_2_irq(int idx, int apic, int pin) |
1018 | { | 1020 | { |
1019 | int irq, i; | 1021 | int irq; |
1020 | int bus = mp_irqs[idx].srcbus; | 1022 | int bus = mp_irqs[idx].srcbus; |
1021 | 1023 | ||
1022 | /* | 1024 | /* |
@@ -1028,18 +1030,12 @@ static int pin_2_irq(int idx, int apic, int pin) | |||
1028 | if (test_bit(bus, mp_bus_not_pci)) { | 1030 | if (test_bit(bus, mp_bus_not_pci)) { |
1029 | irq = mp_irqs[idx].srcbusirq; | 1031 | irq = mp_irqs[idx].srcbusirq; |
1030 | } else { | 1032 | } else { |
1031 | /* | 1033 | u32 gsi = mp_gsi_routing[apic].gsi_base + pin; |
1032 | * PCI IRQs are mapped in order | 1034 | |
1033 | */ | 1035 | if (gsi >= NR_IRQS_LEGACY) |
1034 | i = irq = 0; | 1036 | irq = gsi; |
1035 | while (i < apic) | 1037 | else |
1036 | irq += nr_ioapic_registers[i++]; | 1038 | irq = gsi_end + 1 + gsi; |
1037 | irq += pin; | ||
1038 | /* | ||
1039 | * For MPS mode, so far only needed by ES7000 platform | ||
1040 | */ | ||
1041 | if (ioapic_renumber_irq) | ||
1042 | irq = ioapic_renumber_irq(apic, irq); | ||
1043 | } | 1039 | } |
1044 | 1040 | ||
1045 | #ifdef CONFIG_X86_32 | 1041 | #ifdef CONFIG_X86_32 |
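With every IO-APIC's GSI range recorded at registration time, pin_2_irq() derives the IRQ number straight from gsi_base + pin: a GSI at or above the 16 legacy IRQs maps 1:1, while a PCI GSI that would collide with an ISA IRQ number is pushed just past the highest known GSI. The rule in isolation (constants illustrative, not a kernel function):

#include <stdio.h>

#define NR_IRQS_LEGACY 16

/* gsi_end is the highest GSI claimed by any IO-APIC, as tracked by the patch. */
static unsigned gsi_to_irq_rule(unsigned gsi, unsigned gsi_end)
{
        if (gsi >= NR_IRQS_LEGACY)
                return gsi;                     /* identity mapping */
        return gsi_end + 1 + gsi;               /* avoid the ISA IRQ numbers */
}

int main(void)
{
        printf("gsi 40 -> irq %u, gsi 9 -> irq %u\n",
               gsi_to_irq_rule(40, 55), gsi_to_irq_rule(9, 55));
        return 0;
}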
@@ -1950,20 +1946,8 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | |||
1950 | 1946 | ||
1951 | void __init enable_IO_APIC(void) | 1947 | void __init enable_IO_APIC(void) |
1952 | { | 1948 | { |
1953 | union IO_APIC_reg_01 reg_01; | ||
1954 | int i8259_apic, i8259_pin; | 1949 | int i8259_apic, i8259_pin; |
1955 | int apic; | 1950 | int apic; |
1956 | unsigned long flags; | ||
1957 | |||
1958 | /* | ||
1959 | * The number of IO-APIC IRQ registers (== #pins): | ||
1960 | */ | ||
1961 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
1962 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
1963 | reg_01.raw = io_apic_read(apic, 1); | ||
1964 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1965 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; | ||
1966 | } | ||
1967 | 1951 | ||
1968 | if (!legacy_pic->nr_legacy_irqs) | 1952 | if (!legacy_pic->nr_legacy_irqs) |
1969 | return; | 1953 | return; |
@@ -3858,27 +3842,20 @@ int __init io_apic_get_redir_entries (int ioapic) | |||
3858 | reg_01.raw = io_apic_read(ioapic, 1); | 3842 | reg_01.raw = io_apic_read(ioapic, 1); |
3859 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 3843 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
3860 | 3844 | ||
3861 | return reg_01.bits.entries; | 3845 | /* The register returns the maximum redirection index |
3846 | * supported, which is one less than the total number of redir | ||
3847 | * entries. | ||
3848 | */ | ||
3849 | return reg_01.bits.entries + 1; | ||
3862 | } | 3850 | } |
3863 | 3851 | ||
3864 | void __init probe_nr_irqs_gsi(void) | 3852 | void __init probe_nr_irqs_gsi(void) |
3865 | { | 3853 | { |
3866 | int nr = 0; | 3854 | int nr; |
3867 | 3855 | ||
3868 | nr = acpi_probe_gsi(); | 3856 | nr = gsi_end + 1 + NR_IRQS_LEGACY; |
3869 | if (nr > nr_irqs_gsi) { | 3857 | if (nr > nr_irqs_gsi) |
3870 | nr_irqs_gsi = nr; | 3858 | nr_irqs_gsi = nr; |
3871 | } else { | ||
3872 | /* for acpi=off or acpi is not compiled in */ | ||
3873 | int idx; | ||
3874 | |||
3875 | nr = 0; | ||
3876 | for (idx = 0; idx < nr_ioapics; idx++) | ||
3877 | nr += io_apic_get_redir_entries(idx) + 1; | ||
3878 | |||
3879 | if (nr > nr_irqs_gsi) | ||
3880 | nr_irqs_gsi = nr; | ||
3881 | } | ||
3882 | 3859 | ||
3883 | printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); | 3860 | printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); |
3884 | } | 3861 | } |
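io_apic_get_redir_entries() now returns the number of pins rather than the highest redirection index; the hardware register reports the maximum index, so the count is that value plus one, which in turn lets probe_nr_irqs_gsi() shrink to gsi_end + 1 + NR_IRQS_LEGACY. A tiny illustration of the off-by-one, assuming the conventional IO-APIC version register layout (bits 16-23 hold the maximum redirection entry):

#include <stdio.h>

static unsigned redir_entry_count(unsigned reg_01_raw)
{
        unsigned max_index = (reg_01_raw >> 16) & 0xff;

        return max_index + 1;   /* e.g. max index 23 -> 24 pins */
}

int main(void)
{
        printf("%u pins\n", redir_entry_count(0x00170011));     /* 0x17 = 23 */
        return 0;
}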
@@ -4085,22 +4062,27 @@ int __init io_apic_get_version(int ioapic) | |||
4085 | return reg_01.bits.version; | 4062 | return reg_01.bits.version; |
4086 | } | 4063 | } |
4087 | 4064 | ||
4088 | int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | 4065 | int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) |
4089 | { | 4066 | { |
4090 | int i; | 4067 | int ioapic, pin, idx; |
4091 | 4068 | ||
4092 | if (skip_ioapic_setup) | 4069 | if (skip_ioapic_setup) |
4093 | return -1; | 4070 | return -1; |
4094 | 4071 | ||
4095 | for (i = 0; i < mp_irq_entries; i++) | 4072 | ioapic = mp_find_ioapic(gsi); |
4096 | if (mp_irqs[i].irqtype == mp_INT && | 4073 | if (ioapic < 0) |
4097 | mp_irqs[i].srcbusirq == bus_irq) | ||
4098 | break; | ||
4099 | if (i >= mp_irq_entries) | ||
4100 | return -1; | 4074 | return -1; |
4101 | 4075 | ||
4102 | *trigger = irq_trigger(i); | 4076 | pin = mp_find_ioapic_pin(ioapic, gsi); |
4103 | *polarity = irq_polarity(i); | 4077 | if (pin < 0) |
4078 | return -1; | ||
4079 | |||
4080 | idx = find_irq_entry(ioapic, pin, mp_INT); | ||
4081 | if (idx < 0) | ||
4082 | return -1; | ||
4083 | |||
4084 | *trigger = irq_trigger(idx); | ||
4085 | *polarity = irq_polarity(idx); | ||
4104 | return 0; | 4086 | return 0; |
4105 | } | 4087 | } |
4106 | 4088 | ||
@@ -4241,7 +4223,7 @@ void __init ioapic_insert_resources(void) | |||
4241 | } | 4223 | } |
4242 | } | 4224 | } |
4243 | 4225 | ||
4244 | int mp_find_ioapic(int gsi) | 4226 | int mp_find_ioapic(u32 gsi) |
4245 | { | 4227 | { |
4246 | int i = 0; | 4228 | int i = 0; |
4247 | 4229 | ||
@@ -4256,7 +4238,7 @@ int mp_find_ioapic(int gsi) | |||
4256 | return -1; | 4238 | return -1; |
4257 | } | 4239 | } |
4258 | 4240 | ||
4259 | int mp_find_ioapic_pin(int ioapic, int gsi) | 4241 | int mp_find_ioapic_pin(int ioapic, u32 gsi) |
4260 | { | 4242 | { |
4261 | if (WARN_ON(ioapic == -1)) | 4243 | if (WARN_ON(ioapic == -1)) |
4262 | return -1; | 4244 | return -1; |
@@ -4284,6 +4266,7 @@ static int bad_ioapic(unsigned long address) | |||
4284 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | 4266 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) |
4285 | { | 4267 | { |
4286 | int idx = 0; | 4268 | int idx = 0; |
4269 | int entries; | ||
4287 | 4270 | ||
4288 | if (bad_ioapic(address)) | 4271 | if (bad_ioapic(address)) |
4289 | return; | 4272 | return; |
@@ -4302,9 +4285,17 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | |||
4302 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups | 4285 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups |
4303 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). | 4286 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). |
4304 | */ | 4287 | */ |
4288 | entries = io_apic_get_redir_entries(idx); | ||
4305 | mp_gsi_routing[idx].gsi_base = gsi_base; | 4289 | mp_gsi_routing[idx].gsi_base = gsi_base; |
4306 | mp_gsi_routing[idx].gsi_end = gsi_base + | 4290 | mp_gsi_routing[idx].gsi_end = gsi_base + entries - 1; |
4307 | io_apic_get_redir_entries(idx); | 4291 | |
4292 | /* | ||
4293 | * The number of IO-APIC IRQ registers (== #pins): | ||
4294 | */ | ||
4295 | nr_ioapic_registers[idx] = entries; | ||
4296 | |||
4297 | if (mp_gsi_routing[idx].gsi_end > gsi_end) | ||
4298 | gsi_end = mp_gsi_routing[idx].gsi_end; | ||
4308 | 4299 | ||
4309 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " | 4300 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " |
4310 | "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, | 4301 | "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index c085d52dbaf2..e46f98f36e31 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -735,9 +735,6 @@ void __init uv_system_init(void) | |||
735 | uv_node_to_blade[nid] = blade; | 735 | uv_node_to_blade[nid] = blade; |
736 | uv_cpu_to_blade[cpu] = blade; | 736 | uv_cpu_to_blade[cpu] = blade; |
737 | max_pnode = max(pnode, max_pnode); | 737 | max_pnode = max(pnode, max_pnode); |
738 | |||
739 | printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, lcpu %d, blade %d\n", | ||
740 | cpu, apicid, pnode, nid, lcpu, blade); | ||
741 | } | 738 | } |
742 | 739 | ||
743 | /* Add blade/pnode info for nodes without cpus */ | 740 | /* Add blade/pnode info for nodes without cpus */ |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 031aa887b0eb..c4f9182ca3ac 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -1224,7 +1224,7 @@ static void reinit_timer(void) | |||
1224 | #ifdef INIT_TIMER_AFTER_SUSPEND | 1224 | #ifdef INIT_TIMER_AFTER_SUSPEND |
1225 | unsigned long flags; | 1225 | unsigned long flags; |
1226 | 1226 | ||
1227 | spin_lock_irqsave(&i8253_lock, flags); | 1227 | raw_spin_lock_irqsave(&i8253_lock, flags); |
1228 | /* set the clock to HZ */ | 1228 | /* set the clock to HZ */ |
1229 | outb_pit(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ | 1229 | outb_pit(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ |
1230 | udelay(10); | 1230 | udelay(10); |
@@ -1232,7 +1232,7 @@ static void reinit_timer(void) | |||
1232 | udelay(10); | 1232 | udelay(10); |
1233 | outb_pit(LATCH >> 8, PIT_CH0); /* MSB */ | 1233 | outb_pit(LATCH >> 8, PIT_CH0); /* MSB */ |
1234 | udelay(10); | 1234 | udelay(10); |
1235 | spin_unlock_irqrestore(&i8253_lock, flags); | 1235 | raw_spin_unlock_irqrestore(&i8253_lock, flags); |
1236 | #endif | 1236 | #endif |
1237 | } | 1237 | } |
1238 | 1238 | ||
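The apm_32.c hunks track the i8253_lock conversion elsewhere in this merge from spinlock_t to raw_spinlock_t; a raw spinlock always busy-waits with interrupts off, which low-level PIT programming needs even on configurations where ordinary spinlocks may sleep. A hedged sketch of the pairing in a kernel context (the lock name and body are placeholders):

/* Sketch only; assumes a kernel build where <linux/spinlock.h> is available. */
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_pit_lock);

static void program_pit_example(void)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&example_pit_lock, flags);
        /* ... reprogram the timer with interrupts off and the lock held ... */
        raw_spin_unlock_irqrestore(&example_pit_lock, flags);
}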
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index c202b62f3671..3a785da34b6f 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -14,7 +14,7 @@ CFLAGS_common.o := $(nostackp) | |||
14 | 14 | ||
15 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 15 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
16 | obj-y += proc.o capflags.o powerflags.o common.o | 16 | obj-y += proc.o capflags.o powerflags.o common.o |
17 | obj-y += vmware.o hypervisor.o sched.o | 17 | obj-y += vmware.o hypervisor.o sched.o mshyperv.o |
18 | 18 | ||
19 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 19 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o |
20 | obj-$(CONFIG_X86_64) += bugs_64.o | 20 | obj-$(CONFIG_X86_64) += bugs_64.o |
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 97ad79cdf688..10fa5684a662 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c | |||
@@ -30,12 +30,14 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | |||
30 | const struct cpuid_bit *cb; | 30 | const struct cpuid_bit *cb; |
31 | 31 | ||
32 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { | 32 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { |
33 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, | 33 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, |
34 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, | 34 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, |
35 | { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a }, | 35 | { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006 }, |
36 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a }, | 36 | { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007 }, |
37 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a }, | 37 | { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a }, |
38 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a }, | 38 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a }, |
39 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a }, | ||
40 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a }, | ||
39 | { 0, 0, 0, 0 } | 41 | { 0, 0, 0, 0 } |
40 | }; | 42 | }; |
41 | 43 | ||
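The scattered-feature table above gains X86_FEATURE_APERFMPERF (CPUID leaf 6, ECX bit 0, matching its removal from intel.c further down) and X86_FEATURE_CPB (leaf 0x80000007, EDX bit 9); each entry names the leaf, register, and bit to probe. A user-space probe of the same leaf-6 bit, assuming GCC's <cpuid.h> helper on an x86 host:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned eax, ebx, ecx, edx;

        if (!__get_cpuid(0x6, &eax, &ebx, &ecx, &edx))
                return 1;

        /* CPUID.06H:ECX[0] set means the IA32_APERF/IA32_MPERF MSRs exist. */
        printf("APERFMPERF: %s\n", (ecx & 1) ? "yes" : "no");
        return 0;
}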
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 01a265212395..c39576cb3018 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
@@ -86,7 +86,7 @@ static void __init check_fpu(void) | |||
86 | 86 | ||
87 | static void __init check_hlt(void) | 87 | static void __init check_hlt(void) |
88 | { | 88 | { |
89 | if (paravirt_enabled()) | 89 | if (boot_cpu_data.x86 >= 5 || paravirt_enabled()) |
90 | return; | 90 | return; |
91 | 91 | ||
92 | printk(KERN_INFO "Checking 'hlt' instruction... "); | 92 | printk(KERN_INFO "Checking 'hlt' instruction... "); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4868e4a951ee..68e4a6f2211e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1084,6 +1084,20 @@ static void clear_all_debug_regs(void) | |||
1084 | } | 1084 | } |
1085 | } | 1085 | } |
1086 | 1086 | ||
1087 | #ifdef CONFIG_KGDB | ||
1088 | /* | ||
1089 | * Restore debug regs if using kgdbwait and you have a kernel debugger | ||
1090 | * connection established. | ||
1091 | */ | ||
1092 | static void dbg_restore_debug_regs(void) | ||
1093 | { | ||
1094 | if (unlikely(kgdb_connected && arch_kgdb_ops.correct_hw_break)) | ||
1095 | arch_kgdb_ops.correct_hw_break(); | ||
1096 | } | ||
1097 | #else /* ! CONFIG_KGDB */ | ||
1098 | #define dbg_restore_debug_regs() | ||
1099 | #endif /* ! CONFIG_KGDB */ | ||
1100 | |||
1087 | /* | 1101 | /* |
1088 | * cpu_init() initializes state that is per-CPU. Some data is already | 1102 | * cpu_init() initializes state that is per-CPU. Some data is already |
1089 | * initialized (naturally) in the bootstrap process, such as the GDT | 1103 | * initialized (naturally) in the bootstrap process, such as the GDT |
@@ -1107,9 +1121,9 @@ void __cpuinit cpu_init(void) | |||
1107 | oist = &per_cpu(orig_ist, cpu); | 1121 | oist = &per_cpu(orig_ist, cpu); |
1108 | 1122 | ||
1109 | #ifdef CONFIG_NUMA | 1123 | #ifdef CONFIG_NUMA |
1110 | if (cpu != 0 && percpu_read(node_number) == 0 && | 1124 | if (cpu != 0 && percpu_read(numa_node) == 0 && |
1111 | cpu_to_node(cpu) != NUMA_NO_NODE) | 1125 | early_cpu_to_node(cpu) != NUMA_NO_NODE) |
1112 | percpu_write(node_number, cpu_to_node(cpu)); | 1126 | set_numa_node(early_cpu_to_node(cpu)); |
1113 | #endif | 1127 | #endif |
1114 | 1128 | ||
1115 | me = current; | 1129 | me = current; |
@@ -1174,18 +1188,8 @@ void __cpuinit cpu_init(void) | |||
1174 | load_TR_desc(); | 1188 | load_TR_desc(); |
1175 | load_LDT(&init_mm.context); | 1189 | load_LDT(&init_mm.context); |
1176 | 1190 | ||
1177 | #ifdef CONFIG_KGDB | 1191 | clear_all_debug_regs(); |
1178 | /* | 1192 | dbg_restore_debug_regs(); |
1179 | * If the kgdb is connected no debug regs should be altered. This | ||
1180 | * is only applicable when KGDB and a KGDB I/O module are built | ||
1181 | * into the kernel and you are using early debugging with | ||
1182 | * kgdbwait. KGDB will control the kernel HW breakpoint registers. | ||
1183 | */ | ||
1184 | if (kgdb_connected && arch_kgdb_ops.correct_hw_break) | ||
1185 | arch_kgdb_ops.correct_hw_break(); | ||
1186 | else | ||
1187 | #endif | ||
1188 | clear_all_debug_regs(); | ||
1189 | 1193 | ||
1190 | fpu_init(); | 1194 | fpu_init(); |
1191 | 1195 | ||
@@ -1239,14 +1243,12 @@ void __cpuinit cpu_init(void) | |||
1239 | #endif | 1243 | #endif |
1240 | 1244 | ||
1241 | clear_all_debug_regs(); | 1245 | clear_all_debug_regs(); |
1246 | dbg_restore_debug_regs(); | ||
1242 | 1247 | ||
1243 | /* | 1248 | /* |
1244 | * Force FPU initialization: | 1249 | * Force FPU initialization: |
1245 | */ | 1250 | */ |
1246 | if (cpu_has_xsave) | 1251 | current_thread_info()->status = 0; |
1247 | current_thread_info()->status = TS_XSAVE; | ||
1248 | else | ||
1249 | current_thread_info()->status = 0; | ||
1250 | clear_used_math(); | 1252 | clear_used_math(); |
1251 | mxcsr_feature_mask_init(); | 1253 | mxcsr_feature_mask_init(); |
1252 | 1254 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile index 1840c0a5170b..bd54bf67e6fb 100644 --- a/arch/x86/kernel/cpu/cpufreq/Makefile +++ b/arch/x86/kernel/cpu/cpufreq/Makefile | |||
@@ -2,8 +2,8 @@ | |||
2 | # K8 systems. ACPI is preferred to all other hardware-specific drivers. | 2 | # K8 systems. ACPI is preferred to all other hardware-specific drivers. |
3 | # speedstep-* is preferred over p4-clockmod. | 3 | # speedstep-* is preferred over p4-clockmod. |
4 | 4 | ||
5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o | 5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o |
6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o | 6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o |
7 | obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o | 7 | obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o |
8 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o | 8 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o |
9 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o | 9 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o |
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 459168083b77..1d3cddaa40ee 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include <asm/msr.h> | 46 | #include <asm/msr.h> |
47 | #include <asm/processor.h> | 47 | #include <asm/processor.h> |
48 | #include <asm/cpufeature.h> | 48 | #include <asm/cpufeature.h> |
49 | #include "mperf.h" | ||
49 | 50 | ||
50 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | 51 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ |
51 | "acpi-cpufreq", msg) | 52 | "acpi-cpufreq", msg) |
@@ -71,8 +72,6 @@ struct acpi_cpufreq_data { | |||
71 | 72 | ||
72 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); | 73 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); |
73 | 74 | ||
74 | static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); | ||
75 | |||
76 | /* acpi_perf_data is a pointer to percpu data. */ | 75 | /* acpi_perf_data is a pointer to percpu data. */ |
77 | static struct acpi_processor_performance *acpi_perf_data; | 76 | static struct acpi_processor_performance *acpi_perf_data; |
78 | 77 | ||
@@ -240,45 +239,6 @@ static u32 get_cur_val(const struct cpumask *mask) | |||
240 | return cmd.val; | 239 | return cmd.val; |
241 | } | 240 | } |
242 | 241 | ||
243 | /* Called via smp_call_function_single(), on the target CPU */ | ||
244 | static void read_measured_perf_ctrs(void *_cur) | ||
245 | { | ||
246 | struct aperfmperf *am = _cur; | ||
247 | |||
248 | get_aperfmperf(am); | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * Return the measured active (C0) frequency on this CPU since last call | ||
253 | * to this function. | ||
254 | * Input: cpu number | ||
255 | * Return: Average CPU frequency in terms of max frequency (zero on error) | ||
256 | * | ||
257 | * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance | ||
258 | * over a period of time, while CPU is in C0 state. | ||
259 | * IA32_MPERF counts at the rate of max advertised frequency | ||
260 | * IA32_APERF counts at the rate of actual CPU frequency | ||
261 | * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and | ||
262 | * no meaning should be associated with absolute values of these MSRs. | ||
263 | */ | ||
264 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, | ||
265 | unsigned int cpu) | ||
266 | { | ||
267 | struct aperfmperf perf; | ||
268 | unsigned long ratio; | ||
269 | unsigned int retval; | ||
270 | |||
271 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) | ||
272 | return 0; | ||
273 | |||
274 | ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); | ||
275 | per_cpu(acfreq_old_perf, cpu) = perf; | ||
276 | |||
277 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; | ||
278 | |||
279 | return retval; | ||
280 | } | ||
281 | |||
282 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) | 242 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) |
283 | { | 243 | { |
284 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); | 244 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); |
@@ -702,7 +662,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
702 | 662 | ||
703 | /* Check for APERF/MPERF support in hardware */ | 663 | /* Check for APERF/MPERF support in hardware */ |
704 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) | 664 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) |
705 | acpi_cpufreq_driver.getavg = get_measured_perf; | 665 | acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf; |
706 | 666 | ||
707 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); | 667 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); |
708 | for (i = 0; i < perf->state_count; i++) | 668 | for (i = 0; i < perf->state_count; i++) |
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.c b/arch/x86/kernel/cpu/cpufreq/mperf.c new file mode 100644 index 000000000000..911e193018ae --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/mperf.c | |||
@@ -0,0 +1,51 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/smp.h> | ||
3 | #include <linux/module.h> | ||
4 | #include <linux/init.h> | ||
5 | #include <linux/cpufreq.h> | ||
6 | #include <linux/slab.h> | ||
7 | |||
8 | #include "mperf.h" | ||
9 | |||
10 | static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); | ||
11 | |||
12 | /* Called via smp_call_function_single(), on the target CPU */ | ||
13 | static void read_measured_perf_ctrs(void *_cur) | ||
14 | { | ||
15 | struct aperfmperf *am = _cur; | ||
16 | |||
17 | get_aperfmperf(am); | ||
18 | } | ||
19 | |||
20 | /* | ||
21 | * Return the measured active (C0) frequency on this CPU since last call | ||
22 | * to this function. | ||
23 | * Input: cpu number | ||
24 | * Return: Average CPU frequency in terms of max frequency (zero on error) | ||
25 | * | ||
26 | * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance | ||
27 | * over a period of time, while CPU is in C0 state. | ||
28 | * IA32_MPERF counts at the rate of max advertised frequency | ||
29 | * IA32_APERF counts at the rate of actual CPU frequency | ||
30 | * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and | ||
31 | * no meaning should be associated with absolute values of these MSRs. | ||
32 | */ | ||
33 | unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy, | ||
34 | unsigned int cpu) | ||
35 | { | ||
36 | struct aperfmperf perf; | ||
37 | unsigned long ratio; | ||
38 | unsigned int retval; | ||
39 | |||
40 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) | ||
41 | return 0; | ||
42 | |||
43 | ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); | ||
44 | per_cpu(acfreq_old_perf, cpu) = perf; | ||
45 | |||
46 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; | ||
47 | |||
48 | return retval; | ||
49 | } | ||
50 | EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf); | ||
51 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.h b/arch/x86/kernel/cpu/cpufreq/mperf.h new file mode 100644 index 000000000000..5dbf2950dc22 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/mperf.h | |||
@@ -0,0 +1,9 @@ | |||
1 | /* | ||
2 | * (c) 2010 Advanced Micro Devices, Inc. | ||
3 | * Your use of this code is subject to the terms and conditions of the | ||
4 | * GNU general public license version 2. See "COPYING" or | ||
5 | * http://www.gnu.org/licenses/gpl.html | ||
6 | */ | ||
7 | |||
8 | unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy, | ||
9 | unsigned int cpu); | ||
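mperf.c/mperf.h factor the APERF/MPERF sampling out of acpi-cpufreq so powernow-k8 can share it: a driver that sees X86_FEATURE_APERFMPERF assigns cpufreq_get_measured_perf to its .getavg hook, and the helper reports average frequency as max_freq scaled by the delta-APERF/delta-MPERF ratio since the last call. The ratio arithmetic in isolation (the fixed-point shift mirrors the kernel's APERFMPERF_SHIFT but is illustrative here):

#include <stdint.h>
#include <stdio.h>

#define SHIFT 10        /* fixed-point fraction bits */

/* ratio = (delta_aperf << SHIFT) / delta_mperf, i.e. actual vs. maximum clock */
static uint64_t aperfmperf_ratio(uint64_t old_a, uint64_t old_m,
                                 uint64_t cur_a, uint64_t cur_m)
{
        uint64_t da = cur_a - old_a, dm = cur_m - old_m;

        return dm ? (da << SHIFT) / dm : 0;
}

int main(void)
{
        /* the CPU ran at ~80% of its maximum frequency over the interval */
        uint64_t r = aperfmperf_ratio(0, 0, 800, 1000);

        printf("average for a 2000 MHz part: %llu MHz\n",
               (unsigned long long)((2000 * r) >> SHIFT));
        return 0;
}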
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index b6215b9798e2..7ec2123838e6 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -1,6 +1,5 @@ | |||
1 | |||
2 | /* | 1 | /* |
3 | * (c) 2003-2006 Advanced Micro Devices, Inc. | 2 | * (c) 2003-2010 Advanced Micro Devices, Inc. |
4 | * Your use of this code is subject to the terms and conditions of the | 3 | * Your use of this code is subject to the terms and conditions of the |
5 | * GNU general public license version 2. See "COPYING" or | 4 | * GNU general public license version 2. See "COPYING" or |
6 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
@@ -46,6 +45,7 @@ | |||
46 | #define PFX "powernow-k8: " | 45 | #define PFX "powernow-k8: " |
47 | #define VERSION "version 2.20.00" | 46 | #define VERSION "version 2.20.00" |
48 | #include "powernow-k8.h" | 47 | #include "powernow-k8.h" |
48 | #include "mperf.h" | ||
49 | 49 | ||
50 | /* serialize freq changes */ | 50 | /* serialize freq changes */ |
51 | static DEFINE_MUTEX(fidvid_mutex); | 51 | static DEFINE_MUTEX(fidvid_mutex); |
@@ -54,6 +54,12 @@ static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); | |||
54 | 54 | ||
55 | static int cpu_family = CPU_OPTERON; | 55 | static int cpu_family = CPU_OPTERON; |
56 | 56 | ||
57 | /* core performance boost */ | ||
58 | static bool cpb_capable, cpb_enabled; | ||
59 | static struct msr __percpu *msrs; | ||
60 | |||
61 | static struct cpufreq_driver cpufreq_amd64_driver; | ||
62 | |||
57 | #ifndef CONFIG_SMP | 63 | #ifndef CONFIG_SMP |
58 | static inline const struct cpumask *cpu_core_mask(int cpu) | 64 | static inline const struct cpumask *cpu_core_mask(int cpu) |
59 | { | 65 | { |
@@ -1249,6 +1255,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1249 | struct powernow_k8_data *data; | 1255 | struct powernow_k8_data *data; |
1250 | struct init_on_cpu init_on_cpu; | 1256 | struct init_on_cpu init_on_cpu; |
1251 | int rc; | 1257 | int rc; |
1258 | struct cpuinfo_x86 *c = &cpu_data(pol->cpu); | ||
1252 | 1259 | ||
1253 | if (!cpu_online(pol->cpu)) | 1260 | if (!cpu_online(pol->cpu)) |
1254 | return -ENODEV; | 1261 | return -ENODEV; |
@@ -1323,6 +1330,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1323 | return -EINVAL; | 1330 | return -EINVAL; |
1324 | } | 1331 | } |
1325 | 1332 | ||
1333 | /* Check for APERF/MPERF support in hardware */ | ||
1334 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) | ||
1335 | cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf; | ||
1336 | |||
1326 | cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); | 1337 | cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); |
1327 | 1338 | ||
1328 | if (cpu_family == CPU_HW_PSTATE) | 1339 | if (cpu_family == CPU_HW_PSTATE) |
@@ -1394,8 +1405,77 @@ out: | |||
1394 | return khz; | 1405 | return khz; |
1395 | } | 1406 | } |
1396 | 1407 | ||
1408 | static void _cpb_toggle_msrs(bool t) | ||
1409 | { | ||
1410 | int cpu; | ||
1411 | |||
1412 | get_online_cpus(); | ||
1413 | |||
1414 | rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1415 | |||
1416 | for_each_cpu(cpu, cpu_online_mask) { | ||
1417 | struct msr *reg = per_cpu_ptr(msrs, cpu); | ||
1418 | if (t) | ||
1419 | reg->l &= ~BIT(25); | ||
1420 | else | ||
1421 | reg->l |= BIT(25); | ||
1422 | } | ||
1423 | wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1424 | |||
1425 | put_online_cpus(); | ||
1426 | } | ||
1427 | |||
1428 | /* | ||
1429 | * Switch on/off core performance boosting. | ||
1430 | * | ||
1431 | * 0=disable | ||
1432 | * 1=enable. | ||
1433 | */ | ||
1434 | static void cpb_toggle(bool t) | ||
1435 | { | ||
1436 | if (!cpb_capable) | ||
1437 | return; | ||
1438 | |||
1439 | if (t && !cpb_enabled) { | ||
1440 | cpb_enabled = true; | ||
1441 | _cpb_toggle_msrs(t); | ||
1442 | printk(KERN_INFO PFX "Core Boosting enabled.\n"); | ||
1443 | } else if (!t && cpb_enabled) { | ||
1444 | cpb_enabled = false; | ||
1445 | _cpb_toggle_msrs(t); | ||
1446 | printk(KERN_INFO PFX "Core Boosting disabled.\n"); | ||
1447 | } | ||
1448 | } | ||
1449 | |||
1450 | static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf, | ||
1451 | size_t count) | ||
1452 | { | ||
1453 | int ret = -EINVAL; | ||
1454 | unsigned long val = 0; | ||
1455 | |||
1456 | ret = strict_strtoul(buf, 10, &val); | ||
1457 | if (!ret && (val == 0 || val == 1) && cpb_capable) | ||
1458 | cpb_toggle(val); | ||
1459 | else | ||
1460 | return -EINVAL; | ||
1461 | |||
1462 | return count; | ||
1463 | } | ||
1464 | |||
1465 | static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf) | ||
1466 | { | ||
1467 | return sprintf(buf, "%u\n", cpb_enabled); | ||
1468 | } | ||
1469 | |||
1470 | #define define_one_rw(_name) \ | ||
1471 | static struct freq_attr _name = \ | ||
1472 | __ATTR(_name, 0644, show_##_name, store_##_name) | ||
1473 | |||
1474 | define_one_rw(cpb); | ||
1475 | |||
1397 | static struct freq_attr *powernow_k8_attr[] = { | 1476 | static struct freq_attr *powernow_k8_attr[] = { |
1398 | &cpufreq_freq_attr_scaling_available_freqs, | 1477 | &cpufreq_freq_attr_scaling_available_freqs, |
1478 | &cpb, | ||
1399 | NULL, | 1479 | NULL, |
1400 | }; | 1480 | }; |
1401 | 1481 | ||
@@ -1411,10 +1491,51 @@ static struct cpufreq_driver cpufreq_amd64_driver = { | |||
1411 | .attr = powernow_k8_attr, | 1491 | .attr = powernow_k8_attr, |
1412 | }; | 1492 | }; |
1413 | 1493 | ||
1494 | /* | ||
1495 | * Clear the boost-disable flag on the CPU_DOWN path so that this cpu | ||
1496 | * cannot block the remaining ones from boosting. On the CPU_UP path we | ||
1497 | * simply keep the boost-disable flag in sync with the current global | ||
1498 | * state. | ||
1499 | */ | ||
1500 | static int cpb_notify(struct notifier_block *nb, unsigned long action, | ||
1501 | void *hcpu) | ||
1502 | { | ||
1503 | unsigned cpu = (long)hcpu; | ||
1504 | u32 lo, hi; | ||
1505 | |||
1506 | switch (action) { | ||
1507 | case CPU_UP_PREPARE: | ||
1508 | case CPU_UP_PREPARE_FROZEN: | ||
1509 | |||
1510 | if (!cpb_enabled) { | ||
1511 | rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); | ||
1512 | lo |= BIT(25); | ||
1513 | wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); | ||
1514 | } | ||
1515 | break; | ||
1516 | |||
1517 | case CPU_DOWN_PREPARE: | ||
1518 | case CPU_DOWN_PREPARE_FROZEN: | ||
1519 | rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); | ||
1520 | lo &= ~BIT(25); | ||
1521 | wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); | ||
1522 | break; | ||
1523 | |||
1524 | default: | ||
1525 | break; | ||
1526 | } | ||
1527 | |||
1528 | return NOTIFY_OK; | ||
1529 | } | ||
1530 | |||
1531 | static struct notifier_block cpb_nb = { | ||
1532 | .notifier_call = cpb_notify, | ||
1533 | }; | ||
1534 | |||
1414 | /* driver entry point for init */ | 1535 | /* driver entry point for init */ |
1415 | static int __cpuinit powernowk8_init(void) | 1536 | static int __cpuinit powernowk8_init(void) |
1416 | { | 1537 | { |
1417 | unsigned int i, supported_cpus = 0; | 1538 | unsigned int i, supported_cpus = 0, cpu; |
1418 | 1539 | ||
1419 | for_each_online_cpu(i) { | 1540 | for_each_online_cpu(i) { |
1420 | int rc; | 1541 | int rc; |
@@ -1423,15 +1544,36 @@ static int __cpuinit powernowk8_init(void) | |||
1423 | supported_cpus++; | 1544 | supported_cpus++; |
1424 | } | 1545 | } |
1425 | 1546 | ||
1426 | if (supported_cpus == num_online_cpus()) { | 1547 | if (supported_cpus != num_online_cpus()) |
1427 | printk(KERN_INFO PFX "Found %d %s " | 1548 | return -ENODEV; |
1428 | "processors (%d cpu cores) (" VERSION ")\n", | 1549 | |
1429 | num_online_nodes(), | 1550 | printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n", |
1430 | boot_cpu_data.x86_model_id, supported_cpus); | 1551 | num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus); |
1431 | return cpufreq_register_driver(&cpufreq_amd64_driver); | 1552 | |
1553 | if (boot_cpu_has(X86_FEATURE_CPB)) { | ||
1554 | |||
1555 | cpb_capable = true; | ||
1556 | |||
1557 | register_cpu_notifier(&cpb_nb); | ||
1558 | |||
1559 | msrs = msrs_alloc(); | ||
1560 | if (!msrs) { | ||
1561 | printk(KERN_ERR "%s: Error allocating msrs!\n", __func__); | ||
1562 | return -ENOMEM; | ||
1563 | } | ||
1564 | |||
1565 | rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1566 | |||
1567 | for_each_cpu(cpu, cpu_online_mask) { | ||
1568 | struct msr *reg = per_cpu_ptr(msrs, cpu); | ||
1569 | cpb_enabled |= !(!!(reg->l & BIT(25))); | ||
1570 | } | ||
1571 | |||
1572 | printk(KERN_INFO PFX "Core Performance Boosting: %s.\n", | ||
1573 | (cpb_enabled ? "on" : "off")); | ||
1432 | } | 1574 | } |
1433 | 1575 | ||
1434 | return -ENODEV; | 1576 | return cpufreq_register_driver(&cpufreq_amd64_driver); |
1435 | } | 1577 | } |
1436 | 1578 | ||
1437 | /* driver entry point for term */ | 1579 | /* driver entry point for term */ |
@@ -1439,6 +1581,13 @@ static void __exit powernowk8_exit(void) | |||
1439 | { | 1581 | { |
1440 | dprintk("exit\n"); | 1582 | dprintk("exit\n"); |
1441 | 1583 | ||
1584 | if (boot_cpu_has(X86_FEATURE_CPB)) { | ||
1585 | msrs_free(msrs); | ||
1586 | msrs = NULL; | ||
1587 | |||
1588 | unregister_cpu_notifier(&cpb_nb); | ||
1589 | } | ||
1590 | |||
1442 | cpufreq_unregister_driver(&cpufreq_amd64_driver); | 1591 | cpufreq_unregister_driver(&cpufreq_amd64_driver); |
1443 | } | 1592 | } |
1444 | 1593 | ||
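The powernow-k8 additions wire up core performance boost (CPB): bit 25 of MSR_K7_HWCR acts as a per-core boost-disable flag, so enabling boost means clearing that bit on every online core via rdmsr_on_cpus()/wrmsr_on_cpus(), the cpb sysfs attribute toggles it globally, and the CPU notifier keeps hotplugged cores consistent with the global state. For inspection from user space, the bit can be read through the msr character device; the sketch below assumes the msr module is loaded, root privileges, and the conventional 0xC0010015 address for the AMD HWCR MSR.

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

#define MSR_K7_HWCR 0xC0010015u         /* AMD HWCR; bit 25 = boost disable */

int main(void)
{
        uint64_t hwcr;
        int fd = open("/dev/cpu/0/msr", O_RDONLY);

        if (fd < 0 || pread(fd, &hwcr, sizeof(hwcr), MSR_K7_HWCR) != sizeof(hwcr)) {
                perror("msr read");
                return 1;
        }
        printf("core boost %s on cpu0\n",
               (hwcr & (1ULL << 25)) ? "disabled" : "enabled");
        close(fd);
        return 0;
}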
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index 02ce824073cb..df3529b1c02d 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h | |||
@@ -5,7 +5,6 @@ | |||
5 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
6 | */ | 6 | */ |
7 | 7 | ||
8 | |||
9 | enum pstate { | 8 | enum pstate { |
10 | HW_PSTATE_INVALID = 0xff, | 9 | HW_PSTATE_INVALID = 0xff, |
11 | HW_PSTATE_0 = 0, | 10 | HW_PSTATE_0 = 0, |
@@ -55,7 +54,6 @@ struct powernow_k8_data { | |||
55 | struct cpumask *available_cores; | 54 | struct cpumask *available_cores; |
56 | }; | 55 | }; |
57 | 56 | ||
58 | |||
59 | /* processor's cpuid instruction support */ | 57 | /* processor's cpuid instruction support */ |
60 | #define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ | 58 | #define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ |
61 | #define CPUID_XFAM 0x0ff00000 /* extended family */ | 59 | #define CPUID_XFAM 0x0ff00000 /* extended family */ |
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 08be922de33a..dd531cc56a8f 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -21,37 +21,55 @@ | |||
21 | * | 21 | * |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/module.h> | ||
24 | #include <asm/processor.h> | 25 | #include <asm/processor.h> |
25 | #include <asm/vmware.h> | ||
26 | #include <asm/hypervisor.h> | 26 | #include <asm/hypervisor.h> |
27 | 27 | ||
28 | static inline void __cpuinit | 28 | /* |
29 | detect_hypervisor_vendor(struct cpuinfo_x86 *c) | 29 | * Hypervisor detect order. This is specified explicitly here because |
30 | * some hypervisors might implement compatibility modes for other | ||
31 | * hypervisors and therefore need to be detected in specific sequence. | ||
32 | */ | ||
33 | static const __initconst struct hypervisor_x86 * const hypervisors[] = | ||
30 | { | 34 | { |
31 | if (vmware_platform()) | 35 | &x86_hyper_vmware, |
32 | c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; | 36 | &x86_hyper_ms_hyperv, |
33 | else | 37 | }; |
34 | c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; | ||
35 | } | ||
36 | 38 | ||
37 | static inline void __cpuinit | 39 | const struct hypervisor_x86 *x86_hyper; |
38 | hypervisor_set_feature_bits(struct cpuinfo_x86 *c) | 40 | EXPORT_SYMBOL(x86_hyper); |
41 | |||
42 | static inline void __init | ||
43 | detect_hypervisor_vendor(void) | ||
39 | { | 44 | { |
40 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) { | 45 | const struct hypervisor_x86 *h, * const *p; |
41 | vmware_set_feature_bits(c); | 46 | |
42 | return; | 47 | for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { |
48 | h = *p; | ||
49 | if (h->detect()) { | ||
50 | x86_hyper = h; | ||
51 | printk(KERN_INFO "Hypervisor detected: %s\n", h->name); | ||
52 | break; | ||
53 | } | ||
43 | } | 54 | } |
44 | } | 55 | } |
45 | 56 | ||
46 | void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) | 57 | void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) |
47 | { | 58 | { |
48 | detect_hypervisor_vendor(c); | 59 | if (x86_hyper && x86_hyper->set_cpu_features) |
49 | hypervisor_set_feature_bits(c); | 60 | x86_hyper->set_cpu_features(c); |
50 | } | 61 | } |
51 | 62 | ||
52 | void __init init_hypervisor_platform(void) | 63 | void __init init_hypervisor_platform(void) |
53 | { | 64 | { |
65 | |||
66 | detect_hypervisor_vendor(); | ||
67 | |||
68 | if (!x86_hyper) | ||
69 | return; | ||
70 | |||
54 | init_hypervisor(&boot_cpu_data); | 71 | init_hypervisor(&boot_cpu_data); |
55 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) | 72 | |
56 | vmware_platform_setup(); | 73 | if (x86_hyper->init_platform) |
74 | x86_hyper->init_platform(); | ||
57 | } | 75 | } |
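hypervisor.c becomes table driven: each guest platform supplies a struct hypervisor_x86 with a name plus detect(), optional init_platform() and set_cpu_features() callbacks, and the array is probed in a fixed order because some hypervisors emulate other hypervisors' interfaces. The shape of the pattern in miniature — the struct, entries and callbacks below are illustrative, not the kernel definitions:

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

struct hyper_ops {
        const char *name;
        bool (*detect)(void);
        void (*init_platform)(void);    /* optional */
};

static bool detect_a(void) { return false; }
static bool detect_b(void) { return true; }

static const struct hyper_ops hyper_a = { "hyper-a", detect_a, NULL };
static const struct hyper_ops hyper_b = { "hyper-b", detect_b, NULL };

/* Order matters: hosts that emulate other interfaces must be probed first. */
static const struct hyper_ops *const hypervisors[] = { &hyper_a, &hyper_b };

int main(void)
{
        for (size_t i = 0; i < sizeof(hypervisors) / sizeof(hypervisors[0]); i++) {
                if (!hypervisors[i]->detect())
                        continue;
                printf("Hypervisor detected: %s\n", hypervisors[i]->name);
                if (hypervisors[i]->init_platform)
                        hypervisors[i]->init_platform();
                break;
        }
        return 0;
}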
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1366c7cfd483..85f69cdeae10 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <asm/processor.h> | 12 | #include <asm/processor.h> |
13 | #include <asm/pgtable.h> | 13 | #include <asm/pgtable.h> |
14 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
15 | #include <asm/ds.h> | ||
16 | #include <asm/bugs.h> | 15 | #include <asm/bugs.h> |
17 | #include <asm/cpu.h> | 16 | #include <asm/cpu.h> |
18 | 17 | ||
@@ -373,12 +372,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
373 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | 372 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); |
374 | } | 373 | } |
375 | 374 | ||
376 | if (c->cpuid_level > 6) { | ||
377 | unsigned ecx = cpuid_ecx(6); | ||
378 | if (ecx & 0x01) | ||
379 | set_cpu_cap(c, X86_FEATURE_APERFMPERF); | ||
380 | } | ||
381 | |||
382 | if (cpu_has_xmm2) | 375 | if (cpu_has_xmm2) |
383 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | 376 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); |
384 | if (cpu_has_ds) { | 377 | if (cpu_has_ds) { |
@@ -388,7 +381,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
388 | set_cpu_cap(c, X86_FEATURE_BTS); | 381 | set_cpu_cap(c, X86_FEATURE_BTS); |
389 | if (!(l1 & (1<<12))) | 382 | if (!(l1 & (1<<12))) |
390 | set_cpu_cap(c, X86_FEATURE_PEBS); | 383 | set_cpu_cap(c, X86_FEATURE_PEBS); |
391 | ds_init_intel(c); | ||
392 | } | 384 | } |
393 | 385 | ||
394 | if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) | 386 | if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index b3eeb66c0a51..33eae2062cf5 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -148,13 +148,19 @@ union _cpuid4_leaf_ecx { | |||
148 | u32 full; | 148 | u32 full; |
149 | }; | 149 | }; |
150 | 150 | ||
151 | struct amd_l3_cache { | ||
152 | struct pci_dev *dev; | ||
153 | bool can_disable; | ||
154 | unsigned indices; | ||
155 | u8 subcaches[4]; | ||
156 | }; | ||
157 | |||
151 | struct _cpuid4_info { | 158 | struct _cpuid4_info { |
152 | union _cpuid4_leaf_eax eax; | 159 | union _cpuid4_leaf_eax eax; |
153 | union _cpuid4_leaf_ebx ebx; | 160 | union _cpuid4_leaf_ebx ebx; |
154 | union _cpuid4_leaf_ecx ecx; | 161 | union _cpuid4_leaf_ecx ecx; |
155 | unsigned long size; | 162 | unsigned long size; |
156 | bool can_disable; | 163 | struct amd_l3_cache *l3; |
157 | unsigned int l3_indices; | ||
158 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); | 164 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); |
159 | }; | 165 | }; |
160 | 166 | ||
@@ -164,8 +170,7 @@ struct _cpuid4_info_regs { | |||
164 | union _cpuid4_leaf_ebx ebx; | 170 | union _cpuid4_leaf_ebx ebx; |
165 | union _cpuid4_leaf_ecx ecx; | 171 | union _cpuid4_leaf_ecx ecx; |
166 | unsigned long size; | 172 | unsigned long size; |
167 | bool can_disable; | 173 | struct amd_l3_cache *l3; |
168 | unsigned int l3_indices; | ||
169 | }; | 174 | }; |
170 | 175 | ||
171 | unsigned short num_cache_leaves; | 176 | unsigned short num_cache_leaves; |
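The intel_cacheinfo.c rework replaces the per-leaf can_disable/l3_indices fields with one amd_l3_cache descriptor per northbridge node (PCI device handle, index count, four subcache presence flags); every _cpuid4_info on that node then just points at the shared descriptor, which the hunks that follow allocate lazily. A sketch of the sharing pattern with simplified names and allocation:

#include <stdlib.h>
#include <stdio.h>

struct l3_desc {
        unsigned indices;
        unsigned char subcaches[4];
};

struct cache_leaf {
        struct l3_desc *l3;             /* shared by all CPUs on the node */
};

static struct l3_desc *node_l3[8];      /* one slot per node (size illustrative) */

static void attach_l3(struct cache_leaf *leaf, int node)
{
        if (!node_l3[node])
                node_l3[node] = calloc(1, sizeof(*node_l3[node]));
        leaf->l3 = node_l3[node];       /* may stay NULL if allocation failed */
}

int main(void)
{
        struct cache_leaf a, b;

        attach_l3(&a, 0);
        attach_l3(&b, 0);
        printf("same descriptor: %s\n", a.l3 == b.l3 ? "yes" : "no");
        return 0;
}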
@@ -302,87 +307,163 @@ struct _cache_attr { | |||
302 | }; | 307 | }; |
303 | 308 | ||
304 | #ifdef CONFIG_CPU_SUP_AMD | 309 | #ifdef CONFIG_CPU_SUP_AMD |
305 | static unsigned int __cpuinit amd_calc_l3_indices(void) | 310 | |
311 | /* | ||
312 | * L3 cache descriptors | ||
313 | */ | ||
314 | static struct amd_l3_cache **__cpuinitdata l3_caches; | ||
315 | |||
316 | static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) | ||
306 | { | 317 | { |
307 | /* | ||
308 | * We're called over smp_call_function_single() and therefore | ||
309 | * are on the correct cpu. | ||
310 | */ | ||
311 | int cpu = smp_processor_id(); | ||
312 | int node = cpu_to_node(cpu); | ||
313 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
314 | unsigned int sc0, sc1, sc2, sc3; | 318 | unsigned int sc0, sc1, sc2, sc3; |
315 | u32 val = 0; | 319 | u32 val = 0; |
316 | 320 | ||
317 | pci_read_config_dword(dev, 0x1C4, &val); | 321 | pci_read_config_dword(l3->dev, 0x1C4, &val); |
318 | 322 | ||
319 | /* calculate subcache sizes */ | 323 | /* calculate subcache sizes */ |
320 | sc0 = !(val & BIT(0)); | 324 | l3->subcaches[0] = sc0 = !(val & BIT(0)); |
321 | sc1 = !(val & BIT(4)); | 325 | l3->subcaches[1] = sc1 = !(val & BIT(4)); |
322 | sc2 = !(val & BIT(8)) + !(val & BIT(9)); | 326 | l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); |
323 | sc3 = !(val & BIT(12)) + !(val & BIT(13)); | 327 | l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); |
324 | 328 | ||
325 | return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; | 329 | l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; |
330 | } | ||
331 | |||
332 | static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) | ||
333 | { | ||
334 | struct amd_l3_cache *l3; | ||
335 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
336 | |||
337 | l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC); | ||
338 | if (!l3) { | ||
339 | printk(KERN_WARNING "Error allocating L3 struct\n"); | ||
340 | return NULL; | ||
341 | } | ||
342 | |||
343 | l3->dev = dev; | ||
344 | |||
345 | amd_calc_l3_indices(l3); | ||
346 | |||
347 | return l3; | ||
326 | } | 348 | } |
327 | 349 | ||
328 | static void __cpuinit | 350 | static void __cpuinit |
329 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | 351 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) |
330 | { | 352 | { |
331 | if (index < 3) | 353 | int node; |
354 | |||
355 | if (boot_cpu_data.x86 != 0x10) | ||
332 | return; | 356 | return; |
333 | 357 | ||
334 | if (boot_cpu_data.x86 == 0x11) | 358 | if (index < 3) |
335 | return; | 359 | return; |
336 | 360 | ||
337 | /* see errata #382 and #388 */ | 361 | /* see errata #382 and #388 */ |
338 | if ((boot_cpu_data.x86 == 0x10) && | 362 | if (boot_cpu_data.x86_model < 0x8) |
339 | ((boot_cpu_data.x86_model < 0x8) || | 363 | return; |
340 | (boot_cpu_data.x86_mask < 0x1))) | 364 | |
365 | if ((boot_cpu_data.x86_model == 0x8 || | ||
366 | boot_cpu_data.x86_model == 0x9) | ||
367 | && | ||
368 | boot_cpu_data.x86_mask < 0x1) | ||
369 | return; | ||
370 | |||
371 | /* not in virtualized environments */ | ||
372 | if (num_k8_northbridges == 0) | ||
341 | return; | 373 | return; |
342 | 374 | ||
343 | this_leaf->can_disable = true; | 375 | /* |
344 | this_leaf->l3_indices = amd_calc_l3_indices(); | 376 | * Strictly speaking, the amount in @size below is leaked since it is |
377 | * never freed but this is done only on shutdown so it doesn't matter. | ||
378 | */ | ||
379 | if (!l3_caches) { | ||
380 | int size = num_k8_northbridges * sizeof(struct amd_l3_cache *); | ||
381 | |||
382 | l3_caches = kzalloc(size, GFP_ATOMIC); | ||
383 | if (!l3_caches) | ||
384 | return; | ||
385 | } | ||
386 | |||
387 | node = amd_get_nb_id(smp_processor_id()); | ||
388 | |||
389 | if (!l3_caches[node]) { | ||
390 | l3_caches[node] = amd_init_l3_cache(node); | ||
391 | l3_caches[node]->can_disable = true; | ||
392 | } | ||
393 | |||
394 | WARN_ON(!l3_caches[node]); | ||
395 | |||
396 | this_leaf->l3 = l3_caches[node]; | ||
345 | } | 397 | } |
346 | 398 | ||
347 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | 399 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, |
348 | unsigned int index) | 400 | unsigned int slot) |
349 | { | 401 | { |
350 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | 402 | struct pci_dev *dev = this_leaf->l3->dev; |
351 | int node = amd_get_nb_id(cpu); | ||
352 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
353 | unsigned int reg = 0; | 403 | unsigned int reg = 0; |
354 | 404 | ||
355 | if (!this_leaf->can_disable) | 405 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) |
356 | return -EINVAL; | 406 | return -EINVAL; |
357 | 407 | ||
358 | if (!dev) | 408 | if (!dev) |
359 | return -EINVAL; | 409 | return -EINVAL; |
360 | 410 | ||
361 | pci_read_config_dword(dev, 0x1BC + index * 4, ®); | 411 | pci_read_config_dword(dev, 0x1BC + slot * 4, ®); |
362 | return sprintf(buf, "0x%08x\n", reg); | 412 | return sprintf(buf, "0x%08x\n", reg); |
363 | } | 413 | } |
364 | 414 | ||
365 | #define SHOW_CACHE_DISABLE(index) \ | 415 | #define SHOW_CACHE_DISABLE(slot) \ |
366 | static ssize_t \ | 416 | static ssize_t \ |
367 | show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \ | 417 | show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf) \ |
368 | { \ | 418 | { \ |
369 | return show_cache_disable(this_leaf, buf, index); \ | 419 | return show_cache_disable(this_leaf, buf, slot); \ |
370 | } | 420 | } |
371 | SHOW_CACHE_DISABLE(0) | 421 | SHOW_CACHE_DISABLE(0) |
372 | SHOW_CACHE_DISABLE(1) | 422 | SHOW_CACHE_DISABLE(1) |
373 | 423 | ||
424 | static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, | ||
425 | unsigned slot, unsigned long idx) | ||
426 | { | ||
427 | int i; | ||
428 | |||
429 | idx |= BIT(30); | ||
430 | |||
431 | /* | ||
432 | * disable index in all 4 subcaches | ||
433 | */ | ||
434 | for (i = 0; i < 4; i++) { | ||
435 | u32 reg = idx | (i << 20); | ||
436 | |||
437 | if (!l3->subcaches[i]) | ||
438 | continue; | ||
439 | |||
440 | pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); | ||
441 | |||
442 | /* | ||
443 | * We need to WBINVD on a core on the node containing the L3 | ||
444 | * cache whose indices we disable; therefore a simple wbinvd() | ||
445 | * is not sufficient. | ||
446 | */ | ||
447 | wbinvd_on_cpu(cpu); | ||
448 | |||
449 | reg |= BIT(31); | ||
450 | pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); | ||
451 | } | ||
452 | } | ||
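amd_l3_disable_index() writes the same value once per subcache: the cache index goes in the low 12 bits, the subcache select in bits 21:20, BIT(30) requests the disable, and BIT(31) is written only after the WBINVD to make it stick. A small sketch that assembles the value the same way the loop above does (the field layout is inferred from this code, not quoted from AMD's register documentation):

#include <stdio.h>

#define L3_DISABLE_REQ   (1u << 30)     /* matches BIT(30) above */
#define L3_DISABLE_LATCH (1u << 31)     /* matches BIT(31), written after WBINVD */

/* Build the value written to config offset 0x1BC + slot * 4 for one subcache. */
static unsigned int l3_disable_val(unsigned int subcache, unsigned int index,
                                   int latch)
{
    unsigned int reg = (index & 0xfff) | ((subcache & 0x3) << 20) | L3_DISABLE_REQ;

    if (latch)
        reg |= L3_DISABLE_LATCH;
    return reg;
}

int main(void)
{
    unsigned int sub;

    for (sub = 0; sub < 4; sub++)
        printf("subcache %u: 0x%08x then 0x%08x\n", sub,
               l3_disable_val(sub, 0x2a, 0), l3_disable_val(sub, 0x2a, 1));
    return 0;
}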
453 | |||
454 | |||
374 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | 455 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, |
375 | const char *buf, size_t count, unsigned int index) | 456 | const char *buf, size_t count, |
457 | unsigned int slot) | ||
376 | { | 458 | { |
459 | struct pci_dev *dev = this_leaf->l3->dev; | ||
377 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | 460 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); |
378 | int node = amd_get_nb_id(cpu); | ||
379 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
380 | unsigned long val = 0; | 461 | unsigned long val = 0; |
381 | 462 | ||
382 | #define SUBCACHE_MASK (3UL << 20) | 463 | #define SUBCACHE_MASK (3UL << 20) |
383 | #define SUBCACHE_INDEX 0xfff | 464 | #define SUBCACHE_INDEX 0xfff |
384 | 465 | ||
385 | if (!this_leaf->can_disable) | 466 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) |
386 | return -EINVAL; | 467 | return -EINVAL; |
387 | 468 | ||
388 | if (!capable(CAP_SYS_ADMIN)) | 469 | if (!capable(CAP_SYS_ADMIN)) |
@@ -396,26 +477,20 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | |||
396 | 477 | ||
397 | /* do not allow writes outside of allowed bits */ | 478 | /* do not allow writes outside of allowed bits */ |
398 | if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || | 479 | if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || |
399 | ((val & SUBCACHE_INDEX) > this_leaf->l3_indices)) | 480 | ((val & SUBCACHE_INDEX) > this_leaf->l3->indices)) |
400 | return -EINVAL; | 481 | return -EINVAL; |
401 | 482 | ||
402 | val |= BIT(30); | 483 | amd_l3_disable_index(this_leaf->l3, cpu, slot, val); |
403 | pci_write_config_dword(dev, 0x1BC + index * 4, val); | 484 | |
404 | /* | ||
405 | * We need to WBINVD on a core on the node containing the L3 cache which | ||
406 | * indices we disable therefore a simple wbinvd() is not sufficient. | ||
407 | */ | ||
408 | wbinvd_on_cpu(cpu); | ||
409 | pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31)); | ||
410 | return count; | 485 | return count; |
411 | } | 486 | } |
412 | 487 | ||
413 | #define STORE_CACHE_DISABLE(index) \ | 488 | #define STORE_CACHE_DISABLE(slot) \ |
414 | static ssize_t \ | 489 | static ssize_t \ |
415 | store_cache_disable_##index(struct _cpuid4_info *this_leaf, \ | 490 | store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ |
416 | const char *buf, size_t count) \ | 491 | const char *buf, size_t count) \ |
417 | { \ | 492 | { \ |
418 | return store_cache_disable(this_leaf, buf, count, index); \ | 493 | return store_cache_disable(this_leaf, buf, count, slot); \ |
419 | } | 494 | } |
420 | STORE_CACHE_DISABLE(0) | 495 | STORE_CACHE_DISABLE(0) |
421 | STORE_CACHE_DISABLE(1) | 496 | STORE_CACHE_DISABLE(1) |
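These show/store pairs end up as cache_disable_0 and cache_disable_1 files under the L3 leaf's sysfs index directory. A hedged userspace sketch of writing one (the path below is the conventional location of these attributes, index3 is assumed to be the L3 leaf, and the accepted number format is whatever the parsing code elided from this hunk expects):

#include <stdio.h>

int main(void)
{
    /* Adjust cpu/index for your system; requires CAP_SYS_ADMIN. */
    const char *path =
        "/sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0";
    FILE *f = fopen(path, "w");

    if (!f) {
        perror("fopen");
        return 1;
    }
    /* Disable cache index 42 in all subcaches via slot 0 (decimal assumed). */
    fprintf(f, "42\n");
    fclose(f);
    return 0;
}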
@@ -443,8 +518,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index, | |||
443 | 518 | ||
444 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { | 519 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { |
445 | amd_cpuid4(index, &eax, &ebx, &ecx); | 520 | amd_cpuid4(index, &eax, &ebx, &ecx); |
446 | if (boot_cpu_data.x86 >= 0x10) | 521 | amd_check_l3_disable(index, this_leaf); |
447 | amd_check_l3_disable(index, this_leaf); | ||
448 | } else { | 522 | } else { |
449 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); | 523 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); |
450 | } | 524 | } |
@@ -701,6 +775,7 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) | |||
701 | for (i = 0; i < num_cache_leaves; i++) | 775 | for (i = 0; i < num_cache_leaves; i++) |
702 | cache_remove_shared_cpu_map(cpu, i); | 776 | cache_remove_shared_cpu_map(cpu, i); |
703 | 777 | ||
778 | kfree(per_cpu(ici_cpuid4_info, cpu)->l3); | ||
704 | kfree(per_cpu(ici_cpuid4_info, cpu)); | 779 | kfree(per_cpu(ici_cpuid4_info, cpu)); |
705 | per_cpu(ici_cpuid4_info, cpu) = NULL; | 780 | per_cpu(ici_cpuid4_info, cpu) = NULL; |
706 | } | 781 | } |
@@ -985,7 +1060,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
985 | 1060 | ||
986 | this_leaf = CPUID4_INFO_IDX(cpu, i); | 1061 | this_leaf = CPUID4_INFO_IDX(cpu, i); |
987 | 1062 | ||
988 | if (this_leaf->can_disable) | 1063 | if (this_leaf->l3 && this_leaf->l3->can_disable) |
989 | ktype_cache.default_attrs = default_l3_attrs; | 1064 | ktype_cache.default_attrs = default_l3_attrs; |
990 | else | 1065 | else |
991 | ktype_cache.default_attrs = default_attrs; | 1066 | ktype_cache.default_attrs = default_attrs; |
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 4ac6d48fe11b..bb34b03af252 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile | |||
@@ -7,3 +7,5 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o | |||
7 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o | 7 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o |
8 | 8 | ||
9 | obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o | 9 | obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o |
10 | |||
11 | obj-$(CONFIG_ACPI_APEI) += mce-apei.o | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c new file mode 100644 index 000000000000..745b54f9be89 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c | |||
@@ -0,0 +1,138 @@ | |||
1 | /* | ||
2 | * Bridge between MCE and APEI | ||
3 | * | ||
4 | * On some machines, corrected memory errors are reported via an APEI | ||
5 | * generic hardware error source (GHES) instead of a corrected Machine | ||
6 | * Check. These corrected memory errors can be reported to user space | ||
7 | * through /dev/mcelog by faking a corrected Machine Check, so that | ||
8 | * the faulty memory page can be offlined by /sbin/mcelog if the error | ||
9 | * count for one page exceeds the threshold. | ||
10 | * | ||
11 | * For a fatal MCE, save the MCE record into persistent storage via ERST, | ||
12 | * so that it can be retrieved and logged after reboot. | ||
13 | * | ||
14 | * Copyright 2010 Intel Corp. | ||
15 | * Author: Huang Ying <ying.huang@intel.com> | ||
16 | * | ||
17 | * This program is free software; you can redistribute it and/or | ||
18 | * modify it under the terms of the GNU General Public License version | ||
19 | * 2 as published by the Free Software Foundation. | ||
20 | * | ||
21 | * This program is distributed in the hope that it will be useful, | ||
22 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
23 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
24 | * GNU General Public License for more details. | ||
25 | * | ||
26 | * You should have received a copy of the GNU General Public License | ||
27 | * along with this program; if not, write to the Free Software | ||
28 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
29 | */ | ||
30 | |||
31 | #include <linux/kernel.h> | ||
32 | #include <linux/acpi.h> | ||
33 | #include <linux/cper.h> | ||
34 | #include <acpi/apei.h> | ||
35 | #include <asm/mce.h> | ||
36 | |||
37 | #include "mce-internal.h" | ||
38 | |||
39 | void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) | ||
40 | { | ||
41 | struct mce m; | ||
42 | |||
43 | /* Only corrected MC is reported */ | ||
44 | if (!corrected) | ||
45 | return; | ||
46 | |||
47 | mce_setup(&m); | ||
48 | m.bank = 1; | ||
49 | /* Fake a memory read corrected error with unknown channel */ | ||
50 | m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; | ||
51 | m.addr = mem_err->physical_addr; | ||
52 | mce_log(&m); | ||
53 | mce_notify_irq(); | ||
54 | } | ||
55 | EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); | ||
56 | |||
57 | #define CPER_CREATOR_MCE \ | ||
58 | UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ | ||
59 | 0x64, 0x90, 0xb8, 0x9d) | ||
60 | #define CPER_SECTION_TYPE_MCE \ | ||
61 | UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ | ||
62 | 0x04, 0x4a, 0x38, 0xfc) | ||
63 | |||
64 | /* | ||
65 | * The CPER specification (UEFI specification 2.3, appendix N) requires | ||
66 | * these records to be byte-packed. | ||
67 | */ | ||
68 | struct cper_mce_record { | ||
69 | struct cper_record_header hdr; | ||
70 | struct cper_section_descriptor sec_hdr; | ||
71 | struct mce mce; | ||
72 | } __packed; | ||
73 | |||
74 | int apei_write_mce(struct mce *m) | ||
75 | { | ||
76 | struct cper_mce_record rcd; | ||
77 | |||
78 | memset(&rcd, 0, sizeof(rcd)); | ||
79 | memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE); | ||
80 | rcd.hdr.revision = CPER_RECORD_REV; | ||
81 | rcd.hdr.signature_end = CPER_SIG_END; | ||
82 | rcd.hdr.section_count = 1; | ||
83 | rcd.hdr.error_severity = CPER_SER_FATAL; | ||
84 | /* timestamp, platform_id, partition_id are all invalid */ | ||
85 | rcd.hdr.validation_bits = 0; | ||
86 | rcd.hdr.record_length = sizeof(rcd); | ||
87 | rcd.hdr.creator_id = CPER_CREATOR_MCE; | ||
88 | rcd.hdr.notification_type = CPER_NOTIFY_MCE; | ||
89 | rcd.hdr.record_id = cper_next_record_id(); | ||
90 | rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR; | ||
91 | |||
92 | rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd; | ||
93 | rcd.sec_hdr.section_length = sizeof(rcd.mce); | ||
94 | rcd.sec_hdr.revision = CPER_SEC_REV; | ||
95 | /* fru_id and fru_text are invalid */ | ||
96 | rcd.sec_hdr.validation_bits = 0; | ||
97 | rcd.sec_hdr.flags = CPER_SEC_PRIMARY; | ||
98 | rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE; | ||
99 | rcd.sec_hdr.section_severity = CPER_SER_FATAL; | ||
100 | |||
101 | memcpy(&rcd.mce, m, sizeof(*m)); | ||
102 | |||
103 | return erst_write(&rcd.hdr); | ||
104 | } | ||
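The section_offset assignment above is offsetof(struct cper_mce_record, mce) spelled as a pointer difference; because the record is __packed, the MCE payload follows the two headers with no padding. A self-contained illustration with stand-in structs (the real layouts come from <linux/cper.h> and <asm/mce.h>):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Stand-ins only; the real headers define the full CPER and MCE layouts. */
struct hdr      { uint8_t bytes[128]; };
struct sec      { uint8_t bytes[72];  };
struct mce_stub { uint64_t status, addr, misc; };

struct record {
    struct hdr      hdr;
    struct sec      sec_hdr;
    struct mce_stub mce;
} __attribute__((packed));

int main(void)
{
    struct record rcd;
    /* Same computation as in apei_write_mce(), cast to char * for ISO C. */
    ptrdiff_t diff = (char *)&rcd.mce - (char *)&rcd;

    printf("pointer diff = %td, offsetof = %zu\n",
           diff, offsetof(struct record, mce));
    return 0;
}

With the stand-in sizes used here both values come out to 200 (128 + 72); the packed attribute is what guarantees the two computations match regardless of member alignment.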
105 | |||
106 | ssize_t apei_read_mce(struct mce *m, u64 *record_id) | ||
107 | { | ||
108 | struct cper_mce_record rcd; | ||
109 | ssize_t len; | ||
110 | |||
111 | len = erst_read_next(&rcd.hdr, sizeof(rcd)); | ||
112 | if (len <= 0) | ||
113 | return len; | ||
114 | /* Cannot skip other records in storage via ERST unless we clear them */ | ||
115 | else if (len != sizeof(rcd) || | ||
116 | uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) { | ||
117 | if (printk_ratelimit()) | ||
118 | pr_warning( | ||
119 | "MCE-APEI: Can not skip the unknown record in ERST"); | ||
120 | return -EIO; | ||
121 | } | ||
122 | |||
123 | memcpy(m, &rcd.mce, sizeof(*m)); | ||
124 | *record_id = rcd.hdr.record_id; | ||
125 | |||
126 | return sizeof(*m); | ||
127 | } | ||
128 | |||
129 | /* Check whether there is record in ERST */ | ||
130 | int apei_check_mce(void) | ||
131 | { | ||
132 | return erst_get_record_count(); | ||
133 | } | ||
134 | |||
135 | int apei_clear_mce(u64 record_id) | ||
136 | { | ||
137 | return erst_clear(record_id); | ||
138 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 32996f9fab67..fefcc69ee8b5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -28,3 +28,26 @@ extern int mce_ser; | |||
28 | 28 | ||
29 | extern struct mce_bank *mce_banks; | 29 | extern struct mce_bank *mce_banks; |
30 | 30 | ||
31 | #ifdef CONFIG_ACPI_APEI | ||
32 | int apei_write_mce(struct mce *m); | ||
33 | ssize_t apei_read_mce(struct mce *m, u64 *record_id); | ||
34 | int apei_check_mce(void); | ||
35 | int apei_clear_mce(u64 record_id); | ||
36 | #else | ||
37 | static inline int apei_write_mce(struct mce *m) | ||
38 | { | ||
39 | return -EINVAL; | ||
40 | } | ||
41 | static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id) | ||
42 | { | ||
43 | return 0; | ||
44 | } | ||
45 | static inline int apei_check_mce(void) | ||
46 | { | ||
47 | return 0; | ||
48 | } | ||
49 | static inline int apei_clear_mce(u64 record_id) | ||
50 | { | ||
51 | return -EINVAL; | ||
52 | } | ||
53 | #endif | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8a6f0afa767e..707165dbc203 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -264,7 +264,7 @@ static void wait_for_panic(void) | |||
264 | 264 | ||
265 | static void mce_panic(char *msg, struct mce *final, char *exp) | 265 | static void mce_panic(char *msg, struct mce *final, char *exp) |
266 | { | 266 | { |
267 | int i; | 267 | int i, apei_err = 0; |
268 | 268 | ||
269 | if (!fake_panic) { | 269 | if (!fake_panic) { |
270 | /* | 270 | /* |
@@ -287,8 +287,11 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
287 | struct mce *m = &mcelog.entry[i]; | 287 | struct mce *m = &mcelog.entry[i]; |
288 | if (!(m->status & MCI_STATUS_VAL)) | 288 | if (!(m->status & MCI_STATUS_VAL)) |
289 | continue; | 289 | continue; |
290 | if (!(m->status & MCI_STATUS_UC)) | 290 | if (!(m->status & MCI_STATUS_UC)) { |
291 | print_mce(m); | 291 | print_mce(m); |
292 | if (!apei_err) | ||
293 | apei_err = apei_write_mce(m); | ||
294 | } | ||
292 | } | 295 | } |
293 | /* Now print uncorrected but with the final one last */ | 296 | /* Now print uncorrected but with the final one last */ |
294 | for (i = 0; i < MCE_LOG_LEN; i++) { | 297 | for (i = 0; i < MCE_LOG_LEN; i++) { |
@@ -297,11 +300,17 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
297 | continue; | 300 | continue; |
298 | if (!(m->status & MCI_STATUS_UC)) | 301 | if (!(m->status & MCI_STATUS_UC)) |
299 | continue; | 302 | continue; |
300 | if (!final || memcmp(m, final, sizeof(struct mce))) | 303 | if (!final || memcmp(m, final, sizeof(struct mce))) { |
301 | print_mce(m); | 304 | print_mce(m); |
305 | if (!apei_err) | ||
306 | apei_err = apei_write_mce(m); | ||
307 | } | ||
302 | } | 308 | } |
303 | if (final) | 309 | if (final) { |
304 | print_mce(final); | 310 | print_mce(final); |
311 | if (!apei_err) | ||
312 | apei_err = apei_write_mce(final); | ||
313 | } | ||
305 | if (cpu_missing) | 314 | if (cpu_missing) |
306 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); | 315 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); |
307 | print_mce_tail(); | 316 | print_mce_tail(); |
@@ -539,7 +548,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
539 | struct mce m; | 548 | struct mce m; |
540 | int i; | 549 | int i; |
541 | 550 | ||
542 | __get_cpu_var(mce_poll_count)++; | 551 | percpu_inc(mce_poll_count); |
543 | 552 | ||
544 | mce_setup(&m); | 553 | mce_setup(&m); |
545 | 554 | ||
@@ -934,7 +943,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
934 | 943 | ||
935 | atomic_inc(&mce_entry); | 944 | atomic_inc(&mce_entry); |
936 | 945 | ||
937 | __get_cpu_var(mce_exception_count)++; | 946 | percpu_inc(mce_exception_count); |
938 | 947 | ||
939 | if (notify_die(DIE_NMI, "machine check", regs, error_code, | 948 | if (notify_die(DIE_NMI, "machine check", regs, error_code, |
940 | 18, SIGKILL) == NOTIFY_STOP) | 949 | 18, SIGKILL) == NOTIFY_STOP) |
@@ -1493,6 +1502,43 @@ static void collect_tscs(void *data) | |||
1493 | rdtscll(cpu_tsc[smp_processor_id()]); | 1502 | rdtscll(cpu_tsc[smp_processor_id()]); |
1494 | } | 1503 | } |
1495 | 1504 | ||
1505 | static int mce_apei_read_done; | ||
1506 | |||
1507 | /* Collect MCE record of previous boot in persistent storage via APEI ERST. */ | ||
1508 | static int __mce_read_apei(char __user **ubuf, size_t usize) | ||
1509 | { | ||
1510 | int rc; | ||
1511 | u64 record_id; | ||
1512 | struct mce m; | ||
1513 | |||
1514 | if (usize < sizeof(struct mce)) | ||
1515 | return -EINVAL; | ||
1516 | |||
1517 | rc = apei_read_mce(&m, &record_id); | ||
1518 | /* Error or no more MCE record */ | ||
1519 | if (rc <= 0) { | ||
1520 | mce_apei_read_done = 1; | ||
1521 | return rc; | ||
1522 | } | ||
1523 | rc = -EFAULT; | ||
1524 | if (copy_to_user(*ubuf, &m, sizeof(struct mce))) | ||
1525 | return rc; | ||
1526 | /* | ||
1527 | * In fact, we should clear the record only after it has | ||
1528 | * been flushed to disk or sent to the network by | ||
1529 | * /sbin/mcelog, but we have no interface to support that now, | ||
1530 | * so just clear it here to avoid duplication. | ||
1531 | */ | ||
1532 | rc = apei_clear_mce(record_id); | ||
1533 | if (rc) { | ||
1534 | mce_apei_read_done = 1; | ||
1535 | return rc; | ||
1536 | } | ||
1537 | *ubuf += sizeof(struct mce); | ||
1538 | |||
1539 | return 0; | ||
1540 | } | ||
1541 | |||
1496 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | 1542 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, |
1497 | loff_t *off) | 1543 | loff_t *off) |
1498 | { | 1544 | { |
@@ -1506,15 +1552,19 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
1506 | return -ENOMEM; | 1552 | return -ENOMEM; |
1507 | 1553 | ||
1508 | mutex_lock(&mce_read_mutex); | 1554 | mutex_lock(&mce_read_mutex); |
1555 | |||
1556 | if (!mce_apei_read_done) { | ||
1557 | err = __mce_read_apei(&buf, usize); | ||
1558 | if (err || buf != ubuf) | ||
1559 | goto out; | ||
1560 | } | ||
1561 | |||
1509 | next = rcu_dereference_check_mce(mcelog.next); | 1562 | next = rcu_dereference_check_mce(mcelog.next); |
1510 | 1563 | ||
1511 | /* Only supports full reads right now */ | 1564 | /* Only supports full reads right now */ |
1512 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { | 1565 | err = -EINVAL; |
1513 | mutex_unlock(&mce_read_mutex); | 1566 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) |
1514 | kfree(cpu_tsc); | 1567 | goto out; |
1515 | |||
1516 | return -EINVAL; | ||
1517 | } | ||
1518 | 1568 | ||
1519 | err = 0; | 1569 | err = 0; |
1520 | prev = 0; | 1570 | prev = 0; |
@@ -1562,10 +1612,15 @@ timeout: | |||
1562 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); | 1612 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); |
1563 | } | 1613 | } |
1564 | } | 1614 | } |
1615 | |||
1616 | if (err) | ||
1617 | err = -EFAULT; | ||
1618 | |||
1619 | out: | ||
1565 | mutex_unlock(&mce_read_mutex); | 1620 | mutex_unlock(&mce_read_mutex); |
1566 | kfree(cpu_tsc); | 1621 | kfree(cpu_tsc); |
1567 | 1622 | ||
1568 | return err ? -EFAULT : buf - ubuf; | 1623 | return err ? err : buf - ubuf; |
1569 | } | 1624 | } |
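After this change a reader of /dev/mcelog first drains whatever the previous boot left in ERST, one struct mce at a time, and then the in-memory log, which still only honours full-buffer reads. A hedged userspace sketch of such a reader (it assumes the exported <asm/mce.h> header, which defines the MCE_GET_RECORD_LEN/MCE_GET_LOG_LEN ioctls that mcelog itself uses):

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <asm/mce.h>            /* MCE_GET_RECORD_LEN, MCE_GET_LOG_LEN */

int main(void)
{
    int fd = open("/dev/mcelog", O_RDONLY);
    int rec_len = 0, log_len = 0;
    char *buf;
    ssize_t n;

    if (fd < 0) {
        perror("open /dev/mcelog");
        return 1;
    }
    if (ioctl(fd, MCE_GET_RECORD_LEN, &rec_len) < 0 ||
        ioctl(fd, MCE_GET_LOG_LEN, &log_len) < 0) {
        perror("ioctl");
        return 1;
    }

    /* The driver only honours full-buffer reads of the in-memory log. */
    buf = malloc((size_t)rec_len * log_len);
    if (!buf)
        return 1;

    n = read(fd, buf, (size_t)rec_len * log_len);
    printf("record len %d, log len %d, read %zd bytes (%zd records)\n",
           rec_len, log_len, n, n > 0 ? n / rec_len : 0);

    free(buf);
    close(fd);
    return 0;
}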
1570 | 1625 | ||
1571 | static unsigned int mce_poll(struct file *file, poll_table *wait) | 1626 | static unsigned int mce_poll(struct file *file, poll_table *wait) |
@@ -1573,6 +1628,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait) | |||
1573 | poll_wait(file, &mce_wait, wait); | 1628 | poll_wait(file, &mce_wait, wait); |
1574 | if (rcu_dereference_check_mce(mcelog.next)) | 1629 | if (rcu_dereference_check_mce(mcelog.next)) |
1575 | return POLLIN | POLLRDNORM; | 1630 | return POLLIN | POLLRDNORM; |
1631 | if (!mce_apei_read_done && apei_check_mce()) | ||
1632 | return POLLIN | POLLRDNORM; | ||
1576 | return 0; | 1633 | return 0; |
1577 | } | 1634 | } |
1578 | 1635 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 81c499eceb21..e1a0a3bf9716 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -190,7 +190,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, | |||
190 | mutex_unlock(&therm_cpu_lock); | 190 | mutex_unlock(&therm_cpu_lock); |
191 | break; | 191 | break; |
192 | } | 192 | } |
193 | return err ? NOTIFY_BAD : NOTIFY_OK; | 193 | return notifier_from_errno(err); |
194 | } | 194 | } |
195 | 195 | ||
196 | static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = | 196 | static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = |
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c new file mode 100644 index 000000000000..16f41bbe46b6 --- /dev/null +++ b/arch/x86/kernel/cpu/mshyperv.c | |||
@@ -0,0 +1,55 @@ | |||
1 | /* | ||
2 | * HyperV Detection code. | ||
3 | * | ||
4 | * Copyright (C) 2010, Novell, Inc. | ||
5 | * Author : K. Y. Srinivasan <ksrinivasan@novell.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; version 2 of the License. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/types.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <asm/processor.h> | ||
16 | #include <asm/hypervisor.h> | ||
17 | #include <asm/hyperv.h> | ||
18 | #include <asm/mshyperv.h> | ||
19 | |||
20 | struct ms_hyperv_info ms_hyperv; | ||
21 | |||
22 | static bool __init ms_hyperv_platform(void) | ||
23 | { | ||
24 | u32 eax; | ||
25 | u32 hyp_signature[3]; | ||
26 | |||
27 | if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) | ||
28 | return false; | ||
29 | |||
30 | cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, | ||
31 | &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); | ||
32 | |||
33 | return eax >= HYPERV_CPUID_MIN && | ||
34 | eax <= HYPERV_CPUID_MAX && | ||
35 | !memcmp("Microsoft Hv", hyp_signature, 12); | ||
36 | } | ||
37 | |||
38 | static void __init ms_hyperv_init_platform(void) | ||
39 | { | ||
40 | /* | ||
41 | * Extract the features and hints | ||
42 | */ | ||
43 | ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); | ||
44 | ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); | ||
45 | |||
46 | printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", | ||
47 | ms_hyperv.features, ms_hyperv.hints); | ||
48 | } | ||
49 | |||
50 | const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { | ||
51 | .name = "Microsoft HyperV", | ||
52 | .detect = ms_hyperv_platform, | ||
53 | .init_platform = ms_hyperv_init_platform, | ||
54 | }; | ||
55 | EXPORT_SYMBOL(x86_hyper_ms_hyperv); | ||
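ms_hyperv_platform() keys off CPUID.1:ECX[31] (the hypervisor-present bit) and then the 12-byte vendor signature returned in EBX/ECX/EDX of leaf 0x40000000. The same check can be made from userspace with GCC's cpuid.h; 0x40000000 is written out below instead of using the HYPERV_CPUID_* constants from asm/hyperv.h:

#include <stdio.h>
#include <string.h>
#include <cpuid.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;
    char sig[13] = { 0 };

    /* CPUID.1:ECX bit 31 is set when running under a hypervisor. */
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx) || !(ecx & (1u << 31))) {
        puts("no hypervisor detected");
        return 0;
    }

    /* Hypervisor vendor leaf: the signature comes back in EBX, ECX, EDX. */
    __cpuid(0x40000000, eax, ebx, ecx, edx);
    memcpy(sig + 0, &ebx, 4);
    memcpy(sig + 4, &ecx, 4);
    memcpy(sig + 8, &edx, 4);

    printf("hypervisor signature: \"%s\"%s\n", sig,
           strcmp(sig, "Microsoft Hv") ? "" : " (Hyper-V)");
    return 0;
}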
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index db5bdc8addf8..c77586061bcb 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -31,46 +31,51 @@ | |||
31 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
32 | #include <asm/compat.h> | 32 | #include <asm/compat.h> |
33 | 33 | ||
34 | static u64 perf_event_mask __read_mostly; | 34 | #if 0 |
35 | #undef wrmsrl | ||
36 | #define wrmsrl(msr, val) \ | ||
37 | do { \ | ||
38 | trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\ | ||
39 | (unsigned long)(val)); \ | ||
40 | native_write_msr((msr), (u32)((u64)(val)), \ | ||
41 | (u32)((u64)(val) >> 32)); \ | ||
42 | } while (0) | ||
43 | #endif | ||
35 | 44 | ||
36 | /* The maximal number of PEBS events: */ | 45 | /* |
37 | #define MAX_PEBS_EVENTS 4 | 46 | * Best-effort, GUP-based copy_from_user() that assumes IRQ or NMI context |
47 | */ | ||
48 | static unsigned long | ||
49 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
50 | { | ||
51 | unsigned long offset, addr = (unsigned long)from; | ||
52 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
53 | unsigned long size, len = 0; | ||
54 | struct page *page; | ||
55 | void *map; | ||
56 | int ret; | ||
38 | 57 | ||
39 | /* The size of a BTS record in bytes: */ | 58 | do { |
40 | #define BTS_RECORD_SIZE 24 | 59 | ret = __get_user_pages_fast(addr, 1, 0, &page); |
60 | if (!ret) | ||
61 | break; | ||
41 | 62 | ||
42 | /* The size of a per-cpu BTS buffer in bytes: */ | 63 | offset = addr & (PAGE_SIZE - 1); |
43 | #define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) | 64 | size = min(PAGE_SIZE - offset, n - len); |
44 | 65 | ||
45 | /* The BTS overflow threshold in bytes from the end of the buffer: */ | 66 | map = kmap_atomic(page, type); |
46 | #define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) | 67 | memcpy(to, map+offset, size); |
68 | kunmap_atomic(map, type); | ||
69 | put_page(page); | ||
47 | 70 | ||
71 | len += size; | ||
72 | to += size; | ||
73 | addr += size; | ||
48 | 74 | ||
49 | /* | 75 | } while (len < n); |
50 | * Bits in the debugctlmsr controlling branch tracing. | ||
51 | */ | ||
52 | #define X86_DEBUGCTL_TR (1 << 6) | ||
53 | #define X86_DEBUGCTL_BTS (1 << 7) | ||
54 | #define X86_DEBUGCTL_BTINT (1 << 8) | ||
55 | #define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) | ||
56 | #define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) | ||
57 | 76 | ||
58 | /* | 77 | return len; |
59 | * A debug store configuration. | 78 | } |
60 | * | ||
61 | * We only support architectures that use 64bit fields. | ||
62 | */ | ||
63 | struct debug_store { | ||
64 | u64 bts_buffer_base; | ||
65 | u64 bts_index; | ||
66 | u64 bts_absolute_maximum; | ||
67 | u64 bts_interrupt_threshold; | ||
68 | u64 pebs_buffer_base; | ||
69 | u64 pebs_index; | ||
70 | u64 pebs_absolute_maximum; | ||
71 | u64 pebs_interrupt_threshold; | ||
72 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
73 | }; | ||
74 | 79 | ||
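copy_from_user_nmi() above cannot take page faults, so it pins one page at a time with __get_user_pages_fast() and copies at most up to the next page boundary per iteration. A userspace model of just the chunking arithmetic, with memcpy standing in for the kmap_atomic() + copy:

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096UL

/* Copy n bytes, never crossing a page boundary within a single step. */
static unsigned long copy_chunked(void *to, const void *from, unsigned long n)
{
    unsigned long addr = (unsigned long)from;
    unsigned long len = 0;

    while (len < n) {
        unsigned long offset = addr & (PAGE_SIZE - 1);
        unsigned long size = PAGE_SIZE - offset;

        if (size > n - len)
            size = n - len;

        /* In the kernel this step is kmap_atomic() + memcpy + kunmap_atomic(). */
        memcpy((char *)to + len, (const char *)from + len, size);

        len += size;
        addr += size;
    }
    return len;
}

int main(void)
{
    static char src[10000], dst[10000];

    memset(src, 'x', sizeof(src));
    printf("copied %lu bytes\n", copy_chunked(dst, src, sizeof(src)));
    return 0;
}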
75 | struct event_constraint { | 80 | struct event_constraint { |
76 | union { | 81 | union { |
@@ -89,18 +94,41 @@ struct amd_nb { | |||
89 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; | 94 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; |
90 | }; | 95 | }; |
91 | 96 | ||
97 | #define MAX_LBR_ENTRIES 16 | ||
98 | |||
92 | struct cpu_hw_events { | 99 | struct cpu_hw_events { |
100 | /* | ||
101 | * Generic x86 PMC bits | ||
102 | */ | ||
93 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ | 103 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ |
94 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 104 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
95 | unsigned long interrupts; | ||
96 | int enabled; | 105 | int enabled; |
97 | struct debug_store *ds; | ||
98 | 106 | ||
99 | int n_events; | 107 | int n_events; |
100 | int n_added; | 108 | int n_added; |
101 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ | 109 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ |
102 | u64 tags[X86_PMC_IDX_MAX]; | 110 | u64 tags[X86_PMC_IDX_MAX]; |
103 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ | 111 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ |
112 | |||
113 | unsigned int group_flag; | ||
114 | |||
115 | /* | ||
116 | * Intel DebugStore bits | ||
117 | */ | ||
118 | struct debug_store *ds; | ||
119 | u64 pebs_enabled; | ||
120 | |||
121 | /* | ||
122 | * Intel LBR bits | ||
123 | */ | ||
124 | int lbr_users; | ||
125 | void *lbr_context; | ||
126 | struct perf_branch_stack lbr_stack; | ||
127 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; | ||
128 | |||
129 | /* | ||
130 | * AMD specific bits | ||
131 | */ | ||
104 | struct amd_nb *amd_nb; | 132 | struct amd_nb *amd_nb; |
105 | }; | 133 | }; |
106 | 134 | ||
@@ -114,44 +142,75 @@ struct cpu_hw_events { | |||
114 | #define EVENT_CONSTRAINT(c, n, m) \ | 142 | #define EVENT_CONSTRAINT(c, n, m) \ |
115 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) | 143 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) |
116 | 144 | ||
145 | /* | ||
146 | * Constraint on the Event code. | ||
147 | */ | ||
117 | #define INTEL_EVENT_CONSTRAINT(c, n) \ | 148 | #define INTEL_EVENT_CONSTRAINT(c, n) \ |
118 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) | 149 | EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) |
119 | 150 | ||
151 | /* | ||
152 | * Constraint on the Event code + UMask + fixed-mask | ||
153 | * | ||
154 | * Filter mask to validate fixed counter events. | ||
155 | * The following filters disqualify an event from using a fixed counter: | ||
156 | * - inv | ||
157 | * - edge | ||
158 | * - cnt-mask | ||
159 | * The other filters are supported by fixed counters. | ||
160 | * The any-thread option is supported starting with v3. | ||
161 | */ | ||
120 | #define FIXED_EVENT_CONSTRAINT(c, n) \ | 162 | #define FIXED_EVENT_CONSTRAINT(c, n) \ |
121 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) | 163 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK) |
164 | |||
165 | /* | ||
166 | * Constraint on the Event code + UMask | ||
167 | */ | ||
168 | #define PEBS_EVENT_CONSTRAINT(c, n) \ | ||
169 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) | ||
122 | 170 | ||
123 | #define EVENT_CONSTRAINT_END \ | 171 | #define EVENT_CONSTRAINT_END \ |
124 | EVENT_CONSTRAINT(0, 0, 0) | 172 | EVENT_CONSTRAINT(0, 0, 0) |
125 | 173 | ||
126 | #define for_each_event_constraint(e, c) \ | 174 | #define for_each_event_constraint(e, c) \ |
127 | for ((e) = (c); (e)->cmask; (e)++) | 175 | for ((e) = (c); (e)->weight; (e)++) |
176 | |||
177 | union perf_capabilities { | ||
178 | struct { | ||
179 | u64 lbr_format : 6; | ||
180 | u64 pebs_trap : 1; | ||
181 | u64 pebs_arch_reg : 1; | ||
182 | u64 pebs_format : 4; | ||
183 | u64 smm_freeze : 1; | ||
184 | }; | ||
185 | u64 capabilities; | ||
186 | }; | ||
128 | 187 | ||
129 | /* | 188 | /* |
130 | * struct x86_pmu - generic x86 pmu | 189 | * struct x86_pmu - generic x86 pmu |
131 | */ | 190 | */ |
132 | struct x86_pmu { | 191 | struct x86_pmu { |
192 | /* | ||
193 | * Generic x86 PMC bits | ||
194 | */ | ||
133 | const char *name; | 195 | const char *name; |
134 | int version; | 196 | int version; |
135 | int (*handle_irq)(struct pt_regs *); | 197 | int (*handle_irq)(struct pt_regs *); |
136 | void (*disable_all)(void); | 198 | void (*disable_all)(void); |
137 | void (*enable_all)(void); | 199 | void (*enable_all)(int added); |
138 | void (*enable)(struct perf_event *); | 200 | void (*enable)(struct perf_event *); |
139 | void (*disable)(struct perf_event *); | 201 | void (*disable)(struct perf_event *); |
202 | int (*hw_config)(struct perf_event *event); | ||
203 | int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); | ||
140 | unsigned eventsel; | 204 | unsigned eventsel; |
141 | unsigned perfctr; | 205 | unsigned perfctr; |
142 | u64 (*event_map)(int); | 206 | u64 (*event_map)(int); |
143 | u64 (*raw_event)(u64); | ||
144 | int max_events; | 207 | int max_events; |
145 | int num_events; | 208 | int num_counters; |
146 | int num_events_fixed; | 209 | int num_counters_fixed; |
147 | int event_bits; | 210 | int cntval_bits; |
148 | u64 event_mask; | 211 | u64 cntval_mask; |
149 | int apic; | 212 | int apic; |
150 | u64 max_period; | 213 | u64 max_period; |
151 | u64 intel_ctrl; | ||
152 | void (*enable_bts)(u64 config); | ||
153 | void (*disable_bts)(void); | ||
154 | |||
155 | struct event_constraint * | 214 | struct event_constraint * |
156 | (*get_event_constraints)(struct cpu_hw_events *cpuc, | 215 | (*get_event_constraints)(struct cpu_hw_events *cpuc, |
157 | struct perf_event *event); | 216 | struct perf_event *event); |
@@ -159,11 +218,32 @@ struct x86_pmu { | |||
159 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, | 218 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
160 | struct perf_event *event); | 219 | struct perf_event *event); |
161 | struct event_constraint *event_constraints; | 220 | struct event_constraint *event_constraints; |
221 | void (*quirks)(void); | ||
162 | 222 | ||
163 | int (*cpu_prepare)(int cpu); | 223 | int (*cpu_prepare)(int cpu); |
164 | void (*cpu_starting)(int cpu); | 224 | void (*cpu_starting)(int cpu); |
165 | void (*cpu_dying)(int cpu); | 225 | void (*cpu_dying)(int cpu); |
166 | void (*cpu_dead)(int cpu); | 226 | void (*cpu_dead)(int cpu); |
227 | |||
228 | /* | ||
229 | * Intel Arch Perfmon v2+ | ||
230 | */ | ||
231 | u64 intel_ctrl; | ||
232 | union perf_capabilities intel_cap; | ||
233 | |||
234 | /* | ||
235 | * Intel DebugStore bits | ||
236 | */ | ||
237 | int bts, pebs; | ||
238 | int pebs_record_size; | ||
239 | void (*drain_pebs)(struct pt_regs *regs); | ||
240 | struct event_constraint *pebs_constraints; | ||
241 | |||
242 | /* | ||
243 | * Intel LBR | ||
244 | */ | ||
245 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ | ||
246 | int lbr_nr; /* hardware stack size */ | ||
167 | }; | 247 | }; |
168 | 248 | ||
169 | static struct x86_pmu x86_pmu __read_mostly; | 249 | static struct x86_pmu x86_pmu __read_mostly; |
@@ -198,7 +278,7 @@ static u64 | |||
198 | x86_perf_event_update(struct perf_event *event) | 278 | x86_perf_event_update(struct perf_event *event) |
199 | { | 279 | { |
200 | struct hw_perf_event *hwc = &event->hw; | 280 | struct hw_perf_event *hwc = &event->hw; |
201 | int shift = 64 - x86_pmu.event_bits; | 281 | int shift = 64 - x86_pmu.cntval_bits; |
202 | u64 prev_raw_count, new_raw_count; | 282 | u64 prev_raw_count, new_raw_count; |
203 | int idx = hwc->idx; | 283 | int idx = hwc->idx; |
204 | s64 delta; | 284 | s64 delta; |
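The shift variable above (the rest of x86_perf_event_update() lies outside this hunk) implements the usual trick for counters narrower than 64 bits: both raw reads are shifted up by 64 - cntval_bits before subtracting and the result shifted back down, so a wrap of the hardware counter still yields the right small delta. A standalone illustration for a hypothetical 40-bit counter:

#include <stdio.h>
#include <stdint.h>

#define CNTVAL_BITS 40          /* hypothetical counter width */

/* Delta between two raw reads of a CNTVAL_BITS-wide free-running counter. */
static int64_t counter_delta(uint64_t prev_raw, uint64_t new_raw)
{
    int shift = 64 - CNTVAL_BITS;
    uint64_t delta = (new_raw << shift) - (prev_raw << shift);

    /* Arithmetic right shift discards the top (64 - CNTVAL_BITS) bits again. */
    return (int64_t)delta >> shift;
}

int main(void)
{
    uint64_t prev = (1ULL << CNTVAL_BITS) - 10;     /* about to wrap */
    uint64_t now  = 5;                              /* wrapped around */

    printf("delta across the wrap: %lld\n",
           (long long)counter_delta(prev, now));    /* prints 15 */
    return 0;
}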
@@ -241,33 +321,32 @@ again: | |||
241 | static atomic_t active_events; | 321 | static atomic_t active_events; |
242 | static DEFINE_MUTEX(pmc_reserve_mutex); | 322 | static DEFINE_MUTEX(pmc_reserve_mutex); |
243 | 323 | ||
324 | #ifdef CONFIG_X86_LOCAL_APIC | ||
325 | |||
244 | static bool reserve_pmc_hardware(void) | 326 | static bool reserve_pmc_hardware(void) |
245 | { | 327 | { |
246 | #ifdef CONFIG_X86_LOCAL_APIC | ||
247 | int i; | 328 | int i; |
248 | 329 | ||
249 | if (nmi_watchdog == NMI_LOCAL_APIC) | 330 | if (nmi_watchdog == NMI_LOCAL_APIC) |
250 | disable_lapic_nmi_watchdog(); | 331 | disable_lapic_nmi_watchdog(); |
251 | 332 | ||
252 | for (i = 0; i < x86_pmu.num_events; i++) { | 333 | for (i = 0; i < x86_pmu.num_counters; i++) { |
253 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) | 334 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) |
254 | goto perfctr_fail; | 335 | goto perfctr_fail; |
255 | } | 336 | } |
256 | 337 | ||
257 | for (i = 0; i < x86_pmu.num_events; i++) { | 338 | for (i = 0; i < x86_pmu.num_counters; i++) { |
258 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) | 339 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) |
259 | goto eventsel_fail; | 340 | goto eventsel_fail; |
260 | } | 341 | } |
261 | #endif | ||
262 | 342 | ||
263 | return true; | 343 | return true; |
264 | 344 | ||
265 | #ifdef CONFIG_X86_LOCAL_APIC | ||
266 | eventsel_fail: | 345 | eventsel_fail: |
267 | for (i--; i >= 0; i--) | 346 | for (i--; i >= 0; i--) |
268 | release_evntsel_nmi(x86_pmu.eventsel + i); | 347 | release_evntsel_nmi(x86_pmu.eventsel + i); |
269 | 348 | ||
270 | i = x86_pmu.num_events; | 349 | i = x86_pmu.num_counters; |
271 | 350 | ||
272 | perfctr_fail: | 351 | perfctr_fail: |
273 | for (i--; i >= 0; i--) | 352 | for (i--; i >= 0; i--) |
@@ -277,128 +356,36 @@ perfctr_fail: | |||
277 | enable_lapic_nmi_watchdog(); | 356 | enable_lapic_nmi_watchdog(); |
278 | 357 | ||
279 | return false; | 358 | return false; |
280 | #endif | ||
281 | } | 359 | } |
282 | 360 | ||
283 | static void release_pmc_hardware(void) | 361 | static void release_pmc_hardware(void) |
284 | { | 362 | { |
285 | #ifdef CONFIG_X86_LOCAL_APIC | ||
286 | int i; | 363 | int i; |
287 | 364 | ||
288 | for (i = 0; i < x86_pmu.num_events; i++) { | 365 | for (i = 0; i < x86_pmu.num_counters; i++) { |
289 | release_perfctr_nmi(x86_pmu.perfctr + i); | 366 | release_perfctr_nmi(x86_pmu.perfctr + i); |
290 | release_evntsel_nmi(x86_pmu.eventsel + i); | 367 | release_evntsel_nmi(x86_pmu.eventsel + i); |
291 | } | 368 | } |
292 | 369 | ||
293 | if (nmi_watchdog == NMI_LOCAL_APIC) | 370 | if (nmi_watchdog == NMI_LOCAL_APIC) |
294 | enable_lapic_nmi_watchdog(); | 371 | enable_lapic_nmi_watchdog(); |
295 | #endif | ||
296 | } | ||
297 | |||
298 | static inline bool bts_available(void) | ||
299 | { | ||
300 | return x86_pmu.enable_bts != NULL; | ||
301 | } | ||
302 | |||
303 | static void init_debug_store_on_cpu(int cpu) | ||
304 | { | ||
305 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
306 | |||
307 | if (!ds) | ||
308 | return; | ||
309 | |||
310 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
311 | (u32)((u64)(unsigned long)ds), | ||
312 | (u32)((u64)(unsigned long)ds >> 32)); | ||
313 | } | ||
314 | |||
315 | static void fini_debug_store_on_cpu(int cpu) | ||
316 | { | ||
317 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
318 | return; | ||
319 | |||
320 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
321 | } | ||
322 | |||
323 | static void release_bts_hardware(void) | ||
324 | { | ||
325 | int cpu; | ||
326 | |||
327 | if (!bts_available()) | ||
328 | return; | ||
329 | |||
330 | get_online_cpus(); | ||
331 | |||
332 | for_each_online_cpu(cpu) | ||
333 | fini_debug_store_on_cpu(cpu); | ||
334 | |||
335 | for_each_possible_cpu(cpu) { | ||
336 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
337 | |||
338 | if (!ds) | ||
339 | continue; | ||
340 | |||
341 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
342 | |||
343 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
344 | kfree(ds); | ||
345 | } | ||
346 | |||
347 | put_online_cpus(); | ||
348 | } | 372 | } |
349 | 373 | ||
350 | static int reserve_bts_hardware(void) | 374 | #else |
351 | { | ||
352 | int cpu, err = 0; | ||
353 | |||
354 | if (!bts_available()) | ||
355 | return 0; | ||
356 | |||
357 | get_online_cpus(); | ||
358 | |||
359 | for_each_possible_cpu(cpu) { | ||
360 | struct debug_store *ds; | ||
361 | void *buffer; | ||
362 | |||
363 | err = -ENOMEM; | ||
364 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
365 | if (unlikely(!buffer)) | ||
366 | break; | ||
367 | |||
368 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
369 | if (unlikely(!ds)) { | ||
370 | kfree(buffer); | ||
371 | break; | ||
372 | } | ||
373 | |||
374 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
375 | ds->bts_index = ds->bts_buffer_base; | ||
376 | ds->bts_absolute_maximum = | ||
377 | ds->bts_buffer_base + BTS_BUFFER_SIZE; | ||
378 | ds->bts_interrupt_threshold = | ||
379 | ds->bts_absolute_maximum - BTS_OVFL_TH; | ||
380 | |||
381 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
382 | err = 0; | ||
383 | } | ||
384 | 375 | ||
385 | if (err) | 376 | static bool reserve_pmc_hardware(void) { return true; } |
386 | release_bts_hardware(); | 377 | static void release_pmc_hardware(void) {} |
387 | else { | ||
388 | for_each_online_cpu(cpu) | ||
389 | init_debug_store_on_cpu(cpu); | ||
390 | } | ||
391 | 378 | ||
392 | put_online_cpus(); | 379 | #endif |
393 | 380 | ||
394 | return err; | 381 | static int reserve_ds_buffers(void); |
395 | } | 382 | static void release_ds_buffers(void); |
396 | 383 | ||
397 | static void hw_perf_event_destroy(struct perf_event *event) | 384 | static void hw_perf_event_destroy(struct perf_event *event) |
398 | { | 385 | { |
399 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { | 386 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { |
400 | release_pmc_hardware(); | 387 | release_pmc_hardware(); |
401 | release_bts_hardware(); | 388 | release_ds_buffers(); |
402 | mutex_unlock(&pmc_reserve_mutex); | 389 | mutex_unlock(&pmc_reserve_mutex); |
403 | } | 390 | } |
404 | } | 391 | } |
@@ -441,54 +428,11 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) | |||
441 | return 0; | 428 | return 0; |
442 | } | 429 | } |
443 | 430 | ||
444 | /* | 431 | static int x86_setup_perfctr(struct perf_event *event) |
445 | * Setup the hardware configuration for a given attr_type | ||
446 | */ | ||
447 | static int __hw_perf_event_init(struct perf_event *event) | ||
448 | { | 432 | { |
449 | struct perf_event_attr *attr = &event->attr; | 433 | struct perf_event_attr *attr = &event->attr; |
450 | struct hw_perf_event *hwc = &event->hw; | 434 | struct hw_perf_event *hwc = &event->hw; |
451 | u64 config; | 435 | u64 config; |
452 | int err; | ||
453 | |||
454 | if (!x86_pmu_initialized()) | ||
455 | return -ENODEV; | ||
456 | |||
457 | err = 0; | ||
458 | if (!atomic_inc_not_zero(&active_events)) { | ||
459 | mutex_lock(&pmc_reserve_mutex); | ||
460 | if (atomic_read(&active_events) == 0) { | ||
461 | if (!reserve_pmc_hardware()) | ||
462 | err = -EBUSY; | ||
463 | else | ||
464 | err = reserve_bts_hardware(); | ||
465 | } | ||
466 | if (!err) | ||
467 | atomic_inc(&active_events); | ||
468 | mutex_unlock(&pmc_reserve_mutex); | ||
469 | } | ||
470 | if (err) | ||
471 | return err; | ||
472 | |||
473 | event->destroy = hw_perf_event_destroy; | ||
474 | |||
475 | /* | ||
476 | * Generate PMC IRQs: | ||
477 | * (keep 'enabled' bit clear for now) | ||
478 | */ | ||
479 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | ||
480 | |||
481 | hwc->idx = -1; | ||
482 | hwc->last_cpu = -1; | ||
483 | hwc->last_tag = ~0ULL; | ||
484 | |||
485 | /* | ||
486 | * Count user and OS events unless requested not to. | ||
487 | */ | ||
488 | if (!attr->exclude_user) | ||
489 | hwc->config |= ARCH_PERFMON_EVENTSEL_USR; | ||
490 | if (!attr->exclude_kernel) | ||
491 | hwc->config |= ARCH_PERFMON_EVENTSEL_OS; | ||
492 | 436 | ||
493 | if (!hwc->sample_period) { | 437 | if (!hwc->sample_period) { |
494 | hwc->sample_period = x86_pmu.max_period; | 438 | hwc->sample_period = x86_pmu.max_period; |
@@ -505,16 +449,8 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
505 | return -EOPNOTSUPP; | 449 | return -EOPNOTSUPP; |
506 | } | 450 | } |
507 | 451 | ||
508 | /* | 452 | if (attr->type == PERF_TYPE_RAW) |
509 | * Raw hw_event type provide the config in the hw_event structure | ||
510 | */ | ||
511 | if (attr->type == PERF_TYPE_RAW) { | ||
512 | hwc->config |= x86_pmu.raw_event(attr->config); | ||
513 | if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) && | ||
514 | perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
515 | return -EACCES; | ||
516 | return 0; | 453 | return 0; |
517 | } | ||
518 | 454 | ||
519 | if (attr->type == PERF_TYPE_HW_CACHE) | 455 | if (attr->type == PERF_TYPE_HW_CACHE) |
520 | return set_ext_hw_attr(hwc, attr); | 456 | return set_ext_hw_attr(hwc, attr); |
@@ -539,11 +475,11 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
539 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | 475 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && |
540 | (hwc->sample_period == 1)) { | 476 | (hwc->sample_period == 1)) { |
541 | /* BTS is not supported by this architecture. */ | 477 | /* BTS is not supported by this architecture. */ |
542 | if (!bts_available()) | 478 | if (!x86_pmu.bts) |
543 | return -EOPNOTSUPP; | 479 | return -EOPNOTSUPP; |
544 | 480 | ||
545 | /* BTS is currently only allowed for user-mode. */ | 481 | /* BTS is currently only allowed for user-mode. */ |
546 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | 482 | if (!attr->exclude_kernel) |
547 | return -EOPNOTSUPP; | 483 | return -EOPNOTSUPP; |
548 | } | 484 | } |
549 | 485 | ||
@@ -552,12 +488,87 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
552 | return 0; | 488 | return 0; |
553 | } | 489 | } |
554 | 490 | ||
491 | static int x86_pmu_hw_config(struct perf_event *event) | ||
492 | { | ||
493 | if (event->attr.precise_ip) { | ||
494 | int precise = 0; | ||
495 | |||
496 | /* Support for constant skid */ | ||
497 | if (x86_pmu.pebs) | ||
498 | precise++; | ||
499 | |||
500 | /* Support for IP fixup */ | ||
501 | if (x86_pmu.lbr_nr) | ||
502 | precise++; | ||
503 | |||
504 | if (event->attr.precise_ip > precise) | ||
505 | return -EOPNOTSUPP; | ||
506 | } | ||
507 | |||
508 | /* | ||
509 | * Generate PMC IRQs: | ||
510 | * (keep 'enabled' bit clear for now) | ||
511 | */ | ||
512 | event->hw.config = ARCH_PERFMON_EVENTSEL_INT; | ||
513 | |||
514 | /* | ||
515 | * Count user and OS events unless requested not to | ||
516 | */ | ||
517 | if (!event->attr.exclude_user) | ||
518 | event->hw.config |= ARCH_PERFMON_EVENTSEL_USR; | ||
519 | if (!event->attr.exclude_kernel) | ||
520 | event->hw.config |= ARCH_PERFMON_EVENTSEL_OS; | ||
521 | |||
522 | if (event->attr.type == PERF_TYPE_RAW) | ||
523 | event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; | ||
524 | |||
525 | return x86_setup_perfctr(event); | ||
526 | } | ||
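x86_pmu_hw_config() caps attr.precise_ip at what the hardware can honour: one level when PEBS is present (constant skid) and a second when the LBR can be used to fix up the reported IP. From userspace the field is simply set in perf_event_attr and the open retried at a lower level on failure; a hedged sketch using the raw syscall (there is no glibc wrapper for perf_event_open):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int open_cycles(unsigned int precise)
{
    struct perf_event_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.size = sizeof(attr);
    attr.type = PERF_TYPE_HARDWARE;
    attr.config = PERF_COUNT_HW_CPU_CYCLES;
    attr.sample_period = 100000;
    attr.exclude_kernel = 1;
    attr.precise_ip = precise;  /* 0 = any skid, 1 = constant skid, 2 = fixed-up IP */

    return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

int main(void)
{
    int precise, fd = -1;

    /* Ask for the most precise IP first, fall back when the PMU can't do it. */
    for (precise = 2; precise >= 0; precise--) {
        fd = open_cycles(precise);
        if (fd >= 0)
            break;
    }

    if (fd < 0) {
        perror("perf_event_open");
        return 1;
    }
    printf("cycles event opened with precise_ip=%d\n", precise);
    close(fd);
    return 0;
}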
527 | |||
528 | /* | ||
529 | * Setup the hardware configuration for a given attr_type | ||
530 | */ | ||
531 | static int __hw_perf_event_init(struct perf_event *event) | ||
532 | { | ||
533 | int err; | ||
534 | |||
535 | if (!x86_pmu_initialized()) | ||
536 | return -ENODEV; | ||
537 | |||
538 | err = 0; | ||
539 | if (!atomic_inc_not_zero(&active_events)) { | ||
540 | mutex_lock(&pmc_reserve_mutex); | ||
541 | if (atomic_read(&active_events) == 0) { | ||
542 | if (!reserve_pmc_hardware()) | ||
543 | err = -EBUSY; | ||
544 | else { | ||
545 | err = reserve_ds_buffers(); | ||
546 | if (err) | ||
547 | release_pmc_hardware(); | ||
548 | } | ||
549 | } | ||
550 | if (!err) | ||
551 | atomic_inc(&active_events); | ||
552 | mutex_unlock(&pmc_reserve_mutex); | ||
553 | } | ||
554 | if (err) | ||
555 | return err; | ||
556 | |||
557 | event->destroy = hw_perf_event_destroy; | ||
558 | |||
559 | event->hw.idx = -1; | ||
560 | event->hw.last_cpu = -1; | ||
561 | event->hw.last_tag = ~0ULL; | ||
562 | |||
563 | return x86_pmu.hw_config(event); | ||
564 | } | ||
565 | |||
555 | static void x86_pmu_disable_all(void) | 566 | static void x86_pmu_disable_all(void) |
556 | { | 567 | { |
557 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 568 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
558 | int idx; | 569 | int idx; |
559 | 570 | ||
560 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 571 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
561 | u64 val; | 572 | u64 val; |
562 | 573 | ||
563 | if (!test_bit(idx, cpuc->active_mask)) | 574 | if (!test_bit(idx, cpuc->active_mask)) |
@@ -587,12 +598,12 @@ void hw_perf_disable(void) | |||
587 | x86_pmu.disable_all(); | 598 | x86_pmu.disable_all(); |
588 | } | 599 | } |
589 | 600 | ||
590 | static void x86_pmu_enable_all(void) | 601 | static void x86_pmu_enable_all(int added) |
591 | { | 602 | { |
592 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 603 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
593 | int idx; | 604 | int idx; |
594 | 605 | ||
595 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 606 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
596 | struct perf_event *event = cpuc->events[idx]; | 607 | struct perf_event *event = cpuc->events[idx]; |
597 | u64 val; | 608 | u64 val; |
598 | 609 | ||
@@ -667,14 +678,14 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
667 | * assign events to counters starting with most | 678 | * assign events to counters starting with most |
668 | * constrained events. | 679 | * constrained events. |
669 | */ | 680 | */ |
670 | wmax = x86_pmu.num_events; | 681 | wmax = x86_pmu.num_counters; |
671 | 682 | ||
672 | /* | 683 | /* |
673 | * when fixed event counters are present, | 684 | * when fixed event counters are present, |
674 | * wmax is incremented by 1 to account | 685 | * wmax is incremented by 1 to account |
675 | * for one more choice | 686 | * for one more choice |
676 | */ | 687 | */ |
677 | if (x86_pmu.num_events_fixed) | 688 | if (x86_pmu.num_counters_fixed) |
678 | wmax++; | 689 | wmax++; |
679 | 690 | ||
680 | for (w = 1, num = n; num && w <= wmax; w++) { | 691 | for (w = 1, num = n; num && w <= wmax; w++) { |
@@ -724,7 +735,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, | |||
724 | struct perf_event *event; | 735 | struct perf_event *event; |
725 | int n, max_count; | 736 | int n, max_count; |
726 | 737 | ||
727 | max_count = x86_pmu.num_events + x86_pmu.num_events_fixed; | 738 | max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed; |
728 | 739 | ||
729 | /* current number of events already accepted */ | 740 | /* current number of events already accepted */ |
730 | n = cpuc->n_events; | 741 | n = cpuc->n_events; |
@@ -795,7 +806,7 @@ void hw_perf_enable(void) | |||
795 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 806 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
796 | struct perf_event *event; | 807 | struct perf_event *event; |
797 | struct hw_perf_event *hwc; | 808 | struct hw_perf_event *hwc; |
798 | int i; | 809 | int i, added = cpuc->n_added; |
799 | 810 | ||
800 | if (!x86_pmu_initialized()) | 811 | if (!x86_pmu_initialized()) |
801 | return; | 812 | return; |
@@ -847,19 +858,20 @@ void hw_perf_enable(void) | |||
847 | cpuc->enabled = 1; | 858 | cpuc->enabled = 1; |
848 | barrier(); | 859 | barrier(); |
849 | 860 | ||
850 | x86_pmu.enable_all(); | 861 | x86_pmu.enable_all(added); |
851 | } | 862 | } |
852 | 863 | ||
853 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) | 864 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, |
865 | u64 enable_mask) | ||
854 | { | 866 | { |
855 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | 867 | wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask); |
856 | hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
857 | } | 868 | } |
858 | 869 | ||
859 | static inline void x86_pmu_disable_event(struct perf_event *event) | 870 | static inline void x86_pmu_disable_event(struct perf_event *event) |
860 | { | 871 | { |
861 | struct hw_perf_event *hwc = &event->hw; | 872 | struct hw_perf_event *hwc = &event->hw; |
862 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config); | 873 | |
874 | wrmsrl(hwc->config_base + hwc->idx, hwc->config); | ||
863 | } | 875 | } |
864 | 876 | ||
865 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | 877 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
@@ -874,7 +886,7 @@ x86_perf_event_set_period(struct perf_event *event) | |||
874 | struct hw_perf_event *hwc = &event->hw; | 886 | struct hw_perf_event *hwc = &event->hw; |
875 | s64 left = atomic64_read(&hwc->period_left); | 887 | s64 left = atomic64_read(&hwc->period_left); |
876 | s64 period = hwc->sample_period; | 888 | s64 period = hwc->sample_period; |
877 | int err, ret = 0, idx = hwc->idx; | 889 | int ret = 0, idx = hwc->idx; |
878 | 890 | ||
879 | if (idx == X86_PMC_IDX_FIXED_BTS) | 891 | if (idx == X86_PMC_IDX_FIXED_BTS) |
880 | return 0; | 892 | return 0; |
@@ -912,8 +924,8 @@ x86_perf_event_set_period(struct perf_event *event) | |||
912 | */ | 924 | */ |
913 | atomic64_set(&hwc->prev_count, (u64)-left); | 925 | atomic64_set(&hwc->prev_count, (u64)-left); |
914 | 926 | ||
915 | err = checking_wrmsrl(hwc->event_base + idx, | 927 | wrmsrl(hwc->event_base + idx, |
916 | (u64)(-left) & x86_pmu.event_mask); | 928 | (u64)(-left) & x86_pmu.cntval_mask); |
917 | 929 | ||
918 | perf_event_update_userpage(event); | 930 | perf_event_update_userpage(event); |
919 | 931 | ||
@@ -924,7 +936,8 @@ static void x86_pmu_enable_event(struct perf_event *event) | |||
924 | { | 936 | { |
925 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 937 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
926 | if (cpuc->enabled) | 938 | if (cpuc->enabled) |
927 | __x86_pmu_enable_event(&event->hw); | 939 | __x86_pmu_enable_event(&event->hw, |
940 | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
928 | } | 941 | } |
929 | 942 | ||
930 | /* | 943 | /* |
@@ -950,7 +963,15 @@ static int x86_pmu_enable(struct perf_event *event) | |||
950 | if (n < 0) | 963 | if (n < 0) |
951 | return n; | 964 | return n; |
952 | 965 | ||
953 | ret = x86_schedule_events(cpuc, n, assign); | 966 | /* |
967 | * If group events scheduling transaction was started, | ||
968 | * skip the schedulability test here, it will be peformed | ||
969 | * at commit time(->commit_txn) as a whole | ||
970 | */ | ||
971 | if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) | ||
972 | goto out; | ||
973 | |||
974 | ret = x86_pmu.schedule_events(cpuc, n, assign); | ||
954 | if (ret) | 975 | if (ret) |
955 | return ret; | 976 | return ret; |
956 | /* | 977 | /* |
@@ -959,6 +980,7 @@ static int x86_pmu_enable(struct perf_event *event) | |||
959 | */ | 980 | */ |
960 | memcpy(cpuc->assign, assign, n*sizeof(int)); | 981 | memcpy(cpuc->assign, assign, n*sizeof(int)); |
961 | 982 | ||
983 | out: | ||
962 | cpuc->n_events = n; | 984 | cpuc->n_events = n; |
963 | cpuc->n_added += n - n0; | 985 | cpuc->n_added += n - n0; |
964 | 986 | ||
@@ -991,11 +1013,12 @@ static void x86_pmu_unthrottle(struct perf_event *event) | |||
991 | void perf_event_print_debug(void) | 1013 | void perf_event_print_debug(void) |
992 | { | 1014 | { |
993 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; | 1015 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; |
1016 | u64 pebs; | ||
994 | struct cpu_hw_events *cpuc; | 1017 | struct cpu_hw_events *cpuc; |
995 | unsigned long flags; | 1018 | unsigned long flags; |
996 | int cpu, idx; | 1019 | int cpu, idx; |
997 | 1020 | ||
998 | if (!x86_pmu.num_events) | 1021 | if (!x86_pmu.num_counters) |
999 | return; | 1022 | return; |
1000 | 1023 | ||
1001 | local_irq_save(flags); | 1024 | local_irq_save(flags); |
@@ -1008,16 +1031,18 @@ void perf_event_print_debug(void) | |||
1008 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | 1031 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); |
1009 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); | 1032 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); |
1010 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); | 1033 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); |
1034 | rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); | ||
1011 | 1035 | ||
1012 | pr_info("\n"); | 1036 | pr_info("\n"); |
1013 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); | 1037 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); |
1014 | pr_info("CPU#%d: status: %016llx\n", cpu, status); | 1038 | pr_info("CPU#%d: status: %016llx\n", cpu, status); |
1015 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); | 1039 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); |
1016 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); | 1040 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); |
1041 | pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); | ||
1017 | } | 1042 | } |
1018 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); | 1043 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); |
1019 | 1044 | ||
1020 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1045 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1021 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | 1046 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); |
1022 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); | 1047 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); |
1023 | 1048 | ||
@@ -1030,7 +1055,7 @@ void perf_event_print_debug(void) | |||
1030 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", | 1055 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", |
1031 | cpu, idx, prev_left); | 1056 | cpu, idx, prev_left); |
1032 | } | 1057 | } |
1033 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | 1058 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { |
1034 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); | 1059 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); |
1035 | 1060 | ||
1036 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", | 1061 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", |
@@ -1095,7 +1120,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1095 | 1120 | ||
1096 | cpuc = &__get_cpu_var(cpu_hw_events); | 1121 | cpuc = &__get_cpu_var(cpu_hw_events); |
1097 | 1122 | ||
1098 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1123 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1099 | if (!test_bit(idx, cpuc->active_mask)) | 1124 | if (!test_bit(idx, cpuc->active_mask)) |
1100 | continue; | 1125 | continue; |
1101 | 1126 | ||
@@ -1103,7 +1128,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1103 | hwc = &event->hw; | 1128 | hwc = &event->hw; |
1104 | 1129 | ||
1105 | val = x86_perf_event_update(event); | 1130 | val = x86_perf_event_update(event); |
1106 | if (val & (1ULL << (x86_pmu.event_bits - 1))) | 1131 | if (val & (1ULL << (x86_pmu.cntval_bits - 1))) |
1107 | continue; | 1132 | continue; |
1108 | 1133 | ||
1109 | /* | 1134 | /* |
@@ -1146,7 +1171,6 @@ void set_perf_event_pending(void) | |||
1146 | 1171 | ||
1147 | void perf_events_lapic_init(void) | 1172 | void perf_events_lapic_init(void) |
1148 | { | 1173 | { |
1149 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1150 | if (!x86_pmu.apic || !x86_pmu_initialized()) | 1174 | if (!x86_pmu.apic || !x86_pmu_initialized()) |
1151 | return; | 1175 | return; |
1152 | 1176 | ||
@@ -1154,7 +1178,6 @@ void perf_events_lapic_init(void) | |||
1154 | * Always use NMI for PMU | 1178 | * Always use NMI for PMU |
1155 | */ | 1179 | */ |
1156 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1180 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1157 | #endif | ||
1158 | } | 1181 | } |
1159 | 1182 | ||
1160 | static int __kprobes | 1183 | static int __kprobes |
@@ -1178,9 +1201,7 @@ perf_event_nmi_handler(struct notifier_block *self, | |||
1178 | 1201 | ||
1179 | regs = args->regs; | 1202 | regs = args->regs; |
1180 | 1203 | ||
1181 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1182 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1204 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1183 | #endif | ||
1184 | /* | 1205 | /* |
1185 | * Can't rely on the handled return value to say it was our NMI, two | 1206 | * Can't rely on the handled return value to say it was our NMI, two |
1186 | * events could trigger 'simultaneously' raising two back-to-back NMIs. | 1207 | * events could trigger 'simultaneously' raising two back-to-back NMIs. |
@@ -1217,118 +1238,11 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | |||
1217 | return &unconstrained; | 1238 | return &unconstrained; |
1218 | } | 1239 | } |
1219 | 1240 | ||
1220 | static int x86_event_sched_in(struct perf_event *event, | ||
1221 | struct perf_cpu_context *cpuctx) | ||
1222 | { | ||
1223 | int ret = 0; | ||
1224 | |||
1225 | event->state = PERF_EVENT_STATE_ACTIVE; | ||
1226 | event->oncpu = smp_processor_id(); | ||
1227 | event->tstamp_running += event->ctx->time - event->tstamp_stopped; | ||
1228 | |||
1229 | if (!is_x86_event(event)) | ||
1230 | ret = event->pmu->enable(event); | ||
1231 | |||
1232 | if (!ret && !is_software_event(event)) | ||
1233 | cpuctx->active_oncpu++; | ||
1234 | |||
1235 | if (!ret && event->attr.exclusive) | ||
1236 | cpuctx->exclusive = 1; | ||
1237 | |||
1238 | return ret; | ||
1239 | } | ||
1240 | |||
1241 | static void x86_event_sched_out(struct perf_event *event, | ||
1242 | struct perf_cpu_context *cpuctx) | ||
1243 | { | ||
1244 | event->state = PERF_EVENT_STATE_INACTIVE; | ||
1245 | event->oncpu = -1; | ||
1246 | |||
1247 | if (!is_x86_event(event)) | ||
1248 | event->pmu->disable(event); | ||
1249 | |||
1250 | event->tstamp_running -= event->ctx->time - event->tstamp_stopped; | ||
1251 | |||
1252 | if (!is_software_event(event)) | ||
1253 | cpuctx->active_oncpu--; | ||
1254 | |||
1255 | if (event->attr.exclusive || !cpuctx->active_oncpu) | ||
1256 | cpuctx->exclusive = 0; | ||
1257 | } | ||
1258 | |||
1259 | /* | ||
1260 | * Called to enable a whole group of events. | ||
1261 | * Returns 1 if the group was enabled, or -EAGAIN if it could not be. | ||
1262 | * Assumes the caller has disabled interrupts and has | ||
1263 | * frozen the PMU with hw_perf_save_disable. | ||
1264 | * | ||
1265 | * called with PMU disabled. If successful and return value 1, | ||
1266 | * then guaranteed to call perf_enable() and hw_perf_enable() | ||
1267 | */ | ||
1268 | int hw_perf_group_sched_in(struct perf_event *leader, | ||
1269 | struct perf_cpu_context *cpuctx, | ||
1270 | struct perf_event_context *ctx) | ||
1271 | { | ||
1272 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1273 | struct perf_event *sub; | ||
1274 | int assign[X86_PMC_IDX_MAX]; | ||
1275 | int n0, n1, ret; | ||
1276 | |||
1277 | /* n0 = total number of events */ | ||
1278 | n0 = collect_events(cpuc, leader, true); | ||
1279 | if (n0 < 0) | ||
1280 | return n0; | ||
1281 | |||
1282 | ret = x86_schedule_events(cpuc, n0, assign); | ||
1283 | if (ret) | ||
1284 | return ret; | ||
1285 | |||
1286 | ret = x86_event_sched_in(leader, cpuctx); | ||
1287 | if (ret) | ||
1288 | return ret; | ||
1289 | |||
1290 | n1 = 1; | ||
1291 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | ||
1292 | if (sub->state > PERF_EVENT_STATE_OFF) { | ||
1293 | ret = x86_event_sched_in(sub, cpuctx); | ||
1294 | if (ret) | ||
1295 | goto undo; | ||
1296 | ++n1; | ||
1297 | } | ||
1298 | } | ||
1299 | /* | ||
1300 | * copy new assignment, now we know it is possible | ||
1301 | * will be used by hw_perf_enable() | ||
1302 | */ | ||
1303 | memcpy(cpuc->assign, assign, n0*sizeof(int)); | ||
1304 | |||
1305 | cpuc->n_events = n0; | ||
1306 | cpuc->n_added += n1; | ||
1307 | ctx->nr_active += n1; | ||
1308 | |||
1309 | /* | ||
1310 | * 1 means successful and events are active | ||
1311 | * This is not quite true because we defer | ||
1312 | * actual activation until hw_perf_enable() but | ||
1313 | * this way we ensure the caller won't try to enable | ||
1314 | * individual events | ||
1315 | */ | ||
1316 | return 1; | ||
1317 | undo: | ||
1318 | x86_event_sched_out(leader, cpuctx); | ||
1319 | n0 = 1; | ||
1320 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | ||
1321 | if (sub->state == PERF_EVENT_STATE_ACTIVE) { | ||
1322 | x86_event_sched_out(sub, cpuctx); | ||
1323 | if (++n0 == n1) | ||
1324 | break; | ||
1325 | } | ||
1326 | } | ||
1327 | return ret; | ||
1328 | } | ||
1329 | |||
1330 | #include "perf_event_amd.c" | 1241 | #include "perf_event_amd.c" |
1331 | #include "perf_event_p6.c" | 1242 | #include "perf_event_p6.c" |
1243 | #include "perf_event_p4.c" | ||
1244 | #include "perf_event_intel_lbr.c" | ||
1245 | #include "perf_event_intel_ds.c" | ||
1332 | #include "perf_event_intel.c" | 1246 | #include "perf_event_intel.c" |
1333 | 1247 | ||
1334 | static int __cpuinit | 1248 | static int __cpuinit |
@@ -1402,48 +1316,50 @@ void __init init_hw_perf_events(void) | |||
1402 | 1316 | ||
1403 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1317 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
1404 | 1318 | ||
1405 | if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { | 1319 | if (x86_pmu.quirks) |
1320 | x86_pmu.quirks(); | ||
1321 | |||
1322 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | ||
1406 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", | 1323 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", |
1407 | x86_pmu.num_events, X86_PMC_MAX_GENERIC); | 1324 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); |
1408 | x86_pmu.num_events = X86_PMC_MAX_GENERIC; | 1325 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; |
1409 | } | 1326 | } |
1410 | perf_event_mask = (1 << x86_pmu.num_events) - 1; | 1327 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; |
1411 | perf_max_events = x86_pmu.num_events; | 1328 | perf_max_events = x86_pmu.num_counters; |
1412 | 1329 | ||
1413 | if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { | 1330 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { |
1414 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", | 1331 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", |
1415 | x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED); | 1332 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); |
1416 | x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; | 1333 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; |
1417 | } | 1334 | } |
1418 | 1335 | ||
1419 | perf_event_mask |= | 1336 | x86_pmu.intel_ctrl |= |
1420 | ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; | 1337 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; |
1421 | x86_pmu.intel_ctrl = perf_event_mask; | ||
1422 | 1338 | ||
1423 | perf_events_lapic_init(); | 1339 | perf_events_lapic_init(); |
1424 | register_die_notifier(&perf_event_nmi_notifier); | 1340 | register_die_notifier(&perf_event_nmi_notifier); |
1425 | 1341 | ||
1426 | unconstrained = (struct event_constraint) | 1342 | unconstrained = (struct event_constraint) |
1427 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, | 1343 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, |
1428 | 0, x86_pmu.num_events); | 1344 | 0, x86_pmu.num_counters); |
1429 | 1345 | ||
1430 | if (x86_pmu.event_constraints) { | 1346 | if (x86_pmu.event_constraints) { |
1431 | for_each_event_constraint(c, x86_pmu.event_constraints) { | 1347 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
1432 | if (c->cmask != INTEL_ARCH_FIXED_MASK) | 1348 | if (c->cmask != X86_RAW_EVENT_MASK) |
1433 | continue; | 1349 | continue; |
1434 | 1350 | ||
1435 | c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1; | 1351 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; |
1436 | c->weight += x86_pmu.num_events; | 1352 | c->weight += x86_pmu.num_counters; |
1437 | } | 1353 | } |
1438 | } | 1354 | } |
1439 | 1355 | ||
1440 | pr_info("... version: %d\n", x86_pmu.version); | 1356 | pr_info("... version: %d\n", x86_pmu.version); |
1441 | pr_info("... bit width: %d\n", x86_pmu.event_bits); | 1357 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); |
1442 | pr_info("... generic registers: %d\n", x86_pmu.num_events); | 1358 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); |
1443 | pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); | 1359 | pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); |
1444 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); | 1360 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); |
1445 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); | 1361 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); |
1446 | pr_info("... event mask: %016Lx\n", perf_event_mask); | 1362 | pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); |
1447 | 1363 | ||
1448 | perf_cpu_notifier(x86_pmu_notifier); | 1364 | perf_cpu_notifier(x86_pmu_notifier); |
1449 | } | 1365 | } |
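The initialisation above now stores what used to be perf_event_mask directly in x86_pmu.intel_ctrl: the low bits enable the generic counters and the bits starting at X86_PMC_IDX_FIXED enable the fixed ones. A short sketch of that mask construction, assuming the usual X86_PMC_IDX_FIXED value of 32 and Nehalem-like counter counts as an example:

/* Sketch: build the GLOBAL_CTRL-style enable mask the way
 * init_hw_perf_events() does above.  X86_PMC_IDX_FIXED = 32 is
 * assumed, matching the usual perf_event.h definition. */
#include <stdio.h>
#include <stdint.h>

#define X86_PMC_IDX_FIXED 32

static uint64_t build_intel_ctrl(int num_counters, int num_counters_fixed)
{
        uint64_t mask;

        mask  = (1ULL << num_counters) - 1;          /* generic PMCs   */
        mask |= ((1ULL << num_counters_fixed) - 1)   /* fixed counters */
                << X86_PMC_IDX_FIXED;
        return mask;
}

int main(void)
{
        /* e.g. 4 generic + 3 fixed counters -> 0x70000000f */
        printf("intel_ctrl = %016llx\n",
               (unsigned long long)build_intel_ctrl(4, 3));
        return 0;
}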
@@ -1453,6 +1369,59 @@ static inline void x86_pmu_read(struct perf_event *event) | |||
1453 | x86_perf_event_update(event); | 1369 | x86_perf_event_update(event); |
1454 | } | 1370 | } |
1455 | 1371 | ||
1372 | /* | ||
1373 | * Start group events scheduling transaction | ||
1374 | * Set the flag to make pmu::enable() not perform the | ||
1375 | * schedulability test, it will be performed at commit time | ||
1376 | */ | ||
1377 | static void x86_pmu_start_txn(const struct pmu *pmu) | ||
1378 | { | ||
1379 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1380 | |||
1381 | cpuc->group_flag |= PERF_EVENT_TXN_STARTED; | ||
1382 | } | ||
1383 | |||
1384 | /* | ||
1385 | * Stop group events scheduling transaction | ||
1386 | * Clear the flag and pmu::enable() will perform the | ||
1387 | * schedulability test. | ||
1388 | */ | ||
1389 | static void x86_pmu_cancel_txn(const struct pmu *pmu) | ||
1390 | { | ||
1391 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1392 | |||
1393 | cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; | ||
1394 | } | ||
1395 | |||
1396 | /* | ||
1397 | * Commit group events scheduling transaction | ||
1398 | * Perform the group schedulability test as a whole | ||
1399 | * Return 0 if success | ||
1400 | */ | ||
1401 | static int x86_pmu_commit_txn(const struct pmu *pmu) | ||
1402 | { | ||
1403 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1404 | int assign[X86_PMC_IDX_MAX]; | ||
1405 | int n, ret; | ||
1406 | |||
1407 | n = cpuc->n_events; | ||
1408 | |||
1409 | if (!x86_pmu_initialized()) | ||
1410 | return -EAGAIN; | ||
1411 | |||
1412 | ret = x86_pmu.schedule_events(cpuc, n, assign); | ||
1413 | if (ret) | ||
1414 | return ret; | ||
1415 | |||
1416 | /* | ||
1417 | * copy new assignment, now we know it is possible | ||
1418 | * will be used by hw_perf_enable() | ||
1419 | */ | ||
1420 | memcpy(cpuc->assign, assign, n*sizeof(int)); | ||
1421 | |||
1422 | return 0; | ||
1423 | } | ||
1424 | |||
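These three hooks replace the hw_perf_group_sched_in() implementation removed earlier in this patch: while a transaction is open the per-event schedulability test is skipped, and commit_txn() runs x86_pmu.schedule_events() once over the whole collected group. Below is a hedged, userspace-style sketch of that protocol; the counter-limit "scheduler" and the txn_* names are illustrative stand-ins, not kernel API.

/* Sketch of the group-transaction protocol introduced above.  The
 * "scheduler" here is a stand-in that only enforces a counter limit. */
#include <stdio.h>

#define NUM_COUNTERS 4

struct cpu_state {
        int n_events;   /* events collected so far        */
        int in_txn;     /* PERF_EVENT_TXN_STARTED analogue */
};

static void txn_start(struct cpu_state *s)  { s->in_txn = 1; }
static void txn_cancel(struct cpu_state *s) { s->in_txn = 0; }

/* add an event; while a transaction is open, defer the per-event test */
static int txn_add(struct cpu_state *s)
{
        if (!s->in_txn && s->n_events + 1 > NUM_COUNTERS)
                return -1;
        s->n_events++;
        return 0;
}

/* commit: run the schedulability test over the whole group at once */
static int txn_commit(struct cpu_state *s)
{
        int ok = s->n_events <= NUM_COUNTERS;

        s->in_txn = 0;
        return ok ? 0 : -1;
}

int main(void)
{
        struct cpu_state s = { 0, 0 };
        int i;

        txn_start(&s);
        for (i = 0; i < 5; i++)
                txn_add(&s);            /* deferred test: all adds succeed */
        if (txn_commit(&s)) {
                printf("group rejected at commit time (%d > %d)\n",
                       s.n_events, NUM_COUNTERS);
                txn_cancel(&s);
        }
        return 0;
}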
1456 | static const struct pmu pmu = { | 1425 | static const struct pmu pmu = { |
1457 | .enable = x86_pmu_enable, | 1426 | .enable = x86_pmu_enable, |
1458 | .disable = x86_pmu_disable, | 1427 | .disable = x86_pmu_disable, |
@@ -1460,9 +1429,38 @@ static const struct pmu pmu = { | |||
1460 | .stop = x86_pmu_stop, | 1429 | .stop = x86_pmu_stop, |
1461 | .read = x86_pmu_read, | 1430 | .read = x86_pmu_read, |
1462 | .unthrottle = x86_pmu_unthrottle, | 1431 | .unthrottle = x86_pmu_unthrottle, |
1432 | .start_txn = x86_pmu_start_txn, | ||
1433 | .cancel_txn = x86_pmu_cancel_txn, | ||
1434 | .commit_txn = x86_pmu_commit_txn, | ||
1463 | }; | 1435 | }; |
1464 | 1436 | ||
1465 | /* | 1437 | /* |
1438 | * validate that we can schedule this event | ||
1439 | */ | ||
1440 | static int validate_event(struct perf_event *event) | ||
1441 | { | ||
1442 | struct cpu_hw_events *fake_cpuc; | ||
1443 | struct event_constraint *c; | ||
1444 | int ret = 0; | ||
1445 | |||
1446 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | ||
1447 | if (!fake_cpuc) | ||
1448 | return -ENOMEM; | ||
1449 | |||
1450 | c = x86_pmu.get_event_constraints(fake_cpuc, event); | ||
1451 | |||
1452 | if (!c || !c->weight) | ||
1453 | ret = -ENOSPC; | ||
1454 | |||
1455 | if (x86_pmu.put_event_constraints) | ||
1456 | x86_pmu.put_event_constraints(fake_cpuc, event); | ||
1457 | |||
1458 | kfree(fake_cpuc); | ||
1459 | |||
1460 | return ret; | ||
1461 | } | ||
1462 | |||
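validate_event() above accepts a lone event exactly when its constraint still allows at least one counter, i.e. when the constraint's weight (the population count of its index mask) is non-zero. A minimal sketch of that check with a simplified constraint structure; the helper names are hypothetical and the popcount builtin assumes GCC/Clang.

/* Sketch of the weight test validate_event() performs above: an event
 * is schedulable on its own iff its constraint allows at least one
 * counter index. */
#include <stdio.h>
#include <stdint.h>

struct event_constraint {
        uint64_t idxmsk;   /* bitmask of counters the event may use */
        int      weight;   /* number of bits set in idxmsk          */
};

static struct event_constraint make_constraint(uint64_t idxmsk)
{
        struct event_constraint c = { idxmsk, __builtin_popcountll(idxmsk) };
        return c;
}

static int validate_event(const struct event_constraint *c)
{
        return (c && c->weight) ? 0 : -1;   /* -ENOSPC analogue */
}

int main(void)
{
        struct event_constraint any  = make_constraint(0xf);  /* PMC0-3   */
        struct event_constraint none = make_constraint(0x0);  /* no slots */

        printf("unconstrained: %d\n", validate_event(&any));
        printf("impossible:    %d\n", validate_event(&none));
        return 0;
}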
1463 | /* | ||
1466 | * validate a single event group | 1464 | * validate a single event group |
1467 | * | 1465 | * |
1468 | * validation includes: | 1466 | * validation includes: |
@@ -1502,7 +1500,7 @@ static int validate_group(struct perf_event *event) | |||
1502 | 1500 | ||
1503 | fake_cpuc->n_events = n; | 1501 | fake_cpuc->n_events = n; |
1504 | 1502 | ||
1505 | ret = x86_schedule_events(fake_cpuc, n, NULL); | 1503 | ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); |
1506 | 1504 | ||
1507 | out_free: | 1505 | out_free: |
1508 | kfree(fake_cpuc); | 1506 | kfree(fake_cpuc); |
@@ -1527,6 +1525,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
1527 | 1525 | ||
1528 | if (event->group_leader != event) | 1526 | if (event->group_leader != event) |
1529 | err = validate_group(event); | 1527 | err = validate_group(event); |
1528 | else | ||
1529 | err = validate_event(event); | ||
1530 | 1530 | ||
1531 | event->pmu = tmp; | 1531 | event->pmu = tmp; |
1532 | } | 1532 | } |
@@ -1574,8 +1574,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
1574 | { | 1574 | { |
1575 | struct perf_callchain_entry *entry = data; | 1575 | struct perf_callchain_entry *entry = data; |
1576 | 1576 | ||
1577 | if (reliable) | 1577 | callchain_store(entry, addr); |
1578 | callchain_store(entry, addr); | ||
1579 | } | 1578 | } |
1580 | 1579 | ||
1581 | static const struct stacktrace_ops backtrace_ops = { | 1580 | static const struct stacktrace_ops backtrace_ops = { |
@@ -1597,41 +1596,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
1597 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); | 1596 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); |
1598 | } | 1597 | } |
1599 | 1598 | ||
1600 | /* | ||
1601 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context | ||
1602 | */ | ||
1603 | static unsigned long | ||
1604 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
1605 | { | ||
1606 | unsigned long offset, addr = (unsigned long)from; | ||
1607 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
1608 | unsigned long size, len = 0; | ||
1609 | struct page *page; | ||
1610 | void *map; | ||
1611 | int ret; | ||
1612 | |||
1613 | do { | ||
1614 | ret = __get_user_pages_fast(addr, 1, 0, &page); | ||
1615 | if (!ret) | ||
1616 | break; | ||
1617 | |||
1618 | offset = addr & (PAGE_SIZE - 1); | ||
1619 | size = min(PAGE_SIZE - offset, n - len); | ||
1620 | |||
1621 | map = kmap_atomic(page, type); | ||
1622 | memcpy(to, map+offset, size); | ||
1623 | kunmap_atomic(map, type); | ||
1624 | put_page(page); | ||
1625 | |||
1626 | len += size; | ||
1627 | to += size; | ||
1628 | addr += size; | ||
1629 | |||
1630 | } while (len < n); | ||
1631 | |||
1632 | return len; | ||
1633 | } | ||
1634 | |||
1635 | #ifdef CONFIG_COMPAT | 1599 | #ifdef CONFIG_COMPAT |
1636 | static inline int | 1600 | static inline int |
1637 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1601 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) |
@@ -1727,6 +1691,11 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
1727 | { | 1691 | { |
1728 | struct perf_callchain_entry *entry; | 1692 | struct perf_callchain_entry *entry; |
1729 | 1693 | ||
1694 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
1695 | /* TODO: We don't support guest os callchain now */ | ||
1696 | return NULL; | ||
1697 | } | ||
1698 | |||
1730 | if (in_nmi()) | 1699 | if (in_nmi()) |
1731 | entry = &__get_cpu_var(pmc_nmi_entry); | 1700 | entry = &__get_cpu_var(pmc_nmi_entry); |
1732 | else | 1701 | else |
@@ -1748,5 +1717,43 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski | |||
1748 | */ | 1717 | */ |
1749 | regs->bp = rewind_frame_pointer(skip + 1); | 1718 | regs->bp = rewind_frame_pointer(skip + 1); |
1750 | regs->cs = __KERNEL_CS; | 1719 | regs->cs = __KERNEL_CS; |
1751 | local_save_flags(regs->flags); | 1720 | /* |
1721 | * We abuse bit 3 to pass exact information, see perf_misc_flags | ||
1722 | * and the comment with PERF_EFLAGS_EXACT. | ||
1723 | */ | ||
1724 | regs->flags = 0; | ||
1725 | } | ||
1726 | |||
1727 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | ||
1728 | { | ||
1729 | unsigned long ip; | ||
1730 | |||
1731 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) | ||
1732 | ip = perf_guest_cbs->get_guest_ip(); | ||
1733 | else | ||
1734 | ip = instruction_pointer(regs); | ||
1735 | |||
1736 | return ip; | ||
1737 | } | ||
1738 | |||
1739 | unsigned long perf_misc_flags(struct pt_regs *regs) | ||
1740 | { | ||
1741 | int misc = 0; | ||
1742 | |||
1743 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
1744 | if (perf_guest_cbs->is_user_mode()) | ||
1745 | misc |= PERF_RECORD_MISC_GUEST_USER; | ||
1746 | else | ||
1747 | misc |= PERF_RECORD_MISC_GUEST_KERNEL; | ||
1748 | } else { | ||
1749 | if (user_mode(regs)) | ||
1750 | misc |= PERF_RECORD_MISC_USER; | ||
1751 | else | ||
1752 | misc |= PERF_RECORD_MISC_KERNEL; | ||
1753 | } | ||
1754 | |||
1755 | if (regs->flags & PERF_EFLAGS_EXACT) | ||
1756 | misc |= PERF_RECORD_MISC_EXACT_IP; | ||
1757 | |||
1758 | return misc; | ||
1752 | } | 1759 | } |
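The new perf_misc_flags()/perf_instruction_pointer() pair closes out this file by folding guest-mode reporting and the PEBS "exact IP" bit into each sample's misc field. Below is a compact sketch of the same decision tree; the flag values and the context struct are placeholders, only the branching mirrors the code above.

/* Sketch of the flag composition done by perf_misc_flags() above.
 * The numeric values are illustrative, not the uapi constants; only
 * the decisions (guest vs. host, user vs. kernel, exact IP) matter. */
#include <stdio.h>

#define MISC_USER          (1 << 0)
#define MISC_KERNEL        (1 << 1)
#define MISC_GUEST_USER    (1 << 2)
#define MISC_GUEST_KERNEL  (1 << 3)
#define MISC_EXACT_IP      (1 << 4)

struct sample_ctx {
        int in_guest;     /* perf_guest_cbs->is_in_guest()   */
        int guest_user;   /* perf_guest_cbs->is_user_mode()  */
        int user_mode;    /* user_mode(regs)                 */
        int exact;        /* regs->flags & PERF_EFLAGS_EXACT */
};

static int misc_flags(const struct sample_ctx *c)
{
        int misc = 0;

        if (c->in_guest)
                misc |= c->guest_user ? MISC_GUEST_USER : MISC_GUEST_KERNEL;
        else
                misc |= c->user_mode ? MISC_USER : MISC_KERNEL;

        if (c->exact)
                misc |= MISC_EXACT_IP;
        return misc;
}

int main(void)
{
        /* host kernel sample with a precise (PEBS) instruction pointer */
        struct sample_ctx host_kernel_pebs = { 0, 0, 0, 1 };

        printf("flags: %#x\n", misc_flags(&host_kernel_pebs));
        return 0;
}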
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index db6f7d4056e1..611df11ba15e 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -2,7 +2,7 @@ | |||
2 | 2 | ||
3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); | 3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); |
4 | 4 | ||
5 | static __initconst u64 amd_hw_cache_event_ids | 5 | static __initconst const u64 amd_hw_cache_event_ids |
6 | [PERF_COUNT_HW_CACHE_MAX] | 6 | [PERF_COUNT_HW_CACHE_MAX] |
7 | [PERF_COUNT_HW_CACHE_OP_MAX] | 7 | [PERF_COUNT_HW_CACHE_OP_MAX] |
8 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 8 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -111,22 +111,19 @@ static u64 amd_pmu_event_map(int hw_event) | |||
111 | return amd_perfmon_event_map[hw_event]; | 111 | return amd_perfmon_event_map[hw_event]; |
112 | } | 112 | } |
113 | 113 | ||
114 | static u64 amd_pmu_raw_event(u64 hw_event) | 114 | static int amd_pmu_hw_config(struct perf_event *event) |
115 | { | 115 | { |
116 | #define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL | 116 | int ret = x86_pmu_hw_config(event); |
117 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | 117 | |
118 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | 118 | if (ret) |
119 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | 119 | return ret; |
120 | #define K7_EVNTSEL_REG_MASK 0x0FF000000ULL | 120 | |
121 | 121 | if (event->attr.type != PERF_TYPE_RAW) | |
122 | #define K7_EVNTSEL_MASK \ | 122 | return 0; |
123 | (K7_EVNTSEL_EVENT_MASK | \ | 123 | |
124 | K7_EVNTSEL_UNIT_MASK | \ | 124 | event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; |
125 | K7_EVNTSEL_EDGE_MASK | \ | 125 | |
126 | K7_EVNTSEL_INV_MASK | \ | 126 | return 0; |
127 | K7_EVNTSEL_REG_MASK) | ||
128 | |||
129 | return hw_event & K7_EVNTSEL_MASK; | ||
130 | } | 127 | } |
131 | 128 | ||
132 | /* | 129 | /* |
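amd_pmu_hw_config() above replaces the old per-vendor raw_event() callback: for PERF_TYPE_RAW events the user-supplied config is simply masked down to the bits user space may control before it is merged into the event-select value. A sketch of that filtering follows; the field masks mirror the K7-style layout shown in the removed code and are narrower than the real AMD64_RAW_EVENT_MASK, so treat them as illustrative.

/* Sketch of the raw-config sanitising step in amd_pmu_hw_config()
 * above: keep only the whitelisted event-description bits from the
 * user, and let the privileged bits come from the base config. */
#include <stdio.h>
#include <stdint.h>

#define EVNTSEL_EVENT_MASK 0x000000ffULL   /* event select      */
#define EVNTSEL_UNIT_MASK  0x0000ff00ULL   /* unit mask         */
#define EVNTSEL_EDGE_MASK  0x00040000ULL   /* edge detect       */
#define EVNTSEL_INV_MASK   0x00800000ULL   /* invert comparison */
#define EVNTSEL_CMASK_MASK 0xff000000ULL   /* counter mask      */

#define RAW_EVENT_MASK (EVNTSEL_EVENT_MASK | EVNTSEL_UNIT_MASK | \
                        EVNTSEL_EDGE_MASK | EVNTSEL_INV_MASK |   \
                        EVNTSEL_CMASK_MASK)

static uint64_t hw_config_raw(uint64_t base_config, uint64_t attr_config)
{
        return base_config | (attr_config & RAW_EVENT_MASK);
}

int main(void)
{
        uint64_t base = 0x530000;              /* USR|OS|INT|EN, illustrative */
        uint64_t raw  = 0xffffffffffffffffULL; /* hostile user config         */

        printf("config = %016llx\n",
               (unsigned long long)hw_config_raw(base, raw));
        return 0;
}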
@@ -165,7 +162,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | |||
165 | * be removed on one CPU at a time AND PMU is disabled | 162 | * be removed on one CPU at a time AND PMU is disabled |
166 | * when we come here | 163 | * when we come here |
167 | */ | 164 | */ |
168 | for (i = 0; i < x86_pmu.num_events; i++) { | 165 | for (i = 0; i < x86_pmu.num_counters; i++) { |
169 | if (nb->owners[i] == event) { | 166 | if (nb->owners[i] == event) { |
170 | cmpxchg(nb->owners+i, event, NULL); | 167 | cmpxchg(nb->owners+i, event, NULL); |
171 | break; | 168 | break; |
@@ -215,7 +212,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | |||
215 | struct hw_perf_event *hwc = &event->hw; | 212 | struct hw_perf_event *hwc = &event->hw; |
216 | struct amd_nb *nb = cpuc->amd_nb; | 213 | struct amd_nb *nb = cpuc->amd_nb; |
217 | struct perf_event *old = NULL; | 214 | struct perf_event *old = NULL; |
218 | int max = x86_pmu.num_events; | 215 | int max = x86_pmu.num_counters; |
219 | int i, j, k = -1; | 216 | int i, j, k = -1; |
220 | 217 | ||
221 | /* | 218 | /* |
@@ -293,7 +290,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | |||
293 | /* | 290 | /* |
294 | * initialize all possible NB constraints | 291 | * initialize all possible NB constraints |
295 | */ | 292 | */ |
296 | for (i = 0; i < x86_pmu.num_events; i++) { | 293 | for (i = 0; i < x86_pmu.num_counters; i++) { |
297 | __set_bit(i, nb->event_constraints[i].idxmsk); | 294 | __set_bit(i, nb->event_constraints[i].idxmsk); |
298 | nb->event_constraints[i].weight = 1; | 295 | nb->event_constraints[i].weight = 1; |
299 | } | 296 | } |
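The renamed loops above belong to the AMD northbridge constraint code, where every core on a node shares one set of NB counters and a slot is claimed by cmpxchg()'ing the event pointer into nb->owners[]. A hedged userspace sketch of that claim/release idea, using C11 atomics in place of the kernel's cmpxchg(); the nb_* helpers are hypothetical names.

/* Sketch of the shared-counter ownership protocol referenced above:
 * a slot in owners[] is claimed with a compare-and-swap against NULL
 * and released the same way, so only the current owner can clear it. */
#include <stdio.h>
#include <stdatomic.h>

#define NUM_COUNTERS 4

struct nb_state {
        _Atomic(void *) owners[NUM_COUNTERS];
};

/* try to claim any free slot for 'event'; returns slot index or -1 */
static int nb_claim(struct nb_state *nb, void *event)
{
        int i;

        for (i = 0; i < NUM_COUNTERS; i++) {
                void *expected = NULL;

                if (atomic_compare_exchange_strong(&nb->owners[i],
                                                   &expected, event))
                        return i;
        }
        return -1;
}

static void nb_release(struct nb_state *nb, void *event)
{
        int i;

        for (i = 0; i < NUM_COUNTERS; i++) {
                void *expected = event;

                /* only clear the slot if we still own it */
                if (atomic_compare_exchange_strong(&nb->owners[i],
                                                   &expected, NULL))
                        break;
        }
}

int main(void)
{
        static struct nb_state nb;      /* zero-initialised owners[] */
        int ev1, ev2;

        printf("event1 got NB slot %d\n", nb_claim(&nb, &ev1));
        printf("event2 got NB slot %d\n", nb_claim(&nb, &ev2));
        nb_release(&nb, &ev1);
        return 0;
}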
@@ -371,21 +368,22 @@ static void amd_pmu_cpu_dead(int cpu) | |||
371 | raw_spin_unlock(&amd_nb_lock); | 368 | raw_spin_unlock(&amd_nb_lock); |
372 | } | 369 | } |
373 | 370 | ||
374 | static __initconst struct x86_pmu amd_pmu = { | 371 | static __initconst const struct x86_pmu amd_pmu = { |
375 | .name = "AMD", | 372 | .name = "AMD", |
376 | .handle_irq = x86_pmu_handle_irq, | 373 | .handle_irq = x86_pmu_handle_irq, |
377 | .disable_all = x86_pmu_disable_all, | 374 | .disable_all = x86_pmu_disable_all, |
378 | .enable_all = x86_pmu_enable_all, | 375 | .enable_all = x86_pmu_enable_all, |
379 | .enable = x86_pmu_enable_event, | 376 | .enable = x86_pmu_enable_event, |
380 | .disable = x86_pmu_disable_event, | 377 | .disable = x86_pmu_disable_event, |
378 | .hw_config = amd_pmu_hw_config, | ||
379 | .schedule_events = x86_schedule_events, | ||
381 | .eventsel = MSR_K7_EVNTSEL0, | 380 | .eventsel = MSR_K7_EVNTSEL0, |
382 | .perfctr = MSR_K7_PERFCTR0, | 381 | .perfctr = MSR_K7_PERFCTR0, |
383 | .event_map = amd_pmu_event_map, | 382 | .event_map = amd_pmu_event_map, |
384 | .raw_event = amd_pmu_raw_event, | ||
385 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | 383 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), |
386 | .num_events = 4, | 384 | .num_counters = 4, |
387 | .event_bits = 48, | 385 | .cntval_bits = 48, |
388 | .event_mask = (1ULL << 48) - 1, | 386 | .cntval_mask = (1ULL << 48) - 1, |
389 | .apic = 1, | 387 | .apic = 1, |
390 | /* use highest bit to detect overflow */ | 388 | /* use highest bit to detect overflow */ |
391 | .max_period = (1ULL << 47) - 1, | 389 | .max_period = (1ULL << 47) - 1, |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 9c794ac87837..fdbc652d3feb 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -88,7 +88,7 @@ static u64 intel_pmu_event_map(int hw_event) | |||
88 | return intel_perfmon_event_map[hw_event]; | 88 | return intel_perfmon_event_map[hw_event]; |
89 | } | 89 | } |
90 | 90 | ||
91 | static __initconst u64 westmere_hw_cache_event_ids | 91 | static __initconst const u64 westmere_hw_cache_event_ids |
92 | [PERF_COUNT_HW_CACHE_MAX] | 92 | [PERF_COUNT_HW_CACHE_MAX] |
93 | [PERF_COUNT_HW_CACHE_OP_MAX] | 93 | [PERF_COUNT_HW_CACHE_OP_MAX] |
94 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 94 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -179,7 +179,7 @@ static __initconst u64 westmere_hw_cache_event_ids | |||
179 | }, | 179 | }, |
180 | }; | 180 | }; |
181 | 181 | ||
182 | static __initconst u64 nehalem_hw_cache_event_ids | 182 | static __initconst const u64 nehalem_hw_cache_event_ids |
183 | [PERF_COUNT_HW_CACHE_MAX] | 183 | [PERF_COUNT_HW_CACHE_MAX] |
184 | [PERF_COUNT_HW_CACHE_OP_MAX] | 184 | [PERF_COUNT_HW_CACHE_OP_MAX] |
185 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 185 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -270,7 +270,7 @@ static __initconst u64 nehalem_hw_cache_event_ids | |||
270 | }, | 270 | }, |
271 | }; | 271 | }; |
272 | 272 | ||
273 | static __initconst u64 core2_hw_cache_event_ids | 273 | static __initconst const u64 core2_hw_cache_event_ids |
274 | [PERF_COUNT_HW_CACHE_MAX] | 274 | [PERF_COUNT_HW_CACHE_MAX] |
275 | [PERF_COUNT_HW_CACHE_OP_MAX] | 275 | [PERF_COUNT_HW_CACHE_OP_MAX] |
276 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 276 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -361,7 +361,7 @@ static __initconst u64 core2_hw_cache_event_ids | |||
361 | }, | 361 | }, |
362 | }; | 362 | }; |
363 | 363 | ||
364 | static __initconst u64 atom_hw_cache_event_ids | 364 | static __initconst const u64 atom_hw_cache_event_ids |
365 | [PERF_COUNT_HW_CACHE_MAX] | 365 | [PERF_COUNT_HW_CACHE_MAX] |
366 | [PERF_COUNT_HW_CACHE_OP_MAX] | 366 | [PERF_COUNT_HW_CACHE_OP_MAX] |
367 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 367 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -452,60 +452,6 @@ static __initconst u64 atom_hw_cache_event_ids | |||
452 | }, | 452 | }, |
453 | }; | 453 | }; |
454 | 454 | ||
455 | static u64 intel_pmu_raw_event(u64 hw_event) | ||
456 | { | ||
457 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
458 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
459 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
460 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
461 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | ||
462 | |||
463 | #define CORE_EVNTSEL_MASK \ | ||
464 | (INTEL_ARCH_EVTSEL_MASK | \ | ||
465 | INTEL_ARCH_UNIT_MASK | \ | ||
466 | INTEL_ARCH_EDGE_MASK | \ | ||
467 | INTEL_ARCH_INV_MASK | \ | ||
468 | INTEL_ARCH_CNT_MASK) | ||
469 | |||
470 | return hw_event & CORE_EVNTSEL_MASK; | ||
471 | } | ||
472 | |||
473 | static void intel_pmu_enable_bts(u64 config) | ||
474 | { | ||
475 | unsigned long debugctlmsr; | ||
476 | |||
477 | debugctlmsr = get_debugctlmsr(); | ||
478 | |||
479 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
480 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
481 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
482 | |||
483 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
484 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
485 | |||
486 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
487 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
488 | |||
489 | update_debugctlmsr(debugctlmsr); | ||
490 | } | ||
491 | |||
492 | static void intel_pmu_disable_bts(void) | ||
493 | { | ||
494 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
495 | unsigned long debugctlmsr; | ||
496 | |||
497 | if (!cpuc->ds) | ||
498 | return; | ||
499 | |||
500 | debugctlmsr = get_debugctlmsr(); | ||
501 | |||
502 | debugctlmsr &= | ||
503 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
504 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
505 | |||
506 | update_debugctlmsr(debugctlmsr); | ||
507 | } | ||
508 | |||
509 | static void intel_pmu_disable_all(void) | 455 | static void intel_pmu_disable_all(void) |
510 | { | 456 | { |
511 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 457 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -514,12 +460,17 @@ static void intel_pmu_disable_all(void) | |||
514 | 460 | ||
515 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | 461 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) |
516 | intel_pmu_disable_bts(); | 462 | intel_pmu_disable_bts(); |
463 | |||
464 | intel_pmu_pebs_disable_all(); | ||
465 | intel_pmu_lbr_disable_all(); | ||
517 | } | 466 | } |
518 | 467 | ||
519 | static void intel_pmu_enable_all(void) | 468 | static void intel_pmu_enable_all(int added) |
520 | { | 469 | { |
521 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 470 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
522 | 471 | ||
472 | intel_pmu_pebs_enable_all(); | ||
473 | intel_pmu_lbr_enable_all(); | ||
523 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 474 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); |
524 | 475 | ||
525 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | 476 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { |
@@ -533,6 +484,42 @@ static void intel_pmu_enable_all(void) | |||
533 | } | 484 | } |
534 | } | 485 | } |
535 | 486 | ||
487 | /* | ||
488 | * Workaround for: | ||
489 | * Intel Errata AAK100 (model 26) | ||
490 | * Intel Errata AAP53 (model 30) | ||
491 | * Intel Errata BD53 (model 44) | ||
492 | * | ||
493 | * These chips need to be 'reset' when adding counters by programming | ||
494 | * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5 | ||
495 | * either in sequence on the same PMC or on different PMCs. | ||
496 | */ | ||
497 | static void intel_pmu_nhm_enable_all(int added) | ||
498 | { | ||
499 | if (added) { | ||
500 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
501 | int i; | ||
502 | |||
503 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2); | ||
504 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1); | ||
505 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5); | ||
506 | |||
507 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); | ||
508 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); | ||
509 | |||
510 | for (i = 0; i < 3; i++) { | ||
511 | struct perf_event *event = cpuc->events[i]; | ||
512 | |||
513 | if (!event) | ||
514 | continue; | ||
515 | |||
516 | __x86_pmu_enable_event(&event->hw, | ||
517 | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
518 | } | ||
519 | } | ||
520 | intel_pmu_enable_all(added); | ||
521 | } | ||
522 | |||
536 | static inline u64 intel_pmu_get_status(void) | 523 | static inline u64 intel_pmu_get_status(void) |
537 | { | 524 | { |
538 | u64 status; | 525 | u64 status; |
@@ -547,8 +534,7 @@ static inline void intel_pmu_ack_status(u64 ack) | |||
547 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | 534 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); |
548 | } | 535 | } |
549 | 536 | ||
550 | static inline void | 537 | static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) |
551 | intel_pmu_disable_fixed(struct hw_perf_event *hwc) | ||
552 | { | 538 | { |
553 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 539 | int idx = hwc->idx - X86_PMC_IDX_FIXED; |
554 | u64 ctrl_val, mask; | 540 | u64 ctrl_val, mask; |
@@ -557,71 +543,10 @@ intel_pmu_disable_fixed(struct hw_perf_event *hwc) | |||
557 | 543 | ||
558 | rdmsrl(hwc->config_base, ctrl_val); | 544 | rdmsrl(hwc->config_base, ctrl_val); |
559 | ctrl_val &= ~mask; | 545 | ctrl_val &= ~mask; |
560 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | 546 | wrmsrl(hwc->config_base, ctrl_val); |
561 | } | ||
562 | |||
563 | static void intel_pmu_drain_bts_buffer(void) | ||
564 | { | ||
565 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
566 | struct debug_store *ds = cpuc->ds; | ||
567 | struct bts_record { | ||
568 | u64 from; | ||
569 | u64 to; | ||
570 | u64 flags; | ||
571 | }; | ||
572 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
573 | struct bts_record *at, *top; | ||
574 | struct perf_output_handle handle; | ||
575 | struct perf_event_header header; | ||
576 | struct perf_sample_data data; | ||
577 | struct pt_regs regs; | ||
578 | |||
579 | if (!event) | ||
580 | return; | ||
581 | |||
582 | if (!ds) | ||
583 | return; | ||
584 | |||
585 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
586 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
587 | |||
588 | if (top <= at) | ||
589 | return; | ||
590 | |||
591 | ds->bts_index = ds->bts_buffer_base; | ||
592 | |||
593 | perf_sample_data_init(&data, 0); | ||
594 | |||
595 | data.period = event->hw.last_period; | ||
596 | regs.ip = 0; | ||
597 | |||
598 | /* | ||
599 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
600 | * We will overwrite the from and to address before we output | ||
601 | * the sample. | ||
602 | */ | ||
603 | perf_prepare_sample(&header, &data, event, ®s); | ||
604 | |||
605 | if (perf_output_begin(&handle, event, | ||
606 | header.size * (top - at), 1, 1)) | ||
607 | return; | ||
608 | |||
609 | for (; at < top; at++) { | ||
610 | data.ip = at->from; | ||
611 | data.addr = at->to; | ||
612 | |||
613 | perf_output_sample(&handle, &header, &data, event); | ||
614 | } | ||
615 | |||
616 | perf_output_end(&handle); | ||
617 | |||
618 | /* There's new data available. */ | ||
619 | event->hw.interrupts++; | ||
620 | event->pending_kill = POLL_IN; | ||
621 | } | 547 | } |
622 | 548 | ||
623 | static inline void | 549 | static void intel_pmu_disable_event(struct perf_event *event) |
624 | intel_pmu_disable_event(struct perf_event *event) | ||
625 | { | 550 | { |
626 | struct hw_perf_event *hwc = &event->hw; | 551 | struct hw_perf_event *hwc = &event->hw; |
627 | 552 | ||
@@ -637,14 +562,15 @@ intel_pmu_disable_event(struct perf_event *event) | |||
637 | } | 562 | } |
638 | 563 | ||
639 | x86_pmu_disable_event(event); | 564 | x86_pmu_disable_event(event); |
565 | |||
566 | if (unlikely(event->attr.precise_ip)) | ||
567 | intel_pmu_pebs_disable(event); | ||
640 | } | 568 | } |
641 | 569 | ||
642 | static inline void | 570 | static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) |
643 | intel_pmu_enable_fixed(struct hw_perf_event *hwc) | ||
644 | { | 571 | { |
645 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 572 | int idx = hwc->idx - X86_PMC_IDX_FIXED; |
646 | u64 ctrl_val, bits, mask; | 573 | u64 ctrl_val, bits, mask; |
647 | int err; | ||
648 | 574 | ||
649 | /* | 575 | /* |
650 | * Enable IRQ generation (0x8), | 576 | * Enable IRQ generation (0x8), |
@@ -669,7 +595,7 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc) | |||
669 | rdmsrl(hwc->config_base, ctrl_val); | 595 | rdmsrl(hwc->config_base, ctrl_val); |
670 | ctrl_val &= ~mask; | 596 | ctrl_val &= ~mask; |
671 | ctrl_val |= bits; | 597 | ctrl_val |= bits; |
672 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | 598 | wrmsrl(hwc->config_base, ctrl_val); |
673 | } | 599 | } |
674 | 600 | ||
675 | static void intel_pmu_enable_event(struct perf_event *event) | 601 | static void intel_pmu_enable_event(struct perf_event *event) |
@@ -689,7 +615,10 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
689 | return; | 615 | return; |
690 | } | 616 | } |
691 | 617 | ||
692 | __x86_pmu_enable_event(hwc); | 618 | if (unlikely(event->attr.precise_ip)) |
619 | intel_pmu_pebs_enable(event); | ||
620 | |||
621 | __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); | ||
693 | } | 622 | } |
694 | 623 | ||
695 | /* | 624 | /* |
@@ -708,20 +637,20 @@ static void intel_pmu_reset(void) | |||
708 | unsigned long flags; | 637 | unsigned long flags; |
709 | int idx; | 638 | int idx; |
710 | 639 | ||
711 | if (!x86_pmu.num_events) | 640 | if (!x86_pmu.num_counters) |
712 | return; | 641 | return; |
713 | 642 | ||
714 | local_irq_save(flags); | 643 | local_irq_save(flags); |
715 | 644 | ||
716 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | 645 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); |
717 | 646 | ||
718 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 647 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
719 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | 648 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); |
720 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | 649 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); |
721 | } | 650 | } |
722 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | 651 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) |
723 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | 652 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); |
724 | } | 653 | |
725 | if (ds) | 654 | if (ds) |
726 | ds->bts_index = ds->bts_buffer_base; | 655 | ds->bts_index = ds->bts_buffer_base; |
727 | 656 | ||
@@ -747,7 +676,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
747 | intel_pmu_drain_bts_buffer(); | 676 | intel_pmu_drain_bts_buffer(); |
748 | status = intel_pmu_get_status(); | 677 | status = intel_pmu_get_status(); |
749 | if (!status) { | 678 | if (!status) { |
750 | intel_pmu_enable_all(); | 679 | intel_pmu_enable_all(0); |
751 | return 0; | 680 | return 0; |
752 | } | 681 | } |
753 | 682 | ||
@@ -762,6 +691,15 @@ again: | |||
762 | 691 | ||
763 | inc_irq_stat(apic_perf_irqs); | 692 | inc_irq_stat(apic_perf_irqs); |
764 | ack = status; | 693 | ack = status; |
694 | |||
695 | intel_pmu_lbr_read(); | ||
696 | |||
697 | /* | ||
698 | * PEBS overflow sets bit 62 in the global status register | ||
699 | */ | ||
700 | if (__test_and_clear_bit(62, (unsigned long *)&status)) | ||
701 | x86_pmu.drain_pebs(regs); | ||
702 | |||
765 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | 703 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { |
766 | struct perf_event *event = cpuc->events[bit]; | 704 | struct perf_event *event = cpuc->events[bit]; |
767 | 705 | ||
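The interrupt handler above now checks bit 62 of GLOBAL_STATUS, drains the PEBS buffer when it is set, and clears the bit from its local copy so the per-counter overflow loop that follows never mistakes it for a counter index. A tiny sketch of that test-and-clear step on a plain 64-bit word:

/* Sketch of the bit-62 handling added to intel_pmu_handle_irq() above:
 * test-and-clear the PEBS overflow bit in a local copy of the status
 * word before iterating the remaining per-counter bits. */
#include <stdio.h>
#include <stdint.h>

#define PEBS_OVF_BIT 62

static int test_and_clear_bit64(int bit, uint64_t *word)
{
        uint64_t mask = 1ULL << bit;
        int was_set = !!(*word & mask);

        *word &= ~mask;
        return was_set;
}

int main(void)
{
        /* counter 0 overflowed and a PEBS record is pending */
        uint64_t status = (1ULL << 0) | (1ULL << PEBS_OVF_BIT);

        if (test_and_clear_bit64(PEBS_OVF_BIT, &status))
                printf("drain PEBS buffer\n");

        printf("remaining overflow bits: %016llx\n",
               (unsigned long long)status);
        return 0;
}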
@@ -787,26 +725,22 @@ again: | |||
787 | goto again; | 725 | goto again; |
788 | 726 | ||
789 | done: | 727 | done: |
790 | intel_pmu_enable_all(); | 728 | intel_pmu_enable_all(0); |
791 | return 1; | 729 | return 1; |
792 | } | 730 | } |
793 | 731 | ||
794 | static struct event_constraint bts_constraint = | ||
795 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
796 | |||
797 | static struct event_constraint * | 732 | static struct event_constraint * |
798 | intel_special_constraints(struct perf_event *event) | 733 | intel_bts_constraints(struct perf_event *event) |
799 | { | 734 | { |
800 | unsigned int hw_event; | 735 | struct hw_perf_event *hwc = &event->hw; |
801 | 736 | unsigned int hw_event, bts_event; | |
802 | hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; | ||
803 | 737 | ||
804 | if (unlikely((hw_event == | 738 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; |
805 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | 739 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); |
806 | (event->hw.sample_period == 1))) { | ||
807 | 740 | ||
741 | if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) | ||
808 | return &bts_constraint; | 742 | return &bts_constraint; |
809 | } | 743 | |
810 | return NULL; | 744 | return NULL; |
811 | } | 745 | } |
812 | 746 | ||
@@ -815,24 +749,53 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event | |||
815 | { | 749 | { |
816 | struct event_constraint *c; | 750 | struct event_constraint *c; |
817 | 751 | ||
818 | c = intel_special_constraints(event); | 752 | c = intel_bts_constraints(event); |
753 | if (c) | ||
754 | return c; | ||
755 | |||
756 | c = intel_pebs_constraints(event); | ||
819 | if (c) | 757 | if (c) |
820 | return c; | 758 | return c; |
821 | 759 | ||
822 | return x86_get_event_constraints(cpuc, event); | 760 | return x86_get_event_constraints(cpuc, event); |
823 | } | 761 | } |
824 | 762 | ||
825 | static __initconst struct x86_pmu core_pmu = { | 763 | static int intel_pmu_hw_config(struct perf_event *event) |
764 | { | ||
765 | int ret = x86_pmu_hw_config(event); | ||
766 | |||
767 | if (ret) | ||
768 | return ret; | ||
769 | |||
770 | if (event->attr.type != PERF_TYPE_RAW) | ||
771 | return 0; | ||
772 | |||
773 | if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) | ||
774 | return 0; | ||
775 | |||
776 | if (x86_pmu.version < 3) | ||
777 | return -EINVAL; | ||
778 | |||
779 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
780 | return -EACCES; | ||
781 | |||
782 | event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; | ||
783 | |||
784 | return 0; | ||
785 | } | ||
786 | |||
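intel_pmu_hw_config() above gates the AnyThread bit: a raw event may request ARCH_PERFMON_EVENTSEL_ANY only on architectural perfmon v3 or later, and under paranoid settings only with CAP_SYS_ADMIN. A condensed sketch of that gate; "paranoid" and "admin" are stand-ins for perf_paranoid_cpu() and capable(), and bit 21 for the AnyThread flag follows the usual event-select layout.

/* Sketch of the AnyThread permission gate in intel_pmu_hw_config()
 * above.  Error values mirror the kernel's -EINVAL / -EACCES. */
#include <stdio.h>
#include <stdint.h>
#include <errno.h>

#define EVENTSEL_ANY (1ULL << 21)   /* AnyThread bit in the event select */

static int check_any_bit(uint64_t raw_config, int pmu_version,
                         int paranoid, int admin)
{
        if (!(raw_config & EVENTSEL_ANY))
                return 0;               /* nothing special requested       */
        if (pmu_version < 3)
                return -EINVAL;         /* bit doesn't exist before v3     */
        if (paranoid && !admin)
                return -EACCES;         /* cross-HT counting is privileged */
        return 0;
}

int main(void)
{
        printf("plain user, v3, paranoid: %d\n",
               check_any_bit(EVENTSEL_ANY, 3, 1, 0));
        printf("admin, v3:                %d\n",
               check_any_bit(EVENTSEL_ANY, 3, 1, 1));
        return 0;
}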
787 | static __initconst const struct x86_pmu core_pmu = { | ||
826 | .name = "core", | 788 | .name = "core", |
827 | .handle_irq = x86_pmu_handle_irq, | 789 | .handle_irq = x86_pmu_handle_irq, |
828 | .disable_all = x86_pmu_disable_all, | 790 | .disable_all = x86_pmu_disable_all, |
829 | .enable_all = x86_pmu_enable_all, | 791 | .enable_all = x86_pmu_enable_all, |
830 | .enable = x86_pmu_enable_event, | 792 | .enable = x86_pmu_enable_event, |
831 | .disable = x86_pmu_disable_event, | 793 | .disable = x86_pmu_disable_event, |
794 | .hw_config = x86_pmu_hw_config, | ||
795 | .schedule_events = x86_schedule_events, | ||
832 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | 796 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
833 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | 797 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
834 | .event_map = intel_pmu_event_map, | 798 | .event_map = intel_pmu_event_map, |
835 | .raw_event = intel_pmu_raw_event, | ||
836 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | 799 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
837 | .apic = 1, | 800 | .apic = 1, |
838 | /* | 801 | /* |
@@ -845,17 +808,32 @@ static __initconst struct x86_pmu core_pmu = { | |||
845 | .event_constraints = intel_core_event_constraints, | 808 | .event_constraints = intel_core_event_constraints, |
846 | }; | 809 | }; |
847 | 810 | ||
848 | static __initconst struct x86_pmu intel_pmu = { | 811 | static void intel_pmu_cpu_starting(int cpu) |
812 | { | ||
813 | init_debug_store_on_cpu(cpu); | ||
814 | /* | ||
815 | * Deal with CPUs that don't clear their LBRs on power-up. | ||
816 | */ | ||
817 | intel_pmu_lbr_reset(); | ||
818 | } | ||
819 | |||
820 | static void intel_pmu_cpu_dying(int cpu) | ||
821 | { | ||
822 | fini_debug_store_on_cpu(cpu); | ||
823 | } | ||
824 | |||
825 | static __initconst const struct x86_pmu intel_pmu = { | ||
849 | .name = "Intel", | 826 | .name = "Intel", |
850 | .handle_irq = intel_pmu_handle_irq, | 827 | .handle_irq = intel_pmu_handle_irq, |
851 | .disable_all = intel_pmu_disable_all, | 828 | .disable_all = intel_pmu_disable_all, |
852 | .enable_all = intel_pmu_enable_all, | 829 | .enable_all = intel_pmu_enable_all, |
853 | .enable = intel_pmu_enable_event, | 830 | .enable = intel_pmu_enable_event, |
854 | .disable = intel_pmu_disable_event, | 831 | .disable = intel_pmu_disable_event, |
832 | .hw_config = intel_pmu_hw_config, | ||
833 | .schedule_events = x86_schedule_events, | ||
855 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | 834 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
856 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | 835 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
857 | .event_map = intel_pmu_event_map, | 836 | .event_map = intel_pmu_event_map, |
858 | .raw_event = intel_pmu_raw_event, | ||
859 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | 837 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
860 | .apic = 1, | 838 | .apic = 1, |
861 | /* | 839 | /* |
@@ -864,14 +842,38 @@ static __initconst struct x86_pmu intel_pmu = { | |||
864 | * the generic event period: | 842 | * the generic event period: |
865 | */ | 843 | */ |
866 | .max_period = (1ULL << 31) - 1, | 844 | .max_period = (1ULL << 31) - 1, |
867 | .enable_bts = intel_pmu_enable_bts, | ||
868 | .disable_bts = intel_pmu_disable_bts, | ||
869 | .get_event_constraints = intel_get_event_constraints, | 845 | .get_event_constraints = intel_get_event_constraints, |
870 | 846 | ||
871 | .cpu_starting = init_debug_store_on_cpu, | 847 | .cpu_starting = intel_pmu_cpu_starting, |
872 | .cpu_dying = fini_debug_store_on_cpu, | 848 | .cpu_dying = intel_pmu_cpu_dying, |
873 | }; | 849 | }; |
874 | 850 | ||
851 | static void intel_clovertown_quirks(void) | ||
852 | { | ||
853 | /* | ||
854 | * PEBS is unreliable due to: | ||
855 | * | ||
856 | * AJ67 - PEBS may experience CPL leaks | ||
857 | * AJ68 - PEBS PMI may be delayed by one event | ||
858 | * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] | ||
859 | * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS | ||
860 | * | ||
861 | * AJ67 could be worked around by restricting the OS/USR flags. | ||
862 | * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI. | ||
863 | * | ||
864 | * AJ106 could possibly be worked around by not allowing LBR | ||
865 | * usage from PEBS, including the fixup. | ||
866 | * AJ68 could possibly be worked around by always programming | ||
867 | * a pebs_event_reset[0] value and coping with the lost events. | ||
868 | * | ||
869 | * But taken together it might just make sense to not enable PEBS on | ||
870 | * these chips. | ||
871 | */ | ||
872 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); | ||
873 | x86_pmu.pebs = 0; | ||
874 | x86_pmu.pebs_constraints = NULL; | ||
875 | } | ||
876 | |||
875 | static __init int intel_pmu_init(void) | 877 | static __init int intel_pmu_init(void) |
876 | { | 878 | { |
877 | union cpuid10_edx edx; | 879 | union cpuid10_edx edx; |
@@ -881,12 +883,13 @@ static __init int intel_pmu_init(void) | |||
881 | int version; | 883 | int version; |
882 | 884 | ||
883 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 885 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
884 | /* check for P6 processor family */ | 886 | switch (boot_cpu_data.x86) { |
885 | if (boot_cpu_data.x86 == 6) { | 887 | case 0x6: |
886 | return p6_pmu_init(); | 888 | return p6_pmu_init(); |
887 | } else { | 889 | case 0xf: |
890 | return p4_pmu_init(); | ||
891 | } | ||
888 | return -ENODEV; | 892 | return -ENODEV; |
889 | } | ||
890 | } | 893 | } |
891 | 894 | ||
892 | /* | 895 | /* |
@@ -904,16 +907,28 @@ static __init int intel_pmu_init(void) | |||
904 | x86_pmu = intel_pmu; | 907 | x86_pmu = intel_pmu; |
905 | 908 | ||
906 | x86_pmu.version = version; | 909 | x86_pmu.version = version; |
907 | x86_pmu.num_events = eax.split.num_events; | 910 | x86_pmu.num_counters = eax.split.num_counters; |
908 | x86_pmu.event_bits = eax.split.bit_width; | 911 | x86_pmu.cntval_bits = eax.split.bit_width; |
909 | x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; | 912 | x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; |
910 | 913 | ||
911 | /* | 914 | /* |
912 | * Quirk: v2 perfmon does not report fixed-purpose events, so | 915 | * Quirk: v2 perfmon does not report fixed-purpose events, so |
913 | * assume at least 3 events: | 916 | * assume at least 3 events: |
914 | */ | 917 | */ |
915 | if (version > 1) | 918 | if (version > 1) |
916 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | 919 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); |
920 | |||
921 | /* | ||
922 | * v2 and above have a perf capabilities MSR | ||
923 | */ | ||
924 | if (version > 1) { | ||
925 | u64 capabilities; | ||
926 | |||
927 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); | ||
928 | x86_pmu.intel_cap.capabilities = capabilities; | ||
929 | } | ||
930 | |||
931 | intel_ds_init(); | ||
917 | 932 | ||
918 | /* | 933 | /* |
919 | * Install the hw-cache-events table: | 934 | * Install the hw-cache-events table: |
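The block above fills num_counters, cntval_bits and num_counters_fixed from CPUID leaf 0xA (and, on v2+, reads MSR_IA32_PERF_CAPABILITIES). The documented layout of that leaf can be decoded directly from user space; the sketch below does so with GCC/Clang's <cpuid.h> helper and assumes the standard field positions (version in EAX[7:0], counter count in EAX[15:8], width in EAX[23:16], fixed-counter count and width in EDX[4:0] and EDX[12:5]).

/* Sketch: decode the architectural-perfmon fields that
 * intel_pmu_init() reads above from CPUID leaf 0xA. */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx)) {
                printf("CPUID leaf 0xA not supported\n");
                return 1;
        }

        printf("version:             %u\n",  eax        & 0xff);
        printf("generic counters:    %u\n", (eax >>  8) & 0xff);
        printf("counter bit width:   %u\n", (eax >> 16) & 0xff);
        printf("fixed counters:      %u\n",  edx        & 0x1f);
        printf("fixed counter width: %u\n", (edx >>  5) & 0xff);
        return 0;
}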
@@ -924,12 +939,15 @@ static __init int intel_pmu_init(void) | |||
924 | break; | 939 | break; |
925 | 940 | ||
926 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | 941 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ |
942 | x86_pmu.quirks = intel_clovertown_quirks; | ||
927 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | 943 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ |
928 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | 944 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ |
929 | case 29: /* six-core 45 nm xeon "Dunnington" */ | 945 | case 29: /* six-core 45 nm xeon "Dunnington" */ |
930 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | 946 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, |
931 | sizeof(hw_cache_event_ids)); | 947 | sizeof(hw_cache_event_ids)); |
932 | 948 | ||
949 | intel_pmu_lbr_init_core(); | ||
950 | |||
933 | x86_pmu.event_constraints = intel_core2_event_constraints; | 951 | x86_pmu.event_constraints = intel_core2_event_constraints; |
934 | pr_cont("Core2 events, "); | 952 | pr_cont("Core2 events, "); |
935 | break; | 953 | break; |
@@ -940,13 +958,19 @@ static __init int intel_pmu_init(void) | |||
940 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | 958 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, |
941 | sizeof(hw_cache_event_ids)); | 959 | sizeof(hw_cache_event_ids)); |
942 | 960 | ||
961 | intel_pmu_lbr_init_nhm(); | ||
962 | |||
943 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | 963 | x86_pmu.event_constraints = intel_nehalem_event_constraints; |
944 | pr_cont("Nehalem/Corei7 events, "); | 964 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
965 | pr_cont("Nehalem events, "); | ||
945 | break; | 966 | break; |
967 | |||
946 | case 28: /* Atom */ | 968 | case 28: /* Atom */ |
947 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | 969 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, |
948 | sizeof(hw_cache_event_ids)); | 970 | sizeof(hw_cache_event_ids)); |
949 | 971 | ||
972 | intel_pmu_lbr_init_atom(); | ||
973 | |||
950 | x86_pmu.event_constraints = intel_gen_event_constraints; | 974 | x86_pmu.event_constraints = intel_gen_event_constraints; |
951 | pr_cont("Atom events, "); | 975 | pr_cont("Atom events, "); |
952 | break; | 976 | break; |
@@ -956,7 +980,10 @@ static __init int intel_pmu_init(void) | |||
956 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, | 980 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, |
957 | sizeof(hw_cache_event_ids)); | 981 | sizeof(hw_cache_event_ids)); |
958 | 982 | ||
983 | intel_pmu_lbr_init_nhm(); | ||
984 | |||
959 | x86_pmu.event_constraints = intel_westmere_event_constraints; | 985 | x86_pmu.event_constraints = intel_westmere_event_constraints; |
986 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | ||
960 | pr_cont("Westmere events, "); | 987 | pr_cont("Westmere events, "); |
961 | break; | 988 | break; |
962 | 989 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c new file mode 100644 index 000000000000..18018d1311cd --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -0,0 +1,641 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | /* The maximal number of PEBS events: */ | ||
4 | #define MAX_PEBS_EVENTS 4 | ||
5 | |||
6 | /* The size of a BTS record in bytes: */ | ||
7 | #define BTS_RECORD_SIZE 24 | ||
8 | |||
9 | #define BTS_BUFFER_SIZE (PAGE_SIZE << 4) | ||
10 | #define PEBS_BUFFER_SIZE PAGE_SIZE | ||
11 | |||
12 | /* | ||
13 | * pebs_record_32 for p4 and core not supported | ||
14 | |||
15 | struct pebs_record_32 { | ||
16 | u32 flags, ip; | ||
17 | u32 ax, bc, cx, dx; | ||
18 | u32 si, di, bp, sp; | ||
19 | }; | ||
20 | |||
21 | */ | ||
22 | |||
23 | struct pebs_record_core { | ||
24 | u64 flags, ip; | ||
25 | u64 ax, bx, cx, dx; | ||
26 | u64 si, di, bp, sp; | ||
27 | u64 r8, r9, r10, r11; | ||
28 | u64 r12, r13, r14, r15; | ||
29 | }; | ||
30 | |||
31 | struct pebs_record_nhm { | ||
32 | u64 flags, ip; | ||
33 | u64 ax, bx, cx, dx; | ||
34 | u64 si, di, bp, sp; | ||
35 | u64 r8, r9, r10, r11; | ||
36 | u64 r12, r13, r14, r15; | ||
37 | u64 status, dla, dse, lat; | ||
38 | }; | ||
39 | |||
40 | /* | ||
41 | * A debug store configuration. | ||
42 | * | ||
43 | * We only support architectures that use 64bit fields. | ||
44 | */ | ||
45 | struct debug_store { | ||
46 | u64 bts_buffer_base; | ||
47 | u64 bts_index; | ||
48 | u64 bts_absolute_maximum; | ||
49 | u64 bts_interrupt_threshold; | ||
50 | u64 pebs_buffer_base; | ||
51 | u64 pebs_index; | ||
52 | u64 pebs_absolute_maximum; | ||
53 | u64 pebs_interrupt_threshold; | ||
54 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
55 | }; | ||
56 | |||
57 | static void init_debug_store_on_cpu(int cpu) | ||
58 | { | ||
59 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
60 | |||
61 | if (!ds) | ||
62 | return; | ||
63 | |||
64 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
65 | (u32)((u64)(unsigned long)ds), | ||
66 | (u32)((u64)(unsigned long)ds >> 32)); | ||
67 | } | ||
68 | |||
69 | static void fini_debug_store_on_cpu(int cpu) | ||
70 | { | ||
71 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
72 | return; | ||
73 | |||
74 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
75 | } | ||
76 | |||
77 | static void release_ds_buffers(void) | ||
78 | { | ||
79 | int cpu; | ||
80 | |||
81 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
82 | return; | ||
83 | |||
84 | get_online_cpus(); | ||
85 | |||
86 | for_each_online_cpu(cpu) | ||
87 | fini_debug_store_on_cpu(cpu); | ||
88 | |||
89 | for_each_possible_cpu(cpu) { | ||
90 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
91 | |||
92 | if (!ds) | ||
93 | continue; | ||
94 | |||
95 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
96 | |||
97 | kfree((void *)(unsigned long)ds->pebs_buffer_base); | ||
98 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
99 | kfree(ds); | ||
100 | } | ||
101 | |||
102 | put_online_cpus(); | ||
103 | } | ||
104 | |||
105 | static int reserve_ds_buffers(void) | ||
106 | { | ||
107 | int cpu, err = 0; | ||
108 | |||
109 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
110 | return 0; | ||
111 | |||
112 | get_online_cpus(); | ||
113 | |||
114 | for_each_possible_cpu(cpu) { | ||
115 | struct debug_store *ds; | ||
116 | void *buffer; | ||
117 | int max, thresh; | ||
118 | |||
119 | err = -ENOMEM; | ||
120 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
121 | if (unlikely(!ds)) | ||
122 | break; | ||
123 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
124 | |||
125 | if (x86_pmu.bts) { | ||
126 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
127 | if (unlikely(!buffer)) | ||
128 | break; | ||
129 | |||
130 | max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; | ||
131 | thresh = max / 16; | ||
132 | |||
133 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
134 | ds->bts_index = ds->bts_buffer_base; | ||
135 | ds->bts_absolute_maximum = ds->bts_buffer_base + | ||
136 | max * BTS_RECORD_SIZE; | ||
137 | ds->bts_interrupt_threshold = ds->bts_absolute_maximum - | ||
138 | thresh * BTS_RECORD_SIZE; | ||
139 | } | ||
140 | |||
141 | if (x86_pmu.pebs) { | ||
142 | buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); | ||
143 | if (unlikely(!buffer)) | ||
144 | break; | ||
145 | |||
146 | max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; | ||
147 | |||
148 | ds->pebs_buffer_base = (u64)(unsigned long)buffer; | ||
149 | ds->pebs_index = ds->pebs_buffer_base; | ||
150 | ds->pebs_absolute_maximum = ds->pebs_buffer_base + | ||
151 | max * x86_pmu.pebs_record_size; | ||
152 | /* | ||
153 | * Always use single record PEBS | ||
154 | */ | ||
155 | ds->pebs_interrupt_threshold = ds->pebs_buffer_base + | ||
156 | x86_pmu.pebs_record_size; | ||
157 | } | ||
158 | |||
159 | err = 0; | ||
160 | } | ||
161 | |||
162 | if (err) | ||
163 | release_ds_buffers(); | ||
164 | else { | ||
165 | for_each_online_cpu(cpu) | ||
166 | init_debug_store_on_cpu(cpu); | ||
167 | } | ||
168 | |||
169 | put_online_cpus(); | ||
170 | |||
171 | return err; | ||
172 | } | ||
173 | |||
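reserve_ds_buffers() above slices each per-CPU buffer into fixed-size records and programs the interrupt thresholds: BTS interrupts when the buffer is within one sixteenth of full, while PEBS is left in single-record mode. A short sketch of that arithmetic, reusing the constants from this file and assuming 4 KiB pages and the 144-byte pebs_record_core layout (18 u64 fields) defined above:

/* Sketch of the buffer/threshold arithmetic in reserve_ds_buffers()
 * above.  The buffer address is made up for the example. */
#include <stdio.h>

#define PAGE_SIZE        4096UL
#define BTS_RECORD_SIZE  24UL
#define BTS_BUFFER_SIZE  (PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE PAGE_SIZE

int main(void)
{
        unsigned long pebs_record_size = 144;  /* sizeof pebs_record_core */
        unsigned long base = 0x100000;         /* pretend buffer address  */
        unsigned long max, thresh;

        /* BTS: interrupt when within 1/16th of the end of the buffer */
        max    = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
        thresh = max / 16;
        printf("BTS:  %lu records, threshold at record %lu (0x%lx)\n",
               max, max - thresh,
               base + (max - thresh) * BTS_RECORD_SIZE);

        /* PEBS: single-record mode, interrupt after every record */
        max = PEBS_BUFFER_SIZE / pebs_record_size;
        printf("PEBS: %lu records, threshold after 1 record (0x%lx)\n",
               max, base + pebs_record_size);
        return 0;
}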
174 | /* | ||
175 | * BTS | ||
176 | */ | ||
177 | |||
178 | static struct event_constraint bts_constraint = | ||
179 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
180 | |||
181 | static void intel_pmu_enable_bts(u64 config) | ||
182 | { | ||
183 | unsigned long debugctlmsr; | ||
184 | |||
185 | debugctlmsr = get_debugctlmsr(); | ||
186 | |||
187 | debugctlmsr |= DEBUGCTLMSR_TR; | ||
188 | debugctlmsr |= DEBUGCTLMSR_BTS; | ||
189 | debugctlmsr |= DEBUGCTLMSR_BTINT; | ||
190 | |||
191 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
192 | debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS; | ||
193 | |||
194 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
195 | debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR; | ||
196 | |||
197 | update_debugctlmsr(debugctlmsr); | ||
198 | } | ||
199 | |||
200 | static void intel_pmu_disable_bts(void) | ||
201 | { | ||
202 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
203 | unsigned long debugctlmsr; | ||
204 | |||
205 | if (!cpuc->ds) | ||
206 | return; | ||
207 | |||
208 | debugctlmsr = get_debugctlmsr(); | ||
209 | |||
210 | debugctlmsr &= | ||
211 | ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT | | ||
212 | DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR); | ||
213 | |||
214 | update_debugctlmsr(debugctlmsr); | ||
215 | } | ||
216 | |||
217 | static void intel_pmu_drain_bts_buffer(void) | ||
218 | { | ||
219 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
220 | struct debug_store *ds = cpuc->ds; | ||
221 | struct bts_record { | ||
222 | u64 from; | ||
223 | u64 to; | ||
224 | u64 flags; | ||
225 | }; | ||
226 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
227 | struct bts_record *at, *top; | ||
228 | struct perf_output_handle handle; | ||
229 | struct perf_event_header header; | ||
230 | struct perf_sample_data data; | ||
231 | struct pt_regs regs; | ||
232 | |||
233 | if (!event) | ||
234 | return; | ||
235 | |||
236 | if (!ds) | ||
237 | return; | ||
238 | |||
239 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
240 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
241 | |||
242 | if (top <= at) | ||
243 | return; | ||
244 | |||
245 | ds->bts_index = ds->bts_buffer_base; | ||
246 | |||
247 | perf_sample_data_init(&data, 0); | ||
248 | data.period = event->hw.last_period; | ||
249 | regs.ip = 0; | ||
250 | |||
251 | /* | ||
252 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
253 | * We will overwrite the from and to address before we output | ||
254 | * the sample. | ||
255 | */ | ||
256 | perf_prepare_sample(&header, &data, event, ®s); | ||
257 | |||
258 | if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) | ||
259 | return; | ||
260 | |||
261 | for (; at < top; at++) { | ||
262 | data.ip = at->from; | ||
263 | data.addr = at->to; | ||
264 | |||
265 | perf_output_sample(&handle, &header, &data, event); | ||
266 | } | ||
267 | |||
268 | perf_output_end(&handle); | ||
269 | |||
270 | /* There's new data available. */ | ||
271 | event->hw.interrupts++; | ||
272 | event->pending_kill = POLL_IN; | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * PEBS | ||
277 | */ | ||
278 | |||
279 | static struct event_constraint intel_core_pebs_events[] = { | ||
280 | PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */ | ||
281 | PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ | ||
282 | PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ | ||
283 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */ | ||
284 | PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
285 | PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
286 | PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
287 | PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
288 | PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
289 | EVENT_CONSTRAINT_END | ||
290 | }; | ||
291 | |||
292 | static struct event_constraint intel_nehalem_pebs_events[] = { | ||
293 | PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ | ||
294 | PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ | ||
295 | PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ | ||
296 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETIRED.ANY */ | ||
297 | PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
298 | PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
299 | PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
300 | PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
301 | PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
302 | EVENT_CONSTRAINT_END | ||
303 | }; | ||
304 | |||
305 | static struct event_constraint * | ||
306 | intel_pebs_constraints(struct perf_event *event) | ||
307 | { | ||
308 | struct event_constraint *c; | ||
309 | |||
310 | if (!event->attr.precise_ip) | ||
311 | return NULL; | ||
312 | |||
313 | if (x86_pmu.pebs_constraints) { | ||
314 | for_each_event_constraint(c, x86_pmu.pebs_constraints) { | ||
315 | if ((event->hw.config & c->cmask) == c->code) | ||
316 | return c; | ||
317 | } | ||
318 | } | ||
319 | |||
320 | return &emptyconstraint; | ||
321 | } | ||
322 | |||
323 | static void intel_pmu_pebs_enable(struct perf_event *event) | ||
324 | { | ||
325 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
326 | struct hw_perf_event *hwc = &event->hw; | ||
327 | |||
328 | hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; | ||
329 | |||
330 | cpuc->pebs_enabled |= 1ULL << hwc->idx; | ||
331 | WARN_ON_ONCE(cpuc->enabled); | ||
332 | |||
333 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
334 | intel_pmu_lbr_enable(event); | ||
335 | } | ||
336 | |||
337 | static void intel_pmu_pebs_disable(struct perf_event *event) | ||
338 | { | ||
339 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
340 | struct hw_perf_event *hwc = &event->hw; | ||
341 | |||
342 | cpuc->pebs_enabled &= ~(1ULL << hwc->idx); | ||
343 | if (cpuc->enabled) | ||
344 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | ||
345 | |||
346 | hwc->config |= ARCH_PERFMON_EVENTSEL_INT; | ||
347 | |||
348 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
349 | intel_pmu_lbr_disable(event); | ||
350 | } | ||
351 | |||
352 | static void intel_pmu_pebs_enable_all(void) | ||
353 | { | ||
354 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
355 | |||
356 | if (cpuc->pebs_enabled) | ||
357 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | ||
358 | } | ||
359 | |||
360 | static void intel_pmu_pebs_disable_all(void) | ||
361 | { | ||
362 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
363 | |||
364 | if (cpuc->pebs_enabled) | ||
365 | wrmsrl(MSR_IA32_PEBS_ENABLE, 0); | ||
366 | } | ||
367 | |||
368 | #include <asm/insn.h> | ||
369 | |||
370 | static inline bool kernel_ip(unsigned long ip) | ||
371 | { | ||
372 | #ifdef CONFIG_X86_32 | ||
373 | return ip > PAGE_OFFSET; | ||
374 | #else | ||
375 | return (long)ip < 0; | ||
376 | #endif | ||
377 | } | ||
378 | |||
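A quick stand-alone illustration of the 64-bit branch of kernel_ip() above: kernel text sits in the upper canonical half of the address space, so the value is negative when viewed as a signed long. The sample addresses are made up and the snippet assumes a 64-bit build.

#include <stdio.h>

static int is_kernel_ip(unsigned long ip)
{
	return (long)ip < 0;	/* same test as kernel_ip() on x86_64 */
}

int main(void)
{
	printf("%d\n", is_kernel_ip(0xffffffff81000000UL));	/* 1: kernel text */
	printf("%d\n", is_kernel_ip(0x0000000000400000UL));	/* 0: user text   */
	return 0;
}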
379 | static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | ||
380 | { | ||
381 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
382 | unsigned long from = cpuc->lbr_entries[0].from; | ||
383 | unsigned long old_to, to = cpuc->lbr_entries[0].to; | ||
384 | unsigned long ip = regs->ip; | ||
385 | |||
386 | /* | ||
387 | * We don't need to fix up if the PEBS assist is fault-like | ||
388 | */ | ||
389 | if (!x86_pmu.intel_cap.pebs_trap) | ||
390 | return 1; | ||
391 | |||
392 | /* | ||
393 | * No LBR entry, no basic block, no rewinding | ||
394 | */ | ||
395 | if (!cpuc->lbr_stack.nr || !from || !to) | ||
396 | return 0; | ||
397 | |||
398 | /* | ||
399 | * Basic blocks should never cross user/kernel boundaries | ||
400 | */ | ||
401 | if (kernel_ip(ip) != kernel_ip(to)) | ||
402 | return 0; | ||
403 | |||
404 | /* | ||
405 | * unsigned math, either ip is before the start (impossible) or | ||
406 | * the basic block is larger than 1 page (sanity) | ||
407 | */ | ||
408 | if ((ip - to) > PAGE_SIZE) | ||
409 | return 0; | ||
410 | |||
411 | /* | ||
412 | * We sampled a branch insn, rewind using the LBR stack | ||
413 | */ | ||
414 | if (ip == to) { | ||
415 | regs->ip = from; | ||
416 | return 1; | ||
417 | } | ||
418 | |||
419 | do { | ||
420 | struct insn insn; | ||
421 | u8 buf[MAX_INSN_SIZE]; | ||
422 | void *kaddr; | ||
423 | |||
424 | old_to = to; | ||
425 | if (!kernel_ip(ip)) { | ||
426 | int bytes, size = MAX_INSN_SIZE; | ||
427 | |||
428 | bytes = copy_from_user_nmi(buf, (void __user *)to, size); | ||
429 | if (bytes != size) | ||
430 | return 0; | ||
431 | |||
432 | kaddr = buf; | ||
433 | } else | ||
434 | kaddr = (void *)to; | ||
435 | |||
436 | kernel_insn_init(&insn, kaddr); | ||
437 | insn_get_length(&insn); | ||
438 | to += insn.length; | ||
439 | } while (to < ip); | ||
440 | |||
441 | if (to == ip) { | ||
442 | regs->ip = old_to; | ||
443 | return 1; | ||
444 | } | ||
445 | |||
446 | /* | ||
447 | * Even though we decoded the basic block, the instruction stream | ||
448 | * never matched the given IP; either the TO or the IP got corrupted. | ||
449 | */ | ||
450 | return 0; | ||
451 | } | ||
452 | |||
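The decode loop above can be hard to follow; here is a toy, user-space model of it, with made-up instruction lengths standing in for the real decoder. It walks forward from the LBR target until it reaches the trap-like PEBS ip, at which point old_to holds the instruction that actually caused the event.

#include <stdio.h>

int main(void)
{
	unsigned long to = 0x1000;		/* LBR branch target (start of the block) */
	unsigned long ip = 0x1009;		/* off-by-one PEBS ip */
	int lengths[] = { 3, 2, 4, 5, 1 };	/* assumed insn lengths */
	unsigned long old_to = to;
	int i = 0;

	while (to < ip) {
		old_to = to;
		to += lengths[i++];
	}

	if (to == ip)
		printf("fixed-up ip: %#lx\n", old_to);	/* 0x1005 */
	else
		printf("stream never matched ip, give up\n");
	return 0;
}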
453 | static int intel_pmu_save_and_restart(struct perf_event *event); | ||
454 | |||
455 | static void __intel_pmu_pebs_event(struct perf_event *event, | ||
456 | struct pt_regs *iregs, void *__pebs) | ||
457 | { | ||
458 | /* | ||
459 | * We cast to pebs_record_core since that is a subset of | ||
460 | * both formats and we don't use the other fields in this | ||
461 | * routine. | ||
462 | */ | ||
463 | struct pebs_record_core *pebs = __pebs; | ||
464 | struct perf_sample_data data; | ||
465 | struct pt_regs regs; | ||
466 | |||
467 | if (!intel_pmu_save_and_restart(event)) | ||
468 | return; | ||
469 | |||
470 | perf_sample_data_init(&data, 0); | ||
471 | data.period = event->hw.last_period; | ||
472 | |||
473 | /* | ||
474 | * We use the interrupt regs as a base because the PEBS record | ||
475 | * does not contain a full regs set; specifically, it seems to | ||
476 | * lack segment descriptors, which get used by things like | ||
477 | * user_mode(). | ||
478 | * | ||
479 | * In the simple case, fix up only the IP, BP and SP regs so that | ||
480 | * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN function properly. | ||
481 | * A possible PERF_SAMPLE_REGS will have to transfer all regs. | ||
482 | */ | ||
483 | regs = *iregs; | ||
484 | regs.ip = pebs->ip; | ||
485 | regs.bp = pebs->bp; | ||
486 | regs.sp = pebs->sp; | ||
487 | |||
488 | if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s)) | ||
489 | regs.flags |= PERF_EFLAGS_EXACT; | ||
490 | else | ||
491 | regs.flags &= ~PERF_EFLAGS_EXACT; | ||
492 | |||
493 | if (perf_event_overflow(event, 1, &data, ®s)) | ||
494 | x86_pmu_stop(event); | ||
495 | } | ||
496 | |||
497 | static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) | ||
498 | { | ||
499 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
500 | struct debug_store *ds = cpuc->ds; | ||
501 | struct perf_event *event = cpuc->events[0]; /* PMC0 only */ | ||
502 | struct pebs_record_core *at, *top; | ||
503 | int n; | ||
504 | |||
505 | if (!ds || !x86_pmu.pebs) | ||
506 | return; | ||
507 | |||
508 | at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; | ||
509 | top = (struct pebs_record_core *)(unsigned long)ds->pebs_index; | ||
510 | |||
511 | /* | ||
512 | * Whatever else happens, drain the thing | ||
513 | */ | ||
514 | ds->pebs_index = ds->pebs_buffer_base; | ||
515 | |||
516 | if (!test_bit(0, cpuc->active_mask)) | ||
517 | return; | ||
518 | |||
519 | WARN_ON_ONCE(!event); | ||
520 | |||
521 | if (!event->attr.precise_ip) | ||
522 | return; | ||
523 | |||
524 | n = top - at; | ||
525 | if (n <= 0) | ||
526 | return; | ||
527 | |||
528 | /* | ||
529 | * Should not happen; we program the threshold at 1 and do not | ||
530 | * set a reset value. | ||
531 | */ | ||
532 | WARN_ON_ONCE(n > 1); | ||
533 | at += n - 1; | ||
534 | |||
535 | __intel_pmu_pebs_event(event, iregs, at); | ||
536 | } | ||
537 | |||
538 | static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | ||
539 | { | ||
540 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
541 | struct debug_store *ds = cpuc->ds; | ||
542 | struct pebs_record_nhm *at, *top; | ||
543 | struct perf_event *event = NULL; | ||
544 | u64 status = 0; | ||
545 | int bit, n; | ||
546 | |||
547 | if (!ds || !x86_pmu.pebs) | ||
548 | return; | ||
549 | |||
550 | at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; | ||
551 | top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; | ||
552 | |||
553 | ds->pebs_index = ds->pebs_buffer_base; | ||
554 | |||
555 | n = top - at; | ||
556 | if (n <= 0) | ||
557 | return; | ||
558 | |||
559 | /* | ||
560 | * Should not happen; we program the threshold at 1 and do not | ||
561 | * set a reset value. | ||
562 | */ | ||
563 | WARN_ON_ONCE(n > MAX_PEBS_EVENTS); | ||
564 | |||
565 | for ( ; at < top; at++) { | ||
566 | for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { | ||
567 | event = cpuc->events[bit]; | ||
568 | if (!test_bit(bit, cpuc->active_mask)) | ||
569 | continue; | ||
570 | |||
571 | WARN_ON_ONCE(!event); | ||
572 | |||
573 | if (!event->attr.precise_ip) | ||
574 | continue; | ||
575 | |||
576 | if (__test_and_set_bit(bit, (unsigned long *)&status)) | ||
577 | continue; | ||
578 | |||
579 | break; | ||
580 | } | ||
581 | |||
582 | if (!event || bit >= MAX_PEBS_EVENTS) | ||
583 | continue; | ||
584 | |||
585 | __intel_pmu_pebs_event(event, iregs, at); | ||
586 | } | ||
587 | } | ||
588 | |||
589 | /* | ||
590 | * BTS, PEBS probe and setup | ||
591 | */ | ||
592 | |||
593 | static void intel_ds_init(void) | ||
594 | { | ||
595 | /* | ||
596 | * No support for 32bit formats | ||
597 | */ | ||
598 | if (!boot_cpu_has(X86_FEATURE_DTES64)) | ||
599 | return; | ||
600 | |||
601 | x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); | ||
602 | x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); | ||
603 | if (x86_pmu.pebs) { | ||
604 | char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; | ||
605 | int format = x86_pmu.intel_cap.pebs_format; | ||
606 | |||
607 | switch (format) { | ||
608 | case 0: | ||
609 | printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); | ||
610 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); | ||
611 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; | ||
612 | x86_pmu.pebs_constraints = intel_core_pebs_events; | ||
613 | break; | ||
614 | |||
615 | case 1: | ||
616 | printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); | ||
617 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); | ||
618 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; | ||
619 | x86_pmu.pebs_constraints = intel_nehalem_pebs_events; | ||
620 | break; | ||
621 | |||
622 | default: | ||
623 | printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); | ||
624 | x86_pmu.pebs = 0; | ||
625 | break; | ||
626 | } | ||
627 | } | ||
628 | } | ||
629 | |||
630 | #else /* CONFIG_CPU_SUP_INTEL */ | ||
631 | |||
632 | static int reserve_ds_buffers(void) | ||
633 | { | ||
634 | return 0; | ||
635 | } | ||
636 | |||
637 | static void release_ds_buffers(void) | ||
638 | { | ||
639 | } | ||
640 | |||
641 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c new file mode 100644 index 000000000000..d202c1bece1a --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
@@ -0,0 +1,218 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | enum { | ||
4 | LBR_FORMAT_32 = 0x00, | ||
5 | LBR_FORMAT_LIP = 0x01, | ||
6 | LBR_FORMAT_EIP = 0x02, | ||
7 | LBR_FORMAT_EIP_FLAGS = 0x03, | ||
8 | }; | ||
9 | |||
10 | /* | ||
11 | * We only support LBR implementations that have FREEZE_LBRS_ON_PMI; | ||
12 | * otherwise it becomes nearly impossible to get a reliable stack. | ||
13 | */ | ||
14 | |||
15 | static void __intel_pmu_lbr_enable(void) | ||
16 | { | ||
17 | u64 debugctl; | ||
18 | |||
19 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
20 | debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); | ||
21 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
22 | } | ||
23 | |||
24 | static void __intel_pmu_lbr_disable(void) | ||
25 | { | ||
26 | u64 debugctl; | ||
27 | |||
28 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
29 | debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); | ||
30 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
31 | } | ||
32 | |||
33 | static void intel_pmu_lbr_reset_32(void) | ||
34 | { | ||
35 | int i; | ||
36 | |||
37 | for (i = 0; i < x86_pmu.lbr_nr; i++) | ||
38 | wrmsrl(x86_pmu.lbr_from + i, 0); | ||
39 | } | ||
40 | |||
41 | static void intel_pmu_lbr_reset_64(void) | ||
42 | { | ||
43 | int i; | ||
44 | |||
45 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
46 | wrmsrl(x86_pmu.lbr_from + i, 0); | ||
47 | wrmsrl(x86_pmu.lbr_to + i, 0); | ||
48 | } | ||
49 | } | ||
50 | |||
51 | static void intel_pmu_lbr_reset(void) | ||
52 | { | ||
53 | if (!x86_pmu.lbr_nr) | ||
54 | return; | ||
55 | |||
56 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) | ||
57 | intel_pmu_lbr_reset_32(); | ||
58 | else | ||
59 | intel_pmu_lbr_reset_64(); | ||
60 | } | ||
61 | |||
62 | static void intel_pmu_lbr_enable(struct perf_event *event) | ||
63 | { | ||
64 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
65 | |||
66 | if (!x86_pmu.lbr_nr) | ||
67 | return; | ||
68 | |||
69 | WARN_ON_ONCE(cpuc->enabled); | ||
70 | |||
71 | /* | ||
72 | * Reset the LBR stack if we changed task context to | ||
73 | * avoid data leaks. | ||
74 | */ | ||
75 | |||
76 | if (event->ctx->task && cpuc->lbr_context != event->ctx) { | ||
77 | intel_pmu_lbr_reset(); | ||
78 | cpuc->lbr_context = event->ctx; | ||
79 | } | ||
80 | |||
81 | cpuc->lbr_users++; | ||
82 | } | ||
83 | |||
84 | static void intel_pmu_lbr_disable(struct perf_event *event) | ||
85 | { | ||
86 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
87 | |||
88 | if (!x86_pmu.lbr_nr) | ||
89 | return; | ||
90 | |||
91 | cpuc->lbr_users--; | ||
92 | WARN_ON_ONCE(cpuc->lbr_users < 0); | ||
93 | |||
94 | if (cpuc->enabled && !cpuc->lbr_users) | ||
95 | __intel_pmu_lbr_disable(); | ||
96 | } | ||
97 | |||
98 | static void intel_pmu_lbr_enable_all(void) | ||
99 | { | ||
100 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
101 | |||
102 | if (cpuc->lbr_users) | ||
103 | __intel_pmu_lbr_enable(); | ||
104 | } | ||
105 | |||
106 | static void intel_pmu_lbr_disable_all(void) | ||
107 | { | ||
108 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
109 | |||
110 | if (cpuc->lbr_users) | ||
111 | __intel_pmu_lbr_disable(); | ||
112 | } | ||
113 | |||
114 | static inline u64 intel_pmu_lbr_tos(void) | ||
115 | { | ||
116 | u64 tos; | ||
117 | |||
118 | rdmsrl(x86_pmu.lbr_tos, tos); | ||
119 | |||
120 | return tos; | ||
121 | } | ||
122 | |||
123 | static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) | ||
124 | { | ||
125 | unsigned long mask = x86_pmu.lbr_nr - 1; | ||
126 | u64 tos = intel_pmu_lbr_tos(); | ||
127 | int i; | ||
128 | |||
129 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
130 | unsigned long lbr_idx = (tos - i) & mask; | ||
131 | union { | ||
132 | struct { | ||
133 | u32 from; | ||
134 | u32 to; | ||
135 | }; | ||
136 | u64 lbr; | ||
137 | } msr_lastbranch; | ||
138 | |||
139 | rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); | ||
140 | |||
141 | cpuc->lbr_entries[i].from = msr_lastbranch.from; | ||
142 | cpuc->lbr_entries[i].to = msr_lastbranch.to; | ||
143 | cpuc->lbr_entries[i].flags = 0; | ||
144 | } | ||
145 | cpuc->lbr_stack.nr = i; | ||
146 | } | ||
147 | |||
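The anonymous-struct union used above is worth a second look: on little-endian x86 the 'from' member overlays the low 32 bits of the 64-bit MSR value and 'to' the high 32 bits, so one rdmsrl() yields both halves without shifting or masking. A stand-alone sketch (the MSR value is made up):

#include <stdio.h>
#include <stdint.h>

union lastbranch {
	struct {
		uint32_t from;	/* low 32 bits  */
		uint32_t to;	/* high 32 bits */
	};
	uint64_t lbr;
};

int main(void)
{
	union lastbranch msr_lastbranch;

	msr_lastbranch.lbr = 0xc0102040c0101000ULL;	/* pretend rdmsrl() result */
	printf("from=%#x to=%#x\n",
	       msr_lastbranch.from, msr_lastbranch.to);
	/* prints: from=0xc0101000 to=0xc0102040 */
	return 0;
}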
148 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) | ||
149 | |||
150 | /* | ||
151 | * Due to lack of segmentation in Linux the effective address (offset) | ||
152 | * is the same as the linear address, allowing us to merge the LIP and EIP | ||
153 | * LBR formats. | ||
154 | */ | ||
155 | static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | ||
156 | { | ||
157 | unsigned long mask = x86_pmu.lbr_nr - 1; | ||
158 | int lbr_format = x86_pmu.intel_cap.lbr_format; | ||
159 | u64 tos = intel_pmu_lbr_tos(); | ||
160 | int i; | ||
161 | |||
162 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
163 | unsigned long lbr_idx = (tos - i) & mask; | ||
164 | u64 from, to, flags = 0; | ||
165 | |||
166 | rdmsrl(x86_pmu.lbr_from + lbr_idx, from); | ||
167 | rdmsrl(x86_pmu.lbr_to + lbr_idx, to); | ||
168 | |||
169 | if (lbr_format == LBR_FORMAT_EIP_FLAGS) { | ||
170 | flags = !!(from & LBR_FROM_FLAG_MISPRED); | ||
171 | from = (u64)((((s64)from) << 1) >> 1); | ||
172 | } | ||
173 | |||
174 | cpuc->lbr_entries[i].from = from; | ||
175 | cpuc->lbr_entries[i].to = to; | ||
176 | cpuc->lbr_entries[i].flags = flags; | ||
177 | } | ||
178 | cpuc->lbr_stack.nr = i; | ||
179 | } | ||
180 | |||
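The shift pair in the LBR_FORMAT_EIP_FLAGS case above drops the mispredict flag in bit 63 while sign-extending bit 62 over it, relying on an arithmetic right shift of a signed value. A fully defined equivalent, with a made-up raw value, shows the intended result:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t from = (1ULL << 63) | 0x7fffffff81001234ULL;	/* flag + address */
	int mispred = !!(from & (1ULL << 63));

	/* bit 63 := bit 62, i.e. strip the flag and sign-extend the address */
	from &= ~(1ULL << 63);
	from |= (from & (1ULL << 62)) << 1;

	printf("mispred=%d from=%#llx\n", mispred,
	       (unsigned long long)from);
	/* prints: mispred=1 from=0xffffffff81001234 */
	return 0;
}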
181 | static void intel_pmu_lbr_read(void) | ||
182 | { | ||
183 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
184 | |||
185 | if (!cpuc->lbr_users) | ||
186 | return; | ||
187 | |||
188 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) | ||
189 | intel_pmu_lbr_read_32(cpuc); | ||
190 | else | ||
191 | intel_pmu_lbr_read_64(cpuc); | ||
192 | } | ||
193 | |||
194 | static void intel_pmu_lbr_init_core(void) | ||
195 | { | ||
196 | x86_pmu.lbr_nr = 4; | ||
197 | x86_pmu.lbr_tos = 0x01c9; | ||
198 | x86_pmu.lbr_from = 0x40; | ||
199 | x86_pmu.lbr_to = 0x60; | ||
200 | } | ||
201 | |||
202 | static void intel_pmu_lbr_init_nhm(void) | ||
203 | { | ||
204 | x86_pmu.lbr_nr = 16; | ||
205 | x86_pmu.lbr_tos = 0x01c9; | ||
206 | x86_pmu.lbr_from = 0x680; | ||
207 | x86_pmu.lbr_to = 0x6c0; | ||
208 | } | ||
209 | |||
210 | static void intel_pmu_lbr_init_atom(void) | ||
211 | { | ||
212 | x86_pmu.lbr_nr = 8; | ||
213 | x86_pmu.lbr_tos = 0x01c9; | ||
214 | x86_pmu.lbr_from = 0x40; | ||
215 | x86_pmu.lbr_to = 0x60; | ||
216 | } | ||
217 | |||
218 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c new file mode 100644 index 000000000000..ae85d69644d1 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -0,0 +1,858 @@ | |||
1 | /* | ||
2 | * Netburst Performance Events (P4, old Xeon) | ||
3 | * | ||
4 | * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org> | ||
5 | * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com> | ||
6 | * | ||
7 | * For licensing details see kernel-base/COPYING | ||
8 | */ | ||
9 | |||
10 | #ifdef CONFIG_CPU_SUP_INTEL | ||
11 | |||
12 | #include <asm/perf_event_p4.h> | ||
13 | |||
14 | #define P4_CNTR_LIMIT 3 | ||
15 | /* | ||
16 | * array indices: 0,1 - HT threads, used with an HT-enabled cpu | ||
17 | */ | ||
18 | struct p4_event_bind { | ||
19 | unsigned int opcode; /* Event code and ESCR selector */ | ||
20 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ | ||
21 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */ | ||
22 | }; | ||
23 | |||
24 | struct p4_cache_event_bind { | ||
25 | unsigned int metric_pebs; | ||
26 | unsigned int metric_vert; | ||
27 | }; | ||
28 | |||
29 | #define P4_GEN_CACHE_EVENT_BIND(name) \ | ||
30 | [P4_CACHE__##name] = { \ | ||
31 | .metric_pebs = P4_PEBS__##name, \ | ||
32 | .metric_vert = P4_VERT__##name, \ | ||
33 | } | ||
34 | |||
35 | static struct p4_cache_event_bind p4_cache_event_bind_map[] = { | ||
36 | P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired), | ||
37 | P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired), | ||
38 | P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired), | ||
39 | P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired), | ||
40 | }; | ||
41 | |||
42 | /* | ||
43 | * Note that we don't use CCCR1 here; there is an | ||
44 | * exception for P4_BSQ_ALLOCATION, but we simply have | ||
45 | * no workaround for it. | ||
46 | * | ||
47 | * Consider this binding as the resources a particular | ||
48 | * event may borrow; it doesn't contain the EventMask, | ||
49 | * Tags and friends -- those are left to the caller. | ||
50 | */ | ||
51 | static struct p4_event_bind p4_event_bind_map[] = { | ||
52 | [P4_EVENT_TC_DELIVER_MODE] = { | ||
53 | .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), | ||
54 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | ||
55 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
56 | }, | ||
57 | [P4_EVENT_BPU_FETCH_REQUEST] = { | ||
58 | .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), | ||
59 | .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, | ||
60 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
61 | }, | ||
62 | [P4_EVENT_ITLB_REFERENCE] = { | ||
63 | .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), | ||
64 | .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, | ||
65 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
66 | }, | ||
67 | [P4_EVENT_MEMORY_CANCEL] = { | ||
68 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), | ||
69 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | ||
70 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
71 | }, | ||
72 | [P4_EVENT_MEMORY_COMPLETE] = { | ||
73 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), | ||
74 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, | ||
75 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
76 | }, | ||
77 | [P4_EVENT_LOAD_PORT_REPLAY] = { | ||
78 | .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), | ||
79 | .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, | ||
80 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
81 | }, | ||
82 | [P4_EVENT_STORE_PORT_REPLAY] = { | ||
83 | .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), | ||
84 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, | ||
85 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
86 | }, | ||
87 | [P4_EVENT_MOB_LOAD_REPLAY] = { | ||
88 | .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), | ||
89 | .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, | ||
90 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
91 | }, | ||
92 | [P4_EVENT_PAGE_WALK_TYPE] = { | ||
93 | .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), | ||
94 | .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, | ||
95 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
96 | }, | ||
97 | [P4_EVENT_BSQ_CACHE_REFERENCE] = { | ||
98 | .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), | ||
99 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, | ||
100 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
101 | }, | ||
102 | [P4_EVENT_IOQ_ALLOCATION] = { | ||
103 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), | ||
104 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
105 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
106 | }, | ||
107 | [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | ||
108 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), | ||
109 | .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, | ||
110 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | ||
111 | }, | ||
112 | [P4_EVENT_FSB_DATA_ACTIVITY] = { | ||
113 | .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), | ||
114 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
115 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
116 | }, | ||
117 | [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ | ||
118 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), | ||
119 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, | ||
120 | .cntr = { {0, -1, -1}, {1, -1, -1} }, | ||
121 | }, | ||
122 | [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | ||
123 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), | ||
124 | .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, | ||
125 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | ||
126 | }, | ||
127 | [P4_EVENT_SSE_INPUT_ASSIST] = { | ||
128 | .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), | ||
129 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
130 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
131 | }, | ||
132 | [P4_EVENT_PACKED_SP_UOP] = { | ||
133 | .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), | ||
134 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
135 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
136 | }, | ||
137 | [P4_EVENT_PACKED_DP_UOP] = { | ||
138 | .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), | ||
139 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
140 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
141 | }, | ||
142 | [P4_EVENT_SCALAR_SP_UOP] = { | ||
143 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), | ||
144 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
145 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
146 | }, | ||
147 | [P4_EVENT_SCALAR_DP_UOP] = { | ||
148 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), | ||
149 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
150 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
151 | }, | ||
152 | [P4_EVENT_64BIT_MMX_UOP] = { | ||
153 | .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), | ||
154 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
155 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
156 | }, | ||
157 | [P4_EVENT_128BIT_MMX_UOP] = { | ||
158 | .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), | ||
159 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
160 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
161 | }, | ||
162 | [P4_EVENT_X87_FP_UOP] = { | ||
163 | .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), | ||
164 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
165 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
166 | }, | ||
167 | [P4_EVENT_TC_MISC] = { | ||
168 | .opcode = P4_OPCODE(P4_EVENT_TC_MISC), | ||
169 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | ||
170 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
171 | }, | ||
172 | [P4_EVENT_GLOBAL_POWER_EVENTS] = { | ||
173 | .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), | ||
174 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
175 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
176 | }, | ||
177 | [P4_EVENT_TC_MS_XFER] = { | ||
178 | .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), | ||
179 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | ||
180 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
181 | }, | ||
182 | [P4_EVENT_UOP_QUEUE_WRITES] = { | ||
183 | .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), | ||
184 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | ||
185 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
186 | }, | ||
187 | [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { | ||
188 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), | ||
189 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, | ||
190 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
191 | }, | ||
192 | [P4_EVENT_RETIRED_BRANCH_TYPE] = { | ||
193 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), | ||
194 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, | ||
195 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
196 | }, | ||
197 | [P4_EVENT_RESOURCE_STALL] = { | ||
198 | .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), | ||
199 | .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, | ||
200 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
201 | }, | ||
202 | [P4_EVENT_WC_BUFFER] = { | ||
203 | .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), | ||
204 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | ||
205 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
206 | }, | ||
207 | [P4_EVENT_B2B_CYCLES] = { | ||
208 | .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), | ||
209 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
210 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
211 | }, | ||
212 | [P4_EVENT_BNR] = { | ||
213 | .opcode = P4_OPCODE(P4_EVENT_BNR), | ||
214 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
215 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
216 | }, | ||
217 | [P4_EVENT_SNOOP] = { | ||
218 | .opcode = P4_OPCODE(P4_EVENT_SNOOP), | ||
219 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
220 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
221 | }, | ||
222 | [P4_EVENT_RESPONSE] = { | ||
223 | .opcode = P4_OPCODE(P4_EVENT_RESPONSE), | ||
224 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
225 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
226 | }, | ||
227 | [P4_EVENT_FRONT_END_EVENT] = { | ||
228 | .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), | ||
229 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
230 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
231 | }, | ||
232 | [P4_EVENT_EXECUTION_EVENT] = { | ||
233 | .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), | ||
234 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
235 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
236 | }, | ||
237 | [P4_EVENT_REPLAY_EVENT] = { | ||
238 | .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), | ||
239 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
240 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
241 | }, | ||
242 | [P4_EVENT_INSTR_RETIRED] = { | ||
243 | .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), | ||
244 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
245 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
246 | }, | ||
247 | [P4_EVENT_UOPS_RETIRED] = { | ||
248 | .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), | ||
249 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
250 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
251 | }, | ||
252 | [P4_EVENT_UOP_TYPE] = { | ||
253 | .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), | ||
254 | .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, | ||
255 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
256 | }, | ||
257 | [P4_EVENT_BRANCH_RETIRED] = { | ||
258 | .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), | ||
259 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
260 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
261 | }, | ||
262 | [P4_EVENT_MISPRED_BRANCH_RETIRED] = { | ||
263 | .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), | ||
264 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
265 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
266 | }, | ||
267 | [P4_EVENT_X87_ASSIST] = { | ||
268 | .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), | ||
269 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
270 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
271 | }, | ||
272 | [P4_EVENT_MACHINE_CLEAR] = { | ||
273 | .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), | ||
274 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
275 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
276 | }, | ||
277 | [P4_EVENT_INSTR_COMPLETED] = { | ||
278 | .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), | ||
279 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
280 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
281 | }, | ||
282 | }; | ||
283 | |||
284 | #define P4_GEN_CACHE_EVENT(event, bit, cache_event) \ | ||
285 | p4_config_pack_escr(P4_ESCR_EVENT(event) | \ | ||
286 | P4_ESCR_EMASK_BIT(event, bit)) | \ | ||
287 | p4_config_pack_cccr(cache_event | \ | ||
288 | P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) | ||
289 | |||
290 | static __initconst const u64 p4_hw_cache_event_ids | ||
291 | [PERF_COUNT_HW_CACHE_MAX] | ||
292 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
293 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
294 | { | ||
295 | [ C(L1D ) ] = { | ||
296 | [ C(OP_READ) ] = { | ||
297 | [ C(RESULT_ACCESS) ] = 0x0, | ||
298 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
299 | P4_CACHE__1stl_cache_load_miss_retired), | ||
300 | }, | ||
301 | }, | ||
302 | [ C(LL ) ] = { | ||
303 | [ C(OP_READ) ] = { | ||
304 | [ C(RESULT_ACCESS) ] = 0x0, | ||
305 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
306 | P4_CACHE__2ndl_cache_load_miss_retired), | ||
307 | }, | ||
308 | }, | ||
309 | [ C(DTLB) ] = { | ||
310 | [ C(OP_READ) ] = { | ||
311 | [ C(RESULT_ACCESS) ] = 0x0, | ||
312 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
313 | P4_CACHE__dtlb_load_miss_retired), | ||
314 | }, | ||
315 | [ C(OP_WRITE) ] = { | ||
316 | [ C(RESULT_ACCESS) ] = 0x0, | ||
317 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
318 | P4_CACHE__dtlb_store_miss_retired), | ||
319 | }, | ||
320 | }, | ||
321 | [ C(ITLB) ] = { | ||
322 | [ C(OP_READ) ] = { | ||
323 | [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, | ||
324 | P4_CACHE__itlb_reference_hit), | ||
325 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, | ||
326 | P4_CACHE__itlb_reference_miss), | ||
327 | }, | ||
328 | [ C(OP_WRITE) ] = { | ||
329 | [ C(RESULT_ACCESS) ] = -1, | ||
330 | [ C(RESULT_MISS) ] = -1, | ||
331 | }, | ||
332 | [ C(OP_PREFETCH) ] = { | ||
333 | [ C(RESULT_ACCESS) ] = -1, | ||
334 | [ C(RESULT_MISS) ] = -1, | ||
335 | }, | ||
336 | }, | ||
337 | }; | ||
338 | |||
339 | static u64 p4_general_events[PERF_COUNT_HW_MAX] = { | ||
340 | /* non-halted CPU clocks */ | ||
341 | [PERF_COUNT_HW_CPU_CYCLES] = | ||
342 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | | ||
343 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), | ||
344 | |||
345 | /* | ||
346 | * retired instructions | ||
347 | * for the sake of simplicity we don't use the FSB tagging | ||
348 | */ | ||
349 | [PERF_COUNT_HW_INSTRUCTIONS] = | ||
350 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) | | ||
351 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | | ||
352 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)), | ||
353 | |||
354 | /* cache hits */ | ||
355 | [PERF_COUNT_HW_CACHE_REFERENCES] = | ||
356 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | | ||
357 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | | ||
358 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | | ||
359 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | | ||
360 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | | ||
361 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | | ||
362 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)), | ||
363 | |||
364 | /* cache misses */ | ||
365 | [PERF_COUNT_HW_CACHE_MISSES] = | ||
366 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | | ||
367 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | | ||
368 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | | ||
369 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)), | ||
370 | |||
371 | /* branch instructions retired */ | ||
372 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = | ||
373 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) | | ||
374 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | | ||
375 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | | ||
376 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | | ||
377 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)), | ||
378 | |||
379 | /* mispredicted branches retired */ | ||
380 | [PERF_COUNT_HW_BRANCH_MISSES] = | ||
381 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) | | ||
382 | P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)), | ||
383 | |||
384 | /* bus ready clocks (cpu is driving #DRDY_DRV/#DRDY_OWN): */ | ||
385 | [PERF_COUNT_HW_BUS_CYCLES] = | ||
386 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) | | ||
387 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | | ||
388 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) | | ||
389 | p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), | ||
390 | }; | ||
391 | |||
392 | static struct p4_event_bind *p4_config_get_bind(u64 config) | ||
393 | { | ||
394 | unsigned int evnt = p4_config_unpack_event(config); | ||
395 | struct p4_event_bind *bind = NULL; | ||
396 | |||
397 | if (evnt < ARRAY_SIZE(p4_event_bind_map)) | ||
398 | bind = &p4_event_bind_map[evnt]; | ||
399 | |||
400 | return bind; | ||
401 | } | ||
402 | |||
403 | static u64 p4_pmu_event_map(int hw_event) | ||
404 | { | ||
405 | struct p4_event_bind *bind; | ||
406 | unsigned int esel; | ||
407 | u64 config; | ||
408 | |||
409 | config = p4_general_events[hw_event]; | ||
410 | bind = p4_config_get_bind(config); | ||
411 | esel = P4_OPCODE_ESEL(bind->opcode); | ||
412 | config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel)); | ||
413 | |||
414 | return config; | ||
415 | } | ||
416 | |||
417 | static int p4_hw_config(struct perf_event *event) | ||
418 | { | ||
419 | int cpu = get_cpu(); | ||
420 | int rc = 0; | ||
421 | unsigned int evnt; | ||
422 | u32 escr, cccr; | ||
423 | |||
424 | /* | ||
425 | * the reason we grab the cpu this early is that, if we get scheduled | ||
426 | * on the same cpu the first time around, we will not need to swap the | ||
427 | * thread-specific flags in config (and will save some cpu cycles) | ||
428 | */ | ||
429 | |||
430 | cccr = p4_default_cccr_conf(cpu); | ||
431 | escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel, | ||
432 | event->attr.exclude_user); | ||
433 | event->hw.config = p4_config_pack_escr(escr) | | ||
434 | p4_config_pack_cccr(cccr); | ||
435 | |||
436 | if (p4_ht_active() && p4_ht_thread(cpu)) | ||
437 | event->hw.config = p4_set_ht_bit(event->hw.config); | ||
438 | |||
439 | if (event->attr.type == PERF_TYPE_RAW) { | ||
440 | |||
441 | /* user data may have out-of-bound event index */ | ||
442 | evnt = p4_config_unpack_event(event->attr.config); | ||
443 | if (evnt >= ARRAY_SIZE(p4_event_bind_map)) { | ||
444 | rc = -EINVAL; | ||
445 | goto out; | ||
446 | } | ||
447 | |||
448 | /* | ||
449 | * We don't control raw events so it's up to the caller | ||
450 | * to pass sane values (and we don't count the thread number | ||
451 | * on an HT machine but allow HT-compatible specifics to be | ||
452 | * passed on) | ||
453 | * | ||
454 | * XXX: HT wide things should check perf_paranoid_cpu() && | ||
455 | * CAP_SYS_ADMIN | ||
456 | */ | ||
457 | event->hw.config |= event->attr.config & | ||
458 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | | ||
459 | p4_config_pack_cccr(P4_CCCR_MASK_HT)); | ||
460 | } | ||
461 | |||
462 | rc = x86_setup_perfctr(event); | ||
463 | out: | ||
464 | put_cpu(); | ||
465 | return rc; | ||
466 | } | ||
467 | |||
468 | static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) | ||
469 | { | ||
470 | int overflow = 0; | ||
471 | u32 low, high; | ||
472 | |||
473 | rdmsr(hwc->config_base + hwc->idx, low, high); | ||
474 | |||
475 | /* we need to check the high bit for unflagged overflows */ | ||
476 | if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) { | ||
477 | overflow = 1; | ||
478 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
479 | ((u64)low) & ~P4_CCCR_OVF); | ||
480 | } | ||
481 | |||
482 | return overflow; | ||
483 | } | ||
484 | |||
485 | static inline void p4_pmu_disable_event(struct perf_event *event) | ||
486 | { | ||
487 | struct hw_perf_event *hwc = &event->hw; | ||
488 | |||
489 | /* | ||
490 | * If the event gets disabled while the counter is in an overflowed | ||
491 | * state, we need to clear P4_CCCR_OVF; otherwise the interrupt gets | ||
492 | * asserted again and again | ||
493 | */ | ||
494 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
495 | (u64)(p4_config_unpack_cccr(hwc->config)) & | ||
496 | ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); | ||
497 | } | ||
498 | |||
499 | static void p4_pmu_disable_all(void) | ||
500 | { | ||
501 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
502 | int idx; | ||
503 | |||
504 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
505 | struct perf_event *event = cpuc->events[idx]; | ||
506 | if (!test_bit(idx, cpuc->active_mask)) | ||
507 | continue; | ||
508 | p4_pmu_disable_event(event); | ||
509 | } | ||
510 | } | ||
511 | |||
512 | static void p4_pmu_enable_event(struct perf_event *event) | ||
513 | { | ||
514 | struct hw_perf_event *hwc = &event->hw; | ||
515 | int thread = p4_ht_config_thread(hwc->config); | ||
516 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); | ||
517 | unsigned int idx = p4_config_unpack_event(hwc->config); | ||
518 | unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config); | ||
519 | struct p4_event_bind *bind; | ||
520 | struct p4_cache_event_bind *bind_cache; | ||
521 | u64 escr_addr, cccr; | ||
522 | |||
523 | bind = &p4_event_bind_map[idx]; | ||
524 | escr_addr = (u64)bind->escr_msr[thread]; | ||
525 | |||
526 | /* | ||
527 | * - we don't support cascaded counters yet | ||
528 | * - and counter 1 is broken (erratum) | ||
529 | */ | ||
530 | WARN_ON_ONCE(p4_is_event_cascaded(hwc->config)); | ||
531 | WARN_ON_ONCE(hwc->idx == 1); | ||
532 | |||
533 | /* we need a real Event value */ | ||
534 | escr_conf &= ~P4_ESCR_EVENT_MASK; | ||
535 | escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode)); | ||
536 | |||
537 | cccr = p4_config_unpack_cccr(hwc->config); | ||
538 | |||
539 | /* | ||
540 | * it could be a cache event, in which case we need to | ||
541 | * set the metrics in additional MSRs | ||
542 | */ | ||
543 | BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK); | ||
544 | if (idx_cache > P4_CACHE__NONE && | ||
545 | idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) { | ||
546 | bind_cache = &p4_cache_event_bind_map[idx_cache]; | ||
547 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs); | ||
548 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert); | ||
549 | } | ||
550 | |||
551 | (void)checking_wrmsrl(escr_addr, escr_conf); | ||
552 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
553 | (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); | ||
554 | } | ||
555 | |||
556 | static void p4_pmu_enable_all(int added) | ||
557 | { | ||
558 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
559 | int idx; | ||
560 | |||
561 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
562 | struct perf_event *event = cpuc->events[idx]; | ||
563 | if (!test_bit(idx, cpuc->active_mask)) | ||
564 | continue; | ||
565 | p4_pmu_enable_event(event); | ||
566 | } | ||
567 | } | ||
568 | |||
569 | static int p4_pmu_handle_irq(struct pt_regs *regs) | ||
570 | { | ||
571 | struct perf_sample_data data; | ||
572 | struct cpu_hw_events *cpuc; | ||
573 | struct perf_event *event; | ||
574 | struct hw_perf_event *hwc; | ||
575 | int idx, handled = 0; | ||
576 | u64 val; | ||
577 | |||
578 | data.addr = 0; | ||
579 | data.raw = NULL; | ||
580 | |||
581 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
582 | |||
583 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
584 | |||
585 | if (!test_bit(idx, cpuc->active_mask)) | ||
586 | continue; | ||
587 | |||
588 | event = cpuc->events[idx]; | ||
589 | hwc = &event->hw; | ||
590 | |||
591 | WARN_ON_ONCE(hwc->idx != idx); | ||
592 | |||
593 | /* it might be an unflagged overflow */ | ||
594 | handled = p4_pmu_clear_cccr_ovf(hwc); | ||
595 | |||
596 | val = x86_perf_event_update(event); | ||
597 | if (!handled && (val & (1ULL << (x86_pmu.cntval_bits - 1)))) | ||
598 | continue; | ||
599 | |||
600 | /* event overflow for sure */ | ||
601 | data.period = event->hw.last_period; | ||
602 | |||
603 | if (!x86_perf_event_set_period(event)) | ||
604 | continue; | ||
605 | if (perf_event_overflow(event, 1, &data, regs)) | ||
606 | p4_pmu_disable_event(event); | ||
607 | } | ||
608 | |||
609 | if (handled) { | ||
610 | /* p4 quirk: unmask it again */ | ||
611 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | ||
612 | inc_irq_stat(apic_perf_irqs); | ||
613 | } | ||
614 | |||
615 | return handled; | ||
616 | } | ||
617 | |||
618 | /* | ||
619 | * swap thread-specific fields according to the thread | ||
620 | * we are going to run on | ||
621 | */ | ||
622 | static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) | ||
623 | { | ||
624 | u32 escr, cccr; | ||
625 | |||
626 | /* | ||
627 | * either we are lucky and continue on the same cpu, or there is no HT support | ||
628 | */ | ||
629 | if (!p4_should_swap_ts(hwc->config, cpu)) | ||
630 | return; | ||
631 | |||
632 | /* | ||
633 | * the event was migrated from another logical | ||
634 | * cpu, so we need to swap the thread-specific flags | ||
635 | */ | ||
636 | |||
637 | escr = p4_config_unpack_escr(hwc->config); | ||
638 | cccr = p4_config_unpack_cccr(hwc->config); | ||
639 | |||
640 | if (p4_ht_thread(cpu)) { | ||
641 | cccr &= ~P4_CCCR_OVF_PMI_T0; | ||
642 | cccr |= P4_CCCR_OVF_PMI_T1; | ||
643 | if (escr & P4_ESCR_T0_OS) { | ||
644 | escr &= ~P4_ESCR_T0_OS; | ||
645 | escr |= P4_ESCR_T1_OS; | ||
646 | } | ||
647 | if (escr & P4_ESCR_T0_USR) { | ||
648 | escr &= ~P4_ESCR_T0_USR; | ||
649 | escr |= P4_ESCR_T1_USR; | ||
650 | } | ||
651 | hwc->config = p4_config_pack_escr(escr); | ||
652 | hwc->config |= p4_config_pack_cccr(cccr); | ||
653 | hwc->config |= P4_CONFIG_HT; | ||
654 | } else { | ||
655 | cccr &= ~P4_CCCR_OVF_PMI_T1; | ||
656 | cccr |= P4_CCCR_OVF_PMI_T0; | ||
657 | if (escr & P4_ESCR_T1_OS) { | ||
658 | escr &= ~P4_ESCR_T1_OS; | ||
659 | escr |= P4_ESCR_T0_OS; | ||
660 | } | ||
661 | if (escr & P4_ESCR_T1_USR) { | ||
662 | escr &= ~P4_ESCR_T1_USR; | ||
663 | escr |= P4_ESCR_T0_USR; | ||
664 | } | ||
665 | hwc->config = p4_config_pack_escr(escr); | ||
666 | hwc->config |= p4_config_pack_cccr(cccr); | ||
667 | hwc->config &= ~P4_CONFIG_HT; | ||
668 | } | ||
669 | } | ||
670 | |||
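The swap above just moves the OS/USR qualifier bits between the thread-0 and thread-1 slots of the ESCR (and flips the matching CCCR PMI bit). A minimal sketch of the ESCR half, with made-up bit positions standing in for the real P4_ESCR_T0_*/T1_* definitions:

#include <stdio.h>
#include <stdint.h>

#define T0_OS	(1u << 3)	/* assumed positions, illustration only */
#define T0_USR	(1u << 2)
#define T1_OS	(1u << 1)
#define T1_USR	(1u << 0)

static uint32_t escr_to_thread1(uint32_t escr)
{
	if (escr & T0_OS) {
		escr &= ~T0_OS;
		escr |= T1_OS;
	}
	if (escr & T0_USR) {
		escr &= ~T0_USR;
		escr |= T1_USR;
	}
	return escr;
}

int main(void)
{
	uint32_t escr = T0_OS | T0_USR;

	printf("%#x -> %#x\n", escr, escr_to_thread1(escr));	/* 0xc -> 0x3 */
	return 0;
}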
671 | /* | ||
672 | * ESCR address hashing is tricky: the ESCRs are not sequential | ||
673 | * in MSR space, but they all start at MSR_P4_BSU_ESCR0 (0x03a0) and | ||
674 | * every ESCR address falls in the range [0x3a0, 0x3e1], | ||
675 | * | ||
676 | * so we end up with a ~70% filled hash table | ||
677 | */ | ||
678 | |||
679 | #define P4_ESCR_MSR_BASE 0x000003a0 | ||
680 | #define P4_ESCR_MSR_MAX 0x000003e1 | ||
681 | #define P4_ESCR_MSR_TABLE_SIZE (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1) | ||
682 | #define P4_ESCR_MSR_IDX(msr) (msr - P4_ESCR_MSR_BASE) | ||
683 | #define P4_ESCR_MSR_TABLE_ENTRY(msr) [P4_ESCR_MSR_IDX(msr)] = msr | ||
684 | |||
685 | static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = { | ||
686 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0), | ||
687 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1), | ||
688 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0), | ||
689 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1), | ||
690 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0), | ||
691 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1), | ||
692 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0), | ||
693 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1), | ||
694 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2), | ||
695 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3), | ||
696 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4), | ||
697 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5), | ||
698 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0), | ||
699 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1), | ||
700 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0), | ||
701 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1), | ||
702 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0), | ||
703 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1), | ||
704 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0), | ||
705 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1), | ||
706 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0), | ||
707 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1), | ||
708 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0), | ||
709 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1), | ||
710 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0), | ||
711 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1), | ||
712 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0), | ||
713 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1), | ||
714 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0), | ||
715 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1), | ||
716 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0), | ||
717 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1), | ||
718 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0), | ||
719 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1), | ||
720 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0), | ||
721 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1), | ||
722 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0), | ||
723 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1), | ||
724 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0), | ||
725 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1), | ||
726 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0), | ||
727 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1), | ||
728 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0), | ||
729 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1), | ||
730 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0), | ||
731 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1), | ||
732 | }; | ||
733 | |||
734 | static int p4_get_escr_idx(unsigned int addr) | ||
735 | { | ||
736 | unsigned int idx = P4_ESCR_MSR_IDX(addr); | ||
737 | |||
738 | if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE || | ||
739 | !p4_escr_table[idx] || | ||
740 | p4_escr_table[idx] != addr)) { | ||
741 | WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr); | ||
742 | return -1; | ||
743 | } | ||
744 | |||
745 | return idx; | ||
746 | } | ||
747 | |||
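Despite the word "hashing", the lookup above is a direct-mapped table indexed by the MSR address minus P4_ESCR_MSR_BASE (0x3a0), with empty slots where no ESCR exists. A reduced, stand-alone version with only two entries populated (the second address is illustrative):

#include <stdio.h>

#define ESCR_MSR_BASE	0x3a0
#define ESCR_MSR_MAX	0x3e1
#define ESCR_TABLE_SIZE	(ESCR_MSR_MAX - ESCR_MSR_BASE + 1)
#define ESCR_IDX(msr)	((msr) - ESCR_MSR_BASE)

static const unsigned int escr_table[ESCR_TABLE_SIZE] = {
	[ESCR_IDX(0x3a0)] = 0x3a0,	/* MSR_P4_BSU_ESCR0 */
	[ESCR_IDX(0x3a2)] = 0x3a2,	/* another ESCR, address assumed */
};

static int escr_idx(unsigned int msr)
{
	unsigned int idx = ESCR_IDX(msr);

	if (idx >= ESCR_TABLE_SIZE || escr_table[idx] != msr)
		return -1;	/* out of range or unpopulated slot */
	return idx;
}

int main(void)
{
	printf("%d %d %d\n",
	       escr_idx(0x3a0), escr_idx(0x3a2), escr_idx(0x3a1));
	/* prints: 0 2 -1 */
	return 0;
}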
748 | static int p4_next_cntr(int thread, unsigned long *used_mask, | ||
749 | struct p4_event_bind *bind) | ||
750 | { | ||
751 | int i, j; | ||
752 | |||
753 | for (i = 0; i < P4_CNTR_LIMIT; i++) { | ||
754 | j = bind->cntr[thread][i]; | ||
755 | if (j != -1 && !test_bit(j, used_mask)) | ||
756 | return j; | ||
757 | } | ||
758 | |||
759 | return -1; | ||
760 | } | ||
761 | |||
762 | static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | ||
763 | { | ||
764 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
765 | unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)]; | ||
766 | int cpu = smp_processor_id(); | ||
767 | struct hw_perf_event *hwc; | ||
768 | struct p4_event_bind *bind; | ||
769 | unsigned int i, thread, num; | ||
770 | int cntr_idx, escr_idx; | ||
771 | |||
772 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | ||
773 | bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); | ||
774 | |||
775 | for (i = 0, num = n; i < n; i++, num--) { | ||
776 | |||
777 | hwc = &cpuc->event_list[i]->hw; | ||
778 | thread = p4_ht_thread(cpu); | ||
779 | bind = p4_config_get_bind(hwc->config); | ||
780 | escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); | ||
781 | if (unlikely(escr_idx == -1)) | ||
782 | goto done; | ||
783 | |||
784 | if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) { | ||
785 | cntr_idx = hwc->idx; | ||
786 | if (assign) | ||
787 | assign[i] = hwc->idx; | ||
788 | goto reserve; | ||
789 | } | ||
790 | |||
791 | cntr_idx = p4_next_cntr(thread, used_mask, bind); | ||
792 | if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) | ||
793 | goto done; | ||
794 | |||
795 | p4_pmu_swap_config_ts(hwc, cpu); | ||
796 | if (assign) | ||
797 | assign[i] = cntr_idx; | ||
798 | reserve: | ||
799 | set_bit(cntr_idx, used_mask); | ||
800 | set_bit(escr_idx, escr_mask); | ||
801 | } | ||
802 | |||
803 | done: | ||
804 | return num ? -ENOSPC : 0; | ||
805 | } | ||
806 | |||
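The scheduling loop above is a greedy assignment: each event has a short list of candidate counters (the bind->cntr[] row for its HT thread), the first free one wins, and the whole group is rejected if any event cannot be placed. A toy version with made-up candidate lists:

#include <stdio.h>

static int next_cntr(const int *cand, int ncand, unsigned int used_mask)
{
	int i;

	for (i = 0; i < ncand; i++) {
		if (cand[i] != -1 && !(used_mask & (1u << cand[i])))
			return cand[i];
	}
	return -1;
}

int main(void)
{
	const int ev0[] = { 4, 5, -1 };
	const int ev1[] = { 4, 5, -1 };		/* competes with ev0 */
	const int ev2[] = { 0, -1, -1 };
	const int *events[] = { ev0, ev1, ev2 };
	unsigned int used_mask = 0;
	int assign[3], i;

	for (i = 0; i < 3; i++) {
		int idx = next_cntr(events[i], 3, used_mask);

		if (idx == -1) {
			printf("event %d: no counter free, group fails\n", i);
			return 1;
		}
		used_mask |= 1u << idx;
		assign[i] = idx;
	}

	for (i = 0; i < 3; i++)
		printf("event %d -> counter %d\n", i, assign[i]);	/* 4, 5, 0 */
	return 0;
}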
807 | static __initconst const struct x86_pmu p4_pmu = { | ||
808 | .name = "Netburst P4/Xeon", | ||
809 | .handle_irq = p4_pmu_handle_irq, | ||
810 | .disable_all = p4_pmu_disable_all, | ||
811 | .enable_all = p4_pmu_enable_all, | ||
812 | .enable = p4_pmu_enable_event, | ||
813 | .disable = p4_pmu_disable_event, | ||
814 | .eventsel = MSR_P4_BPU_CCCR0, | ||
815 | .perfctr = MSR_P4_BPU_PERFCTR0, | ||
816 | .event_map = p4_pmu_event_map, | ||
817 | .max_events = ARRAY_SIZE(p4_general_events), | ||
818 | .get_event_constraints = x86_get_event_constraints, | ||
819 | /* | ||
820 | * If HT is disabled we may need to use all | ||
821 | * ARCH_P4_MAX_CCCR counters simultaneously, | ||
822 | * though for now we leave it restricted, assuming | ||
823 | * HT is on | ||
824 | */ | ||
825 | .num_counters = ARCH_P4_MAX_CCCR, | ||
826 | .apic = 1, | ||
827 | .cntval_bits = 40, | ||
828 | .cntval_mask = (1ULL << 40) - 1, | ||
829 | .max_period = (1ULL << 39) - 1, | ||
830 | .hw_config = p4_hw_config, | ||
831 | .schedule_events = p4_pmu_schedule_events, | ||
832 | }; | ||
833 | |||
834 | static __init int p4_pmu_init(void) | ||
835 | { | ||
836 | unsigned int low, high; | ||
837 | |||
838 | /* If we get stripped -- indexing fails */ | ||
839 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); | ||
840 | |||
841 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); | ||
842 | if (!(low & (1 << 7))) { | ||
843 | pr_cont("unsupported Netburst CPU model %d ", | ||
844 | boot_cpu_data.x86_model); | ||
845 | return -ENODEV; | ||
846 | } | ||
847 | |||
848 | memcpy(hw_cache_event_ids, p4_hw_cache_event_ids, | ||
849 | sizeof(hw_cache_event_ids)); | ||
850 | |||
851 | pr_cont("Netburst events, "); | ||
852 | |||
853 | x86_pmu = p4_pmu; | ||
854 | |||
855 | return 0; | ||
856 | } | ||
857 | |||
858 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index a330485d14da..34ba07be2cda 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
@@ -27,24 +27,6 @@ static u64 p6_pmu_event_map(int hw_event) | |||
27 | */ | 27 | */ |
28 | #define P6_NOP_EVENT 0x0000002EULL | 28 | #define P6_NOP_EVENT 0x0000002EULL |
29 | 29 | ||
30 | static u64 p6_pmu_raw_event(u64 hw_event) | ||
31 | { | ||
32 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
33 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
34 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
35 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
36 | #define P6_EVNTSEL_REG_MASK 0xFF000000ULL | ||
37 | |||
38 | #define P6_EVNTSEL_MASK \ | ||
39 | (P6_EVNTSEL_EVENT_MASK | \ | ||
40 | P6_EVNTSEL_UNIT_MASK | \ | ||
41 | P6_EVNTSEL_EDGE_MASK | \ | ||
42 | P6_EVNTSEL_INV_MASK | \ | ||
43 | P6_EVNTSEL_REG_MASK) | ||
44 | |||
45 | return hw_event & P6_EVNTSEL_MASK; | ||
46 | } | ||
47 | |||
48 | static struct event_constraint p6_event_constraints[] = | 30 | static struct event_constraint p6_event_constraints[] = |
49 | { | 31 | { |
50 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | 32 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ |
@@ -66,7 +48,7 @@ static void p6_pmu_disable_all(void) | |||
66 | wrmsrl(MSR_P6_EVNTSEL0, val); | 48 | wrmsrl(MSR_P6_EVNTSEL0, val); |
67 | } | 49 | } |
68 | 50 | ||
69 | static void p6_pmu_enable_all(void) | 51 | static void p6_pmu_enable_all(int added) |
70 | { | 52 | { |
71 | unsigned long val; | 53 | unsigned long val; |
72 | 54 | ||
@@ -102,22 +84,23 @@ static void p6_pmu_enable_event(struct perf_event *event) | |||
102 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); | 84 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); |
103 | } | 85 | } |
104 | 86 | ||
105 | static __initconst struct x86_pmu p6_pmu = { | 87 | static __initconst const struct x86_pmu p6_pmu = { |
106 | .name = "p6", | 88 | .name = "p6", |
107 | .handle_irq = x86_pmu_handle_irq, | 89 | .handle_irq = x86_pmu_handle_irq, |
108 | .disable_all = p6_pmu_disable_all, | 90 | .disable_all = p6_pmu_disable_all, |
109 | .enable_all = p6_pmu_enable_all, | 91 | .enable_all = p6_pmu_enable_all, |
110 | .enable = p6_pmu_enable_event, | 92 | .enable = p6_pmu_enable_event, |
111 | .disable = p6_pmu_disable_event, | 93 | .disable = p6_pmu_disable_event, |
94 | .hw_config = x86_pmu_hw_config, | ||
95 | .schedule_events = x86_schedule_events, | ||
112 | .eventsel = MSR_P6_EVNTSEL0, | 96 | .eventsel = MSR_P6_EVNTSEL0, |
113 | .perfctr = MSR_P6_PERFCTR0, | 97 | .perfctr = MSR_P6_PERFCTR0, |
114 | .event_map = p6_pmu_event_map, | 98 | .event_map = p6_pmu_event_map, |
115 | .raw_event = p6_pmu_raw_event, | ||
116 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | 99 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), |
117 | .apic = 1, | 100 | .apic = 1, |
118 | .max_period = (1ULL << 31) - 1, | 101 | .max_period = (1ULL << 31) - 1, |
119 | .version = 0, | 102 | .version = 0, |
120 | .num_events = 2, | 103 | .num_counters = 2, |
121 | /* | 104 | /* |
122 | * Events have 40 bits implemented. However they are designed such | 105 | * Events have 40 bits implemented. However they are designed such |
123 | * that bits [32-39] are sign extensions of bit 31. As such the | 106 | * that bits [32-39] are sign extensions of bit 31. As such the |
@@ -125,8 +108,8 @@ static __initconst struct x86_pmu p6_pmu = { | |||
125 | * | 108 | * |
126 | * See IA-32 Intel Architecture Software developer manual Vol 3B | 109 | * See IA-32 Intel Architecture Software developer manual Vol 3B |
127 | */ | 110 | */ |
128 | .event_bits = 32, | 111 | .cntval_bits = 32, |
129 | .event_mask = (1ULL << 32) - 1, | 112 | .cntval_mask = (1ULL << 32) - 1, |
130 | .get_event_constraints = x86_get_event_constraints, | 113 | .get_event_constraints = x86_get_event_constraints, |
131 | .event_constraints = p6_event_constraints, | 114 | .event_constraints = p6_event_constraints, |
132 | }; | 115 | }; |
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index dfdb4dba2320..b9d1ff588445 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -24,8 +24,8 @@ | |||
24 | #include <linux/dmi.h> | 24 | #include <linux/dmi.h> |
25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
26 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
27 | #include <asm/vmware.h> | ||
28 | #include <asm/x86_init.h> | 27 | #include <asm/x86_init.h> |
28 | #include <asm/hypervisor.h> | ||
29 | 29 | ||
30 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 | 30 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 |
31 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 | 31 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 |
@@ -65,7 +65,7 @@ static unsigned long vmware_get_tsc_khz(void) | |||
65 | return tsc_hz; | 65 | return tsc_hz; |
66 | } | 66 | } |
67 | 67 | ||
68 | void __init vmware_platform_setup(void) | 68 | static void __init vmware_platform_setup(void) |
69 | { | 69 | { |
70 | uint32_t eax, ebx, ecx, edx; | 70 | uint32_t eax, ebx, ecx, edx; |
71 | 71 | ||
@@ -83,26 +83,22 @@ void __init vmware_platform_setup(void) | |||
83 | * serial key should be enough, as this will always have a VMware | 83 | * serial key should be enough, as this will always have a VMware |
84 | * specific string when running under VMware hypervisor. | 84 | * specific string when running under VMware hypervisor. |
85 | */ | 85 | */ |
86 | int vmware_platform(void) | 86 | static bool __init vmware_platform(void) |
87 | { | 87 | { |
88 | if (cpu_has_hypervisor) { | 88 | if (cpu_has_hypervisor) { |
89 | unsigned int eax, ebx, ecx, edx; | 89 | unsigned int eax; |
90 | char hyper_vendor_id[13]; | 90 | unsigned int hyper_vendor_id[3]; |
91 | 91 | ||
92 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &ebx, &ecx, &edx); | 92 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0], |
93 | memcpy(hyper_vendor_id + 0, &ebx, 4); | 93 | &hyper_vendor_id[1], &hyper_vendor_id[2]); |
94 | memcpy(hyper_vendor_id + 4, &ecx, 4); | 94 | if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) |
95 | memcpy(hyper_vendor_id + 8, &edx, 4); | 95 | return true; |
96 | hyper_vendor_id[12] = '\0'; | ||
97 | if (!strcmp(hyper_vendor_id, "VMwareVMware")) | ||
98 | return 1; | ||
99 | } else if (dmi_available && dmi_name_in_serial("VMware") && | 96 | } else if (dmi_available && dmi_name_in_serial("VMware") && |
100 | __vmware_platform()) | 97 | __vmware_platform()) |
101 | return 1; | 98 | return true; |
102 | 99 | ||
103 | return 0; | 100 | return false; |
104 | } | 101 | } |
105 | EXPORT_SYMBOL(vmware_platform); | ||
106 | 102 | ||
107 | /* | 103 | /* |
108 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. | 104 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. |
@@ -116,8 +112,16 @@ EXPORT_SYMBOL(vmware_platform); | |||
116 | * so that the kernel could just trust the hypervisor with providing a | 112 | * so that the kernel could just trust the hypervisor with providing a |
117 | * reliable virtual TSC that is suitable for timekeeping. | 113 | * reliable virtual TSC that is suitable for timekeeping. |
118 | */ | 114 | */ |
119 | void __cpuinit vmware_set_feature_bits(struct cpuinfo_x86 *c) | 115 | static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c) |
120 | { | 116 | { |
121 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 117 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
122 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); | 118 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); |
123 | } | 119 | } |
120 | |||
121 | const __refconst struct hypervisor_x86 x86_hyper_vmware = { | ||
122 | .name = "VMware", | ||
123 | .detect = vmware_platform, | ||
124 | .set_cpu_features = vmware_set_cpu_features, | ||
125 | .init_platform = vmware_platform_setup, | ||
126 | }; | ||
127 | EXPORT_SYMBOL(x86_hyper_vmware); | ||
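The new x86_hyper_vmware descriptor wires vmware_platform() in as the detection callback: it first checks the CPUID hypervisor bit and then compares the 12-byte vendor signature returned in EBX:ECX:EDX of leaf 0x40000000 against "VMwareVMware". A user-space sketch of the same check, assuming GCC/Clang on x86 with <cpuid.h>; an illustration only, not part of the patch:

    #include <cpuid.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;
            unsigned int sig[3];
            char vendor[13];

            /* CPUID.1:ECX bit 31 is the "running under a hypervisor" hint. */
            __cpuid(1, eax, ebx, ecx, edx);
            if (!(ecx & (1u << 31))) {
                    puts("no hypervisor bit set");
                    return 0;
            }

            /* Leaf 0x40000000: vendor signature in EBX:ECX:EDX. */
            __cpuid(0x40000000, eax, sig[0], sig[1], sig[2]);
            memcpy(vendor, sig, 12);
            vendor[12] = '\0';
            printf("hypervisor signature: %s\n", vendor); /* e.g. "VMwareVMware" */
            return 0;
    }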
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 8b862d5900fe..1b7b31ab7d86 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -170,7 +170,7 @@ static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb, | |||
170 | cpuid_device_destroy(cpu); | 170 | cpuid_device_destroy(cpu); |
171 | break; | 171 | break; |
172 | } | 172 | } |
173 | return err ? NOTIFY_BAD : NOTIFY_OK; | 173 | return notifier_from_errno(err); |
174 | } | 174 | } |
175 | 175 | ||
176 | static struct notifier_block __refdata cpuid_class_cpu_notifier = | 176 | static struct notifier_block __refdata cpuid_class_cpu_notifier = |
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c deleted file mode 100644 index 1c47390dd0e5..000000000000 --- a/arch/x86/kernel/ds.c +++ /dev/null | |||
@@ -1,1437 +0,0 @@ | |||
1 | /* | ||
2 | * Debug Store support | ||
3 | * | ||
4 | * This provides a low-level interface to the hardware's Debug Store | ||
5 | * feature that is used for branch trace store (BTS) and | ||
6 | * precise-event based sampling (PEBS). | ||
7 | * | ||
8 | * It manages: | ||
9 | * - DS and BTS hardware configuration | ||
10 | * - buffer overflow handling (to be done) | ||
11 | * - buffer access | ||
12 | * | ||
13 | * It does not do: | ||
14 | * - security checking (is the caller allowed to trace the task) | ||
15 | * - buffer allocation (memory accounting) | ||
16 | * | ||
17 | * | ||
18 | * Copyright (C) 2007-2009 Intel Corporation. | ||
19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 | ||
20 | */ | ||
21 | |||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/string.h> | ||
24 | #include <linux/errno.h> | ||
25 | #include <linux/sched.h> | ||
26 | #include <linux/slab.h> | ||
27 | #include <linux/mm.h> | ||
28 | #include <linux/trace_clock.h> | ||
29 | |||
30 | #include <asm/ds.h> | ||
31 | |||
32 | #include "ds_selftest.h" | ||
33 | |||
34 | /* | ||
35 | * The configuration for a particular DS hardware implementation: | ||
36 | */ | ||
37 | struct ds_configuration { | ||
38 | /* The name of the configuration: */ | ||
39 | const char *name; | ||
40 | |||
41 | /* The size of pointer-typed fields in DS, BTS, and PEBS: */ | ||
42 | unsigned char sizeof_ptr_field; | ||
43 | |||
44 | /* The size of a BTS/PEBS record in bytes: */ | ||
45 | unsigned char sizeof_rec[2]; | ||
46 | |||
47 | /* The number of pebs counter reset values in the DS structure. */ | ||
48 | unsigned char nr_counter_reset; | ||
49 | |||
50 | /* Control bit-masks indexed by enum ds_feature: */ | ||
51 | unsigned long ctl[dsf_ctl_max]; | ||
52 | }; | ||
53 | static struct ds_configuration ds_cfg __read_mostly; | ||
54 | |||
55 | |||
56 | /* Maximal size of a DS configuration: */ | ||
57 | #define MAX_SIZEOF_DS 0x80 | ||
58 | |||
59 | /* Maximal size of a BTS record: */ | ||
60 | #define MAX_SIZEOF_BTS (3 * 8) | ||
61 | |||
62 | /* BTS and PEBS buffer alignment: */ | ||
63 | #define DS_ALIGNMENT (1 << 3) | ||
64 | |||
65 | /* Number of buffer pointers in DS: */ | ||
66 | #define NUM_DS_PTR_FIELDS 8 | ||
67 | |||
68 | /* Size of a pebs reset value in DS: */ | ||
69 | #define PEBS_RESET_FIELD_SIZE 8 | ||
70 | |||
71 | /* Mask of control bits in the DS MSR register: */ | ||
72 | #define BTS_CONTROL \ | ||
73 | ( ds_cfg.ctl[dsf_bts] | \ | ||
74 | ds_cfg.ctl[dsf_bts_kernel] | \ | ||
75 | ds_cfg.ctl[dsf_bts_user] | \ | ||
76 | ds_cfg.ctl[dsf_bts_overflow] ) | ||
77 | |||
78 | /* | ||
79 | * A BTS or PEBS tracer. | ||
80 | * | ||
81 | * This holds the configuration of the tracer and serves as a handle | ||
82 | * to identify tracers. | ||
83 | */ | ||
84 | struct ds_tracer { | ||
85 | /* The DS context (partially) owned by this tracer. */ | ||
86 | struct ds_context *context; | ||
87 | /* The buffer provided on ds_request() and its size in bytes. */ | ||
88 | void *buffer; | ||
89 | size_t size; | ||
90 | }; | ||
91 | |||
92 | struct bts_tracer { | ||
93 | /* The common DS part: */ | ||
94 | struct ds_tracer ds; | ||
95 | |||
96 | /* The trace including the DS configuration: */ | ||
97 | struct bts_trace trace; | ||
98 | |||
99 | /* Buffer overflow notification function: */ | ||
100 | bts_ovfl_callback_t ovfl; | ||
101 | |||
102 | /* Active flags affecting trace collection. */ | ||
103 | unsigned int flags; | ||
104 | }; | ||
105 | |||
106 | struct pebs_tracer { | ||
107 | /* The common DS part: */ | ||
108 | struct ds_tracer ds; | ||
109 | |||
110 | /* The trace including the DS configuration: */ | ||
111 | struct pebs_trace trace; | ||
112 | |||
113 | /* Buffer overflow notification function: */ | ||
114 | pebs_ovfl_callback_t ovfl; | ||
115 | }; | ||
116 | |||
117 | /* | ||
118 | * Debug Store (DS) save area configuration (see Intel64 and IA32 | ||
119 | * Architectures Software Developer's Manual, section 18.5) | ||
120 | * | ||
121 | * The DS configuration consists of the following fields; different | ||
122 | * architectures vary in the size of those fields. | ||
123 | * | ||
124 | * - double-word aligned base linear address of the BTS buffer | ||
125 | * - write pointer into the BTS buffer | ||
126 | * - end linear address of the BTS buffer (one byte beyond the end of | ||
127 | * the buffer) | ||
128 | * - interrupt pointer into BTS buffer | ||
129 | * (interrupt occurs when write pointer passes interrupt pointer) | ||
130 | * - double-word aligned base linear address of the PEBS buffer | ||
131 | * - write pointer into the PEBS buffer | ||
132 | * - end linear address of the PEBS buffer (one byte beyond the end of | ||
133 | * the buffer) | ||
134 | * - interrupt pointer into PEBS buffer | ||
135 | * (interrupt occurs when write pointer passes interrupt pointer) | ||
136 | * - value to which counter is reset following counter overflow | ||
137 | * | ||
138 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
139 | * architectures use 32bit pointers in 32bit mode. | ||
140 | * | ||
141 | * | ||
142 | * We compute the base address for the first 8 fields based on: | ||
143 | * - the field size stored in the DS configuration | ||
144 | * - the relative field position | ||
145 | * - an offset giving the start of the respective region | ||
146 | * | ||
147 | * This offset is further used to index various arrays holding | ||
148 | * information for BTS and PEBS at the respective index. | ||
149 | * | ||
150 | * On later 32bit processors, we only access the lower 32bit of the | ||
151 | * 64bit pointer fields. The upper halves will be zeroed out. | ||
152 | */ | ||
153 | |||
154 | enum ds_field { | ||
155 | ds_buffer_base = 0, | ||
156 | ds_index, | ||
157 | ds_absolute_maximum, | ||
158 | ds_interrupt_threshold, | ||
159 | }; | ||
160 | |||
161 | enum ds_qualifier { | ||
162 | ds_bts = 0, | ||
163 | ds_pebs | ||
164 | }; | ||
165 | |||
166 | static inline unsigned long | ||
167 | ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field) | ||
168 | { | ||
169 | base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); | ||
170 | return *(unsigned long *)base; | ||
171 | } | ||
172 | |||
173 | static inline void | ||
174 | ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field, | ||
175 | unsigned long value) | ||
176 | { | ||
177 | base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); | ||
178 | (*(unsigned long *)base) = value; | ||
179 | } | ||
180 | |||
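For concreteness, a worked example of the offsets ds_get()/ds_set() produce, assuming 64-bit pointer fields (sizeof_ptr_field == 8); illustration only, not part of the original file:

    /*
     * offset = sizeof_ptr_field * (field + 4 * qual)
     *
     *   BTS  buffer_base   : 8 * (0 + 4*0) =  0
     *   BTS  index         : 8 * (1 + 4*0) =  8
     *   BTS  abs_maximum   : 8 * (2 + 4*0) = 16
     *   BTS  int_threshold : 8 * (3 + 4*0) = 24
     *   PEBS buffer_base   : 8 * (0 + 4*1) = 32
     *   PEBS index         : 8 * (1 + 4*1) = 40
     *   ...
     *
     * i.e. the four BTS pointers come first, then the four PEBS pointers,
     * followed by the PEBS counter reset value(s) starting at offset
     * NUM_DS_PTR_FIELDS * 8 = 64.
     */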
181 | |||
182 | /* | ||
183 | * Locking is done only for allocating BTS or PEBS resources. | ||
184 | */ | ||
185 | static DEFINE_SPINLOCK(ds_lock); | ||
186 | |||
187 | /* | ||
188 | * We either support (system-wide) per-cpu or per-thread allocation. | ||
189 | * We distinguish the two based on the task_struct pointer, where a | ||
190 | * NULL pointer indicates per-cpu allocation for the current cpu. | ||
191 | * | ||
192 | * Allocations are use-counted. As soon as resources are allocated, | ||
193 | * further allocations must be of the same type (per-cpu or | ||
194 | * per-thread). We model this by counting allocations (i.e. the number | ||
195 | * of tracers of a certain type) for one type negatively: | ||
196 | * =0 no tracers | ||
197 | * >0 number of per-thread tracers | ||
198 | * <0 number of per-cpu tracers | ||
199 | * | ||
200 | * Tracers essentially give the number of ds contexts for a certain | ||
201 | * type of allocation. | ||
202 | */ | ||
203 | static atomic_t tracers = ATOMIC_INIT(0); | ||
204 | |||
205 | static inline int get_tracer(struct task_struct *task) | ||
206 | { | ||
207 | int error; | ||
208 | |||
209 | spin_lock_irq(&ds_lock); | ||
210 | |||
211 | if (task) { | ||
212 | error = -EPERM; | ||
213 | if (atomic_read(&tracers) < 0) | ||
214 | goto out; | ||
215 | atomic_inc(&tracers); | ||
216 | } else { | ||
217 | error = -EPERM; | ||
218 | if (atomic_read(&tracers) > 0) | ||
219 | goto out; | ||
220 | atomic_dec(&tracers); | ||
221 | } | ||
222 | |||
223 | error = 0; | ||
224 | out: | ||
225 | spin_unlock_irq(&ds_lock); | ||
226 | return error; | ||
227 | } | ||
228 | |||
229 | static inline void put_tracer(struct task_struct *task) | ||
230 | { | ||
231 | if (task) | ||
232 | atomic_dec(&tracers); | ||
233 | else | ||
234 | atomic_inc(&tracers); | ||
235 | } | ||
236 | |||
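A stand-alone model of the signed use count implemented by get_tracer()/put_tracer() above; in the kernel this runs under ds_lock, the sketch ignores locking and only shows the counting rule (the model_* names are hypothetical):

    #include <linux/errno.h>

    /*
     *   tracers_model == 0  ->  no tracers allocated
     *   tracers_model  > 0  ->  that many per-thread tracers
     *   tracers_model  < 0  ->  that many per-cpu tracers (negated)
     */
    static int tracers_model;

    static int model_get_tracer(int per_thread)
    {
            /* Refuse to mix per-thread and per-cpu tracing. */
            if (per_thread ? (tracers_model < 0) : (tracers_model > 0))
                    return -EPERM;
            tracers_model += per_thread ? 1 : -1;
            return 0;
    }

    static void model_put_tracer(int per_thread)
    {
            tracers_model -= per_thread ? 1 : -1;
    }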
237 | /* | ||
238 | * The DS context is either attached to a thread or to a cpu: | ||
239 | * - in the former case, the thread_struct contains a pointer to the | ||
240 | * attached context. | ||
241 | * - in the latter case, we use a static array of per-cpu context | ||
242 | * pointers. | ||
243 | * | ||
244 | * Contexts are use-counted. They are allocated on first access and | ||
245 | * deallocated when the last user puts the context. | ||
246 | */ | ||
247 | struct ds_context { | ||
248 | /* The DS configuration; goes into MSR_IA32_DS_AREA: */ | ||
249 | unsigned char ds[MAX_SIZEOF_DS]; | ||
250 | |||
251 | /* The owner of the BTS and PEBS configuration, respectively: */ | ||
252 | struct bts_tracer *bts_master; | ||
253 | struct pebs_tracer *pebs_master; | ||
254 | |||
255 | /* Use count: */ | ||
256 | unsigned long count; | ||
257 | |||
258 | /* Pointer to the context pointer field: */ | ||
259 | struct ds_context **this; | ||
260 | |||
261 | /* The traced task; NULL for cpu tracing: */ | ||
262 | struct task_struct *task; | ||
263 | |||
264 | /* The traced cpu; only valid if task is NULL: */ | ||
265 | int cpu; | ||
266 | }; | ||
267 | |||
268 | static DEFINE_PER_CPU(struct ds_context *, cpu_ds_context); | ||
269 | |||
270 | |||
271 | static struct ds_context *ds_get_context(struct task_struct *task, int cpu) | ||
272 | { | ||
273 | struct ds_context **p_context = | ||
274 | (task ? &task->thread.ds_ctx : &per_cpu(cpu_ds_context, cpu)); | ||
275 | struct ds_context *context = NULL; | ||
276 | struct ds_context *new_context = NULL; | ||
277 | |||
278 | /* Chances are small that we already have a context. */ | ||
279 | new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); | ||
280 | if (!new_context) | ||
281 | return NULL; | ||
282 | |||
283 | spin_lock_irq(&ds_lock); | ||
284 | |||
285 | context = *p_context; | ||
286 | if (likely(!context)) { | ||
287 | context = new_context; | ||
288 | |||
289 | context->this = p_context; | ||
290 | context->task = task; | ||
291 | context->cpu = cpu; | ||
292 | context->count = 0; | ||
293 | |||
294 | *p_context = context; | ||
295 | } | ||
296 | |||
297 | context->count++; | ||
298 | |||
299 | spin_unlock_irq(&ds_lock); | ||
300 | |||
301 | if (context != new_context) | ||
302 | kfree(new_context); | ||
303 | |||
304 | return context; | ||
305 | } | ||
306 | |||
307 | static void ds_put_context(struct ds_context *context) | ||
308 | { | ||
309 | struct task_struct *task; | ||
310 | unsigned long irq; | ||
311 | |||
312 | if (!context) | ||
313 | return; | ||
314 | |||
315 | spin_lock_irqsave(&ds_lock, irq); | ||
316 | |||
317 | if (--context->count) { | ||
318 | spin_unlock_irqrestore(&ds_lock, irq); | ||
319 | return; | ||
320 | } | ||
321 | |||
322 | *(context->this) = NULL; | ||
323 | |||
324 | task = context->task; | ||
325 | |||
326 | if (task) | ||
327 | clear_tsk_thread_flag(task, TIF_DS_AREA_MSR); | ||
328 | |||
329 | /* | ||
330 | * We leave the (now dangling) pointer to the DS configuration in | ||
331 | * the DS_AREA msr. This is as good or as bad as replacing it with | ||
332 | * NULL - the hardware would crash if we enabled tracing. | ||
333 | * | ||
334 | * This saves us some problems with having to write an msr on a | ||
335 | * different cpu while preventing others from doing the same for the | ||
336 | * next context for that same cpu. | ||
337 | */ | ||
338 | |||
339 | spin_unlock_irqrestore(&ds_lock, irq); | ||
340 | |||
341 | /* The context might still be in use for context switching. */ | ||
342 | if (task && (task != current)) | ||
343 | wait_task_context_switch(task); | ||
344 | |||
345 | kfree(context); | ||
346 | } | ||
347 | |||
348 | static void ds_install_ds_area(struct ds_context *context) | ||
349 | { | ||
350 | unsigned long ds; | ||
351 | |||
352 | ds = (unsigned long)context->ds; | ||
353 | |||
354 | /* | ||
355 | * There is a race between the bts master and the pebs master. | ||
356 | * | ||
357 | * The thread/cpu access is synchronized via get/put_cpu() for | ||
358 | * task tracing and via wrmsr_on_cpu for cpu tracing. | ||
359 | * | ||
360 | * If bts and pebs are collected for the same task or same cpu, | ||
361 | * the same configuration is written twice. | ||
362 | */ | ||
363 | if (context->task) { | ||
364 | get_cpu(); | ||
365 | if (context->task == current) | ||
366 | wrmsrl(MSR_IA32_DS_AREA, ds); | ||
367 | set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | ||
368 | put_cpu(); | ||
369 | } else | ||
370 | wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA, | ||
371 | (u32)((u64)ds), (u32)((u64)ds >> 32)); | ||
372 | } | ||
373 | |||
374 | /* | ||
375 | * Call the tracer's callback on a buffer overflow. | ||
376 | * | ||
377 | * context: the ds context | ||
378 | * qual: the buffer type | ||
379 | */ | ||
380 | static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) | ||
381 | { | ||
382 | switch (qual) { | ||
383 | case ds_bts: | ||
384 | if (context->bts_master && | ||
385 | context->bts_master->ovfl) | ||
386 | context->bts_master->ovfl(context->bts_master); | ||
387 | break; | ||
388 | case ds_pebs: | ||
389 | if (context->pebs_master && | ||
390 | context->pebs_master->ovfl) | ||
391 | context->pebs_master->ovfl(context->pebs_master); | ||
392 | break; | ||
393 | } | ||
394 | } | ||
395 | |||
396 | |||
397 | /* | ||
398 | * Write raw data into the BTS or PEBS buffer. | ||
399 | * | ||
400 | * The remainder of any partially written record is zeroed out. | ||
401 | * | ||
402 | * context: the DS context | ||
403 | * qual: the buffer type | ||
404 | * record: the data to write | ||
405 | * size: the size of the data | ||
406 | */ | ||
407 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, | ||
408 | const void *record, size_t size) | ||
409 | { | ||
410 | int bytes_written = 0; | ||
411 | |||
412 | if (!record) | ||
413 | return -EINVAL; | ||
414 | |||
415 | while (size) { | ||
416 | unsigned long base, index, end, write_end, int_th; | ||
417 | unsigned long write_size, adj_write_size; | ||
418 | |||
419 | /* | ||
420 | * Write as much as possible without producing an | ||
421 | * overflow interrupt. | ||
422 | * | ||
423 | * Interrupt_threshold must either be | ||
424 | * - bigger than absolute_maximum or | ||
425 | * - point to a record between buffer_base and absolute_maximum | ||
426 | * | ||
427 | * Index points to a valid record. | ||
428 | */ | ||
429 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
430 | index = ds_get(context->ds, qual, ds_index); | ||
431 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
432 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
433 | |||
434 | write_end = min(end, int_th); | ||
435 | |||
436 | /* | ||
437 | * If we are already beyond the interrupt threshold, | ||
438 | * we fill the entire buffer. | ||
439 | */ | ||
440 | if (write_end <= index) | ||
441 | write_end = end; | ||
442 | |||
443 | if (write_end <= index) | ||
444 | break; | ||
445 | |||
446 | write_size = min((unsigned long) size, write_end - index); | ||
447 | memcpy((void *)index, record, write_size); | ||
448 | |||
449 | record = (const char *)record + write_size; | ||
450 | size -= write_size; | ||
451 | bytes_written += write_size; | ||
452 | |||
453 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | ||
454 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | ||
455 | |||
456 | /* Zero out trailing bytes. */ | ||
457 | memset((char *)index + write_size, 0, | ||
458 | adj_write_size - write_size); | ||
459 | index += adj_write_size; | ||
460 | |||
461 | if (index >= end) | ||
462 | index = base; | ||
463 | ds_set(context->ds, qual, ds_index, index); | ||
464 | |||
465 | if (index >= int_th) | ||
466 | ds_overflow(context, qual); | ||
467 | } | ||
468 | |||
469 | return bytes_written; | ||
470 | } | ||
471 | |||
472 | |||
473 | /* | ||
474 | * Branch Trace Store (BTS) uses the following format. Different | ||
475 | * architectures vary in the size of those fields. | ||
476 | * - source linear address | ||
477 | * - destination linear address | ||
478 | * - flags | ||
479 | * | ||
480 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
481 | * architectures use 32bit pointers in 32bit mode. | ||
482 | * | ||
483 | * We compute the base address for the fields based on: | ||
484 | * - the field size stored in the DS configuration | ||
485 | * - the relative field position | ||
486 | * | ||
487 | * In order to store additional information in the BTS buffer, we use | ||
488 | * a special source address to indicate that the record requires | ||
489 | * special interpretation. | ||
490 | * | ||
491 | * Netburst indicated via a bit in the flags field whether the branch | ||
492 | * was predicted; this is ignored. | ||
493 | * | ||
494 | * We use two levels of abstraction: | ||
495 | * - the raw data level defined here | ||
496 | * - an arch-independent level defined in ds.h | ||
497 | */ | ||
498 | |||
499 | enum bts_field { | ||
500 | bts_from, | ||
501 | bts_to, | ||
502 | bts_flags, | ||
503 | |||
504 | bts_qual = bts_from, | ||
505 | bts_clock = bts_to, | ||
506 | bts_pid = bts_flags, | ||
507 | |||
508 | bts_qual_mask = (bts_qual_max - 1), | ||
509 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) | ||
510 | }; | ||
511 | |||
512 | static inline unsigned long bts_get(const char *base, unsigned long field) | ||
513 | { | ||
514 | base += (ds_cfg.sizeof_ptr_field * field); | ||
515 | return *(unsigned long *)base; | ||
516 | } | ||
517 | |||
518 | static inline void bts_set(char *base, unsigned long field, unsigned long val) | ||
519 | { | ||
520 | base += (ds_cfg.sizeof_ptr_field * field); | ||
521 | (*(unsigned long *)base) = val; | ||
522 | } | ||
523 | |||
524 | |||
525 | /* | ||
526 | * The raw BTS data is architecture dependent. | ||
527 | * | ||
528 | * For higher-level users, we give an arch-independent view. | ||
529 | * - ds.h defines struct bts_struct | ||
530 | * - bts_read translates one raw bts record into a bts_struct | ||
531 | * - bts_write translates one bts_struct into the raw format and | ||
532 | * writes it into the top of the parameter tracer's buffer. | ||
533 | * | ||
534 | * return: bytes read/written on success; -Eerrno, otherwise | ||
535 | */ | ||
536 | static int | ||
537 | bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out) | ||
538 | { | ||
539 | if (!tracer) | ||
540 | return -EINVAL; | ||
541 | |||
542 | if (at < tracer->trace.ds.begin) | ||
543 | return -EINVAL; | ||
544 | |||
545 | if (tracer->trace.ds.end < (at + tracer->trace.ds.size)) | ||
546 | return -EINVAL; | ||
547 | |||
548 | memset(out, 0, sizeof(*out)); | ||
549 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { | ||
550 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); | ||
551 | out->variant.event.clock = bts_get(at, bts_clock); | ||
552 | out->variant.event.pid = bts_get(at, bts_pid); | ||
553 | } else { | ||
554 | out->qualifier = bts_branch; | ||
555 | out->variant.lbr.from = bts_get(at, bts_from); | ||
556 | out->variant.lbr.to = bts_get(at, bts_to); | ||
557 | |||
558 | if (!out->variant.lbr.from && !out->variant.lbr.to) | ||
559 | out->qualifier = bts_invalid; | ||
560 | } | ||
561 | |||
562 | return ds_cfg.sizeof_rec[ds_bts]; | ||
563 | } | ||
564 | |||
565 | static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) | ||
566 | { | ||
567 | unsigned char raw[MAX_SIZEOF_BTS]; | ||
568 | |||
569 | if (!tracer) | ||
570 | return -EINVAL; | ||
571 | |||
572 | if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts]) | ||
573 | return -EOVERFLOW; | ||
574 | |||
575 | switch (in->qualifier) { | ||
576 | case bts_invalid: | ||
577 | bts_set(raw, bts_from, 0); | ||
578 | bts_set(raw, bts_to, 0); | ||
579 | bts_set(raw, bts_flags, 0); | ||
580 | break; | ||
581 | case bts_branch: | ||
582 | bts_set(raw, bts_from, in->variant.lbr.from); | ||
583 | bts_set(raw, bts_to, in->variant.lbr.to); | ||
584 | bts_set(raw, bts_flags, 0); | ||
585 | break; | ||
586 | case bts_task_arrives: | ||
587 | case bts_task_departs: | ||
588 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); | ||
589 | bts_set(raw, bts_clock, in->variant.event.clock); | ||
590 | bts_set(raw, bts_pid, in->variant.event.pid); | ||
591 | break; | ||
592 | default: | ||
593 | return -EINVAL; | ||
594 | } | ||
595 | |||
596 | return ds_write(tracer->ds.context, ds_bts, raw, | ||
597 | ds_cfg.sizeof_rec[ds_bts]); | ||
598 | } | ||
599 | |||
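To make the escape encoding used by bts_write()/bts_read() concrete: an event record reuses the three pointer-sized fields, with the source-address slot carrying the escape marker plus qualifier. Illustration only:

    /*
     * A bts_task_arrives record, as laid out by bts_write() above:
     *
     *   field 0 (bts_from / bts_qual) : bts_escape | bts_task_arrives
     *   field 1 (bts_to   / bts_clock): trace_clock_global() timestamp
     *   field 2 (bts_flags/ bts_pid)  : task pid
     *
     * bts_read() recognises it because (field0 & ~bts_qual_mask) equals
     * bts_escape; an ordinary branch record keeps real from/to addresses
     * there instead.
     */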
600 | |||
601 | static void ds_write_config(struct ds_context *context, | ||
602 | struct ds_trace *cfg, enum ds_qualifier qual) | ||
603 | { | ||
604 | unsigned char *ds = context->ds; | ||
605 | |||
606 | ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin); | ||
607 | ds_set(ds, qual, ds_index, (unsigned long)cfg->top); | ||
608 | ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end); | ||
609 | ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith); | ||
610 | } | ||
611 | |||
612 | static void ds_read_config(struct ds_context *context, | ||
613 | struct ds_trace *cfg, enum ds_qualifier qual) | ||
614 | { | ||
615 | unsigned char *ds = context->ds; | ||
616 | |||
617 | cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base); | ||
618 | cfg->top = (void *)ds_get(ds, qual, ds_index); | ||
619 | cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum); | ||
620 | cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold); | ||
621 | } | ||
622 | |||
623 | static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | ||
624 | void *base, size_t size, size_t ith, | ||
625 | unsigned int flags) { | ||
626 | unsigned long buffer, adj; | ||
627 | |||
628 | /* | ||
629 | * Adjust the buffer address and size to meet alignment | ||
630 | * constraints: | ||
631 | * - buffer is double-word aligned | ||
632 | * - size is multiple of record size | ||
633 | * | ||
634 | * We checked the size at the very beginning; we have enough | ||
635 | * space to do the adjustment. | ||
636 | */ | ||
637 | buffer = (unsigned long)base; | ||
638 | |||
639 | adj = ALIGN(buffer, DS_ALIGNMENT) - buffer; | ||
640 | buffer += adj; | ||
641 | size -= adj; | ||
642 | |||
643 | trace->n = size / ds_cfg.sizeof_rec[qual]; | ||
644 | trace->size = ds_cfg.sizeof_rec[qual]; | ||
645 | |||
646 | size = (trace->n * trace->size); | ||
647 | |||
648 | trace->begin = (void *)buffer; | ||
649 | trace->top = trace->begin; | ||
650 | trace->end = (void *)(buffer + size); | ||
651 | /* | ||
652 | * The value for 'no threshold' is -1, which will set the | ||
653 | * threshold outside of the buffer, just like we want it. | ||
654 | */ | ||
655 | ith *= ds_cfg.sizeof_rec[qual]; | ||
656 | trace->ith = (void *)(buffer + size - ith); | ||
657 | |||
658 | trace->flags = flags; | ||
659 | } | ||
660 | |||
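A worked example of the adjustment done by ds_init_ds_trace(), assuming a 24-byte BTS record and DS_ALIGNMENT == 8; the numbers are illustrative only:

    /*
     *   base   = 0x100b, size = 1024
     *   adj    = ALIGN(0x100b, 8) - 0x100b = 0x1010 - 0x100b = 5
     *   buffer = 0x1010, size = 1019
     *   n      = 1019 / 24 = 42 records, usable size = 42 * 24 = 1008
     *   begin  = 0x1010, top = 0x1010, end = 0x1010 + 1008 = 0x1400
     */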
661 | |||
662 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, | ||
663 | enum ds_qualifier qual, struct task_struct *task, | ||
664 | int cpu, void *base, size_t size, size_t th) | ||
665 | { | ||
666 | struct ds_context *context; | ||
667 | int error; | ||
668 | size_t req_size; | ||
669 | |||
670 | error = -EOPNOTSUPP; | ||
671 | if (!ds_cfg.sizeof_rec[qual]) | ||
672 | goto out; | ||
673 | |||
674 | error = -EINVAL; | ||
675 | if (!base) | ||
676 | goto out; | ||
677 | |||
678 | req_size = ds_cfg.sizeof_rec[qual]; | ||
679 | /* We might need space for alignment adjustments. */ | ||
680 | if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT)) | ||
681 | req_size += DS_ALIGNMENT; | ||
682 | |||
683 | error = -EINVAL; | ||
684 | if (size < req_size) | ||
685 | goto out; | ||
686 | |||
687 | if (th != (size_t)-1) { | ||
688 | th *= ds_cfg.sizeof_rec[qual]; | ||
689 | |||
690 | error = -EINVAL; | ||
691 | if (size <= th) | ||
692 | goto out; | ||
693 | } | ||
694 | |||
695 | tracer->buffer = base; | ||
696 | tracer->size = size; | ||
697 | |||
698 | error = -ENOMEM; | ||
699 | context = ds_get_context(task, cpu); | ||
700 | if (!context) | ||
701 | goto out; | ||
702 | tracer->context = context; | ||
703 | |||
704 | /* | ||
705 | * Defer any tracer-specific initialization work for the context until | ||
706 | * context ownership has been clarified. | ||
707 | */ | ||
708 | |||
709 | error = 0; | ||
710 | out: | ||
711 | return error; | ||
712 | } | ||
713 | |||
714 | static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu, | ||
715 | void *base, size_t size, | ||
716 | bts_ovfl_callback_t ovfl, size_t th, | ||
717 | unsigned int flags) | ||
718 | { | ||
719 | struct bts_tracer *tracer; | ||
720 | int error; | ||
721 | |||
722 | /* Buffer overflow notification is not yet implemented. */ | ||
723 | error = -EOPNOTSUPP; | ||
724 | if (ovfl) | ||
725 | goto out; | ||
726 | |||
727 | error = get_tracer(task); | ||
728 | if (error < 0) | ||
729 | goto out; | ||
730 | |||
731 | error = -ENOMEM; | ||
732 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | ||
733 | if (!tracer) | ||
734 | goto out_put_tracer; | ||
735 | tracer->ovfl = ovfl; | ||
736 | |||
737 | /* Do some more error checking and acquire a tracing context. */ | ||
738 | error = ds_request(&tracer->ds, &tracer->trace.ds, | ||
739 | ds_bts, task, cpu, base, size, th); | ||
740 | if (error < 0) | ||
741 | goto out_tracer; | ||
742 | |||
743 | /* Claim the bts part of the tracing context we acquired above. */ | ||
744 | spin_lock_irq(&ds_lock); | ||
745 | |||
746 | error = -EPERM; | ||
747 | if (tracer->ds.context->bts_master) | ||
748 | goto out_unlock; | ||
749 | tracer->ds.context->bts_master = tracer; | ||
750 | |||
751 | spin_unlock_irq(&ds_lock); | ||
752 | |||
753 | /* | ||
754 | * Now that we own the bts part of the context, let's complete the | ||
755 | * initialization for that part. | ||
756 | */ | ||
757 | ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags); | ||
758 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | ||
759 | ds_install_ds_area(tracer->ds.context); | ||
760 | |||
761 | tracer->trace.read = bts_read; | ||
762 | tracer->trace.write = bts_write; | ||
763 | |||
764 | /* Start tracing. */ | ||
765 | ds_resume_bts(tracer); | ||
766 | |||
767 | return tracer; | ||
768 | |||
769 | out_unlock: | ||
770 | spin_unlock_irq(&ds_lock); | ||
771 | ds_put_context(tracer->ds.context); | ||
772 | out_tracer: | ||
773 | kfree(tracer); | ||
774 | out_put_tracer: | ||
775 | put_tracer(task); | ||
776 | out: | ||
777 | return ERR_PTR(error); | ||
778 | } | ||
779 | |||
780 | struct bts_tracer *ds_request_bts_task(struct task_struct *task, | ||
781 | void *base, size_t size, | ||
782 | bts_ovfl_callback_t ovfl, | ||
783 | size_t th, unsigned int flags) | ||
784 | { | ||
785 | return ds_request_bts(task, 0, base, size, ovfl, th, flags); | ||
786 | } | ||
787 | |||
788 | struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, | ||
789 | bts_ovfl_callback_t ovfl, | ||
790 | size_t th, unsigned int flags) | ||
791 | { | ||
792 | return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags); | ||
793 | } | ||
794 | |||
795 | static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu, | ||
796 | void *base, size_t size, | ||
797 | pebs_ovfl_callback_t ovfl, size_t th, | ||
798 | unsigned int flags) | ||
799 | { | ||
800 | struct pebs_tracer *tracer; | ||
801 | int error; | ||
802 | |||
803 | /* Buffer overflow notification is not yet implemented. */ | ||
804 | error = -EOPNOTSUPP; | ||
805 | if (ovfl) | ||
806 | goto out; | ||
807 | |||
808 | error = get_tracer(task); | ||
809 | if (error < 0) | ||
810 | goto out; | ||
811 | |||
812 | error = -ENOMEM; | ||
813 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | ||
814 | if (!tracer) | ||
815 | goto out_put_tracer; | ||
816 | tracer->ovfl = ovfl; | ||
817 | |||
818 | /* Do some more error checking and acquire a tracing context. */ | ||
819 | error = ds_request(&tracer->ds, &tracer->trace.ds, | ||
820 | ds_pebs, task, cpu, base, size, th); | ||
821 | if (error < 0) | ||
822 | goto out_tracer; | ||
823 | |||
824 | /* Claim the pebs part of the tracing context we acquired above. */ | ||
825 | spin_lock_irq(&ds_lock); | ||
826 | |||
827 | error = -EPERM; | ||
828 | if (tracer->ds.context->pebs_master) | ||
829 | goto out_unlock; | ||
830 | tracer->ds.context->pebs_master = tracer; | ||
831 | |||
832 | spin_unlock_irq(&ds_lock); | ||
833 | |||
834 | /* | ||
835 | * Now that we own the pebs part of the context, let's complete the | ||
836 | * initialization for that part. | ||
837 | */ | ||
838 | ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags); | ||
839 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | ||
840 | ds_install_ds_area(tracer->ds.context); | ||
841 | |||
842 | /* Start tracing. */ | ||
843 | ds_resume_pebs(tracer); | ||
844 | |||
845 | return tracer; | ||
846 | |||
847 | out_unlock: | ||
848 | spin_unlock_irq(&ds_lock); | ||
849 | ds_put_context(tracer->ds.context); | ||
850 | out_tracer: | ||
851 | kfree(tracer); | ||
852 | out_put_tracer: | ||
853 | put_tracer(task); | ||
854 | out: | ||
855 | return ERR_PTR(error); | ||
856 | } | ||
857 | |||
858 | struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, | ||
859 | void *base, size_t size, | ||
860 | pebs_ovfl_callback_t ovfl, | ||
861 | size_t th, unsigned int flags) | ||
862 | { | ||
863 | return ds_request_pebs(task, 0, base, size, ovfl, th, flags); | ||
864 | } | ||
865 | |||
866 | struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size, | ||
867 | pebs_ovfl_callback_t ovfl, | ||
868 | size_t th, unsigned int flags) | ||
869 | { | ||
870 | return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags); | ||
871 | } | ||
872 | |||
873 | static void ds_free_bts(struct bts_tracer *tracer) | ||
874 | { | ||
875 | struct task_struct *task; | ||
876 | |||
877 | task = tracer->ds.context->task; | ||
878 | |||
879 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); | ||
880 | tracer->ds.context->bts_master = NULL; | ||
881 | |||
882 | /* Make sure tracing stopped and the tracer is not in use. */ | ||
883 | if (task && (task != current)) | ||
884 | wait_task_context_switch(task); | ||
885 | |||
886 | ds_put_context(tracer->ds.context); | ||
887 | put_tracer(task); | ||
888 | |||
889 | kfree(tracer); | ||
890 | } | ||
891 | |||
892 | void ds_release_bts(struct bts_tracer *tracer) | ||
893 | { | ||
894 | might_sleep(); | ||
895 | |||
896 | if (!tracer) | ||
897 | return; | ||
898 | |||
899 | ds_suspend_bts(tracer); | ||
900 | ds_free_bts(tracer); | ||
901 | } | ||
902 | |||
903 | int ds_release_bts_noirq(struct bts_tracer *tracer) | ||
904 | { | ||
905 | struct task_struct *task; | ||
906 | unsigned long irq; | ||
907 | int error; | ||
908 | |||
909 | if (!tracer) | ||
910 | return 0; | ||
911 | |||
912 | task = tracer->ds.context->task; | ||
913 | |||
914 | local_irq_save(irq); | ||
915 | |||
916 | error = -EPERM; | ||
917 | if (!task && | ||
918 | (tracer->ds.context->cpu != smp_processor_id())) | ||
919 | goto out; | ||
920 | |||
921 | error = -EPERM; | ||
922 | if (task && (task != current)) | ||
923 | goto out; | ||
924 | |||
925 | ds_suspend_bts_noirq(tracer); | ||
926 | ds_free_bts(tracer); | ||
927 | |||
928 | error = 0; | ||
929 | out: | ||
930 | local_irq_restore(irq); | ||
931 | return error; | ||
932 | } | ||
933 | |||
934 | static void update_task_debugctlmsr(struct task_struct *task, | ||
935 | unsigned long debugctlmsr) | ||
936 | { | ||
937 | task->thread.debugctlmsr = debugctlmsr; | ||
938 | |||
939 | get_cpu(); | ||
940 | if (task == current) | ||
941 | update_debugctlmsr(debugctlmsr); | ||
942 | put_cpu(); | ||
943 | } | ||
944 | |||
945 | void ds_suspend_bts(struct bts_tracer *tracer) | ||
946 | { | ||
947 | struct task_struct *task; | ||
948 | unsigned long debugctlmsr; | ||
949 | int cpu; | ||
950 | |||
951 | if (!tracer) | ||
952 | return; | ||
953 | |||
954 | tracer->flags = 0; | ||
955 | |||
956 | task = tracer->ds.context->task; | ||
957 | cpu = tracer->ds.context->cpu; | ||
958 | |||
959 | WARN_ON(!task && irqs_disabled()); | ||
960 | |||
961 | debugctlmsr = (task ? | ||
962 | task->thread.debugctlmsr : | ||
963 | get_debugctlmsr_on_cpu(cpu)); | ||
964 | debugctlmsr &= ~BTS_CONTROL; | ||
965 | |||
966 | if (task) | ||
967 | update_task_debugctlmsr(task, debugctlmsr); | ||
968 | else | ||
969 | update_debugctlmsr_on_cpu(cpu, debugctlmsr); | ||
970 | } | ||
971 | |||
972 | int ds_suspend_bts_noirq(struct bts_tracer *tracer) | ||
973 | { | ||
974 | struct task_struct *task; | ||
975 | unsigned long debugctlmsr, irq; | ||
976 | int cpu, error = 0; | ||
977 | |||
978 | if (!tracer) | ||
979 | return 0; | ||
980 | |||
981 | tracer->flags = 0; | ||
982 | |||
983 | task = tracer->ds.context->task; | ||
984 | cpu = tracer->ds.context->cpu; | ||
985 | |||
986 | local_irq_save(irq); | ||
987 | |||
988 | error = -EPERM; | ||
989 | if (!task && (cpu != smp_processor_id())) | ||
990 | goto out; | ||
991 | |||
992 | debugctlmsr = (task ? | ||
993 | task->thread.debugctlmsr : | ||
994 | get_debugctlmsr()); | ||
995 | debugctlmsr &= ~BTS_CONTROL; | ||
996 | |||
997 | if (task) | ||
998 | update_task_debugctlmsr(task, debugctlmsr); | ||
999 | else | ||
1000 | update_debugctlmsr(debugctlmsr); | ||
1001 | |||
1002 | error = 0; | ||
1003 | out: | ||
1004 | local_irq_restore(irq); | ||
1005 | return error; | ||
1006 | } | ||
1007 | |||
1008 | static unsigned long ds_bts_control(struct bts_tracer *tracer) | ||
1009 | { | ||
1010 | unsigned long control; | ||
1011 | |||
1012 | control = ds_cfg.ctl[dsf_bts]; | ||
1013 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) | ||
1014 | control |= ds_cfg.ctl[dsf_bts_kernel]; | ||
1015 | if (!(tracer->trace.ds.flags & BTS_USER)) | ||
1016 | control |= ds_cfg.ctl[dsf_bts_user]; | ||
1017 | |||
1018 | return control; | ||
1019 | } | ||
1020 | |||
1021 | void ds_resume_bts(struct bts_tracer *tracer) | ||
1022 | { | ||
1023 | struct task_struct *task; | ||
1024 | unsigned long debugctlmsr; | ||
1025 | int cpu; | ||
1026 | |||
1027 | if (!tracer) | ||
1028 | return; | ||
1029 | |||
1030 | tracer->flags = tracer->trace.ds.flags; | ||
1031 | |||
1032 | task = tracer->ds.context->task; | ||
1033 | cpu = tracer->ds.context->cpu; | ||
1034 | |||
1035 | WARN_ON(!task && irqs_disabled()); | ||
1036 | |||
1037 | debugctlmsr = (task ? | ||
1038 | task->thread.debugctlmsr : | ||
1039 | get_debugctlmsr_on_cpu(cpu)); | ||
1040 | debugctlmsr |= ds_bts_control(tracer); | ||
1041 | |||
1042 | if (task) | ||
1043 | update_task_debugctlmsr(task, debugctlmsr); | ||
1044 | else | ||
1045 | update_debugctlmsr_on_cpu(cpu, debugctlmsr); | ||
1046 | } | ||
1047 | |||
1048 | int ds_resume_bts_noirq(struct bts_tracer *tracer) | ||
1049 | { | ||
1050 | struct task_struct *task; | ||
1051 | unsigned long debugctlmsr, irq; | ||
1052 | int cpu, error = 0; | ||
1053 | |||
1054 | if (!tracer) | ||
1055 | return 0; | ||
1056 | |||
1057 | tracer->flags = tracer->trace.ds.flags; | ||
1058 | |||
1059 | task = tracer->ds.context->task; | ||
1060 | cpu = tracer->ds.context->cpu; | ||
1061 | |||
1062 | local_irq_save(irq); | ||
1063 | |||
1064 | error = -EPERM; | ||
1065 | if (!task && (cpu != smp_processor_id())) | ||
1066 | goto out; | ||
1067 | |||
1068 | debugctlmsr = (task ? | ||
1069 | task->thread.debugctlmsr : | ||
1070 | get_debugctlmsr()); | ||
1071 | debugctlmsr |= ds_bts_control(tracer); | ||
1072 | |||
1073 | if (task) | ||
1074 | update_task_debugctlmsr(task, debugctlmsr); | ||
1075 | else | ||
1076 | update_debugctlmsr(debugctlmsr); | ||
1077 | |||
1078 | error = 0; | ||
1079 | out: | ||
1080 | local_irq_restore(irq); | ||
1081 | return error; | ||
1082 | } | ||
1083 | |||
1084 | static void ds_free_pebs(struct pebs_tracer *tracer) | ||
1085 | { | ||
1086 | struct task_struct *task; | ||
1087 | |||
1088 | task = tracer->ds.context->task; | ||
1089 | |||
1090 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); | ||
1091 | tracer->ds.context->pebs_master = NULL; | ||
1092 | |||
1093 | ds_put_context(tracer->ds.context); | ||
1094 | put_tracer(task); | ||
1095 | |||
1096 | kfree(tracer); | ||
1097 | } | ||
1098 | |||
1099 | void ds_release_pebs(struct pebs_tracer *tracer) | ||
1100 | { | ||
1101 | might_sleep(); | ||
1102 | |||
1103 | if (!tracer) | ||
1104 | return; | ||
1105 | |||
1106 | ds_suspend_pebs(tracer); | ||
1107 | ds_free_pebs(tracer); | ||
1108 | } | ||
1109 | |||
1110 | int ds_release_pebs_noirq(struct pebs_tracer *tracer) | ||
1111 | { | ||
1112 | struct task_struct *task; | ||
1113 | unsigned long irq; | ||
1114 | int error; | ||
1115 | |||
1116 | if (!tracer) | ||
1117 | return 0; | ||
1118 | |||
1119 | task = tracer->ds.context->task; | ||
1120 | |||
1121 | local_irq_save(irq); | ||
1122 | |||
1123 | error = -EPERM; | ||
1124 | if (!task && | ||
1125 | (tracer->ds.context->cpu != smp_processor_id())) | ||
1126 | goto out; | ||
1127 | |||
1128 | error = -EPERM; | ||
1129 | if (task && (task != current)) | ||
1130 | goto out; | ||
1131 | |||
1132 | ds_suspend_pebs_noirq(tracer); | ||
1133 | ds_free_pebs(tracer); | ||
1134 | |||
1135 | error = 0; | ||
1136 | out: | ||
1137 | local_irq_restore(irq); | ||
1138 | return error; | ||
1139 | } | ||
1140 | |||
1141 | void ds_suspend_pebs(struct pebs_tracer *tracer) | ||
1142 | { | ||
1143 | |||
1144 | } | ||
1145 | |||
1146 | int ds_suspend_pebs_noirq(struct pebs_tracer *tracer) | ||
1147 | { | ||
1148 | return 0; | ||
1149 | } | ||
1150 | |||
1151 | void ds_resume_pebs(struct pebs_tracer *tracer) | ||
1152 | { | ||
1153 | |||
1154 | } | ||
1155 | |||
1156 | int ds_resume_pebs_noirq(struct pebs_tracer *tracer) | ||
1157 | { | ||
1158 | return 0; | ||
1159 | } | ||
1160 | |||
1161 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) | ||
1162 | { | ||
1163 | if (!tracer) | ||
1164 | return NULL; | ||
1165 | |||
1166 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | ||
1167 | return &tracer->trace; | ||
1168 | } | ||
1169 | |||
1170 | const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) | ||
1171 | { | ||
1172 | if (!tracer) | ||
1173 | return NULL; | ||
1174 | |||
1175 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | ||
1176 | |||
1177 | tracer->trace.counters = ds_cfg.nr_counter_reset; | ||
1178 | memcpy(tracer->trace.counter_reset, | ||
1179 | tracer->ds.context->ds + | ||
1180 | (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field), | ||
1181 | ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE); | ||
1182 | |||
1183 | return &tracer->trace; | ||
1184 | } | ||
1185 | |||
1186 | int ds_reset_bts(struct bts_tracer *tracer) | ||
1187 | { | ||
1188 | if (!tracer) | ||
1189 | return -EINVAL; | ||
1190 | |||
1191 | tracer->trace.ds.top = tracer->trace.ds.begin; | ||
1192 | |||
1193 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, | ||
1194 | (unsigned long)tracer->trace.ds.top); | ||
1195 | |||
1196 | return 0; | ||
1197 | } | ||
1198 | |||
1199 | int ds_reset_pebs(struct pebs_tracer *tracer) | ||
1200 | { | ||
1201 | if (!tracer) | ||
1202 | return -EINVAL; | ||
1203 | |||
1204 | tracer->trace.ds.top = tracer->trace.ds.begin; | ||
1205 | |||
1206 | ds_set(tracer->ds.context->ds, ds_pebs, ds_index, | ||
1207 | (unsigned long)tracer->trace.ds.top); | ||
1208 | |||
1209 | return 0; | ||
1210 | } | ||
1211 | |||
1212 | int ds_set_pebs_reset(struct pebs_tracer *tracer, | ||
1213 | unsigned int counter, u64 value) | ||
1214 | { | ||
1215 | if (!tracer) | ||
1216 | return -EINVAL; | ||
1217 | |||
1218 | if (ds_cfg.nr_counter_reset < counter) | ||
1219 | return -EINVAL; | ||
1220 | |||
1221 | *(u64 *)(tracer->ds.context->ds + | ||
1222 | (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) + | ||
1223 | (counter * PEBS_RESET_FIELD_SIZE)) = value; | ||
1224 | |||
1225 | return 0; | ||
1226 | } | ||
1227 | |||
1228 | static const struct ds_configuration ds_cfg_netburst = { | ||
1229 | .name = "Netburst", | ||
1230 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), | ||
1231 | .ctl[dsf_bts_kernel] = (1 << 5), | ||
1232 | .ctl[dsf_bts_user] = (1 << 6), | ||
1233 | .nr_counter_reset = 1, | ||
1234 | }; | ||
1235 | static const struct ds_configuration ds_cfg_pentium_m = { | ||
1236 | .name = "Pentium M", | ||
1237 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
1238 | .nr_counter_reset = 1, | ||
1239 | }; | ||
1240 | static const struct ds_configuration ds_cfg_core2_atom = { | ||
1241 | .name = "Core 2/Atom", | ||
1242 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
1243 | .ctl[dsf_bts_kernel] = (1 << 9), | ||
1244 | .ctl[dsf_bts_user] = (1 << 10), | ||
1245 | .nr_counter_reset = 1, | ||
1246 | }; | ||
1247 | static const struct ds_configuration ds_cfg_core_i7 = { | ||
1248 | .name = "Core i7", | ||
1249 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
1250 | .ctl[dsf_bts_kernel] = (1 << 9), | ||
1251 | .ctl[dsf_bts_user] = (1 << 10), | ||
1252 | .nr_counter_reset = 4, | ||
1253 | }; | ||
1254 | |||
1255 | static void | ||
1256 | ds_configure(const struct ds_configuration *cfg, | ||
1257 | struct cpuinfo_x86 *cpu) | ||
1258 | { | ||
1259 | unsigned long nr_pebs_fields = 0; | ||
1260 | |||
1261 | printk(KERN_INFO "[ds] using %s configuration\n", cfg->name); | ||
1262 | |||
1263 | #ifdef __i386__ | ||
1264 | nr_pebs_fields = 10; | ||
1265 | #else | ||
1266 | nr_pebs_fields = 18; | ||
1267 | #endif | ||
1268 | |||
1269 | /* | ||
1270 | * Starting with version 2, architectural performance | ||
1271 | * monitoring supports a format specifier. | ||
1272 | */ | ||
1273 | if ((cpuid_eax(0xa) & 0xff) > 1) { | ||
1274 | unsigned long perf_capabilities, format; | ||
1275 | |||
1276 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities); | ||
1277 | |||
1278 | format = (perf_capabilities >> 8) & 0xf; | ||
1279 | |||
1280 | switch (format) { | ||
1281 | case 0: | ||
1282 | nr_pebs_fields = 18; | ||
1283 | break; | ||
1284 | case 1: | ||
1285 | nr_pebs_fields = 22; | ||
1286 | break; | ||
1287 | default: | ||
1288 | printk(KERN_INFO | ||
1289 | "[ds] unknown PEBS format: %lu\n", format); | ||
1290 | nr_pebs_fields = 0; | ||
1291 | break; | ||
1292 | } | ||
1293 | } | ||
1294 | |||
1295 | memset(&ds_cfg, 0, sizeof(ds_cfg)); | ||
1296 | ds_cfg = *cfg; | ||
1297 | |||
1298 | ds_cfg.sizeof_ptr_field = | ||
1299 | (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4); | ||
1300 | |||
1301 | ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3; | ||
1302 | ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields; | ||
1303 | |||
1304 | if (!cpu_has(cpu, X86_FEATURE_BTS)) { | ||
1305 | ds_cfg.sizeof_rec[ds_bts] = 0; | ||
1306 | printk(KERN_INFO "[ds] bts not available\n"); | ||
1307 | } | ||
1308 | if (!cpu_has(cpu, X86_FEATURE_PEBS)) { | ||
1309 | ds_cfg.sizeof_rec[ds_pebs] = 0; | ||
1310 | printk(KERN_INFO "[ds] pebs not available\n"); | ||
1311 | } | ||
1312 | |||
1313 | printk(KERN_INFO "[ds] sizes: address: %u bit, ", | ||
1314 | 8 * ds_cfg.sizeof_ptr_field); | ||
1315 | printk("bts/pebs record: %u/%u bytes\n", | ||
1316 | ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]); | ||
1317 | |||
1318 | WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset); | ||
1319 | } | ||
1320 | |||
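Putting the sizes together, assuming a CPU with X86_FEATURE_DTES64 (8-byte pointer fields) and PEBS format 0 as detected above; illustration only:

    /*
     *   sizeof_rec[ds_bts]  = 8 *  3 =  24 bytes per BTS record
     *   sizeof_rec[ds_pebs] = 8 * 18 = 144 bytes per PEBS record
     *   (PEBS format 1 would give 8 * 22 = 176 bytes instead)
     */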
1321 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | ||
1322 | { | ||
1323 | /* Only configure the first cpu. Others are identical. */ | ||
1324 | if (ds_cfg.name) | ||
1325 | return; | ||
1326 | |||
1327 | switch (c->x86) { | ||
1328 | case 0x6: | ||
1329 | switch (c->x86_model) { | ||
1330 | case 0x9: | ||
1331 | case 0xd: /* Pentium M */ | ||
1332 | ds_configure(&ds_cfg_pentium_m, c); | ||
1333 | break; | ||
1334 | case 0xf: | ||
1335 | case 0x17: /* Core2 */ | ||
1336 | case 0x1c: /* Atom */ | ||
1337 | ds_configure(&ds_cfg_core2_atom, c); | ||
1338 | break; | ||
1339 | case 0x1a: /* Core i7 */ | ||
1340 | ds_configure(&ds_cfg_core_i7, c); | ||
1341 | break; | ||
1342 | default: | ||
1343 | /* Sorry, don't know about them. */ | ||
1344 | break; | ||
1345 | } | ||
1346 | break; | ||
1347 | case 0xf: | ||
1348 | switch (c->x86_model) { | ||
1349 | case 0x0: | ||
1350 | case 0x1: | ||
1351 | case 0x2: /* Netburst */ | ||
1352 | ds_configure(&ds_cfg_netburst, c); | ||
1353 | break; | ||
1354 | default: | ||
1355 | /* Sorry, don't know about them. */ | ||
1356 | break; | ||
1357 | } | ||
1358 | break; | ||
1359 | default: | ||
1360 | /* Sorry, don't know about them. */ | ||
1361 | break; | ||
1362 | } | ||
1363 | } | ||
1364 | |||
1365 | static inline void ds_take_timestamp(struct ds_context *context, | ||
1366 | enum bts_qualifier qualifier, | ||
1367 | struct task_struct *task) | ||
1368 | { | ||
1369 | struct bts_tracer *tracer = context->bts_master; | ||
1370 | struct bts_struct ts; | ||
1371 | |||
1372 | /* Prevent compilers from reading the tracer pointer twice. */ | ||
1373 | barrier(); | ||
1374 | |||
1375 | if (!tracer || !(tracer->flags & BTS_TIMESTAMPS)) | ||
1376 | return; | ||
1377 | |||
1378 | memset(&ts, 0, sizeof(ts)); | ||
1379 | ts.qualifier = qualifier; | ||
1380 | ts.variant.event.clock = trace_clock_global(); | ||
1381 | ts.variant.event.pid = task->pid; | ||
1382 | |||
1383 | bts_write(tracer, &ts); | ||
1384 | } | ||
1385 | |||
1386 | /* | ||
1387 | * Change the DS configuration from tracing prev to tracing next. | ||
1388 | */ | ||
1389 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) | ||
1390 | { | ||
1391 | struct ds_context *prev_ctx = prev->thread.ds_ctx; | ||
1392 | struct ds_context *next_ctx = next->thread.ds_ctx; | ||
1393 | unsigned long debugctlmsr = next->thread.debugctlmsr; | ||
1394 | |||
1395 | /* Make sure all data is read before we start. */ | ||
1396 | barrier(); | ||
1397 | |||
1398 | if (prev_ctx) { | ||
1399 | update_debugctlmsr(0); | ||
1400 | |||
1401 | ds_take_timestamp(prev_ctx, bts_task_departs, prev); | ||
1402 | } | ||
1403 | |||
1404 | if (next_ctx) { | ||
1405 | ds_take_timestamp(next_ctx, bts_task_arrives, next); | ||
1406 | |||
1407 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); | ||
1408 | } | ||
1409 | |||
1410 | update_debugctlmsr(debugctlmsr); | ||
1411 | } | ||
1412 | |||
1413 | static __init int ds_selftest(void) | ||
1414 | { | ||
1415 | if (ds_cfg.sizeof_rec[ds_bts]) { | ||
1416 | int error; | ||
1417 | |||
1418 | error = ds_selftest_bts(); | ||
1419 | if (error) { | ||
1420 | WARN(1, "[ds] selftest failed. disabling bts.\n"); | ||
1421 | ds_cfg.sizeof_rec[ds_bts] = 0; | ||
1422 | } | ||
1423 | } | ||
1424 | |||
1425 | if (ds_cfg.sizeof_rec[ds_pebs]) { | ||
1426 | int error; | ||
1427 | |||
1428 | error = ds_selftest_pebs(); | ||
1429 | if (error) { | ||
1430 | WARN(1, "[ds] selftest failed. disabling pebs.\n"); | ||
1431 | ds_cfg.sizeof_rec[ds_pebs] = 0; | ||
1432 | } | ||
1433 | } | ||
1434 | |||
1435 | return 0; | ||
1436 | } | ||
1437 | device_initcall(ds_selftest); | ||
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c deleted file mode 100644 index 6bc7c199ab99..000000000000 --- a/arch/x86/kernel/ds_selftest.c +++ /dev/null | |||
@@ -1,408 +0,0 @@ | |||
1 | /* | ||
2 | * Debug Store support - selftest | ||
3 | * | ||
4 | * | ||
5 | * Copyright (C) 2009 Intel Corporation. | ||
6 | * Markus Metzger <markus.t.metzger@intel.com>, 2009 | ||
7 | */ | ||
8 | |||
9 | #include "ds_selftest.h" | ||
10 | |||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/string.h> | ||
13 | #include <linux/smp.h> | ||
14 | #include <linux/cpu.h> | ||
15 | |||
16 | #include <asm/ds.h> | ||
17 | |||
18 | |||
19 | #define BUFFER_SIZE 521 /* Intentionally chose an odd size. */ | ||
20 | #define SMALL_BUFFER_SIZE 24 /* A single bts entry. */ | ||
21 | |||
22 | struct ds_selftest_bts_conf { | ||
23 | struct bts_tracer *tracer; | ||
24 | int error; | ||
25 | int (*suspend)(struct bts_tracer *); | ||
26 | int (*resume)(struct bts_tracer *); | ||
27 | }; | ||
28 | |||
29 | static int ds_selftest_bts_consistency(const struct bts_trace *trace) | ||
30 | { | ||
31 | int error = 0; | ||
32 | |||
33 | if (!trace) { | ||
34 | printk(KERN_CONT "failed to access trace..."); | ||
35 | /* Bail out. Other tests are pointless. */ | ||
36 | return -1; | ||
37 | } | ||
38 | |||
39 | if (!trace->read) { | ||
40 | printk(KERN_CONT "bts read not available..."); | ||
41 | error = -1; | ||
42 | } | ||
43 | |||
44 | /* Do some sanity checks on the trace configuration. */ | ||
45 | if (!trace->ds.n) { | ||
46 | printk(KERN_CONT "empty bts buffer..."); | ||
47 | error = -1; | ||
48 | } | ||
49 | if (!trace->ds.size) { | ||
50 | printk(KERN_CONT "bad bts trace setup..."); | ||
51 | error = -1; | ||
52 | } | ||
53 | if (trace->ds.end != | ||
54 | (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) { | ||
55 | printk(KERN_CONT "bad bts buffer setup..."); | ||
56 | error = -1; | ||
57 | } | ||
58 | /* | ||
59 | * We allow top in [begin; end], since it's not clear when the | ||
60 | * overflow adjustment happens: after the increment or before the | ||
61 | * write. | ||
62 | */ | ||
63 | if ((trace->ds.top < trace->ds.begin) || | ||
64 | (trace->ds.end < trace->ds.top)) { | ||
65 | printk(KERN_CONT "bts top out of bounds..."); | ||
66 | error = -1; | ||
67 | } | ||
68 | |||
69 | return error; | ||
70 | } | ||
71 | |||
72 | static int ds_selftest_bts_read(struct bts_tracer *tracer, | ||
73 | const struct bts_trace *trace, | ||
74 | const void *from, const void *to) | ||
75 | { | ||
76 | const unsigned char *at; | ||
77 | |||
78 | /* | ||
79 | * Check a few things which do not belong to this test. | ||
80 | * They should be covered by other tests. | ||
81 | */ | ||
82 | if (!trace) | ||
83 | return -1; | ||
84 | |||
85 | if (!trace->read) | ||
86 | return -1; | ||
87 | |||
88 | if (to < from) | ||
89 | return -1; | ||
90 | |||
91 | if (from < trace->ds.begin) | ||
92 | return -1; | ||
93 | |||
94 | if (trace->ds.end < to) | ||
95 | return -1; | ||
96 | |||
97 | if (!trace->ds.size) | ||
98 | return -1; | ||
99 | |||
100 | /* Now to the test itself. */ | ||
101 | for (at = from; (void *)at < to; at += trace->ds.size) { | ||
102 | struct bts_struct bts; | ||
103 | unsigned long index; | ||
104 | int error; | ||
105 | |||
106 | if (((void *)at - trace->ds.begin) % trace->ds.size) { | ||
107 | printk(KERN_CONT | ||
108 | "read from non-integer index..."); | ||
109 | return -1; | ||
110 | } | ||
111 | index = ((void *)at - trace->ds.begin) / trace->ds.size; | ||
112 | |||
113 | memset(&bts, 0, sizeof(bts)); | ||
114 | error = trace->read(tracer, at, &bts); | ||
115 | if (error < 0) { | ||
116 | printk(KERN_CONT | ||
117 | "error reading bts trace at [%lu] (0x%p)...", | ||
118 | index, at); | ||
119 | return error; | ||
120 | } | ||
121 | |||
122 | switch (bts.qualifier) { | ||
123 | case BTS_BRANCH: | ||
124 | break; | ||
125 | default: | ||
126 | printk(KERN_CONT | ||
127 | "unexpected bts entry %llu at [%lu] (0x%p)...", | ||
128 | bts.qualifier, index, at); | ||
129 | return -1; | ||
130 | } | ||
131 | } | ||
132 | |||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | static void ds_selftest_bts_cpu(void *arg) | ||
137 | { | ||
138 | struct ds_selftest_bts_conf *conf = arg; | ||
139 | const struct bts_trace *trace; | ||
140 | void *top; | ||
141 | |||
142 | if (IS_ERR(conf->tracer)) { | ||
143 | conf->error = PTR_ERR(conf->tracer); | ||
144 | conf->tracer = NULL; | ||
145 | |||
146 | printk(KERN_CONT | ||
147 | "initialization failed (err: %d)...", conf->error); | ||
148 | return; | ||
149 | } | ||
150 | |||
151 | /* We should meanwhile have enough trace. */ | ||
152 | conf->error = conf->suspend(conf->tracer); | ||
153 | if (conf->error < 0) | ||
154 | return; | ||
155 | |||
156 | /* Let's see if we can access the trace. */ | ||
157 | trace = ds_read_bts(conf->tracer); | ||
158 | |||
159 | conf->error = ds_selftest_bts_consistency(trace); | ||
160 | if (conf->error < 0) | ||
161 | return; | ||
162 | |||
163 | /* If everything went well, we should have a few trace entries. */ | ||
164 | if (trace->ds.top == trace->ds.begin) { | ||
165 | /* | ||
166 | * It is possible but highly unlikely that we got a | ||
167 | * buffer overflow and end up at exactly the same | ||
168 | * position we started from. | ||
169 | * Let's issue a warning, but continue. | ||
170 | */ | ||
171 | printk(KERN_CONT "no trace/overflow..."); | ||
172 | } | ||
173 | |||
174 | /* Let's try to read the trace we collected. */ | ||
175 | conf->error = | ||
176 | ds_selftest_bts_read(conf->tracer, trace, | ||
177 | trace->ds.begin, trace->ds.top); | ||
178 | if (conf->error < 0) | ||
179 | return; | ||
180 | |||
181 | /* | ||
182 | * Let's read the trace again. | ||
183 | * Since we suspended tracing, we should get the same result. | ||
184 | */ | ||
185 | top = trace->ds.top; | ||
186 | |||
187 | trace = ds_read_bts(conf->tracer); | ||
188 | conf->error = ds_selftest_bts_consistency(trace); | ||
189 | if (conf->error < 0) | ||
190 | return; | ||
191 | |||
192 | if (top != trace->ds.top) { | ||
193 | printk(KERN_CONT "suspend not working..."); | ||
194 | conf->error = -1; | ||
195 | return; | ||
196 | } | ||
197 | |||
198 | /* Let's collect some more trace - see if resume is working. */ | ||
199 | conf->error = conf->resume(conf->tracer); | ||
200 | if (conf->error < 0) | ||
201 | return; | ||
202 | |||
203 | conf->error = conf->suspend(conf->tracer); | ||
204 | if (conf->error < 0) | ||
205 | return; | ||
206 | |||
207 | trace = ds_read_bts(conf->tracer); | ||
208 | |||
209 | conf->error = ds_selftest_bts_consistency(trace); | ||
210 | if (conf->error < 0) | ||
211 | return; | ||
212 | |||
213 | if (trace->ds.top == top) { | ||
214 | /* | ||
215 | * It is possible but highly unlikely that we got a | ||
216 | * buffer overflow and end up at exactly the same | ||
217 | * position we started from. | ||
218 | * Let's issue a warning and check the full trace. | ||
219 | */ | ||
220 | printk(KERN_CONT | ||
221 | "no resume progress/overflow..."); | ||
222 | |||
223 | conf->error = | ||
224 | ds_selftest_bts_read(conf->tracer, trace, | ||
225 | trace->ds.begin, trace->ds.end); | ||
226 | } else if (trace->ds.top < top) { | ||
227 | /* | ||
228 | * We had a buffer overflow - the entire buffer should | ||
229 | * contain trace records. | ||
230 | */ | ||
231 | conf->error = | ||
232 | ds_selftest_bts_read(conf->tracer, trace, | ||
233 | trace->ds.begin, trace->ds.end); | ||
234 | } else { | ||
235 | /* | ||
236 | * It is quite likely that the buffer did not overflow. | ||
237 | * Let's just check the delta trace. | ||
238 | */ | ||
239 | conf->error = | ||
240 | ds_selftest_bts_read(conf->tracer, trace, top, | ||
241 | trace->ds.top); | ||
242 | } | ||
243 | if (conf->error < 0) | ||
244 | return; | ||
245 | |||
246 | conf->error = 0; | ||
247 | } | ||
248 | |||
249 | static int ds_suspend_bts_wrap(struct bts_tracer *tracer) | ||
250 | { | ||
251 | ds_suspend_bts(tracer); | ||
252 | return 0; | ||
253 | } | ||
254 | |||
255 | static int ds_resume_bts_wrap(struct bts_tracer *tracer) | ||
256 | { | ||
257 | ds_resume_bts(tracer); | ||
258 | return 0; | ||
259 | } | ||
260 | |||
261 | static void ds_release_bts_noirq_wrap(void *tracer) | ||
262 | { | ||
263 | (void)ds_release_bts_noirq(tracer); | ||
264 | } | ||
265 | |||
266 | static int ds_selftest_bts_bad_release_noirq(int cpu, | ||
267 | struct bts_tracer *tracer) | ||
268 | { | ||
269 | int error = -EPERM; | ||
270 | |||
271 | /* Try to release the tracer on the wrong cpu. */ | ||
272 | get_cpu(); | ||
273 | if (cpu != smp_processor_id()) { | ||
274 | error = ds_release_bts_noirq(tracer); | ||
275 | if (error != -EPERM) | ||
276 | printk(KERN_CONT "release on wrong cpu..."); | ||
277 | } | ||
278 | put_cpu(); | ||
279 | |||
280 | return error ? 0 : -1; | ||
281 | } | ||
282 | |||
283 | static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer) | ||
284 | { | ||
285 | struct bts_tracer *tracer; | ||
286 | int error; | ||
287 | |||
288 | /* Try to request cpu tracing while task tracing is active. */ | ||
289 | tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL, | ||
290 | (size_t)-1, BTS_KERNEL); | ||
291 | error = PTR_ERR(tracer); | ||
292 | if (!IS_ERR(tracer)) { | ||
293 | ds_release_bts(tracer); | ||
294 | error = 0; | ||
295 | } | ||
296 | |||
297 | if (error != -EPERM) | ||
298 | printk(KERN_CONT "cpu/task tracing overlap..."); | ||
299 | |||
300 | return error ? 0 : -1; | ||
301 | } | ||
302 | |||
303 | static int ds_selftest_bts_bad_request_task(void *buffer) | ||
304 | { | ||
305 | struct bts_tracer *tracer; | ||
306 | int error; | ||
307 | |||
308 | /* Try to request task tracing while cpu tracing is active. */ | ||
309 | tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL, | ||
310 | (size_t)-1, BTS_KERNEL); | ||
311 | error = PTR_ERR(tracer); | ||
312 | if (!IS_ERR(tracer)) { | ||
313 | error = 0; | ||
314 | ds_release_bts(tracer); | ||
315 | } | ||
316 | |||
317 | if (error != -EPERM) | ||
318 | printk(KERN_CONT "task/cpu tracing overlap..."); | ||
319 | |||
320 | return error ? 0 : -1; | ||
321 | } | ||
322 | |||
323 | int ds_selftest_bts(void) | ||
324 | { | ||
325 | struct ds_selftest_bts_conf conf; | ||
326 | unsigned char buffer[BUFFER_SIZE], *small_buffer; | ||
327 | unsigned long irq; | ||
328 | int cpu; | ||
329 | |||
330 | printk(KERN_INFO "[ds] bts selftest..."); | ||
331 | conf.error = 0; | ||
332 | |||
333 | small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8; | ||
334 | |||
335 | get_online_cpus(); | ||
336 | for_each_online_cpu(cpu) { | ||
337 | conf.suspend = ds_suspend_bts_wrap; | ||
338 | conf.resume = ds_resume_bts_wrap; | ||
339 | conf.tracer = | ||
340 | ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, | ||
341 | NULL, (size_t)-1, BTS_KERNEL); | ||
342 | ds_selftest_bts_cpu(&conf); | ||
343 | if (conf.error >= 0) | ||
344 | conf.error = ds_selftest_bts_bad_request_task(buffer); | ||
345 | ds_release_bts(conf.tracer); | ||
346 | if (conf.error < 0) | ||
347 | goto out; | ||
348 | |||
349 | conf.suspend = ds_suspend_bts_noirq; | ||
350 | conf.resume = ds_resume_bts_noirq; | ||
351 | conf.tracer = | ||
352 | ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, | ||
353 | NULL, (size_t)-1, BTS_KERNEL); | ||
354 | smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1); | ||
355 | if (conf.error >= 0) { | ||
356 | conf.error = | ||
357 | ds_selftest_bts_bad_release_noirq(cpu, | ||
358 | conf.tracer); | ||
359 | /* We must not release the tracer twice. */ | ||
360 | if (conf.error < 0) | ||
361 | conf.tracer = NULL; | ||
362 | } | ||
363 | if (conf.error >= 0) | ||
364 | conf.error = ds_selftest_bts_bad_request_task(buffer); | ||
365 | smp_call_function_single(cpu, ds_release_bts_noirq_wrap, | ||
366 | conf.tracer, 1); | ||
367 | if (conf.error < 0) | ||
368 | goto out; | ||
369 | } | ||
370 | |||
371 | conf.suspend = ds_suspend_bts_wrap; | ||
372 | conf.resume = ds_resume_bts_wrap; | ||
373 | conf.tracer = | ||
374 | ds_request_bts_task(current, buffer, BUFFER_SIZE, | ||
375 | NULL, (size_t)-1, BTS_KERNEL); | ||
376 | ds_selftest_bts_cpu(&conf); | ||
377 | if (conf.error >= 0) | ||
378 | conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); | ||
379 | ds_release_bts(conf.tracer); | ||
380 | if (conf.error < 0) | ||
381 | goto out; | ||
382 | |||
383 | conf.suspend = ds_suspend_bts_noirq; | ||
384 | conf.resume = ds_resume_bts_noirq; | ||
385 | conf.tracer = | ||
386 | ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE, | ||
387 | NULL, (size_t)-1, BTS_KERNEL); | ||
388 | local_irq_save(irq); | ||
389 | ds_selftest_bts_cpu(&conf); | ||
390 | if (conf.error >= 0) | ||
391 | conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); | ||
392 | ds_release_bts_noirq(conf.tracer); | ||
393 | local_irq_restore(irq); | ||
394 | if (conf.error < 0) | ||
395 | goto out; | ||
396 | |||
397 | conf.error = 0; | ||
398 | out: | ||
399 | put_online_cpus(); | ||
400 | printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed")); | ||
401 | |||
402 | return conf.error; | ||
403 | } | ||
404 | |||
405 | int ds_selftest_pebs(void) | ||
406 | { | ||
407 | return 0; | ||
408 | } | ||
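ds_selftest_bts_consistency() above boils down to three invariants on the trace descriptor: a non-empty record layout, end == begin + n * size, and the write pointer (top) lying inside [begin, end]. A standalone sketch of those checks, with a simplified descriptor type standing in for struct bts_trace.

#include <stdio.h>
#include <stddef.h>

/* Simplified stand-in for the ds descriptor checked above. */
struct ring_desc {
        char *begin, *end, *top;        /* buffer bounds and current write pointer */
        size_t n, size;                 /* record count and record size */
};

static int ring_consistent(const struct ring_desc *d)
{
        if (!d->n || !d->size)
                return -1;                              /* empty or bad setup */
        if (d->end != d->begin + d->n * d->size)
                return -1;                              /* bounds don't match n * size */
        if (d->top < d->begin || d->end < d->top)
                return -1;                              /* write pointer out of range */
        return 0;
}

int main(void)
{
        static char buf[5 * 24];
        struct ring_desc d = { buf, buf + sizeof(buf), buf + 24, 5, 24 };

        printf("consistent: %s\n", ring_consistent(&d) ? "no" : "yes");
        return 0;
}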
diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h deleted file mode 100644 index 2ba8745c6663..000000000000 --- a/arch/x86/kernel/ds_selftest.h +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | /* | ||
2 | * Debug Store support - selftest | ||
3 | * | ||
4 | * | ||
5 | * Copyright (C) 2009 Intel Corporation. | ||
6 | * Markus Metzger <markus.t.metzger@intel.com>, 2009 | ||
7 | */ | ||
8 | |||
9 | #ifdef CONFIG_X86_DS_SELFTEST | ||
10 | extern int ds_selftest_bts(void); | ||
11 | extern int ds_selftest_pebs(void); | ||
12 | #else | ||
13 | static inline int ds_selftest_bts(void) { return 0; } | ||
14 | static inline int ds_selftest_pebs(void) { return 0; } | ||
15 | #endif | ||
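The header above uses the usual config-stub idiom: when CONFIG_X86_DS_SELFTEST is off, callers link against inline no-op stubs and need no #ifdefs of their own. A small sketch of the same idiom, with FEATURE_SELFTEST as a hypothetical stand-in for the Kconfig-generated macro.

#include <stdio.h>

/*
 * FEATURE_SELFTEST is not defined here, so the no-op stub is used and the
 * program builds without a real selftest implementation being linked in.
 */
#ifdef FEATURE_SELFTEST
extern int feature_selftest(void);
#else
static inline int feature_selftest(void) { return 0; }  /* always "passes" */
#endif

int main(void)
{
        /* The call site is the same whether or not the selftest is built in. */
        printf("selftest result: %d\n", feature_selftest());
        return 0;
}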
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6d817554780a..c89a386930b7 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -224,11 +224,6 @@ unsigned __kprobes long oops_begin(void) | |||
224 | int cpu; | 224 | int cpu; |
225 | unsigned long flags; | 225 | unsigned long flags; |
226 | 226 | ||
227 | /* notify the hw-branch tracer so it may disable tracing and | ||
228 | add the last trace to the trace buffer - | ||
229 | the earlier this happens, the more useful the trace. */ | ||
230 | trace_hw_branch_oops(); | ||
231 | |||
232 | oops_enter(); | 227 | oops_enter(); |
233 | 228 | ||
234 | /* racy, but better than risking deadlock. */ | 229 | /* racy, but better than risking deadlock. */ |
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index b9c830c12b4a..fa99bae75ace 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
@@ -41,6 +41,14 @@ static void early_vga_write(struct console *con, const char *str, unsigned n) | |||
41 | writew(0x720, VGABASE + 2*(max_xpos*j + i)); | 41 | writew(0x720, VGABASE + 2*(max_xpos*j + i)); |
42 | current_ypos = max_ypos-1; | 42 | current_ypos = max_ypos-1; |
43 | } | 43 | } |
44 | #ifdef CONFIG_KGDB_KDB | ||
45 | if (c == '\b') { | ||
46 | if (current_xpos > 0) | ||
47 | current_xpos--; | ||
48 | } else if (c == '\r') { | ||
49 | current_xpos = 0; | ||
50 | } else | ||
51 | #endif | ||
44 | if (c == '\n') { | 52 | if (c == '\n') { |
45 | current_xpos = 0; | 53 | current_xpos = 0; |
46 | current_ypos++; | 54 | current_ypos++; |
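The early_printk.c hunk teaches the early VGA console about '\b' and '\r' because the kdb front end redraws its prompt with them. A user-space sketch of the cursor bookkeeping, with plain integers instead of VGA text-buffer writes; MAX_X is an assumed 80-column screen.

#include <stdio.h>

static int xpos, ypos;
#define MAX_X 80

static void cursor_advance(char c)
{
        if (c == '\b') {                /* backspace: step left, never past column 0 */
                if (xpos > 0)
                        xpos--;
        } else if (c == '\r') {         /* carriage return: back to column 0 */
                xpos = 0;
        } else if (c == '\n') {         /* newline: next row, column 0 */
                xpos = 0;
                ypos++;
        } else {                        /* printable: consume one column */
                if (++xpos >= MAX_X) {
                        xpos = 0;
                        ypos++;
                }
        }
}

int main(void)
{
        const char *s = "abc\b\rxy\n";
        for (const char *p = s; *p; p++)
                cursor_advance(*p);
        printf("cursor at column %d, row %d\n", xpos, ypos);   /* column 0, row 1 */
        return 0;
}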
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 44a8e0dc6737..cd49141cf153 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -53,6 +53,7 @@ | |||
53 | #include <asm/processor-flags.h> | 53 | #include <asm/processor-flags.h> |
54 | #include <asm/ftrace.h> | 54 | #include <asm/ftrace.h> |
55 | #include <asm/irq_vectors.h> | 55 | #include <asm/irq_vectors.h> |
56 | #include <asm/cpufeature.h> | ||
56 | 57 | ||
57 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 58 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
58 | #include <linux/elf-em.h> | 59 | #include <linux/elf-em.h> |
@@ -905,7 +906,25 @@ ENTRY(simd_coprocessor_error) | |||
905 | RING0_INT_FRAME | 906 | RING0_INT_FRAME |
906 | pushl $0 | 907 | pushl $0 |
907 | CFI_ADJUST_CFA_OFFSET 4 | 908 | CFI_ADJUST_CFA_OFFSET 4 |
909 | #ifdef CONFIG_X86_INVD_BUG | ||
910 | /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ | ||
911 | 661: pushl $do_general_protection | ||
912 | 662: | ||
913 | .section .altinstructions,"a" | ||
914 | .balign 4 | ||
915 | .long 661b | ||
916 | .long 663f | ||
917 | .byte X86_FEATURE_XMM | ||
918 | .byte 662b-661b | ||
919 | .byte 664f-663f | ||
920 | .previous | ||
921 | .section .altinstr_replacement,"ax" | ||
922 | 663: pushl $do_simd_coprocessor_error | ||
923 | 664: | ||
924 | .previous | ||
925 | #else | ||
908 | pushl $do_simd_coprocessor_error | 926 | pushl $do_simd_coprocessor_error |
927 | #endif | ||
909 | CFI_ADJUST_CFA_OFFSET 4 | 928 | CFI_ADJUST_CFA_OFFSET 4 |
910 | jmp error_code | 929 | jmp error_code |
911 | CFI_ENDPROC | 930 | CFI_ENDPROC |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 23b4ecdffa9b..a198b7c87a12 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -36,6 +36,7 @@ | |||
36 | unsigned long hpet_address; | 36 | unsigned long hpet_address; |
37 | u8 hpet_blockid; /* OS timer block num */ | 37 | u8 hpet_blockid; /* OS timer block num */ |
38 | u8 hpet_msi_disable; | 38 | u8 hpet_msi_disable; |
39 | u8 hpet_readback_cmp; | ||
39 | 40 | ||
40 | #ifdef CONFIG_PCI_MSI | 41 | #ifdef CONFIG_PCI_MSI |
41 | static unsigned long hpet_num_timers; | 42 | static unsigned long hpet_num_timers; |
@@ -395,19 +396,23 @@ static int hpet_next_event(unsigned long delta, | |||
395 | * at that point and we would wait for the next hpet interrupt | 396 | * at that point and we would wait for the next hpet interrupt |
396 | * forever. We found out that reading the CMP register back | 397 | * forever. We found out that reading the CMP register back |
397 | * forces the transfer so we can rely on the comparison with | 398 | * forces the transfer so we can rely on the comparison with |
398 | * the counter register below. If the read back from the | 399 | * the counter register below. |
399 | * compare register does not match the value we programmed | 400 | * |
400 | * then we might have a real hardware problem. We can not do | 401 | * That works fine on those ATI chipsets, but on newer Intel |
401 | * much about it here, but at least alert the user/admin with | 402 | * chipsets (ICH9...) this triggers due to an erratum: Reading |
402 | * a prominent warning. | 403 | * the comparator immediately following a write is returning |
403 | * An erratum on some chipsets (ICH9,..), results in comparator read | 404 | * the old value. |
404 | * immediately following a write returning old value. Workaround | 405 | * |
405 | * for this is to read this value second time, when first | 406 | * We restrict the read back to the affected ATI chipsets (set |
406 | * read returns old value. | 407 | * by quirks) and also run it with hpet=verbose for debugging |
408 | * purposes. | ||
407 | */ | 409 | */ |
408 | if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) { | 410 | if (hpet_readback_cmp || hpet_verbose) { |
409 | WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt, | 411 | u32 cmp = hpet_readl(HPET_Tn_CMP(timer)); |
410 | KERN_WARNING "hpet: compare register read back failed.\n"); | 412 | |
413 | if (cmp != cnt) | ||
414 | printk_once(KERN_WARNING | ||
415 | "hpet: compare register read back failed.\n"); | ||
411 | } | 416 | } |
412 | 417 | ||
413 | return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; | 418 | return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; |
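The hpet.c hunk keeps the comparator read-back, but only when a chipset quirk (hpet_readback_cmp) or hpet=verbose asks for it, and warns once on a mismatch. A sketch of that quirk-gated read-back check, with a fake register array in place of the HPET MMIO accesses; readback_cmp and verbose play the role of the kernel flags.

#include <stdio.h>
#include <stdint.h>

static uint32_t regs[2];                /* [0] = comparator, [1] = main counter */
static int readback_cmp = 1;            /* would be set by a chipset quirk */
static int verbose;                     /* would be set by hpet=verbose */
static int warned;

static void write_cmp(uint32_t v) { regs[0] = v; }
static uint32_t read_cmp(void)    { return regs[0]; }

static void program_next_event(uint32_t cnt)
{
        write_cmp(cnt);

        /* Only pay for the extra read when a quirk (or debugging) asks for it. */
        if (readback_cmp || verbose) {
                if (read_cmp() != cnt && !warned) {
                        warned = 1;     /* warn once, like printk_once() */
                        fprintf(stderr, "compare register read back failed\n");
                }
        }
}

int main(void)
{
        program_next_event(12345);
        printf("comparator now %u\n", (unsigned)read_cmp());
        return 0;
}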
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index d6cc065f519f..a8f1b803d2fd 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c | |||
@@ -189,25 +189,16 @@ static int get_hbp_len(u8 hbp_len) | |||
189 | } | 189 | } |
190 | 190 | ||
191 | /* | 191 | /* |
192 | * Check for virtual address in user space. | ||
193 | */ | ||
194 | int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) | ||
195 | { | ||
196 | unsigned int len; | ||
197 | |||
198 | len = get_hbp_len(hbp_len); | ||
199 | |||
200 | return (va <= TASK_SIZE - len); | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * Check for virtual address in kernel space. | 192 | * Check for virtual address in kernel space. |
205 | */ | 193 | */ |
206 | static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) | 194 | int arch_check_bp_in_kernelspace(struct perf_event *bp) |
207 | { | 195 | { |
208 | unsigned int len; | 196 | unsigned int len; |
197 | unsigned long va; | ||
198 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
209 | 199 | ||
210 | len = get_hbp_len(hbp_len); | 200 | va = info->address; |
201 | len = get_hbp_len(info->len); | ||
211 | 202 | ||
212 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); | 203 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); |
213 | } | 204 | } |
@@ -300,8 +291,7 @@ static int arch_build_bp_info(struct perf_event *bp) | |||
300 | /* | 291 | /* |
301 | * Validate the arch-specific HW Breakpoint register settings | 292 | * Validate the arch-specific HW Breakpoint register settings |
302 | */ | 293 | */ |
303 | int arch_validate_hwbkpt_settings(struct perf_event *bp, | 294 | int arch_validate_hwbkpt_settings(struct perf_event *bp) |
304 | struct task_struct *tsk) | ||
305 | { | 295 | { |
306 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | 296 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); |
307 | unsigned int align; | 297 | unsigned int align; |
@@ -314,16 +304,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, | |||
314 | 304 | ||
315 | ret = -EINVAL; | 305 | ret = -EINVAL; |
316 | 306 | ||
317 | if (info->type == X86_BREAKPOINT_EXECUTE) | ||
318 | /* | ||
319 | * Ptrace-refactoring code | ||
320 | * For now, we'll allow instruction breakpoint only for user-space | ||
321 | * addresses | ||
322 | */ | ||
323 | if ((!arch_check_va_in_userspace(info->address, info->len)) && | ||
324 | info->len != X86_BREAKPOINT_EXECUTE) | ||
325 | return ret; | ||
326 | |||
327 | switch (info->len) { | 307 | switch (info->len) { |
328 | case X86_BREAKPOINT_LEN_1: | 308 | case X86_BREAKPOINT_LEN_1: |
329 | align = 0; | 309 | align = 0; |
@@ -350,15 +330,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, | |||
350 | if (info->address & align) | 330 | if (info->address & align) |
351 | return -EINVAL; | 331 | return -EINVAL; |
352 | 332 | ||
353 | /* Check that the virtual address is in the proper range */ | ||
354 | if (tsk) { | ||
355 | if (!arch_check_va_in_userspace(info->address, info->len)) | ||
356 | return -EFAULT; | ||
357 | } else { | ||
358 | if (!arch_check_va_in_kernelspace(info->address, info->len)) | ||
359 | return -EFAULT; | ||
360 | } | ||
361 | |||
362 | return 0; | 333 | return 0; |
363 | } | 334 | } |
364 | 335 | ||
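arch_check_bp_in_kernelspace() now takes the address and length from the perf_event itself and keeps only the kernel-range test: both the first and the last byte of the breakpoint must sit at or above TASK_SIZE. A standalone version of that check; the TASK_SIZE value below is the usual x86_64 user/kernel split and is only assumed for the demo, which expects a 64-bit build.

#include <stdio.h>

#define TASK_SIZE 0x7ffffffff000UL      /* illustrative user/kernel split */

static int bp_in_kernelspace(unsigned long va, unsigned int len)
{
        return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
}

int main(void)
{
        printf("%d\n", bp_in_kernelspace(0x1000, 4));                /* 0: user address */
        printf("%d\n", bp_in_kernelspace(0xffffffff81000000UL, 8));  /* 1: kernel address */
        return 0;
}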
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 54c31c285488..86cef6b32253 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -102,65 +102,62 @@ void __cpuinit fpu_init(void) | |||
102 | 102 | ||
103 | mxcsr_feature_mask_init(); | 103 | mxcsr_feature_mask_init(); |
104 | /* clean state in init */ | 104 | /* clean state in init */ |
105 | if (cpu_has_xsave) | 105 | current_thread_info()->status = 0; |
106 | current_thread_info()->status = TS_XSAVE; | ||
107 | else | ||
108 | current_thread_info()->status = 0; | ||
109 | clear_used_math(); | 106 | clear_used_math(); |
110 | } | 107 | } |
111 | #endif /* CONFIG_X86_64 */ | 108 | #endif /* CONFIG_X86_64 */ |
112 | 109 | ||
113 | /* | 110 | static void fpu_finit(struct fpu *fpu) |
114 | * The _current_ task is using the FPU for the first time | ||
115 | * so initialize it and set the mxcsr to its default | ||
116 | * value at reset if we support XMM instructions and then | ||
117 | * remember the current task has used the FPU. | ||
118 | */ | ||
119 | int init_fpu(struct task_struct *tsk) | ||
120 | { | 111 | { |
121 | if (tsk_used_math(tsk)) { | ||
122 | if (HAVE_HWFP && tsk == current) | ||
123 | unlazy_fpu(tsk); | ||
124 | return 0; | ||
125 | } | ||
126 | |||
127 | /* | ||
128 | * Memory allocation at the first usage of the FPU and other state. | ||
129 | */ | ||
130 | if (!tsk->thread.xstate) { | ||
131 | tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, | ||
132 | GFP_KERNEL); | ||
133 | if (!tsk->thread.xstate) | ||
134 | return -ENOMEM; | ||
135 | } | ||
136 | |||
137 | #ifdef CONFIG_X86_32 | 112 | #ifdef CONFIG_X86_32 |
138 | if (!HAVE_HWFP) { | 113 | if (!HAVE_HWFP) { |
139 | memset(tsk->thread.xstate, 0, xstate_size); | 114 | finit_soft_fpu(&fpu->state->soft); |
140 | finit_task(tsk); | 115 | return; |
141 | set_stopped_child_used_math(tsk); | ||
142 | return 0; | ||
143 | } | 116 | } |
144 | #endif | 117 | #endif |
145 | 118 | ||
146 | if (cpu_has_fxsr) { | 119 | if (cpu_has_fxsr) { |
147 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 120 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; |
148 | 121 | ||
149 | memset(fx, 0, xstate_size); | 122 | memset(fx, 0, xstate_size); |
150 | fx->cwd = 0x37f; | 123 | fx->cwd = 0x37f; |
151 | if (cpu_has_xmm) | 124 | if (cpu_has_xmm) |
152 | fx->mxcsr = MXCSR_DEFAULT; | 125 | fx->mxcsr = MXCSR_DEFAULT; |
153 | } else { | 126 | } else { |
154 | struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; | 127 | struct i387_fsave_struct *fp = &fpu->state->fsave; |
155 | memset(fp, 0, xstate_size); | 128 | memset(fp, 0, xstate_size); |
156 | fp->cwd = 0xffff037fu; | 129 | fp->cwd = 0xffff037fu; |
157 | fp->swd = 0xffff0000u; | 130 | fp->swd = 0xffff0000u; |
158 | fp->twd = 0xffffffffu; | 131 | fp->twd = 0xffffffffu; |
159 | fp->fos = 0xffff0000u; | 132 | fp->fos = 0xffff0000u; |
160 | } | 133 | } |
134 | } | ||
135 | |||
136 | /* | ||
137 | * The _current_ task is using the FPU for the first time | ||
138 | * so initialize it and set the mxcsr to its default | ||
139 | * value at reset if we support XMM instructions and then | ||
140 | * remember the current task has used the FPU. | ||
141 | */ | ||
142 | int init_fpu(struct task_struct *tsk) | ||
143 | { | ||
144 | int ret; | ||
145 | |||
146 | if (tsk_used_math(tsk)) { | ||
147 | if (HAVE_HWFP && tsk == current) | ||
148 | unlazy_fpu(tsk); | ||
149 | return 0; | ||
150 | } | ||
151 | |||
161 | /* | 152 | /* |
162 | * Only the device not available exception or ptrace can call init_fpu. | 153 | * Memory allocation at the first usage of the FPU and other state. |
163 | */ | 154 | */ |
155 | ret = fpu_alloc(&tsk->thread.fpu); | ||
156 | if (ret) | ||
157 | return ret; | ||
158 | |||
159 | fpu_finit(&tsk->thread.fpu); | ||
160 | |||
164 | set_stopped_child_used_math(tsk); | 161 | set_stopped_child_used_math(tsk); |
165 | return 0; | 162 | return 0; |
166 | } | 163 | } |
@@ -194,7 +191,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
194 | return ret; | 191 | return ret; |
195 | 192 | ||
196 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 193 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
197 | &target->thread.xstate->fxsave, 0, -1); | 194 | &target->thread.fpu.state->fxsave, 0, -1); |
198 | } | 195 | } |
199 | 196 | ||
200 | int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | 197 | int xfpregs_set(struct task_struct *target, const struct user_regset *regset, |
@@ -211,19 +208,19 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
211 | return ret; | 208 | return ret; |
212 | 209 | ||
213 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 210 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
214 | &target->thread.xstate->fxsave, 0, -1); | 211 | &target->thread.fpu.state->fxsave, 0, -1); |
215 | 212 | ||
216 | /* | 213 | /* |
217 | * mxcsr reserved bits must be masked to zero for security reasons. | 214 | * mxcsr reserved bits must be masked to zero for security reasons. |
218 | */ | 215 | */ |
219 | target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | 216 | target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; |
220 | 217 | ||
221 | /* | 218 | /* |
222 | * update the header bits in the xsave header, indicating the | 219 | * update the header bits in the xsave header, indicating the |
223 | * presence of FP and SSE state. | 220 | * presence of FP and SSE state. |
224 | */ | 221 | */ |
225 | if (cpu_has_xsave) | 222 | if (cpu_has_xsave) |
226 | target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; | 223 | target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; |
227 | 224 | ||
228 | return ret; | 225 | return ret; |
229 | } | 226 | } |
@@ -246,14 +243,14 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, | |||
246 | * memory layout in the thread struct, so that we can copy the entire | 243 | * memory layout in the thread struct, so that we can copy the entire |
247 | * xstateregs to the user using one user_regset_copyout(). | 244 | * xstateregs to the user using one user_regset_copyout(). |
248 | */ | 245 | */ |
249 | memcpy(&target->thread.xstate->fxsave.sw_reserved, | 246 | memcpy(&target->thread.fpu.state->fxsave.sw_reserved, |
250 | xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); | 247 | xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); |
251 | 248 | ||
252 | /* | 249 | /* |
253 | * Copy the xstate memory layout. | 250 | * Copy the xstate memory layout. |
254 | */ | 251 | */ |
255 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 252 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
256 | &target->thread.xstate->xsave, 0, -1); | 253 | &target->thread.fpu.state->xsave, 0, -1); |
257 | return ret; | 254 | return ret; |
258 | } | 255 | } |
259 | 256 | ||
@@ -272,14 +269,14 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, | |||
272 | return ret; | 269 | return ret; |
273 | 270 | ||
274 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 271 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
275 | &target->thread.xstate->xsave, 0, -1); | 272 | &target->thread.fpu.state->xsave, 0, -1); |
276 | 273 | ||
277 | /* | 274 | /* |
278 | * mxcsr reserved bits must be masked to zero for security reasons. | 275 | * mxcsr reserved bits must be masked to zero for security reasons. |
279 | */ | 276 | */ |
280 | target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | 277 | target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; |
281 | 278 | ||
282 | xsave_hdr = &target->thread.xstate->xsave.xsave_hdr; | 279 | xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr; |
283 | 280 | ||
284 | xsave_hdr->xstate_bv &= pcntxt_mask; | 281 | xsave_hdr->xstate_bv &= pcntxt_mask; |
285 | /* | 282 | /* |
@@ -365,7 +362,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) | |||
365 | static void | 362 | static void |
366 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) | 363 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) |
367 | { | 364 | { |
368 | struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; | 365 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; |
369 | struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; | 366 | struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; |
370 | struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; | 367 | struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; |
371 | int i; | 368 | int i; |
@@ -405,7 +402,7 @@ static void convert_to_fxsr(struct task_struct *tsk, | |||
405 | const struct user_i387_ia32_struct *env) | 402 | const struct user_i387_ia32_struct *env) |
406 | 403 | ||
407 | { | 404 | { |
408 | struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; | 405 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; |
409 | struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; | 406 | struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; |
410 | struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; | 407 | struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; |
411 | int i; | 408 | int i; |
@@ -445,7 +442,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
445 | 442 | ||
446 | if (!cpu_has_fxsr) { | 443 | if (!cpu_has_fxsr) { |
447 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 444 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
448 | &target->thread.xstate->fsave, 0, | 445 | &target->thread.fpu.state->fsave, 0, |
449 | -1); | 446 | -1); |
450 | } | 447 | } |
451 | 448 | ||
@@ -475,7 +472,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
475 | 472 | ||
476 | if (!cpu_has_fxsr) { | 473 | if (!cpu_has_fxsr) { |
477 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 474 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
478 | &target->thread.xstate->fsave, 0, -1); | 475 | &target->thread.fpu.state->fsave, 0, -1); |
479 | } | 476 | } |
480 | 477 | ||
481 | if (pos > 0 || count < sizeof(env)) | 478 | if (pos > 0 || count < sizeof(env)) |
@@ -490,7 +487,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
490 | * presence of FP. | 487 | * presence of FP. |
491 | */ | 488 | */ |
492 | if (cpu_has_xsave) | 489 | if (cpu_has_xsave) |
493 | target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; | 490 | target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; |
494 | return ret; | 491 | return ret; |
495 | } | 492 | } |
496 | 493 | ||
@@ -501,7 +498,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
501 | static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) | 498 | static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) |
502 | { | 499 | { |
503 | struct task_struct *tsk = current; | 500 | struct task_struct *tsk = current; |
504 | struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; | 501 | struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave; |
505 | 502 | ||
506 | fp->status = fp->swd; | 503 | fp->status = fp->swd; |
507 | if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) | 504 | if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) |
@@ -512,7 +509,7 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) | |||
512 | static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) | 509 | static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) |
513 | { | 510 | { |
514 | struct task_struct *tsk = current; | 511 | struct task_struct *tsk = current; |
515 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 512 | struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; |
516 | struct user_i387_ia32_struct env; | 513 | struct user_i387_ia32_struct env; |
517 | int err = 0; | 514 | int err = 0; |
518 | 515 | ||
@@ -547,7 +544,7 @@ static int save_i387_xsave(void __user *buf) | |||
547 | * header as well as change any contents in the memory layout. | 544 | * header as well as change any contents in the memory layout. |
548 | * xrestore as part of sigreturn will capture all the changes. | 545 | * xrestore as part of sigreturn will capture all the changes. |
549 | */ | 546 | */ |
550 | tsk->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; | 547 | tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; |
551 | 548 | ||
552 | if (save_i387_fxsave(fx) < 0) | 549 | if (save_i387_fxsave(fx) < 0) |
553 | return -1; | 550 | return -1; |
@@ -599,7 +596,7 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) | |||
599 | { | 596 | { |
600 | struct task_struct *tsk = current; | 597 | struct task_struct *tsk = current; |
601 | 598 | ||
602 | return __copy_from_user(&tsk->thread.xstate->fsave, buf, | 599 | return __copy_from_user(&tsk->thread.fpu.state->fsave, buf, |
603 | sizeof(struct i387_fsave_struct)); | 600 | sizeof(struct i387_fsave_struct)); |
604 | } | 601 | } |
605 | 602 | ||
@@ -610,10 +607,10 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, | |||
610 | struct user_i387_ia32_struct env; | 607 | struct user_i387_ia32_struct env; |
611 | int err; | 608 | int err; |
612 | 609 | ||
613 | err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], | 610 | err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0], |
614 | size); | 611 | size); |
615 | /* mxcsr reserved bits must be masked to zero for security reasons */ | 612 | /* mxcsr reserved bits must be masked to zero for security reasons */ |
616 | tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | 613 | tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; |
617 | if (err || __copy_from_user(&env, buf, sizeof(env))) | 614 | if (err || __copy_from_user(&env, buf, sizeof(env))) |
618 | return 1; | 615 | return 1; |
619 | convert_to_fxsr(tsk, &env); | 616 | convert_to_fxsr(tsk, &env); |
@@ -629,7 +626,7 @@ static int restore_i387_xsave(void __user *buf) | |||
629 | struct i387_fxsave_struct __user *fx = | 626 | struct i387_fxsave_struct __user *fx = |
630 | (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; | 627 | (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; |
631 | struct xsave_hdr_struct *xsave_hdr = | 628 | struct xsave_hdr_struct *xsave_hdr = |
632 | &current->thread.xstate->xsave.xsave_hdr; | 629 | &current->thread.fpu.state->xsave.xsave_hdr;
633 | u64 mask; | 630 | u64 mask; |
634 | int err; | 631 | int err; |
635 | 632 | ||
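The i387.c changes split first-use initialization into fpu_alloc() plus fpu_finit(): allocate the per-task state buffer lazily, then fill in the architectural reset values (cwd 0x37f, MXCSR default). A simplified user-space sketch of that allocate-on-first-use flow; the struct layout is reduced to the two fields the demo touches.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

struct fxsave_state {
        unsigned short cwd;
        unsigned int mxcsr;
        unsigned char rest[500];        /* remainder of the 512-byte fxsave area */
};

struct fpu {
        struct fxsave_state *state;     /* NULL until the task first uses the FPU */
};

static int fpu_alloc(struct fpu *fpu)
{
        if (fpu->state)
                return 0;
        fpu->state = calloc(1, sizeof(*fpu->state));
        return fpu->state ? 0 : -ENOMEM;
}

static void fpu_finit(struct fpu *fpu)
{
        memset(fpu->state, 0, sizeof(*fpu->state));
        fpu->state->cwd = 0x37f;        /* x87 control word reset value */
        fpu->state->mxcsr = 0x1f80;     /* MXCSR_DEFAULT */
}

static int init_fpu(struct fpu *fpu)
{
        int ret = fpu_alloc(fpu);       /* allocate only on first use */
        if (ret)
                return ret;
        fpu_finit(fpu);
        return 0;
}

int main(void)
{
        struct fpu fpu = { NULL };
        int ret = init_fpu(&fpu);

        printf("init_fpu: %d, cwd=0x%x\n", ret, ret ? 0 : fpu.state->cwd);
        free(fpu.state);
        return 0;
}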
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 23c167925a5c..2dfd31597443 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #include <asm/hpet.h> | 16 | #include <asm/hpet.h> |
17 | #include <asm/smp.h> | 17 | #include <asm/smp.h> |
18 | 18 | ||
19 | DEFINE_SPINLOCK(i8253_lock); | 19 | DEFINE_RAW_SPINLOCK(i8253_lock); |
20 | EXPORT_SYMBOL(i8253_lock); | 20 | EXPORT_SYMBOL(i8253_lock); |
21 | 21 | ||
22 | /* | 22 | /* |
@@ -33,7 +33,7 @@ struct clock_event_device *global_clock_event; | |||
33 | static void init_pit_timer(enum clock_event_mode mode, | 33 | static void init_pit_timer(enum clock_event_mode mode, |
34 | struct clock_event_device *evt) | 34 | struct clock_event_device *evt) |
35 | { | 35 | { |
36 | spin_lock(&i8253_lock); | 36 | raw_spin_lock(&i8253_lock); |
37 | 37 | ||
38 | switch (mode) { | 38 | switch (mode) { |
39 | case CLOCK_EVT_MODE_PERIODIC: | 39 | case CLOCK_EVT_MODE_PERIODIC: |
@@ -62,7 +62,7 @@ static void init_pit_timer(enum clock_event_mode mode, | |||
62 | /* Nothing to do here */ | 62 | /* Nothing to do here */ |
63 | break; | 63 | break; |
64 | } | 64 | } |
65 | spin_unlock(&i8253_lock); | 65 | raw_spin_unlock(&i8253_lock); |
66 | } | 66 | } |
67 | 67 | ||
68 | /* | 68 | /* |
@@ -72,10 +72,10 @@ static void init_pit_timer(enum clock_event_mode mode, | |||
72 | */ | 72 | */ |
73 | static int pit_next_event(unsigned long delta, struct clock_event_device *evt) | 73 | static int pit_next_event(unsigned long delta, struct clock_event_device *evt) |
74 | { | 74 | { |
75 | spin_lock(&i8253_lock); | 75 | raw_spin_lock(&i8253_lock); |
76 | outb_pit(delta & 0xff , PIT_CH0); /* LSB */ | 76 | outb_pit(delta & 0xff , PIT_CH0); /* LSB */ |
77 | outb_pit(delta >> 8 , PIT_CH0); /* MSB */ | 77 | outb_pit(delta >> 8 , PIT_CH0); /* MSB */ |
78 | spin_unlock(&i8253_lock); | 78 | raw_spin_unlock(&i8253_lock); |
79 | 79 | ||
80 | return 0; | 80 | return 0; |
81 | } | 81 | } |
@@ -130,7 +130,7 @@ static cycle_t pit_read(struct clocksource *cs) | |||
130 | int count; | 130 | int count; |
131 | u32 jifs; | 131 | u32 jifs; |
132 | 132 | ||
133 | spin_lock_irqsave(&i8253_lock, flags); | 133 | raw_spin_lock_irqsave(&i8253_lock, flags); |
134 | /* | 134 | /* |
135 | * Although our caller may have the read side of xtime_lock, | 135 | * Although our caller may have the read side of xtime_lock, |
136 | * this is now a seqlock, and we are cheating in this routine | 136 | * this is now a seqlock, and we are cheating in this routine |
@@ -176,7 +176,7 @@ static cycle_t pit_read(struct clocksource *cs) | |||
176 | old_count = count; | 176 | old_count = count; |
177 | old_jifs = jifs; | 177 | old_jifs = jifs; |
178 | 178 | ||
179 | spin_unlock_irqrestore(&i8253_lock, flags); | 179 | raw_spin_unlock_irqrestore(&i8253_lock, flags); |
180 | 180 | ||
181 | count = (LATCH - 1) - count; | 181 | count = (LATCH - 1) - count; |
182 | 182 | ||
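The i8253.c hunk only swaps the lock type to a raw spinlock; the pattern being protected is unchanged: the LSB/MSB pair written to PIT channel 0 must never interleave with another caller. A sketch of that pattern, with a pthread spinlock standing in for the kernel's raw_spinlock_t and an array standing in for the I/O ports.

#include <stdio.h>
#include <pthread.h>

static pthread_spinlock_t pit_lock;
static unsigned char pit_ch0[2];        /* fake latch: [0] = LSB, [1] = MSB */

static void pit_next_event(unsigned int delta)
{
        pthread_spin_lock(&pit_lock);
        pit_ch0[0] = delta & 0xff;      /* LSB first ... */
        pit_ch0[1] = delta >> 8;        /* ... then MSB, atomic w.r.t. other callers */
        pthread_spin_unlock(&pit_lock);
}

int main(void)
{
        pthread_spin_init(&pit_lock, PTHREAD_PROCESS_PRIVATE);
        pit_next_event(0x1234);
        printf("latched 0x%02x%02x\n", pit_ch0[1], pit_ch0[0]);
        pthread_spin_destroy(&pit_lock);
        return 0;
}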
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 0ed2d300cd46..990ae7cfc578 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -60,7 +60,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id) | |||
60 | outb(0, 0xF0); | 60 | outb(0, 0xF0); |
61 | if (ignore_fpu_irq || !boot_cpu_data.hard_math) | 61 | if (ignore_fpu_irq || !boot_cpu_data.hard_math) |
62 | return IRQ_NONE; | 62 | return IRQ_NONE; |
63 | math_error((void __user *)get_irq_regs()->ip); | 63 | math_error(get_irq_regs(), 0, 16); |
64 | return IRQ_HANDLED; | 64 | return IRQ_HANDLED; |
65 | } | 65 | } |
66 | 66 | ||
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index b2258ca91003..4f4af75b9482 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -47,20 +47,8 @@ | |||
47 | #include <asm/debugreg.h> | 47 | #include <asm/debugreg.h> |
48 | #include <asm/apicdef.h> | 48 | #include <asm/apicdef.h> |
49 | #include <asm/system.h> | 49 | #include <asm/system.h> |
50 | |||
51 | #include <asm/apic.h> | 50 | #include <asm/apic.h> |
52 | 51 | ||
53 | /* | ||
54 | * Put the error code here just in case the user cares: | ||
55 | */ | ||
56 | static int gdb_x86errcode; | ||
57 | |||
58 | /* | ||
59 | * Likewise, the vector number here (since GDB only gets the signal | ||
60 | * number through the usual means, and that's not very specific): | ||
61 | */ | ||
62 | static int gdb_x86vector = -1; | ||
63 | |||
64 | /** | 52 | /** |
65 | * pt_regs_to_gdb_regs - Convert ptrace regs to GDB regs | 53 | * pt_regs_to_gdb_regs - Convert ptrace regs to GDB regs |
66 | * @gdb_regs: A pointer to hold the registers in the order GDB wants. | 54 | * @gdb_regs: A pointer to hold the registers in the order GDB wants. |
@@ -211,6 +199,8 @@ static struct hw_breakpoint { | |||
211 | struct perf_event **pev; | 199 | struct perf_event **pev; |
212 | } breakinfo[4]; | 200 | } breakinfo[4]; |
213 | 201 | ||
202 | static unsigned long early_dr7; | ||
203 | |||
214 | static void kgdb_correct_hw_break(void) | 204 | static void kgdb_correct_hw_break(void) |
215 | { | 205 | { |
216 | int breakno; | 206 | int breakno; |
@@ -222,6 +212,14 @@ static void kgdb_correct_hw_break(void) | |||
222 | int cpu = raw_smp_processor_id(); | 212 | int cpu = raw_smp_processor_id(); |
223 | if (!breakinfo[breakno].enabled) | 213 | if (!breakinfo[breakno].enabled) |
224 | continue; | 214 | continue; |
215 | if (dbg_is_early) { | ||
216 | set_debugreg(breakinfo[breakno].addr, breakno); | ||
217 | early_dr7 |= encode_dr7(breakno, | ||
218 | breakinfo[breakno].len, | ||
219 | breakinfo[breakno].type); | ||
220 | set_debugreg(early_dr7, 7); | ||
221 | continue; | ||
222 | } | ||
225 | bp = *per_cpu_ptr(breakinfo[breakno].pev, cpu); | 223 | bp = *per_cpu_ptr(breakinfo[breakno].pev, cpu); |
226 | info = counter_arch_bp(bp); | 224 | info = counter_arch_bp(bp); |
227 | if (bp->attr.disabled != 1) | 225 | if (bp->attr.disabled != 1) |
@@ -236,7 +234,8 @@ static void kgdb_correct_hw_break(void) | |||
236 | if (!val) | 234 | if (!val) |
237 | bp->attr.disabled = 0; | 235 | bp->attr.disabled = 0; |
238 | } | 236 | } |
239 | hw_breakpoint_restore(); | 237 | if (!dbg_is_early) |
238 | hw_breakpoint_restore(); | ||
240 | } | 239 | } |
241 | 240 | ||
242 | static int hw_break_reserve_slot(int breakno) | 241 | static int hw_break_reserve_slot(int breakno) |
@@ -245,6 +244,9 @@ static int hw_break_reserve_slot(int breakno) | |||
245 | int cnt = 0; | 244 | int cnt = 0; |
246 | struct perf_event **pevent; | 245 | struct perf_event **pevent; |
247 | 246 | ||
247 | if (dbg_is_early) | ||
248 | return 0; | ||
249 | |||
248 | for_each_online_cpu(cpu) { | 250 | for_each_online_cpu(cpu) { |
249 | cnt++; | 251 | cnt++; |
250 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); | 252 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); |
@@ -270,6 +272,9 @@ static int hw_break_release_slot(int breakno) | |||
270 | struct perf_event **pevent; | 272 | struct perf_event **pevent; |
271 | int cpu; | 273 | int cpu; |
272 | 274 | ||
275 | if (dbg_is_early) | ||
276 | return 0; | ||
277 | |||
273 | for_each_online_cpu(cpu) { | 278 | for_each_online_cpu(cpu) { |
274 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); | 279 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); |
275 | if (dbg_release_bp_slot(*pevent)) | 280 | if (dbg_release_bp_slot(*pevent)) |
@@ -314,7 +319,11 @@ static void kgdb_remove_all_hw_break(void) | |||
314 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); | 319 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); |
315 | if (bp->attr.disabled == 1) | 320 | if (bp->attr.disabled == 1) |
316 | continue; | 321 | continue; |
317 | arch_uninstall_hw_breakpoint(bp); | 322 | if (dbg_is_early) |
323 | early_dr7 &= ~encode_dr7(i, breakinfo[i].len, | ||
324 | breakinfo[i].type); | ||
325 | else | ||
326 | arch_uninstall_hw_breakpoint(bp); | ||
318 | bp->attr.disabled = 1; | 327 | bp->attr.disabled = 1; |
319 | } | 328 | } |
320 | } | 329 | } |
@@ -391,6 +400,11 @@ void kgdb_disable_hw_debug(struct pt_regs *regs) | |||
391 | for (i = 0; i < 4; i++) { | 400 | for (i = 0; i < 4; i++) { |
392 | if (!breakinfo[i].enabled) | 401 | if (!breakinfo[i].enabled) |
393 | continue; | 402 | continue; |
403 | if (dbg_is_early) { | ||
404 | early_dr7 &= ~encode_dr7(i, breakinfo[i].len, | ||
405 | breakinfo[i].type); | ||
406 | continue; | ||
407 | } | ||
394 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); | 408 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); |
395 | if (bp->attr.disabled == 1) | 409 | if (bp->attr.disabled == 1) |
396 | continue; | 410 | continue; |
@@ -399,23 +413,6 @@ void kgdb_disable_hw_debug(struct pt_regs *regs) | |||
399 | } | 413 | } |
400 | } | 414 | } |
401 | 415 | ||
402 | /** | ||
403 | * kgdb_post_primary_code - Save error vector/code numbers. | ||
404 | * @regs: Original pt_regs. | ||
405 | * @e_vector: Original error vector. | ||
406 | * @err_code: Original error code. | ||
407 | * | ||
408 | * This is needed on architectures which support SMP and KGDB. | ||
409 | * This function is called after all the slave cpus have been put | ||
410 | * to a known spin state and the primary CPU has control over KGDB. | ||
411 | */ | ||
412 | void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code) | ||
413 | { | ||
414 | /* primary processor is completely in the debugger */ | ||
415 | gdb_x86vector = e_vector; | ||
416 | gdb_x86errcode = err_code; | ||
417 | } | ||
418 | |||
419 | #ifdef CONFIG_SMP | 416 | #ifdef CONFIG_SMP |
420 | /** | 417 | /** |
421 | * kgdb_roundup_cpus - Get other CPUs into a holding pattern | 418 | * kgdb_roundup_cpus - Get other CPUs into a holding pattern |
@@ -567,7 +564,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) | |||
567 | return NOTIFY_DONE; | 564 | return NOTIFY_DONE; |
568 | } | 565 | } |
569 | 566 | ||
570 | if (kgdb_handle_exception(args->trapnr, args->signr, args->err, regs)) | 567 | if (kgdb_handle_exception(args->trapnr, args->signr, cmd, regs)) |
571 | return NOTIFY_DONE; | 568 | return NOTIFY_DONE; |
572 | 569 | ||
573 | /* Must touch watchdog before return to normal operation */ | 570 | /* Must touch watchdog before return to normal operation */ |
@@ -575,6 +572,26 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) | |||
575 | return NOTIFY_STOP; | 572 | return NOTIFY_STOP; |
576 | } | 573 | } |
577 | 574 | ||
575 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | ||
576 | int kgdb_ll_trap(int cmd, const char *str, | ||
577 | struct pt_regs *regs, long err, int trap, int sig) | ||
578 | { | ||
579 | struct die_args args = { | ||
580 | .regs = regs, | ||
581 | .str = str, | ||
582 | .err = err, | ||
583 | .trapnr = trap, | ||
584 | .signr = sig, | ||
585 | |||
586 | }; | ||
587 | |||
588 | if (!kgdb_io_module_registered) | ||
589 | return NOTIFY_DONE; | ||
590 | |||
591 | return __kgdb_notify(&args, cmd); | ||
592 | } | ||
593 | #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ | ||
594 | |||
578 | static int | 595 | static int |
579 | kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) | 596 | kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) |
580 | { | 597 | { |
@@ -605,14 +622,15 @@ static struct notifier_block kgdb_notifier = { | |||
605 | */ | 622 | */ |
606 | int kgdb_arch_init(void) | 623 | int kgdb_arch_init(void) |
607 | { | 624 | { |
625 | return register_die_notifier(&kgdb_notifier); | ||
626 | } | ||
627 | |||
628 | void kgdb_arch_late(void) | ||
629 | { | ||
608 | int i, cpu; | 630 | int i, cpu; |
609 | int ret; | ||
610 | struct perf_event_attr attr; | 631 | struct perf_event_attr attr; |
611 | struct perf_event **pevent; | 632 | struct perf_event **pevent; |
612 | 633 | ||
613 | ret = register_die_notifier(&kgdb_notifier); | ||
614 | if (ret != 0) | ||
615 | return ret; | ||
616 | /* | 634 | /* |
617 | * Pre-allocate the hw breakpoint structions in the non-atomic | 635 | * Pre-allocate the hw breakpoint structions in the non-atomic |
618 | * portion of kgdb because this operation requires mutexs to | 636 | * portion of kgdb because this operation requires mutexs to |
@@ -624,12 +642,15 @@ int kgdb_arch_init(void) | |||
624 | attr.bp_type = HW_BREAKPOINT_W; | 642 | attr.bp_type = HW_BREAKPOINT_W; |
625 | attr.disabled = 1; | 643 | attr.disabled = 1; |
626 | for (i = 0; i < 4; i++) { | 644 | for (i = 0; i < 4; i++) { |
645 | if (breakinfo[i].pev) | ||
646 | continue; | ||
627 | breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); | 647 | breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); |
628 | if (IS_ERR(breakinfo[i].pev)) { | 648 | if (IS_ERR(breakinfo[i].pev)) { |
629 | printk(KERN_ERR "kgdb: Could not allocate hw breakpoints\n"); | 649 | printk(KERN_ERR "kgdb: Could not allocate hw "
650 | "breakpoints\nDisabling the kernel debugger\n"); | ||
630 | breakinfo[i].pev = NULL; | 651 | breakinfo[i].pev = NULL; |
631 | kgdb_arch_exit(); | 652 | kgdb_arch_exit(); |
632 | return -1; | 653 | return; |
633 | } | 654 | } |
634 | for_each_online_cpu(cpu) { | 655 | for_each_online_cpu(cpu) { |
635 | pevent = per_cpu_ptr(breakinfo[i].pev, cpu); | 656 | pevent = per_cpu_ptr(breakinfo[i].pev, cpu); |
@@ -640,7 +661,6 @@ int kgdb_arch_init(void) | |||
640 | } | 661 | } |
641 | } | 662 | } |
642 | } | 663 | } |
643 | return ret; | ||
644 | } | 664 | } |
645 | 665 | ||
646 | /** | 666 | /** |
@@ -690,6 +710,11 @@ unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs) | |||
690 | return instruction_pointer(regs); | 710 | return instruction_pointer(regs); |
691 | } | 711 | } |
692 | 712 | ||
713 | void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) | ||
714 | { | ||
715 | regs->ip = ip; | ||
716 | } | ||
717 | |||
693 | struct kgdb_arch arch_kgdb_ops = { | 718 | struct kgdb_arch arch_kgdb_ops = { |
694 | /* Breakpoint instruction: */ | 719 | /* Breakpoint instruction: */ |
695 | .gdb_bpt_instr = { 0xcc }, | 720 | .gdb_bpt_instr = { 0xcc }, |
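In the early-debug path above, kgdb skips the perf layer and accumulates breakpoint slots directly into a DR7 image (early_dr7 |= encode_dr7(...)). The sketch below encodes the architectural DR7 layout (local-enable bit at 2*slot, LEN/RW nibble at bit 16 + 4*slot); it is an illustration, not the kernel's encode_dr7() helper, whose field constants are pre-shifted.

#include <stdio.h>

/* 2-bit architectural field values */
#define BP_TYPE_EXEC  0x0
#define BP_TYPE_WRITE 0x1
#define BP_LEN_1      0x0
#define BP_LEN_4      0x3

static unsigned long dr7_encode(int slot, unsigned int len, unsigned int type)
{
        unsigned long v = 0;

        v |= 1UL << (2 * slot);                         /* local-enable bit Ln */
        v |= (unsigned long)(((len << 2) | type) & 0xf)
                << (16 + 4 * slot);                     /* LENn/RWn nibble */
        return v;
}

int main(void)
{
        unsigned long dr7 = 0;

        dr7 |= dr7_encode(0, BP_LEN_4, BP_TYPE_WRITE);  /* slot 0: 4-byte write bp */
        dr7 |= dr7_encode(1, BP_LEN_1, BP_TYPE_EXEC);   /* slot 1: execute bp */
        printf("dr7 = 0x%lx\n", dr7);                   /* 0xd0005 */
        return 0;
}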
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index b43bbaebe2c0..345a4b1fe144 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -422,14 +422,22 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
422 | 422 | ||
423 | static void __kprobes clear_btf(void) | 423 | static void __kprobes clear_btf(void) |
424 | { | 424 | { |
425 | if (test_thread_flag(TIF_DEBUGCTLMSR)) | 425 | if (test_thread_flag(TIF_BLOCKSTEP)) { |
426 | update_debugctlmsr(0); | 426 | unsigned long debugctl = get_debugctlmsr(); |
427 | |||
428 | debugctl &= ~DEBUGCTLMSR_BTF; | ||
429 | update_debugctlmsr(debugctl); | ||
430 | } | ||
427 | } | 431 | } |
428 | 432 | ||
429 | static void __kprobes restore_btf(void) | 433 | static void __kprobes restore_btf(void) |
430 | { | 434 | { |
431 | if (test_thread_flag(TIF_DEBUGCTLMSR)) | 435 | if (test_thread_flag(TIF_BLOCKSTEP)) { |
432 | update_debugctlmsr(current->thread.debugctlmsr); | 436 | unsigned long debugctl = get_debugctlmsr(); |
437 | |||
438 | debugctl |= DEBUGCTLMSR_BTF; | ||
439 | update_debugctlmsr(debugctl); | ||
440 | } | ||
433 | } | 441 | } |
434 | 442 | ||
435 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | 443 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, |
@@ -534,20 +542,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
534 | struct kprobe_ctlblk *kcb; | 542 | struct kprobe_ctlblk *kcb; |
535 | 543 | ||
536 | addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); | 544 | addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); |
537 | if (*addr != BREAKPOINT_INSTRUCTION) { | ||
538 | /* | ||
539 | * The breakpoint instruction was removed right | ||
540 | * after we hit it. Another cpu has removed | ||
541 | * either a probepoint or a debugger breakpoint | ||
542 | * at this address. In either case, no further | ||
543 | * handling of this interrupt is appropriate. | ||
544 | * Back up over the (now missing) int3 and run | ||
545 | * the original instruction. | ||
546 | */ | ||
547 | regs->ip = (unsigned long)addr; | ||
548 | return 1; | ||
549 | } | ||
550 | |||
551 | /* | 545 | /* |
552 | * We don't want to be preempted for the entire | 546 | * We don't want to be preempted for the entire |
553 | * duration of kprobe processing. We conditionally | 547 | * duration of kprobe processing. We conditionally |
@@ -579,6 +573,19 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
579 | setup_singlestep(p, regs, kcb, 0); | 573 | setup_singlestep(p, regs, kcb, 0); |
580 | return 1; | 574 | return 1; |
581 | } | 575 | } |
576 | } else if (*addr != BREAKPOINT_INSTRUCTION) { | ||
577 | /* | ||
578 | * The breakpoint instruction was removed right | ||
579 | * after we hit it. Another cpu has removed | ||
580 | * either a probepoint or a debugger breakpoint | ||
581 | * at this address. In either case, no further | ||
582 | * handling of this interrupt is appropriate. | ||
583 | * Back up over the (now missing) int3 and run | ||
584 | * the original instruction. | ||
585 | */ | ||
586 | regs->ip = (unsigned long)addr; | ||
587 | preempt_enable_no_resched(); | ||
588 | return 1; | ||
582 | } else if (kprobe_running()) { | 589 | } else if (kprobe_running()) { |
583 | p = __get_cpu_var(current_kprobe); | 590 | p = __get_cpu_var(current_kprobe); |
584 | if (p->break_handler && p->break_handler(p, regs)) { | 591 | if (p->break_handler && p->break_handler(p, regs)) { |
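clear_btf()/restore_btf() above now read-modify-write only the BTF bit of DEBUGCTL instead of clobbering the whole MSR, and key off TIF_BLOCKSTEP. A sketch of that read-modify-write, with a global variable faking the MSR and a flag faking the thread bit; DEBUGCTLMSR_BTF really is bit 1 of IA32_DEBUGCTL.

#include <stdio.h>

#define DEBUGCTLMSR_BTF (1UL << 1)

static unsigned long fake_debugctl = DEBUGCTLMSR_BTF | (1UL << 0); /* BTF + one other bit */
static int task_uses_blockstep = 1;     /* stands in for TIF_BLOCKSTEP */

static unsigned long get_debugctlmsr(void)      { return fake_debugctl; }
static void update_debugctlmsr(unsigned long v) { fake_debugctl = v; }

static void clear_btf(void)
{
        if (task_uses_blockstep)
                update_debugctlmsr(get_debugctlmsr() & ~DEBUGCTLMSR_BTF);
}

static void restore_btf(void)
{
        if (task_uses_blockstep)
                update_debugctlmsr(get_debugctlmsr() | DEBUGCTLMSR_BTF);
}

int main(void)
{
        clear_btf();
        printf("after clear:   0x%lx\n", fake_debugctl);        /* 0x1: other bit kept, BTF gone */
        restore_btf();
        printf("after restore: 0x%lx\n", fake_debugctl);        /* 0x3 */
        return 0;
}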
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index feaeb0d3aa4f..eb9b76c716c2 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -29,6 +29,8 @@ | |||
29 | #define KVM_SCALE 22 | 29 | #define KVM_SCALE 22 |
30 | 30 | ||
31 | static int kvmclock = 1; | 31 | static int kvmclock = 1; |
32 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; | ||
33 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; | ||
32 | 34 | ||
33 | static int parse_no_kvmclock(char *arg) | 35 | static int parse_no_kvmclock(char *arg) |
34 | { | 36 | { |
@@ -54,7 +56,8 @@ static unsigned long kvm_get_wallclock(void) | |||
54 | 56 | ||
55 | low = (int)__pa_symbol(&wall_clock); | 57 | low = (int)__pa_symbol(&wall_clock); |
56 | high = ((u64)__pa_symbol(&wall_clock) >> 32); | 58 | high = ((u64)__pa_symbol(&wall_clock) >> 32); |
57 | native_write_msr(MSR_KVM_WALL_CLOCK, low, high); | 59 | |
60 | native_write_msr(msr_kvm_wall_clock, low, high); | ||
58 | 61 | ||
59 | vcpu_time = &get_cpu_var(hv_clock); | 62 | vcpu_time = &get_cpu_var(hv_clock); |
60 | pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); | 63 | pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); |
@@ -130,7 +133,8 @@ static int kvm_register_clock(char *txt) | |||
130 | high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); | 133 | high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); |
131 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", | 134 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", |
132 | cpu, high, low, txt); | 135 | cpu, high, low, txt); |
133 | return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); | 136 | |
137 | return native_write_msr_safe(msr_kvm_system_time, low, high); | ||
134 | } | 138 | } |
135 | 139 | ||
136 | #ifdef CONFIG_X86_LOCAL_APIC | 140 | #ifdef CONFIG_X86_LOCAL_APIC |
@@ -165,14 +169,14 @@ static void __init kvm_smp_prepare_boot_cpu(void) | |||
165 | #ifdef CONFIG_KEXEC | 169 | #ifdef CONFIG_KEXEC |
166 | static void kvm_crash_shutdown(struct pt_regs *regs) | 170 | static void kvm_crash_shutdown(struct pt_regs *regs) |
167 | { | 171 | { |
168 | native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); | 172 | native_write_msr(msr_kvm_system_time, 0, 0); |
169 | native_machine_crash_shutdown(regs); | 173 | native_machine_crash_shutdown(regs); |
170 | } | 174 | } |
171 | #endif | 175 | #endif |
172 | 176 | ||
173 | static void kvm_shutdown(void) | 177 | static void kvm_shutdown(void) |
174 | { | 178 | { |
175 | native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); | 179 | native_write_msr(msr_kvm_system_time, 0, 0); |
176 | native_machine_shutdown(); | 180 | native_machine_shutdown(); |
177 | } | 181 | } |
178 | 182 | ||
@@ -181,27 +185,37 @@ void __init kvmclock_init(void) | |||
181 | if (!kvm_para_available()) | 185 | if (!kvm_para_available()) |
182 | return; | 186 | return; |
183 | 187 | ||
184 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { | 188 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) { |
185 | if (kvm_register_clock("boot clock")) | 189 | msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW; |
186 | return; | 190 | msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW; |
187 | pv_time_ops.sched_clock = kvm_clock_read; | 191 | } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))) |
188 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; | 192 | return; |
189 | x86_platform.get_wallclock = kvm_get_wallclock; | 193 | |
190 | x86_platform.set_wallclock = kvm_set_wallclock; | 194 | printk(KERN_INFO "kvm-clock: Using msrs %x and %x", |
195 | msr_kvm_system_time, msr_kvm_wall_clock); | ||
196 | |||
197 | if (kvm_register_clock("boot clock")) | ||
198 | return; | ||
199 | pv_time_ops.sched_clock = kvm_clock_read; | ||
200 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; | ||
201 | x86_platform.get_wallclock = kvm_get_wallclock; | ||
202 | x86_platform.set_wallclock = kvm_set_wallclock; | ||
191 | #ifdef CONFIG_X86_LOCAL_APIC | 203 | #ifdef CONFIG_X86_LOCAL_APIC |
192 | x86_cpuinit.setup_percpu_clockev = | 204 | x86_cpuinit.setup_percpu_clockev = |
193 | kvm_setup_secondary_clock; | 205 | kvm_setup_secondary_clock; |
194 | #endif | 206 | #endif |
195 | #ifdef CONFIG_SMP | 207 | #ifdef CONFIG_SMP |
196 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | 208 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; |
197 | #endif | 209 | #endif |
198 | machine_ops.shutdown = kvm_shutdown; | 210 | machine_ops.shutdown = kvm_shutdown; |
199 | #ifdef CONFIG_KEXEC | 211 | #ifdef CONFIG_KEXEC |
200 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 212 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
201 | #endif | 213 | #endif |
202 | kvm_get_preset_lpj(); | 214 | kvm_get_preset_lpj(); |
203 | clocksource_register(&kvm_clock); | 215 | clocksource_register(&kvm_clock); |
204 | pv_info.paravirt_enabled = 1; | 216 | pv_info.paravirt_enabled = 1; |
205 | pv_info.name = "KVM"; | 217 | pv_info.name = "KVM"; |
206 | } | 218 | |
219 | if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) | ||
220 | pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); | ||
207 | } | 221 | } |
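
A minimal, self-contained sketch of the MSR-selection flow the kvmclock hunk above introduces: prefer the new MSR pair when the CLOCKSOURCE2 feature is advertised, fall back to the old pair for the original clocksource feature, and bail out when neither is present. The feature bits, MSR values and helper names below are illustrative stand-ins, not the real <asm/kvm_para.h> definitions.

#include <stdbool.h>
#include <stdio.h>

enum { FEAT_CLOCKSOURCE = 0, FEAT_CLOCKSOURCE2 = 3 };	/* stand-in bit numbers */

#define MSR_SYSTEM_TIME_OLD	0x12u			/* stand-in MSR values */
#define MSR_WALL_CLOCK_OLD	0x11u
#define MSR_SYSTEM_TIME_NEW	0x4b564d01u
#define MSR_WALL_CLOCK_NEW	0x4b564d00u

static bool has_feature(unsigned long features, int bit)
{
	return features & (1UL << bit);
}

/* Pick which MSR pair to use; returns -1 when no kvmclock is available. */
static int pick_kvmclock_msrs(unsigned long features,
			      unsigned int *system_time, unsigned int *wall_clock)
{
	*system_time = MSR_SYSTEM_TIME_OLD;
	*wall_clock = MSR_WALL_CLOCK_OLD;

	if (has_feature(features, FEAT_CLOCKSOURCE2)) {
		*system_time = MSR_SYSTEM_TIME_NEW;
		*wall_clock = MSR_WALL_CLOCK_NEW;
	} else if (!has_feature(features, FEAT_CLOCKSOURCE)) {
		return -1;
	}
	return 0;
}

int main(void)
{
	unsigned int st, wc;

	if (pick_kvmclock_msrs(1UL << FEAT_CLOCKSOURCE2, &st, &wc) == 0)
		printf("kvm-clock: Using msrs %x and %x\n", st, wc);
	return 0;
}
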
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index cceb5bc3c3c2..fa6551d36c10 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -201,9 +201,9 @@ static int do_microcode_update(const void __user *buf, size_t size) | |||
201 | return error; | 201 | return error; |
202 | } | 202 | } |
203 | 203 | ||
204 | static int microcode_open(struct inode *unused1, struct file *unused2) | 204 | static int microcode_open(struct inode *inode, struct file *file) |
205 | { | 205 | { |
206 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; | 206 | return capable(CAP_SYS_RAWIO) ? nonseekable_open(inode, file) : -EPERM; |
207 | } | 207 | } |
208 | 208 | ||
209 | static ssize_t microcode_write(struct file *file, const char __user *buf, | 209 | static ssize_t microcode_write(struct file *file, const char __user *buf, |
@@ -260,6 +260,7 @@ static void microcode_dev_exit(void) | |||
260 | } | 260 | } |
261 | 261 | ||
262 | MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); | 262 | MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); |
263 | MODULE_ALIAS("devname:cpu/microcode"); | ||
263 | #else | 264 | #else |
264 | #define microcode_dev_init() 0 | 265 | #define microcode_dev_init() 0 |
265 | #define microcode_dev_exit() do { } while (0) | 266 | #define microcode_dev_exit() do { } while (0) |
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 85a343e28937..356170262a93 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
@@ -343,10 +343,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
343 | int (*get_ucode_data)(void *, const void *, size_t)) | 343 | int (*get_ucode_data)(void *, const void *, size_t)) |
344 | { | 344 | { |
345 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 345 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
346 | u8 *ucode_ptr = data, *new_mc = NULL, *mc; | 346 | u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL; |
347 | int new_rev = uci->cpu_sig.rev; | 347 | int new_rev = uci->cpu_sig.rev; |
348 | unsigned int leftover = size; | 348 | unsigned int leftover = size; |
349 | enum ucode_state state = UCODE_OK; | 349 | enum ucode_state state = UCODE_OK; |
350 | unsigned int curr_mc_size = 0; | ||
350 | 351 | ||
351 | while (leftover) { | 352 | while (leftover) { |
352 | struct microcode_header_intel mc_header; | 353 | struct microcode_header_intel mc_header; |
@@ -361,9 +362,15 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
361 | break; | 362 | break; |
362 | } | 363 | } |
363 | 364 | ||
364 | mc = vmalloc(mc_size); | 365 | /* For performance reasons, reuse mc area when possible */ |
365 | if (!mc) | 366 | if (!mc || mc_size > curr_mc_size) { |
366 | break; | 367 | if (mc) |
368 | vfree(mc); | ||
369 | mc = vmalloc(mc_size); | ||
370 | if (!mc) | ||
371 | break; | ||
372 | curr_mc_size = mc_size; | ||
373 | } | ||
367 | 374 | ||
368 | if (get_ucode_data(mc, ucode_ptr, mc_size) || | 375 | if (get_ucode_data(mc, ucode_ptr, mc_size) || |
369 | microcode_sanity_check(mc) < 0) { | 376 | microcode_sanity_check(mc) < 0) { |
@@ -376,13 +383,16 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
376 | vfree(new_mc); | 383 | vfree(new_mc); |
377 | new_rev = mc_header.rev; | 384 | new_rev = mc_header.rev; |
378 | new_mc = mc; | 385 | new_mc = mc; |
379 | } else | 386 | mc = NULL; /* trigger new vmalloc */ |
380 | vfree(mc); | 387 | } |
381 | 388 | ||
382 | ucode_ptr += mc_size; | 389 | ucode_ptr += mc_size; |
383 | leftover -= mc_size; | 390 | leftover -= mc_size; |
384 | } | 391 | } |
385 | 392 | ||
393 | if (mc) | ||
394 | vfree(mc); | ||
395 | |||
386 | if (leftover) { | 396 | if (leftover) { |
387 | if (new_mc) | 397 | if (new_mc) |
388 | vfree(new_mc); | 398 | vfree(new_mc); |
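
The microcode_intel.c change above stops doing a vmalloc/vfree per microcode entry and instead keeps one scratch buffer, growing it only when an entry is larger than the current allocation. A hedged user-space rendering of that grow-only pattern, with malloc/free standing in for vmalloc/vfree:

#include <stdlib.h>

/* Grow-only scratch buffer: reallocate only when the caller needs more
 * space than is currently allocated; the final release is explicit. */
struct scratch {
	void *buf;
	size_t cap;
};

void *scratch_get(struct scratch *s, size_t need)
{
	if (!s->buf || need > s->cap) {
		free(s->buf);
		s->buf = malloc(need);
		s->cap = s->buf ? need : 0;
	}
	return s->buf;
}

void scratch_put(struct scratch *s)
{
	free(s->buf);
	s->buf = NULL;
	s->cap = 0;
}

A loader loop would call scratch_get() once per entry and scratch_put() once after the loop, matching the vfree(mc) the hunk adds after the while loop.
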
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index e81030f71a8f..5ae5d2426edf 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -115,21 +115,6 @@ static void __init MP_bus_info(struct mpc_bus *m) | |||
115 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); | 115 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); |
116 | } | 116 | } |
117 | 117 | ||
118 | static int bad_ioapic(unsigned long address) | ||
119 | { | ||
120 | if (nr_ioapics >= MAX_IO_APICS) { | ||
121 | printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " | ||
122 | "(found %d)\n", MAX_IO_APICS, nr_ioapics); | ||
123 | panic("Recompile kernel with bigger MAX_IO_APICS!\n"); | ||
124 | } | ||
125 | if (!address) { | ||
126 | printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" | ||
127 | " found in table, skipping!\n"); | ||
128 | return 1; | ||
129 | } | ||
130 | return 0; | ||
131 | } | ||
132 | |||
133 | static void __init MP_ioapic_info(struct mpc_ioapic *m) | 118 | static void __init MP_ioapic_info(struct mpc_ioapic *m) |
134 | { | 119 | { |
135 | if (!(m->flags & MPC_APIC_USABLE)) | 120 | if (!(m->flags & MPC_APIC_USABLE)) |
@@ -138,15 +123,7 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m) | |||
138 | printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", | 123 | printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", |
139 | m->apicid, m->apicver, m->apicaddr); | 124 | m->apicid, m->apicver, m->apicaddr); |
140 | 125 | ||
141 | if (bad_ioapic(m->apicaddr)) | 126 | mp_register_ioapic(m->apicid, m->apicaddr, gsi_end + 1); |
142 | return; | ||
143 | |||
144 | mp_ioapics[nr_ioapics].apicaddr = m->apicaddr; | ||
145 | mp_ioapics[nr_ioapics].apicid = m->apicid; | ||
146 | mp_ioapics[nr_ioapics].type = m->type; | ||
147 | mp_ioapics[nr_ioapics].apicver = m->apicver; | ||
148 | mp_ioapics[nr_ioapics].flags = m->flags; | ||
149 | nr_ioapics++; | ||
150 | } | 127 | } |
151 | 128 | ||
152 | static void print_MP_intsrc_info(struct mpc_intsrc *m) | 129 | static void print_MP_intsrc_info(struct mpc_intsrc *m) |
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c index 0aad8670858e..e796448f0eb5 100644 --- a/arch/x86/kernel/mrst.c +++ b/arch/x86/kernel/mrst.c | |||
@@ -237,4 +237,9 @@ void __init x86_mrst_early_setup(void) | |||
237 | x86_init.pci.fixup_irqs = x86_init_noop; | 237 | x86_init.pci.fixup_irqs = x86_init_noop; |
238 | 238 | ||
239 | legacy_pic = &null_legacy_pic; | 239 | legacy_pic = &null_legacy_pic; |
240 | |||
241 | /* Avoid searching for BIOS MP tables */ | ||
242 | x86_init.mpparse.find_smp_config = x86_init_noop; | ||
243 | x86_init.mpparse.get_smp_config = x86_init_uint_noop; | ||
244 | |||
240 | } | 245 | } |
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 4d4468e9f47c..7bf2dc4c8f70 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -230,7 +230,7 @@ static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb, | |||
230 | msr_device_destroy(cpu); | 230 | msr_device_destroy(cpu); |
231 | break; | 231 | break; |
232 | } | 232 | } |
233 | return err ? NOTIFY_BAD : NOTIFY_OK; | 233 | return notifier_from_errno(err); |
234 | } | 234 | } |
235 | 235 | ||
236 | static struct notifier_block __refdata msr_class_cpu_notifier = { | 236 | static struct notifier_block __refdata msr_class_cpu_notifier = { |
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 7d2829dde20e..a5bc528d4328 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c | |||
@@ -31,8 +31,6 @@ static struct dma_map_ops swiotlb_dma_ops = { | |||
31 | .free_coherent = swiotlb_free_coherent, | 31 | .free_coherent = swiotlb_free_coherent, |
32 | .sync_single_for_cpu = swiotlb_sync_single_for_cpu, | 32 | .sync_single_for_cpu = swiotlb_sync_single_for_cpu, |
33 | .sync_single_for_device = swiotlb_sync_single_for_device, | 33 | .sync_single_for_device = swiotlb_sync_single_for_device, |
34 | .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, | ||
35 | .sync_single_range_for_device = swiotlb_sync_single_range_for_device, | ||
36 | .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, | 34 | .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, |
37 | .sync_sg_for_device = swiotlb_sync_sg_for_device, | 35 | .sync_sg_for_device = swiotlb_sync_sg_for_device, |
38 | .map_sg = swiotlb_map_sg_attrs, | 36 | .map_sg = swiotlb_map_sg_attrs, |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 28ad9f4d8b94..e7e35219b32f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <asm/idle.h> | 20 | #include <asm/idle.h> |
21 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
22 | #include <asm/i387.h> | 22 | #include <asm/i387.h> |
23 | #include <asm/ds.h> | ||
24 | #include <asm/debugreg.h> | 23 | #include <asm/debugreg.h> |
25 | 24 | ||
26 | unsigned long idle_halt; | 25 | unsigned long idle_halt; |
@@ -32,26 +31,22 @@ struct kmem_cache *task_xstate_cachep; | |||
32 | 31 | ||
33 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | 32 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) |
34 | { | 33 | { |
34 | int ret; | ||
35 | |||
35 | *dst = *src; | 36 | *dst = *src; |
36 | if (src->thread.xstate) { | 37 | if (fpu_allocated(&src->thread.fpu)) { |
37 | dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, | 38 | memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); |
38 | GFP_KERNEL); | 39 | ret = fpu_alloc(&dst->thread.fpu); |
39 | if (!dst->thread.xstate) | 40 | if (ret) |
40 | return -ENOMEM; | 41 | return ret; |
41 | WARN_ON((unsigned long)dst->thread.xstate & 15); | 42 | fpu_copy(&dst->thread.fpu, &src->thread.fpu); |
42 | memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); | ||
43 | } | 43 | } |
44 | return 0; | 44 | return 0; |
45 | } | 45 | } |
46 | 46 | ||
47 | void free_thread_xstate(struct task_struct *tsk) | 47 | void free_thread_xstate(struct task_struct *tsk) |
48 | { | 48 | { |
49 | if (tsk->thread.xstate) { | 49 | fpu_free(&tsk->thread.fpu); |
50 | kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); | ||
51 | tsk->thread.xstate = NULL; | ||
52 | } | ||
53 | |||
54 | WARN(tsk->thread.ds_ctx, "leaking DS context\n"); | ||
55 | } | 50 | } |
56 | 51 | ||
57 | void free_thread_info(struct thread_info *ti) | 52 | void free_thread_info(struct thread_info *ti) |
@@ -198,11 +193,16 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
198 | prev = &prev_p->thread; | 193 | prev = &prev_p->thread; |
199 | next = &next_p->thread; | 194 | next = &next_p->thread; |
200 | 195 | ||
201 | if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || | 196 | if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ |
202 | test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) | 197 | test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { |
203 | ds_switch_to(prev_p, next_p); | 198 | unsigned long debugctl = get_debugctlmsr(); |
204 | else if (next->debugctlmsr != prev->debugctlmsr) | 199 | |
205 | update_debugctlmsr(next->debugctlmsr); | 200 | debugctl &= ~DEBUGCTLMSR_BTF; |
201 | if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) | ||
202 | debugctl |= DEBUGCTLMSR_BTF; | ||
203 | |||
204 | update_debugctlmsr(debugctl); | ||
205 | } | ||
206 | 206 | ||
207 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ | 207 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ |
208 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { | 208 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { |
@@ -546,11 +546,13 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | |||
546 | * check OSVW bit for CPUs that are not affected | 546 | * check OSVW bit for CPUs that are not affected |
547 | * by erratum #400 | 547 | * by erratum #400 |
548 | */ | 548 | */ |
549 | rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); | 549 | if (cpu_has(c, X86_FEATURE_OSVW)) { |
550 | if (val >= 2) { | 550 | rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); |
551 | rdmsrl(MSR_AMD64_OSVW_STATUS, val); | 551 | if (val >= 2) { |
552 | if (!(val & BIT(1))) | 552 | rdmsrl(MSR_AMD64_OSVW_STATUS, val); |
553 | goto no_c1e_idle; | 553 | if (!(val & BIT(1))) |
554 | goto no_c1e_idle; | ||
555 | } | ||
554 | } | 556 | } |
555 | return 1; | 557 | return 1; |
556 | } | 558 | } |
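
The process.c hunk moves arch_dup_task_struct() from open-coded kmem_cache calls to small fpu_* helpers. Below is a user-space sketch of the same allocate-then-copy-on-fork shape; the struct and helpers are stand-ins, not the kernel's i387 interface.

#include <stdlib.h>
#include <string.h>

struct fpu {
	void *state;
	size_t size;
};

static int fpu_allocated(const struct fpu *f)
{
	return f->state != NULL;
}

static int fpu_alloc(struct fpu *f, size_t size)
{
	f->state = malloc(size);
	if (!f->state)
		return -1;		/* -ENOMEM in the kernel */
	f->size = size;
	return 0;
}

static void fpu_copy(struct fpu *dst, const struct fpu *src)
{
	memcpy(dst->state, src->state, src->size);
}

/* Duplicate src's FPU state into dst, the way a fork duplicates a task. */
int dup_fpu(struct fpu *dst, const struct fpu *src)
{
	memset(dst, 0, sizeof(*dst));
	if (!fpu_allocated(src))
		return 0;		/* parent never touched the FPU */
	if (fpu_alloc(dst, src->size))
		return -1;
	fpu_copy(dst, src);
	return 0;
}
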
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f6c62667e30c..8d128783af47 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -55,7 +55,6 @@ | |||
55 | #include <asm/cpu.h> | 55 | #include <asm/cpu.h> |
56 | #include <asm/idle.h> | 56 | #include <asm/idle.h> |
57 | #include <asm/syscalls.h> | 57 | #include <asm/syscalls.h> |
58 | #include <asm/ds.h> | ||
59 | #include <asm/debugreg.h> | 58 | #include <asm/debugreg.h> |
60 | 59 | ||
61 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 60 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
@@ -238,13 +237,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
238 | kfree(p->thread.io_bitmap_ptr); | 237 | kfree(p->thread.io_bitmap_ptr); |
239 | p->thread.io_bitmap_max = 0; | 238 | p->thread.io_bitmap_max = 0; |
240 | } | 239 | } |
241 | |||
242 | clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); | ||
243 | p->thread.ds_ctx = NULL; | ||
244 | |||
245 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | ||
246 | p->thread.debugctlmsr = 0; | ||
247 | |||
248 | return err; | 240 | return err; |
249 | } | 241 | } |
250 | 242 | ||
@@ -317,7 +309,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
317 | 309 | ||
318 | /* we're going to use this soon, after a few expensive things */ | 310 | /* we're going to use this soon, after a few expensive things */ |
319 | if (preload_fpu) | 311 | if (preload_fpu) |
320 | prefetch(next->xstate); | 312 | prefetch(next->fpu.state); |
321 | 313 | ||
322 | /* | 314 | /* |
323 | * Reload esp0. | 315 | * Reload esp0. |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 17cb3295cbf7..3c2422a99f1f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -49,7 +49,6 @@ | |||
49 | #include <asm/ia32.h> | 49 | #include <asm/ia32.h> |
50 | #include <asm/idle.h> | 50 | #include <asm/idle.h> |
51 | #include <asm/syscalls.h> | 51 | #include <asm/syscalls.h> |
52 | #include <asm/ds.h> | ||
53 | #include <asm/debugreg.h> | 52 | #include <asm/debugreg.h> |
54 | 53 | ||
55 | asmlinkage extern void ret_from_fork(void); | 54 | asmlinkage extern void ret_from_fork(void); |
@@ -313,13 +312,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
313 | if (err) | 312 | if (err) |
314 | goto out; | 313 | goto out; |
315 | } | 314 | } |
316 | |||
317 | clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); | ||
318 | p->thread.ds_ctx = NULL; | ||
319 | |||
320 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | ||
321 | p->thread.debugctlmsr = 0; | ||
322 | |||
323 | err = 0; | 315 | err = 0; |
324 | out: | 316 | out: |
325 | if (err && p->thread.io_bitmap_ptr) { | 317 | if (err && p->thread.io_bitmap_ptr) { |
@@ -396,7 +388,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
396 | 388 | ||
397 | /* we're going to use this soon, after a few expensive things */ | 389 | /* we're going to use this soon, after a few expensive things */ |
398 | if (preload_fpu) | 390 | if (preload_fpu) |
399 | prefetch(next->xstate); | 391 | prefetch(next->fpu.state); |
400 | 392 | ||
401 | /* | 393 | /* |
402 | * Reload esp0, LDT and the page table pointer: | 394 | * Reload esp0, LDT and the page table pointer: |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 2e9b55027b7e..70c4872cd8aa 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -2,9 +2,6 @@ | |||
2 | /* | 2 | /* |
3 | * Pentium III FXSR, SSE support | 3 | * Pentium III FXSR, SSE support |
4 | * Gareth Hughes <gareth@valinux.com>, May 2000 | 4 | * Gareth Hughes <gareth@valinux.com>, May 2000 |
5 | * | ||
6 | * BTS tracing | ||
7 | * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 | ||
8 | */ | 5 | */ |
9 | 6 | ||
10 | #include <linux/kernel.h> | 7 | #include <linux/kernel.h> |
@@ -22,7 +19,6 @@ | |||
22 | #include <linux/audit.h> | 19 | #include <linux/audit.h> |
23 | #include <linux/seccomp.h> | 20 | #include <linux/seccomp.h> |
24 | #include <linux/signal.h> | 21 | #include <linux/signal.h> |
25 | #include <linux/workqueue.h> | ||
26 | #include <linux/perf_event.h> | 22 | #include <linux/perf_event.h> |
27 | #include <linux/hw_breakpoint.h> | 23 | #include <linux/hw_breakpoint.h> |
28 | 24 | ||
@@ -36,7 +32,6 @@ | |||
36 | #include <asm/desc.h> | 32 | #include <asm/desc.h> |
37 | #include <asm/prctl.h> | 33 | #include <asm/prctl.h> |
38 | #include <asm/proto.h> | 34 | #include <asm/proto.h> |
39 | #include <asm/ds.h> | ||
40 | #include <asm/hw_breakpoint.h> | 35 | #include <asm/hw_breakpoint.h> |
41 | 36 | ||
42 | #include "tls.h" | 37 | #include "tls.h" |
@@ -693,7 +688,7 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, | |||
693 | struct perf_event_attr attr; | 688 | struct perf_event_attr attr; |
694 | 689 | ||
695 | if (!t->ptrace_bps[nr]) { | 690 | if (!t->ptrace_bps[nr]) { |
696 | hw_breakpoint_init(&attr); | 691 | ptrace_breakpoint_init(&attr); |
697 | /* | 692 | /* |
698 | * Put stub len and type to register (reserve) an inactive but | 693 | * Put stub len and type to register (reserve) an inactive but |
699 | * correct bp | 694 | * correct bp |
@@ -789,342 +784,6 @@ static int ioperm_get(struct task_struct *target, | |||
789 | 0, IO_BITMAP_BYTES); | 784 | 0, IO_BITMAP_BYTES); |
790 | } | 785 | } |
791 | 786 | ||
792 | #ifdef CONFIG_X86_PTRACE_BTS | ||
793 | /* | ||
794 | * A branch trace store context. | ||
795 | * | ||
796 | * Contexts may only be installed by ptrace_bts_config() and only for | ||
797 | * ptraced tasks. | ||
798 | * | ||
799 | * Contexts are destroyed when the tracee is detached from the tracer. | ||
800 | * The actual destruction work requires interrupts enabled, so the | ||
801 | * work is deferred and will be scheduled during __ptrace_unlink(). | ||
802 | * | ||
803 | * Contexts hold an additional task_struct reference on the traced | ||
804 | * task, as well as a reference on the tracer's mm. | ||
805 | * | ||
806 | * Ptrace already holds a task_struct for the duration of ptrace operations, | ||
807 | * but since destruction is deferred, it may be executed after both | ||
808 | * tracer and tracee exited. | ||
809 | */ | ||
810 | struct bts_context { | ||
811 | /* The branch trace handle. */ | ||
812 | struct bts_tracer *tracer; | ||
813 | |||
814 | /* The buffer used to store the branch trace and its size. */ | ||
815 | void *buffer; | ||
816 | unsigned int size; | ||
817 | |||
818 | /* The mm that paid for the above buffer. */ | ||
819 | struct mm_struct *mm; | ||
820 | |||
821 | /* The task this context belongs to. */ | ||
822 | struct task_struct *task; | ||
823 | |||
824 | /* The signal to send on a bts buffer overflow. */ | ||
825 | unsigned int bts_ovfl_signal; | ||
826 | |||
827 | /* The work struct to destroy a context. */ | ||
828 | struct work_struct work; | ||
829 | }; | ||
830 | |||
831 | static int alloc_bts_buffer(struct bts_context *context, unsigned int size) | ||
832 | { | ||
833 | void *buffer = NULL; | ||
834 | int err = -ENOMEM; | ||
835 | |||
836 | err = account_locked_memory(current->mm, current->signal->rlim, size); | ||
837 | if (err < 0) | ||
838 | return err; | ||
839 | |||
840 | buffer = kzalloc(size, GFP_KERNEL); | ||
841 | if (!buffer) | ||
842 | goto out_refund; | ||
843 | |||
844 | context->buffer = buffer; | ||
845 | context->size = size; | ||
846 | context->mm = get_task_mm(current); | ||
847 | |||
848 | return 0; | ||
849 | |||
850 | out_refund: | ||
851 | refund_locked_memory(current->mm, size); | ||
852 | return err; | ||
853 | } | ||
854 | |||
855 | static inline void free_bts_buffer(struct bts_context *context) | ||
856 | { | ||
857 | if (!context->buffer) | ||
858 | return; | ||
859 | |||
860 | kfree(context->buffer); | ||
861 | context->buffer = NULL; | ||
862 | |||
863 | refund_locked_memory(context->mm, context->size); | ||
864 | context->size = 0; | ||
865 | |||
866 | mmput(context->mm); | ||
867 | context->mm = NULL; | ||
868 | } | ||
869 | |||
870 | static void free_bts_context_work(struct work_struct *w) | ||
871 | { | ||
872 | struct bts_context *context; | ||
873 | |||
874 | context = container_of(w, struct bts_context, work); | ||
875 | |||
876 | ds_release_bts(context->tracer); | ||
877 | put_task_struct(context->task); | ||
878 | free_bts_buffer(context); | ||
879 | kfree(context); | ||
880 | } | ||
881 | |||
882 | static inline void free_bts_context(struct bts_context *context) | ||
883 | { | ||
884 | INIT_WORK(&context->work, free_bts_context_work); | ||
885 | schedule_work(&context->work); | ||
886 | } | ||
887 | |||
888 | static inline struct bts_context *alloc_bts_context(struct task_struct *task) | ||
889 | { | ||
890 | struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL); | ||
891 | if (context) { | ||
892 | context->task = task; | ||
893 | task->bts = context; | ||
894 | |||
895 | get_task_struct(task); | ||
896 | } | ||
897 | |||
898 | return context; | ||
899 | } | ||
900 | |||
901 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, | ||
902 | struct bts_struct __user *out) | ||
903 | { | ||
904 | struct bts_context *context; | ||
905 | const struct bts_trace *trace; | ||
906 | struct bts_struct bts; | ||
907 | const unsigned char *at; | ||
908 | int error; | ||
909 | |||
910 | context = child->bts; | ||
911 | if (!context) | ||
912 | return -ESRCH; | ||
913 | |||
914 | trace = ds_read_bts(context->tracer); | ||
915 | if (!trace) | ||
916 | return -ESRCH; | ||
917 | |||
918 | at = trace->ds.top - ((index + 1) * trace->ds.size); | ||
919 | if ((void *)at < trace->ds.begin) | ||
920 | at += (trace->ds.n * trace->ds.size); | ||
921 | |||
922 | if (!trace->read) | ||
923 | return -EOPNOTSUPP; | ||
924 | |||
925 | error = trace->read(context->tracer, at, &bts); | ||
926 | if (error < 0) | ||
927 | return error; | ||
928 | |||
929 | if (copy_to_user(out, &bts, sizeof(bts))) | ||
930 | return -EFAULT; | ||
931 | |||
932 | return sizeof(bts); | ||
933 | } | ||
934 | |||
935 | static int ptrace_bts_drain(struct task_struct *child, | ||
936 | long size, | ||
937 | struct bts_struct __user *out) | ||
938 | { | ||
939 | struct bts_context *context; | ||
940 | const struct bts_trace *trace; | ||
941 | const unsigned char *at; | ||
942 | int error, drained = 0; | ||
943 | |||
944 | context = child->bts; | ||
945 | if (!context) | ||
946 | return -ESRCH; | ||
947 | |||
948 | trace = ds_read_bts(context->tracer); | ||
949 | if (!trace) | ||
950 | return -ESRCH; | ||
951 | |||
952 | if (!trace->read) | ||
953 | return -EOPNOTSUPP; | ||
954 | |||
955 | if (size < (trace->ds.top - trace->ds.begin)) | ||
956 | return -EIO; | ||
957 | |||
958 | for (at = trace->ds.begin; (void *)at < trace->ds.top; | ||
959 | out++, drained++, at += trace->ds.size) { | ||
960 | struct bts_struct bts; | ||
961 | |||
962 | error = trace->read(context->tracer, at, &bts); | ||
963 | if (error < 0) | ||
964 | return error; | ||
965 | |||
966 | if (copy_to_user(out, &bts, sizeof(bts))) | ||
967 | return -EFAULT; | ||
968 | } | ||
969 | |||
970 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); | ||
971 | |||
972 | error = ds_reset_bts(context->tracer); | ||
973 | if (error < 0) | ||
974 | return error; | ||
975 | |||
976 | return drained; | ||
977 | } | ||
978 | |||
979 | static int ptrace_bts_config(struct task_struct *child, | ||
980 | long cfg_size, | ||
981 | const struct ptrace_bts_config __user *ucfg) | ||
982 | { | ||
983 | struct bts_context *context; | ||
984 | struct ptrace_bts_config cfg; | ||
985 | unsigned int flags = 0; | ||
986 | |||
987 | if (cfg_size < sizeof(cfg)) | ||
988 | return -EIO; | ||
989 | |||
990 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) | ||
991 | return -EFAULT; | ||
992 | |||
993 | context = child->bts; | ||
994 | if (!context) | ||
995 | context = alloc_bts_context(child); | ||
996 | if (!context) | ||
997 | return -ENOMEM; | ||
998 | |||
999 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { | ||
1000 | if (!cfg.signal) | ||
1001 | return -EINVAL; | ||
1002 | |||
1003 | return -EOPNOTSUPP; | ||
1004 | context->bts_ovfl_signal = cfg.signal; | ||
1005 | } | ||
1006 | |||
1007 | ds_release_bts(context->tracer); | ||
1008 | context->tracer = NULL; | ||
1009 | |||
1010 | if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) { | ||
1011 | int err; | ||
1012 | |||
1013 | free_bts_buffer(context); | ||
1014 | if (!cfg.size) | ||
1015 | return 0; | ||
1016 | |||
1017 | err = alloc_bts_buffer(context, cfg.size); | ||
1018 | if (err < 0) | ||
1019 | return err; | ||
1020 | } | ||
1021 | |||
1022 | if (cfg.flags & PTRACE_BTS_O_TRACE) | ||
1023 | flags |= BTS_USER; | ||
1024 | |||
1025 | if (cfg.flags & PTRACE_BTS_O_SCHED) | ||
1026 | flags |= BTS_TIMESTAMPS; | ||
1027 | |||
1028 | context->tracer = | ||
1029 | ds_request_bts_task(child, context->buffer, context->size, | ||
1030 | NULL, (size_t)-1, flags); | ||
1031 | if (unlikely(IS_ERR(context->tracer))) { | ||
1032 | int error = PTR_ERR(context->tracer); | ||
1033 | |||
1034 | free_bts_buffer(context); | ||
1035 | context->tracer = NULL; | ||
1036 | return error; | ||
1037 | } | ||
1038 | |||
1039 | return sizeof(cfg); | ||
1040 | } | ||
1041 | |||
1042 | static int ptrace_bts_status(struct task_struct *child, | ||
1043 | long cfg_size, | ||
1044 | struct ptrace_bts_config __user *ucfg) | ||
1045 | { | ||
1046 | struct bts_context *context; | ||
1047 | const struct bts_trace *trace; | ||
1048 | struct ptrace_bts_config cfg; | ||
1049 | |||
1050 | context = child->bts; | ||
1051 | if (!context) | ||
1052 | return -ESRCH; | ||
1053 | |||
1054 | if (cfg_size < sizeof(cfg)) | ||
1055 | return -EIO; | ||
1056 | |||
1057 | trace = ds_read_bts(context->tracer); | ||
1058 | if (!trace) | ||
1059 | return -ESRCH; | ||
1060 | |||
1061 | memset(&cfg, 0, sizeof(cfg)); | ||
1062 | cfg.size = trace->ds.end - trace->ds.begin; | ||
1063 | cfg.signal = context->bts_ovfl_signal; | ||
1064 | cfg.bts_size = sizeof(struct bts_struct); | ||
1065 | |||
1066 | if (cfg.signal) | ||
1067 | cfg.flags |= PTRACE_BTS_O_SIGNAL; | ||
1068 | |||
1069 | if (trace->ds.flags & BTS_USER) | ||
1070 | cfg.flags |= PTRACE_BTS_O_TRACE; | ||
1071 | |||
1072 | if (trace->ds.flags & BTS_TIMESTAMPS) | ||
1073 | cfg.flags |= PTRACE_BTS_O_SCHED; | ||
1074 | |||
1075 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) | ||
1076 | return -EFAULT; | ||
1077 | |||
1078 | return sizeof(cfg); | ||
1079 | } | ||
1080 | |||
1081 | static int ptrace_bts_clear(struct task_struct *child) | ||
1082 | { | ||
1083 | struct bts_context *context; | ||
1084 | const struct bts_trace *trace; | ||
1085 | |||
1086 | context = child->bts; | ||
1087 | if (!context) | ||
1088 | return -ESRCH; | ||
1089 | |||
1090 | trace = ds_read_bts(context->tracer); | ||
1091 | if (!trace) | ||
1092 | return -ESRCH; | ||
1093 | |||
1094 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); | ||
1095 | |||
1096 | return ds_reset_bts(context->tracer); | ||
1097 | } | ||
1098 | |||
1099 | static int ptrace_bts_size(struct task_struct *child) | ||
1100 | { | ||
1101 | struct bts_context *context; | ||
1102 | const struct bts_trace *trace; | ||
1103 | |||
1104 | context = child->bts; | ||
1105 | if (!context) | ||
1106 | return -ESRCH; | ||
1107 | |||
1108 | trace = ds_read_bts(context->tracer); | ||
1109 | if (!trace) | ||
1110 | return -ESRCH; | ||
1111 | |||
1112 | return (trace->ds.top - trace->ds.begin) / trace->ds.size; | ||
1113 | } | ||
1114 | |||
1115 | /* | ||
1116 | * Called from __ptrace_unlink() after the child has been moved back | ||
1117 | * to its original parent. | ||
1118 | */ | ||
1119 | void ptrace_bts_untrace(struct task_struct *child) | ||
1120 | { | ||
1121 | if (unlikely(child->bts)) { | ||
1122 | free_bts_context(child->bts); | ||
1123 | child->bts = NULL; | ||
1124 | } | ||
1125 | } | ||
1126 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
1127 | |||
1128 | /* | 787 | /* |
1129 | * Called by kernel/ptrace.c when detaching.. | 788 | * Called by kernel/ptrace.c when detaching.. |
1130 | * | 789 | * |
@@ -1252,39 +911,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
1252 | break; | 911 | break; |
1253 | #endif | 912 | #endif |
1254 | 913 | ||
1255 | /* | ||
1256 | * These bits need more cooking - not enabled yet: | ||
1257 | */ | ||
1258 | #ifdef CONFIG_X86_PTRACE_BTS | ||
1259 | case PTRACE_BTS_CONFIG: | ||
1260 | ret = ptrace_bts_config | ||
1261 | (child, data, (struct ptrace_bts_config __user *)addr); | ||
1262 | break; | ||
1263 | |||
1264 | case PTRACE_BTS_STATUS: | ||
1265 | ret = ptrace_bts_status | ||
1266 | (child, data, (struct ptrace_bts_config __user *)addr); | ||
1267 | break; | ||
1268 | |||
1269 | case PTRACE_BTS_SIZE: | ||
1270 | ret = ptrace_bts_size(child); | ||
1271 | break; | ||
1272 | |||
1273 | case PTRACE_BTS_GET: | ||
1274 | ret = ptrace_bts_read_record | ||
1275 | (child, data, (struct bts_struct __user *) addr); | ||
1276 | break; | ||
1277 | |||
1278 | case PTRACE_BTS_CLEAR: | ||
1279 | ret = ptrace_bts_clear(child); | ||
1280 | break; | ||
1281 | |||
1282 | case PTRACE_BTS_DRAIN: | ||
1283 | ret = ptrace_bts_drain | ||
1284 | (child, data, (struct bts_struct __user *) addr); | ||
1285 | break; | ||
1286 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
1287 | |||
1288 | default: | 914 | default: |
1289 | ret = ptrace_request(child, request, addr, data); | 915 | ret = ptrace_request(child, request, addr, data); |
1290 | break; | 916 | break; |
@@ -1544,14 +1170,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, | |||
1544 | 1170 | ||
1545 | case PTRACE_GET_THREAD_AREA: | 1171 | case PTRACE_GET_THREAD_AREA: |
1546 | case PTRACE_SET_THREAD_AREA: | 1172 | case PTRACE_SET_THREAD_AREA: |
1547 | #ifdef CONFIG_X86_PTRACE_BTS | ||
1548 | case PTRACE_BTS_CONFIG: | ||
1549 | case PTRACE_BTS_STATUS: | ||
1550 | case PTRACE_BTS_SIZE: | ||
1551 | case PTRACE_BTS_GET: | ||
1552 | case PTRACE_BTS_CLEAR: | ||
1553 | case PTRACE_BTS_DRAIN: | ||
1554 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
1555 | return arch_ptrace(child, request, addr, data); | 1173 | return arch_ptrace(child, request, addr, data); |
1556 | 1174 | ||
1557 | default: | 1175 | default: |
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 03801f2f761f..239427ca02af 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c | |||
@@ -31,8 +31,16 @@ struct pvclock_shadow_time { | |||
31 | u32 tsc_to_nsec_mul; | 31 | u32 tsc_to_nsec_mul; |
32 | int tsc_shift; | 32 | int tsc_shift; |
33 | u32 version; | 33 | u32 version; |
34 | u8 flags; | ||
34 | }; | 35 | }; |
35 | 36 | ||
37 | static u8 valid_flags __read_mostly = 0; | ||
38 | |||
39 | void pvclock_set_flags(u8 flags) | ||
40 | { | ||
41 | valid_flags = flags; | ||
42 | } | ||
43 | |||
36 | /* | 44 | /* |
37 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, | 45 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, |
38 | * yielding a 64-bit result. | 46 | * yielding a 64-bit result. |
@@ -91,6 +99,7 @@ static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, | |||
91 | dst->system_timestamp = src->system_time; | 99 | dst->system_timestamp = src->system_time; |
92 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; | 100 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; |
93 | dst->tsc_shift = src->tsc_shift; | 101 | dst->tsc_shift = src->tsc_shift; |
102 | dst->flags = src->flags; | ||
94 | rmb(); /* test version after fetching data */ | 103 | rmb(); /* test version after fetching data */ |
95 | } while ((src->version & 1) || (dst->version != src->version)); | 104 | } while ((src->version & 1) || (dst->version != src->version)); |
96 | 105 | ||
@@ -109,11 +118,14 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) | |||
109 | return pv_tsc_khz; | 118 | return pv_tsc_khz; |
110 | } | 119 | } |
111 | 120 | ||
121 | static atomic64_t last_value = ATOMIC64_INIT(0); | ||
122 | |||
112 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | 123 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) |
113 | { | 124 | { |
114 | struct pvclock_shadow_time shadow; | 125 | struct pvclock_shadow_time shadow; |
115 | unsigned version; | 126 | unsigned version; |
116 | cycle_t ret, offset; | 127 | cycle_t ret, offset; |
128 | u64 last; | ||
117 | 129 | ||
118 | do { | 130 | do { |
119 | version = pvclock_get_time_values(&shadow, src); | 131 | version = pvclock_get_time_values(&shadow, src); |
@@ -123,6 +135,31 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | |||
123 | barrier(); | 135 | barrier(); |
124 | } while (version != src->version); | 136 | } while (version != src->version); |
125 | 137 | ||
138 | if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && | ||
139 | (shadow.flags & PVCLOCK_TSC_STABLE_BIT)) | ||
140 | return ret; | ||
141 | |||
142 | /* | ||
143 | * Assumption here is that last_value, a global accumulator, always goes | ||
144 | * forward. If we are less than that, we should not be much smaller. | ||
145 | * We assume there is an error margin we're inside, and then the correction | ||
146 | * does not sacrifice accuracy. | ||
147 | * | ||
148 | * For reads: global may have changed between test and return, | ||
149 | * but this means someone else updated the clock at a later time. | ||
150 | * We just need to make sure we are not seeing a backwards event. | ||
151 | * | ||
152 | * For updates: last_value = ret is not enough, since two vcpus could be | ||
153 | * updating at the same time, and one of them could be slightly behind, | ||
154 | * making the assumption that last_value always goes forward fail to hold. | ||
155 | */ | ||
156 | last = atomic64_read(&last_value); | ||
157 | do { | ||
158 | if (ret < last) | ||
159 | return last; | ||
160 | last = atomic64_cmpxchg(&last_value, last, ret); | ||
161 | } while (unlikely(last != ret)); | ||
162 | |||
126 | return ret; | 163 | return ret; |
127 | } | 164 | } |
128 | 165 | ||
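
The last_value logic added to pvclock_clocksource_read() above enforces that the returned clock never runs backwards across vcpus unless the hypervisor guarantees a stable TSC. A compact user-space equivalent using C11 atomics (a sketch of the clamp, not the exact kernel cmpxchg sequence):

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t last_value;	/* largest clock value handed out so far */

/* Return a value that never goes backwards, even if this CPU's sample
 * lags one already returned on another CPU. */
uint64_t monotonic_clamp(uint64_t sample)
{
	uint64_t last = atomic_load(&last_value);

	for (;;) {
		if (sample < last)
			return last;	/* someone already published a newer value */
		if (atomic_compare_exchange_weak(&last_value, &last, sample))
			return sample;	/* our sample is now the published maximum */
		/* the failed CAS reloaded 'last'; retry against the fresher value */
	}
}
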
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 12e9feaa2f7a..e72d3fc6547d 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -495,10 +495,18 @@ void force_hpet_resume(void) | |||
495 | /* | 495 | /* |
496 | * HPET MSI on some boards (ATI SB700/SB800) has side effect on | 496 | * HPET MSI on some boards (ATI SB700/SB800) has side effect on |
497 | * floppy DMA. Disable HPET MSI on such platforms. | 497 | * floppy DMA. Disable HPET MSI on such platforms. |
498 | * See erratum #27 (Misinterpreted MSI Requests May Result in | ||
499 | * Corrupted LPC DMA Data) in AMD Publication #46837, | ||
500 | * "SB700 Family Product Errata", Rev. 1.0, March 2010. | ||
501 | * | ||
502 | * Also force the read back of the CMP register in hpet_next_event() | ||
503 | * to work around the problem that the CMP register write seems to be | ||
504 | * delayed. See hpet_next_event() for details. | ||
498 | */ | 505 | */ |
499 | static void force_disable_hpet_msi(struct pci_dev *unused) | 506 | static void force_disable_hpet_msi(struct pci_dev *unused) |
500 | { | 507 | { |
501 | hpet_msi_disable = 1; | 508 | hpet_msi_disable = 1; |
509 | hpet_readback_cmp = 1; | ||
502 | } | 510 | } |
503 | 511 | ||
504 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, | 512 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c4851eff57b3..b4ae4acbd031 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -676,6 +676,17 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | |||
676 | DMI_MATCH(DMI_BOARD_NAME, "DG45FC"), | 676 | DMI_MATCH(DMI_BOARD_NAME, "DG45FC"), |
677 | }, | 677 | }, |
678 | }, | 678 | }, |
679 | /* | ||
680 | * The Dell Inspiron Mini 1012 has DMI_BIOS_VENDOR = "Dell Inc.", so | ||
681 | * match on the product name. | ||
682 | */ | ||
683 | { | ||
684 | .callback = dmi_low_memory_corruption, | ||
685 | .ident = "Phoenix BIOS", | ||
686 | .matches = { | ||
687 | DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"), | ||
688 | }, | ||
689 | }, | ||
679 | #endif | 690 | #endif |
680 | {} | 691 | {} |
681 | }; | 692 | }; |
@@ -725,6 +736,7 @@ void __init setup_arch(char **cmdline_p) | |||
725 | /* VMI may relocate the fixmap; do this before touching ioremap area */ | 736 | /* VMI may relocate the fixmap; do this before touching ioremap area */ |
726 | vmi_init(); | 737 | vmi_init(); |
727 | 738 | ||
739 | early_trap_init(); | ||
728 | early_cpu_init(); | 740 | early_cpu_init(); |
729 | early_ioremap_init(); | 741 | early_ioremap_init(); |
730 | 742 | ||
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ef6370b00e70..a867940a6dfc 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -265,10 +265,10 @@ void __init setup_per_cpu_areas(void) | |||
265 | 265 | ||
266 | #if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) | 266 | #if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) |
267 | /* | 267 | /* |
268 | * make sure boot cpu node_number is right, when boot cpu is on the | 268 | * make sure boot cpu numa_node is right, when boot cpu is on the |
269 | * node that doesn't have mem installed | 269 | * node that doesn't have mem installed |
270 | */ | 270 | */ |
271 | per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id); | 271 | set_cpu_numa_node(boot_cpu_id, early_cpu_to_node(boot_cpu_id)); |
272 | #endif | 272 | #endif |
273 | 273 | ||
274 | /* Setup node to cpumask map */ | 274 | /* Setup node to cpumask map */ |
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c index 34e099382651..7ded57896c0a 100644 --- a/arch/x86/kernel/sfi.c +++ b/arch/x86/kernel/sfi.c | |||
@@ -81,7 +81,6 @@ static int __init sfi_parse_cpus(struct sfi_table_header *table) | |||
81 | #endif /* CONFIG_X86_LOCAL_APIC */ | 81 | #endif /* CONFIG_X86_LOCAL_APIC */ |
82 | 82 | ||
83 | #ifdef CONFIG_X86_IO_APIC | 83 | #ifdef CONFIG_X86_IO_APIC |
84 | static u32 gsi_base; | ||
85 | 84 | ||
86 | static int __init sfi_parse_ioapic(struct sfi_table_header *table) | 85 | static int __init sfi_parse_ioapic(struct sfi_table_header *table) |
87 | { | 86 | { |
@@ -94,8 +93,7 @@ static int __init sfi_parse_ioapic(struct sfi_table_header *table) | |||
94 | pentry = (struct sfi_apic_table_entry *)sb->pentry; | 93 | pentry = (struct sfi_apic_table_entry *)sb->pentry; |
95 | 94 | ||
96 | for (i = 0; i < num; i++) { | 95 | for (i = 0; i < num; i++) { |
97 | mp_register_ioapic(i, pentry->phys_addr, gsi_base); | 96 | mp_register_ioapic(i, pentry->phys_addr, gsi_end + 1); |
98 | gsi_base += io_apic_get_redir_entries(i); | ||
99 | pentry++; | 97 | pentry++; |
100 | } | 98 | } |
101 | 99 | ||
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 763d815e27a0..37462f1ddba5 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -1215,9 +1215,17 @@ __init void prefill_possible_map(void) | |||
1215 | if (!num_processors) | 1215 | if (!num_processors) |
1216 | num_processors = 1; | 1216 | num_processors = 1; |
1217 | 1217 | ||
1218 | if (setup_possible_cpus == -1) | 1218 | i = setup_max_cpus ?: 1; |
1219 | possible = num_processors + disabled_cpus; | 1219 | if (setup_possible_cpus == -1) { |
1220 | else | 1220 | possible = num_processors; |
1221 | #ifdef CONFIG_HOTPLUG_CPU | ||
1222 | if (setup_max_cpus) | ||
1223 | possible += disabled_cpus; | ||
1224 | #else | ||
1225 | if (possible > i) | ||
1226 | possible = i; | ||
1227 | #endif | ||
1228 | } else | ||
1221 | possible = setup_possible_cpus; | 1229 | possible = setup_possible_cpus; |
1222 | 1230 | ||
1223 | total_cpus = max_t(int, possible, num_processors + disabled_cpus); | 1231 | total_cpus = max_t(int, possible, num_processors + disabled_cpus); |
@@ -1230,11 +1238,23 @@ __init void prefill_possible_map(void) | |||
1230 | possible = nr_cpu_ids; | 1238 | possible = nr_cpu_ids; |
1231 | } | 1239 | } |
1232 | 1240 | ||
1241 | #ifdef CONFIG_HOTPLUG_CPU | ||
1242 | if (!setup_max_cpus) | ||
1243 | #endif | ||
1244 | if (possible > i) { | ||
1245 | printk(KERN_WARNING | ||
1246 | "%d Processors exceeds max_cpus limit of %u\n", | ||
1247 | possible, setup_max_cpus); | ||
1248 | possible = i; | ||
1249 | } | ||
1250 | |||
1233 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", | 1251 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", |
1234 | possible, max_t(int, possible - num_processors, 0)); | 1252 | possible, max_t(int, possible - num_processors, 0)); |
1235 | 1253 | ||
1236 | for (i = 0; i < possible; i++) | 1254 | for (i = 0; i < possible; i++) |
1237 | set_cpu_possible(i, true); | 1255 | set_cpu_possible(i, true); |
1256 | for (; i < NR_CPUS; i++) | ||
1257 | set_cpu_possible(i, false); | ||
1238 | 1258 | ||
1239 | nr_cpu_ids = possible; | 1259 | nr_cpu_ids = possible; |
1240 | } | 1260 | } |
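
The prefill_possible_map() rework above sizes the possible-CPU map from the detected and disabled counts, the optional possible_cpus= override, the maxcpus= limit and NR_CPUS. The standalone function below mirrors that decision tree, with a hotplug flag standing in for CONFIG_HOTPLUG_CPU; it is an illustration of the sizing rules, not kernel code.

/* detected: CPUs found in firmware tables; disabled: disabled entries;
 * override: possible_cpus= (-1 when absent); max_cpus: maxcpus= (0 means
 * boot CPU only); nr_cpu_ids: compile-time NR_CPUS; hotplug: stands in
 * for CONFIG_HOTPLUG_CPU. */
int possible_cpus(int detected, int disabled, int override,
		  int max_cpus, int nr_cpu_ids, int hotplug)
{
	int limit = max_cpus ? max_cpus : 1;
	int possible;

	if (override == -1) {
		possible = detected;
		if (hotplug) {
			if (max_cpus)
				possible += disabled;	/* leave room for hotplug */
		} else if (possible > limit) {
			possible = limit;
		}
	} else {
		possible = override;
	}

	if (possible > nr_cpu_ids)	/* never exceed the compile-time maximum */
		possible = nr_cpu_ids;

	if ((!hotplug || !max_cpus) && possible > limit)
		possible = limit;	/* clamp to maxcpus= unless hotplug can add CPUs later */

	return possible;
}
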
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 3149032ff107..58de45ee08b6 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -158,22 +158,6 @@ static int enable_single_step(struct task_struct *child) | |||
158 | } | 158 | } |
159 | 159 | ||
160 | /* | 160 | /* |
161 | * Install this value in MSR_IA32_DEBUGCTLMSR whenever child is running. | ||
162 | */ | ||
163 | static void write_debugctlmsr(struct task_struct *child, unsigned long val) | ||
164 | { | ||
165 | if (child->thread.debugctlmsr == val) | ||
166 | return; | ||
167 | |||
168 | child->thread.debugctlmsr = val; | ||
169 | |||
170 | if (child != current) | ||
171 | return; | ||
172 | |||
173 | update_debugctlmsr(val); | ||
174 | } | ||
175 | |||
176 | /* | ||
177 | * Enable single or block step. | 161 | * Enable single or block step. |
178 | */ | 162 | */ |
179 | static void enable_step(struct task_struct *child, bool block) | 163 | static void enable_step(struct task_struct *child, bool block) |
@@ -186,15 +170,17 @@ static void enable_step(struct task_struct *child, bool block) | |||
186 | * that uses user-mode single stepping itself. | 170 | * that uses user-mode single stepping itself. |
187 | */ | 171 | */ |
188 | if (enable_single_step(child) && block) { | 172 | if (enable_single_step(child) && block) { |
189 | set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 173 | unsigned long debugctl = get_debugctlmsr(); |
190 | write_debugctlmsr(child, | 174 | |
191 | child->thread.debugctlmsr | DEBUGCTLMSR_BTF); | 175 | debugctl |= DEBUGCTLMSR_BTF; |
192 | } else { | 176 | update_debugctlmsr(debugctl); |
193 | write_debugctlmsr(child, | 177 | set_tsk_thread_flag(child, TIF_BLOCKSTEP); |
194 | child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); | 178 | } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { |
195 | 179 | unsigned long debugctl = get_debugctlmsr(); | |
196 | if (!child->thread.debugctlmsr) | 180 | |
197 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 181 | debugctl &= ~DEBUGCTLMSR_BTF; |
182 | update_debugctlmsr(debugctl); | ||
183 | clear_tsk_thread_flag(child, TIF_BLOCKSTEP); | ||
198 | } | 184 | } |
199 | } | 185 | } |
200 | 186 | ||
@@ -213,11 +199,13 @@ void user_disable_single_step(struct task_struct *child) | |||
213 | /* | 199 | /* |
214 | * Make sure block stepping (BTF) is disabled. | 200 | * Make sure block stepping (BTF) is disabled. |
215 | */ | 201 | */ |
216 | write_debugctlmsr(child, | 202 | if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { |
217 | child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); | 203 | unsigned long debugctl = get_debugctlmsr(); |
218 | 204 | ||
219 | if (!child->thread.debugctlmsr) | 205 | debugctl &= ~DEBUGCTLMSR_BTF; |
220 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 206 | update_debugctlmsr(debugctl); |
207 | clear_tsk_thread_flag(child, TIF_BLOCKSTEP); | ||
208 | } | ||
221 | 209 | ||
222 | /* Always clear TIF_SINGLESTEP... */ | 210 | /* Always clear TIF_SINGLESTEP... */ |
223 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); | 211 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); |
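
With the per-thread debugctlmsr cache gone, the step.c and process.c hunks derive the BTF (branch trap) bit purely from TIF_BLOCKSTEP at context-switch time. A small sketch of that read-modify-write, with a plain variable standing in for the real MSR accessors:

#include <stdbool.h>
#include <stdint.h>

#define DEBUGCTL_BTF	(1u << 1)	/* stand-in for DEBUGCTLMSR_BTF */

static uint64_t fake_debugctl;		/* stands in for reading/writing MSR_IA32_DEBUGCTLMSR */

static uint64_t get_debugctl(void)	{ return fake_debugctl; }
static void set_debugctl(uint64_t v)	{ fake_debugctl = v; }

/* Called on a context switch: touch the register only when the flag differs. */
void switch_blockstep(bool prev_blockstep, bool next_blockstep)
{
	uint64_t debugctl;

	if (prev_blockstep == next_blockstep)
		return;			/* nothing changed, skip the MSR access */

	debugctl = get_debugctl();
	if (next_blockstep)
		debugctl |= DEBUGCTL_BTF;
	else
		debugctl &= ~DEBUGCTL_BTF;
	set_debugctl(debugctl);
}
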
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 86c9f91b48ae..c2f1b26141e2 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c | |||
@@ -46,6 +46,7 @@ | |||
46 | 46 | ||
47 | /* Global pointer to shared data; NULL means no measured launch. */ | 47 | /* Global pointer to shared data; NULL means no measured launch. */ |
48 | struct tboot *tboot __read_mostly; | 48 | struct tboot *tboot __read_mostly; |
49 | EXPORT_SYMBOL(tboot); | ||
49 | 50 | ||
50 | /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ | 51 | /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ |
51 | #define AP_WAIT_TIMEOUT 1 | 52 | #define AP_WAIT_TIMEOUT 1 |
@@ -175,6 +176,9 @@ static void add_mac_region(phys_addr_t start, unsigned long size) | |||
175 | struct tboot_mac_region *mr; | 176 | struct tboot_mac_region *mr; |
176 | phys_addr_t end = start + size; | 177 | phys_addr_t end = start + size; |
177 | 178 | ||
179 | if (tboot->num_mac_regions >= MAX_TB_MAC_REGIONS) | ||
180 | panic("tboot: Too many MAC regions\n"); | ||
181 | |||
178 | if (start && size) { | 182 | if (start && size) { |
179 | mr = &tboot->mac_regions[tboot->num_mac_regions++]; | 183 | mr = &tboot->mac_regions[tboot->num_mac_regions++]; |
180 | mr->start = round_down(start, PAGE_SIZE); | 184 | mr->start = round_down(start, PAGE_SIZE); |
@@ -184,18 +188,17 @@ static void add_mac_region(phys_addr_t start, unsigned long size) | |||
184 | 188 | ||
185 | static int tboot_setup_sleep(void) | 189 | static int tboot_setup_sleep(void) |
186 | { | 190 | { |
191 | int i; | ||
192 | |||
187 | tboot->num_mac_regions = 0; | 193 | tboot->num_mac_regions = 0; |
188 | 194 | ||
189 | /* S3 resume code */ | 195 | for (i = 0; i < e820.nr_map; i++) { |
190 | add_mac_region(acpi_wakeup_address, WAKEUP_SIZE); | 196 | if ((e820.map[i].type != E820_RAM) |
197 | && (e820.map[i].type != E820_RESERVED_KERN)) | ||
198 | continue; | ||
191 | 199 | ||
192 | #ifdef CONFIG_X86_TRAMPOLINE | 200 | add_mac_region(e820.map[i].addr, e820.map[i].size); |
193 | /* AP trampoline code */ | 201 | } |
194 | add_mac_region(virt_to_phys(trampoline_base), TRAMPOLINE_SIZE); | ||
195 | #endif | ||
196 | |||
197 | /* kernel code + data + bss */ | ||
198 | add_mac_region(virt_to_phys(_text), _end - _text); | ||
199 | 202 | ||
200 | tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address; | 203 | tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address; |
201 | 204 | ||
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 17b03dd3a6b5..7fea555929e2 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * SGI UltraViolet TLB flush routines. | 2 | * SGI UltraViolet TLB flush routines. |
3 | * | 3 | * |
4 | * (c) 2008 Cliff Wickman <cpw@sgi.com>, SGI. | 4 | * (c) 2008-2010 Cliff Wickman <cpw@sgi.com>, SGI. |
5 | * | 5 | * |
6 | * This code is released under the GNU General Public License version 2 or | 6 | * This code is released under the GNU General Public License version 2 or |
7 | * later. | 7 | * later. |
@@ -20,42 +20,67 @@ | |||
20 | #include <asm/idle.h> | 20 | #include <asm/idle.h> |
21 | #include <asm/tsc.h> | 21 | #include <asm/tsc.h> |
22 | #include <asm/irq_vectors.h> | 22 | #include <asm/irq_vectors.h> |
23 | #include <asm/timer.h> | ||
23 | 24 | ||
24 | static struct bau_control **uv_bau_table_bases __read_mostly; | 25 | struct msg_desc { |
25 | static int uv_bau_retry_limit __read_mostly; | 26 | struct bau_payload_queue_entry *msg; |
27 | int msg_slot; | ||
28 | int sw_ack_slot; | ||
29 | struct bau_payload_queue_entry *va_queue_first; | ||
30 | struct bau_payload_queue_entry *va_queue_last; | ||
31 | }; | ||
26 | 32 | ||
27 | /* base pnode in this partition */ | 33 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL |
28 | static int uv_partition_base_pnode __read_mostly; | 34 | |
35 | static int uv_bau_max_concurrent __read_mostly; | ||
36 | |||
37 | static int nobau; | ||
38 | static int __init setup_nobau(char *arg) | ||
39 | { | ||
40 | nobau = 1; | ||
41 | return 0; | ||
42 | } | ||
43 | early_param("nobau", setup_nobau); | ||
29 | 44 | ||
30 | static unsigned long uv_mmask __read_mostly; | 45 | /* base pnode in this partition */ |
46 | static int uv_partition_base_pnode __read_mostly; | ||
47 | /* position of pnode (which is nasid>>1): */ | ||
48 | static int uv_nshift __read_mostly; | ||
49 | static unsigned long uv_mmask __read_mostly; | ||
31 | 50 | ||
32 | static DEFINE_PER_CPU(struct ptc_stats, ptcstats); | 51 | static DEFINE_PER_CPU(struct ptc_stats, ptcstats); |
33 | static DEFINE_PER_CPU(struct bau_control, bau_control); | 52 | static DEFINE_PER_CPU(struct bau_control, bau_control); |
53 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); | ||
54 | |||
55 | struct reset_args { | ||
56 | int sender; | ||
57 | }; | ||
34 | 58 | ||
35 | /* | 59 | /* |
36 | * Determine the first node on a blade. | 60 | * Determine the first node on a uvhub. 'Nodes' are used for kernel |
61 | * memory allocation. | ||
37 | */ | 62 | */ |
38 | static int __init blade_to_first_node(int blade) | 63 | static int __init uvhub_to_first_node(int uvhub) |
39 | { | 64 | { |
40 | int node, b; | 65 | int node, b; |
41 | 66 | ||
42 | for_each_online_node(node) { | 67 | for_each_online_node(node) { |
43 | b = uv_node_to_blade_id(node); | 68 | b = uv_node_to_blade_id(node); |
44 | if (blade == b) | 69 | if (uvhub == b) |
45 | return node; | 70 | return node; |
46 | } | 71 | } |
47 | return -1; /* shouldn't happen */ | 72 | return -1; |
48 | } | 73 | } |
49 | 74 | ||
50 | /* | 75 | /* |
51 | * Determine the apicid of the first cpu on a blade. | 76 | * Determine the apicid of the first cpu on a uvhub. |
52 | */ | 77 | */ |
53 | static int __init blade_to_first_apicid(int blade) | 78 | static int __init uvhub_to_first_apicid(int uvhub) |
54 | { | 79 | { |
55 | int cpu; | 80 | int cpu; |
56 | 81 | ||
57 | for_each_present_cpu(cpu) | 82 | for_each_present_cpu(cpu) |
58 | if (blade == uv_cpu_to_blade_id(cpu)) | 83 | if (uvhub == uv_cpu_to_blade_id(cpu)) |
59 | return per_cpu(x86_cpu_to_apicid, cpu); | 84 | return per_cpu(x86_cpu_to_apicid, cpu); |
60 | return -1; | 85 | return -1; |
61 | } | 86 | } |
@@ -68,195 +93,459 @@ static int __init blade_to_first_apicid(int blade) | |||
68 | * clear of the Timeout bit (as well) will free the resource. No reply will | 93 | * clear of the Timeout bit (as well) will free the resource. No reply will |
69 | * be sent (the hardware will only do one reply per message). | 94 | * be sent (the hardware will only do one reply per message). |
70 | */ | 95 | */ |
71 | static void uv_reply_to_message(int resource, | 96 | static inline void uv_reply_to_message(struct msg_desc *mdp, |
72 | struct bau_payload_queue_entry *msg, | 97 | struct bau_control *bcp) |
73 | struct bau_msg_status *msp) | ||
74 | { | 98 | { |
75 | unsigned long dw; | 99 | unsigned long dw; |
100 | struct bau_payload_queue_entry *msg; | ||
76 | 101 | ||
77 | dw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource); | 102 | msg = mdp->msg; |
103 | if (!msg->canceled) { | ||
104 | dw = (msg->sw_ack_vector << UV_SW_ACK_NPENDING) | | ||
105 | msg->sw_ack_vector; | ||
106 | uv_write_local_mmr( | ||
107 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); | ||
108 | } | ||
78 | msg->replied_to = 1; | 109 | msg->replied_to = 1; |
79 | msg->sw_ack_vector = 0; | 110 | msg->sw_ack_vector = 0; |
80 | if (msp) | ||
81 | msp->seen_by.bits = 0; | ||
82 | uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); | ||
83 | } | 111 | } |
84 | 112 | ||
85 | /* | 113 | /* |
86 | * Do all the things a cpu should do for a TLB shootdown message. | 114 | * Process the receipt of a RETRY message |
87 | * Other cpu's may come here at the same time for this message. | ||
88 | */ | 115 | */ |
89 | static void uv_bau_process_message(struct bau_payload_queue_entry *msg, | 116 | static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, |
90 | int msg_slot, int sw_ack_slot) | 117 | struct bau_control *bcp) |
91 | { | 118 | { |
92 | unsigned long this_cpu_mask; | 119 | int i; |
93 | struct bau_msg_status *msp; | 120 | int cancel_count = 0; |
94 | int cpu; | 121 | int slot2; |
122 | unsigned long msg_res; | ||
123 | unsigned long mmr = 0; | ||
124 | struct bau_payload_queue_entry *msg; | ||
125 | struct bau_payload_queue_entry *msg2; | ||
126 | struct ptc_stats *stat; | ||
95 | 127 | ||
96 | msp = __get_cpu_var(bau_control).msg_statuses + msg_slot; | 128 | msg = mdp->msg; |
97 | cpu = uv_blade_processor_id(); | 129 | stat = &per_cpu(ptcstats, bcp->cpu); |
98 | msg->number_of_cpus = | 130 | stat->d_retries++; |
99 | uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id())); | 131 | /* |
100 | this_cpu_mask = 1UL << cpu; | 132 | * cancel any message from msg+1 to the retry itself |
101 | if (msp->seen_by.bits & this_cpu_mask) | 133 | */ |
102 | return; | 134 | for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) { |
103 | atomic_or_long(&msp->seen_by.bits, this_cpu_mask); | 135 | if (msg2 > mdp->va_queue_last) |
136 | msg2 = mdp->va_queue_first; | ||
137 | if (msg2 == msg) | ||
138 | break; | ||
139 | |||
140 | /* same conditions for cancellation as uv_do_reset */ | ||
141 | if ((msg2->replied_to == 0) && (msg2->canceled == 0) && | ||
142 | (msg2->sw_ack_vector) && ((msg2->sw_ack_vector & | ||
143 | msg->sw_ack_vector) == 0) && | ||
144 | (msg2->sending_cpu == msg->sending_cpu) && | ||
145 | (msg2->msg_type != MSG_NOOP)) { | ||
146 | slot2 = msg2 - mdp->va_queue_first; | ||
147 | mmr = uv_read_local_mmr | ||
148 | (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); | ||
149 | msg_res = ((msg2->sw_ack_vector << 8) | | ||
150 | msg2->sw_ack_vector); | ||
151 | /* | ||
152 | * This is a message retry; clear the resources held | ||
153 | * by the previous message only if they timed out. | ||
154 | * If it has not timed out we have an unexpected | ||
155 | * situation to report. | ||
156 | */ | ||
157 | if (mmr & (msg_res << 8)) { | ||
158 | /* | ||
159 | * has the resource timed out? | ||
160 | * make everyone ignore the cancelled message. | ||
161 | */ | ||
162 | msg2->canceled = 1; | ||
163 | stat->d_canceled++; | ||
164 | cancel_count++; | ||
165 | uv_write_local_mmr( | ||
166 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, | ||
167 | (msg_res << 8) | msg_res); | ||
168 | } else | ||
169 | printk(KERN_INFO "note bau retry: no effect\n"); | ||
170 | } | ||
171 | } | ||
172 | if (!cancel_count) | ||
173 | stat->d_nocanceled++; | ||
174 | } | ||
104 | 175 | ||
105 | if (msg->replied_to == 1) | 176 | /* |
106 | return; | 177 | * Do all the things a cpu should do for a TLB shootdown message. |
178 | * Other cpu's may come here at the same time for this message. | ||
179 | */ | ||
180 | static void uv_bau_process_message(struct msg_desc *mdp, | ||
181 | struct bau_control *bcp) | ||
182 | { | ||
183 | int msg_ack_count; | ||
184 | short socket_ack_count = 0; | ||
185 | struct ptc_stats *stat; | ||
186 | struct bau_payload_queue_entry *msg; | ||
187 | struct bau_control *smaster = bcp->socket_master; | ||
107 | 188 | ||
189 | /* | ||
190 | * This must be a normal message, or retry of a normal message | ||
191 | */ | ||
192 | msg = mdp->msg; | ||
193 | stat = &per_cpu(ptcstats, bcp->cpu); | ||
108 | if (msg->address == TLB_FLUSH_ALL) { | 194 | if (msg->address == TLB_FLUSH_ALL) { |
109 | local_flush_tlb(); | 195 | local_flush_tlb(); |
110 | __get_cpu_var(ptcstats).alltlb++; | 196 | stat->d_alltlb++; |
111 | } else { | 197 | } else { |
112 | __flush_tlb_one(msg->address); | 198 | __flush_tlb_one(msg->address); |
113 | __get_cpu_var(ptcstats).onetlb++; | 199 | stat->d_onetlb++; |
114 | } | 200 | } |
201 | stat->d_requestee++; | ||
202 | |||
203 | /* | ||
204 | * One cpu on each uvhub has the additional job on a RETRY | ||
205 | * of releasing the resource held by the message that is | ||
206 | * being retried. That message is identified by sending | ||
207 | * cpu number. | ||
208 | */ | ||
209 | if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master) | ||
210 | uv_bau_process_retry_msg(mdp, bcp); | ||
115 | 211 | ||
116 | __get_cpu_var(ptcstats).requestee++; | 212 | /* |
213 | * This is a sw_ack message, so we have to reply to it. | ||
214 | * Count each responding cpu on the socket. This avoids | ||
215 | * pinging the count's cache line back and forth between | ||
216 | * the sockets. | ||
217 | */ | ||
218 | socket_ack_count = atomic_add_short_return(1, (struct atomic_short *) | ||
219 | &smaster->socket_acknowledge_count[mdp->msg_slot]); | ||
220 | if (socket_ack_count == bcp->cpus_in_socket) { | ||
221 | /* | ||
222 | * Both sockets dump their completed count total into | ||
223 | * the message's count. | ||
224 | */ | ||
225 | smaster->socket_acknowledge_count[mdp->msg_slot] = 0; | ||
226 | msg_ack_count = atomic_add_short_return(socket_ack_count, | ||
227 | (struct atomic_short *)&msg->acknowledge_count); | ||
228 | |||
229 | if (msg_ack_count == bcp->cpus_in_uvhub) { | ||
230 | /* | ||
231 | * All cpus in uvhub saw it; reply | ||
232 | */ | ||
233 | uv_reply_to_message(mdp, bcp); | ||
234 | } | ||
235 | } | ||
117 | 236 | ||
118 | atomic_inc_short(&msg->acknowledge_count); | 237 | return; |
119 | if (msg->number_of_cpus == msg->acknowledge_count) | ||
120 | uv_reply_to_message(sw_ack_slot, msg, msp); | ||
121 | } | 238 | } |
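The socket-then-hub acknowledge counting above is a two-level reduction: every responding cpu bumps a counter on its own socket, and only the last cpu on that socket folds the subtotal into the per-message count, so the hot cache line never bounces between sockets. A minimal user-space sketch of the same pattern, using C11 atomics in place of the kernel's atomic_add_short_return(); all names here are illustrative, not kernel API:

#include <stdatomic.h>
#include <stdbool.h>

struct socket_ctr { atomic_int acks; int cpus_in_socket; };
struct msg_ctr    { atomic_int acks; int cpus_in_hub; };

/* Returns true only for the last cpu on the hub to acknowledge. */
static bool ack_two_level(struct socket_ctr *s, struct msg_ctr *m)
{
        /* each cpu touches only its own socket's cache line */
        int socket_acks = atomic_fetch_add(&s->acks, 1) + 1;

        if (socket_acks != s->cpus_in_socket)
                return false;
        /* last cpu on the socket folds the subtotal into the message count */
        atomic_store(&s->acks, 0);
        return atomic_fetch_add(&m->acks, socket_acks) + socket_acks
                == m->cpus_in_hub;
}

In the kernel code the per-socket counter is indexed by message slot (socket_acknowledge_count[mdp->msg_slot]), so several messages can be counted concurrently.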
122 | 239 | ||
123 | /* | 240 | /* |
124 | * Examine the payload queue on one distribution node to see | 241 | * Determine the first cpu on a uvhub. |
125 | * which messages have not been seen, and which cpu(s) have not seen them. | 242 | */ |
243 | static int uvhub_to_first_cpu(int uvhub) | ||
244 | { | ||
245 | int cpu; | ||
246 | for_each_present_cpu(cpu) | ||
247 | if (uvhub == uv_cpu_to_blade_id(cpu)) | ||
248 | return cpu; | ||
249 | return -1; | ||
250 | } | ||
251 | |||
252 | /* | ||
253 | * Last resort when we get a large number of destination timeouts is | ||
254 | * to clear resources held by a given cpu. | ||
255 | * Do this with IPI so that all messages in the BAU message queue | ||
256 | * can be identified by their nonzero sw_ack_vector field. | ||
126 | * | 257 | * |
127 | * Returns the number of cpu's that have not responded. | 258 | * This is entered for a single cpu on the uvhub. |
259 | * The sender wants this uvhub to free a specific message's | ||
260 | * sw_ack resources. | ||
128 | */ | 261 | */ |
129 | static int uv_examine_destination(struct bau_control *bau_tablesp, int sender) | 262 | static void |
263 | uv_do_reset(void *ptr) | ||
130 | { | 264 | { |
131 | struct bau_payload_queue_entry *msg; | ||
132 | struct bau_msg_status *msp; | ||
133 | int count = 0; | ||
134 | int i; | 265 | int i; |
135 | int j; | 266 | int slot; |
267 | int count = 0; | ||
268 | unsigned long mmr; | ||
269 | unsigned long msg_res; | ||
270 | struct bau_control *bcp; | ||
271 | struct reset_args *rap; | ||
272 | struct bau_payload_queue_entry *msg; | ||
273 | struct ptc_stats *stat; | ||
136 | 274 | ||
137 | for (msg = bau_tablesp->va_queue_first, i = 0; i < DEST_Q_SIZE; | 275 | bcp = &per_cpu(bau_control, smp_processor_id()); |
138 | msg++, i++) { | 276 | rap = (struct reset_args *)ptr; |
139 | if ((msg->sending_cpu == sender) && (!msg->replied_to)) { | 277 | stat = &per_cpu(ptcstats, bcp->cpu); |
140 | msp = bau_tablesp->msg_statuses + i; | 278 | stat->d_resets++; |
141 | printk(KERN_DEBUG | 279 | |
142 | "blade %d: address:%#lx %d of %d, not cpu(s): ", | 280 | /* |
143 | i, msg->address, msg->acknowledge_count, | 281 | * We're looking for the given sender, and |
144 | msg->number_of_cpus); | 282 | * will free its sw_ack resource. |
145 | for (j = 0; j < msg->number_of_cpus; j++) { | 283 | * If all cpu's finally responded after the timeout, its |
146 | if (!((1L << j) & msp->seen_by.bits)) { | 284 | * message 'replied_to' was set. |
147 | count++; | 285 | */ |
148 | printk("%d ", j); | 286 | for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { |
149 | } | 287 | /* uv_do_reset: same conditions for cancellation as |
288 | uv_bau_process_retry_msg() */ | ||
289 | if ((msg->replied_to == 0) && | ||
290 | (msg->canceled == 0) && | ||
291 | (msg->sending_cpu == rap->sender) && | ||
292 | (msg->sw_ack_vector) && | ||
293 | (msg->msg_type != MSG_NOOP)) { | ||
294 | /* | ||
295 | * make everyone else ignore this message | ||
296 | */ | ||
297 | msg->canceled = 1; | ||
298 | slot = msg - bcp->va_queue_first; | ||
299 | count++; | ||
300 | /* | ||
301 | * only reset the resource if it is still pending | ||
302 | */ | ||
303 | mmr = uv_read_local_mmr | ||
304 | (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); | ||
305 | msg_res = ((msg->sw_ack_vector << 8) | | ||
306 | msg->sw_ack_vector); | ||
307 | if (mmr & msg_res) { | ||
308 | stat->d_rcanceled++; | ||
309 | uv_write_local_mmr( | ||
310 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, | ||
311 | msg_res); | ||
150 | } | 312 | } |
151 | printk("\n"); | ||
152 | } | 313 | } |
153 | } | 314 | } |
154 | return count; | 315 | return; |
155 | } | 316 | } |
156 | 317 | ||
157 | /* | 318 | /* |
158 | * Examine the payload queue on all the distribution nodes to see | 319 | * Use IPI to get all target uvhubs to release resources held by |
159 | * which messages have not been seen, and which cpu(s) have not seen them. | 320 | * a given sending cpu number. |
160 | * | ||
161 | * Returns the number of cpu's that have not responded. | ||
162 | */ | 321 | */ |
163 | static int uv_examine_destinations(struct bau_target_nodemask *distribution) | 322 | static void uv_reset_with_ipi(struct bau_target_uvhubmask *distribution, |
323 | int sender) | ||
164 | { | 324 | { |
165 | int sender; | 325 | int uvhub; |
166 | int i; | 326 | int cpu; |
167 | int count = 0; | 327 | cpumask_t mask; |
328 | struct reset_args reset_args; | ||
329 | |||
330 | reset_args.sender = sender; | ||
168 | 331 | ||
169 | sender = smp_processor_id(); | 332 | cpus_clear(mask); |
170 | for (i = 0; i < sizeof(struct bau_target_nodemask) * BITSPERBYTE; i++) { | 333 | /* find a single cpu for each uvhub in this distribution mask */ |
171 | if (!bau_node_isset(i, distribution)) | 334 | for (uvhub = 0; |
335 | uvhub < sizeof(struct bau_target_uvhubmask) * BITSPERBYTE; | ||
336 | uvhub++) { | ||
337 | if (!bau_uvhub_isset(uvhub, distribution)) | ||
172 | continue; | 338 | continue; |
173 | count += uv_examine_destination(uv_bau_table_bases[i], sender); | 339 | /* find a cpu for this uvhub */ |
340 | cpu = uvhub_to_first_cpu(uvhub); | ||
341 | cpu_set(cpu, mask); | ||
174 | } | 342 | } |
175 | return count; | 343 | /* IPI all cpus; Preemption is already disabled */ |
344 | smp_call_function_many(&mask, uv_do_reset, (void *)&reset_args, 1); | ||
345 | return; | ||
346 | } | ||
347 | |||
348 | static inline unsigned long | ||
349 | cycles_2_us(unsigned long long cyc) | ||
350 | { | ||
351 | unsigned long long ns; | ||
352 | unsigned long us; | ||
353 | ns = (cyc * per_cpu(cyc2ns, smp_processor_id())) | ||
354 | >> CYC2NS_SCALE_FACTOR; | ||
355 | us = ns / 1000; | ||
356 | return us; | ||
176 | } | 357 | } |
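cycles_2_us() is a fixed-point conversion: multiply the cycle count by the per-cpu cyc2ns scale, shift right by CYC2NS_SCALE_FACTOR to get nanoseconds, then divide by 1000. A standalone sketch with illustrative constants (the shift and multiplier below are assumed values for a 2.4 GHz cpu, not the kernel's per-cpu data):

#include <stdint.h>

#define SCALE_SHIFT 10                  /* stands in for CYC2NS_SCALE_FACTOR */
static const uint64_t cyc2ns_mul = 427; /* ~ ((1e9 << 10) / 2.4e9) ns/cycle */

static unsigned long cycles_to_us(uint64_t cyc)
{
        uint64_t ns = (cyc * cyc2ns_mul) >> SCALE_SHIFT;

        return ns / 1000;
}

/* e.g. cycles_to_us(2400000) is about 1000: 2.4e6 cycles = 1 ms at 2.4 GHz */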
177 | 358 | ||
178 | /* | 359 | /* |
179 | * wait for completion of a broadcast message | 360 | * wait for all cpus on this hub to finish their sends and go quiet |
180 | * | 361 | * leaves uvhub_quiesce set so that no new broadcasts are started by |
181 | * return COMPLETE, RETRY or GIVEUP | 362 | * bau_flush_send_and_wait() |
363 | */ | ||
364 | static inline void | ||
365 | quiesce_local_uvhub(struct bau_control *hmaster) | ||
366 | { | ||
367 | atomic_add_short_return(1, (struct atomic_short *) | ||
368 | &hmaster->uvhub_quiesce); | ||
369 | } | ||
370 | |||
371 | /* | ||
372 | * mark this quiet-requestor as done | ||
373 | */ | ||
374 | static inline void | ||
375 | end_uvhub_quiesce(struct bau_control *hmaster) | ||
376 | { | ||
377 | atomic_add_short_return(-1, (struct atomic_short *) | ||
378 | &hmaster->uvhub_quiesce); | ||
379 | } | ||
380 | |||
381 | /* | ||
382 | * Wait for completion of a broadcast software ack message | ||
383 | * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP | ||
182 | */ | 384 | */ |
183 | static int uv_wait_completion(struct bau_desc *bau_desc, | 385 | static int uv_wait_completion(struct bau_desc *bau_desc, |
184 | unsigned long mmr_offset, int right_shift) | 386 | unsigned long mmr_offset, int right_shift, int this_cpu, |
387 | struct bau_control *bcp, struct bau_control *smaster, long try) | ||
185 | { | 388 | { |
186 | int exams = 0; | 389 | int relaxes = 0; |
187 | long destination_timeouts = 0; | ||
188 | long source_timeouts = 0; | ||
189 | unsigned long descriptor_status; | 390 | unsigned long descriptor_status; |
391 | unsigned long mmr; | ||
392 | unsigned long mask; | ||
393 | cycles_t ttime; | ||
394 | cycles_t timeout_time; | ||
395 | struct ptc_stats *stat = &per_cpu(ptcstats, this_cpu); | ||
396 | struct bau_control *hmaster; | ||
397 | |||
398 | hmaster = bcp->uvhub_master; | ||
399 | timeout_time = get_cycles() + bcp->timeout_interval; | ||
190 | 400 | ||
401 | /* spin on the status MMR, waiting for it to go idle */ | ||
191 | while ((descriptor_status = (((unsigned long) | 402 | while ((descriptor_status = (((unsigned long) |
192 | uv_read_local_mmr(mmr_offset) >> | 403 | uv_read_local_mmr(mmr_offset) >> |
193 | right_shift) & UV_ACT_STATUS_MASK)) != | 404 | right_shift) & UV_ACT_STATUS_MASK)) != |
194 | DESC_STATUS_IDLE) { | 405 | DESC_STATUS_IDLE) { |
195 | if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { | ||
196 | source_timeouts++; | ||
197 | if (source_timeouts > SOURCE_TIMEOUT_LIMIT) | ||
198 | source_timeouts = 0; | ||
199 | __get_cpu_var(ptcstats).s_retry++; | ||
200 | return FLUSH_RETRY; | ||
201 | } | ||
202 | /* | 406 | /* |
203 | * spin here looking for progress at the destinations | 407 | * Our software ack messages may be blocked because there are |
408 | * no swack resources available. As long as none of them | ||
409 | * has timed out hardware will NACK our message and its | ||
410 | * state will stay IDLE. | ||
204 | */ | 411 | */ |
205 | if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) { | 412 | if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { |
206 | destination_timeouts++; | 413 | stat->s_stimeout++; |
207 | if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) { | 414 | return FLUSH_GIVEUP; |
208 | /* | 415 | } else if (descriptor_status == |
209 | * returns number of cpus not responding | 416 | DESC_STATUS_DESTINATION_TIMEOUT) { |
210 | */ | 417 | stat->s_dtimeout++; |
211 | if (uv_examine_destinations | 418 | ttime = get_cycles(); |
212 | (&bau_desc->distribution) == 0) { | 419 | |
213 | __get_cpu_var(ptcstats).d_retry++; | 420 | /* |
214 | return FLUSH_RETRY; | 421 | * Our retries may be blocked by all destination |
215 | } | 422 | * swack resources being consumed, and a timeout |
216 | exams++; | 423 | * pending. In that case hardware returns the |
217 | if (exams >= uv_bau_retry_limit) { | 424 | * ERROR that looks like a destination timeout. |
218 | printk(KERN_DEBUG | 425 | */ |
219 | "uv_flush_tlb_others"); | 426 | if (cycles_2_us(ttime - bcp->send_message) < BIOS_TO) { |
220 | printk("giving up on cpu %d\n", | 427 | bcp->conseccompletes = 0; |
221 | smp_processor_id()); | 428 | return FLUSH_RETRY_PLUGGED; |
429 | } | ||
430 | |||
431 | bcp->conseccompletes = 0; | ||
432 | return FLUSH_RETRY_TIMEOUT; | ||
433 | } else { | ||
434 | /* | ||
435 | * descriptor_status is still BUSY | ||
436 | */ | ||
437 | cpu_relax(); | ||
438 | relaxes++; | ||
439 | if (relaxes >= 10000) { | ||
440 | relaxes = 0; | ||
441 | if (get_cycles() > timeout_time) { | ||
442 | quiesce_local_uvhub(hmaster); | ||
443 | |||
444 | /* single-thread the register change */ | ||
445 | spin_lock(&hmaster->masks_lock); | ||
446 | mmr = uv_read_local_mmr(mmr_offset); | ||
447 | mask = 0UL; | ||
448 | mask |= (3UL << right_shift); | ||
449 | mask = ~mask; | ||
450 | mmr &= mask; | ||
451 | uv_write_local_mmr(mmr_offset, mmr); | ||
452 | spin_unlock(&hmaster->masks_lock); | ||
453 | end_uvhub_quiesce(hmaster); | ||
454 | stat->s_busy++; | ||
222 | return FLUSH_GIVEUP; | 455 | return FLUSH_GIVEUP; |
223 | } | 456 | } |
224 | /* | ||
225 | * delays can hang the simulator | ||
226 | udelay(1000); | ||
227 | */ | ||
228 | destination_timeouts = 0; | ||
229 | } | 457 | } |
230 | } | 458 | } |
231 | cpu_relax(); | ||
232 | } | 459 | } |
460 | bcp->conseccompletes++; | ||
233 | return FLUSH_COMPLETE; | 461 | return FLUSH_COMPLETE; |
234 | } | 462 | } |
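When the descriptor stays BUSY past timeout_interval, the escape path above quiesces the hub and force-clears this cpu's slice of the activation-status MMR so the descriptor reads IDLE again. A minimal sketch of that field clear, assuming the two-bit field width implied by the 3UL mask used above:

/* Clear one descriptor's packed status field back to IDLE (0). */
static unsigned long clear_status_field(unsigned long mmr_image,
                                        int right_shift)
{
        unsigned long mask = 3UL << right_shift;   /* this cpu's 2-bit field */

        return mmr_image & ~mask;
}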
235 | 463 | ||
464 | static inline cycles_t | ||
465 | sec_2_cycles(unsigned long sec) | ||
466 | { | ||
467 | unsigned long ns; | ||
468 | cycles_t cyc; | ||
469 | |||
470 | ns = sec * 1000000000; | ||
471 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); | ||
472 | return cyc; | ||
473 | } | ||
474 | |||
475 | /* | ||
476 | * conditionally add 1 to *v, unless *v is >= u | ||
477 | * return 0 if we cannot add 1 to *v because it is >= u | ||
478 | * return 1 if we can add 1 to *v because it is < u | ||
479 | * the add is atomic | ||
480 | * | ||
481 | * This is close to atomic_add_unless(), but this allows the 'u' value | ||
482 | * to be lowered below the current 'v'. atomic_add_unless can only stop | ||
483 | * on equal. | ||
484 | */ | ||
485 | static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) | ||
486 | { | ||
487 | spin_lock(lock); | ||
488 | if (atomic_read(v) >= u) { | ||
489 | spin_unlock(lock); | ||
490 | return 0; | ||
491 | } | ||
492 | atomic_inc(v); | ||
493 | spin_unlock(lock); | ||
494 | return 1; | ||
495 | } | ||
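atomic_inc_unless_ge() is used below as an admission throttle: a sender may activate a descriptor only while fewer than max_concurrent are outstanding, and the limit can be lowered (for example to 1 after a destination timeout) even below the current count. A user-space model of the same pattern with a pthread mutex; names are illustrative:

#include <pthread.h>

static pthread_mutex_t throttle_lock = PTHREAD_MUTEX_INITIALIZER;
static int active_descriptors;          /* models active_descriptor_count */

/* Take a slot only while fewer than 'limit' are active. */
static int try_take_slot(int limit)
{
        int got = 0;

        pthread_mutex_lock(&throttle_lock);
        if (active_descriptors < limit) {
                active_descriptors++;
                got = 1;
        }
        pthread_mutex_unlock(&throttle_lock);
        return got;
}

Callers spin until try_take_slot() succeeds and decrement the count when the broadcast finishes, which is how uv_flush_send_and_wait() below brackets each activation.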
496 | |||
236 | /** | 497 | /** |
237 | * uv_flush_send_and_wait | 498 | * uv_flush_send_and_wait |
238 | * | 499 | * |
239 | * Send a broadcast and wait for a broadcast message to complete. | 500 | * Send a broadcast and wait for it to complete. |
240 | * | 501 | * |
241 | * The flush_mask contains the cpus the broadcast was sent to. | 502 | * The flush_mask contains the cpus the broadcast is to be sent to, plus |
503 | * cpus that are on the local uvhub. | ||
242 | * | 504 | * |
243 | * Returns NULL if all remote flushing was done. The mask is zeroed. | 505 | * Returns NULL if all flushing represented in the mask was done. The mask |
506 | * is zeroed. | ||
244 | * Returns @flush_mask if some remote flushing remains to be done. The | 507 | * Returns @flush_mask if some remote flushing remains to be done. The |
245 | * mask will have some bits still set. | 508 | * mask will have some bits still set, representing any cpus on the local |
509 | * uvhub (not current cpu) and any on remote uvhubs if the broadcast failed. | ||
246 | */ | 510 | */ |
247 | const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode, | 511 | const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc, |
248 | struct bau_desc *bau_desc, | 512 | struct cpumask *flush_mask, |
249 | struct cpumask *flush_mask) | 513 | struct bau_control *bcp) |
250 | { | 514 | { |
251 | int completion_status = 0; | ||
252 | int right_shift; | 515 | int right_shift; |
253 | int tries = 0; | 516 | int uvhub; |
254 | int pnode; | ||
255 | int bit; | 517 | int bit; |
518 | int completion_status = 0; | ||
519 | int seq_number = 0; | ||
520 | long try = 0; | ||
521 | int cpu = bcp->uvhub_cpu; | ||
522 | int this_cpu = bcp->cpu; | ||
523 | int this_uvhub = bcp->uvhub; | ||
256 | unsigned long mmr_offset; | 524 | unsigned long mmr_offset; |
257 | unsigned long index; | 525 | unsigned long index; |
258 | cycles_t time1; | 526 | cycles_t time1; |
259 | cycles_t time2; | 527 | cycles_t time2; |
528 | struct ptc_stats *stat = &per_cpu(ptcstats, bcp->cpu); | ||
529 | struct bau_control *smaster = bcp->socket_master; | ||
530 | struct bau_control *hmaster = bcp->uvhub_master; | ||
531 | |||
532 | /* | ||
533 | * Spin here while there are hmaster->max_concurrent or more active | ||
534 | * descriptors. This is the per-uvhub 'throttle'. | ||
535 | */ | ||
536 | if (!atomic_inc_unless_ge(&hmaster->uvhub_lock, | ||
537 | &hmaster->active_descriptor_count, | ||
538 | hmaster->max_concurrent)) { | ||
539 | stat->s_throttles++; | ||
540 | do { | ||
541 | cpu_relax(); | ||
542 | } while (!atomic_inc_unless_ge(&hmaster->uvhub_lock, | ||
543 | &hmaster->active_descriptor_count, | ||
544 | hmaster->max_concurrent)); | ||
545 | } | ||
546 | |||
547 | while (hmaster->uvhub_quiesce) | ||
548 | cpu_relax(); | ||
260 | 549 | ||
261 | if (cpu < UV_CPUS_PER_ACT_STATUS) { | 550 | if (cpu < UV_CPUS_PER_ACT_STATUS) { |
262 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; | 551 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; |
@@ -268,24 +557,108 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode, | |||
268 | } | 557 | } |
269 | time1 = get_cycles(); | 558 | time1 = get_cycles(); |
270 | do { | 559 | do { |
271 | tries++; | 560 | /* |
561 | * Every message from any given cpu gets a unique message | ||
562 | * sequence number. But retries use that same number. | ||
563 | * Our message may have timed out at the destination because | ||
564 | * all sw-ack resources are in use and there is a timeout | ||
565 | * pending there. In that case, our last send never got | ||
566 | * placed into the queue and we need to persist until it | ||
567 | * does. | ||
568 | * | ||
569 | * Make any retry a type MSG_RETRY so that the destination will | ||
570 | * free any resource held by a previous message from this cpu. | ||
571 | */ | ||
572 | if (try == 0) { | ||
573 | /* use message type set by the caller the first time */ | ||
574 | seq_number = bcp->message_number++; | ||
575 | } else { | ||
576 | /* use RETRY type on all the rest; same sequence */ | ||
577 | bau_desc->header.msg_type = MSG_RETRY; | ||
578 | stat->s_retry_messages++; | ||
579 | } | ||
580 | bau_desc->header.sequence = seq_number; | ||
272 | index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | | 581 | index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | |
273 | cpu; | 582 | bcp->uvhub_cpu; |
583 | bcp->send_message = get_cycles(); | ||
584 | |||
274 | uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); | 585 | uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); |
586 | |||
587 | try++; | ||
275 | completion_status = uv_wait_completion(bau_desc, mmr_offset, | 588 | completion_status = uv_wait_completion(bau_desc, mmr_offset, |
276 | right_shift); | 589 | right_shift, this_cpu, bcp, smaster, try); |
277 | } while (completion_status == FLUSH_RETRY); | 590 | |
591 | if (completion_status == FLUSH_RETRY_PLUGGED) { | ||
592 | /* | ||
593 | * Our retries may be blocked by all destination swack | ||
594 | * resources being consumed, and a timeout pending. In | ||
595 | * that case hardware immediately returns the ERROR | ||
596 | * that looks like a destination timeout. | ||
597 | */ | ||
598 | udelay(TIMEOUT_DELAY); | ||
599 | bcp->plugged_tries++; | ||
600 | if (bcp->plugged_tries >= PLUGSB4RESET) { | ||
601 | bcp->plugged_tries = 0; | ||
602 | quiesce_local_uvhub(hmaster); | ||
603 | spin_lock(&hmaster->queue_lock); | ||
604 | uv_reset_with_ipi(&bau_desc->distribution, | ||
605 | this_cpu); | ||
606 | spin_unlock(&hmaster->queue_lock); | ||
607 | end_uvhub_quiesce(hmaster); | ||
608 | bcp->ipi_attempts++; | ||
609 | stat->s_resets_plug++; | ||
610 | } | ||
611 | } else if (completion_status == FLUSH_RETRY_TIMEOUT) { | ||
612 | hmaster->max_concurrent = 1; | ||
613 | bcp->timeout_tries++; | ||
614 | udelay(TIMEOUT_DELAY); | ||
615 | if (bcp->timeout_tries >= TIMEOUTSB4RESET) { | ||
616 | bcp->timeout_tries = 0; | ||
617 | quiesce_local_uvhub(hmaster); | ||
618 | spin_lock(&hmaster->queue_lock); | ||
619 | uv_reset_with_ipi(&bau_desc->distribution, | ||
620 | this_cpu); | ||
621 | spin_unlock(&hmaster->queue_lock); | ||
622 | end_uvhub_quiesce(hmaster); | ||
623 | bcp->ipi_attempts++; | ||
624 | stat->s_resets_timeout++; | ||
625 | } | ||
626 | } | ||
627 | if (bcp->ipi_attempts >= 3) { | ||
628 | bcp->ipi_attempts = 0; | ||
629 | completion_status = FLUSH_GIVEUP; | ||
630 | break; | ||
631 | } | ||
632 | cpu_relax(); | ||
633 | } while ((completion_status == FLUSH_RETRY_PLUGGED) || | ||
634 | (completion_status == FLUSH_RETRY_TIMEOUT)); | ||
278 | time2 = get_cycles(); | 635 | time2 = get_cycles(); |
279 | __get_cpu_var(ptcstats).sflush += (time2 - time1); | ||
280 | if (tries > 1) | ||
281 | __get_cpu_var(ptcstats).retriesok++; | ||
282 | 636 | ||
283 | if (completion_status == FLUSH_GIVEUP) { | 637 | if ((completion_status == FLUSH_COMPLETE) && (bcp->conseccompletes > 5) |
638 | && (hmaster->max_concurrent < hmaster->max_concurrent_constant)) | ||
639 | hmaster->max_concurrent++; | ||
640 | |||
641 | /* | ||
642 | * hold any cpu not timing out here; no other cpu currently held by | ||
643 | * the 'throttle' should enter the activation code | ||
644 | */ | ||
645 | while (hmaster->uvhub_quiesce) | ||
646 | cpu_relax(); | ||
647 | atomic_dec(&hmaster->active_descriptor_count); | ||
648 | |||
649 | /* guard against cycles wrap */ | ||
650 | if (time2 > time1) | ||
651 | stat->s_time += (time2 - time1); | ||
652 | else | ||
653 | stat->s_requestor--; /* don't count this one */ | ||
654 | if (completion_status == FLUSH_COMPLETE && try > 1) | ||
655 | stat->s_retriesok++; | ||
656 | else if (completion_status == FLUSH_GIVEUP) { | ||
284 | /* | 657 | /* |
285 | * Cause the caller to do an IPI-style TLB shootdown on | 658 | * Cause the caller to do an IPI-style TLB shootdown on |
286 | * the cpu's, all of which are still in the mask. | 659 | * the target cpu's, all of which are still in the mask. |
287 | */ | 660 | */ |
288 | __get_cpu_var(ptcstats).ptc_i++; | 661 | stat->s_giveup++; |
289 | return flush_mask; | 662 | return flush_mask; |
290 | } | 663 | } |
291 | 664 | ||
@@ -294,18 +667,17 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode, | |||
294 | * use the IPI method of shootdown on them. | 667 | * use the IPI method of shootdown on them. |
295 | */ | 668 | */ |
296 | for_each_cpu(bit, flush_mask) { | 669 | for_each_cpu(bit, flush_mask) { |
297 | pnode = uv_cpu_to_pnode(bit); | 670 | uvhub = uv_cpu_to_blade_id(bit); |
298 | if (pnode == this_pnode) | 671 | if (uvhub == this_uvhub) |
299 | continue; | 672 | continue; |
300 | cpumask_clear_cpu(bit, flush_mask); | 673 | cpumask_clear_cpu(bit, flush_mask); |
301 | } | 674 | } |
302 | if (!cpumask_empty(flush_mask)) | 675 | if (!cpumask_empty(flush_mask)) |
303 | return flush_mask; | 676 | return flush_mask; |
677 | |||
304 | return NULL; | 678 | return NULL; |
305 | } | 679 | } |
306 | 680 | ||
307 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); | ||
308 | |||
309 | /** | 681 | /** |
310 | * uv_flush_tlb_others - globally purge translation cache of a virtual | 682 | * uv_flush_tlb_others - globally purge translation cache of a virtual |
311 | * address or all TLB's | 683 | * address or all TLB's |
@@ -322,8 +694,8 @@ static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); | |||
322 | * The caller has derived the cpumask from the mm_struct. This function | 694 | * The caller has derived the cpumask from the mm_struct. This function |
323 | * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) | 695 | * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) |
324 | * | 696 | * |
325 | * The cpumask is converted into a nodemask of the nodes containing | 697 | * The cpumask is converted into a uvhubmask of the uvhubs containing |
326 | * the cpus. | 698 | * those cpus. |
327 | * | 699 | * |
328 | * Note that this function should be called with preemption disabled. | 700 | * Note that this function should be called with preemption disabled. |
329 | * | 701 | * |
@@ -335,52 +707,82 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
335 | struct mm_struct *mm, | 707 | struct mm_struct *mm, |
336 | unsigned long va, unsigned int cpu) | 708 | unsigned long va, unsigned int cpu) |
337 | { | 709 | { |
338 | struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask); | 710 | int remotes; |
339 | int i; | 711 | int tcpu; |
340 | int bit; | 712 | int uvhub; |
341 | int pnode; | ||
342 | int uv_cpu; | ||
343 | int this_pnode; | ||
344 | int locals = 0; | 713 | int locals = 0; |
345 | struct bau_desc *bau_desc; | 714 | struct bau_desc *bau_desc; |
715 | struct cpumask *flush_mask; | ||
716 | struct ptc_stats *stat; | ||
717 | struct bau_control *bcp; | ||
346 | 718 | ||
347 | cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); | 719 | if (nobau) |
720 | return cpumask; | ||
348 | 721 | ||
349 | uv_cpu = uv_blade_processor_id(); | 722 | bcp = &per_cpu(bau_control, cpu); |
350 | this_pnode = uv_hub_info->pnode; | 723 | /* |
351 | bau_desc = __get_cpu_var(bau_control).descriptor_base; | 724 | * Each sending cpu has a per-cpu mask which it fills from the caller's |
352 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu; | 725 | * cpu mask. Only remote cpus are converted to uvhubs and copied. |
726 | */ | ||
727 | flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); | ||
728 | /* | ||
729 | * copy cpumask to flush_mask, removing current cpu | ||
730 | * (current cpu should already have been flushed by the caller and | ||
731 | * should never be returned if we return flush_mask) | ||
732 | */ | ||
733 | cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); | ||
734 | if (cpu_isset(cpu, *cpumask)) | ||
735 | locals++; /* current cpu was targeted */ | ||
353 | 736 | ||
354 | bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); | 737 | bau_desc = bcp->descriptor_base; |
738 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; | ||
355 | 739 | ||
356 | i = 0; | 740 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); |
357 | for_each_cpu(bit, flush_mask) { | 741 | remotes = 0; |
358 | pnode = uv_cpu_to_pnode(bit); | 742 | for_each_cpu(tcpu, flush_mask) { |
359 | BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1)); | 743 | uvhub = uv_cpu_to_blade_id(tcpu); |
360 | if (pnode == this_pnode) { | 744 | if (uvhub == bcp->uvhub) { |
361 | locals++; | 745 | locals++; |
362 | continue; | 746 | continue; |
363 | } | 747 | } |
364 | bau_node_set(pnode - uv_partition_base_pnode, | 748 | bau_uvhub_set(uvhub, &bau_desc->distribution); |
365 | &bau_desc->distribution); | 749 | remotes++; |
366 | i++; | ||
367 | } | 750 | } |
368 | if (i == 0) { | 751 | if (remotes == 0) { |
369 | /* | 752 | /* |
370 | * no off_node flushing; return status for local node | 753 | * No off_hub flushing; return status for local hub. |
754 | * Return the caller's mask if all were local (the current | ||
755 | * cpu may be in that mask). | ||
371 | */ | 756 | */ |
372 | if (locals) | 757 | if (locals) |
373 | return flush_mask; | 758 | return cpumask; |
374 | else | 759 | else |
375 | return NULL; | 760 | return NULL; |
376 | } | 761 | } |
377 | __get_cpu_var(ptcstats).requestor++; | 762 | stat = &per_cpu(ptcstats, cpu); |
378 | __get_cpu_var(ptcstats).ntargeted += i; | 763 | stat->s_requestor++; |
764 | stat->s_ntargcpu += remotes; | ||
765 | remotes = bau_uvhub_weight(&bau_desc->distribution); | ||
766 | stat->s_ntarguvhub += remotes; | ||
767 | if (remotes >= 16) | ||
768 | stat->s_ntarguvhub16++; | ||
769 | else if (remotes >= 8) | ||
770 | stat->s_ntarguvhub8++; | ||
771 | else if (remotes >= 4) | ||
772 | stat->s_ntarguvhub4++; | ||
773 | else if (remotes >= 2) | ||
774 | stat->s_ntarguvhub2++; | ||
775 | else | ||
776 | stat->s_ntarguvhub1++; | ||
379 | 777 | ||
380 | bau_desc->payload.address = va; | 778 | bau_desc->payload.address = va; |
381 | bau_desc->payload.sending_cpu = cpu; | 779 | bau_desc->payload.sending_cpu = cpu; |
382 | 780 | ||
383 | return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask); | 781 | /* |
782 | * uv_flush_send_and_wait returns null if all cpu's were messaged, or | ||
783 | * the adjusted flush_mask if any cpu's were not messaged. | ||
784 | */ | ||
785 | return uv_flush_send_and_wait(bau_desc, flush_mask, bcp); | ||
384 | } | 786 | } |
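The contract for callers is: a NULL return means the BAU covered every requested cpu, while a non-NULL mask still has to be flushed the conventional way. A hedged sketch of the expected call site; flush_tlb_others_ipi() is the generic x86 IPI fallback and its exact signature is assumed here:

/* sketch of a native_flush_tlb_others()-style caller on a UV system */
const struct cpumask *remaining;

remaining = uv_flush_tlb_others(cpumask, mm, va, smp_processor_id());
if (remaining)
        /* BAU gave up, or some target cpus were local: finish with IPIs */
        flush_tlb_others_ipi(remaining, mm, va);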
385 | 787 | ||
386 | /* | 788 | /* |
@@ -389,87 +791,70 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
389 | * | 791 | * |
390 | * We received a broadcast assist message. | 792 | * We received a broadcast assist message. |
391 | * | 793 | * |
392 | * Interrupts may have been disabled; this interrupt could represent | 794 | * Interrupts are disabled; this interrupt could represent |
393 | * the receipt of several messages. | 795 | * the receipt of several messages. |
394 | * | 796 | * |
395 | * All cores/threads on this node get this interrupt. | 797 | * All cores/threads on this hub get this interrupt. |
396 | * The last one to see it does the s/w ack. | 798 | * The last one to see it does the software ack. |
397 | * (the resource will not be freed until noninterruptable cpus see this | 799 | * (the resource will not be freed until noninterruptable cpus see this |
398 | * interrupt; hardware will timeout the s/w ack and reply ERROR) | 800 | * interrupt; hardware may timeout the s/w ack and reply ERROR) |
399 | */ | 801 | */ |
400 | void uv_bau_message_interrupt(struct pt_regs *regs) | 802 | void uv_bau_message_interrupt(struct pt_regs *regs) |
401 | { | 803 | { |
402 | struct bau_payload_queue_entry *va_queue_first; | ||
403 | struct bau_payload_queue_entry *va_queue_last; | ||
404 | struct bau_payload_queue_entry *msg; | ||
405 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
406 | cycles_t time1; | ||
407 | cycles_t time2; | ||
408 | int msg_slot; | ||
409 | int sw_ack_slot; | ||
410 | int fw; | ||
411 | int count = 0; | 804 | int count = 0; |
412 | unsigned long local_pnode; | 805 | cycles_t time_start; |
413 | 806 | struct bau_payload_queue_entry *msg; | |
414 | ack_APIC_irq(); | 807 | struct bau_control *bcp; |
415 | exit_idle(); | 808 | struct ptc_stats *stat; |
416 | irq_enter(); | 809 | struct msg_desc msgdesc; |
417 | 810 | ||
418 | time1 = get_cycles(); | 811 | time_start = get_cycles(); |
419 | 812 | bcp = &per_cpu(bau_control, smp_processor_id()); | |
420 | local_pnode = uv_blade_to_pnode(uv_numa_blade_id()); | 813 | stat = &per_cpu(ptcstats, smp_processor_id()); |
421 | 814 | msgdesc.va_queue_first = bcp->va_queue_first; | |
422 | va_queue_first = __get_cpu_var(bau_control).va_queue_first; | 815 | msgdesc.va_queue_last = bcp->va_queue_last; |
423 | va_queue_last = __get_cpu_var(bau_control).va_queue_last; | 816 | msg = bcp->bau_msg_head; |
424 | |||
425 | msg = __get_cpu_var(bau_control).bau_msg_head; | ||
426 | while (msg->sw_ack_vector) { | 817 | while (msg->sw_ack_vector) { |
427 | count++; | 818 | count++; |
428 | fw = msg->sw_ack_vector; | 819 | msgdesc.msg_slot = msg - msgdesc.va_queue_first; |
429 | msg_slot = msg - va_queue_first; | 820 | msgdesc.sw_ack_slot = ffs(msg->sw_ack_vector) - 1; |
430 | sw_ack_slot = ffs(fw) - 1; | 821 | msgdesc.msg = msg; |
431 | 822 | uv_bau_process_message(&msgdesc, bcp); | |
432 | uv_bau_process_message(msg, msg_slot, sw_ack_slot); | ||
433 | |||
434 | msg++; | 823 | msg++; |
435 | if (msg > va_queue_last) | 824 | if (msg > msgdesc.va_queue_last) |
436 | msg = va_queue_first; | 825 | msg = msgdesc.va_queue_first; |
437 | __get_cpu_var(bau_control).bau_msg_head = msg; | 826 | bcp->bau_msg_head = msg; |
438 | } | 827 | } |
828 | stat->d_time += (get_cycles() - time_start); | ||
439 | if (!count) | 829 | if (!count) |
440 | __get_cpu_var(ptcstats).nomsg++; | 830 | stat->d_nomsg++; |
441 | else if (count > 1) | 831 | else if (count > 1) |
442 | __get_cpu_var(ptcstats).multmsg++; | 832 | stat->d_multmsg++; |
443 | 833 | ack_APIC_irq(); | |
444 | time2 = get_cycles(); | ||
445 | __get_cpu_var(ptcstats).dflush += (time2 - time1); | ||
446 | |||
447 | irq_exit(); | ||
448 | set_irq_regs(old_regs); | ||
449 | } | 834 | } |
450 | 835 | ||
451 | /* | 836 | /* |
452 | * uv_enable_timeouts | 837 | * uv_enable_timeouts |
453 | * | 838 | * |
454 | * Each target blade (i.e. blades that have cpu's) needs to have | 839 | * Each target uvhub (i.e. a uvhub that has cpu's) needs to have |
455 | * shootdown message timeouts enabled. The timeout does not cause | 840 | * shootdown message timeouts enabled. The timeout does not cause |
456 | * an interrupt, but causes an error message to be returned to | 841 | * an interrupt, but causes an error message to be returned to |
457 | * the sender. | 842 | * the sender. |
458 | */ | 843 | */ |
459 | static void uv_enable_timeouts(void) | 844 | static void uv_enable_timeouts(void) |
460 | { | 845 | { |
461 | int blade; | 846 | int uvhub; |
462 | int nblades; | 847 | int nuvhubs; |
463 | int pnode; | 848 | int pnode; |
464 | unsigned long mmr_image; | 849 | unsigned long mmr_image; |
465 | 850 | ||
466 | nblades = uv_num_possible_blades(); | 851 | nuvhubs = uv_num_possible_blades(); |
467 | 852 | ||
468 | for (blade = 0; blade < nblades; blade++) { | 853 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { |
469 | if (!uv_blade_nr_possible_cpus(blade)) | 854 | if (!uv_blade_nr_possible_cpus(uvhub)) |
470 | continue; | 855 | continue; |
471 | 856 | ||
472 | pnode = uv_blade_to_pnode(blade); | 857 | pnode = uv_blade_to_pnode(uvhub); |
473 | mmr_image = | 858 | mmr_image = |
474 | uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); | 859 | uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); |
475 | /* | 860 | /* |
@@ -479,16 +864,16 @@ static void uv_enable_timeouts(void) | |||
479 | * To program the period, the SOFT_ACK_MODE must be off. | 864 | * To program the period, the SOFT_ACK_MODE must be off. |
480 | */ | 865 | */ |
481 | mmr_image &= ~((unsigned long)1 << | 866 | mmr_image &= ~((unsigned long)1 << |
482 | UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT); | 867 | UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); |
483 | uv_write_global_mmr64 | 868 | uv_write_global_mmr64 |
484 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | 869 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); |
485 | /* | 870 | /* |
486 | * Set the 4-bit period. | 871 | * Set the 4-bit period. |
487 | */ | 872 | */ |
488 | mmr_image &= ~((unsigned long)0xf << | 873 | mmr_image &= ~((unsigned long)0xf << |
489 | UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT); | 874 | UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); |
490 | mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << | 875 | mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << |
491 | UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT); | 876 | UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); |
492 | uv_write_global_mmr64 | 877 | uv_write_global_mmr64 |
493 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | 878 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); |
494 | /* | 879 | /* |
@@ -497,7 +882,7 @@ static void uv_enable_timeouts(void) | |||
497 | * indicated in bits 2:0 (7 causes all of them to timeout). | 882 | * indicated in bits 2:0 (7 causes all of them to timeout). |
498 | */ | 883 | */ |
499 | mmr_image |= ((unsigned long)1 << | 884 | mmr_image |= ((unsigned long)1 << |
500 | UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT); | 885 | UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); |
501 | uv_write_global_mmr64 | 886 | uv_write_global_mmr64 |
502 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | 887 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); |
503 | } | 888 | } |
@@ -522,9 +907,20 @@ static void uv_ptc_seq_stop(struct seq_file *file, void *data) | |||
522 | { | 907 | { |
523 | } | 908 | } |
524 | 909 | ||
910 | static inline unsigned long long | ||
911 | millisec_2_cycles(unsigned long millisec) | ||
912 | { | ||
913 | unsigned long ns; | ||
914 | unsigned long long cyc; | ||
915 | |||
916 | ns = millisec * 1000; | ||
917 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); | ||
918 | return cyc; | ||
919 | } | ||
920 | |||
525 | /* | 921 | /* |
526 | * Display the statistics thru /proc | 922 | * Display the statistics thru /proc. |
527 | * data points to the cpu number | 923 | * 'data' points to the cpu number |
528 | */ | 924 | */ |
529 | static int uv_ptc_seq_show(struct seq_file *file, void *data) | 925 | static int uv_ptc_seq_show(struct seq_file *file, void *data) |
530 | { | 926 | { |
@@ -535,78 +931,155 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data) | |||
535 | 931 | ||
536 | if (!cpu) { | 932 | if (!cpu) { |
537 | seq_printf(file, | 933 | seq_printf(file, |
538 | "# cpu requestor requestee one all sretry dretry ptc_i "); | 934 | "# cpu sent stime numuvhubs numuvhubs16 numuvhubs8 "); |
539 | seq_printf(file, | 935 | seq_printf(file, |
540 | "sw_ack sflush dflush sok dnomsg dmult starget\n"); | 936 | "numuvhubs4 numuvhubs2 numuvhubs1 numcpus dto "); |
937 | seq_printf(file, | ||
938 | "retries rok resetp resett giveup sto bz throt "); | ||
939 | seq_printf(file, | ||
940 | "sw_ack recv rtime all "); | ||
941 | seq_printf(file, | ||
942 | "one mult none retry canc nocan reset rcan\n"); | ||
541 | } | 943 | } |
542 | if (cpu < num_possible_cpus() && cpu_online(cpu)) { | 944 | if (cpu < num_possible_cpus() && cpu_online(cpu)) { |
543 | stat = &per_cpu(ptcstats, cpu); | 945 | stat = &per_cpu(ptcstats, cpu); |
544 | seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld ", | 946 | /* source side statistics */ |
545 | cpu, stat->requestor, | 947 | seq_printf(file, |
546 | stat->requestee, stat->onetlb, stat->alltlb, | 948 | "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", |
547 | stat->s_retry, stat->d_retry, stat->ptc_i); | 949 | cpu, stat->s_requestor, cycles_2_us(stat->s_time), |
548 | seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n", | 950 | stat->s_ntarguvhub, stat->s_ntarguvhub16, |
951 | stat->s_ntarguvhub8, stat->s_ntarguvhub4, | ||
952 | stat->s_ntarguvhub2, stat->s_ntarguvhub1, | ||
953 | stat->s_ntargcpu, stat->s_dtimeout); | ||
954 | seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ", | ||
955 | stat->s_retry_messages, stat->s_retriesok, | ||
956 | stat->s_resets_plug, stat->s_resets_timeout, | ||
957 | stat->s_giveup, stat->s_stimeout, | ||
958 | stat->s_busy, stat->s_throttles); | ||
959 | /* destination side statistics */ | ||
960 | seq_printf(file, | ||
961 | "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", | ||
549 | uv_read_global_mmr64(uv_cpu_to_pnode(cpu), | 962 | uv_read_global_mmr64(uv_cpu_to_pnode(cpu), |
550 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), | 963 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), |
551 | stat->sflush, stat->dflush, | 964 | stat->d_requestee, cycles_2_us(stat->d_time), |
552 | stat->retriesok, stat->nomsg, | 965 | stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, |
553 | stat->multmsg, stat->ntargeted); | 966 | stat->d_nomsg, stat->d_retries, stat->d_canceled, |
967 | stat->d_nocanceled, stat->d_resets, | ||
968 | stat->d_rcanceled); | ||
554 | } | 969 | } |
555 | 970 | ||
556 | return 0; | 971 | return 0; |
557 | } | 972 | } |
558 | 973 | ||
559 | /* | 974 | /* |
975 | * -1: reset the statistics | ||
560 | * 0: display meaning of the statistics | 976 | * 0: display meaning of the statistics |
561 | * >0: retry limit | 977 | * >0: maximum concurrent active descriptors per uvhub (throttle) |
562 | */ | 978 | */ |
563 | static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, | 979 | static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, |
564 | size_t count, loff_t *data) | 980 | size_t count, loff_t *data) |
565 | { | 981 | { |
566 | long newmode; | 982 | int cpu; |
983 | long input_arg; | ||
567 | char optstr[64]; | 984 | char optstr[64]; |
985 | struct ptc_stats *stat; | ||
986 | struct bau_control *bcp; | ||
568 | 987 | ||
569 | if (count == 0 || count > sizeof(optstr)) | 988 | if (count == 0 || count > sizeof(optstr)) |
570 | return -EINVAL; | 989 | return -EINVAL; |
571 | if (copy_from_user(optstr, user, count)) | 990 | if (copy_from_user(optstr, user, count)) |
572 | return -EFAULT; | 991 | return -EFAULT; |
573 | optstr[count - 1] = '\0'; | 992 | optstr[count - 1] = '\0'; |
574 | if (strict_strtoul(optstr, 10, &newmode) < 0) { | 993 | if (strict_strtol(optstr, 10, &input_arg) < 0) { |
575 | printk(KERN_DEBUG "%s is invalid\n", optstr); | 994 | printk(KERN_DEBUG "%s is invalid\n", optstr); |
576 | return -EINVAL; | 995 | return -EINVAL; |
577 | } | 996 | } |
578 | 997 | ||
579 | if (newmode == 0) { | 998 | if (input_arg == 0) { |
580 | printk(KERN_DEBUG "# cpu: cpu number\n"); | 999 | printk(KERN_DEBUG "# cpu: cpu number\n"); |
1000 | printk(KERN_DEBUG "Sender statistics:\n"); | ||
1001 | printk(KERN_DEBUG | ||
1002 | "sent: number of shootdown messages sent\n"); | ||
1003 | printk(KERN_DEBUG | ||
1004 | "stime: time spent sending messages\n"); | ||
1005 | printk(KERN_DEBUG | ||
1006 | "numuvhubs: number of hubs targeted with shootdown\n"); | ||
1007 | printk(KERN_DEBUG | ||
1008 | "numuvhubs16: number times 16 or more hubs targeted\n"); | ||
1009 | printk(KERN_DEBUG | ||
1010 | "numuvhubs8: number times 8 or more hubs targeted\n"); | ||
1011 | printk(KERN_DEBUG | ||
1012 | "numuvhubs4: number times 4 or more hubs targeted\n"); | ||
1013 | printk(KERN_DEBUG | ||
1014 | "numuvhubs2: number times 2 or more hubs targeted\n"); | ||
1015 | printk(KERN_DEBUG | ||
1016 | "numuvhubs1: number times 1 hub targeted\n"); | ||
1017 | printk(KERN_DEBUG | ||
1018 | "numcpus: number of cpus targeted with shootdown\n"); | ||
1019 | printk(KERN_DEBUG | ||
1020 | "dto: number of destination timeouts\n"); | ||
1021 | printk(KERN_DEBUG | ||
1022 | "retries: destination timeout retries sent\n"); | ||
1023 | printk(KERN_DEBUG | ||
1024 | "rok: : destination timeouts successfully retried\n"); | ||
1025 | printk(KERN_DEBUG | ||
1026 | "resetp: ipi-style resource resets for plugs\n"); | ||
1027 | printk(KERN_DEBUG | ||
1028 | "resett: ipi-style resource resets for timeouts\n"); | ||
1029 | printk(KERN_DEBUG | ||
1030 | "giveup: fall-backs to ipi-style shootdowns\n"); | ||
1031 | printk(KERN_DEBUG | ||
1032 | "sto: number of source timeouts\n"); | ||
1033 | printk(KERN_DEBUG | ||
1034 | "bz: number of stay-busy's\n"); | ||
1035 | printk(KERN_DEBUG | ||
1036 | "throt: number times spun in throttle\n"); | ||
1037 | printk(KERN_DEBUG "Destination side statistics:\n"); | ||
581 | printk(KERN_DEBUG | 1038 | printk(KERN_DEBUG |
582 | "requestor: times this cpu was the flush requestor\n"); | 1039 | "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); |
583 | printk(KERN_DEBUG | 1040 | printk(KERN_DEBUG |
584 | "requestee: times this cpu was requested to flush its TLBs\n"); | 1041 | "recv: shootdown messages received\n"); |
585 | printk(KERN_DEBUG | 1042 | printk(KERN_DEBUG |
586 | "one: times requested to flush a single address\n"); | 1043 | "rtime: time spent processing messages\n"); |
587 | printk(KERN_DEBUG | 1044 | printk(KERN_DEBUG |
588 | "all: times requested to flush all TLB's\n"); | 1045 | "all: shootdown all-tlb messages\n"); |
589 | printk(KERN_DEBUG | 1046 | printk(KERN_DEBUG |
590 | "sretry: number of retries of source-side timeouts\n"); | 1047 | "one: shootdown one-tlb messages\n"); |
591 | printk(KERN_DEBUG | 1048 | printk(KERN_DEBUG |
592 | "dretry: number of retries of destination-side timeouts\n"); | 1049 | "mult: interrupts that found multiple messages\n"); |
593 | printk(KERN_DEBUG | 1050 | printk(KERN_DEBUG |
594 | "ptc_i: times UV fell through to IPI-style flushes\n"); | 1051 | "none: interrupts that found no messages\n"); |
595 | printk(KERN_DEBUG | 1052 | printk(KERN_DEBUG |
596 | "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); | 1053 | "retry: number of retry messages processed\n"); |
597 | printk(KERN_DEBUG | 1054 | printk(KERN_DEBUG |
598 | "sflush_us: cycles spent in uv_flush_tlb_others()\n"); | 1055 | "canc: number messages canceled by retries\n"); |
599 | printk(KERN_DEBUG | 1056 | printk(KERN_DEBUG |
600 | "dflush_us: cycles spent in handling flush requests\n"); | 1057 | "nocan: number retries that found nothing to cancel\n"); |
601 | printk(KERN_DEBUG "sok: successes on retry\n"); | ||
602 | printk(KERN_DEBUG "dnomsg: interrupts with no message\n"); | ||
603 | printk(KERN_DEBUG | 1058 | printk(KERN_DEBUG |
604 | "dmult: interrupts with multiple messages\n"); | 1059 | "reset: number of ipi-style reset requests processed\n"); |
605 | printk(KERN_DEBUG "starget: nodes targeted\n"); | 1060 | printk(KERN_DEBUG |
1061 | "rcan: number messages canceled by reset requests\n"); | ||
1062 | } else if (input_arg == -1) { | ||
1063 | for_each_present_cpu(cpu) { | ||
1064 | stat = &per_cpu(ptcstats, cpu); | ||
1065 | memset(stat, 0, sizeof(struct ptc_stats)); | ||
1066 | } | ||
606 | } else { | 1067 | } else { |
607 | uv_bau_retry_limit = newmode; | 1068 | uv_bau_max_concurrent = input_arg; |
608 | printk(KERN_DEBUG "timeout retry limit:%d\n", | 1069 | bcp = &per_cpu(bau_control, smp_processor_id()); |
609 | uv_bau_retry_limit); | 1070 | if (uv_bau_max_concurrent < 1 || |
1071 | uv_bau_max_concurrent > bcp->cpus_in_uvhub) { | ||
1072 | printk(KERN_DEBUG | ||
1073 | "Error: BAU max concurrent %d; %d is invalid\n", | ||
1074 | bcp->max_concurrent, uv_bau_max_concurrent); | ||
1075 | return -EINVAL; | ||
1076 | } | ||
1077 | printk(KERN_DEBUG "Set BAU max concurrent:%d\n", | ||
1078 | uv_bau_max_concurrent); | ||
1079 | for_each_present_cpu(cpu) { | ||
1080 | bcp = &per_cpu(bau_control, cpu); | ||
1081 | bcp->max_concurrent = uv_bau_max_concurrent; | ||
1082 | } | ||
610 | } | 1083 | } |
611 | 1084 | ||
612 | return count; | 1085 | return count; |
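So the control file accepts three kinds of input: "-1" clears every cpu's statistics, "0" prints the field legend to the kernel log, and a positive number becomes the per-uvhub max_concurrent throttle. A minimal user-space sketch of driving it; the /proc path is an assumption based on UV_PTC_BASENAME defined elsewhere in this file:

#include <stdio.h>

int main(void)
{
        /* path assumed; see UV_PTC_BASENAME in this driver */
        FILE *f = fopen("/proc/sgi_uv/ptc_statistics", "w");

        if (!f)
                return 1;
        fputs("-1", f);         /* -1: reset all statistics */
        /* "0" would print the legend via printk */
        /* "8" would set max_concurrent to 8 on every cpu's bau_control */
        fclose(f);
        return 0;
}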
@@ -650,79 +1123,30 @@ static int __init uv_ptc_init(void) | |||
650 | } | 1123 | } |
651 | 1124 | ||
652 | /* | 1125 | /* |
653 | * begin the initialization of the per-blade control structures | ||
654 | */ | ||
655 | static struct bau_control * __init uv_table_bases_init(int blade, int node) | ||
656 | { | ||
657 | int i; | ||
658 | struct bau_msg_status *msp; | ||
659 | struct bau_control *bau_tabp; | ||
660 | |||
661 | bau_tabp = | ||
662 | kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, node); | ||
663 | BUG_ON(!bau_tabp); | ||
664 | |||
665 | bau_tabp->msg_statuses = | ||
666 | kmalloc_node(sizeof(struct bau_msg_status) * | ||
667 | DEST_Q_SIZE, GFP_KERNEL, node); | ||
668 | BUG_ON(!bau_tabp->msg_statuses); | ||
669 | |||
670 | for (i = 0, msp = bau_tabp->msg_statuses; i < DEST_Q_SIZE; i++, msp++) | ||
671 | bau_cpubits_clear(&msp->seen_by, (int) | ||
672 | uv_blade_nr_possible_cpus(blade)); | ||
673 | |||
674 | uv_bau_table_bases[blade] = bau_tabp; | ||
675 | |||
676 | return bau_tabp; | ||
677 | } | ||
678 | |||
679 | /* | ||
680 | * finish the initialization of the per-blade control structures | ||
681 | */ | ||
682 | static void __init | ||
683 | uv_table_bases_finish(int blade, | ||
684 | struct bau_control *bau_tablesp, | ||
685 | struct bau_desc *adp) | ||
686 | { | ||
687 | struct bau_control *bcp; | ||
688 | int cpu; | ||
689 | |||
690 | for_each_present_cpu(cpu) { | ||
691 | if (blade != uv_cpu_to_blade_id(cpu)) | ||
692 | continue; | ||
693 | |||
694 | bcp = (struct bau_control *)&per_cpu(bau_control, cpu); | ||
695 | bcp->bau_msg_head = bau_tablesp->va_queue_first; | ||
696 | bcp->va_queue_first = bau_tablesp->va_queue_first; | ||
697 | bcp->va_queue_last = bau_tablesp->va_queue_last; | ||
698 | bcp->msg_statuses = bau_tablesp->msg_statuses; | ||
699 | bcp->descriptor_base = adp; | ||
700 | } | ||
701 | } | ||
702 | |||
703 | /* | ||
704 | * initialize the sending side's sending buffers | 1126 | * initialize the sending side's sending buffers |
705 | */ | 1127 | */ |
706 | static struct bau_desc * __init | 1128 | static void |
707 | uv_activation_descriptor_init(int node, int pnode) | 1129 | uv_activation_descriptor_init(int node, int pnode) |
708 | { | 1130 | { |
709 | int i; | 1131 | int i; |
1132 | int cpu; | ||
710 | unsigned long pa; | 1133 | unsigned long pa; |
711 | unsigned long m; | 1134 | unsigned long m; |
712 | unsigned long n; | 1135 | unsigned long n; |
713 | struct bau_desc *adp; | 1136 | struct bau_desc *bau_desc; |
714 | struct bau_desc *ad2; | 1137 | struct bau_desc *bd2; |
1138 | struct bau_control *bcp; | ||
715 | 1139 | ||
716 | /* | 1140 | /* |
717 | * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) | 1141 | * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) |
718 | * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per blade | 1142 | * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub |
719 | */ | 1143 | */ |
720 | adp = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)* | 1144 | bau_desc = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)* |
721 | UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); | 1145 | UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); |
722 | BUG_ON(!adp); | 1146 | BUG_ON(!bau_desc); |
723 | 1147 | ||
724 | pa = uv_gpa(adp); /* need the real nasid*/ | 1148 | pa = uv_gpa(bau_desc); /* need the real nasid*/ |
725 | n = uv_gpa_to_pnode(pa); | 1149 | n = pa >> uv_nshift; |
726 | m = pa & uv_mmask; | 1150 | m = pa & uv_mmask; |
727 | 1151 | ||
728 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, | 1152 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, |
@@ -731,96 +1155,188 @@ uv_activation_descriptor_init(int node, int pnode) | |||
731 | /* | 1155 | /* |
732 | * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each | 1156 | * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each |
733 | * cpu even though we only use the first one; one descriptor can | 1157 | * cpu even though we only use the first one; one descriptor can |
734 | * describe a broadcast to 256 nodes. | 1158 | * describe a broadcast to 256 uv hubs. |
735 | */ | 1159 | */ |
736 | for (i = 0, ad2 = adp; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); | 1160 | for (i = 0, bd2 = bau_desc; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); |
737 | i++, ad2++) { | 1161 | i++, bd2++) { |
738 | memset(ad2, 0, sizeof(struct bau_desc)); | 1162 | memset(bd2, 0, sizeof(struct bau_desc)); |
739 | ad2->header.sw_ack_flag = 1; | 1163 | bd2->header.sw_ack_flag = 1; |
740 | /* | 1164 | /* |
741 | * base_dest_nodeid is the first node in the partition, so | 1165 | * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub |
742 | * the bit map will indicate partition-relative node numbers. | 1166 | * in the partition. The bit map will indicate uvhub numbers, |
743 | * note that base_dest_nodeid is actually a nasid. | 1167 | * which are 0-N in a partition. Pnodes are unique system-wide. |
744 | */ | 1168 | */ |
745 | ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1; | 1169 | bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1; |
746 | ad2->header.dest_subnodeid = 0x10; /* the LB */ | 1170 | bd2->header.dest_subnodeid = 0x10; /* the LB */ |
747 | ad2->header.command = UV_NET_ENDPOINT_INTD; | 1171 | bd2->header.command = UV_NET_ENDPOINT_INTD; |
748 | ad2->header.int_both = 1; | 1172 | bd2->header.int_both = 1; |
749 | /* | 1173 | /* |
750 | * all others need to be set to zero: | 1174 | * all others need to be set to zero: |
751 | * fairness chaining multilevel count replied_to | 1175 | * fairness chaining multilevel count replied_to |
752 | */ | 1176 | */ |
753 | } | 1177 | } |
754 | return adp; | 1178 | for_each_present_cpu(cpu) { |
1179 | if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu))) | ||
1180 | continue; | ||
1181 | bcp = &per_cpu(bau_control, cpu); | ||
1182 | bcp->descriptor_base = bau_desc; | ||
1183 | } | ||
755 | } | 1184 | } |
756 | 1185 | ||
757 | /* | 1186 | /* |
758 | * initialize the destination side's receiving buffers | 1187 | * initialize the destination side's receiving buffers |
1188 | * entered for each uvhub in the partition | ||
1189 | * - node is first node (kernel memory notion) on the uvhub | ||
1190 | * - pnode is the uvhub's physical identifier | ||
759 | */ | 1191 | */ |
760 | static struct bau_payload_queue_entry * __init | 1192 | static void |
761 | uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp) | 1193 | uv_payload_queue_init(int node, int pnode) |
762 | { | 1194 | { |
763 | struct bau_payload_queue_entry *pqp; | ||
764 | unsigned long pa; | ||
765 | int pn; | 1195 | int pn; |
1196 | int cpu; | ||
766 | char *cp; | 1197 | char *cp; |
1198 | unsigned long pa; | ||
1199 | struct bau_payload_queue_entry *pqp; | ||
1200 | struct bau_payload_queue_entry *pqp_malloc; | ||
1201 | struct bau_control *bcp; | ||
767 | 1202 | ||
768 | pqp = (struct bau_payload_queue_entry *) kmalloc_node( | 1203 | pqp = (struct bau_payload_queue_entry *) kmalloc_node( |
769 | (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), | 1204 | (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), |
770 | GFP_KERNEL, node); | 1205 | GFP_KERNEL, node); |
771 | BUG_ON(!pqp); | 1206 | BUG_ON(!pqp); |
1207 | pqp_malloc = pqp; | ||
772 | 1208 | ||
773 | cp = (char *)pqp + 31; | 1209 | cp = (char *)pqp + 31; |
774 | pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); | 1210 | pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); |
775 | bau_tablesp->va_queue_first = pqp; | 1211 | |
1212 | for_each_present_cpu(cpu) { | ||
1213 | if (pnode != uv_cpu_to_pnode(cpu)) | ||
1214 | continue; | ||
1215 | /* for every cpu on this pnode: */ | ||
1216 | bcp = &per_cpu(bau_control, cpu); | ||
1217 | bcp->va_queue_first = pqp; | ||
1218 | bcp->bau_msg_head = pqp; | ||
1219 | bcp->va_queue_last = pqp + (DEST_Q_SIZE - 1); | ||
1220 | } | ||
776 | /* | 1221 | /* |
777 | * need the pnode of where the memory was really allocated | 1222 | * need the pnode of where the memory was really allocated |
778 | */ | 1223 | */ |
779 | pa = uv_gpa(pqp); | 1224 | pa = uv_gpa(pqp); |
780 | pn = uv_gpa_to_pnode(pa); | 1225 | pn = pa >> uv_nshift; |
781 | uv_write_global_mmr64(pnode, | 1226 | uv_write_global_mmr64(pnode, |
782 | UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, | 1227 | UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, |
783 | ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | | 1228 | ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | |
784 | uv_physnodeaddr(pqp)); | 1229 | uv_physnodeaddr(pqp)); |
785 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, | 1230 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, |
786 | uv_physnodeaddr(pqp)); | 1231 | uv_physnodeaddr(pqp)); |
787 | bau_tablesp->va_queue_last = pqp + (DEST_Q_SIZE - 1); | ||
788 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, | 1232 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, |
789 | (unsigned long) | 1233 | (unsigned long) |
790 | uv_physnodeaddr(bau_tablesp->va_queue_last)); | 1234 | uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1))); |
1235 | /* in effect, all msg_type's are set to MSG_NOOP */ | ||
791 | memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); | 1236 | memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); |
792 | |||
793 | return pqp; | ||
794 | } | 1237 | } |
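The "+ 31" followed by the shift-right/shift-left pair above simply rounds the kmalloc'd buffer up to the next 32-byte boundary, presumably because the hardware expects an aligned payload queue base. The same idiom in isolation:

#include <stdint.h>

/* Round a pointer up to the next 32-byte boundary (no-op if already aligned). */
static void *align_up_32(void *p)
{
        uintptr_t a = (uintptr_t)p + 31;    /* step up to or past the boundary */

        return (void *)((a >> 5) << 5);     /* then clear the low five bits */
}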
795 | 1238 | ||
796 | /* | 1239 | /* |
797 | * Initialization of each UV blade's structures | 1240 | * Initialization of each UV hub's structures |
798 | */ | 1241 | */ |
799 | static int __init uv_init_blade(int blade) | 1242 | static void __init uv_init_uvhub(int uvhub, int vector) |
800 | { | 1243 | { |
801 | int node; | 1244 | int node; |
802 | int pnode; | 1245 | int pnode; |
803 | unsigned long pa; | ||
804 | unsigned long apicid; | 1246 | unsigned long apicid; |
805 | struct bau_desc *adp; | 1247 | |
806 | struct bau_payload_queue_entry *pqp; | 1248 | node = uvhub_to_first_node(uvhub); |
807 | struct bau_control *bau_tablesp; | 1249 | pnode = uv_blade_to_pnode(uvhub); |
808 | 1250 | uv_activation_descriptor_init(node, pnode); | |
809 | node = blade_to_first_node(blade); | 1251 | uv_payload_queue_init(node, pnode); |
810 | bau_tablesp = uv_table_bases_init(blade, node); | ||
811 | pnode = uv_blade_to_pnode(blade); | ||
812 | adp = uv_activation_descriptor_init(node, pnode); | ||
813 | pqp = uv_payload_queue_init(node, pnode, bau_tablesp); | ||
814 | uv_table_bases_finish(blade, bau_tablesp, adp); | ||
815 | /* | 1252 | /* |
816 | * the below initialization can't be in firmware because the | 1253 | * the below initialization can't be in firmware because the |
817 | * messaging IRQ will be determined by the OS | 1254 | * messaging IRQ will be determined by the OS |
818 | */ | 1255 | */ |
819 | apicid = blade_to_first_apicid(blade); | 1256 | apicid = uvhub_to_first_apicid(uvhub); |
820 | pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); | ||
821 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, | 1257 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, |
822 | ((apicid << 32) | UV_BAU_MESSAGE)); | 1258 | ((apicid << 32) | vector)); |
823 | return 0; | 1259 | } |
1260 | |||
1261 | /* | ||
1262 | * initialize the bau_control structure for each cpu | ||
1263 | */ | ||
1264 | static void uv_init_per_cpu(int nuvhubs) | ||
1265 | { | ||
1266 | int i, j, k; | ||
1267 | int cpu; | ||
1268 | int pnode; | ||
1269 | int uvhub; | ||
1270 | short socket = 0; | ||
1271 | struct bau_control *bcp; | ||
1272 | struct uvhub_desc *bdp; | ||
1273 | struct socket_desc *sdp; | ||
1274 | struct bau_control *hmaster = NULL; | ||
1275 | struct bau_control *smaster = NULL; | ||
1276 | struct socket_desc { | ||
1277 | short num_cpus; | ||
1278 | short cpu_number[16]; | ||
1279 | }; | ||
1280 | struct uvhub_desc { | ||
1281 | short num_sockets; | ||
1282 | short num_cpus; | ||
1283 | short uvhub; | ||
1284 | short pnode; | ||
1285 | struct socket_desc socket[2]; | ||
1286 | }; | ||
1287 | struct uvhub_desc *uvhub_descs; | ||
1288 | |||
1289 | uvhub_descs = (struct uvhub_desc *) | ||
1290 | kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); | ||
1291 | memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); | ||
1292 | for_each_present_cpu(cpu) { | ||
1293 | bcp = &per_cpu(bau_control, cpu); | ||
1294 | memset(bcp, 0, sizeof(struct bau_control)); | ||
1295 | spin_lock_init(&bcp->masks_lock); | ||
1296 | bcp->max_concurrent = uv_bau_max_concurrent; | ||
1297 | pnode = uv_cpu_hub_info(cpu)->pnode; | ||
1298 | uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; | ||
1299 | bdp = &uvhub_descs[uvhub]; | ||
1300 | bdp->num_cpus++; | ||
1301 | bdp->uvhub = uvhub; | ||
1302 | bdp->pnode = pnode; | ||
1303 | /* time interval to catch a hardware stay-busy bug */ | ||
1304 | bcp->timeout_interval = millisec_2_cycles(3); | ||
1305 | /* kludge: assume uv_hub.h is constant */ | ||
1306 | socket = (cpu_physical_id(cpu)>>5)&1; | ||
1307 | if (socket >= bdp->num_sockets) | ||
1308 | bdp->num_sockets = socket+1; | ||
1309 | sdp = &bdp->socket[socket]; | ||
1310 | sdp->cpu_number[sdp->num_cpus] = cpu; | ||
1311 | sdp->num_cpus++; | ||
1312 | } | ||
1313 | socket = 0; | ||
1314 | for_each_possible_blade(uvhub) { | ||
1315 | bdp = &uvhub_descs[uvhub]; | ||
1316 | for (i = 0; i < bdp->num_sockets; i++) { | ||
1317 | sdp = &bdp->socket[i]; | ||
1318 | for (j = 0; j < sdp->num_cpus; j++) { | ||
1319 | cpu = sdp->cpu_number[j]; | ||
1320 | bcp = &per_cpu(bau_control, cpu); | ||
1321 | bcp->cpu = cpu; | ||
1322 | if (j == 0) { | ||
1323 | smaster = bcp; | ||
1324 | if (i == 0) | ||
1325 | hmaster = bcp; | ||
1326 | } | ||
1327 | bcp->cpus_in_uvhub = bdp->num_cpus; | ||
1328 | bcp->cpus_in_socket = sdp->num_cpus; | ||
1329 | bcp->socket_master = smaster; | ||
1330 | bcp->uvhub_master = hmaster; | ||
1331 | for (k = 0; k < DEST_Q_SIZE; k++) | ||
1332 | bcp->socket_acknowledge_count[k] = 0; | ||
1333 | bcp->uvhub_cpu = | ||
1334 | uv_cpu_hub_info(cpu)->blade_processor_id; | ||
1335 | } | ||
1336 | socket++; | ||
1337 | } | ||
1338 | } | ||
1339 | kfree(uvhub_descs); | ||
824 | } | 1340 | } |
825 | 1341 | ||
826 | /* | 1342 | /* |
@@ -828,38 +1344,54 @@ static int __init uv_init_blade(int blade) | |||
828 | */ | 1344 | */ |
829 | static int __init uv_bau_init(void) | 1345 | static int __init uv_bau_init(void) |
830 | { | 1346 | { |
831 | int blade; | 1347 | int uvhub; |
832 | int nblades; | 1348 | int pnode; |
1349 | int nuvhubs; | ||
833 | int cur_cpu; | 1350 | int cur_cpu; |
1351 | int vector; | ||
1352 | unsigned long mmr; | ||
834 | 1353 | ||
835 | if (!is_uv_system()) | 1354 | if (!is_uv_system()) |
836 | return 0; | 1355 | return 0; |
837 | 1356 | ||
1357 | if (nobau) | ||
1358 | return 0; | ||
1359 | |||
838 | for_each_possible_cpu(cur_cpu) | 1360 | for_each_possible_cpu(cur_cpu) |
839 | zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), | 1361 | zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), |
840 | GFP_KERNEL, cpu_to_node(cur_cpu)); | 1362 | GFP_KERNEL, cpu_to_node(cur_cpu)); |
841 | 1363 | ||
842 | uv_bau_retry_limit = 1; | 1364 | uv_bau_max_concurrent = MAX_BAU_CONCURRENT; |
1365 | uv_nshift = uv_hub_info->m_val; | ||
843 | uv_mmask = (1UL << uv_hub_info->m_val) - 1; | 1366 | uv_mmask = (1UL << uv_hub_info->m_val) - 1; |
844 | nblades = uv_num_possible_blades(); | 1367 | nuvhubs = uv_num_possible_blades(); |
845 | 1368 | ||
846 | uv_bau_table_bases = (struct bau_control **) | 1369 | uv_init_per_cpu(nuvhubs); |
847 | kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL); | ||
848 | BUG_ON(!uv_bau_table_bases); | ||
849 | 1370 | ||
850 | uv_partition_base_pnode = 0x7fffffff; | 1371 | uv_partition_base_pnode = 0x7fffffff; |
851 | for (blade = 0; blade < nblades; blade++) | 1372 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) |
852 | if (uv_blade_nr_possible_cpus(blade) && | 1373 | if (uv_blade_nr_possible_cpus(uvhub) && |
853 | (uv_blade_to_pnode(blade) < uv_partition_base_pnode)) | 1374 | (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) |
854 | uv_partition_base_pnode = uv_blade_to_pnode(blade); | 1375 | uv_partition_base_pnode = uv_blade_to_pnode(uvhub); |
855 | for (blade = 0; blade < nblades; blade++) | 1376 | |
856 | if (uv_blade_nr_possible_cpus(blade)) | 1377 | vector = UV_BAU_MESSAGE; |
857 | uv_init_blade(blade); | 1378 | for_each_possible_blade(uvhub) |
858 | 1379 | if (uv_blade_nr_possible_cpus(uvhub)) | |
859 | alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); | 1380 | uv_init_uvhub(uvhub, vector); |
1381 | |||
860 | uv_enable_timeouts(); | 1382 | uv_enable_timeouts(); |
1383 | alloc_intr_gate(vector, uv_bau_message_intr1); | ||
1384 | |||
1385 | for_each_possible_blade(uvhub) { | ||
1386 | pnode = uv_blade_to_pnode(uvhub); | ||
1387 | /* INIT the bau */ | ||
1388 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, | ||
1389 | ((unsigned long)1 << 63)); | ||
1390 | mmr = 1; /* should be 1 to broadcast to both sockets */ | ||
1391 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST, mmr); | ||
1392 | } | ||
861 | 1393 | ||
862 | return 0; | 1394 | return 0; |
863 | } | 1395 | } |
864 | __initcall(uv_bau_init); | 1396 | core_initcall(uv_bau_init); |
865 | __initcall(uv_ptc_init); | 1397 | core_initcall(uv_ptc_init); |
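A note on the payload-queue setup in uv_payload_queue_init() above: the kmalloc_node() result is bumped by 31 and then has its low five bits cleared, so the queue programmed into the hub MMRs starts on a 32-byte boundary inside the slightly oversized allocation. A minimal user-space sketch of that rounding, with malloc() standing in for kmalloc_node() and a made-up size:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        /* stand-in for the (DEST_Q_SIZE + 1) entry allocation above */
        void *raw = malloc(8 * 64 + 32);
        char *cp = (char *)raw + 31;                       /* step to or past the next boundary */
        unsigned long pqp = ((unsigned long)cp >> 5) << 5; /* clear the low five bits */

        printf("raw = %p, aligned = %#lx, remainder = %lu\n",
               raw, pqp, pqp & 31UL);
        free(raw);
        return 0;
}

The extra 32 bytes in the allocation guarantee that rounding up never walks past the end of the buffer.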
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1168e4454188..142d70c74b02 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/kprobes.h> | 15 | #include <linux/kprobes.h> |
16 | #include <linux/uaccess.h> | 16 | #include <linux/uaccess.h> |
17 | #include <linux/kdebug.h> | 17 | #include <linux/kdebug.h> |
18 | #include <linux/kgdb.h> | ||
18 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
19 | #include <linux/module.h> | 20 | #include <linux/module.h> |
20 | #include <linux/ptrace.h> | 21 | #include <linux/ptrace.h> |
@@ -108,15 +109,6 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) | |||
108 | dec_preempt_count(); | 109 | dec_preempt_count(); |
109 | } | 110 | } |
110 | 111 | ||
111 | #ifdef CONFIG_X86_32 | ||
112 | static inline void | ||
113 | die_if_kernel(const char *str, struct pt_regs *regs, long err) | ||
114 | { | ||
115 | if (!user_mode_vm(regs)) | ||
116 | die(str, regs, err); | ||
117 | } | ||
118 | #endif | ||
119 | |||
120 | static void __kprobes | 112 | static void __kprobes |
121 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, | 113 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, |
122 | long error_code, siginfo_t *info) | 114 | long error_code, siginfo_t *info) |
@@ -460,6 +452,11 @@ void restart_nmi(void) | |||
460 | /* May run on IST stack. */ | 452 | /* May run on IST stack. */ |
461 | dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) | 453 | dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) |
462 | { | 454 | { |
455 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | ||
456 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) | ||
457 | == NOTIFY_STOP) | ||
458 | return; | ||
459 | #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ | ||
463 | #ifdef CONFIG_KPROBES | 460 | #ifdef CONFIG_KPROBES |
464 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) | 461 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) |
465 | == NOTIFY_STOP) | 462 | == NOTIFY_STOP) |
@@ -543,11 +540,11 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
543 | 540 | ||
544 | /* DR6 may or may not be cleared by the CPU */ | 541 | /* DR6 may or may not be cleared by the CPU */ |
545 | set_debugreg(0, 6); | 542 | set_debugreg(0, 6); |
543 | |||
546 | /* | 544 | /* |
547 | * The processor cleared BTF, so don't mark that we need it set. | 545 | * The processor cleared BTF, so don't mark that we need it set. |
548 | */ | 546 | */ |
549 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); | 547 | clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP); |
550 | tsk->thread.debugctlmsr = 0; | ||
551 | 548 | ||
552 | /* Store the virtualized DR6 value */ | 549 | /* Store the virtualized DR6 value */ |
553 | tsk->thread.debugreg6 = dr6; | 550 | tsk->thread.debugreg6 = dr6; |
@@ -585,55 +582,67 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
585 | return; | 582 | return; |
586 | } | 583 | } |
587 | 584 | ||
588 | #ifdef CONFIG_X86_64 | ||
589 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) | ||
590 | { | ||
591 | if (fixup_exception(regs)) | ||
592 | return 1; | ||
593 | |||
594 | notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); | ||
595 | /* Illegal floating point operation in the kernel */ | ||
596 | current->thread.trap_no = trapnr; | ||
597 | die(str, regs, 0); | ||
598 | return 0; | ||
599 | } | ||
600 | #endif | ||
601 | |||
602 | /* | 585 | /* |
603 | * Note that we play around with the 'TS' bit in an attempt to get | 586 | * Note that we play around with the 'TS' bit in an attempt to get |
604 | * the correct behaviour even in the presence of the asynchronous | 587 | * the correct behaviour even in the presence of the asynchronous |
605 | * IRQ13 behaviour | 588 | * IRQ13 behaviour |
606 | */ | 589 | */ |
607 | void math_error(void __user *ip) | 590 | void math_error(struct pt_regs *regs, int error_code, int trapnr) |
608 | { | 591 | { |
609 | struct task_struct *task; | 592 | struct task_struct *task = current; |
610 | siginfo_t info; | 593 | siginfo_t info; |
611 | unsigned short cwd, swd, err; | 594 | unsigned short err; |
595 | char *str = (trapnr == 16) ? "fpu exception" : "simd exception"; | ||
596 | |||
597 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP) | ||
598 | return; | ||
599 | conditional_sti(regs); | ||
600 | |||
601 | if (!user_mode_vm(regs)) | ||
602 | { | ||
603 | if (!fixup_exception(regs)) { | ||
604 | task->thread.error_code = error_code; | ||
605 | task->thread.trap_no = trapnr; | ||
606 | die(str, regs, error_code); | ||
607 | } | ||
608 | return; | ||
609 | } | ||
612 | 610 | ||
613 | /* | 611 | /* |
614 | * Save the info for the exception handler and clear the error. | 612 | * Save the info for the exception handler and clear the error. |
615 | */ | 613 | */ |
616 | task = current; | ||
617 | save_init_fpu(task); | 614 | save_init_fpu(task); |
618 | task->thread.trap_no = 16; | 615 | task->thread.trap_no = trapnr; |
619 | task->thread.error_code = 0; | 616 | task->thread.error_code = error_code; |
620 | info.si_signo = SIGFPE; | 617 | info.si_signo = SIGFPE; |
621 | info.si_errno = 0; | 618 | info.si_errno = 0; |
622 | info.si_addr = ip; | 619 | info.si_addr = (void __user *)regs->ip; |
623 | /* | 620 | if (trapnr == 16) { |
624 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | 621 | unsigned short cwd, swd; |
625 | * status. 0x3f is the exception bits in these regs, 0x200 is the | 622 | /* |
626 | * C1 reg you need in case of a stack fault, 0x040 is the stack | 623 | * (~cwd & swd) will mask out exceptions that are not set to unmasked |
627 | * fault bit. We should only be taking one exception at a time, | 624 | * status. 0x3f is the exception bits in these regs, 0x200 is the |
628 | * so if this combination doesn't produce any single exception, | 625 | * C1 reg you need in case of a stack fault, 0x040 is the stack |
629 | * then we have a bad program that isn't synchronizing its FPU usage | 626 | * fault bit. We should only be taking one exception at a time, |
630 | * and it will suffer the consequences since we won't be able to | 627 | * so if this combination doesn't produce any single exception, |
631 | * fully reproduce the context of the exception | 628 | * then we have a bad program that isn't synchronizing its FPU usage |
632 | */ | 629 | * and it will suffer the consequences since we won't be able to |
633 | cwd = get_fpu_cwd(task); | 630 | * fully reproduce the context of the exception |
634 | swd = get_fpu_swd(task); | 631 | */ |
632 | cwd = get_fpu_cwd(task); | ||
633 | swd = get_fpu_swd(task); | ||
635 | 634 | ||
636 | err = swd & ~cwd; | 635 | err = swd & ~cwd; |
636 | } else { | ||
637 | /* | ||
638 | * The SIMD FPU exceptions are handled a little differently, as there | ||
639 | * is only a single status/control register. Thus, to determine which | ||
640 | * unmasked exception was caught we must mask the exception mask bits | ||
641 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. | ||
642 | */ | ||
643 | unsigned short mxcsr = get_fpu_mxcsr(task); | ||
644 | err = ~(mxcsr >> 7) & mxcsr; | ||
645 | } | ||
637 | 646 | ||
638 | if (err & 0x001) { /* Invalid op */ | 647 | if (err & 0x001) { /* Invalid op */ |
639 | /* | 648 | /* |
@@ -662,97 +671,17 @@ void math_error(void __user *ip) | |||
662 | 671 | ||
663 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) | 672 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) |
664 | { | 673 | { |
665 | conditional_sti(regs); | ||
666 | |||
667 | #ifdef CONFIG_X86_32 | 674 | #ifdef CONFIG_X86_32 |
668 | ignore_fpu_irq = 1; | 675 | ignore_fpu_irq = 1; |
669 | #else | ||
670 | if (!user_mode(regs) && | ||
671 | kernel_math_error(regs, "kernel x87 math error", 16)) | ||
672 | return; | ||
673 | #endif | 676 | #endif |
674 | 677 | ||
675 | math_error((void __user *)regs->ip); | 678 | math_error(regs, error_code, 16); |
676 | } | ||
677 | |||
678 | static void simd_math_error(void __user *ip) | ||
679 | { | ||
680 | struct task_struct *task; | ||
681 | siginfo_t info; | ||
682 | unsigned short mxcsr; | ||
683 | |||
684 | /* | ||
685 | * Save the info for the exception handler and clear the error. | ||
686 | */ | ||
687 | task = current; | ||
688 | save_init_fpu(task); | ||
689 | task->thread.trap_no = 19; | ||
690 | task->thread.error_code = 0; | ||
691 | info.si_signo = SIGFPE; | ||
692 | info.si_errno = 0; | ||
693 | info.si_code = __SI_FAULT; | ||
694 | info.si_addr = ip; | ||
695 | /* | ||
696 | * The SIMD FPU exceptions are handled a little differently, as there | ||
697 | * is only a single status/control register. Thus, to determine which | ||
698 | * unmasked exception was caught we must mask the exception mask bits | ||
699 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. | ||
700 | */ | ||
701 | mxcsr = get_fpu_mxcsr(task); | ||
702 | switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { | ||
703 | case 0x000: | ||
704 | default: | ||
705 | break; | ||
706 | case 0x001: /* Invalid Op */ | ||
707 | info.si_code = FPE_FLTINV; | ||
708 | break; | ||
709 | case 0x002: /* Denormalize */ | ||
710 | case 0x010: /* Underflow */ | ||
711 | info.si_code = FPE_FLTUND; | ||
712 | break; | ||
713 | case 0x004: /* Zero Divide */ | ||
714 | info.si_code = FPE_FLTDIV; | ||
715 | break; | ||
716 | case 0x008: /* Overflow */ | ||
717 | info.si_code = FPE_FLTOVF; | ||
718 | break; | ||
719 | case 0x020: /* Precision */ | ||
720 | info.si_code = FPE_FLTRES; | ||
721 | break; | ||
722 | } | ||
723 | force_sig_info(SIGFPE, &info, task); | ||
724 | } | 679 | } |
725 | 680 | ||
726 | dotraplinkage void | 681 | dotraplinkage void |
727 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) | 682 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) |
728 | { | 683 | { |
729 | conditional_sti(regs); | 684 | math_error(regs, error_code, 19); |
730 | |||
731 | #ifdef CONFIG_X86_32 | ||
732 | if (cpu_has_xmm) { | ||
733 | /* Handle SIMD FPU exceptions on PIII+ processors. */ | ||
734 | ignore_fpu_irq = 1; | ||
735 | simd_math_error((void __user *)regs->ip); | ||
736 | return; | ||
737 | } | ||
738 | /* | ||
739 | * Handle strange cache flush from user space exception | ||
740 | * in all other cases. This is undocumented behaviour. | ||
741 | */ | ||
742 | if (regs->flags & X86_VM_MASK) { | ||
743 | handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); | ||
744 | return; | ||
745 | } | ||
746 | current->thread.trap_no = 19; | ||
747 | current->thread.error_code = error_code; | ||
748 | die_if_kernel("cache flush denied", regs, error_code); | ||
749 | force_sig(SIGSEGV, current); | ||
750 | #else | ||
751 | if (!user_mode(regs) && | ||
752 | kernel_math_error(regs, "kernel simd math error", 19)) | ||
753 | return; | ||
754 | simd_math_error((void __user *)regs->ip); | ||
755 | #endif | ||
756 | } | 685 | } |
757 | 686 | ||
758 | dotraplinkage void | 687 | dotraplinkage void |
@@ -879,6 +808,16 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | |||
879 | } | 808 | } |
880 | #endif | 809 | #endif |
881 | 810 | ||
811 | /* Set of traps needed for early debugging. */ | ||
812 | void __init early_trap_init(void) | ||
813 | { | ||
814 | set_intr_gate_ist(1, &debug, DEBUG_STACK); | ||
815 | /* int3 can be called from all */ | ||
816 | set_system_intr_gate_ist(3, &int3, DEBUG_STACK); | ||
817 | set_intr_gate(14, &page_fault); | ||
818 | load_idt(&idt_descr); | ||
819 | } | ||
820 | |||
882 | void __init trap_init(void) | 821 | void __init trap_init(void) |
883 | { | 822 | { |
884 | int i; | 823 | int i; |
@@ -892,10 +831,7 @@ void __init trap_init(void) | |||
892 | #endif | 831 | #endif |
893 | 832 | ||
894 | set_intr_gate(0, ÷_error); | 833 | set_intr_gate(0, ÷_error); |
895 | set_intr_gate_ist(1, &debug, DEBUG_STACK); | ||
896 | set_intr_gate_ist(2, &nmi, NMI_STACK); | 834 | set_intr_gate_ist(2, &nmi, NMI_STACK); |
897 | /* int3 can be called from all */ | ||
898 | set_system_intr_gate_ist(3, &int3, DEBUG_STACK); | ||
899 | /* int4 can be called from all */ | 835 | /* int4 can be called from all */ |
900 | set_system_intr_gate(4, &overflow); | 836 | set_system_intr_gate(4, &overflow); |
901 | set_intr_gate(5, &bounds); | 837 | set_intr_gate(5, &bounds); |
@@ -911,7 +847,6 @@ void __init trap_init(void) | |||
911 | set_intr_gate(11, &segment_not_present); | 847 | set_intr_gate(11, &segment_not_present); |
912 | set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); | 848 | set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); |
913 | set_intr_gate(13, &general_protection); | 849 | set_intr_gate(13, &general_protection); |
914 | set_intr_gate(14, &page_fault); | ||
915 | set_intr_gate(15, &spurious_interrupt_bug); | 850 | set_intr_gate(15, &spurious_interrupt_bug); |
916 | set_intr_gate(16, &coprocessor_error); | 851 | set_intr_gate(16, &coprocessor_error); |
917 | set_intr_gate(17, &alignment_check); | 852 | set_intr_gate(17, &alignment_check); |
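The reworked math_error() above derives the set of unmasked FPU exceptions two ways: for trap 16 it uses swd & ~cwd (x87 status flags with the masked exceptions filtered out), and for trap 19 it shifts the MXCSR mask bits (bits 7-12) down over the flag bits (bits 0-5). A stand-alone sketch of that arithmetic with made-up register values (0x037f and 0x1f80 are the customary x87/MXCSR reset masks, used here only to build an example):

#include <stdio.h>

int main(void)
{
        /* x87: leave only divide-by-zero (#Z, bit 2) unmasked, and flag it */
        unsigned short cwd = 0x037f & ~0x0004;  /* control word: masks in bits 0-5 */
        unsigned short swd = 0x0004;            /* status word: #Z flagged */
        unsigned short err16 = swd & ~cwd;      /* trap 16 rule */

        /* SIMD: MXCSR holds flags in bits 0-5 and masks in bits 7-12 */
        unsigned int mxcsr = (0x1f80 & ~(0x0004 << 7)) | 0x0004;
        unsigned short err19 = ~(mxcsr >> 7) & mxcsr & 0x3f; /* only bits 0-5 are tested */

        printf("x87 err  = %#x\n", err16);      /* 0x4 -> FPE_FLTDIV */
        printf("SIMD err = %#x\n", err19);      /* 0x4 -> FPE_FLTDIV */
        return 0;
}

In both cases the handler then walks the low six bits to pick the si_code, which is why only one layout of "flags AND NOT masks" is needed.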
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index 1d40336b030a..1132129db792 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c | |||
@@ -44,7 +44,7 @@ static void uv_ack_apic(unsigned int irq) | |||
44 | ack_APIC_irq(); | 44 | ack_APIC_irq(); |
45 | } | 45 | } |
46 | 46 | ||
47 | struct irq_chip uv_irq_chip = { | 47 | static struct irq_chip uv_irq_chip = { |
48 | .name = "UV-CORE", | 48 | .name = "UV-CORE", |
49 | .startup = uv_noop_ret, | 49 | .startup = uv_noop_ret, |
50 | .shutdown = uv_noop, | 50 | .shutdown = uv_noop, |
@@ -141,7 +141,7 @@ int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) | |||
141 | */ | 141 | */ |
142 | static int | 142 | static int |
143 | arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | 143 | arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, |
144 | unsigned long mmr_offset, int restrict) | 144 | unsigned long mmr_offset, int limit) |
145 | { | 145 | { |
146 | const struct cpumask *eligible_cpu = cpumask_of(cpu); | 146 | const struct cpumask *eligible_cpu = cpumask_of(cpu); |
147 | struct irq_desc *desc = irq_to_desc(irq); | 147 | struct irq_desc *desc = irq_to_desc(irq); |
@@ -160,7 +160,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
160 | if (err != 0) | 160 | if (err != 0) |
161 | return err; | 161 | return err; |
162 | 162 | ||
163 | if (restrict == UV_AFFINITY_CPU) | 163 | if (limit == UV_AFFINITY_CPU) |
164 | desc->status |= IRQ_NO_BALANCING; | 164 | desc->status |= IRQ_NO_BALANCING; |
165 | else | 165 | else |
166 | desc->status |= IRQ_MOVE_PCNTXT; | 166 | desc->status |= IRQ_MOVE_PCNTXT; |
@@ -214,7 +214,7 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
214 | unsigned long mmr_value; | 214 | unsigned long mmr_value; |
215 | struct uv_IO_APIC_route_entry *entry; | 215 | struct uv_IO_APIC_route_entry *entry; |
216 | unsigned long mmr_offset; | 216 | unsigned long mmr_offset; |
217 | unsigned mmr_pnode; | 217 | int mmr_pnode; |
218 | 218 | ||
219 | if (set_desc_affinity(desc, mask, &dest)) | 219 | if (set_desc_affinity(desc, mask, &dest)) |
220 | return -1; | 220 | return -1; |
@@ -248,7 +248,7 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
248 | * interrupt is raised. | 248 | * interrupt is raised. |
249 | */ | 249 | */ |
250 | int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, | 250 | int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, |
251 | unsigned long mmr_offset, int restrict) | 251 | unsigned long mmr_offset, int limit) |
252 | { | 252 | { |
253 | int irq, ret; | 253 | int irq, ret; |
254 | 254 | ||
@@ -258,7 +258,7 @@ int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, | |||
258 | return -EBUSY; | 258 | return -EBUSY; |
259 | 259 | ||
260 | ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, | 260 | ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, |
261 | restrict); | 261 | limit); |
262 | if (ret == irq) | 262 | if (ret == irq) |
263 | uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); | 263 | uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); |
264 | else | 264 | else |
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 693920b22496..1b950d151e58 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -54,7 +54,6 @@ EXPORT_SYMBOL(memcpy); | |||
54 | EXPORT_SYMBOL(__memcpy); | 54 | EXPORT_SYMBOL(__memcpy); |
55 | 55 | ||
56 | EXPORT_SYMBOL(empty_zero_page); | 56 | EXPORT_SYMBOL(empty_zero_page); |
57 | EXPORT_SYMBOL(init_level4_pgt); | ||
58 | #ifndef CONFIG_PARAVIRT | 57 | #ifndef CONFIG_PARAVIRT |
59 | EXPORT_SYMBOL(native_load_gs_index); | 58 | EXPORT_SYMBOL(native_load_gs_index); |
60 | #endif | 59 | #endif |
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 782c3a362ec6..37e68fc5e24a 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -99,7 +99,7 @@ int save_i387_xstate(void __user *buf) | |||
99 | if (err) | 99 | if (err) |
100 | return err; | 100 | return err; |
101 | 101 | ||
102 | if (task_thread_info(tsk)->status & TS_XSAVE) | 102 | if (use_xsave()) |
103 | err = xsave_user(buf); | 103 | err = xsave_user(buf); |
104 | else | 104 | else |
105 | err = fxsave_user(buf); | 105 | err = fxsave_user(buf); |
@@ -109,14 +109,14 @@ int save_i387_xstate(void __user *buf) | |||
109 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 109 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
110 | stts(); | 110 | stts(); |
111 | } else { | 111 | } else { |
112 | if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, | 112 | if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, |
113 | xstate_size)) | 113 | xstate_size)) |
114 | return -1; | 114 | return -1; |
115 | } | 115 | } |
116 | 116 | ||
117 | clear_used_math(); /* trigger finit */ | 117 | clear_used_math(); /* trigger finit */ |
118 | 118 | ||
119 | if (task_thread_info(tsk)->status & TS_XSAVE) { | 119 | if (use_xsave()) { |
120 | struct _fpstate __user *fx = buf; | 120 | struct _fpstate __user *fx = buf; |
121 | struct _xstate __user *x = buf; | 121 | struct _xstate __user *x = buf; |
122 | u64 xstate_bv; | 122 | u64 xstate_bv; |
@@ -225,7 +225,7 @@ int restore_i387_xstate(void __user *buf) | |||
225 | clts(); | 225 | clts(); |
226 | task_thread_info(current)->status |= TS_USEDFPU; | 226 | task_thread_info(current)->status |= TS_USEDFPU; |
227 | } | 227 | } |
228 | if (task_thread_info(tsk)->status & TS_XSAVE) | 228 | if (use_xsave()) |
229 | err = restore_user_xstate(buf); | 229 | err = restore_user_xstate(buf); |
230 | else | 230 | else |
231 | err = fxrstor_checking((__force struct i387_fxsave_struct *) | 231 | err = fxrstor_checking((__force struct i387_fxsave_struct *) |
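The xsave.c hunks drop the per-thread TS_XSAVE status bit in favour of a use_xsave() helper, so the save/restore path keys off the CPU capability rather than thread_info state. The helper itself is defined elsewhere in this merge; purely as an illustrative user-space stand-in (hypothetical code, not the kernel's implementation), the underlying capability is CPUID leaf 1, ECX bit 26:

#include <stdio.h>
#include <cpuid.h>      /* GCC/Clang helper, x86 only */

static int use_xsave(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                return 0;
        return !!(ecx & (1u << 26));    /* CPUID.1:ECX.XSAVE[bit 26] */
}

int main(void)
{
        printf("XSAVE supported: %s\n", use_xsave() ? "yes" : "no");
        return 0;
}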
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4dade6ac0827..5ac0bb465ed6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/kvm_emulate.h> | 33 | #include <asm/kvm_emulate.h> |
34 | 34 | ||
35 | #include "x86.h" | 35 | #include "x86.h" |
36 | #include "tss.h" | ||
36 | 37 | ||
37 | /* | 38 | /* |
38 | * Opcode effective-address decode tables. | 39 | * Opcode effective-address decode tables. |
@@ -50,6 +51,8 @@ | |||
50 | #define DstReg (2<<1) /* Register operand. */ | 51 | #define DstReg (2<<1) /* Register operand. */ |
51 | #define DstMem (3<<1) /* Memory operand. */ | 52 | #define DstMem (3<<1) /* Memory operand. */ |
52 | #define DstAcc (4<<1) /* Destination Accumulator */ | 53 | #define DstAcc (4<<1) /* Destination Accumulator */ |
54 | #define DstDI (5<<1) /* Destination is in ES:(E)DI */ | ||
55 | #define DstMem64 (6<<1) /* 64bit memory operand */ | ||
53 | #define DstMask (7<<1) | 56 | #define DstMask (7<<1) |
54 | /* Source operand type. */ | 57 | /* Source operand type. */ |
55 | #define SrcNone (0<<4) /* No source operand. */ | 58 | #define SrcNone (0<<4) /* No source operand. */ |
@@ -63,6 +66,7 @@ | |||
63 | #define SrcOne (7<<4) /* Implied '1' */ | 66 | #define SrcOne (7<<4) /* Implied '1' */ |
64 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ | 67 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ |
65 | #define SrcImmU (9<<4) /* Immediate operand, unsigned */ | 68 | #define SrcImmU (9<<4) /* Immediate operand, unsigned */ |
69 | #define SrcSI (0xa<<4) /* Source is in the DS:RSI */ | ||
66 | #define SrcMask (0xf<<4) | 70 | #define SrcMask (0xf<<4) |
67 | /* Generic ModRM decode. */ | 71 | /* Generic ModRM decode. */ |
68 | #define ModRM (1<<8) | 72 | #define ModRM (1<<8) |
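With the DstDI and SrcSI flags added above, the string instructions' implicit operands are decoded as ordinary memory operands: the emulator builds a pointer from the ES base plus (E)DI for the destination and the DS (or override) base plus (E)SI for the source, instead of special-casing them as ImplicitOps. The addressing rule itself is just base-plus-index; a toy illustration with made-up bases and registers:

#include <stdio.h>

int main(void)
{
        unsigned long ds_base = 0x10000, es_base = 0x20000;
        unsigned long rsi = 0x100, rdi = 0x200;

        unsigned long src = ds_base + rsi;      /* SrcSI resolves to DS:(E)SI */
        unsigned long dst = es_base + rdi;      /* DstDI resolves to ES:(E)DI */

        printf("movs: read %#lx, write %#lx\n", src, dst);
        return 0;
}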
@@ -85,6 +89,9 @@ | |||
85 | #define Src2ImmByte (2<<29) | 89 | #define Src2ImmByte (2<<29) |
86 | #define Src2One (3<<29) | 90 | #define Src2One (3<<29) |
87 | #define Src2Imm16 (4<<29) | 91 | #define Src2Imm16 (4<<29) |
92 | #define Src2Mem16 (5<<29) /* Used for Ep encoding. First argument has to be | ||
93 | in memory and second argument is located | ||
94 | immediately after the first one in memory. */ | ||
88 | #define Src2Mask (7<<29) | 95 | #define Src2Mask (7<<29) |
89 | 96 | ||
90 | enum { | 97 | enum { |
@@ -147,8 +154,8 @@ static u32 opcode_table[256] = { | |||
147 | 0, 0, 0, 0, | 154 | 0, 0, 0, 0, |
148 | /* 0x68 - 0x6F */ | 155 | /* 0x68 - 0x6F */ |
149 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, | 156 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, |
150 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ | 157 | DstDI | ByteOp | Mov | String, DstDI | Mov | String, /* insb, insw/insd */ |
151 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ | 158 | SrcSI | ByteOp | ImplicitOps | String, SrcSI | ImplicitOps | String, /* outsb, outsw/outsd */ |
152 | /* 0x70 - 0x77 */ | 159 | /* 0x70 - 0x77 */ |
153 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, | 160 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
154 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, | 161 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
@@ -173,12 +180,12 @@ static u32 opcode_table[256] = { | |||
173 | /* 0xA0 - 0xA7 */ | 180 | /* 0xA0 - 0xA7 */ |
174 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, | 181 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, |
175 | ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, | 182 | ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, |
176 | ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 183 | ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String, |
177 | ByteOp | ImplicitOps | String, ImplicitOps | String, | 184 | ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String, |
178 | /* 0xA8 - 0xAF */ | 185 | /* 0xA8 - 0xAF */ |
179 | 0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 186 | 0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String, |
180 | ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 187 | ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String, |
181 | ByteOp | ImplicitOps | String, ImplicitOps | String, | 188 | ByteOp | DstDI | String, DstDI | String, |
182 | /* 0xB0 - 0xB7 */ | 189 | /* 0xB0 - 0xB7 */ |
183 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, | 190 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, |
184 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, | 191 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, |
@@ -204,13 +211,13 @@ static u32 opcode_table[256] = { | |||
204 | 0, 0, 0, 0, 0, 0, 0, 0, | 211 | 0, 0, 0, 0, 0, 0, 0, 0, |
205 | /* 0xE0 - 0xE7 */ | 212 | /* 0xE0 - 0xE7 */ |
206 | 0, 0, 0, 0, | 213 | 0, 0, 0, 0, |
207 | ByteOp | SrcImmUByte, SrcImmUByte, | 214 | ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, |
208 | ByteOp | SrcImmUByte, SrcImmUByte, | 215 | ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, |
209 | /* 0xE8 - 0xEF */ | 216 | /* 0xE8 - 0xEF */ |
210 | SrcImm | Stack, SrcImm | ImplicitOps, | 217 | SrcImm | Stack, SrcImm | ImplicitOps, |
211 | SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, | 218 | SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, |
212 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 219 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, |
213 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 220 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, |
214 | /* 0xF0 - 0xF7 */ | 221 | /* 0xF0 - 0xF7 */ |
215 | 0, 0, 0, 0, | 222 | 0, 0, 0, 0, |
216 | ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, | 223 | ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, |
@@ -343,7 +350,8 @@ static u32 group_table[] = { | |||
343 | [Group5*8] = | 350 | [Group5*8] = |
344 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, | 351 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, |
345 | SrcMem | ModRM | Stack, 0, | 352 | SrcMem | ModRM | Stack, 0, |
346 | SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, | 353 | SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps, |
354 | SrcMem | ModRM | Stack, 0, | ||
347 | [Group7*8] = | 355 | [Group7*8] = |
348 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, | 356 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, |
349 | SrcNone | ModRM | DstMem | Mov, 0, | 357 | SrcNone | ModRM | DstMem | Mov, 0, |
@@ -353,14 +361,14 @@ static u32 group_table[] = { | |||
353 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, | 361 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, |
354 | DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, | 362 | DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, |
355 | [Group9*8] = | 363 | [Group9*8] = |
356 | 0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0, | 364 | 0, DstMem64 | ModRM | Lock, 0, 0, 0, 0, 0, 0, |
357 | }; | 365 | }; |
358 | 366 | ||
359 | static u32 group2_table[] = { | 367 | static u32 group2_table[] = { |
360 | [Group7*8] = | 368 | [Group7*8] = |
361 | SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM, | 369 | SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv, |
362 | SrcNone | ModRM | DstMem | Mov, 0, | 370 | SrcNone | ModRM | DstMem | Mov, 0, |
363 | SrcMem16 | ModRM | Mov, 0, | 371 | SrcMem16 | ModRM | Mov | Priv, 0, |
364 | [Group9*8] = | 372 | [Group9*8] = |
365 | 0, 0, 0, 0, 0, 0, 0, 0, | 373 | 0, 0, 0, 0, 0, 0, 0, 0, |
366 | }; | 374 | }; |
@@ -562,7 +570,7 @@ static u32 group2_table[] = { | |||
562 | #define insn_fetch(_type, _size, _eip) \ | 570 | #define insn_fetch(_type, _size, _eip) \ |
563 | ({ unsigned long _x; \ | 571 | ({ unsigned long _x; \ |
564 | rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ | 572 | rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ |
565 | if (rc != 0) \ | 573 | if (rc != X86EMUL_CONTINUE) \ |
566 | goto done; \ | 574 | goto done; \ |
567 | (_eip) += (_size); \ | 575 | (_eip) += (_size); \ |
568 | (_type)_x; \ | 576 | (_type)_x; \ |
@@ -638,40 +646,40 @@ static unsigned long ss_base(struct x86_emulate_ctxt *ctxt) | |||
638 | 646 | ||
639 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | 647 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, |
640 | struct x86_emulate_ops *ops, | 648 | struct x86_emulate_ops *ops, |
641 | unsigned long linear, u8 *dest) | 649 | unsigned long eip, u8 *dest) |
642 | { | 650 | { |
643 | struct fetch_cache *fc = &ctxt->decode.fetch; | 651 | struct fetch_cache *fc = &ctxt->decode.fetch; |
644 | int rc; | 652 | int rc; |
645 | int size; | 653 | int size, cur_size; |
646 | 654 | ||
647 | if (linear < fc->start || linear >= fc->end) { | 655 | if (eip == fc->end) { |
648 | size = min(15UL, PAGE_SIZE - offset_in_page(linear)); | 656 | cur_size = fc->end - fc->start; |
649 | rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL); | 657 | size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); |
650 | if (rc) | 658 | rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, |
659 | size, ctxt->vcpu, NULL); | ||
660 | if (rc != X86EMUL_CONTINUE) | ||
651 | return rc; | 661 | return rc; |
652 | fc->start = linear; | 662 | fc->end += size; |
653 | fc->end = linear + size; | ||
654 | } | 663 | } |
655 | *dest = fc->data[linear - fc->start]; | 664 | *dest = fc->data[eip - fc->start]; |
656 | return 0; | 665 | return X86EMUL_CONTINUE; |
657 | } | 666 | } |
658 | 667 | ||
659 | static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, | 668 | static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, |
660 | struct x86_emulate_ops *ops, | 669 | struct x86_emulate_ops *ops, |
661 | unsigned long eip, void *dest, unsigned size) | 670 | unsigned long eip, void *dest, unsigned size) |
662 | { | 671 | { |
663 | int rc = 0; | 672 | int rc; |
664 | 673 | ||
665 | /* x86 instructions are limited to 15 bytes. */ | 674 | /* x86 instructions are limited to 15 bytes. */ |
666 | if (eip + size - ctxt->decode.eip_orig > 15) | 675 | if (eip + size - ctxt->eip > 15) |
667 | return X86EMUL_UNHANDLEABLE; | 676 | return X86EMUL_UNHANDLEABLE; |
668 | eip += ctxt->cs_base; | ||
669 | while (size--) { | 677 | while (size--) { |
670 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); | 678 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); |
671 | if (rc) | 679 | if (rc != X86EMUL_CONTINUE) |
672 | return rc; | 680 | return rc; |
673 | } | 681 | } |
674 | return 0; | 682 | return X86EMUL_CONTINUE; |
675 | } | 683 | } |
676 | 684 | ||
677 | /* | 685 | /* |
@@ -702,7 +710,7 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, | |||
702 | *address = 0; | 710 | *address = 0; |
703 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, | 711 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, |
704 | ctxt->vcpu, NULL); | 712 | ctxt->vcpu, NULL); |
705 | if (rc) | 713 | if (rc != X86EMUL_CONTINUE) |
706 | return rc; | 714 | return rc; |
707 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, | 715 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, |
708 | ctxt->vcpu, NULL); | 716 | ctxt->vcpu, NULL); |
@@ -782,7 +790,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
782 | struct decode_cache *c = &ctxt->decode; | 790 | struct decode_cache *c = &ctxt->decode; |
783 | u8 sib; | 791 | u8 sib; |
784 | int index_reg = 0, base_reg = 0, scale; | 792 | int index_reg = 0, base_reg = 0, scale; |
785 | int rc = 0; | 793 | int rc = X86EMUL_CONTINUE; |
786 | 794 | ||
787 | if (c->rex_prefix) { | 795 | if (c->rex_prefix) { |
788 | c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */ | 796 | c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */ |
@@ -895,7 +903,7 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt, | |||
895 | struct x86_emulate_ops *ops) | 903 | struct x86_emulate_ops *ops) |
896 | { | 904 | { |
897 | struct decode_cache *c = &ctxt->decode; | 905 | struct decode_cache *c = &ctxt->decode; |
898 | int rc = 0; | 906 | int rc = X86EMUL_CONTINUE; |
899 | 907 | ||
900 | switch (c->ad_bytes) { | 908 | switch (c->ad_bytes) { |
901 | case 2: | 909 | case 2: |
@@ -916,14 +924,18 @@ int | |||
916 | x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | 924 | x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
917 | { | 925 | { |
918 | struct decode_cache *c = &ctxt->decode; | 926 | struct decode_cache *c = &ctxt->decode; |
919 | int rc = 0; | 927 | int rc = X86EMUL_CONTINUE; |
920 | int mode = ctxt->mode; | 928 | int mode = ctxt->mode; |
921 | int def_op_bytes, def_ad_bytes, group; | 929 | int def_op_bytes, def_ad_bytes, group; |
922 | 930 | ||
923 | /* Shadow copy of register state. Committed on successful emulation. */ | ||
924 | 931 | ||
932 | /* we cannot decode insn before we complete previous rep insn */ | ||
933 | WARN_ON(ctxt->restart); | ||
934 | |||
935 | /* Shadow copy of register state. Committed on successful emulation. */ | ||
925 | memset(c, 0, sizeof(struct decode_cache)); | 936 | memset(c, 0, sizeof(struct decode_cache)); |
926 | c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu); | 937 | c->eip = ctxt->eip; |
938 | c->fetch.start = c->fetch.end = c->eip; | ||
927 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); | 939 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); |
928 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 940 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
929 | 941 | ||
@@ -1015,11 +1027,6 @@ done_prefixes: | |||
1015 | } | 1027 | } |
1016 | } | 1028 | } |
1017 | 1029 | ||
1018 | if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | ||
1019 | kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction"); | ||
1020 | return -1; | ||
1021 | } | ||
1022 | |||
1023 | if (c->d & Group) { | 1030 | if (c->d & Group) { |
1024 | group = c->d & GroupMask; | 1031 | group = c->d & GroupMask; |
1025 | c->modrm = insn_fetch(u8, 1, c->eip); | 1032 | c->modrm = insn_fetch(u8, 1, c->eip); |
@@ -1046,7 +1053,7 @@ done_prefixes: | |||
1046 | rc = decode_modrm(ctxt, ops); | 1053 | rc = decode_modrm(ctxt, ops); |
1047 | else if (c->d & MemAbs) | 1054 | else if (c->d & MemAbs) |
1048 | rc = decode_abs(ctxt, ops); | 1055 | rc = decode_abs(ctxt, ops); |
1049 | if (rc) | 1056 | if (rc != X86EMUL_CONTINUE) |
1050 | goto done; | 1057 | goto done; |
1051 | 1058 | ||
1052 | if (!c->has_seg_override) | 1059 | if (!c->has_seg_override) |
@@ -1057,6 +1064,10 @@ done_prefixes: | |||
1057 | 1064 | ||
1058 | if (c->ad_bytes != 8) | 1065 | if (c->ad_bytes != 8) |
1059 | c->modrm_ea = (u32)c->modrm_ea; | 1066 | c->modrm_ea = (u32)c->modrm_ea; |
1067 | |||
1068 | if (c->rip_relative) | ||
1069 | c->modrm_ea += c->eip; | ||
1070 | |||
1060 | /* | 1071 | /* |
1061 | * Decode and fetch the source operand: register, memory | 1072 | * Decode and fetch the source operand: register, memory |
1062 | * or immediate. | 1073 | * or immediate. |
@@ -1091,6 +1102,8 @@ done_prefixes: | |||
1091 | break; | 1102 | break; |
1092 | } | 1103 | } |
1093 | c->src.type = OP_MEM; | 1104 | c->src.type = OP_MEM; |
1105 | c->src.ptr = (unsigned long *)c->modrm_ea; | ||
1106 | c->src.val = 0; | ||
1094 | break; | 1107 | break; |
1095 | case SrcImm: | 1108 | case SrcImm: |
1096 | case SrcImmU: | 1109 | case SrcImmU: |
@@ -1139,6 +1152,14 @@ done_prefixes: | |||
1139 | c->src.bytes = 1; | 1152 | c->src.bytes = 1; |
1140 | c->src.val = 1; | 1153 | c->src.val = 1; |
1141 | break; | 1154 | break; |
1155 | case SrcSI: | ||
1156 | c->src.type = OP_MEM; | ||
1157 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
1158 | c->src.ptr = (unsigned long *) | ||
1159 | register_address(c, seg_override_base(ctxt, c), | ||
1160 | c->regs[VCPU_REGS_RSI]); | ||
1161 | c->src.val = 0; | ||
1162 | break; | ||
1142 | } | 1163 | } |
1143 | 1164 | ||
1144 | /* | 1165 | /* |
@@ -1168,6 +1189,12 @@ done_prefixes: | |||
1168 | c->src2.bytes = 1; | 1189 | c->src2.bytes = 1; |
1169 | c->src2.val = 1; | 1190 | c->src2.val = 1; |
1170 | break; | 1191 | break; |
1192 | case Src2Mem16: | ||
1193 | c->src2.type = OP_MEM; | ||
1194 | c->src2.bytes = 2; | ||
1195 | c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes); | ||
1196 | c->src2.val = 0; | ||
1197 | break; | ||
1171 | } | 1198 | } |
1172 | 1199 | ||
1173 | /* Decode and fetch the destination operand: register or memory. */ | 1200 | /* Decode and fetch the destination operand: register or memory. */ |
@@ -1180,6 +1207,7 @@ done_prefixes: | |||
1180 | c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); | 1207 | c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); |
1181 | break; | 1208 | break; |
1182 | case DstMem: | 1209 | case DstMem: |
1210 | case DstMem64: | ||
1183 | if ((c->d & ModRM) && c->modrm_mod == 3) { | 1211 | if ((c->d & ModRM) && c->modrm_mod == 3) { |
1184 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1212 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1185 | c->dst.type = OP_REG; | 1213 | c->dst.type = OP_REG; |
@@ -1188,12 +1216,24 @@ done_prefixes: | |||
1188 | break; | 1216 | break; |
1189 | } | 1217 | } |
1190 | c->dst.type = OP_MEM; | 1218 | c->dst.type = OP_MEM; |
1219 | c->dst.ptr = (unsigned long *)c->modrm_ea; | ||
1220 | if ((c->d & DstMask) == DstMem64) | ||
1221 | c->dst.bytes = 8; | ||
1222 | else | ||
1223 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
1224 | c->dst.val = 0; | ||
1225 | if (c->d & BitOp) { | ||
1226 | unsigned long mask = ~(c->dst.bytes * 8 - 1); | ||
1227 | |||
1228 | c->dst.ptr = (void *)c->dst.ptr + | ||
1229 | (c->src.val & mask) / 8; | ||
1230 | } | ||
1191 | break; | 1231 | break; |
1192 | case DstAcc: | 1232 | case DstAcc: |
1193 | c->dst.type = OP_REG; | 1233 | c->dst.type = OP_REG; |
1194 | c->dst.bytes = c->op_bytes; | 1234 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1195 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; | 1235 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; |
1196 | switch (c->op_bytes) { | 1236 | switch (c->dst.bytes) { |
1197 | case 1: | 1237 | case 1: |
1198 | c->dst.val = *(u8 *)c->dst.ptr; | 1238 | c->dst.val = *(u8 *)c->dst.ptr; |
1199 | break; | 1239 | break; |
@@ -1203,18 +1243,248 @@ done_prefixes: | |||
1203 | case 4: | 1243 | case 4: |
1204 | c->dst.val = *(u32 *)c->dst.ptr; | 1244 | c->dst.val = *(u32 *)c->dst.ptr; |
1205 | break; | 1245 | break; |
1246 | case 8: | ||
1247 | c->dst.val = *(u64 *)c->dst.ptr; | ||
1248 | break; | ||
1206 | } | 1249 | } |
1207 | c->dst.orig_val = c->dst.val; | 1250 | c->dst.orig_val = c->dst.val; |
1208 | break; | 1251 | break; |
1252 | case DstDI: | ||
1253 | c->dst.type = OP_MEM; | ||
1254 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
1255 | c->dst.ptr = (unsigned long *) | ||
1256 | register_address(c, es_base(ctxt), | ||
1257 | c->regs[VCPU_REGS_RDI]); | ||
1258 | c->dst.val = 0; | ||
1259 | break; | ||
1209 | } | 1260 | } |
1210 | 1261 | ||
1211 | if (c->rip_relative) | ||
1212 | c->modrm_ea += c->eip; | ||
1213 | |||
1214 | done: | 1262 | done: |
1215 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 1263 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
1216 | } | 1264 | } |
1217 | 1265 | ||
1266 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | ||
1267 | struct x86_emulate_ops *ops, | ||
1268 | unsigned int size, unsigned short port, | ||
1269 | void *dest) | ||
1270 | { | ||
1271 | struct read_cache *rc = &ctxt->decode.io_read; | ||
1272 | |||
1273 | if (rc->pos == rc->end) { /* refill pio read ahead */ | ||
1274 | struct decode_cache *c = &ctxt->decode; | ||
1275 | unsigned int in_page, n; | ||
1276 | unsigned int count = c->rep_prefix ? | ||
1277 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1; | ||
1278 | in_page = (ctxt->eflags & EFLG_DF) ? | ||
1279 | offset_in_page(c->regs[VCPU_REGS_RDI]) : | ||
1280 | PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]); | ||
1281 | n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, | ||
1282 | count); | ||
1283 | if (n == 0) | ||
1284 | n = 1; | ||
1285 | rc->pos = rc->end = 0; | ||
1286 | if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) | ||
1287 | return 0; | ||
1288 | rc->end = n * size; | ||
1289 | } | ||
1290 | |||
1291 | memcpy(dest, rc->data + rc->pos, size); | ||
1292 | rc->pos += size; | ||
1293 | return 1; | ||
1294 | } | ||
1295 | |||
1296 | static u32 desc_limit_scaled(struct desc_struct *desc) | ||
1297 | { | ||
1298 | u32 limit = get_desc_limit(desc); | ||
1299 | |||
1300 | return desc->g ? (limit << 12) | 0xfff : limit; | ||
1301 | } | ||
1302 | |||
1303 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | ||
1304 | struct x86_emulate_ops *ops, | ||
1305 | u16 selector, struct desc_ptr *dt) | ||
1306 | { | ||
1307 | if (selector & 1 << 2) { | ||
1308 | struct desc_struct desc; | ||
1309 | memset (dt, 0, sizeof *dt); | ||
1310 | if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu)) | ||
1311 | return; | ||
1312 | |||
1313 | dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ | ||
1314 | dt->address = get_desc_base(&desc); | ||
1315 | } else | ||
1316 | ops->get_gdt(dt, ctxt->vcpu); | ||
1317 | } | ||
1318 | |||
1319 | /* allowed just for 8 bytes segments */ | ||
1320 | static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
1321 | struct x86_emulate_ops *ops, | ||
1322 | u16 selector, struct desc_struct *desc) | ||
1323 | { | ||
1324 | struct desc_ptr dt; | ||
1325 | u16 index = selector >> 3; | ||
1326 | int ret; | ||
1327 | u32 err; | ||
1328 | ulong addr; | ||
1329 | |||
1330 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | ||
1331 | |||
1332 | if (dt.size < index * 8 + 7) { | ||
1333 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | ||
1334 | return X86EMUL_PROPAGATE_FAULT; | ||
1335 | } | ||
1336 | addr = dt.address + index * 8; | ||
1337 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | ||
1338 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
1339 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | ||
1340 | |||
1341 | return ret; | ||
1342 | } | ||
1343 | |||
1344 | /* allowed just for 8 bytes segments */ | ||
1345 | static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
1346 | struct x86_emulate_ops *ops, | ||
1347 | u16 selector, struct desc_struct *desc) | ||
1348 | { | ||
1349 | struct desc_ptr dt; | ||
1350 | u16 index = selector >> 3; | ||
1351 | u32 err; | ||
1352 | ulong addr; | ||
1353 | int ret; | ||
1354 | |||
1355 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | ||
1356 | |||
1357 | if (dt.size < index * 8 + 7) { | ||
1358 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | ||
1359 | return X86EMUL_PROPAGATE_FAULT; | ||
1360 | } | ||
1361 | |||
1362 | addr = dt.address + index * 8; | ||
1363 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | ||
1364 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
1365 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | ||
1366 | |||
1367 | return ret; | ||
1368 | } | ||
1369 | |||
1370 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
1371 | struct x86_emulate_ops *ops, | ||
1372 | u16 selector, int seg) | ||
1373 | { | ||
1374 | struct desc_struct seg_desc; | ||
1375 | u8 dpl, rpl, cpl; | ||
1376 | unsigned err_vec = GP_VECTOR; | ||
1377 | u32 err_code = 0; | ||
1378 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | ||
1379 | int ret; | ||
1380 | |||
1381 | memset(&seg_desc, 0, sizeof seg_desc); | ||
1382 | |||
1383 | if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) | ||
1384 | || ctxt->mode == X86EMUL_MODE_REAL) { | ||
1385 | /* set real mode segment descriptor */ | ||
1386 | set_desc_base(&seg_desc, selector << 4); | ||
1387 | set_desc_limit(&seg_desc, 0xffff); | ||
1388 | seg_desc.type = 3; | ||
1389 | seg_desc.p = 1; | ||
1390 | seg_desc.s = 1; | ||
1391 | goto load; | ||
1392 | } | ||
1393 | |||
1394 | /* NULL selector is not valid for TR, CS and SS */ | ||
1395 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) | ||
1396 | && null_selector) | ||
1397 | goto exception; | ||
1398 | |||
1399 | /* TR should be in GDT only */ | ||
1400 | if (seg == VCPU_SREG_TR && (selector & (1 << 2))) | ||
1401 | goto exception; | ||
1402 | |||
1403 | if (null_selector) /* for NULL selector skip all following checks */ | ||
1404 | goto load; | ||
1405 | |||
1406 | ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc); | ||
1407 | if (ret != X86EMUL_CONTINUE) | ||
1408 | return ret; | ||
1409 | |||
1410 | err_code = selector & 0xfffc; | ||
1411 | err_vec = GP_VECTOR; | ||
1412 | |||
1413 | /* can't load system descriptor into segment selector */ | ||
1414 | if (seg <= VCPU_SREG_GS && !seg_desc.s) | ||
1415 | goto exception; | ||
1416 | |||
1417 | if (!seg_desc.p) { | ||
1418 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; | ||
1419 | goto exception; | ||
1420 | } | ||
1421 | |||
1422 | rpl = selector & 3; | ||
1423 | dpl = seg_desc.dpl; | ||
1424 | cpl = ops->cpl(ctxt->vcpu); | ||
1425 | |||
1426 | switch (seg) { | ||
1427 | case VCPU_SREG_SS: | ||
1428 | /* | ||
1429 | * segment is not a writable data segment or segment | ||
1430 | * selector's RPL != CPL or segment descriptor's DPL != CPL | ||
1431 | */ | ||
1432 | if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl) | ||
1433 | goto exception; | ||
1434 | break; | ||
1435 | case VCPU_SREG_CS: | ||
1436 | if (!(seg_desc.type & 8)) | ||
1437 | goto exception; | ||
1438 | |||
1439 | if (seg_desc.type & 4) { | ||
1440 | /* conforming */ | ||
1441 | if (dpl > cpl) | ||
1442 | goto exception; | ||
1443 | } else { | ||
1444 | /* nonconforming */ | ||
1445 | if (rpl > cpl || dpl != cpl) | ||
1446 | goto exception; | ||
1447 | } | ||
1448 | /* CS(RPL) <- CPL */ | ||
1449 | selector = (selector & 0xfffc) | cpl; | ||
1450 | break; | ||
1451 | case VCPU_SREG_TR: | ||
1452 | if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) | ||
1453 | goto exception; | ||
1454 | break; | ||
1455 | case VCPU_SREG_LDTR: | ||
1456 | if (seg_desc.s || seg_desc.type != 2) | ||
1457 | goto exception; | ||
1458 | break; | ||
1459 | default: /* DS, ES, FS, or GS */ | ||
1460 | /* | ||
1461 | * segment is not a data or readable code segment or | ||
1462 | * ((segment is a data or nonconforming code segment) | ||
1463 | * and (both RPL and CPL > DPL)) | ||
1464 | */ | ||
1465 | if ((seg_desc.type & 0xa) == 0x8 || | ||
1466 | (((seg_desc.type & 0xc) != 0xc) && | ||
1467 | (rpl > dpl && cpl > dpl))) | ||
1468 | goto exception; | ||
1469 | break; | ||
1470 | } | ||
1471 | |||
1472 | if (seg_desc.s) { | ||
1473 | /* mark segment as accessed */ | ||
1474 | seg_desc.type |= 1; | ||
1475 | ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc); | ||
1476 | if (ret != X86EMUL_CONTINUE) | ||
1477 | return ret; | ||
1478 | } | ||
1479 | load: | ||
1480 | ops->set_segment_selector(selector, seg, ctxt->vcpu); | ||
1481 | ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); | ||
1482 | return X86EMUL_CONTINUE; | ||
1483 | exception: | ||
1484 | kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code); | ||
1485 | return X86EMUL_PROPAGATE_FAULT; | ||
1486 | } | ||
1487 | |||
1218 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt) | 1488 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt) |
1219 | { | 1489 | { |
1220 | struct decode_cache *c = &ctxt->decode; | 1490 | struct decode_cache *c = &ctxt->decode; |
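The descriptor helpers added above (read_segment_descriptor(), write_segment_descriptor(), load_segment_descriptor()) all start from the same selector layout: bits 15-3 index an 8-byte descriptor, bit 2 selects LDT versus GDT, and bits 1-0 carry the RPL; the table-limit test requires the whole 8-byte entry to fit (dt.size >= index * 8 + 7). A small stand-alone decode under those rules, with made-up values:

#include <stdio.h>

int main(void)
{
        unsigned short selector = 0x002b;       /* arbitrary example selector */
        unsigned int index = selector >> 3;     /* descriptor slot in the table */
        int ti  = (selector >> 2) & 1;          /* 0 = GDT, 1 = LDT */
        int rpl = selector & 3;                 /* requested privilege level */
        unsigned int dt_size = 0x7f;            /* hypothetical table limit */

        /* same bounds rule as the helpers: the 8-byte entry must fit */
        int fits = dt_size >= index * 8 + 7;

        printf("index=%u ti=%d rpl=%d fits=%d\n", index, ti, rpl, fits);
        return 0;
}

For 0x002b this prints index=5 ti=0 rpl=3 fits=1, i.e. the sixth GDT slot requested at privilege level 3.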
@@ -1251,7 +1521,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1251 | int rc; | 1521 | int rc; |
1252 | unsigned long val, change_mask; | 1522 | unsigned long val, change_mask; |
1253 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 1523 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
1254 | int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu); | 1524 | int cpl = ops->cpl(ctxt->vcpu); |
1255 | 1525 | ||
1256 | rc = emulate_pop(ctxt, ops, &val, len); | 1526 | rc = emulate_pop(ctxt, ops, &val, len); |
1257 | if (rc != X86EMUL_CONTINUE) | 1527 | if (rc != X86EMUL_CONTINUE) |
@@ -1306,10 +1576,10 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | |||
1306 | int rc; | 1576 | int rc; |
1307 | 1577 | ||
1308 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); | 1578 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); |
1309 | if (rc != 0) | 1579 | if (rc != X86EMUL_CONTINUE) |
1310 | return rc; | 1580 | return rc; |
1311 | 1581 | ||
1312 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg); | 1582 | rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg); |
1313 | return rc; | 1583 | return rc; |
1314 | } | 1584 | } |
1315 | 1585 | ||
@@ -1332,7 +1602,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, | |||
1332 | struct x86_emulate_ops *ops) | 1602 | struct x86_emulate_ops *ops) |
1333 | { | 1603 | { |
1334 | struct decode_cache *c = &ctxt->decode; | 1604 | struct decode_cache *c = &ctxt->decode; |
1335 | int rc = 0; | 1605 | int rc = X86EMUL_CONTINUE; |
1336 | int reg = VCPU_REGS_RDI; | 1606 | int reg = VCPU_REGS_RDI; |
1337 | 1607 | ||
1338 | while (reg >= VCPU_REGS_RAX) { | 1608 | while (reg >= VCPU_REGS_RAX) { |
@@ -1343,7 +1613,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, | |||
1343 | } | 1613 | } |
1344 | 1614 | ||
1345 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); | 1615 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); |
1346 | if (rc != 0) | 1616 | if (rc != X86EMUL_CONTINUE) |
1347 | break; | 1617 | break; |
1348 | --reg; | 1618 | --reg; |
1349 | } | 1619 | } |
@@ -1354,12 +1624,8 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | |||
1354 | struct x86_emulate_ops *ops) | 1624 | struct x86_emulate_ops *ops) |
1355 | { | 1625 | { |
1356 | struct decode_cache *c = &ctxt->decode; | 1626 | struct decode_cache *c = &ctxt->decode; |
1357 | int rc; | ||
1358 | 1627 | ||
1359 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); | 1628 | return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); |
1360 | if (rc != 0) | ||
1361 | return rc; | ||
1362 | return 0; | ||
1363 | } | 1629 | } |
1364 | 1630 | ||
1365 | static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) | 1631 | static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) |
@@ -1395,7 +1661,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | |||
1395 | struct x86_emulate_ops *ops) | 1661 | struct x86_emulate_ops *ops) |
1396 | { | 1662 | { |
1397 | struct decode_cache *c = &ctxt->decode; | 1663 | struct decode_cache *c = &ctxt->decode; |
1398 | int rc = 0; | ||
1399 | 1664 | ||
1400 | switch (c->modrm_reg) { | 1665 | switch (c->modrm_reg) { |
1401 | case 0 ... 1: /* test */ | 1666 | case 0 ... 1: /* test */ |
@@ -1408,11 +1673,9 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | |||
1408 | emulate_1op("neg", c->dst, ctxt->eflags); | 1673 | emulate_1op("neg", c->dst, ctxt->eflags); |
1409 | break; | 1674 | break; |
1410 | default: | 1675 | default: |
1411 | DPRINTF("Cannot emulate %02x\n", c->b); | 1676 | return 0; |
1412 | rc = X86EMUL_UNHANDLEABLE; | ||
1413 | break; | ||
1414 | } | 1677 | } |
1415 | return rc; | 1678 | return 1; |
1416 | } | 1679 | } |
1417 | 1680 | ||
1418 | static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | 1681 | static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, |
@@ -1442,20 +1705,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | |||
1442 | emulate_push(ctxt); | 1705 | emulate_push(ctxt); |
1443 | break; | 1706 | break; |
1444 | } | 1707 | } |
1445 | return 0; | 1708 | return X86EMUL_CONTINUE; |
1446 | } | 1709 | } |
1447 | 1710 | ||
1448 | static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | 1711 | static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, |
1449 | struct x86_emulate_ops *ops, | 1712 | struct x86_emulate_ops *ops) |
1450 | unsigned long memop) | ||
1451 | { | 1713 | { |
1452 | struct decode_cache *c = &ctxt->decode; | 1714 | struct decode_cache *c = &ctxt->decode; |
1453 | u64 old, new; | 1715 | u64 old = c->dst.orig_val; |
1454 | int rc; | ||
1455 | |||
1456 | rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); | ||
1457 | if (rc != X86EMUL_CONTINUE) | ||
1458 | return rc; | ||
1459 | 1716 | ||
1460 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || | 1717 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || |
1461 | ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { | 1718 | ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { |
@@ -1463,17 +1720,13 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | |||
1463 | c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); | 1720 | c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); |
1464 | c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); | 1721 | c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); |
1465 | ctxt->eflags &= ~EFLG_ZF; | 1722 | ctxt->eflags &= ~EFLG_ZF; |
1466 | |||
1467 | } else { | 1723 | } else { |
1468 | new = ((u64)c->regs[VCPU_REGS_RCX] << 32) | | 1724 | c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) | |
1469 | (u32) c->regs[VCPU_REGS_RBX]; | 1725 | (u32) c->regs[VCPU_REGS_RBX]; |
1470 | 1726 | ||
1471 | rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); | ||
1472 | if (rc != X86EMUL_CONTINUE) | ||
1473 | return rc; | ||
1474 | ctxt->eflags |= EFLG_ZF; | 1727 | ctxt->eflags |= EFLG_ZF; |
1475 | } | 1728 | } |
1476 | return 0; | 1729 | return X86EMUL_CONTINUE; |
1477 | } | 1730 | } |
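The rewritten emulate_grp9() no longer touches guest memory itself: decode supplies the old quadword in c->dst.orig_val and the generic writeback path performs the (possibly locked) store. As a reminder of the CMPXCHG8B semantics it models, here is a minimal user-space sketch; the struct and function names are hypothetical and not part of the kernel.

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical register file for the sketch. */
struct regs32 {
	uint32_t eax, ebx, ecx, edx;
	bool zf;
};

/* CMPXCHG8B m64: compare EDX:EAX with the 64-bit memory operand. */
static void cmpxchg8b_model(uint64_t *mem, struct regs32 *r)
{
	uint64_t old = *mem;

	if ((uint32_t)old == r->eax && (uint32_t)(old >> 32) == r->edx) {
		/* Match: store ECX:EBX and set ZF. */
		*mem = ((uint64_t)r->ecx << 32) | r->ebx;
		r->zf = true;
	} else {
		/* Mismatch: load the old value into EDX:EAX, clear ZF. */
		r->eax = (uint32_t)old;
		r->edx = (uint32_t)(old >> 32);
		r->zf = false;
	}
}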
1478 | 1731 | ||
1479 | static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | 1732 | static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, |
@@ -1484,14 +1737,14 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | |||
1484 | unsigned long cs; | 1737 | unsigned long cs; |
1485 | 1738 | ||
1486 | rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); | 1739 | rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); |
1487 | if (rc) | 1740 | if (rc != X86EMUL_CONTINUE) |
1488 | return rc; | 1741 | return rc; |
1489 | if (c->op_bytes == 4) | 1742 | if (c->op_bytes == 4) |
1490 | c->eip = (u32)c->eip; | 1743 | c->eip = (u32)c->eip; |
1491 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); | 1744 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); |
1492 | if (rc) | 1745 | if (rc != X86EMUL_CONTINUE) |
1493 | return rc; | 1746 | return rc; |
1494 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS); | 1747 | rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); |
1495 | return rc; | 1748 | return rc; |
1496 | } | 1749 | } |
1497 | 1750 | ||
@@ -1544,7 +1797,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
1544 | default: | 1797 | default: |
1545 | break; | 1798 | break; |
1546 | } | 1799 | } |
1547 | return 0; | 1800 | return X86EMUL_CONTINUE; |
1548 | } | 1801 | } |
1549 | 1802 | ||
1550 | static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) | 1803 | static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) |
@@ -1598,8 +1851,11 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt) | |||
1598 | u64 msr_data; | 1851 | u64 msr_data; |
1599 | 1852 | ||
1600 | /* syscall is not available in real mode */ | 1853 | /* syscall is not available in real mode */ |
1601 | if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) | 1854 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1602 | return X86EMUL_UNHANDLEABLE; | 1855 | ctxt->mode == X86EMUL_MODE_VM86) { |
1856 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
1857 | return X86EMUL_PROPAGATE_FAULT; | ||
1858 | } | ||
1603 | 1859 | ||
1604 | setup_syscalls_segments(ctxt, &cs, &ss); | 1860 | setup_syscalls_segments(ctxt, &cs, &ss); |
1605 | 1861 | ||
@@ -1649,14 +1905,16 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt) | |||
1649 | /* inject #GP if in real mode */ | 1905 | /* inject #GP if in real mode */ |
1650 | if (ctxt->mode == X86EMUL_MODE_REAL) { | 1906 | if (ctxt->mode == X86EMUL_MODE_REAL) { |
1651 | kvm_inject_gp(ctxt->vcpu, 0); | 1907 | kvm_inject_gp(ctxt->vcpu, 0); |
1652 | return X86EMUL_UNHANDLEABLE; | 1908 | return X86EMUL_PROPAGATE_FAULT; |
1653 | } | 1909 | } |
1654 | 1910 | ||
1655 | /* XXX sysenter/sysexit have not been tested in 64bit mode. | 1911 | /* XXX sysenter/sysexit have not been tested in 64bit mode. |
1656 | * Therefore, we inject an #UD. | 1912 | * Therefore, we inject an #UD. |
1657 | */ | 1913 | */ |
1658 | if (ctxt->mode == X86EMUL_MODE_PROT64) | 1914 | if (ctxt->mode == X86EMUL_MODE_PROT64) { |
1659 | return X86EMUL_UNHANDLEABLE; | 1915 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
1916 | return X86EMUL_PROPAGATE_FAULT; | ||
1917 | } | ||
1660 | 1918 | ||
1661 | setup_syscalls_segments(ctxt, &cs, &ss); | 1919 | setup_syscalls_segments(ctxt, &cs, &ss); |
1662 | 1920 | ||
@@ -1711,7 +1969,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1711 | if (ctxt->mode == X86EMUL_MODE_REAL || | 1969 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1712 | ctxt->mode == X86EMUL_MODE_VM86) { | 1970 | ctxt->mode == X86EMUL_MODE_VM86) { |
1713 | kvm_inject_gp(ctxt->vcpu, 0); | 1971 | kvm_inject_gp(ctxt->vcpu, 0); |
1714 | return X86EMUL_UNHANDLEABLE; | 1972 | return X86EMUL_PROPAGATE_FAULT; |
1715 | } | 1973 | } |
1716 | 1974 | ||
1717 | setup_syscalls_segments(ctxt, &cs, &ss); | 1975 | setup_syscalls_segments(ctxt, &cs, &ss); |
@@ -1756,7 +2014,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1756 | return X86EMUL_CONTINUE; | 2014 | return X86EMUL_CONTINUE; |
1757 | } | 2015 | } |
1758 | 2016 | ||
1759 | static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | 2017 | static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, |
2018 | struct x86_emulate_ops *ops) | ||
1760 | { | 2019 | { |
1761 | int iopl; | 2020 | int iopl; |
1762 | if (ctxt->mode == X86EMUL_MODE_REAL) | 2021 | if (ctxt->mode == X86EMUL_MODE_REAL) |
@@ -1764,7 +2023,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | |||
1764 | if (ctxt->mode == X86EMUL_MODE_VM86) | 2023 | if (ctxt->mode == X86EMUL_MODE_VM86) |
1765 | return true; | 2024 | return true; |
1766 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 2025 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
1767 | return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl; | 2026 | return ops->cpl(ctxt->vcpu) > iopl; |
1768 | } | 2027 | } |
1769 | 2028 | ||
1770 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | 2029 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, |
@@ -1801,22 +2060,419 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, | |||
1801 | struct x86_emulate_ops *ops, | 2060 | struct x86_emulate_ops *ops, |
1802 | u16 port, u16 len) | 2061 | u16 port, u16 len) |
1803 | { | 2062 | { |
1804 | if (emulator_bad_iopl(ctxt)) | 2063 | if (emulator_bad_iopl(ctxt, ops)) |
1805 | if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) | 2064 | if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) |
1806 | return false; | 2065 | return false; |
1807 | return true; | 2066 | return true; |
1808 | } | 2067 | } |
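emulator_bad_iopl() now takes the ops vtable so the CPL comes from ops->cpl() instead of a direct kvm_x86_ops call. The privilege rule it encodes, together with emulator_io_permited() above, is roughly the following stand-alone sketch (hypothetical names, not kernel code):

#include <stdbool.h>

enum cpu_mode { MODE_REAL, MODE_VM86, MODE_PROT };

/*
 * Returns true when an IN/OUT must consult the TSS I/O permission bitmap:
 * real mode never checks, virtual-8086 mode always checks, and protected
 * mode checks only when the current privilege level exceeds IOPL.
 */
static bool io_needs_bitmap_check(enum cpu_mode mode, int cpl, int iopl)
{
	if (mode == MODE_REAL)
		return false;
	if (mode == MODE_VM86)
		return true;
	return cpl > iopl;
}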
1809 | 2068 | ||
2069 | static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt, | ||
2070 | struct x86_emulate_ops *ops, | ||
2071 | int seg) | ||
2072 | { | ||
2073 | struct desc_struct desc; | ||
2074 | if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu)) | ||
2075 | return get_desc_base(&desc); | ||
2076 | else | ||
2077 | return ~0; | ||
2078 | } | ||
2079 | |||
2080 | static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, | ||
2081 | struct x86_emulate_ops *ops, | ||
2082 | struct tss_segment_16 *tss) | ||
2083 | { | ||
2084 | struct decode_cache *c = &ctxt->decode; | ||
2085 | |||
2086 | tss->ip = c->eip; | ||
2087 | tss->flag = ctxt->eflags; | ||
2088 | tss->ax = c->regs[VCPU_REGS_RAX]; | ||
2089 | tss->cx = c->regs[VCPU_REGS_RCX]; | ||
2090 | tss->dx = c->regs[VCPU_REGS_RDX]; | ||
2091 | tss->bx = c->regs[VCPU_REGS_RBX]; | ||
2092 | tss->sp = c->regs[VCPU_REGS_RSP]; | ||
2093 | tss->bp = c->regs[VCPU_REGS_RBP]; | ||
2094 | tss->si = c->regs[VCPU_REGS_RSI]; | ||
2095 | tss->di = c->regs[VCPU_REGS_RDI]; | ||
2096 | |||
2097 | tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); | ||
2098 | tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | ||
2099 | tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); | ||
2100 | tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); | ||
2101 | tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); | ||
2102 | } | ||
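save_state_to_tss16() and its counterpart below fill a struct tss_segment_16 that mirrors the 16-bit hardware task-state segment. For orientation, the classic 80286 layout is approximately the following (an illustrative sketch; the authoritative definition lives in arch/x86/kvm/tss.h). Its 44-byte size is also why emulator_do_task_switch() further down rejects 16-bit TSS descriptors whose limit is below 0x2b.

#include <stdint.h>

/* Illustrative 16-bit TSS layout (44 bytes); every field is 16 bits wide. */
struct tss16_sketch {
	uint16_t prev_task_link;	/* back link used by IRET */
	uint16_t sp0, ss0;		/* ring-0 stack */
	uint16_t sp1, ss1;		/* ring-1 stack */
	uint16_t sp2, ss2;		/* ring-2 stack */
	uint16_t ip, flag;		/* saved IP and FLAGS */
	uint16_t ax, cx, dx, bx;	/* general registers */
	uint16_t sp, bp, si, di;
	uint16_t es, cs, ss, ds;	/* segment selectors */
	uint16_t ldt;			/* LDT selector */
} __attribute__((packed));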
2103 | |||
2104 | static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, | ||
2105 | struct x86_emulate_ops *ops, | ||
2106 | struct tss_segment_16 *tss) | ||
2107 | { | ||
2108 | struct decode_cache *c = &ctxt->decode; | ||
2109 | int ret; | ||
2110 | |||
2111 | c->eip = tss->ip; | ||
2112 | ctxt->eflags = tss->flag | 2; | ||
2113 | c->regs[VCPU_REGS_RAX] = tss->ax; | ||
2114 | c->regs[VCPU_REGS_RCX] = tss->cx; | ||
2115 | c->regs[VCPU_REGS_RDX] = tss->dx; | ||
2116 | c->regs[VCPU_REGS_RBX] = tss->bx; | ||
2117 | c->regs[VCPU_REGS_RSP] = tss->sp; | ||
2118 | c->regs[VCPU_REGS_RBP] = tss->bp; | ||
2119 | c->regs[VCPU_REGS_RSI] = tss->si; | ||
2120 | c->regs[VCPU_REGS_RDI] = tss->di; | ||
2121 | |||
2122 | /* | ||
2123 | * SDM says that segment selectors are loaded before segment | ||
2124 | * descriptors | ||
2125 | */ | ||
2126 | ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); | ||
2127 | ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); | ||
2128 | ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); | ||
2129 | ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); | ||
2130 | ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); | ||
2131 | |||
2132 | /* | ||
2133 | * Now load segment descriptors. If a fault happens at this stage | ||
2134 | * it is handled in the context of the new task | ||
2135 | */ | ||
2136 | ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR); | ||
2137 | if (ret != X86EMUL_CONTINUE) | ||
2138 | return ret; | ||
2139 | ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); | ||
2140 | if (ret != X86EMUL_CONTINUE) | ||
2141 | return ret; | ||
2142 | ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); | ||
2143 | if (ret != X86EMUL_CONTINUE) | ||
2144 | return ret; | ||
2145 | ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); | ||
2146 | if (ret != X86EMUL_CONTINUE) | ||
2147 | return ret; | ||
2148 | ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); | ||
2149 | if (ret != X86EMUL_CONTINUE) | ||
2150 | return ret; | ||
2151 | |||
2152 | return X86EMUL_CONTINUE; | ||
2153 | } | ||
2154 | |||
2155 | static int task_switch_16(struct x86_emulate_ctxt *ctxt, | ||
2156 | struct x86_emulate_ops *ops, | ||
2157 | u16 tss_selector, u16 old_tss_sel, | ||
2158 | ulong old_tss_base, struct desc_struct *new_desc) | ||
2159 | { | ||
2160 | struct tss_segment_16 tss_seg; | ||
2161 | int ret; | ||
2162 | u32 err, new_tss_base = get_desc_base(new_desc); | ||
2163 | |||
2164 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2165 | &err); | ||
2166 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2167 | /* FIXME: need to provide precise fault address */ | ||
2168 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
2169 | return ret; | ||
2170 | } | ||
2171 | |||
2172 | save_state_to_tss16(ctxt, ops, &tss_seg); | ||
2173 | |||
2174 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2175 | &err); | ||
2176 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2177 | /* FIXME: need to provide precise fault address */ | ||
2178 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
2179 | return ret; | ||
2180 | } | ||
2181 | |||
2182 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2183 | &err); | ||
2184 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2185 | /* FIXME: need to provide precise fault address */ | ||
2186 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
2187 | return ret; | ||
2188 | } | ||
2189 | |||
2190 | if (old_tss_sel != 0xffff) { | ||
2191 | tss_seg.prev_task_link = old_tss_sel; | ||
2192 | |||
2193 | ret = ops->write_std(new_tss_base, | ||
2194 | &tss_seg.prev_task_link, | ||
2195 | sizeof tss_seg.prev_task_link, | ||
2196 | ctxt->vcpu, &err); | ||
2197 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2198 | /* FIXME: need to provide precise fault address */ | ||
2199 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
2200 | return ret; | ||
2201 | } | ||
2202 | } | ||
2203 | |||
2204 | return load_state_from_tss16(ctxt, ops, &tss_seg); | ||
2205 | } | ||
2206 | |||
2207 | static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, | ||
2208 | struct x86_emulate_ops *ops, | ||
2209 | struct tss_segment_32 *tss) | ||
2210 | { | ||
2211 | struct decode_cache *c = &ctxt->decode; | ||
2212 | |||
2213 | tss->cr3 = ops->get_cr(3, ctxt->vcpu); | ||
2214 | tss->eip = c->eip; | ||
2215 | tss->eflags = ctxt->eflags; | ||
2216 | tss->eax = c->regs[VCPU_REGS_RAX]; | ||
2217 | tss->ecx = c->regs[VCPU_REGS_RCX]; | ||
2218 | tss->edx = c->regs[VCPU_REGS_RDX]; | ||
2219 | tss->ebx = c->regs[VCPU_REGS_RBX]; | ||
2220 | tss->esp = c->regs[VCPU_REGS_RSP]; | ||
2221 | tss->ebp = c->regs[VCPU_REGS_RBP]; | ||
2222 | tss->esi = c->regs[VCPU_REGS_RSI]; | ||
2223 | tss->edi = c->regs[VCPU_REGS_RDI]; | ||
2224 | |||
2225 | tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); | ||
2226 | tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | ||
2227 | tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); | ||
2228 | tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); | ||
2229 | tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); | ||
2230 | tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); | ||
2231 | tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); | ||
2232 | } | ||
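The 32-bit variant follows the hardware TSS used since the 386. A rough sketch of that layout is shown below (again illustrative; the in-tree definition is in tss.h). Its 104-byte size is what the desc_limit < 0x67 check in emulator_do_task_switch() later enforces.

#include <stdint.h>

/* Illustrative 32-bit TSS layout (104 bytes, hence the 0x67 limit check). */
struct tss32_sketch {
	uint32_t prev_task_link;
	uint32_t esp0, ss0;
	uint32_t esp1, ss1;
	uint32_t esp2, ss2;
	uint32_t cr3;
	uint32_t eip, eflags;
	uint32_t eax, ecx, edx, ebx;
	uint32_t esp, ebp, esi, edi;
	uint32_t es, cs, ss, ds, fs, gs;
	uint32_t ldt_selector;
	uint16_t t;			/* debug trap flag (bit 0) */
	uint16_t io_map_base;		/* offset of the I/O permission bitmap */
} __attribute__((packed));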
2233 | |||
2234 | static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | ||
2235 | struct x86_emulate_ops *ops, | ||
2236 | struct tss_segment_32 *tss) | ||
2237 | { | ||
2238 | struct decode_cache *c = &ctxt->decode; | ||
2239 | int ret; | ||
2240 | |||
2241 | ops->set_cr(3, tss->cr3, ctxt->vcpu); | ||
2242 | c->eip = tss->eip; | ||
2243 | ctxt->eflags = tss->eflags | 2; | ||
2244 | c->regs[VCPU_REGS_RAX] = tss->eax; | ||
2245 | c->regs[VCPU_REGS_RCX] = tss->ecx; | ||
2246 | c->regs[VCPU_REGS_RDX] = tss->edx; | ||
2247 | c->regs[VCPU_REGS_RBX] = tss->ebx; | ||
2248 | c->regs[VCPU_REGS_RSP] = tss->esp; | ||
2249 | c->regs[VCPU_REGS_RBP] = tss->ebp; | ||
2250 | c->regs[VCPU_REGS_RSI] = tss->esi; | ||
2251 | c->regs[VCPU_REGS_RDI] = tss->edi; | ||
2252 | |||
2253 | /* | ||
2254 | * SDM says that segment selectors are loaded before segment | ||
2255 | * descriptors | ||
2256 | */ | ||
2257 | ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); | ||
2258 | ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); | ||
2259 | ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); | ||
2260 | ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); | ||
2261 | ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); | ||
2262 | ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); | ||
2263 | ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); | ||
2264 | |||
2265 | /* | ||
2266 | * Now load segment descriptors. If a fault happens at this stage | ||
2267 | * it is handled in the context of the new task | ||
2268 | */ | ||
2269 | ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR); | ||
2270 | if (ret != X86EMUL_CONTINUE) | ||
2271 | return ret; | ||
2272 | ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); | ||
2273 | if (ret != X86EMUL_CONTINUE) | ||
2274 | return ret; | ||
2275 | ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); | ||
2276 | if (ret != X86EMUL_CONTINUE) | ||
2277 | return ret; | ||
2278 | ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); | ||
2279 | if (ret != X86EMUL_CONTINUE) | ||
2280 | return ret; | ||
2281 | ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); | ||
2282 | if (ret != X86EMUL_CONTINUE) | ||
2283 | return ret; | ||
2284 | ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS); | ||
2285 | if (ret != X86EMUL_CONTINUE) | ||
2286 | return ret; | ||
2287 | ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS); | ||
2288 | if (ret != X86EMUL_CONTINUE) | ||
2289 | return ret; | ||
2290 | |||
2291 | return X86EMUL_CONTINUE; | ||
2292 | } | ||
2293 | |||
2294 | static int task_switch_32(struct x86_emulate_ctxt *ctxt, | ||
2295 | struct x86_emulate_ops *ops, | ||
2296 | u16 tss_selector, u16 old_tss_sel, | ||
2297 | ulong old_tss_base, struct desc_struct *new_desc) | ||
2298 | { | ||
2299 | struct tss_segment_32 tss_seg; | ||
2300 | int ret; | ||
2301 | u32 err, new_tss_base = get_desc_base(new_desc); | ||
2302 | |||
2303 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2304 | &err); | ||
2305 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2306 | /* FIXME: need to provide precise fault address */ | ||
2307 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
2308 | return ret; | ||
2309 | } | ||
2310 | |||
2311 | save_state_to_tss32(ctxt, ops, &tss_seg); | ||
2312 | |||
2313 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2314 | &err); | ||
2315 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2316 | /* FIXME: need to provide precise fault address */ | ||
2317 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
2318 | return ret; | ||
2319 | } | ||
2320 | |||
2321 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2322 | &err); | ||
2323 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2324 | /* FIXME: need to provide precise fault address */ | ||
2325 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
2326 | return ret; | ||
2327 | } | ||
2328 | |||
2329 | if (old_tss_sel != 0xffff) { | ||
2330 | tss_seg.prev_task_link = old_tss_sel; | ||
2331 | |||
2332 | ret = ops->write_std(new_tss_base, | ||
2333 | &tss_seg.prev_task_link, | ||
2334 | sizeof tss_seg.prev_task_link, | ||
2335 | ctxt->vcpu, &err); | ||
2336 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2337 | /* FIXME: need to provide precise fault address */ | ||
2338 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
2339 | return ret; | ||
2340 | } | ||
2341 | } | ||
2342 | |||
2343 | return load_state_from_tss32(ctxt, ops, &tss_seg); | ||
2344 | } | ||
2345 | |||
2346 | static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | ||
2347 | struct x86_emulate_ops *ops, | ||
2348 | u16 tss_selector, int reason, | ||
2349 | bool has_error_code, u32 error_code) | ||
2350 | { | ||
2351 | struct desc_struct curr_tss_desc, next_tss_desc; | ||
2352 | int ret; | ||
2353 | u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); | ||
2354 | ulong old_tss_base = | ||
2355 | get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR); | ||
2356 | u32 desc_limit; | ||
2357 | |||
2358 | /* FIXME: old_tss_base == ~0 ? */ | ||
2359 | |||
2360 | ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc); | ||
2361 | if (ret != X86EMUL_CONTINUE) | ||
2362 | return ret; | ||
2363 | ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc); | ||
2364 | if (ret != X86EMUL_CONTINUE) | ||
2365 | return ret; | ||
2366 | |||
2367 | /* FIXME: check that next_tss_desc is tss */ | ||
2368 | |||
2369 | if (reason != TASK_SWITCH_IRET) { | ||
2370 | if ((tss_selector & 3) > next_tss_desc.dpl || | ||
2371 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { | ||
2372 | kvm_inject_gp(ctxt->vcpu, 0); | ||
2373 | return X86EMUL_PROPAGATE_FAULT; | ||
2374 | } | ||
2375 | } | ||
2376 | |||
2377 | desc_limit = desc_limit_scaled(&next_tss_desc); | ||
2378 | if (!next_tss_desc.p || | ||
2379 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || | ||
2380 | desc_limit < 0x2b)) { | ||
2381 | kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR, | ||
2382 | tss_selector & 0xfffc); | ||
2383 | return X86EMUL_PROPAGATE_FAULT; | ||
2384 | } | ||
2385 | |||
2386 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | ||
2387 | curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */ | ||
2388 | write_segment_descriptor(ctxt, ops, old_tss_sel, | ||
2389 | &curr_tss_desc); | ||
2390 | } | ||
2391 | |||
2392 | if (reason == TASK_SWITCH_IRET) | ||
2393 | ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT; | ||
2394 | |||
2395 | /* set back link to prev task only if NT bit is set in eflags | ||
2396 | note that old_tss_sel is not used after this point */ | ||
2397 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
2398 | old_tss_sel = 0xffff; | ||
2399 | |||
2400 | if (next_tss_desc.type & 8) | ||
2401 | ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel, | ||
2402 | old_tss_base, &next_tss_desc); | ||
2403 | else | ||
2404 | ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel, | ||
2405 | old_tss_base, &next_tss_desc); | ||
2406 | if (ret != X86EMUL_CONTINUE) | ||
2407 | return ret; | ||
2408 | |||
2409 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) | ||
2410 | ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT; | ||
2411 | |||
2412 | if (reason != TASK_SWITCH_IRET) { | ||
2413 | next_tss_desc.type |= (1 << 1); /* set busy flag */ | ||
2414 | write_segment_descriptor(ctxt, ops, tss_selector, | ||
2415 | &next_tss_desc); | ||
2416 | } | ||
2417 | |||
2418 | ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); | ||
2419 | ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu); | ||
2420 | ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); | ||
2421 | |||
2422 | if (has_error_code) { | ||
2423 | struct decode_cache *c = &ctxt->decode; | ||
2424 | |||
2425 | c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; | ||
2426 | c->lock_prefix = 0; | ||
2427 | c->src.val = (unsigned long) error_code; | ||
2428 | emulate_push(ctxt); | ||
2429 | } | ||
2430 | |||
2431 | return ret; | ||
2432 | } | ||
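The descriptor bookkeeping in emulator_do_task_switch() follows the SDM task-switch rules: JMP and IRET leave the outgoing task for good, so its TSS descriptor loses the busy bit, while every entry except IRET marks the incoming TSS busy (CALL and gate entries additionally set NT and record the back link). A condensed sketch of just the busy-bit part, with hypothetical names:

#include <stdint.h>

#define TSS_TYPE_BUSY	(1u << 1)	/* bit 1 of the system-descriptor type */

enum switch_reason { SWITCH_JMP, SWITCH_CALL, SWITCH_IRET, SWITCH_GATE };

static void update_busy_bits(uint8_t *old_type, uint8_t *new_type,
			     enum switch_reason reason)
{
	if (reason == SWITCH_IRET || reason == SWITCH_JMP)
		*old_type &= ~TSS_TYPE_BUSY;	/* old task is not returned to */

	if (reason != SWITCH_IRET)
		*new_type |= TSS_TYPE_BUSY;	/* new task becomes the busy one */
}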
2433 | |||
2434 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | ||
2435 | struct x86_emulate_ops *ops, | ||
2436 | u16 tss_selector, int reason, | ||
2437 | bool has_error_code, u32 error_code) | ||
2438 | { | ||
2439 | struct decode_cache *c = &ctxt->decode; | ||
2440 | int rc; | ||
2441 | |||
2442 | memset(c, 0, sizeof(struct decode_cache)); | ||
2443 | c->eip = ctxt->eip; | ||
2444 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | ||
2445 | c->dst.type = OP_NONE; | ||
2446 | |||
2447 | rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, | ||
2448 | has_error_code, error_code); | ||
2449 | |||
2450 | if (rc == X86EMUL_CONTINUE) { | ||
2451 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | ||
2452 | kvm_rip_write(ctxt->vcpu, c->eip); | ||
2453 | rc = writeback(ctxt, ops); | ||
2454 | } | ||
2455 | |||
2456 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | ||
2457 | } | ||
2458 | |||
2459 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base, | ||
2460 | int reg, struct operand *op) | ||
2461 | { | ||
2462 | struct decode_cache *c = &ctxt->decode; | ||
2463 | int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; | ||
2464 | |||
2465 | register_address_increment(c, &c->regs[reg], df * op->bytes); | ||
2466 | op->ptr = (unsigned long *)register_address(c, base, c->regs[reg]); | ||
2467 | } | ||
2468 | |||
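string_addr_inc() is the new common helper for post-incrementing RSI/RDI style string operands: the direction flag picks the sign, the operand size the stride, and the operand pointer is then recomputed from the updated register. A minimal model of the stepping rule, with hypothetical names:

#include <stdint.h>

#define EFLAGS_DF	(1u << 10)	/* direction flag */

/* Advance a string index register by one element in the DF direction. */
static uint64_t string_index_step(uint64_t reg, uint32_t eflags,
				  unsigned int op_bytes)
{
	int df = (eflags & EFLAGS_DF) ? -1 : 1;

	return reg + (int64_t)df * op_bytes;
}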
1810 | int | 2469 | int |
1811 | x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | 2470 | x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
1812 | { | 2471 | { |
1813 | unsigned long memop = 0; | ||
1814 | u64 msr_data; | 2472 | u64 msr_data; |
1815 | unsigned long saved_eip = 0; | ||
1816 | struct decode_cache *c = &ctxt->decode; | 2473 | struct decode_cache *c = &ctxt->decode; |
1817 | unsigned int port; | 2474 | int rc = X86EMUL_CONTINUE; |
1818 | int io_dir_in; | 2475 | int saved_dst_type = c->dst.type; |
1819 | int rc = 0; | ||
1820 | 2476 | ||
1821 | ctxt->interruptibility = 0; | 2477 | ctxt->interruptibility = 0; |
1822 | 2478 | ||
@@ -1826,26 +2482,30 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1826 | */ | 2482 | */ |
1827 | 2483 | ||
1828 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 2484 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
1829 | saved_eip = c->eip; | 2485 | |
2486 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | ||
2487 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
2488 | goto done; | ||
2489 | } | ||
1830 | 2490 | ||
1831 | /* LOCK prefix is allowed only with some instructions */ | 2491 | /* LOCK prefix is allowed only with some instructions */ |
1832 | if (c->lock_prefix && !(c->d & Lock)) { | 2492 | if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { |
1833 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2493 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
1834 | goto done; | 2494 | goto done; |
1835 | } | 2495 | } |
1836 | 2496 | ||
1837 | /* Privileged instruction can be executed only in CPL=0 */ | 2497 | /* Privileged instruction can be executed only in CPL=0 */ |
1838 | if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) { | 2498 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { |
1839 | kvm_inject_gp(ctxt->vcpu, 0); | 2499 | kvm_inject_gp(ctxt->vcpu, 0); |
1840 | goto done; | 2500 | goto done; |
1841 | } | 2501 | } |
1842 | 2502 | ||
1843 | if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) | ||
1844 | memop = c->modrm_ea; | ||
1845 | |||
1846 | if (c->rep_prefix && (c->d & String)) { | 2503 | if (c->rep_prefix && (c->d & String)) { |
2504 | ctxt->restart = true; | ||
1847 | /* All REP prefixes have the same first termination condition */ | 2505 | /* All REP prefixes have the same first termination condition */ |
1848 | if (c->regs[VCPU_REGS_RCX] == 0) { | 2506 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { |
2507 | string_done: | ||
2508 | ctxt->restart = false; | ||
1849 | kvm_rip_write(ctxt->vcpu, c->eip); | 2509 | kvm_rip_write(ctxt->vcpu, c->eip); |
1850 | goto done; | 2510 | goto done; |
1851 | } | 2511 | } |
@@ -1857,25 +2517,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1857 | * - if REPNE/REPNZ and ZF = 1 then done | 2517 | * - if REPNE/REPNZ and ZF = 1 then done |
1858 | */ | 2518 | */ |
1859 | if ((c->b == 0xa6) || (c->b == 0xa7) || | 2519 | if ((c->b == 0xa6) || (c->b == 0xa7) || |
1860 | (c->b == 0xae) || (c->b == 0xaf)) { | 2520 | (c->b == 0xae) || (c->b == 0xaf)) { |
1861 | if ((c->rep_prefix == REPE_PREFIX) && | 2521 | if ((c->rep_prefix == REPE_PREFIX) && |
1862 | ((ctxt->eflags & EFLG_ZF) == 0)) { | 2522 | ((ctxt->eflags & EFLG_ZF) == 0)) |
1863 | kvm_rip_write(ctxt->vcpu, c->eip); | 2523 | goto string_done; |
1864 | goto done; | ||
1865 | } | ||
1866 | if ((c->rep_prefix == REPNE_PREFIX) && | 2524 | if ((c->rep_prefix == REPNE_PREFIX) && |
1867 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) { | 2525 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) |
1868 | kvm_rip_write(ctxt->vcpu, c->eip); | 2526 | goto string_done; |
1869 | goto done; | ||
1870 | } | ||
1871 | } | 2527 | } |
1872 | c->regs[VCPU_REGS_RCX]--; | 2528 | c->eip = ctxt->eip; |
1873 | c->eip = kvm_rip_read(ctxt->vcpu); | ||
1874 | } | 2529 | } |
1875 | 2530 | ||
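The restructured REP handling above re-enters the decoded instruction via ctxt->restart instead of re-reading RIP each round; the termination conditions themselves are unchanged. A user-space model of when a REP-prefixed string instruction stops (hypothetical names; only CMPS/SCAS consult ZF):

#include <stdbool.h>
#include <stdint.h>

enum rep_prefix { REP_NONE, REP_E, REP_NE };

static bool rep_terminated(uint64_t count_masked, enum rep_prefix rep,
			   bool is_cmps_or_scas, bool zf)
{
	if (count_masked == 0)			/* all REP forms stop on RCX == 0 */
		return true;
	if (!is_cmps_or_scas)
		return false;
	if (rep == REP_E && !zf)		/* REPE/REPZ stops when ZF is clear */
		return true;
	if (rep == REP_NE && zf)		/* REPNE/REPNZ stops when ZF is set */
		return true;
	return false;
}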
1876 | if (c->src.type == OP_MEM) { | 2531 | if (c->src.type == OP_MEM) { |
1877 | c->src.ptr = (unsigned long *)memop; | ||
1878 | c->src.val = 0; | ||
1879 | rc = ops->read_emulated((unsigned long)c->src.ptr, | 2532 | rc = ops->read_emulated((unsigned long)c->src.ptr, |
1880 | &c->src.val, | 2533 | &c->src.val, |
1881 | c->src.bytes, | 2534 | c->src.bytes, |
@@ -1885,29 +2538,25 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1885 | c->src.orig_val = c->src.val; | 2538 | c->src.orig_val = c->src.val; |
1886 | } | 2539 | } |
1887 | 2540 | ||
2541 | if (c->src2.type == OP_MEM) { | ||
2542 | rc = ops->read_emulated((unsigned long)c->src2.ptr, | ||
2543 | &c->src2.val, | ||
2544 | c->src2.bytes, | ||
2545 | ctxt->vcpu); | ||
2546 | if (rc != X86EMUL_CONTINUE) | ||
2547 | goto done; | ||
2548 | } | ||
2549 | |||
1888 | if ((c->d & DstMask) == ImplicitOps) | 2550 | if ((c->d & DstMask) == ImplicitOps) |
1889 | goto special_insn; | 2551 | goto special_insn; |
1890 | 2552 | ||
1891 | 2553 | ||
1892 | if (c->dst.type == OP_MEM) { | 2554 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { |
1893 | c->dst.ptr = (unsigned long *)memop; | 2555 | /* optimisation - avoid slow emulated read if Mov */ |
1894 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 2556 | rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val, |
1895 | c->dst.val = 0; | 2557 | c->dst.bytes, ctxt->vcpu); |
1896 | if (c->d & BitOp) { | 2558 | if (rc != X86EMUL_CONTINUE) |
1897 | unsigned long mask = ~(c->dst.bytes * 8 - 1); | 2559 | goto done; |
1898 | |||
1899 | c->dst.ptr = (void *)c->dst.ptr + | ||
1900 | (c->src.val & mask) / 8; | ||
1901 | } | ||
1902 | if (!(c->d & Mov)) { | ||
1903 | /* optimisation - avoid slow emulated read */ | ||
1904 | rc = ops->read_emulated((unsigned long)c->dst.ptr, | ||
1905 | &c->dst.val, | ||
1906 | c->dst.bytes, | ||
1907 | ctxt->vcpu); | ||
1908 | if (rc != X86EMUL_CONTINUE) | ||
1909 | goto done; | ||
1910 | } | ||
1911 | } | 2560 | } |
1912 | c->dst.orig_val = c->dst.val; | 2561 | c->dst.orig_val = c->dst.val; |
1913 | 2562 | ||
@@ -1926,7 +2575,7 @@ special_insn: | |||
1926 | break; | 2575 | break; |
1927 | case 0x07: /* pop es */ | 2576 | case 0x07: /* pop es */ |
1928 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); | 2577 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); |
1929 | if (rc != 0) | 2578 | if (rc != X86EMUL_CONTINUE) |
1930 | goto done; | 2579 | goto done; |
1931 | break; | 2580 | break; |
1932 | case 0x08 ... 0x0d: | 2581 | case 0x08 ... 0x0d: |
@@ -1945,7 +2594,7 @@ special_insn: | |||
1945 | break; | 2594 | break; |
1946 | case 0x17: /* pop ss */ | 2595 | case 0x17: /* pop ss */ |
1947 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); | 2596 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); |
1948 | if (rc != 0) | 2597 | if (rc != X86EMUL_CONTINUE) |
1949 | goto done; | 2598 | goto done; |
1950 | break; | 2599 | break; |
1951 | case 0x18 ... 0x1d: | 2600 | case 0x18 ... 0x1d: |
@@ -1957,7 +2606,7 @@ special_insn: | |||
1957 | break; | 2606 | break; |
1958 | case 0x1f: /* pop ds */ | 2607 | case 0x1f: /* pop ds */ |
1959 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); | 2608 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); |
1960 | if (rc != 0) | 2609 | if (rc != X86EMUL_CONTINUE) |
1961 | goto done; | 2610 | goto done; |
1962 | break; | 2611 | break; |
1963 | case 0x20 ... 0x25: | 2612 | case 0x20 ... 0x25: |
@@ -1988,7 +2637,7 @@ special_insn: | |||
1988 | case 0x58 ... 0x5f: /* pop reg */ | 2637 | case 0x58 ... 0x5f: /* pop reg */ |
1989 | pop_instruction: | 2638 | pop_instruction: |
1990 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); | 2639 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); |
1991 | if (rc != 0) | 2640 | if (rc != X86EMUL_CONTINUE) |
1992 | goto done; | 2641 | goto done; |
1993 | break; | 2642 | break; |
1994 | case 0x60: /* pusha */ | 2643 | case 0x60: /* pusha */ |
@@ -1996,7 +2645,7 @@ special_insn: | |||
1996 | break; | 2645 | break; |
1997 | case 0x61: /* popa */ | 2646 | case 0x61: /* popa */ |
1998 | rc = emulate_popa(ctxt, ops); | 2647 | rc = emulate_popa(ctxt, ops); |
1999 | if (rc != 0) | 2648 | if (rc != X86EMUL_CONTINUE) |
2000 | goto done; | 2649 | goto done; |
2001 | break; | 2650 | break; |
2002 | case 0x63: /* movsxd */ | 2651 | case 0x63: /* movsxd */ |
@@ -2010,47 +2659,29 @@ special_insn: | |||
2010 | break; | 2659 | break; |
2011 | case 0x6c: /* insb */ | 2660 | case 0x6c: /* insb */ |
2012 | case 0x6d: /* insw/insd */ | 2661 | case 0x6d: /* insw/insd */ |
2662 | c->dst.bytes = min(c->dst.bytes, 4u); | ||
2013 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2663 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
2014 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | 2664 | c->dst.bytes)) { |
2015 | kvm_inject_gp(ctxt->vcpu, 0); | 2665 | kvm_inject_gp(ctxt->vcpu, 0); |
2016 | goto done; | 2666 | goto done; |
2017 | } | 2667 | } |
2018 | if (kvm_emulate_pio_string(ctxt->vcpu, | 2668 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, |
2019 | 1, | 2669 | c->regs[VCPU_REGS_RDX], &c->dst.val)) |
2020 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2670 | goto done; /* IO is needed, skip writeback */ |
2021 | c->rep_prefix ? | 2671 | break; |
2022 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, | ||
2023 | (ctxt->eflags & EFLG_DF), | ||
2024 | register_address(c, es_base(ctxt), | ||
2025 | c->regs[VCPU_REGS_RDI]), | ||
2026 | c->rep_prefix, | ||
2027 | c->regs[VCPU_REGS_RDX]) == 0) { | ||
2028 | c->eip = saved_eip; | ||
2029 | return -1; | ||
2030 | } | ||
2031 | return 0; | ||
2032 | case 0x6e: /* outsb */ | 2672 | case 0x6e: /* outsb */ |
2033 | case 0x6f: /* outsw/outsd */ | 2673 | case 0x6f: /* outsw/outsd */ |
2674 | c->src.bytes = min(c->src.bytes, 4u); | ||
2034 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2675 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
2035 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | 2676 | c->src.bytes)) { |
2036 | kvm_inject_gp(ctxt->vcpu, 0); | 2677 | kvm_inject_gp(ctxt->vcpu, 0); |
2037 | goto done; | 2678 | goto done; |
2038 | } | 2679 | } |
2039 | if (kvm_emulate_pio_string(ctxt->vcpu, | 2680 | ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], |
2040 | 0, | 2681 | &c->src.val, 1, ctxt->vcpu); |
2041 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2682 | |
2042 | c->rep_prefix ? | 2683 | c->dst.type = OP_NONE; /* nothing to writeback */ |
2043 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, | 2684 | break; |
2044 | (ctxt->eflags & EFLG_DF), | ||
2045 | register_address(c, | ||
2046 | seg_override_base(ctxt, c), | ||
2047 | c->regs[VCPU_REGS_RSI]), | ||
2048 | c->rep_prefix, | ||
2049 | c->regs[VCPU_REGS_RDX]) == 0) { | ||
2050 | c->eip = saved_eip; | ||
2051 | return -1; | ||
2052 | } | ||
2053 | return 0; | ||
2054 | case 0x70 ... 0x7f: /* jcc (short) */ | 2685 | case 0x70 ... 0x7f: /* jcc (short) */ |
2055 | if (test_cc(c->b, ctxt->eflags)) | 2686 | if (test_cc(c->b, ctxt->eflags)) |
2056 | jmp_rel(c, c->src.val); | 2687 | jmp_rel(c, c->src.val); |
@@ -2107,12 +2738,11 @@ special_insn: | |||
2107 | case 0x8c: { /* mov r/m, sreg */ | 2738 | case 0x8c: { /* mov r/m, sreg */ |
2108 | struct kvm_segment segreg; | 2739 | struct kvm_segment segreg; |
2109 | 2740 | ||
2110 | if (c->modrm_reg <= 5) | 2741 | if (c->modrm_reg <= VCPU_SREG_GS) |
2111 | kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); | 2742 | kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); |
2112 | else { | 2743 | else { |
2113 | printk(KERN_INFO "0x8c: Invalid segreg in modrm byte 0x%02x\n", | 2744 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
2114 | c->modrm); | 2745 | goto done; |
2115 | goto cannot_emulate; | ||
2116 | } | 2746 | } |
2117 | c->dst.val = segreg.selector; | 2747 | c->dst.val = segreg.selector; |
2118 | break; | 2748 | break; |
@@ -2132,16 +2762,16 @@ special_insn: | |||
2132 | } | 2762 | } |
2133 | 2763 | ||
2134 | if (c->modrm_reg == VCPU_SREG_SS) | 2764 | if (c->modrm_reg == VCPU_SREG_SS) |
2135 | toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); | 2765 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS); |
2136 | 2766 | ||
2137 | rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg); | 2767 | rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); |
2138 | 2768 | ||
2139 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2769 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2140 | break; | 2770 | break; |
2141 | } | 2771 | } |
2142 | case 0x8f: /* pop (sole member of Grp1a) */ | 2772 | case 0x8f: /* pop (sole member of Grp1a) */ |
2143 | rc = emulate_grp1a(ctxt, ops); | 2773 | rc = emulate_grp1a(ctxt, ops); |
2144 | if (rc != 0) | 2774 | if (rc != X86EMUL_CONTINUE) |
2145 | goto done; | 2775 | goto done; |
2146 | break; | 2776 | break; |
2147 | case 0x90: /* nop / xchg r8,rax */ | 2777 | case 0x90: /* nop / xchg r8,rax */ |
@@ -2175,89 +2805,16 @@ special_insn: | |||
2175 | c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; | 2805 | c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; |
2176 | break; | 2806 | break; |
2177 | case 0xa4 ... 0xa5: /* movs */ | 2807 | case 0xa4 ... 0xa5: /* movs */ |
2178 | c->dst.type = OP_MEM; | 2808 | goto mov; |
2179 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2180 | c->dst.ptr = (unsigned long *)register_address(c, | ||
2181 | es_base(ctxt), | ||
2182 | c->regs[VCPU_REGS_RDI]); | ||
2183 | rc = ops->read_emulated(register_address(c, | ||
2184 | seg_override_base(ctxt, c), | ||
2185 | c->regs[VCPU_REGS_RSI]), | ||
2186 | &c->dst.val, | ||
2187 | c->dst.bytes, ctxt->vcpu); | ||
2188 | if (rc != X86EMUL_CONTINUE) | ||
2189 | goto done; | ||
2190 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
2191 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2192 | : c->dst.bytes); | ||
2193 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
2194 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2195 | : c->dst.bytes); | ||
2196 | break; | ||
2197 | case 0xa6 ... 0xa7: /* cmps */ | 2809 | case 0xa6 ... 0xa7: /* cmps */ |
2198 | c->src.type = OP_NONE; /* Disable writeback. */ | ||
2199 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2200 | c->src.ptr = (unsigned long *)register_address(c, | ||
2201 | seg_override_base(ctxt, c), | ||
2202 | c->regs[VCPU_REGS_RSI]); | ||
2203 | rc = ops->read_emulated((unsigned long)c->src.ptr, | ||
2204 | &c->src.val, | ||
2205 | c->src.bytes, | ||
2206 | ctxt->vcpu); | ||
2207 | if (rc != X86EMUL_CONTINUE) | ||
2208 | goto done; | ||
2209 | |||
2210 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2810 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2211 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2212 | c->dst.ptr = (unsigned long *)register_address(c, | ||
2213 | es_base(ctxt), | ||
2214 | c->regs[VCPU_REGS_RDI]); | ||
2215 | rc = ops->read_emulated((unsigned long)c->dst.ptr, | ||
2216 | &c->dst.val, | ||
2217 | c->dst.bytes, | ||
2218 | ctxt->vcpu); | ||
2219 | if (rc != X86EMUL_CONTINUE) | ||
2220 | goto done; | ||
2221 | |||
2222 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); | 2811 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); |
2223 | 2812 | goto cmp; | |
2224 | emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); | ||
2225 | |||
2226 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
2227 | (ctxt->eflags & EFLG_DF) ? -c->src.bytes | ||
2228 | : c->src.bytes); | ||
2229 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
2230 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2231 | : c->dst.bytes); | ||
2232 | |||
2233 | break; | ||
2234 | case 0xaa ... 0xab: /* stos */ | 2813 | case 0xaa ... 0xab: /* stos */ |
2235 | c->dst.type = OP_MEM; | ||
2236 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2237 | c->dst.ptr = (unsigned long *)register_address(c, | ||
2238 | es_base(ctxt), | ||
2239 | c->regs[VCPU_REGS_RDI]); | ||
2240 | c->dst.val = c->regs[VCPU_REGS_RAX]; | 2814 | c->dst.val = c->regs[VCPU_REGS_RAX]; |
2241 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
2242 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2243 | : c->dst.bytes); | ||
2244 | break; | 2815 | break; |
2245 | case 0xac ... 0xad: /* lods */ | 2816 | case 0xac ... 0xad: /* lods */ |
2246 | c->dst.type = OP_REG; | 2817 | goto mov; |
2247 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2248 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | ||
2249 | rc = ops->read_emulated(register_address(c, | ||
2250 | seg_override_base(ctxt, c), | ||
2251 | c->regs[VCPU_REGS_RSI]), | ||
2252 | &c->dst.val, | ||
2253 | c->dst.bytes, | ||
2254 | ctxt->vcpu); | ||
2255 | if (rc != X86EMUL_CONTINUE) | ||
2256 | goto done; | ||
2257 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
2258 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2259 | : c->dst.bytes); | ||
2260 | break; | ||
2261 | case 0xae ... 0xaf: /* scas */ | 2818 | case 0xae ... 0xaf: /* scas */ |
2262 | DPRINTF("Urk! I don't handle SCAS.\n"); | 2819 | DPRINTF("Urk! I don't handle SCAS.\n"); |
2263 | goto cannot_emulate; | 2820 | goto cannot_emulate; |
@@ -2277,7 +2834,7 @@ special_insn: | |||
2277 | break; | 2834 | break; |
2278 | case 0xcb: /* ret far */ | 2835 | case 0xcb: /* ret far */ |
2279 | rc = emulate_ret_far(ctxt, ops); | 2836 | rc = emulate_ret_far(ctxt, ops); |
2280 | if (rc) | 2837 | if (rc != X86EMUL_CONTINUE) |
2281 | goto done; | 2838 | goto done; |
2282 | break; | 2839 | break; |
2283 | case 0xd0 ... 0xd1: /* Grp2 */ | 2840 | case 0xd0 ... 0xd1: /* Grp2 */ |
@@ -2290,14 +2847,10 @@ special_insn: | |||
2290 | break; | 2847 | break; |
2291 | case 0xe4: /* inb */ | 2848 | case 0xe4: /* inb */ |
2292 | case 0xe5: /* in */ | 2849 | case 0xe5: /* in */ |
2293 | port = c->src.val; | 2850 | goto do_io_in; |
2294 | io_dir_in = 1; | ||
2295 | goto do_io; | ||
2296 | case 0xe6: /* outb */ | 2851 | case 0xe6: /* outb */ |
2297 | case 0xe7: /* out */ | 2852 | case 0xe7: /* out */ |
2298 | port = c->src.val; | 2853 | goto do_io_out; |
2299 | io_dir_in = 0; | ||
2300 | goto do_io; | ||
2301 | case 0xe8: /* call (near) */ { | 2854 | case 0xe8: /* call (near) */ { |
2302 | long int rel = c->src.val; | 2855 | long int rel = c->src.val; |
2303 | c->src.val = (unsigned long) c->eip; | 2856 | c->src.val = (unsigned long) c->eip; |
@@ -2308,8 +2861,9 @@ special_insn: | |||
2308 | case 0xe9: /* jmp rel */ | 2861 | case 0xe9: /* jmp rel */ |
2309 | goto jmp; | 2862 | goto jmp; |
2310 | case 0xea: /* jmp far */ | 2863 | case 0xea: /* jmp far */ |
2311 | if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, | 2864 | jump_far: |
2312 | VCPU_SREG_CS)) | 2865 | if (load_segment_descriptor(ctxt, ops, c->src2.val, |
2866 | VCPU_SREG_CS)) | ||
2313 | goto done; | 2867 | goto done; |
2314 | 2868 | ||
2315 | c->eip = c->src.val; | 2869 | c->eip = c->src.val; |
@@ -2321,25 +2875,29 @@ special_insn: | |||
2321 | break; | 2875 | break; |
2322 | case 0xec: /* in al,dx */ | 2876 | case 0xec: /* in al,dx */ |
2323 | case 0xed: /* in (e/r)ax,dx */ | 2877 | case 0xed: /* in (e/r)ax,dx */ |
2324 | port = c->regs[VCPU_REGS_RDX]; | 2878 | c->src.val = c->regs[VCPU_REGS_RDX]; |
2325 | io_dir_in = 1; | 2879 | do_io_in: |
2326 | goto do_io; | 2880 | c->dst.bytes = min(c->dst.bytes, 4u); |
2881 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { | ||
2882 | kvm_inject_gp(ctxt->vcpu, 0); | ||
2883 | goto done; | ||
2884 | } | ||
2885 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, | ||
2886 | &c->dst.val)) | ||
2887 | goto done; /* IO is needed */ | ||
2888 | break; | ||
2327 | case 0xee: /* out al,dx */ | 2889 | case 0xee: /* out al,dx */ |
2328 | case 0xef: /* out (e/r)ax,dx */ | 2890 | case 0xef: /* out (e/r)ax,dx */ |
2329 | port = c->regs[VCPU_REGS_RDX]; | 2891 | c->src.val = c->regs[VCPU_REGS_RDX]; |
2330 | io_dir_in = 0; | 2892 | do_io_out: |
2331 | do_io: | 2893 | c->dst.bytes = min(c->dst.bytes, 4u); |
2332 | if (!emulator_io_permited(ctxt, ops, port, | 2894 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { |
2333 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | ||
2334 | kvm_inject_gp(ctxt->vcpu, 0); | 2895 | kvm_inject_gp(ctxt->vcpu, 0); |
2335 | goto done; | 2896 | goto done; |
2336 | } | 2897 | } |
2337 | if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, | 2898 | ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, |
2338 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2899 | ctxt->vcpu); |
2339 | port) != 0) { | 2900 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2340 | c->eip = saved_eip; | ||
2341 | goto cannot_emulate; | ||
2342 | } | ||
2343 | break; | 2901 | break; |
2344 | case 0xf4: /* hlt */ | 2902 | case 0xf4: /* hlt */ |
2345 | ctxt->vcpu->arch.halt_request = 1; | 2903 | ctxt->vcpu->arch.halt_request = 1; |
@@ -2350,16 +2908,15 @@ special_insn: | |||
2350 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2908 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2351 | break; | 2909 | break; |
2352 | case 0xf6 ... 0xf7: /* Grp3 */ | 2910 | case 0xf6 ... 0xf7: /* Grp3 */ |
2353 | rc = emulate_grp3(ctxt, ops); | 2911 | if (!emulate_grp3(ctxt, ops)) |
2354 | if (rc != 0) | 2912 | goto cannot_emulate; |
2355 | goto done; | ||
2356 | break; | 2913 | break; |
2357 | case 0xf8: /* clc */ | 2914 | case 0xf8: /* clc */ |
2358 | ctxt->eflags &= ~EFLG_CF; | 2915 | ctxt->eflags &= ~EFLG_CF; |
2359 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2916 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2360 | break; | 2917 | break; |
2361 | case 0xfa: /* cli */ | 2918 | case 0xfa: /* cli */ |
2362 | if (emulator_bad_iopl(ctxt)) | 2919 | if (emulator_bad_iopl(ctxt, ops)) |
2363 | kvm_inject_gp(ctxt->vcpu, 0); | 2920 | kvm_inject_gp(ctxt->vcpu, 0); |
2364 | else { | 2921 | else { |
2365 | ctxt->eflags &= ~X86_EFLAGS_IF; | 2922 | ctxt->eflags &= ~X86_EFLAGS_IF; |
@@ -2367,10 +2924,10 @@ special_insn: | |||
2367 | } | 2924 | } |
2368 | break; | 2925 | break; |
2369 | case 0xfb: /* sti */ | 2926 | case 0xfb: /* sti */ |
2370 | if (emulator_bad_iopl(ctxt)) | 2927 | if (emulator_bad_iopl(ctxt, ops)) |
2371 | kvm_inject_gp(ctxt->vcpu, 0); | 2928 | kvm_inject_gp(ctxt->vcpu, 0); |
2372 | else { | 2929 | else { |
2373 | toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); | 2930 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI); |
2374 | ctxt->eflags |= X86_EFLAGS_IF; | 2931 | ctxt->eflags |= X86_EFLAGS_IF; |
2375 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2932 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2376 | } | 2933 | } |
@@ -2383,28 +2940,55 @@ special_insn: | |||
2383 | ctxt->eflags |= EFLG_DF; | 2940 | ctxt->eflags |= EFLG_DF; |
2384 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2941 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2385 | break; | 2942 | break; |
2386 | case 0xfe ... 0xff: /* Grp4/Grp5 */ | 2943 | case 0xfe: /* Grp4 */ |
2944 | grp45: | ||
2387 | rc = emulate_grp45(ctxt, ops); | 2945 | rc = emulate_grp45(ctxt, ops); |
2388 | if (rc != 0) | 2946 | if (rc != X86EMUL_CONTINUE) |
2389 | goto done; | 2947 | goto done; |
2390 | break; | 2948 | break; |
2949 | case 0xff: /* Grp5 */ | ||
2950 | if (c->modrm_reg == 5) | ||
2951 | goto jump_far; | ||
2952 | goto grp45; | ||
2391 | } | 2953 | } |
2392 | 2954 | ||
2393 | writeback: | 2955 | writeback: |
2394 | rc = writeback(ctxt, ops); | 2956 | rc = writeback(ctxt, ops); |
2395 | if (rc != 0) | 2957 | if (rc != X86EMUL_CONTINUE) |
2396 | goto done; | 2958 | goto done; |
2397 | 2959 | ||
2960 | /* | ||
2961 | * restore dst type in case the decoding will be reused | ||
2962 | * (happens for string instructions) | ||
2963 | */ | ||
2964 | c->dst.type = saved_dst_type; | ||
2965 | |||
2966 | if ((c->d & SrcMask) == SrcSI) | ||
2967 | string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI, | ||
2968 | &c->src); | ||
2969 | |||
2970 | if ((c->d & DstMask) == DstDI) | ||
2971 | string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst); | ||
2972 | |||
2973 | if (c->rep_prefix && (c->d & String)) { | ||
2974 | struct read_cache *rc = &ctxt->decode.io_read; | ||
2975 | register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); | ||
2976 | /* | ||
2977 | * Re-enter guest when pio read ahead buffer is empty or, | ||
2978 | * if it is not used, after each 1024 iteration. | ||
2979 | */ | ||
2980 | if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) || | ||
2981 | (rc->end != 0 && rc->end == rc->pos)) | ||
2982 | ctxt->restart = false; | ||
2983 | } | ||
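The restart logic above keeps string emulation inside the emulator as long as the PIO read-ahead cache still holds data, but otherwise forces a return to the guest every 1024 iterations so a long REP cannot monopolise the vcpu. Expressed as a predicate, roughly (hypothetical names; rcx is the already-decremented count):

#include <stdbool.h>
#include <stdint.h>

static bool keep_restarting(uint64_t rcx, unsigned int cache_pos,
			    unsigned int cache_end)
{
	if (cache_end == 0)			/* no read-ahead buffer in use */
		return (rcx & 0x3ff) != 0;	/* break out every 1024 rounds */
	return cache_pos != cache_end;		/* stop once the cache is drained */
}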
2984 | |||
2398 | /* Commit shadow register state. */ | 2985 | /* Commit shadow register state. */ |
2399 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | 2986 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); |
2400 | kvm_rip_write(ctxt->vcpu, c->eip); | 2987 | kvm_rip_write(ctxt->vcpu, c->eip); |
2988 | ops->set_rflags(ctxt->vcpu, ctxt->eflags); | ||
2401 | 2989 | ||
2402 | done: | 2990 | done: |
2403 | if (rc == X86EMUL_UNHANDLEABLE) { | 2991 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
2404 | c->eip = saved_eip; | ||
2405 | return -1; | ||
2406 | } | ||
2407 | return 0; | ||
2408 | 2992 | ||
2409 | twobyte_insn: | 2993 | twobyte_insn: |
2410 | switch (c->b) { | 2994 | switch (c->b) { |
@@ -2418,18 +3002,18 @@ twobyte_insn: | |||
2418 | goto cannot_emulate; | 3002 | goto cannot_emulate; |
2419 | 3003 | ||
2420 | rc = kvm_fix_hypercall(ctxt->vcpu); | 3004 | rc = kvm_fix_hypercall(ctxt->vcpu); |
2421 | if (rc) | 3005 | if (rc != X86EMUL_CONTINUE) |
2422 | goto done; | 3006 | goto done; |
2423 | 3007 | ||
2424 | /* Let the processor re-execute the fixed hypercall */ | 3008 | /* Let the processor re-execute the fixed hypercall */ |
2425 | c->eip = kvm_rip_read(ctxt->vcpu); | 3009 | c->eip = ctxt->eip; |
2426 | /* Disable writeback. */ | 3010 | /* Disable writeback. */ |
2427 | c->dst.type = OP_NONE; | 3011 | c->dst.type = OP_NONE; |
2428 | break; | 3012 | break; |
2429 | case 2: /* lgdt */ | 3013 | case 2: /* lgdt */ |
2430 | rc = read_descriptor(ctxt, ops, c->src.ptr, | 3014 | rc = read_descriptor(ctxt, ops, c->src.ptr, |
2431 | &size, &address, c->op_bytes); | 3015 | &size, &address, c->op_bytes); |
2432 | if (rc) | 3016 | if (rc != X86EMUL_CONTINUE) |
2433 | goto done; | 3017 | goto done; |
2434 | realmode_lgdt(ctxt->vcpu, size, address); | 3018 | realmode_lgdt(ctxt->vcpu, size, address); |
2435 | /* Disable writeback. */ | 3019 | /* Disable writeback. */ |
@@ -2440,7 +3024,7 @@ twobyte_insn: | |||
2440 | switch (c->modrm_rm) { | 3024 | switch (c->modrm_rm) { |
2441 | case 1: | 3025 | case 1: |
2442 | rc = kvm_fix_hypercall(ctxt->vcpu); | 3026 | rc = kvm_fix_hypercall(ctxt->vcpu); |
2443 | if (rc) | 3027 | if (rc != X86EMUL_CONTINUE) |
2444 | goto done; | 3028 | goto done; |
2445 | break; | 3029 | break; |
2446 | default: | 3030 | default: |
@@ -2450,7 +3034,7 @@ twobyte_insn: | |||
2450 | rc = read_descriptor(ctxt, ops, c->src.ptr, | 3034 | rc = read_descriptor(ctxt, ops, c->src.ptr, |
2451 | &size, &address, | 3035 | &size, &address, |
2452 | c->op_bytes); | 3036 | c->op_bytes); |
2453 | if (rc) | 3037 | if (rc != X86EMUL_CONTINUE) |
2454 | goto done; | 3038 | goto done; |
2455 | realmode_lidt(ctxt->vcpu, size, address); | 3039 | realmode_lidt(ctxt->vcpu, size, address); |
2456 | } | 3040 | } |
@@ -2459,15 +3043,18 @@ twobyte_insn: | |||
2459 | break; | 3043 | break; |
2460 | case 4: /* smsw */ | 3044 | case 4: /* smsw */ |
2461 | c->dst.bytes = 2; | 3045 | c->dst.bytes = 2; |
2462 | c->dst.val = realmode_get_cr(ctxt->vcpu, 0); | 3046 | c->dst.val = ops->get_cr(0, ctxt->vcpu); |
2463 | break; | 3047 | break; |
2464 | case 6: /* lmsw */ | 3048 | case 6: /* lmsw */ |
2465 | realmode_lmsw(ctxt->vcpu, (u16)c->src.val, | 3049 | ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) | |
2466 | &ctxt->eflags); | 3050 | (c->src.val & 0x0f), ctxt->vcpu); |
2467 | c->dst.type = OP_NONE; | 3051 | c->dst.type = OP_NONE; |
2468 | break; | 3052 | break; |
3053 | case 5: /* not defined */ | ||
3054 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
3055 | goto done; | ||
2469 | case 7: /* invlpg*/ | 3056 | case 7: /* invlpg*/ |
2470 | emulate_invlpg(ctxt->vcpu, memop); | 3057 | emulate_invlpg(ctxt->vcpu, c->modrm_ea); |
2471 | /* Disable writeback. */ | 3058 | /* Disable writeback. */ |
2472 | c->dst.type = OP_NONE; | 3059 | c->dst.type = OP_NONE; |
2473 | break; | 3060 | break; |
@@ -2493,54 +3080,54 @@ twobyte_insn: | |||
2493 | c->dst.type = OP_NONE; | 3080 | c->dst.type = OP_NONE; |
2494 | break; | 3081 | break; |
2495 | case 0x20: /* mov cr, reg */ | 3082 | case 0x20: /* mov cr, reg */ |
2496 | if (c->modrm_mod != 3) | 3083 | switch (c->modrm_reg) { |
2497 | goto cannot_emulate; | 3084 | case 1: |
2498 | c->regs[c->modrm_rm] = | 3085 | case 5 ... 7: |
2499 | realmode_get_cr(ctxt->vcpu, c->modrm_reg); | 3086 | case 9 ... 15: |
3087 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
3088 | goto done; | ||
3089 | } | ||
3090 | c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); | ||
2500 | c->dst.type = OP_NONE; /* no writeback */ | 3091 | c->dst.type = OP_NONE; /* no writeback */ |
2501 | break; | 3092 | break; |
2502 | case 0x21: /* mov from dr to reg */ | 3093 | case 0x21: /* mov from dr to reg */ |
2503 | if (c->modrm_mod != 3) | 3094 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
2504 | goto cannot_emulate; | 3095 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
2505 | rc = emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); | 3096 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
2506 | if (rc) | 3097 | goto done; |
2507 | goto cannot_emulate; | 3098 | } |
3099 | emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); | ||
2508 | c->dst.type = OP_NONE; /* no writeback */ | 3100 | c->dst.type = OP_NONE; /* no writeback */ |
2509 | break; | 3101 | break; |
2510 | case 0x22: /* mov reg, cr */ | 3102 | case 0x22: /* mov reg, cr */ |
2511 | if (c->modrm_mod != 3) | 3103 | ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu); |
2512 | goto cannot_emulate; | ||
2513 | realmode_set_cr(ctxt->vcpu, | ||
2514 | c->modrm_reg, c->modrm_val, &ctxt->eflags); | ||
2515 | c->dst.type = OP_NONE; | 3104 | c->dst.type = OP_NONE; |
2516 | break; | 3105 | break; |
2517 | case 0x23: /* mov from reg to dr */ | 3106 | case 0x23: /* mov from reg to dr */ |
2518 | if (c->modrm_mod != 3) | 3107 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
2519 | goto cannot_emulate; | 3108 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
2520 | rc = emulator_set_dr(ctxt, c->modrm_reg, | 3109 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
2521 | c->regs[c->modrm_rm]); | 3110 | goto done; |
2522 | if (rc) | 3111 | } |
2523 | goto cannot_emulate; | 3112 | emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]); |
2524 | c->dst.type = OP_NONE; /* no writeback */ | 3113 | c->dst.type = OP_NONE; /* no writeback */ |
2525 | break; | 3114 | break; |
2526 | case 0x30: | 3115 | case 0x30: |
2527 | /* wrmsr */ | 3116 | /* wrmsr */ |
2528 | msr_data = (u32)c->regs[VCPU_REGS_RAX] | 3117 | msr_data = (u32)c->regs[VCPU_REGS_RAX] |
2529 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); | 3118 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); |
2530 | rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data); | 3119 | if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { |
2531 | if (rc) { | ||
2532 | kvm_inject_gp(ctxt->vcpu, 0); | 3120 | kvm_inject_gp(ctxt->vcpu, 0); |
2533 | c->eip = kvm_rip_read(ctxt->vcpu); | 3121 | goto done; |
2534 | } | 3122 | } |
2535 | rc = X86EMUL_CONTINUE; | 3123 | rc = X86EMUL_CONTINUE; |
2536 | c->dst.type = OP_NONE; | 3124 | c->dst.type = OP_NONE; |
2537 | break; | 3125 | break; |
2538 | case 0x32: | 3126 | case 0x32: |
2539 | /* rdmsr */ | 3127 | /* rdmsr */ |
2540 | rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data); | 3128 | if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { |
2541 | if (rc) { | ||
2542 | kvm_inject_gp(ctxt->vcpu, 0); | 3129 | kvm_inject_gp(ctxt->vcpu, 0); |
2543 | c->eip = kvm_rip_read(ctxt->vcpu); | 3130 | goto done; |
2544 | } else { | 3131 | } else { |
2545 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; | 3132 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; |
2546 | c->regs[VCPU_REGS_RDX] = msr_data >> 32; | 3133 | c->regs[VCPU_REGS_RDX] = msr_data >> 32; |
@@ -2577,7 +3164,7 @@ twobyte_insn: | |||
2577 | break; | 3164 | break; |
2578 | case 0xa1: /* pop fs */ | 3165 | case 0xa1: /* pop fs */ |
2579 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); | 3166 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); |
2580 | if (rc != 0) | 3167 | if (rc != X86EMUL_CONTINUE) |
2581 | goto done; | 3168 | goto done; |
2582 | break; | 3169 | break; |
2583 | case 0xa3: | 3170 | case 0xa3: |
@@ -2596,7 +3183,7 @@ twobyte_insn: | |||
2596 | break; | 3183 | break; |
2597 | case 0xa9: /* pop gs */ | 3184 | case 0xa9: /* pop gs */ |
2598 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); | 3185 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); |
2599 | if (rc != 0) | 3186 | if (rc != X86EMUL_CONTINUE) |
2600 | goto done; | 3187 | goto done; |
2601 | break; | 3188 | break; |
2602 | case 0xab: | 3189 | case 0xab: |
@@ -2668,16 +3255,14 @@ twobyte_insn: | |||
2668 | (u64) c->src.val; | 3255 | (u64) c->src.val; |
2669 | break; | 3256 | break; |
2670 | case 0xc7: /* Grp9 (cmpxchg8b) */ | 3257 | case 0xc7: /* Grp9 (cmpxchg8b) */ |
2671 | rc = emulate_grp9(ctxt, ops, memop); | 3258 | rc = emulate_grp9(ctxt, ops); |
2672 | if (rc != 0) | 3259 | if (rc != X86EMUL_CONTINUE) |
2673 | goto done; | 3260 | goto done; |
2674 | c->dst.type = OP_NONE; | ||
2675 | break; | 3261 | break; |
2676 | } | 3262 | } |
2677 | goto writeback; | 3263 | goto writeback; |
2678 | 3264 | ||
2679 | cannot_emulate: | 3265 | cannot_emulate: |
2680 | DPRINTF("Cannot emulate %02x\n", c->b); | 3266 | DPRINTF("Cannot emulate %02x\n", c->b); |
2681 | c->eip = saved_eip; | ||
2682 | return -1; | 3267 | return -1; |
2683 | } | 3268 | } |
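The emulate.c hunks above fold the privileged-register legality checks into the emulator itself: a mov to or from a reserved control register, or to DR4/DR5 while CR4.DE is set, now queues #UD instead of falling through to cannot_emulate. A minimal standalone sketch of those two checks follows; the helper names are illustrative, not the kernel's.

#include <stdbool.h>

#define X86_CR4_DE (1u << 3)	/* debugging-extensions bit in CR4 */

/* Only CR0, CR2, CR3, CR4 and CR8 are architecturally accessible. */
static bool cr_is_valid(int cr)
{
	switch (cr) {
	case 0: case 2: case 3: case 4: case 8:
		return true;
	default:	/* CR5-CR7, CR9-CR15: raise #UD, as in the hunk above */
		return false;
	}
}

/* With CR4.DE set, DR4/DR5 stop aliasing DR6/DR7 and raise #UD instead. */
static bool dr_access_is_ud(int dr, unsigned long cr4)
{
	return (cr4 & X86_CR4_DE) && (dr == 4 || dr == 5);
}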
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index a790fa128a9f..93825ff3338f 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -33,6 +33,29 @@ | |||
33 | #include <linux/kvm_host.h> | 33 | #include <linux/kvm_host.h> |
34 | #include "trace.h" | 34 | #include "trace.h" |
35 | 35 | ||
36 | static void pic_lock(struct kvm_pic *s) | ||
37 | __acquires(&s->lock) | ||
38 | { | ||
39 | raw_spin_lock(&s->lock); | ||
40 | } | ||
41 | |||
42 | static void pic_unlock(struct kvm_pic *s) | ||
43 | __releases(&s->lock) | ||
44 | { | ||
45 | bool wakeup = s->wakeup_needed; | ||
46 | struct kvm_vcpu *vcpu; | ||
47 | |||
48 | s->wakeup_needed = false; | ||
49 | |||
50 | raw_spin_unlock(&s->lock); | ||
51 | |||
52 | if (wakeup) { | ||
53 | vcpu = s->kvm->bsp_vcpu; | ||
54 | if (vcpu) | ||
55 | kvm_vcpu_kick(vcpu); | ||
56 | } | ||
57 | } | ||
58 | |||
36 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | 59 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) |
37 | { | 60 | { |
38 | s->isr &= ~(1 << irq); | 61 | s->isr &= ~(1 << irq); |
@@ -45,19 +68,19 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | |||
45 | * Other interrupt may be delivered to PIC while lock is dropped but | 68 | * Other interrupt may be delivered to PIC while lock is dropped but |
46 | * it should be safe since PIC state is already updated at this stage. | 69 | * it should be safe since PIC state is already updated at this stage. |
47 | */ | 70 | */ |
48 | raw_spin_unlock(&s->pics_state->lock); | 71 | pic_unlock(s->pics_state); |
49 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); | 72 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); |
50 | raw_spin_lock(&s->pics_state->lock); | 73 | pic_lock(s->pics_state); |
51 | } | 74 | } |
52 | 75 | ||
53 | void kvm_pic_clear_isr_ack(struct kvm *kvm) | 76 | void kvm_pic_clear_isr_ack(struct kvm *kvm) |
54 | { | 77 | { |
55 | struct kvm_pic *s = pic_irqchip(kvm); | 78 | struct kvm_pic *s = pic_irqchip(kvm); |
56 | 79 | ||
57 | raw_spin_lock(&s->lock); | 80 | pic_lock(s); |
58 | s->pics[0].isr_ack = 0xff; | 81 | s->pics[0].isr_ack = 0xff; |
59 | s->pics[1].isr_ack = 0xff; | 82 | s->pics[1].isr_ack = 0xff; |
60 | raw_spin_unlock(&s->lock); | 83 | pic_unlock(s); |
61 | } | 84 | } |
62 | 85 | ||
63 | /* | 86 | /* |
@@ -158,9 +181,9 @@ static void pic_update_irq(struct kvm_pic *s) | |||
158 | 181 | ||
159 | void kvm_pic_update_irq(struct kvm_pic *s) | 182 | void kvm_pic_update_irq(struct kvm_pic *s) |
160 | { | 183 | { |
161 | raw_spin_lock(&s->lock); | 184 | pic_lock(s); |
162 | pic_update_irq(s); | 185 | pic_update_irq(s); |
163 | raw_spin_unlock(&s->lock); | 186 | pic_unlock(s); |
164 | } | 187 | } |
165 | 188 | ||
166 | int kvm_pic_set_irq(void *opaque, int irq, int level) | 189 | int kvm_pic_set_irq(void *opaque, int irq, int level) |
@@ -168,14 +191,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) | |||
168 | struct kvm_pic *s = opaque; | 191 | struct kvm_pic *s = opaque; |
169 | int ret = -1; | 192 | int ret = -1; |
170 | 193 | ||
171 | raw_spin_lock(&s->lock); | 194 | pic_lock(s); |
172 | if (irq >= 0 && irq < PIC_NUM_PINS) { | 195 | if (irq >= 0 && irq < PIC_NUM_PINS) { |
173 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); | 196 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); |
174 | pic_update_irq(s); | 197 | pic_update_irq(s); |
175 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, | 198 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, |
176 | s->pics[irq >> 3].imr, ret == 0); | 199 | s->pics[irq >> 3].imr, ret == 0); |
177 | } | 200 | } |
178 | raw_spin_unlock(&s->lock); | 201 | pic_unlock(s); |
179 | 202 | ||
180 | return ret; | 203 | return ret; |
181 | } | 204 | } |
@@ -205,7 +228,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
205 | int irq, irq2, intno; | 228 | int irq, irq2, intno; |
206 | struct kvm_pic *s = pic_irqchip(kvm); | 229 | struct kvm_pic *s = pic_irqchip(kvm); |
207 | 230 | ||
208 | raw_spin_lock(&s->lock); | 231 | pic_lock(s); |
209 | irq = pic_get_irq(&s->pics[0]); | 232 | irq = pic_get_irq(&s->pics[0]); |
210 | if (irq >= 0) { | 233 | if (irq >= 0) { |
211 | pic_intack(&s->pics[0], irq); | 234 | pic_intack(&s->pics[0], irq); |
@@ -230,7 +253,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
230 | intno = s->pics[0].irq_base + irq; | 253 | intno = s->pics[0].irq_base + irq; |
231 | } | 254 | } |
232 | pic_update_irq(s); | 255 | pic_update_irq(s); |
233 | raw_spin_unlock(&s->lock); | 256 | pic_unlock(s); |
234 | 257 | ||
235 | return intno; | 258 | return intno; |
236 | } | 259 | } |
@@ -444,7 +467,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
444 | printk(KERN_ERR "PIC: non byte write\n"); | 467 | printk(KERN_ERR "PIC: non byte write\n"); |
445 | return 0; | 468 | return 0; |
446 | } | 469 | } |
447 | raw_spin_lock(&s->lock); | 470 | pic_lock(s); |
448 | switch (addr) { | 471 | switch (addr) { |
449 | case 0x20: | 472 | case 0x20: |
450 | case 0x21: | 473 | case 0x21: |
@@ -457,7 +480,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
457 | elcr_ioport_write(&s->pics[addr & 1], addr, data); | 480 | elcr_ioport_write(&s->pics[addr & 1], addr, data); |
458 | break; | 481 | break; |
459 | } | 482 | } |
460 | raw_spin_unlock(&s->lock); | 483 | pic_unlock(s); |
461 | return 0; | 484 | return 0; |
462 | } | 485 | } |
463 | 486 | ||
@@ -474,7 +497,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
474 | printk(KERN_ERR "PIC: non byte read\n"); | 497 | printk(KERN_ERR "PIC: non byte read\n"); |
475 | return 0; | 498 | return 0; |
476 | } | 499 | } |
477 | raw_spin_lock(&s->lock); | 500 | pic_lock(s); |
478 | switch (addr) { | 501 | switch (addr) { |
479 | case 0x20: | 502 | case 0x20: |
480 | case 0x21: | 503 | case 0x21: |
@@ -488,7 +511,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
488 | break; | 511 | break; |
489 | } | 512 | } |
490 | *(unsigned char *)val = data; | 513 | *(unsigned char *)val = data; |
491 | raw_spin_unlock(&s->lock); | 514 | pic_unlock(s); |
492 | return 0; | 515 | return 0; |
493 | } | 516 | } |
494 | 517 | ||
@@ -505,7 +528,7 @@ static void pic_irq_request(void *opaque, int level) | |||
505 | s->output = level; | 528 | s->output = level; |
506 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { | 529 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { |
507 | s->pics[0].isr_ack &= ~(1 << irq); | 530 | s->pics[0].isr_ack &= ~(1 << irq); |
508 | kvm_vcpu_kick(vcpu); | 531 | s->wakeup_needed = true; |
509 | } | 532 | } |
510 | } | 533 | } |
511 | 534 | ||
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 34b15915754d..cd1f362f413d 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -63,6 +63,7 @@ struct kvm_kpic_state { | |||
63 | 63 | ||
64 | struct kvm_pic { | 64 | struct kvm_pic { |
65 | raw_spinlock_t lock; | 65 | raw_spinlock_t lock; |
66 | bool wakeup_needed; | ||
66 | unsigned pending_acks; | 67 | unsigned pending_acks; |
67 | struct kvm *kvm; | 68 | struct kvm *kvm; |
68 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | 69 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ |
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h index 55c7524dda54..64bc6ea78d90 100644 --- a/arch/x86/kvm/kvm_timer.h +++ b/arch/x86/kvm/kvm_timer.h | |||
@@ -10,9 +10,7 @@ struct kvm_timer { | |||
10 | }; | 10 | }; |
11 | 11 | ||
12 | struct kvm_timer_ops { | 12 | struct kvm_timer_ops { |
13 | bool (*is_periodic)(struct kvm_timer *); | 13 | bool (*is_periodic)(struct kvm_timer *); |
14 | }; | 14 | }; |
15 | 15 | ||
16 | |||
17 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); | 16 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); |
18 | |||
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 19a8906bcaa2..81563e76e28f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -148,7 +148,6 @@ module_param(oos_shadow, bool, 0644); | |||
148 | 148 | ||
149 | #include <trace/events/kvm.h> | 149 | #include <trace/events/kvm.h> |
150 | 150 | ||
151 | #undef TRACE_INCLUDE_FILE | ||
152 | #define CREATE_TRACE_POINTS | 151 | #define CREATE_TRACE_POINTS |
153 | #include "mmutrace.h" | 152 | #include "mmutrace.h" |
154 | 153 | ||
@@ -174,12 +173,7 @@ struct kvm_shadow_walk_iterator { | |||
174 | shadow_walk_okay(&(_walker)); \ | 173 | shadow_walk_okay(&(_walker)); \ |
175 | shadow_walk_next(&(_walker))) | 174 | shadow_walk_next(&(_walker))) |
176 | 175 | ||
177 | 176 | typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp); | |
178 | struct kvm_unsync_walk { | ||
179 | int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); | ||
180 | }; | ||
181 | |||
182 | typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); | ||
183 | 177 | ||
184 | static struct kmem_cache *pte_chain_cache; | 178 | static struct kmem_cache *pte_chain_cache; |
185 | static struct kmem_cache *rmap_desc_cache; | 179 | static struct kmem_cache *rmap_desc_cache; |
@@ -223,7 +217,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | |||
223 | } | 217 | } |
224 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | 218 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); |
225 | 219 | ||
226 | static int is_write_protection(struct kvm_vcpu *vcpu) | 220 | static bool is_write_protection(struct kvm_vcpu *vcpu) |
227 | { | 221 | { |
228 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); | 222 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); |
229 | } | 223 | } |
@@ -327,7 +321,6 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, | |||
327 | page = alloc_page(GFP_KERNEL); | 321 | page = alloc_page(GFP_KERNEL); |
328 | if (!page) | 322 | if (!page) |
329 | return -ENOMEM; | 323 | return -ENOMEM; |
330 | set_page_private(page, 0); | ||
331 | cache->objects[cache->nobjs++] = page_address(page); | 324 | cache->objects[cache->nobjs++] = page_address(page); |
332 | } | 325 | } |
333 | return 0; | 326 | return 0; |
@@ -438,9 +431,9 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | |||
438 | int i; | 431 | int i; |
439 | 432 | ||
440 | gfn = unalias_gfn(kvm, gfn); | 433 | gfn = unalias_gfn(kvm, gfn); |
434 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
441 | for (i = PT_DIRECTORY_LEVEL; | 435 | for (i = PT_DIRECTORY_LEVEL; |
442 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 436 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
443 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
444 | write_count = slot_largepage_idx(gfn, slot, i); | 437 | write_count = slot_largepage_idx(gfn, slot, i); |
445 | *write_count -= 1; | 438 | *write_count -= 1; |
446 | WARN_ON(*write_count < 0); | 439 | WARN_ON(*write_count < 0); |
@@ -654,7 +647,6 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
654 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | 647 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) |
655 | { | 648 | { |
656 | struct kvm_rmap_desc *desc; | 649 | struct kvm_rmap_desc *desc; |
657 | struct kvm_rmap_desc *prev_desc; | ||
658 | u64 *prev_spte; | 650 | u64 *prev_spte; |
659 | int i; | 651 | int i; |
660 | 652 | ||
@@ -666,7 +658,6 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | |||
666 | return NULL; | 658 | return NULL; |
667 | } | 659 | } |
668 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); | 660 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); |
669 | prev_desc = NULL; | ||
670 | prev_spte = NULL; | 661 | prev_spte = NULL; |
671 | while (desc) { | 662 | while (desc) { |
672 | for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { | 663 | for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { |
@@ -794,7 +785,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
794 | int retval = 0; | 785 | int retval = 0; |
795 | struct kvm_memslots *slots; | 786 | struct kvm_memslots *slots; |
796 | 787 | ||
797 | slots = rcu_dereference(kvm->memslots); | 788 | slots = kvm_memslots(kvm); |
798 | 789 | ||
799 | for (i = 0; i < slots->nmemslots; i++) { | 790 | for (i = 0; i < slots->nmemslots; i++) { |
800 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | 791 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
@@ -925,7 +916,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
925 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | 916 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); |
926 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 917 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
927 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 918 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
928 | INIT_LIST_HEAD(&sp->oos_link); | ||
929 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 919 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
930 | sp->multimapped = 0; | 920 | sp->multimapped = 0; |
931 | sp->parent_pte = parent_pte; | 921 | sp->parent_pte = parent_pte; |
@@ -1009,8 +999,7 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, | |||
1009 | } | 999 | } |
1010 | 1000 | ||
1011 | 1001 | ||
1012 | static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 1002 | static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) |
1013 | mmu_parent_walk_fn fn) | ||
1014 | { | 1003 | { |
1015 | struct kvm_pte_chain *pte_chain; | 1004 | struct kvm_pte_chain *pte_chain; |
1016 | struct hlist_node *node; | 1005 | struct hlist_node *node; |
@@ -1019,8 +1008,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
1019 | 1008 | ||
1020 | if (!sp->multimapped && sp->parent_pte) { | 1009 | if (!sp->multimapped && sp->parent_pte) { |
1021 | parent_sp = page_header(__pa(sp->parent_pte)); | 1010 | parent_sp = page_header(__pa(sp->parent_pte)); |
1022 | fn(vcpu, parent_sp); | 1011 | fn(parent_sp); |
1023 | mmu_parent_walk(vcpu, parent_sp, fn); | 1012 | mmu_parent_walk(parent_sp, fn); |
1024 | return; | 1013 | return; |
1025 | } | 1014 | } |
1026 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | 1015 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) |
@@ -1028,8 +1017,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
1028 | if (!pte_chain->parent_ptes[i]) | 1017 | if (!pte_chain->parent_ptes[i]) |
1029 | break; | 1018 | break; |
1030 | parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); | 1019 | parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); |
1031 | fn(vcpu, parent_sp); | 1020 | fn(parent_sp); |
1032 | mmu_parent_walk(vcpu, parent_sp, fn); | 1021 | mmu_parent_walk(parent_sp, fn); |
1033 | } | 1022 | } |
1034 | } | 1023 | } |
1035 | 1024 | ||
@@ -1066,16 +1055,15 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) | |||
1066 | } | 1055 | } |
1067 | } | 1056 | } |
1068 | 1057 | ||
1069 | static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1058 | static int unsync_walk_fn(struct kvm_mmu_page *sp) |
1070 | { | 1059 | { |
1071 | kvm_mmu_update_parents_unsync(sp); | 1060 | kvm_mmu_update_parents_unsync(sp); |
1072 | return 1; | 1061 | return 1; |
1073 | } | 1062 | } |
1074 | 1063 | ||
1075 | static void kvm_mmu_mark_parents_unsync(struct kvm_vcpu *vcpu, | 1064 | static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) |
1076 | struct kvm_mmu_page *sp) | ||
1077 | { | 1065 | { |
1078 | mmu_parent_walk(vcpu, sp, unsync_walk_fn); | 1066 | mmu_parent_walk(sp, unsync_walk_fn); |
1079 | kvm_mmu_update_parents_unsync(sp); | 1067 | kvm_mmu_update_parents_unsync(sp); |
1080 | } | 1068 | } |
1081 | 1069 | ||
@@ -1201,6 +1189,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | |||
1201 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1189 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
1202 | { | 1190 | { |
1203 | WARN_ON(!sp->unsync); | 1191 | WARN_ON(!sp->unsync); |
1192 | trace_kvm_mmu_sync_page(sp); | ||
1204 | sp->unsync = 0; | 1193 | sp->unsync = 0; |
1205 | --kvm->stat.mmu_unsync; | 1194 | --kvm->stat.mmu_unsync; |
1206 | } | 1195 | } |
@@ -1209,12 +1198,11 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); | |||
1209 | 1198 | ||
1210 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1199 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
1211 | { | 1200 | { |
1212 | if (sp->role.glevels != vcpu->arch.mmu.root_level) { | 1201 | if (sp->role.cr4_pae != !!is_pae(vcpu)) { |
1213 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1202 | kvm_mmu_zap_page(vcpu->kvm, sp); |
1214 | return 1; | 1203 | return 1; |
1215 | } | 1204 | } |
1216 | 1205 | ||
1217 | trace_kvm_mmu_sync_page(sp); | ||
1218 | if (rmap_write_protect(vcpu->kvm, sp->gfn)) | 1206 | if (rmap_write_protect(vcpu->kvm, sp->gfn)) |
1219 | kvm_flush_remote_tlbs(vcpu->kvm); | 1207 | kvm_flush_remote_tlbs(vcpu->kvm); |
1220 | kvm_unlink_unsync_page(vcpu->kvm, sp); | 1208 | kvm_unlink_unsync_page(vcpu->kvm, sp); |
@@ -1331,6 +1319,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1331 | role = vcpu->arch.mmu.base_role; | 1319 | role = vcpu->arch.mmu.base_role; |
1332 | role.level = level; | 1320 | role.level = level; |
1333 | role.direct = direct; | 1321 | role.direct = direct; |
1322 | if (role.direct) | ||
1323 | role.cr4_pae = 0; | ||
1334 | role.access = access; | 1324 | role.access = access; |
1335 | if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { | 1325 | if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { |
1336 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); | 1326 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); |
@@ -1351,7 +1341,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1351 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1341 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
1352 | if (sp->unsync_children) { | 1342 | if (sp->unsync_children) { |
1353 | set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); | 1343 | set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); |
1354 | kvm_mmu_mark_parents_unsync(vcpu, sp); | 1344 | kvm_mmu_mark_parents_unsync(sp); |
1355 | } | 1345 | } |
1356 | trace_kvm_mmu_get_page(sp, false); | 1346 | trace_kvm_mmu_get_page(sp, false); |
1357 | return sp; | 1347 | return sp; |
@@ -1573,13 +1563,14 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
1573 | r = 0; | 1563 | r = 0; |
1574 | index = kvm_page_table_hashfn(gfn); | 1564 | index = kvm_page_table_hashfn(gfn); |
1575 | bucket = &kvm->arch.mmu_page_hash[index]; | 1565 | bucket = &kvm->arch.mmu_page_hash[index]; |
1566 | restart: | ||
1576 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) | 1567 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) |
1577 | if (sp->gfn == gfn && !sp->role.direct) { | 1568 | if (sp->gfn == gfn && !sp->role.direct) { |
1578 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, | 1569 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, |
1579 | sp->role.word); | 1570 | sp->role.word); |
1580 | r = 1; | 1571 | r = 1; |
1581 | if (kvm_mmu_zap_page(kvm, sp)) | 1572 | if (kvm_mmu_zap_page(kvm, sp)) |
1582 | n = bucket->first; | 1573 | goto restart; |
1583 | } | 1574 | } |
1584 | return r; | 1575 | return r; |
1585 | } | 1576 | } |
@@ -1593,13 +1584,14 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | |||
1593 | 1584 | ||
1594 | index = kvm_page_table_hashfn(gfn); | 1585 | index = kvm_page_table_hashfn(gfn); |
1595 | bucket = &kvm->arch.mmu_page_hash[index]; | 1586 | bucket = &kvm->arch.mmu_page_hash[index]; |
1587 | restart: | ||
1596 | hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { | 1588 | hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { |
1597 | if (sp->gfn == gfn && !sp->role.direct | 1589 | if (sp->gfn == gfn && !sp->role.direct |
1598 | && !sp->role.invalid) { | 1590 | && !sp->role.invalid) { |
1599 | pgprintk("%s: zap %lx %x\n", | 1591 | pgprintk("%s: zap %lx %x\n", |
1600 | __func__, gfn, sp->role.word); | 1592 | __func__, gfn, sp->role.word); |
1601 | if (kvm_mmu_zap_page(kvm, sp)) | 1593 | if (kvm_mmu_zap_page(kvm, sp)) |
1602 | nn = bucket->first; | 1594 | goto restart; |
1603 | } | 1595 | } |
1604 | } | 1596 | } |
1605 | } | 1597 | } |
@@ -1626,20 +1618,6 @@ static void mmu_convert_notrap(struct kvm_mmu_page *sp) | |||
1626 | } | 1618 | } |
1627 | } | 1619 | } |
1628 | 1620 | ||
1629 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | ||
1630 | { | ||
1631 | struct page *page; | ||
1632 | |||
1633 | gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); | ||
1634 | |||
1635 | if (gpa == UNMAPPED_GVA) | ||
1636 | return NULL; | ||
1637 | |||
1638 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | ||
1639 | |||
1640 | return page; | ||
1641 | } | ||
1642 | |||
1643 | /* | 1621 | /* |
1644 | * The function is based on mtrr_type_lookup() in | 1622 | * The function is based on mtrr_type_lookup() in |
1645 | * arch/x86/kernel/cpu/mtrr/generic.c | 1623 | * arch/x86/kernel/cpu/mtrr/generic.c |
@@ -1752,7 +1730,6 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1752 | struct kvm_mmu_page *s; | 1730 | struct kvm_mmu_page *s; |
1753 | struct hlist_node *node, *n; | 1731 | struct hlist_node *node, *n; |
1754 | 1732 | ||
1755 | trace_kvm_mmu_unsync_page(sp); | ||
1756 | index = kvm_page_table_hashfn(sp->gfn); | 1733 | index = kvm_page_table_hashfn(sp->gfn); |
1757 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 1734 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
1758 | /* don't unsync if pagetable is shadowed with multiple roles */ | 1735 | /* don't unsync if pagetable is shadowed with multiple roles */ |
@@ -1762,10 +1739,11 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1762 | if (s->role.word != sp->role.word) | 1739 | if (s->role.word != sp->role.word) |
1763 | return 1; | 1740 | return 1; |
1764 | } | 1741 | } |
1742 | trace_kvm_mmu_unsync_page(sp); | ||
1765 | ++vcpu->kvm->stat.mmu_unsync; | 1743 | ++vcpu->kvm->stat.mmu_unsync; |
1766 | sp->unsync = 1; | 1744 | sp->unsync = 1; |
1767 | 1745 | ||
1768 | kvm_mmu_mark_parents_unsync(vcpu, sp); | 1746 | kvm_mmu_mark_parents_unsync(sp); |
1769 | 1747 | ||
1770 | mmu_convert_notrap(sp); | 1748 | mmu_convert_notrap(sp); |
1771 | return 0; | 1749 | return 0; |
@@ -2081,21 +2059,23 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
2081 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2059 | hpa_t root = vcpu->arch.mmu.root_hpa; |
2082 | 2060 | ||
2083 | ASSERT(!VALID_PAGE(root)); | 2061 | ASSERT(!VALID_PAGE(root)); |
2084 | if (tdp_enabled) | ||
2085 | direct = 1; | ||
2086 | if (mmu_check_root(vcpu, root_gfn)) | 2062 | if (mmu_check_root(vcpu, root_gfn)) |
2087 | return 1; | 2063 | return 1; |
2064 | if (tdp_enabled) { | ||
2065 | direct = 1; | ||
2066 | root_gfn = 0; | ||
2067 | } | ||
2068 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2088 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, | 2069 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, |
2089 | PT64_ROOT_LEVEL, direct, | 2070 | PT64_ROOT_LEVEL, direct, |
2090 | ACC_ALL, NULL); | 2071 | ACC_ALL, NULL); |
2091 | root = __pa(sp->spt); | 2072 | root = __pa(sp->spt); |
2092 | ++sp->root_count; | 2073 | ++sp->root_count; |
2074 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2093 | vcpu->arch.mmu.root_hpa = root; | 2075 | vcpu->arch.mmu.root_hpa = root; |
2094 | return 0; | 2076 | return 0; |
2095 | } | 2077 | } |
2096 | direct = !is_paging(vcpu); | 2078 | direct = !is_paging(vcpu); |
2097 | if (tdp_enabled) | ||
2098 | direct = 1; | ||
2099 | for (i = 0; i < 4; ++i) { | 2079 | for (i = 0; i < 4; ++i) { |
2100 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | 2080 | hpa_t root = vcpu->arch.mmu.pae_root[i]; |
2101 | 2081 | ||
@@ -2111,11 +2091,18 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
2111 | root_gfn = 0; | 2091 | root_gfn = 0; |
2112 | if (mmu_check_root(vcpu, root_gfn)) | 2092 | if (mmu_check_root(vcpu, root_gfn)) |
2113 | return 1; | 2093 | return 1; |
2094 | if (tdp_enabled) { | ||
2095 | direct = 1; | ||
2096 | root_gfn = i << 30; | ||
2097 | } | ||
2098 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2114 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 2099 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
2115 | PT32_ROOT_LEVEL, direct, | 2100 | PT32_ROOT_LEVEL, direct, |
2116 | ACC_ALL, NULL); | 2101 | ACC_ALL, NULL); |
2117 | root = __pa(sp->spt); | 2102 | root = __pa(sp->spt); |
2118 | ++sp->root_count; | 2103 | ++sp->root_count; |
2104 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2105 | |||
2119 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; | 2106 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; |
2120 | } | 2107 | } |
2121 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); | 2108 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); |
@@ -2299,13 +2286,19 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2299 | /* no rsvd bits for 2 level 4K page table entries */ | 2286 | /* no rsvd bits for 2 level 4K page table entries */ |
2300 | context->rsvd_bits_mask[0][1] = 0; | 2287 | context->rsvd_bits_mask[0][1] = 0; |
2301 | context->rsvd_bits_mask[0][0] = 0; | 2288 | context->rsvd_bits_mask[0][0] = 0; |
2289 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; | ||
2290 | |||
2291 | if (!is_pse(vcpu)) { | ||
2292 | context->rsvd_bits_mask[1][1] = 0; | ||
2293 | break; | ||
2294 | } | ||
2295 | |||
2302 | if (is_cpuid_PSE36()) | 2296 | if (is_cpuid_PSE36()) |
2303 | /* 36bits PSE 4MB page */ | 2297 | /* 36bits PSE 4MB page */ |
2304 | context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); | 2298 | context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); |
2305 | else | 2299 | else |
2306 | /* 32 bits PSE 4MB page */ | 2300 | /* 32 bits PSE 4MB page */ |
2307 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); | 2301 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); |
2308 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | ||
2309 | break; | 2302 | break; |
2310 | case PT32E_ROOT_LEVEL: | 2303 | case PT32E_ROOT_LEVEL: |
2311 | context->rsvd_bits_mask[0][2] = | 2304 | context->rsvd_bits_mask[0][2] = |
@@ -2318,7 +2311,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2318 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2311 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
2319 | rsvd_bits(maxphyaddr, 62) | | 2312 | rsvd_bits(maxphyaddr, 62) | |
2320 | rsvd_bits(13, 20); /* large page */ | 2313 | rsvd_bits(13, 20); /* large page */ |
2321 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | 2314 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
2322 | break; | 2315 | break; |
2323 | case PT64_ROOT_LEVEL: | 2316 | case PT64_ROOT_LEVEL: |
2324 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | | 2317 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | |
@@ -2336,7 +2329,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2336 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2329 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
2337 | rsvd_bits(maxphyaddr, 51) | | 2330 | rsvd_bits(maxphyaddr, 51) | |
2338 | rsvd_bits(13, 20); /* large page */ | 2331 | rsvd_bits(13, 20); /* large page */ |
2339 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | 2332 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
2340 | break; | 2333 | break; |
2341 | } | 2334 | } |
2342 | } | 2335 | } |
@@ -2438,7 +2431,8 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) | |||
2438 | else | 2431 | else |
2439 | r = paging32_init_context(vcpu); | 2432 | r = paging32_init_context(vcpu); |
2440 | 2433 | ||
2441 | vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level; | 2434 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); |
2435 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); | ||
2442 | 2436 | ||
2443 | return r; | 2437 | return r; |
2444 | } | 2438 | } |
@@ -2478,7 +2472,9 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) | |||
2478 | goto out; | 2472 | goto out; |
2479 | spin_lock(&vcpu->kvm->mmu_lock); | 2473 | spin_lock(&vcpu->kvm->mmu_lock); |
2480 | kvm_mmu_free_some_pages(vcpu); | 2474 | kvm_mmu_free_some_pages(vcpu); |
2475 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2481 | r = mmu_alloc_roots(vcpu); | 2476 | r = mmu_alloc_roots(vcpu); |
2477 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2482 | mmu_sync_roots(vcpu); | 2478 | mmu_sync_roots(vcpu); |
2483 | spin_unlock(&vcpu->kvm->mmu_lock); | 2479 | spin_unlock(&vcpu->kvm->mmu_lock); |
2484 | if (r) | 2480 | if (r) |
@@ -2527,7 +2523,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
2527 | } | 2523 | } |
2528 | 2524 | ||
2529 | ++vcpu->kvm->stat.mmu_pte_updated; | 2525 | ++vcpu->kvm->stat.mmu_pte_updated; |
2530 | if (sp->role.glevels == PT32_ROOT_LEVEL) | 2526 | if (!sp->role.cr4_pae) |
2531 | paging32_update_pte(vcpu, sp, spte, new); | 2527 | paging32_update_pte(vcpu, sp, spte, new); |
2532 | else | 2528 | else |
2533 | paging64_update_pte(vcpu, sp, spte, new); | 2529 | paging64_update_pte(vcpu, sp, spte, new); |
@@ -2562,36 +2558,11 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) | |||
2562 | } | 2558 | } |
2563 | 2559 | ||
2564 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 2560 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
2565 | const u8 *new, int bytes) | 2561 | u64 gpte) |
2566 | { | 2562 | { |
2567 | gfn_t gfn; | 2563 | gfn_t gfn; |
2568 | int r; | ||
2569 | u64 gpte = 0; | ||
2570 | pfn_t pfn; | 2564 | pfn_t pfn; |
2571 | 2565 | ||
2572 | if (bytes != 4 && bytes != 8) | ||
2573 | return; | ||
2574 | |||
2575 | /* | ||
2576 | * Assume that the pte write on a page table of the same type | ||
2577 | * as the current vcpu paging mode. This is nearly always true | ||
2578 | * (might be false while changing modes). Note it is verified later | ||
2579 | * by update_pte(). | ||
2580 | */ | ||
2581 | if (is_pae(vcpu)) { | ||
2582 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
2583 | if ((bytes == 4) && (gpa % 4 == 0)) { | ||
2584 | r = kvm_read_guest(vcpu->kvm, gpa & ~(u64)7, &gpte, 8); | ||
2585 | if (r) | ||
2586 | return; | ||
2587 | memcpy((void *)&gpte + (gpa % 8), new, 4); | ||
2588 | } else if ((bytes == 8) && (gpa % 8 == 0)) { | ||
2589 | memcpy((void *)&gpte, new, 8); | ||
2590 | } | ||
2591 | } else { | ||
2592 | if ((bytes == 4) && (gpa % 4 == 0)) | ||
2593 | memcpy((void *)&gpte, new, 4); | ||
2594 | } | ||
2595 | if (!is_present_gpte(gpte)) | 2566 | if (!is_present_gpte(gpte)) |
2596 | return; | 2567 | return; |
2597 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; | 2568 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; |
@@ -2640,10 +2611,46 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2640 | int flooded = 0; | 2611 | int flooded = 0; |
2641 | int npte; | 2612 | int npte; |
2642 | int r; | 2613 | int r; |
2614 | int invlpg_counter; | ||
2643 | 2615 | ||
2644 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 2616 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
2645 | mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); | 2617 | |
2618 | invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter); | ||
2619 | |||
2620 | /* | ||
2621 | * Assume that the pte write on a page table of the same type | ||
2622 | * as the current vcpu paging mode. This is nearly always true | ||
2623 | * (might be false while changing modes). Note it is verified later | ||
2624 | * by update_pte(). | ||
2625 | */ | ||
2626 | if ((is_pae(vcpu) && bytes == 4) || !new) { | ||
2627 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
2628 | if (is_pae(vcpu)) { | ||
2629 | gpa &= ~(gpa_t)7; | ||
2630 | bytes = 8; | ||
2631 | } | ||
2632 | r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)); | ||
2633 | if (r) | ||
2634 | gentry = 0; | ||
2635 | new = (const u8 *)&gentry; | ||
2636 | } | ||
2637 | |||
2638 | switch (bytes) { | ||
2639 | case 4: | ||
2640 | gentry = *(const u32 *)new; | ||
2641 | break; | ||
2642 | case 8: | ||
2643 | gentry = *(const u64 *)new; | ||
2644 | break; | ||
2645 | default: | ||
2646 | gentry = 0; | ||
2647 | break; | ||
2648 | } | ||
2649 | |||
2650 | mmu_guess_page_from_pte_write(vcpu, gpa, gentry); | ||
2646 | spin_lock(&vcpu->kvm->mmu_lock); | 2651 | spin_lock(&vcpu->kvm->mmu_lock); |
2652 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) | ||
2653 | gentry = 0; | ||
2647 | kvm_mmu_access_page(vcpu, gfn); | 2654 | kvm_mmu_access_page(vcpu, gfn); |
2648 | kvm_mmu_free_some_pages(vcpu); | 2655 | kvm_mmu_free_some_pages(vcpu); |
2649 | ++vcpu->kvm->stat.mmu_pte_write; | 2656 | ++vcpu->kvm->stat.mmu_pte_write; |
@@ -2662,10 +2669,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2662 | } | 2669 | } |
2663 | index = kvm_page_table_hashfn(gfn); | 2670 | index = kvm_page_table_hashfn(gfn); |
2664 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 2671 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
2672 | |||
2673 | restart: | ||
2665 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { | 2674 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { |
2666 | if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) | 2675 | if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) |
2667 | continue; | 2676 | continue; |
2668 | pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; | 2677 | pte_size = sp->role.cr4_pae ? 8 : 4; |
2669 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | 2678 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); |
2670 | misaligned |= bytes < 4; | 2679 | misaligned |= bytes < 4; |
2671 | if (misaligned || flooded) { | 2680 | if (misaligned || flooded) { |
@@ -2682,14 +2691,14 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2682 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | 2691 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", |
2683 | gpa, bytes, sp->role.word); | 2692 | gpa, bytes, sp->role.word); |
2684 | if (kvm_mmu_zap_page(vcpu->kvm, sp)) | 2693 | if (kvm_mmu_zap_page(vcpu->kvm, sp)) |
2685 | n = bucket->first; | 2694 | goto restart; |
2686 | ++vcpu->kvm->stat.mmu_flooded; | 2695 | ++vcpu->kvm->stat.mmu_flooded; |
2687 | continue; | 2696 | continue; |
2688 | } | 2697 | } |
2689 | page_offset = offset; | 2698 | page_offset = offset; |
2690 | level = sp->role.level; | 2699 | level = sp->role.level; |
2691 | npte = 1; | 2700 | npte = 1; |
2692 | if (sp->role.glevels == PT32_ROOT_LEVEL) { | 2701 | if (!sp->role.cr4_pae) { |
2693 | page_offset <<= 1; /* 32->64 */ | 2702 | page_offset <<= 1; /* 32->64 */ |
2694 | /* | 2703 | /* |
2695 | * A 32-bit pde maps 4MB while the shadow pdes map | 2704 | * A 32-bit pde maps 4MB while the shadow pdes map |
@@ -2707,20 +2716,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2707 | continue; | 2716 | continue; |
2708 | } | 2717 | } |
2709 | spte = &sp->spt[page_offset / sizeof(*spte)]; | 2718 | spte = &sp->spt[page_offset / sizeof(*spte)]; |
2710 | if ((gpa & (pte_size - 1)) || (bytes < pte_size)) { | ||
2711 | gentry = 0; | ||
2712 | r = kvm_read_guest_atomic(vcpu->kvm, | ||
2713 | gpa & ~(u64)(pte_size - 1), | ||
2714 | &gentry, pte_size); | ||
2715 | new = (const void *)&gentry; | ||
2716 | if (r < 0) | ||
2717 | new = NULL; | ||
2718 | } | ||
2719 | while (npte--) { | 2719 | while (npte--) { |
2720 | entry = *spte; | 2720 | entry = *spte; |
2721 | mmu_pte_write_zap_pte(vcpu, sp, spte); | 2721 | mmu_pte_write_zap_pte(vcpu, sp, spte); |
2722 | if (new) | 2722 | if (gentry) |
2723 | mmu_pte_write_new_pte(vcpu, sp, spte, new); | 2723 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
2724 | mmu_pte_write_flush_tlb(vcpu, entry, *spte); | 2724 | mmu_pte_write_flush_tlb(vcpu, entry, *spte); |
2725 | ++spte; | 2725 | ++spte; |
2726 | } | 2726 | } |
@@ -2900,22 +2900,23 @@ void kvm_mmu_zap_all(struct kvm *kvm) | |||
2900 | struct kvm_mmu_page *sp, *node; | 2900 | struct kvm_mmu_page *sp, *node; |
2901 | 2901 | ||
2902 | spin_lock(&kvm->mmu_lock); | 2902 | spin_lock(&kvm->mmu_lock); |
2903 | restart: | ||
2903 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) | 2904 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) |
2904 | if (kvm_mmu_zap_page(kvm, sp)) | 2905 | if (kvm_mmu_zap_page(kvm, sp)) |
2905 | node = container_of(kvm->arch.active_mmu_pages.next, | 2906 | goto restart; |
2906 | struct kvm_mmu_page, link); | 2907 | |
2907 | spin_unlock(&kvm->mmu_lock); | 2908 | spin_unlock(&kvm->mmu_lock); |
2908 | 2909 | ||
2909 | kvm_flush_remote_tlbs(kvm); | 2910 | kvm_flush_remote_tlbs(kvm); |
2910 | } | 2911 | } |
2911 | 2912 | ||
2912 | static void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm) | 2913 | static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm) |
2913 | { | 2914 | { |
2914 | struct kvm_mmu_page *page; | 2915 | struct kvm_mmu_page *page; |
2915 | 2916 | ||
2916 | page = container_of(kvm->arch.active_mmu_pages.prev, | 2917 | page = container_of(kvm->arch.active_mmu_pages.prev, |
2917 | struct kvm_mmu_page, link); | 2918 | struct kvm_mmu_page, link); |
2918 | kvm_mmu_zap_page(kvm, page); | 2919 | return kvm_mmu_zap_page(kvm, page) + 1; |
2919 | } | 2920 | } |
2920 | 2921 | ||
2921 | static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | 2922 | static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) |
@@ -2927,7 +2928,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
2927 | spin_lock(&kvm_lock); | 2928 | spin_lock(&kvm_lock); |
2928 | 2929 | ||
2929 | list_for_each_entry(kvm, &vm_list, vm_list) { | 2930 | list_for_each_entry(kvm, &vm_list, vm_list) { |
2930 | int npages, idx; | 2931 | int npages, idx, freed_pages; |
2931 | 2932 | ||
2932 | idx = srcu_read_lock(&kvm->srcu); | 2933 | idx = srcu_read_lock(&kvm->srcu); |
2933 | spin_lock(&kvm->mmu_lock); | 2934 | spin_lock(&kvm->mmu_lock); |
@@ -2935,8 +2936,8 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
2935 | kvm->arch.n_free_mmu_pages; | 2936 | kvm->arch.n_free_mmu_pages; |
2936 | cache_count += npages; | 2937 | cache_count += npages; |
2937 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { | 2938 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { |
2938 | kvm_mmu_remove_one_alloc_mmu_page(kvm); | 2939 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm); |
2939 | cache_count--; | 2940 | cache_count -= freed_pages; |
2940 | kvm_freed = kvm; | 2941 | kvm_freed = kvm; |
2941 | } | 2942 | } |
2942 | nr_to_scan--; | 2943 | nr_to_scan--; |
@@ -3011,7 +3012,8 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | |||
3011 | unsigned int nr_pages = 0; | 3012 | unsigned int nr_pages = 0; |
3012 | struct kvm_memslots *slots; | 3013 | struct kvm_memslots *slots; |
3013 | 3014 | ||
3014 | slots = rcu_dereference(kvm->memslots); | 3015 | slots = kvm_memslots(kvm); |
3016 | |||
3015 | for (i = 0; i < slots->nmemslots; i++) | 3017 | for (i = 0; i < slots->nmemslots; i++) |
3016 | nr_pages += slots->memslots[i].npages; | 3018 | nr_pages += slots->memslots[i].npages; |
3017 | 3019 | ||
@@ -3174,8 +3176,7 @@ static gva_t canonicalize(gva_t gva) | |||
3174 | } | 3176 | } |
3175 | 3177 | ||
3176 | 3178 | ||
3177 | typedef void (*inspect_spte_fn) (struct kvm *kvm, struct kvm_mmu_page *sp, | 3179 | typedef void (*inspect_spte_fn) (struct kvm *kvm, u64 *sptep); |
3178 | u64 *sptep); | ||
3179 | 3180 | ||
3180 | static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, | 3181 | static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, |
3181 | inspect_spte_fn fn) | 3182 | inspect_spte_fn fn) |
@@ -3191,7 +3192,7 @@ static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
3191 | child = page_header(ent & PT64_BASE_ADDR_MASK); | 3192 | child = page_header(ent & PT64_BASE_ADDR_MASK); |
3192 | __mmu_spte_walk(kvm, child, fn); | 3193 | __mmu_spte_walk(kvm, child, fn); |
3193 | } else | 3194 | } else |
3194 | fn(kvm, sp, &sp->spt[i]); | 3195 | fn(kvm, &sp->spt[i]); |
3195 | } | 3196 | } |
3196 | } | 3197 | } |
3197 | } | 3198 | } |
@@ -3282,11 +3283,13 @@ static void audit_mappings(struct kvm_vcpu *vcpu) | |||
3282 | 3283 | ||
3283 | static int count_rmaps(struct kvm_vcpu *vcpu) | 3284 | static int count_rmaps(struct kvm_vcpu *vcpu) |
3284 | { | 3285 | { |
3286 | struct kvm *kvm = vcpu->kvm; | ||
3287 | struct kvm_memslots *slots; | ||
3285 | int nmaps = 0; | 3288 | int nmaps = 0; |
3286 | int i, j, k, idx; | 3289 | int i, j, k, idx; |
3287 | 3290 | ||
3288 | idx = srcu_read_lock(&kvm->srcu); | 3291 | idx = srcu_read_lock(&kvm->srcu); |
3289 | slots = rcu_dereference(kvm->memslots); | 3292 | slots = kvm_memslots(kvm); |
3290 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 3293 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
3291 | struct kvm_memory_slot *m = &slots->memslots[i]; | 3294 | struct kvm_memory_slot *m = &slots->memslots[i]; |
3292 | struct kvm_rmap_desc *d; | 3295 | struct kvm_rmap_desc *d; |
@@ -3315,7 +3318,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu) | |||
3315 | return nmaps; | 3318 | return nmaps; |
3316 | } | 3319 | } |
3317 | 3320 | ||
3318 | void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) | 3321 | void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) |
3319 | { | 3322 | { |
3320 | unsigned long *rmapp; | 3323 | unsigned long *rmapp; |
3321 | struct kvm_mmu_page *rev_sp; | 3324 | struct kvm_mmu_page *rev_sp; |
@@ -3331,14 +3334,14 @@ void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) | |||
3331 | printk(KERN_ERR "%s: no memslot for gfn %ld\n", | 3334 | printk(KERN_ERR "%s: no memslot for gfn %ld\n", |
3332 | audit_msg, gfn); | 3335 | audit_msg, gfn); |
3333 | printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", | 3336 | printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", |
3334 | audit_msg, sptep - rev_sp->spt, | 3337 | audit_msg, (long int)(sptep - rev_sp->spt), |
3335 | rev_sp->gfn); | 3338 | rev_sp->gfn); |
3336 | dump_stack(); | 3339 | dump_stack(); |
3337 | return; | 3340 | return; |
3338 | } | 3341 | } |
3339 | 3342 | ||
3340 | rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], | 3343 | rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], |
3341 | is_large_pte(*sptep)); | 3344 | rev_sp->role.level); |
3342 | if (!*rmapp) { | 3345 | if (!*rmapp) { |
3343 | if (!printk_ratelimit()) | 3346 | if (!printk_ratelimit()) |
3344 | return; | 3347 | return; |
@@ -3373,7 +3376,7 @@ static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu) | |||
3373 | continue; | 3376 | continue; |
3374 | if (!(ent & PT_WRITABLE_MASK)) | 3377 | if (!(ent & PT_WRITABLE_MASK)) |
3375 | continue; | 3378 | continue; |
3376 | inspect_spte_has_rmap(vcpu->kvm, sp, &pt[i]); | 3379 | inspect_spte_has_rmap(vcpu->kvm, &pt[i]); |
3377 | } | 3380 | } |
3378 | } | 3381 | } |
3379 | return; | 3382 | return; |
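Several mmu.c hunks above replace cursor patching (n = bucket->first) with a restart label: kvm_mmu_zap_page() can remove more pages than the one currently being visited, so the next pointer cached by hlist_for_each_entry_safe() may already be stale, and the safe recovery is to rescan the bucket from its head. A sketch of that shape, with should_zap() standing in for the gfn/role tests done in the real callers:

/* should_zap() is a placeholder for the sp->gfn / sp->role checks; the
 * zap's return value follows kvm_mmu_zap_page(): nonzero means other
 * pages were freed too, so any cached iterator state is unsafe. */
static bool should_zap(struct kvm_mmu_page *sp);

static void zap_matching(struct kvm *kvm, struct hlist_head *bucket)
{
	struct kvm_mmu_page *sp;
	struct hlist_node *node, *n;

restart:
	hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
		if (should_zap(sp) && kvm_mmu_zap_page(kvm, sp))
			goto restart;	/* the list may have lost more than sp */
}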
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 3e4a5c6ca2a9..42f07b1bfbc9 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
@@ -6,14 +6,12 @@ | |||
6 | 6 | ||
7 | #undef TRACE_SYSTEM | 7 | #undef TRACE_SYSTEM |
8 | #define TRACE_SYSTEM kvmmmu | 8 | #define TRACE_SYSTEM kvmmmu |
9 | #define TRACE_INCLUDE_PATH . | ||
10 | #define TRACE_INCLUDE_FILE mmutrace | ||
11 | 9 | ||
12 | #define KVM_MMU_PAGE_FIELDS \ | 10 | #define KVM_MMU_PAGE_FIELDS \ |
13 | __field(__u64, gfn) \ | 11 | __field(__u64, gfn) \ |
14 | __field(__u32, role) \ | 12 | __field(__u32, role) \ |
15 | __field(__u32, root_count) \ | 13 | __field(__u32, root_count) \ |
16 | __field(__u32, unsync) | 14 | __field(bool, unsync) |
17 | 15 | ||
18 | #define KVM_MMU_PAGE_ASSIGN(sp) \ | 16 | #define KVM_MMU_PAGE_ASSIGN(sp) \ |
19 | __entry->gfn = sp->gfn; \ | 17 | __entry->gfn = sp->gfn; \ |
@@ -30,14 +28,14 @@ | |||
30 | \ | 28 | \ |
31 | role.word = __entry->role; \ | 29 | role.word = __entry->role; \ |
32 | \ | 30 | \ |
33 | trace_seq_printf(p, "sp gfn %llx %u/%u q%u%s %s%s %spge" \ | 31 | trace_seq_printf(p, "sp gfn %llx %u%s q%u%s %s%s" \ |
34 | " %snxe root %u %s%c", \ | 32 | " %snxe root %u %s%c", \ |
35 | __entry->gfn, role.level, role.glevels, \ | 33 | __entry->gfn, role.level, \ |
34 | role.cr4_pae ? " pae" : "", \ | ||
36 | role.quadrant, \ | 35 | role.quadrant, \ |
37 | role.direct ? " direct" : "", \ | 36 | role.direct ? " direct" : "", \ |
38 | access_str[role.access], \ | 37 | access_str[role.access], \ |
39 | role.invalid ? " invalid" : "", \ | 38 | role.invalid ? " invalid" : "", \ |
40 | role.cr4_pge ? "" : "!", \ | ||
41 | role.nxe ? "" : "!", \ | 39 | role.nxe ? "" : "!", \ |
42 | __entry->root_count, \ | 40 | __entry->root_count, \ |
43 | __entry->unsync ? "unsync" : "sync", 0); \ | 41 | __entry->unsync ? "unsync" : "sync", 0); \ |
@@ -94,15 +92,15 @@ TRACE_EVENT( | |||
94 | TP_printk("pte %llx level %u", __entry->pte, __entry->level) | 92 | TP_printk("pte %llx level %u", __entry->pte, __entry->level) |
95 | ); | 93 | ); |
96 | 94 | ||
97 | /* We set a pte accessed bit */ | 95 | DECLARE_EVENT_CLASS(kvm_mmu_set_bit_class, |
98 | TRACE_EVENT( | 96 | |
99 | kvm_mmu_set_accessed_bit, | ||
100 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), | 97 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), |
98 | |||
101 | TP_ARGS(table_gfn, index, size), | 99 | TP_ARGS(table_gfn, index, size), |
102 | 100 | ||
103 | TP_STRUCT__entry( | 101 | TP_STRUCT__entry( |
104 | __field(__u64, gpa) | 102 | __field(__u64, gpa) |
105 | ), | 103 | ), |
106 | 104 | ||
107 | TP_fast_assign( | 105 | TP_fast_assign( |
108 | __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) | 106 | __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) |
@@ -112,22 +110,20 @@ TRACE_EVENT( | |||
112 | TP_printk("gpa %llx", __entry->gpa) | 110 | TP_printk("gpa %llx", __entry->gpa) |
113 | ); | 111 | ); |
114 | 112 | ||
115 | /* We set a pte dirty bit */ | 113 | /* We set a pte accessed bit */ |
116 | TRACE_EVENT( | 114 | DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_accessed_bit, |
117 | kvm_mmu_set_dirty_bit, | 115 | |
118 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), | 116 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), |
119 | TP_ARGS(table_gfn, index, size), | ||
120 | 117 | ||
121 | TP_STRUCT__entry( | 118 | TP_ARGS(table_gfn, index, size) |
122 | __field(__u64, gpa) | 119 | ); |
123 | ), | ||
124 | 120 | ||
125 | TP_fast_assign( | 121 | /* We set a pte dirty bit */ |
126 | __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) | 122 | DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_dirty_bit, |
127 | + index * size; | ||
128 | ), | ||
129 | 123 | ||
130 | TP_printk("gpa %llx", __entry->gpa) | 124 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), |
125 | |||
126 | TP_ARGS(table_gfn, index, size) | ||
131 | ); | 127 | ); |
132 | 128 | ||
133 | TRACE_EVENT( | 129 | TRACE_EVENT( |
@@ -166,55 +162,45 @@ TRACE_EVENT( | |||
166 | __entry->created ? "new" : "existing") | 162 | __entry->created ? "new" : "existing") |
167 | ); | 163 | ); |
168 | 164 | ||
169 | TRACE_EVENT( | 165 | DECLARE_EVENT_CLASS(kvm_mmu_page_class, |
170 | kvm_mmu_sync_page, | 166 | |
171 | TP_PROTO(struct kvm_mmu_page *sp), | 167 | TP_PROTO(struct kvm_mmu_page *sp), |
172 | TP_ARGS(sp), | 168 | TP_ARGS(sp), |
173 | 169 | ||
174 | TP_STRUCT__entry( | 170 | TP_STRUCT__entry( |
175 | KVM_MMU_PAGE_FIELDS | 171 | KVM_MMU_PAGE_FIELDS |
176 | ), | 172 | ), |
177 | 173 | ||
178 | TP_fast_assign( | 174 | TP_fast_assign( |
179 | KVM_MMU_PAGE_ASSIGN(sp) | 175 | KVM_MMU_PAGE_ASSIGN(sp) |
180 | ), | 176 | ), |
181 | 177 | ||
182 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) | 178 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) |
183 | ); | 179 | ); |
184 | 180 | ||
185 | TRACE_EVENT( | 181 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_sync_page, |
186 | kvm_mmu_unsync_page, | ||
187 | TP_PROTO(struct kvm_mmu_page *sp), | 182 | TP_PROTO(struct kvm_mmu_page *sp), |
188 | TP_ARGS(sp), | ||
189 | |||
190 | TP_STRUCT__entry( | ||
191 | KVM_MMU_PAGE_FIELDS | ||
192 | ), | ||
193 | 183 | ||
194 | TP_fast_assign( | 184 | TP_ARGS(sp) |
195 | KVM_MMU_PAGE_ASSIGN(sp) | ||
196 | ), | ||
197 | |||
198 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) | ||
199 | ); | 185 | ); |
200 | 186 | ||
201 | TRACE_EVENT( | 187 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_unsync_page, |
202 | kvm_mmu_zap_page, | ||
203 | TP_PROTO(struct kvm_mmu_page *sp), | 188 | TP_PROTO(struct kvm_mmu_page *sp), |
204 | TP_ARGS(sp), | ||
205 | 189 | ||
206 | TP_STRUCT__entry( | 190 | TP_ARGS(sp) |
207 | KVM_MMU_PAGE_FIELDS | 191 | ); |
208 | ), | ||
209 | 192 | ||
210 | TP_fast_assign( | 193 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_zap_page, |
211 | KVM_MMU_PAGE_ASSIGN(sp) | 194 | TP_PROTO(struct kvm_mmu_page *sp), |
212 | ), | ||
213 | 195 | ||
214 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) | 196 | TP_ARGS(sp) |
215 | ); | 197 | ); |
216 | |||
217 | #endif /* _TRACE_KVMMMU_H */ | 198 | #endif /* _TRACE_KVMMMU_H */ |
218 | 199 | ||
200 | #undef TRACE_INCLUDE_PATH | ||
201 | #define TRACE_INCLUDE_PATH . | ||
202 | #undef TRACE_INCLUDE_FILE | ||
203 | #define TRACE_INCLUDE_FILE mmutrace | ||
204 | |||
219 | /* This part must be outside protection */ | 205 | /* This part must be outside protection */ |
220 | #include <trace/define_trace.h> | 206 | #include <trace/define_trace.h> |
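The mmutrace.h rewrite above is the standard tracepoint deduplication: three identical TRACE_EVENT() bodies collapse into one DECLARE_EVENT_CLASS() plus per-event DEFINE_EVENT() stubs, and TRACE_INCLUDE_PATH/TRACE_INCLUDE_FILE move next to the define_trace.h include. A reduced sketch of the class/event split, using illustrative event names rather than the kernel's:

DECLARE_EVENT_CLASS(sample_mmu_page_class,
	TP_PROTO(struct kvm_mmu_page *sp),
	TP_ARGS(sp),

	TP_STRUCT__entry(
		KVM_MMU_PAGE_FIELDS
	),

	TP_fast_assign(
		KVM_MMU_PAGE_ASSIGN(sp)
	),

	TP_printk("%s", KVM_MMU_PAGE_PRINTK())
);

/* Each concrete event is now a one-liner reusing the class above. */
DEFINE_EVENT(sample_mmu_page_class, sample_mmu_sync_page,
	TP_PROTO(struct kvm_mmu_page *sp),
	TP_ARGS(sp)
);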
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 81eab9a50e6a..89d66ca4d87c 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -170,7 +170,7 @@ walk: | |||
170 | goto access_error; | 170 | goto access_error; |
171 | 171 | ||
172 | #if PTTYPE == 64 | 172 | #if PTTYPE == 64 |
173 | if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK)) | 173 | if (fetch_fault && (pte & PT64_NX_MASK)) |
174 | goto access_error; | 174 | goto access_error; |
175 | #endif | 175 | #endif |
176 | 176 | ||
@@ -190,10 +190,10 @@ walk: | |||
190 | 190 | ||
191 | if ((walker->level == PT_PAGE_TABLE_LEVEL) || | 191 | if ((walker->level == PT_PAGE_TABLE_LEVEL) || |
192 | ((walker->level == PT_DIRECTORY_LEVEL) && | 192 | ((walker->level == PT_DIRECTORY_LEVEL) && |
193 | (pte & PT_PAGE_SIZE_MASK) && | 193 | is_large_pte(pte) && |
194 | (PTTYPE == 64 || is_pse(vcpu))) || | 194 | (PTTYPE == 64 || is_pse(vcpu))) || |
195 | ((walker->level == PT_PDPE_LEVEL) && | 195 | ((walker->level == PT_PDPE_LEVEL) && |
196 | (pte & PT_PAGE_SIZE_MASK) && | 196 | is_large_pte(pte) && |
197 | is_long_mode(vcpu))) { | 197 | is_long_mode(vcpu))) { |
198 | int lvl = walker->level; | 198 | int lvl = walker->level; |
199 | 199 | ||
@@ -258,11 +258,17 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
258 | pt_element_t gpte; | 258 | pt_element_t gpte; |
259 | unsigned pte_access; | 259 | unsigned pte_access; |
260 | pfn_t pfn; | 260 | pfn_t pfn; |
261 | u64 new_spte; | ||
261 | 262 | ||
262 | gpte = *(const pt_element_t *)pte; | 263 | gpte = *(const pt_element_t *)pte; |
263 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { | 264 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { |
264 | if (!is_present_gpte(gpte)) | 265 | if (!is_present_gpte(gpte)) { |
265 | __set_spte(spte, shadow_notrap_nonpresent_pte); | 266 | if (page->unsync) |
267 | new_spte = shadow_trap_nonpresent_pte; | ||
268 | else | ||
269 | new_spte = shadow_notrap_nonpresent_pte; | ||
270 | __set_spte(spte, new_spte); | ||
271 | } | ||
266 | return; | 272 | return; |
267 | } | 273 | } |
268 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 274 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
@@ -457,6 +463,7 @@ out_unlock: | |||
457 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 463 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
458 | { | 464 | { |
459 | struct kvm_shadow_walk_iterator iterator; | 465 | struct kvm_shadow_walk_iterator iterator; |
466 | gpa_t pte_gpa = -1; | ||
460 | int level; | 467 | int level; |
461 | u64 *sptep; | 468 | u64 *sptep; |
462 | int need_flush = 0; | 469 | int need_flush = 0; |
@@ -467,9 +474,16 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
467 | level = iterator.level; | 474 | level = iterator.level; |
468 | sptep = iterator.sptep; | 475 | sptep = iterator.sptep; |
469 | 476 | ||
470 | if (level == PT_PAGE_TABLE_LEVEL || | 477 | if (is_last_spte(*sptep, level)) { |
471 | ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || | 478 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); |
472 | ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { | 479 | int offset, shift; |
480 | |||
481 | shift = PAGE_SHIFT - | ||
482 | (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; | ||
483 | offset = sp->role.quadrant << shift; | ||
484 | |||
485 | pte_gpa = (sp->gfn << PAGE_SHIFT) + offset; | ||
486 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | ||
473 | 487 | ||
474 | if (is_shadow_present_pte(*sptep)) { | 488 | if (is_shadow_present_pte(*sptep)) { |
475 | rmap_remove(vcpu->kvm, sptep); | 489 | rmap_remove(vcpu->kvm, sptep); |
@@ -487,7 +501,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
487 | 501 | ||
488 | if (need_flush) | 502 | if (need_flush) |
489 | kvm_flush_remote_tlbs(vcpu->kvm); | 503 | kvm_flush_remote_tlbs(vcpu->kvm); |
504 | |||
505 | atomic_inc(&vcpu->kvm->arch.invlpg_counter); | ||
506 | |||
490 | spin_unlock(&vcpu->kvm->mmu_lock); | 507 | spin_unlock(&vcpu->kvm->mmu_lock); |
508 | |||
509 | if (pte_gpa == -1) | ||
510 | return; | ||
511 | |||
512 | if (mmu_topup_memory_caches(vcpu)) | ||
513 | return; | ||
514 | kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0); | ||
491 | } | 515 | } |
492 | 516 | ||
493 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, | 517 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, |
@@ -551,12 +575,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
551 | { | 575 | { |
552 | int i, offset, nr_present; | 576 | int i, offset, nr_present; |
553 | bool reset_host_protection; | 577 | bool reset_host_protection; |
578 | gpa_t first_pte_gpa; | ||
554 | 579 | ||
555 | offset = nr_present = 0; | 580 | offset = nr_present = 0; |
556 | 581 | ||
557 | if (PTTYPE == 32) | 582 | if (PTTYPE == 32) |
558 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | 583 | offset = sp->role.quadrant << PT64_LEVEL_BITS; |
559 | 584 | ||
585 | first_pte_gpa = gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); | ||
586 | |||
560 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { | 587 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { |
561 | unsigned pte_access; | 588 | unsigned pte_access; |
562 | pt_element_t gpte; | 589 | pt_element_t gpte; |
@@ -566,8 +593,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
566 | if (!is_shadow_present_pte(sp->spt[i])) | 593 | if (!is_shadow_present_pte(sp->spt[i])) |
567 | continue; | 594 | continue; |
568 | 595 | ||
569 | pte_gpa = gfn_to_gpa(sp->gfn); | 596 | pte_gpa = first_pte_gpa + i * sizeof(pt_element_t); |
570 | pte_gpa += (i+offset) * sizeof(pt_element_t); | ||
571 | 597 | ||
572 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, | 598 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, |
573 | sizeof(pt_element_t))) | 599 | sizeof(pt_element_t))) |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2ba58206812a..96dc232bfc56 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -44,10 +44,11 @@ MODULE_LICENSE("GPL"); | |||
44 | #define SEG_TYPE_LDT 2 | 44 | #define SEG_TYPE_LDT 2 |
45 | #define SEG_TYPE_BUSY_TSS16 3 | 45 | #define SEG_TYPE_BUSY_TSS16 3 |
46 | 46 | ||
47 | #define SVM_FEATURE_NPT (1 << 0) | 47 | #define SVM_FEATURE_NPT (1 << 0) |
48 | #define SVM_FEATURE_LBRV (1 << 1) | 48 | #define SVM_FEATURE_LBRV (1 << 1) |
49 | #define SVM_FEATURE_SVML (1 << 2) | 49 | #define SVM_FEATURE_SVML (1 << 2) |
50 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | 50 | #define SVM_FEATURE_NRIP (1 << 3) |
51 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | ||
51 | 52 | ||
52 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ | 53 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ |
53 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ | 54 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ |
@@ -70,6 +71,7 @@ struct kvm_vcpu; | |||
70 | struct nested_state { | 71 | struct nested_state { |
71 | struct vmcb *hsave; | 72 | struct vmcb *hsave; |
72 | u64 hsave_msr; | 73 | u64 hsave_msr; |
74 | u64 vm_cr_msr; | ||
73 | u64 vmcb; | 75 | u64 vmcb; |
74 | 76 | ||
75 | /* These are the merged vectors */ | 77 | /* These are the merged vectors */ |
@@ -77,6 +79,7 @@ struct nested_state { | |||
77 | 79 | ||
78 | /* gpa pointers to the real vectors */ | 80 | /* gpa pointers to the real vectors */ |
79 | u64 vmcb_msrpm; | 81 | u64 vmcb_msrpm; |
82 | u64 vmcb_iopm; | ||
80 | 83 | ||
81 | /* A VMEXIT is required but not yet emulated */ | 84 | /* A VMEXIT is required but not yet emulated */ |
82 | bool exit_required; | 85 | bool exit_required; |
@@ -91,6 +94,9 @@ struct nested_state { | |||
91 | 94 | ||
92 | }; | 95 | }; |
93 | 96 | ||
97 | #define MSRPM_OFFSETS 16 | ||
98 | static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; | ||
99 | |||
94 | struct vcpu_svm { | 100 | struct vcpu_svm { |
95 | struct kvm_vcpu vcpu; | 101 | struct kvm_vcpu vcpu; |
96 | struct vmcb *vmcb; | 102 | struct vmcb *vmcb; |
@@ -110,13 +116,39 @@ struct vcpu_svm { | |||
110 | struct nested_state nested; | 116 | struct nested_state nested; |
111 | 117 | ||
112 | bool nmi_singlestep; | 118 | bool nmi_singlestep; |
119 | |||
120 | unsigned int3_injected; | ||
121 | unsigned long int3_rip; | ||
122 | }; | ||
123 | |||
124 | #define MSR_INVALID 0xffffffffU | ||
125 | |||
126 | static struct svm_direct_access_msrs { | ||
127 | u32 index; /* Index of the MSR */ | ||
128 | bool always; /* True if intercept is always on */ | ||
129 | } direct_access_msrs[] = { | ||
130 | { .index = MSR_K6_STAR, .always = true }, | ||
131 | { .index = MSR_IA32_SYSENTER_CS, .always = true }, | ||
132 | #ifdef CONFIG_X86_64 | ||
133 | { .index = MSR_GS_BASE, .always = true }, | ||
134 | { .index = MSR_FS_BASE, .always = true }, | ||
135 | { .index = MSR_KERNEL_GS_BASE, .always = true }, | ||
136 | { .index = MSR_LSTAR, .always = true }, | ||
137 | { .index = MSR_CSTAR, .always = true }, | ||
138 | { .index = MSR_SYSCALL_MASK, .always = true }, | ||
139 | #endif | ||
140 | { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, | ||
141 | { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, | ||
142 | { .index = MSR_IA32_LASTINTFROMIP, .always = false }, | ||
143 | { .index = MSR_IA32_LASTINTTOIP, .always = false }, | ||
144 | { .index = MSR_INVALID, .always = false }, | ||
113 | }; | 145 | }; |
114 | 146 | ||
115 | /* enable NPT for AMD64 and X86 with PAE */ | 147 | /* enable NPT for AMD64 and X86 with PAE */ |
116 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | 148 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) |
117 | static bool npt_enabled = true; | 149 | static bool npt_enabled = true; |
118 | #else | 150 | #else |
119 | static bool npt_enabled = false; | 151 | static bool npt_enabled; |
120 | #endif | 152 | #endif |
121 | static int npt = 1; | 153 | static int npt = 1; |
122 | 154 | ||
@@ -129,6 +161,7 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu); | |||
129 | static void svm_complete_interrupts(struct vcpu_svm *svm); | 161 | static void svm_complete_interrupts(struct vcpu_svm *svm); |
130 | 162 | ||
131 | static int nested_svm_exit_handled(struct vcpu_svm *svm); | 163 | static int nested_svm_exit_handled(struct vcpu_svm *svm); |
164 | static int nested_svm_intercept(struct vcpu_svm *svm); | ||
132 | static int nested_svm_vmexit(struct vcpu_svm *svm); | 165 | static int nested_svm_vmexit(struct vcpu_svm *svm); |
133 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 166 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
134 | bool has_error_code, u32 error_code); | 167 | bool has_error_code, u32 error_code); |
@@ -163,8 +196,8 @@ static unsigned long iopm_base; | |||
163 | struct kvm_ldttss_desc { | 196 | struct kvm_ldttss_desc { |
164 | u16 limit0; | 197 | u16 limit0; |
165 | u16 base0; | 198 | u16 base0; |
166 | unsigned base1 : 8, type : 5, dpl : 2, p : 1; | 199 | unsigned base1:8, type:5, dpl:2, p:1; |
167 | unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; | 200 | unsigned limit1:4, zero0:3, g:1, base2:8; |
168 | u32 base3; | 201 | u32 base3; |
169 | u32 zero1; | 202 | u32 zero1; |
170 | } __attribute__((packed)); | 203 | } __attribute__((packed)); |
@@ -194,6 +227,27 @@ static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; | |||
194 | #define MSRS_RANGE_SIZE 2048 | 227 | #define MSRS_RANGE_SIZE 2048 |
195 | #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) | 228 | #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) |
196 | 229 | ||
230 | static u32 svm_msrpm_offset(u32 msr) | ||
231 | { | ||
232 | u32 offset; | ||
233 | int i; | ||
234 | |||
235 | for (i = 0; i < NUM_MSR_MAPS; i++) { | ||
236 | if (msr < msrpm_ranges[i] || | ||
237 | msr >= msrpm_ranges[i] + MSRS_IN_RANGE) | ||
238 | continue; | ||
239 | |||
240 | offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */ | ||
241 | offset += (i * MSRS_RANGE_SIZE); /* add range offset */ | ||
242 | |||
243 | /* Now we have the u8 offset - but need the u32 offset */ | ||
244 | return offset / 4; | ||
245 | } | ||
246 | |||
247 | /* MSR not in any range */ | ||
248 | return MSR_INVALID; | ||
249 | } | ||
250 | |||
197 | #define MAX_INST_SIZE 15 | 251 | #define MAX_INST_SIZE 15 |
198 | 252 | ||
199 | static inline u32 svm_has(u32 feat) | 253 | static inline u32 svm_has(u32 feat) |
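As an illustrative aside (not part of the patch), the svm_msrpm_offset() helper added above maps an MSR number to a u32 index into a permission map that spends two bits (read + write) per MSR across three 2 KiB ranges. Below is a minimal standalone C sketch of the same arithmetic, assuming the range bases and the MSRS_IN_RANGE value visible in the surrounding context; the function and variable names here are hypothetical, not kernel symbols.

#include <stdio.h>
#include <stdint.h>

#define MSR_INVALID     0xffffffffU
#define MSRS_RANGE_SIZE 2048
#define MSRS_IN_RANGE   (MSRS_RANGE_SIZE * 8 / 2)   /* 8192 MSRs per range */

static const uint32_t ranges[] = { 0, 0xc0000000, 0xc0010000 };

static uint32_t msrpm_u32_offset(uint32_t msr)
{
        unsigned int i;

        for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++) {
                uint32_t byte_off;

                if (msr < ranges[i] || msr >= ranges[i] + MSRS_IN_RANGE)
                        continue;

                byte_off  = (msr - ranges[i]) / 4;   /* 4 MSRs per byte    */
                byte_off += i * MSRS_RANGE_SIZE;     /* add the range base */
                return byte_off / 4;                 /* byte -> u32 index  */
        }
        return MSR_INVALID;                          /* MSR not in any range */
}

int main(void)
{
        /* 0xc0000082 (MSR_LSTAR) lands in the second range; expect index 520 */
        printf("u32 offset for 0xc0000082: %u\n", msrpm_u32_offset(0xc0000082));
        return 0;
}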
@@ -213,7 +267,7 @@ static inline void stgi(void) | |||
213 | 267 | ||
214 | static inline void invlpga(unsigned long addr, u32 asid) | 268 | static inline void invlpga(unsigned long addr, u32 asid) |
215 | { | 269 | { |
216 | asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid)); | 270 | asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); |
217 | } | 271 | } |
218 | 272 | ||
219 | static inline void force_new_asid(struct kvm_vcpu *vcpu) | 273 | static inline void force_new_asid(struct kvm_vcpu *vcpu) |
@@ -235,23 +289,6 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
235 | vcpu->arch.efer = efer; | 289 | vcpu->arch.efer = efer; |
236 | } | 290 | } |
237 | 291 | ||
238 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | ||
239 | bool has_error_code, u32 error_code) | ||
240 | { | ||
241 | struct vcpu_svm *svm = to_svm(vcpu); | ||
242 | |||
243 | /* If we are within a nested VM we'd better #VMEXIT and let the | ||
244 | guest handle the exception */ | ||
245 | if (nested_svm_check_exception(svm, nr, has_error_code, error_code)) | ||
246 | return; | ||
247 | |||
248 | svm->vmcb->control.event_inj = nr | ||
249 | | SVM_EVTINJ_VALID | ||
250 | | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) | ||
251 | | SVM_EVTINJ_TYPE_EXEPT; | ||
252 | svm->vmcb->control.event_inj_err = error_code; | ||
253 | } | ||
254 | |||
255 | static int is_external_interrupt(u32 info) | 292 | static int is_external_interrupt(u32 info) |
256 | { | 293 | { |
257 | info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; | 294 | info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; |
@@ -264,7 +301,7 @@ static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
264 | u32 ret = 0; | 301 | u32 ret = 0; |
265 | 302 | ||
266 | if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) | 303 | if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) |
267 | ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS; | 304 | ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; |
268 | return ret & mask; | 305 | return ret & mask; |
269 | } | 306 | } |
270 | 307 | ||
@@ -283,6 +320,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
283 | { | 320 | { |
284 | struct vcpu_svm *svm = to_svm(vcpu); | 321 | struct vcpu_svm *svm = to_svm(vcpu); |
285 | 322 | ||
323 | if (svm->vmcb->control.next_rip != 0) | ||
324 | svm->next_rip = svm->vmcb->control.next_rip; | ||
325 | |||
286 | if (!svm->next_rip) { | 326 | if (!svm->next_rip) { |
287 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != | 327 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != |
288 | EMULATE_DONE) | 328 | EMULATE_DONE) |
@@ -297,6 +337,43 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
297 | svm_set_interrupt_shadow(vcpu, 0); | 337 | svm_set_interrupt_shadow(vcpu, 0); |
298 | } | 338 | } |
299 | 339 | ||
340 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | ||
341 | bool has_error_code, u32 error_code, | ||
342 | bool reinject) | ||
343 | { | ||
344 | struct vcpu_svm *svm = to_svm(vcpu); | ||
345 | |||
346 | /* | ||
347 | * If we are within a nested VM we'd better #VMEXIT and let the guest | ||
348 | * handle the exception | ||
349 | */ | ||
350 | if (!reinject && | ||
351 | nested_svm_check_exception(svm, nr, has_error_code, error_code)) | ||
352 | return; | ||
353 | |||
354 | if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) { | ||
355 | unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); | ||
356 | |||
357 | /* | ||
358 | * For guest debugging where we have to reinject #BP if some | ||
359 | * INT3 is guest-owned: | ||
360 | * Emulate nRIP by moving RIP forward. Will fail if injection | ||
361 | * raises a fault that is not intercepted. Still better than | ||
362 | * failing in all cases. | ||
363 | */ | ||
364 | skip_emulated_instruction(&svm->vcpu); | ||
365 | rip = kvm_rip_read(&svm->vcpu); | ||
366 | svm->int3_rip = rip + svm->vmcb->save.cs.base; | ||
367 | svm->int3_injected = rip - old_rip; | ||
368 | } | ||
369 | |||
370 | svm->vmcb->control.event_inj = nr | ||
371 | | SVM_EVTINJ_VALID | ||
372 | | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) | ||
373 | | SVM_EVTINJ_TYPE_EXEPT; | ||
374 | svm->vmcb->control.event_inj_err = error_code; | ||
375 | } | ||
376 | |||
300 | static int has_svm(void) | 377 | static int has_svm(void) |
301 | { | 378 | { |
302 | const char *msg; | 379 | const char *msg; |
@@ -319,7 +396,7 @@ static int svm_hardware_enable(void *garbage) | |||
319 | 396 | ||
320 | struct svm_cpu_data *sd; | 397 | struct svm_cpu_data *sd; |
321 | uint64_t efer; | 398 | uint64_t efer; |
322 | struct descriptor_table gdt_descr; | 399 | struct desc_ptr gdt_descr; |
323 | struct desc_struct *gdt; | 400 | struct desc_struct *gdt; |
324 | int me = raw_smp_processor_id(); | 401 | int me = raw_smp_processor_id(); |
325 | 402 | ||
@@ -344,8 +421,8 @@ static int svm_hardware_enable(void *garbage) | |||
344 | sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; | 421 | sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; |
345 | sd->next_asid = sd->max_asid + 1; | 422 | sd->next_asid = sd->max_asid + 1; |
346 | 423 | ||
347 | kvm_get_gdt(&gdt_descr); | 424 | native_store_gdt(&gdt_descr); |
348 | gdt = (struct desc_struct *)gdt_descr.base; | 425 | gdt = (struct desc_struct *)gdt_descr.address; |
349 | sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); | 426 | sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); |
350 | 427 | ||
351 | wrmsrl(MSR_EFER, efer | EFER_SVME); | 428 | wrmsrl(MSR_EFER, efer | EFER_SVME); |
@@ -391,42 +468,98 @@ err_1: | |||
391 | 468 | ||
392 | } | 469 | } |
393 | 470 | ||
471 | static bool valid_msr_intercept(u32 index) | ||
472 | { | ||
473 | int i; | ||
474 | |||
475 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) | ||
476 | if (direct_access_msrs[i].index == index) | ||
477 | return true; | ||
478 | |||
479 | return false; | ||
480 | } | ||
481 | |||
394 | static void set_msr_interception(u32 *msrpm, unsigned msr, | 482 | static void set_msr_interception(u32 *msrpm, unsigned msr, |
395 | int read, int write) | 483 | int read, int write) |
396 | { | 484 | { |
485 | u8 bit_read, bit_write; | ||
486 | unsigned long tmp; | ||
487 | u32 offset; | ||
488 | |||
489 | /* | ||
490 | * If this warning triggers extend the direct_access_msrs list at the | ||
491 | * beginning of the file | ||
492 | */ | ||
493 | WARN_ON(!valid_msr_intercept(msr)); | ||
494 | |||
495 | offset = svm_msrpm_offset(msr); | ||
496 | bit_read = 2 * (msr & 0x0f); | ||
497 | bit_write = 2 * (msr & 0x0f) + 1; | ||
498 | tmp = msrpm[offset]; | ||
499 | |||
500 | BUG_ON(offset == MSR_INVALID); | ||
501 | |||
502 | read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp); | ||
503 | write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp); | ||
504 | |||
505 | msrpm[offset] = tmp; | ||
506 | } | ||
507 | |||
508 | static void svm_vcpu_init_msrpm(u32 *msrpm) | ||
509 | { | ||
397 | int i; | 510 | int i; |
398 | 511 | ||
399 | for (i = 0; i < NUM_MSR_MAPS; i++) { | 512 | memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); |
400 | if (msr >= msrpm_ranges[i] && | 513 | |
401 | msr < msrpm_ranges[i] + MSRS_IN_RANGE) { | 514 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { |
402 | u32 msr_offset = (i * MSRS_IN_RANGE + msr - | 515 | if (!direct_access_msrs[i].always) |
403 | msrpm_ranges[i]) * 2; | 516 | continue; |
404 | 517 | ||
405 | u32 *base = msrpm + (msr_offset / 32); | 518 | set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1); |
406 | u32 msr_shift = msr_offset % 32; | 519 | } |
407 | u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1); | 520 | } |
408 | *base = (*base & ~(0x3 << msr_shift)) | | 521 | |
409 | (mask << msr_shift); | 522 | static void add_msr_offset(u32 offset) |
523 | { | ||
524 | int i; | ||
525 | |||
526 | for (i = 0; i < MSRPM_OFFSETS; ++i) { | ||
527 | |||
528 | /* Offset already in list? */ | ||
529 | if (msrpm_offsets[i] == offset) | ||
410 | return; | 530 | return; |
411 | } | 531 | |
532 | /* Slot used by another offset? */ | ||
533 | if (msrpm_offsets[i] != MSR_INVALID) | ||
534 | continue; | ||
535 | |||
536 | /* Add offset to list */ | ||
537 | msrpm_offsets[i] = offset; | ||
538 | |||
539 | return; | ||
412 | } | 540 | } |
541 | |||
542 | /* | ||
543 | * If this BUG triggers, the msrpm_offsets table has overflowed. Just | ||
544 | * increase MSRPM_OFFSETS in this case. | ||
545 | */ | ||
413 | BUG(); | 546 | BUG(); |
414 | } | 547 | } |
415 | 548 | ||
416 | static void svm_vcpu_init_msrpm(u32 *msrpm) | 549 | static void init_msrpm_offsets(void) |
417 | { | 550 | { |
418 | memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); | 551 | int i; |
419 | 552 | ||
420 | #ifdef CONFIG_X86_64 | 553 | memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets)); |
421 | set_msr_interception(msrpm, MSR_GS_BASE, 1, 1); | 554 | |
422 | set_msr_interception(msrpm, MSR_FS_BASE, 1, 1); | 555 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { |
423 | set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1); | 556 | u32 offset; |
424 | set_msr_interception(msrpm, MSR_LSTAR, 1, 1); | 557 | |
425 | set_msr_interception(msrpm, MSR_CSTAR, 1, 1); | 558 | offset = svm_msrpm_offset(direct_access_msrs[i].index); |
426 | set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1); | 559 | BUG_ON(offset == MSR_INVALID); |
427 | #endif | 560 | |
428 | set_msr_interception(msrpm, MSR_K6_STAR, 1, 1); | 561 | add_msr_offset(offset); |
429 | set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1); | 562 | } |
430 | } | 563 | } |
431 | 564 | ||
432 | static void svm_enable_lbrv(struct vcpu_svm *svm) | 565 | static void svm_enable_lbrv(struct vcpu_svm *svm) |
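As an illustrative aside (not part of the patch), set_msr_interception() above addresses two permission bits per MSR inside a u32 word: bit 2 * (msr & 0x0f) for reads and the following bit for writes, where a set bit means the access is intercepted. A minimal standalone sketch of that bit manipulation on a tiny stand-in map follows; the names are hypothetical and the map is not a real MSRPM.

#include <stdio.h>
#include <stdint.h>

static void set_intercept(uint32_t *map, uint32_t u32_offset, uint32_t msr,
                          int allow_read, int allow_write)
{
        uint32_t bit_read  = 2 * (msr & 0x0f);   /* even bit: read access  */
        uint32_t bit_write = bit_read + 1;       /* odd bit:  write access */
        uint32_t word = map[u32_offset];

        /* A cleared bit passes the access through, a set bit intercepts it */
        if (allow_read)
                word &= ~(1u << bit_read);
        else
                word |=  (1u << bit_read);

        if (allow_write)
                word &= ~(1u << bit_write);
        else
                word |=  (1u << bit_write);

        map[u32_offset] = word;
}

int main(void)
{
        uint32_t map[4] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };

        /* Allow direct read/write of a hypothetical MSR 0x0003 in word 0 */
        set_intercept(map, 0, 0x0003, 1, 1);
        printf("word 0 after pass-through of MSR 0x0003: 0x%08x\n", map[0]);
        return 0;
}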
@@ -467,6 +600,8 @@ static __init int svm_hardware_setup(void) | |||
467 | memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); | 600 | memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); |
468 | iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; | 601 | iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; |
469 | 602 | ||
603 | init_msrpm_offsets(); | ||
604 | |||
470 | if (boot_cpu_has(X86_FEATURE_NX)) | 605 | if (boot_cpu_has(X86_FEATURE_NX)) |
471 | kvm_enable_efer_bits(EFER_NX); | 606 | kvm_enable_efer_bits(EFER_NX); |
472 | 607 | ||
@@ -523,7 +658,7 @@ static void init_seg(struct vmcb_seg *seg) | |||
523 | { | 658 | { |
524 | seg->selector = 0; | 659 | seg->selector = 0; |
525 | seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | | 660 | seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | |
526 | SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ | 661 | SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ |
527 | seg->limit = 0xffff; | 662 | seg->limit = 0xffff; |
528 | seg->base = 0; | 663 | seg->base = 0; |
529 | } | 664 | } |
@@ -543,16 +678,16 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
543 | 678 | ||
544 | svm->vcpu.fpu_active = 1; | 679 | svm->vcpu.fpu_active = 1; |
545 | 680 | ||
546 | control->intercept_cr_read = INTERCEPT_CR0_MASK | | 681 | control->intercept_cr_read = INTERCEPT_CR0_MASK | |
547 | INTERCEPT_CR3_MASK | | 682 | INTERCEPT_CR3_MASK | |
548 | INTERCEPT_CR4_MASK; | 683 | INTERCEPT_CR4_MASK; |
549 | 684 | ||
550 | control->intercept_cr_write = INTERCEPT_CR0_MASK | | 685 | control->intercept_cr_write = INTERCEPT_CR0_MASK | |
551 | INTERCEPT_CR3_MASK | | 686 | INTERCEPT_CR3_MASK | |
552 | INTERCEPT_CR4_MASK | | 687 | INTERCEPT_CR4_MASK | |
553 | INTERCEPT_CR8_MASK; | 688 | INTERCEPT_CR8_MASK; |
554 | 689 | ||
555 | control->intercept_dr_read = INTERCEPT_DR0_MASK | | 690 | control->intercept_dr_read = INTERCEPT_DR0_MASK | |
556 | INTERCEPT_DR1_MASK | | 691 | INTERCEPT_DR1_MASK | |
557 | INTERCEPT_DR2_MASK | | 692 | INTERCEPT_DR2_MASK | |
558 | INTERCEPT_DR3_MASK | | 693 | INTERCEPT_DR3_MASK | |
@@ -561,7 +696,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
561 | INTERCEPT_DR6_MASK | | 696 | INTERCEPT_DR6_MASK | |
562 | INTERCEPT_DR7_MASK; | 697 | INTERCEPT_DR7_MASK; |
563 | 698 | ||
564 | control->intercept_dr_write = INTERCEPT_DR0_MASK | | 699 | control->intercept_dr_write = INTERCEPT_DR0_MASK | |
565 | INTERCEPT_DR1_MASK | | 700 | INTERCEPT_DR1_MASK | |
566 | INTERCEPT_DR2_MASK | | 701 | INTERCEPT_DR2_MASK | |
567 | INTERCEPT_DR3_MASK | | 702 | INTERCEPT_DR3_MASK | |
@@ -575,7 +710,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
575 | (1 << MC_VECTOR); | 710 | (1 << MC_VECTOR); |
576 | 711 | ||
577 | 712 | ||
578 | control->intercept = (1ULL << INTERCEPT_INTR) | | 713 | control->intercept = (1ULL << INTERCEPT_INTR) | |
579 | (1ULL << INTERCEPT_NMI) | | 714 | (1ULL << INTERCEPT_NMI) | |
580 | (1ULL << INTERCEPT_SMI) | | 715 | (1ULL << INTERCEPT_SMI) | |
581 | (1ULL << INTERCEPT_SELECTIVE_CR0) | | 716 | (1ULL << INTERCEPT_SELECTIVE_CR0) | |
@@ -636,7 +771,8 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
636 | save->rip = 0x0000fff0; | 771 | save->rip = 0x0000fff0; |
637 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; | 772 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; |
638 | 773 | ||
639 | /* This is the guest-visible cr0 value. | 774 | /* |
775 | * This is the guest-visible cr0 value. | ||
640 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. | 776 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. |
641 | */ | 777 | */ |
642 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; | 778 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
@@ -729,6 +865,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
729 | svm_vcpu_init_msrpm(svm->msrpm); | 865 | svm_vcpu_init_msrpm(svm->msrpm); |
730 | 866 | ||
731 | svm->nested.msrpm = page_address(nested_msrpm_pages); | 867 | svm->nested.msrpm = page_address(nested_msrpm_pages); |
868 | svm_vcpu_init_msrpm(svm->nested.msrpm); | ||
732 | 869 | ||
733 | svm->vmcb = page_address(page); | 870 | svm->vmcb = page_address(page); |
734 | clear_page(svm->vmcb); | 871 | clear_page(svm->vmcb); |
@@ -882,7 +1019,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
882 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; | 1019 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; |
883 | var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; | 1020 | var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; |
884 | 1021 | ||
885 | /* AMD's VMCB does not have an explicit unusable field, so emulate it | 1022 | /* |
1023 | * AMD's VMCB does not have an explicit unusable field, so emulate it | ||
886 | * for cross vendor migration purposes by "not present" | 1024 | * for cross vendor migration purposes by "not present" |
887 | */ | 1025 | */ |
888 | var->unusable = !var->present || (var->type == 0); | 1026 | var->unusable = !var->present || (var->type == 0); |
@@ -918,7 +1056,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
918 | var->type |= 0x1; | 1056 | var->type |= 0x1; |
919 | break; | 1057 | break; |
920 | case VCPU_SREG_SS: | 1058 | case VCPU_SREG_SS: |
921 | /* On AMD CPUs sometimes the DB bit in the segment | 1059 | /* |
1060 | * On AMD CPUs sometimes the DB bit in the segment | ||
922 | * descriptor is left as 1, although the whole segment has | 1061 | * descriptor is left as 1, although the whole segment has |
923 | * been made unusable. Clear it here to pass an Intel VMX | 1062 | * been made unusable. Clear it here to pass an Intel VMX |
924 | * entry check when cross vendor migrating. | 1063 | * entry check when cross vendor migrating. |
@@ -936,36 +1075,36 @@ static int svm_get_cpl(struct kvm_vcpu *vcpu) | |||
936 | return save->cpl; | 1075 | return save->cpl; |
937 | } | 1076 | } |
938 | 1077 | ||
939 | static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1078 | static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
940 | { | 1079 | { |
941 | struct vcpu_svm *svm = to_svm(vcpu); | 1080 | struct vcpu_svm *svm = to_svm(vcpu); |
942 | 1081 | ||
943 | dt->limit = svm->vmcb->save.idtr.limit; | 1082 | dt->size = svm->vmcb->save.idtr.limit; |
944 | dt->base = svm->vmcb->save.idtr.base; | 1083 | dt->address = svm->vmcb->save.idtr.base; |
945 | } | 1084 | } |
946 | 1085 | ||
947 | static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1086 | static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
948 | { | 1087 | { |
949 | struct vcpu_svm *svm = to_svm(vcpu); | 1088 | struct vcpu_svm *svm = to_svm(vcpu); |
950 | 1089 | ||
951 | svm->vmcb->save.idtr.limit = dt->limit; | 1090 | svm->vmcb->save.idtr.limit = dt->size; |
952 | svm->vmcb->save.idtr.base = dt->base; | 1091 | svm->vmcb->save.idtr.base = dt->address; |
953 | } | 1092 | } |
954 | 1093 | ||
955 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1094 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
956 | { | 1095 | { |
957 | struct vcpu_svm *svm = to_svm(vcpu); | 1096 | struct vcpu_svm *svm = to_svm(vcpu); |
958 | 1097 | ||
959 | dt->limit = svm->vmcb->save.gdtr.limit; | 1098 | dt->size = svm->vmcb->save.gdtr.limit; |
960 | dt->base = svm->vmcb->save.gdtr.base; | 1099 | dt->address = svm->vmcb->save.gdtr.base; |
961 | } | 1100 | } |
962 | 1101 | ||
963 | static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1102 | static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
964 | { | 1103 | { |
965 | struct vcpu_svm *svm = to_svm(vcpu); | 1104 | struct vcpu_svm *svm = to_svm(vcpu); |
966 | 1105 | ||
967 | svm->vmcb->save.gdtr.limit = dt->limit; | 1106 | svm->vmcb->save.gdtr.limit = dt->size; |
968 | svm->vmcb->save.gdtr.base = dt->base; | 1107 | svm->vmcb->save.gdtr.base = dt->address; |
969 | } | 1108 | } |
970 | 1109 | ||
971 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | 1110 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) |
@@ -978,6 +1117,7 @@ static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | |||
978 | 1117 | ||
979 | static void update_cr0_intercept(struct vcpu_svm *svm) | 1118 | static void update_cr0_intercept(struct vcpu_svm *svm) |
980 | { | 1119 | { |
1120 | struct vmcb *vmcb = svm->vmcb; | ||
981 | ulong gcr0 = svm->vcpu.arch.cr0; | 1121 | ulong gcr0 = svm->vcpu.arch.cr0; |
982 | u64 *hcr0 = &svm->vmcb->save.cr0; | 1122 | u64 *hcr0 = &svm->vmcb->save.cr0; |
983 | 1123 | ||
@@ -989,11 +1129,25 @@ static void update_cr0_intercept(struct vcpu_svm *svm) | |||
989 | 1129 | ||
990 | 1130 | ||
991 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { | 1131 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { |
992 | svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | 1132 | vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; |
993 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | 1133 | vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; |
1134 | if (is_nested(svm)) { | ||
1135 | struct vmcb *hsave = svm->nested.hsave; | ||
1136 | |||
1137 | hsave->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | ||
1138 | hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | ||
1139 | vmcb->control.intercept_cr_read |= svm->nested.intercept_cr_read; | ||
1140 | vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write; | ||
1141 | } | ||
994 | } else { | 1142 | } else { |
995 | svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | 1143 | svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; |
996 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | 1144 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; |
1145 | if (is_nested(svm)) { | ||
1146 | struct vmcb *hsave = svm->nested.hsave; | ||
1147 | |||
1148 | hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | ||
1149 | hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | ||
1150 | } | ||
997 | } | 1151 | } |
998 | } | 1152 | } |
999 | 1153 | ||
@@ -1001,6 +1155,27 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1001 | { | 1155 | { |
1002 | struct vcpu_svm *svm = to_svm(vcpu); | 1156 | struct vcpu_svm *svm = to_svm(vcpu); |
1003 | 1157 | ||
1158 | if (is_nested(svm)) { | ||
1159 | /* | ||
1160 | * We are here because we run in nested mode, the host kvm | ||
1161 | * intercepts cr0 writes but the l1 hypervisor does not. | ||
1162 | * But the L1 hypervisor may intercept selective cr0 writes. | ||
1163 | * This needs to be checked here. | ||
1164 | */ | ||
1165 | unsigned long old, new; | ||
1166 | |||
1167 | /* Remove bits that would trigger a real cr0 write intercept */ | ||
1168 | old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; | ||
1169 | new = cr0 & SVM_CR0_SELECTIVE_MASK; | ||
1170 | |||
1171 | if (old == new) { | ||
1172 | /* cr0 write with ts and mp unchanged */ | ||
1173 | svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; | ||
1174 | if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) | ||
1175 | return; | ||
1176 | } | ||
1177 | } | ||
1178 | |||
1004 | #ifdef CONFIG_X86_64 | 1179 | #ifdef CONFIG_X86_64 |
1005 | if (vcpu->arch.efer & EFER_LME) { | 1180 | if (vcpu->arch.efer & EFER_LME) { |
1006 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 1181 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
@@ -1134,70 +1309,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) | |||
1134 | svm->vmcb->control.asid = sd->next_asid++; | 1309 | svm->vmcb->control.asid = sd->next_asid++; |
1135 | } | 1310 | } |
1136 | 1311 | ||
1137 | static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest) | 1312 | static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) |
1138 | { | 1313 | { |
1139 | struct vcpu_svm *svm = to_svm(vcpu); | 1314 | struct vcpu_svm *svm = to_svm(vcpu); |
1140 | 1315 | ||
1141 | switch (dr) { | 1316 | svm->vmcb->save.dr7 = value; |
1142 | case 0 ... 3: | ||
1143 | *dest = vcpu->arch.db[dr]; | ||
1144 | break; | ||
1145 | case 4: | ||
1146 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1147 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1148 | /* fall through */ | ||
1149 | case 6: | ||
1150 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | ||
1151 | *dest = vcpu->arch.dr6; | ||
1152 | else | ||
1153 | *dest = svm->vmcb->save.dr6; | ||
1154 | break; | ||
1155 | case 5: | ||
1156 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1157 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1158 | /* fall through */ | ||
1159 | case 7: | ||
1160 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | ||
1161 | *dest = vcpu->arch.dr7; | ||
1162 | else | ||
1163 | *dest = svm->vmcb->save.dr7; | ||
1164 | break; | ||
1165 | } | ||
1166 | |||
1167 | return EMULATE_DONE; | ||
1168 | } | ||
1169 | |||
1170 | static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value) | ||
1171 | { | ||
1172 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1173 | |||
1174 | switch (dr) { | ||
1175 | case 0 ... 3: | ||
1176 | vcpu->arch.db[dr] = value; | ||
1177 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
1178 | vcpu->arch.eff_db[dr] = value; | ||
1179 | break; | ||
1180 | case 4: | ||
1181 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1182 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1183 | /* fall through */ | ||
1184 | case 6: | ||
1185 | vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; | ||
1186 | break; | ||
1187 | case 5: | ||
1188 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1189 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1190 | /* fall through */ | ||
1191 | case 7: | ||
1192 | vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; | ||
1193 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
1194 | svm->vmcb->save.dr7 = vcpu->arch.dr7; | ||
1195 | vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); | ||
1196 | } | ||
1197 | break; | ||
1198 | } | ||
1199 | |||
1200 | return EMULATE_DONE; | ||
1201 | } | 1317 | } |
1202 | 1318 | ||
1203 | static int pf_interception(struct vcpu_svm *svm) | 1319 | static int pf_interception(struct vcpu_svm *svm) |
@@ -1234,7 +1350,7 @@ static int db_interception(struct vcpu_svm *svm) | |||
1234 | } | 1350 | } |
1235 | 1351 | ||
1236 | if (svm->vcpu.guest_debug & | 1352 | if (svm->vcpu.guest_debug & |
1237 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){ | 1353 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) { |
1238 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 1354 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
1239 | kvm_run->debug.arch.pc = | 1355 | kvm_run->debug.arch.pc = |
1240 | svm->vmcb->save.cs.base + svm->vmcb->save.rip; | 1356 | svm->vmcb->save.cs.base + svm->vmcb->save.rip; |
@@ -1268,7 +1384,22 @@ static int ud_interception(struct vcpu_svm *svm) | |||
1268 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) | 1384 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) |
1269 | { | 1385 | { |
1270 | struct vcpu_svm *svm = to_svm(vcpu); | 1386 | struct vcpu_svm *svm = to_svm(vcpu); |
1271 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 1387 | u32 excp; |
1388 | |||
1389 | if (is_nested(svm)) { | ||
1390 | u32 h_excp, n_excp; | ||
1391 | |||
1392 | h_excp = svm->nested.hsave->control.intercept_exceptions; | ||
1393 | n_excp = svm->nested.intercept_exceptions; | ||
1394 | h_excp &= ~(1 << NM_VECTOR); | ||
1395 | excp = h_excp | n_excp; | ||
1396 | } else { | ||
1397 | excp = svm->vmcb->control.intercept_exceptions; | ||
1398 | excp &= ~(1 << NM_VECTOR); | ||
1399 | } | ||
1400 | |||
1401 | svm->vmcb->control.intercept_exceptions = excp; | ||
1402 | |||
1272 | svm->vcpu.fpu_active = 1; | 1403 | svm->vcpu.fpu_active = 1; |
1273 | update_cr0_intercept(svm); | 1404 | update_cr0_intercept(svm); |
1274 | } | 1405 | } |
@@ -1309,29 +1440,23 @@ static int shutdown_interception(struct vcpu_svm *svm) | |||
1309 | 1440 | ||
1310 | static int io_interception(struct vcpu_svm *svm) | 1441 | static int io_interception(struct vcpu_svm *svm) |
1311 | { | 1442 | { |
1443 | struct kvm_vcpu *vcpu = &svm->vcpu; | ||
1312 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ | 1444 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ |
1313 | int size, in, string; | 1445 | int size, in, string; |
1314 | unsigned port; | 1446 | unsigned port; |
1315 | 1447 | ||
1316 | ++svm->vcpu.stat.io_exits; | 1448 | ++svm->vcpu.stat.io_exits; |
1317 | |||
1318 | svm->next_rip = svm->vmcb->control.exit_info_2; | ||
1319 | |||
1320 | string = (io_info & SVM_IOIO_STR_MASK) != 0; | 1449 | string = (io_info & SVM_IOIO_STR_MASK) != 0; |
1321 | |||
1322 | if (string) { | ||
1323 | if (emulate_instruction(&svm->vcpu, | ||
1324 | 0, 0, 0) == EMULATE_DO_MMIO) | ||
1325 | return 0; | ||
1326 | return 1; | ||
1327 | } | ||
1328 | |||
1329 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; | 1450 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; |
1451 | if (string || in) | ||
1452 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); | ||
1453 | |||
1330 | port = io_info >> 16; | 1454 | port = io_info >> 16; |
1331 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; | 1455 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
1332 | 1456 | svm->next_rip = svm->vmcb->control.exit_info_2; | |
1333 | skip_emulated_instruction(&svm->vcpu); | 1457 | skip_emulated_instruction(&svm->vcpu); |
1334 | return kvm_emulate_pio(&svm->vcpu, in, size, port); | 1458 | |
1459 | return kvm_fast_pio_out(vcpu, size, port); | ||
1335 | } | 1460 | } |
1336 | 1461 | ||
1337 | static int nmi_interception(struct vcpu_svm *svm) | 1462 | static int nmi_interception(struct vcpu_svm *svm) |
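As an illustrative aside (not part of the patch), the reworked io_interception() above decodes everything it needs from exit_info_1: the port number sits in the upper 16 bits, while low flag and size bits select IN vs OUT, string I/O and the operand size. A standalone sketch of that decoding follows; the mask and shift values are assumptions matching the usual SVM IOIO exit-info layout, whereas the kernel takes them from its svm.h definitions.

#include <stdint.h>
#include <stdio.h>

#define IOIO_TYPE_MASK  (1u << 0)   /* assumed: 1 = IN, 0 = OUT     */
#define IOIO_STR_MASK   (1u << 2)   /* assumed: string instruction  */
#define IOIO_SIZE_MASK  (7u << 4)   /* assumed: SZ8/SZ16/SZ32 bits  */
#define IOIO_SIZE_SHIFT 4

int main(void)
{
        /* OUT to port 0x3f8 with a 16-bit operand */
        uint32_t io_info = (0x3f8u << 16) | (1u << 5);

        unsigned port   = io_info >> 16;
        int      in     = (io_info & IOIO_TYPE_MASK) != 0;
        int      string = (io_info & IOIO_STR_MASK) != 0;
        int      size   = (io_info & IOIO_SIZE_MASK) >> IOIO_SIZE_SHIFT;

        printf("port=0x%x in=%d string=%d size=%d bytes\n",
               port, in, string, size);
        return 0;
}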
@@ -1384,6 +1509,8 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm) | |||
1384 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 1509 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
1385 | bool has_error_code, u32 error_code) | 1510 | bool has_error_code, u32 error_code) |
1386 | { | 1511 | { |
1512 | int vmexit; | ||
1513 | |||
1387 | if (!is_nested(svm)) | 1514 | if (!is_nested(svm)) |
1388 | return 0; | 1515 | return 0; |
1389 | 1516 | ||
@@ -1392,21 +1519,28 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | |||
1392 | svm->vmcb->control.exit_info_1 = error_code; | 1519 | svm->vmcb->control.exit_info_1 = error_code; |
1393 | svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; | 1520 | svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; |
1394 | 1521 | ||
1395 | return nested_svm_exit_handled(svm); | 1522 | vmexit = nested_svm_intercept(svm); |
1523 | if (vmexit == NESTED_EXIT_DONE) | ||
1524 | svm->nested.exit_required = true; | ||
1525 | |||
1526 | return vmexit; | ||
1396 | } | 1527 | } |
1397 | 1528 | ||
1398 | static inline int nested_svm_intr(struct vcpu_svm *svm) | 1529 | /* This function returns true if it is safe to enable the irq window */ |
1530 | static inline bool nested_svm_intr(struct vcpu_svm *svm) | ||
1399 | { | 1531 | { |
1400 | if (!is_nested(svm)) | 1532 | if (!is_nested(svm)) |
1401 | return 0; | 1533 | return true; |
1402 | 1534 | ||
1403 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) | 1535 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) |
1404 | return 0; | 1536 | return true; |
1405 | 1537 | ||
1406 | if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) | 1538 | if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) |
1407 | return 0; | 1539 | return false; |
1408 | 1540 | ||
1409 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; | 1541 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; |
1542 | svm->vmcb->control.exit_info_1 = 0; | ||
1543 | svm->vmcb->control.exit_info_2 = 0; | ||
1410 | 1544 | ||
1411 | if (svm->nested.intercept & 1ULL) { | 1545 | if (svm->nested.intercept & 1ULL) { |
1412 | /* | 1546 | /* |
@@ -1417,21 +1551,40 @@ static inline int nested_svm_intr(struct vcpu_svm *svm) | |||
1417 | */ | 1551 | */ |
1418 | svm->nested.exit_required = true; | 1552 | svm->nested.exit_required = true; |
1419 | trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); | 1553 | trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); |
1420 | return 1; | 1554 | return false; |
1421 | } | 1555 | } |
1422 | 1556 | ||
1423 | return 0; | 1557 | return true; |
1558 | } | ||
1559 | |||
1560 | /* This function returns true if it is safe to enable the nmi window */ | ||
1561 | static inline bool nested_svm_nmi(struct vcpu_svm *svm) | ||
1562 | { | ||
1563 | if (!is_nested(svm)) | ||
1564 | return true; | ||
1565 | |||
1566 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) | ||
1567 | return true; | ||
1568 | |||
1569 | svm->vmcb->control.exit_code = SVM_EXIT_NMI; | ||
1570 | svm->nested.exit_required = true; | ||
1571 | |||
1572 | return false; | ||
1424 | } | 1573 | } |
1425 | 1574 | ||
1426 | static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) | 1575 | static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) |
1427 | { | 1576 | { |
1428 | struct page *page; | 1577 | struct page *page; |
1429 | 1578 | ||
1579 | might_sleep(); | ||
1580 | |||
1430 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); | 1581 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); |
1431 | if (is_error_page(page)) | 1582 | if (is_error_page(page)) |
1432 | goto error; | 1583 | goto error; |
1433 | 1584 | ||
1434 | return kmap_atomic(page, idx); | 1585 | *_page = page; |
1586 | |||
1587 | return kmap(page); | ||
1435 | 1588 | ||
1436 | error: | 1589 | error: |
1437 | kvm_release_page_clean(page); | 1590 | kvm_release_page_clean(page); |
@@ -1440,61 +1593,55 @@ error: | |||
1440 | return NULL; | 1593 | return NULL; |
1441 | } | 1594 | } |
1442 | 1595 | ||
1443 | static void nested_svm_unmap(void *addr, enum km_type idx) | 1596 | static void nested_svm_unmap(struct page *page) |
1444 | { | 1597 | { |
1445 | struct page *page; | 1598 | kunmap(page); |
1599 | kvm_release_page_dirty(page); | ||
1600 | } | ||
1446 | 1601 | ||
1447 | if (!addr) | 1602 | static int nested_svm_intercept_ioio(struct vcpu_svm *svm) |
1448 | return; | 1603 | { |
1604 | unsigned port; | ||
1605 | u8 val, bit; | ||
1606 | u64 gpa; | ||
1449 | 1607 | ||
1450 | page = kmap_atomic_to_page(addr); | 1608 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) |
1609 | return NESTED_EXIT_HOST; | ||
1451 | 1610 | ||
1452 | kunmap_atomic(addr, idx); | 1611 | port = svm->vmcb->control.exit_info_1 >> 16; |
1453 | kvm_release_page_dirty(page); | 1612 | gpa = svm->nested.vmcb_iopm + (port / 8); |
1613 | bit = port % 8; | ||
1614 | val = 0; | ||
1615 | |||
1616 | if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1)) | ||
1617 | val &= (1 << bit); | ||
1618 | |||
1619 | return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; | ||
1454 | } | 1620 | } |
1455 | 1621 | ||
1456 | static bool nested_svm_exit_handled_msr(struct vcpu_svm *svm) | 1622 | static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) |
1457 | { | 1623 | { |
1458 | u32 param = svm->vmcb->control.exit_info_1 & 1; | 1624 | u32 offset, msr, value; |
1459 | u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 1625 | int write, mask; |
1460 | bool ret = false; | ||
1461 | u32 t0, t1; | ||
1462 | u8 *msrpm; | ||
1463 | 1626 | ||
1464 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) | 1627 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) |
1465 | return false; | 1628 | return NESTED_EXIT_HOST; |
1466 | 1629 | ||
1467 | msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); | 1630 | msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
1631 | offset = svm_msrpm_offset(msr); | ||
1632 | write = svm->vmcb->control.exit_info_1 & 1; | ||
1633 | mask = 1 << ((2 * (msr & 0xf)) + write); | ||
1468 | 1634 | ||
1469 | if (!msrpm) | 1635 | if (offset == MSR_INVALID) |
1470 | goto out; | 1636 | return NESTED_EXIT_DONE; |
1471 | 1637 | ||
1472 | switch (msr) { | 1638 | /* Offset is in 32 bit units but we need it in 8 bit units */ |
1473 | case 0 ... 0x1fff: | 1639 | offset *= 4; |
1474 | t0 = (msr * 2) % 8; | ||
1475 | t1 = msr / 8; | ||
1476 | break; | ||
1477 | case 0xc0000000 ... 0xc0001fff: | ||
1478 | t0 = (8192 + msr - 0xc0000000) * 2; | ||
1479 | t1 = (t0 / 8); | ||
1480 | t0 %= 8; | ||
1481 | break; | ||
1482 | case 0xc0010000 ... 0xc0011fff: | ||
1483 | t0 = (16384 + msr - 0xc0010000) * 2; | ||
1484 | t1 = (t0 / 8); | ||
1485 | t0 %= 8; | ||
1486 | break; | ||
1487 | default: | ||
1488 | ret = true; | ||
1489 | goto out; | ||
1490 | } | ||
1491 | 1640 | ||
1492 | ret = msrpm[t1] & ((1 << param) << t0); | 1641 | if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4)) |
1493 | 1642 | return NESTED_EXIT_DONE; | |
1494 | out: | ||
1495 | nested_svm_unmap(msrpm, KM_USER0); | ||
1496 | 1643 | ||
1497 | return ret; | 1644 | return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; |
1498 | } | 1645 | } |
1499 | 1646 | ||
1500 | static int nested_svm_exit_special(struct vcpu_svm *svm) | 1647 | static int nested_svm_exit_special(struct vcpu_svm *svm) |
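As an illustrative aside (not part of the patch), nested_svm_intercept_ioio() above performs a plain one-bit-per-port lookup: byte port / 8, bit port % 8, where a set bit means the L1 hypervisor wants the port intercepted. A standalone sketch of the same lookup on a local byte array instead of guest memory; the names are hypothetical.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static bool iopm_port_intercepted(const uint8_t *iopm, uint16_t port)
{
        return iopm[port / 8] & (1u << (port % 8));
}

int main(void)
{
        uint8_t iopm[8192];                        /* one bit per port 0..65535 */

        memset(iopm, 0, sizeof(iopm));
        iopm[0x3f8 / 8] |= 1u << (0x3f8 % 8);      /* intercept COM1 base port */

        printf("port 0x3f8 intercepted: %d\n", iopm_port_intercepted(iopm, 0x3f8));
        printf("port 0x60  intercepted: %d\n", iopm_port_intercepted(iopm, 0x60));
        return 0;
}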
@@ -1504,17 +1651,21 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) | |||
1504 | switch (exit_code) { | 1651 | switch (exit_code) { |
1505 | case SVM_EXIT_INTR: | 1652 | case SVM_EXIT_INTR: |
1506 | case SVM_EXIT_NMI: | 1653 | case SVM_EXIT_NMI: |
1654 | case SVM_EXIT_EXCP_BASE + MC_VECTOR: | ||
1507 | return NESTED_EXIT_HOST; | 1655 | return NESTED_EXIT_HOST; |
1508 | /* For now we are always handling NPFs when using them */ | ||
1509 | case SVM_EXIT_NPF: | 1656 | case SVM_EXIT_NPF: |
1657 | /* For now we are always handling NPFs when using them */ | ||
1510 | if (npt_enabled) | 1658 | if (npt_enabled) |
1511 | return NESTED_EXIT_HOST; | 1659 | return NESTED_EXIT_HOST; |
1512 | break; | 1660 | break; |
1513 | /* When we're shadowing, trap PFs */ | ||
1514 | case SVM_EXIT_EXCP_BASE + PF_VECTOR: | 1661 | case SVM_EXIT_EXCP_BASE + PF_VECTOR: |
1662 | /* When we're shadowing, trap PFs */ | ||
1515 | if (!npt_enabled) | 1663 | if (!npt_enabled) |
1516 | return NESTED_EXIT_HOST; | 1664 | return NESTED_EXIT_HOST; |
1517 | break; | 1665 | break; |
1666 | case SVM_EXIT_EXCP_BASE + NM_VECTOR: | ||
1667 | nm_interception(svm); | ||
1668 | break; | ||
1518 | default: | 1669 | default: |
1519 | break; | 1670 | break; |
1520 | } | 1671 | } |
@@ -1525,7 +1676,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) | |||
1525 | /* | 1676 | /* |
1526 | * If this function returns true, this #vmexit was already handled | 1677 | * If this function returns true, this #vmexit was already handled |
1527 | */ | 1678 | */ |
1528 | static int nested_svm_exit_handled(struct vcpu_svm *svm) | 1679 | static int nested_svm_intercept(struct vcpu_svm *svm) |
1529 | { | 1680 | { |
1530 | u32 exit_code = svm->vmcb->control.exit_code; | 1681 | u32 exit_code = svm->vmcb->control.exit_code; |
1531 | int vmexit = NESTED_EXIT_HOST; | 1682 | int vmexit = NESTED_EXIT_HOST; |
@@ -1534,6 +1685,9 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
1534 | case SVM_EXIT_MSR: | 1685 | case SVM_EXIT_MSR: |
1535 | vmexit = nested_svm_exit_handled_msr(svm); | 1686 | vmexit = nested_svm_exit_handled_msr(svm); |
1536 | break; | 1687 | break; |
1688 | case SVM_EXIT_IOIO: | ||
1689 | vmexit = nested_svm_intercept_ioio(svm); | ||
1690 | break; | ||
1537 | case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { | 1691 | case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { |
1538 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); | 1692 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); |
1539 | if (svm->nested.intercept_cr_read & cr_bits) | 1693 | if (svm->nested.intercept_cr_read & cr_bits) |
@@ -1564,6 +1718,10 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
1564 | vmexit = NESTED_EXIT_DONE; | 1718 | vmexit = NESTED_EXIT_DONE; |
1565 | break; | 1719 | break; |
1566 | } | 1720 | } |
1721 | case SVM_EXIT_ERR: { | ||
1722 | vmexit = NESTED_EXIT_DONE; | ||
1723 | break; | ||
1724 | } | ||
1567 | default: { | 1725 | default: { |
1568 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); | 1726 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); |
1569 | if (svm->nested.intercept & exit_bits) | 1727 | if (svm->nested.intercept & exit_bits) |
@@ -1571,9 +1729,17 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
1571 | } | 1729 | } |
1572 | } | 1730 | } |
1573 | 1731 | ||
1574 | if (vmexit == NESTED_EXIT_DONE) { | 1732 | return vmexit; |
1733 | } | ||
1734 | |||
1735 | static int nested_svm_exit_handled(struct vcpu_svm *svm) | ||
1736 | { | ||
1737 | int vmexit; | ||
1738 | |||
1739 | vmexit = nested_svm_intercept(svm); | ||
1740 | |||
1741 | if (vmexit == NESTED_EXIT_DONE) | ||
1575 | nested_svm_vmexit(svm); | 1742 | nested_svm_vmexit(svm); |
1576 | } | ||
1577 | 1743 | ||
1578 | return vmexit; | 1744 | return vmexit; |
1579 | } | 1745 | } |
@@ -1615,6 +1781,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1615 | struct vmcb *nested_vmcb; | 1781 | struct vmcb *nested_vmcb; |
1616 | struct vmcb *hsave = svm->nested.hsave; | 1782 | struct vmcb *hsave = svm->nested.hsave; |
1617 | struct vmcb *vmcb = svm->vmcb; | 1783 | struct vmcb *vmcb = svm->vmcb; |
1784 | struct page *page; | ||
1618 | 1785 | ||
1619 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, | 1786 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, |
1620 | vmcb->control.exit_info_1, | 1787 | vmcb->control.exit_info_1, |
@@ -1622,10 +1789,13 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1622 | vmcb->control.exit_int_info, | 1789 | vmcb->control.exit_int_info, |
1623 | vmcb->control.exit_int_info_err); | 1790 | vmcb->control.exit_int_info_err); |
1624 | 1791 | ||
1625 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); | 1792 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); |
1626 | if (!nested_vmcb) | 1793 | if (!nested_vmcb) |
1627 | return 1; | 1794 | return 1; |
1628 | 1795 | ||
1796 | /* Exit nested SVM mode */ | ||
1797 | svm->nested.vmcb = 0; | ||
1798 | |||
1629 | /* Give the current vmcb to the guest */ | 1799 | /* Give the current vmcb to the guest */ |
1630 | disable_gif(svm); | 1800 | disable_gif(svm); |
1631 | 1801 | ||
@@ -1635,9 +1805,10 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1635 | nested_vmcb->save.ds = vmcb->save.ds; | 1805 | nested_vmcb->save.ds = vmcb->save.ds; |
1636 | nested_vmcb->save.gdtr = vmcb->save.gdtr; | 1806 | nested_vmcb->save.gdtr = vmcb->save.gdtr; |
1637 | nested_vmcb->save.idtr = vmcb->save.idtr; | 1807 | nested_vmcb->save.idtr = vmcb->save.idtr; |
1638 | if (npt_enabled) | 1808 | nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); |
1639 | nested_vmcb->save.cr3 = vmcb->save.cr3; | 1809 | nested_vmcb->save.cr3 = svm->vcpu.arch.cr3; |
1640 | nested_vmcb->save.cr2 = vmcb->save.cr2; | 1810 | nested_vmcb->save.cr2 = vmcb->save.cr2; |
1811 | nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; | ||
1641 | nested_vmcb->save.rflags = vmcb->save.rflags; | 1812 | nested_vmcb->save.rflags = vmcb->save.rflags; |
1642 | nested_vmcb->save.rip = vmcb->save.rip; | 1813 | nested_vmcb->save.rip = vmcb->save.rip; |
1643 | nested_vmcb->save.rsp = vmcb->save.rsp; | 1814 | nested_vmcb->save.rsp = vmcb->save.rsp; |
@@ -1709,10 +1880,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1709 | svm->vmcb->save.cpl = 0; | 1880 | svm->vmcb->save.cpl = 0; |
1710 | svm->vmcb->control.exit_int_info = 0; | 1881 | svm->vmcb->control.exit_int_info = 0; |
1711 | 1882 | ||
1712 | /* Exit nested SVM mode */ | 1883 | nested_svm_unmap(page); |
1713 | svm->nested.vmcb = 0; | ||
1714 | |||
1715 | nested_svm_unmap(nested_vmcb, KM_USER0); | ||
1716 | 1884 | ||
1717 | kvm_mmu_reset_context(&svm->vcpu); | 1885 | kvm_mmu_reset_context(&svm->vcpu); |
1718 | kvm_mmu_load(&svm->vcpu); | 1886 | kvm_mmu_load(&svm->vcpu); |
@@ -1722,19 +1890,33 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1722 | 1890 | ||
1723 | static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) | 1891 | static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) |
1724 | { | 1892 | { |
1725 | u32 *nested_msrpm; | 1893 | /* |
1894 | * This function merges the msr permission bitmaps of kvm and the | ||
1895 | * nested vmcb. It is optimized in that it only merges the parts where | ||
1896 | * the kvm msr permission bitmap may contain zero bits | ||
1897 | */ | ||
1726 | int i; | 1898 | int i; |
1727 | 1899 | ||
1728 | nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); | 1900 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) |
1729 | if (!nested_msrpm) | 1901 | return true; |
1730 | return false; | ||
1731 | 1902 | ||
1732 | for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++) | 1903 | for (i = 0; i < MSRPM_OFFSETS; i++) { |
1733 | svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i]; | 1904 | u32 value, p; |
1905 | u64 offset; | ||
1734 | 1906 | ||
1735 | svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); | 1907 | if (msrpm_offsets[i] == 0xffffffff) |
1908 | break; | ||
1909 | |||
1910 | p = msrpm_offsets[i]; | ||
1911 | offset = svm->nested.vmcb_msrpm + (p * 4); | ||
1912 | |||
1913 | if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4)) | ||
1914 | return false; | ||
1915 | |||
1916 | svm->nested.msrpm[p] = svm->msrpm[p] | value; | ||
1917 | } | ||
1736 | 1918 | ||
1737 | nested_svm_unmap(nested_msrpm, KM_USER0); | 1919 | svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); |
1738 | 1920 | ||
1739 | return true; | 1921 | return true; |
1740 | } | 1922 | } |
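As an illustrative aside (not part of the patch), the merge done by nested_svm_vmrun_msrpm() above boils down to OR-ing the host and nested permission words at the cached offsets: since a set bit means "intercept", the OR keeps every intercept that either KVM or the L1 hypervisor asked for. A standalone sketch on two local arrays follows; the names and sizes are hypothetical.

#include <stdint.h>
#include <stdio.h>

#define NWORDS 8   /* stand-in for the interesting MSRPM offsets */

static void merge_msrpm(uint32_t *merged, const uint32_t *host,
                        const uint32_t *nested)
{
        int i;

        /* Keep every intercept requested by either side */
        for (i = 0; i < NWORDS; i++)
                merged[i] = host[i] | nested[i];
}

int main(void)
{
        uint32_t host[NWORDS]   = { 0xfffffff3 };  /* host passes one MSR through */
        uint32_t nested[NWORDS] = { 0xffffffff };  /* L1 intercepts everything    */
        uint32_t merged[NWORDS] = { 0 };

        merge_msrpm(merged, host, nested);
        printf("merged[0] = 0x%08x\n", merged[0]);  /* expect 0xffffffff */
        return 0;
}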
@@ -1744,26 +1926,34 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1744 | struct vmcb *nested_vmcb; | 1926 | struct vmcb *nested_vmcb; |
1745 | struct vmcb *hsave = svm->nested.hsave; | 1927 | struct vmcb *hsave = svm->nested.hsave; |
1746 | struct vmcb *vmcb = svm->vmcb; | 1928 | struct vmcb *vmcb = svm->vmcb; |
1929 | struct page *page; | ||
1930 | u64 vmcb_gpa; | ||
1747 | 1931 | ||
1748 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 1932 | vmcb_gpa = svm->vmcb->save.rax; |
1933 | |||
1934 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); | ||
1749 | if (!nested_vmcb) | 1935 | if (!nested_vmcb) |
1750 | return false; | 1936 | return false; |
1751 | 1937 | ||
1752 | /* nested_vmcb is our indicator if nested SVM is activated */ | 1938 | trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa, |
1753 | svm->nested.vmcb = svm->vmcb->save.rax; | ||
1754 | |||
1755 | trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb, | ||
1756 | nested_vmcb->save.rip, | 1939 | nested_vmcb->save.rip, |
1757 | nested_vmcb->control.int_ctl, | 1940 | nested_vmcb->control.int_ctl, |
1758 | nested_vmcb->control.event_inj, | 1941 | nested_vmcb->control.event_inj, |
1759 | nested_vmcb->control.nested_ctl); | 1942 | nested_vmcb->control.nested_ctl); |
1760 | 1943 | ||
1944 | trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read, | ||
1945 | nested_vmcb->control.intercept_cr_write, | ||
1946 | nested_vmcb->control.intercept_exceptions, | ||
1947 | nested_vmcb->control.intercept); | ||
1948 | |||
1761 | /* Clear internal status */ | 1949 | /* Clear internal status */ |
1762 | kvm_clear_exception_queue(&svm->vcpu); | 1950 | kvm_clear_exception_queue(&svm->vcpu); |
1763 | kvm_clear_interrupt_queue(&svm->vcpu); | 1951 | kvm_clear_interrupt_queue(&svm->vcpu); |
1764 | 1952 | ||
1765 | /* Save the old vmcb, so we don't need to pick what we save, but | 1953 | /* |
1766 | can restore everything when a VMEXIT occurs */ | 1954 | * Save the old vmcb, so we don't need to pick what we save, but can |
1955 | * restore everything when a VMEXIT occurs | ||
1956 | */ | ||
1767 | hsave->save.es = vmcb->save.es; | 1957 | hsave->save.es = vmcb->save.es; |
1768 | hsave->save.cs = vmcb->save.cs; | 1958 | hsave->save.cs = vmcb->save.cs; |
1769 | hsave->save.ss = vmcb->save.ss; | 1959 | hsave->save.ss = vmcb->save.ss; |
@@ -1803,14 +1993,17 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1803 | if (npt_enabled) { | 1993 | if (npt_enabled) { |
1804 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; | 1994 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; |
1805 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; | 1995 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; |
1806 | } else { | 1996 | } else |
1807 | kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); | 1997 | kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); |
1808 | kvm_mmu_reset_context(&svm->vcpu); | 1998 | |
1809 | } | 1999 | /* Guest paging mode is active - reset mmu */ |
2000 | kvm_mmu_reset_context(&svm->vcpu); | ||
2001 | |||
1810 | svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; | 2002 | svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; |
1811 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); | 2003 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); |
1812 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); | 2004 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); |
1813 | kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); | 2005 | kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); |
2006 | |||
1814 | /* In case we don't even reach vcpu_run, the fields are not updated */ | 2007 | /* In case we don't even reach vcpu_run, the fields are not updated */ |
1815 | svm->vmcb->save.rax = nested_vmcb->save.rax; | 2008 | svm->vmcb->save.rax = nested_vmcb->save.rax; |
1816 | svm->vmcb->save.rsp = nested_vmcb->save.rsp; | 2009 | svm->vmcb->save.rsp = nested_vmcb->save.rsp; |
@@ -1819,22 +2012,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1819 | svm->vmcb->save.dr6 = nested_vmcb->save.dr6; | 2012 | svm->vmcb->save.dr6 = nested_vmcb->save.dr6; |
1820 | svm->vmcb->save.cpl = nested_vmcb->save.cpl; | 2013 | svm->vmcb->save.cpl = nested_vmcb->save.cpl; |
1821 | 2014 | ||
1822 | /* We don't want a nested guest to be more powerful than the guest, | 2015 | svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL; |
1823 | so all intercepts are ORed */ | 2016 | svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; |
1824 | svm->vmcb->control.intercept_cr_read |= | ||
1825 | nested_vmcb->control.intercept_cr_read; | ||
1826 | svm->vmcb->control.intercept_cr_write |= | ||
1827 | nested_vmcb->control.intercept_cr_write; | ||
1828 | svm->vmcb->control.intercept_dr_read |= | ||
1829 | nested_vmcb->control.intercept_dr_read; | ||
1830 | svm->vmcb->control.intercept_dr_write |= | ||
1831 | nested_vmcb->control.intercept_dr_write; | ||
1832 | svm->vmcb->control.intercept_exceptions |= | ||
1833 | nested_vmcb->control.intercept_exceptions; | ||
1834 | |||
1835 | svm->vmcb->control.intercept |= nested_vmcb->control.intercept; | ||
1836 | |||
1837 | svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa; | ||
1838 | 2017 | ||
1839 | /* cache intercepts */ | 2018 | /* cache intercepts */ |
1840 | svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; | 2019 | svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; |
@@ -1851,13 +2030,43 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1851 | else | 2030 | else |
1852 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; | 2031 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; |
1853 | 2032 | ||
2033 | if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { | ||
2034 | /* We only want the cr8 intercept bits of the guest */ | ||
2035 | svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK; | ||
2036 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | ||
2037 | } | ||
2038 | |||
2039 | /* We don't want to see VMMCALLs from a nested guest */ | ||
2040 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMMCALL); | ||
2041 | |||
2042 | /* | ||
2043 | * We don't want a nested guest to be more powerful than the guest, so | ||
2044 | * all intercepts are ORed | ||
2045 | */ | ||
2046 | svm->vmcb->control.intercept_cr_read |= | ||
2047 | nested_vmcb->control.intercept_cr_read; | ||
2048 | svm->vmcb->control.intercept_cr_write |= | ||
2049 | nested_vmcb->control.intercept_cr_write; | ||
2050 | svm->vmcb->control.intercept_dr_read |= | ||
2051 | nested_vmcb->control.intercept_dr_read; | ||
2052 | svm->vmcb->control.intercept_dr_write |= | ||
2053 | nested_vmcb->control.intercept_dr_write; | ||
2054 | svm->vmcb->control.intercept_exceptions |= | ||
2055 | nested_vmcb->control.intercept_exceptions; | ||
2056 | |||
2057 | svm->vmcb->control.intercept |= nested_vmcb->control.intercept; | ||
2058 | |||
2059 | svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl; | ||
1854 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; | 2060 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; |
1855 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; | 2061 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; |
1856 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; | 2062 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; |
1857 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; | 2063 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; |
1858 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; | 2064 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; |
1859 | 2065 | ||
1860 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2066 | nested_svm_unmap(page); |
2067 | |||
2068 | /* nested_vmcb is our indicator if nested SVM is activated */ | ||
2069 | svm->nested.vmcb = vmcb_gpa; | ||
1861 | 2070 | ||
1862 | enable_gif(svm); | 2071 | enable_gif(svm); |
1863 | 2072 | ||
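The hunk above reorders nested_svm_vmrun() so that the host's own intercept adjustments (dropping the CR8 intercepts when the L1 guest uses V_INTR masking, and clearing L0's own VMMCALL intercept) are applied before the guest's intercepts are OR-merged in. A minimal user-space sketch of that merge policy follows; the bit positions are placeholders standing in for the real VMCB definitions, not the kernel's encodings.

    /* Illustrative sketch of the intercept-merge policy; constants are
     * placeholders, not the kernel's VMCB bit layout. */
    #include <stdint.h>

    #define INTERCEPT_CR8_MASK  (1u << 8)    /* placeholder bit */
    #define INTERCEPT_VMMCALL   33           /* placeholder bit position */

    struct intercepts {
        uint16_t cr_read, cr_write;
        uint64_t intercept;
    };

    static void merge_intercepts(struct intercepts *host,
                                 const struct intercepts *guest,
                                 int guest_uses_vintr)
    {
        if (guest_uses_vintr) {
            /* L1 handles CR8/TPR itself, so drop L0's CR8 intercepts */
            host->cr_read  &= ~INTERCEPT_CR8_MASK;
            host->cr_write &= ~INTERCEPT_CR8_MASK;
        }
        /* drop L0's own VMMCALL intercept; it comes back below only if
         * the L1 guest asked for it */
        host->intercept &= ~(1ULL << INTERCEPT_VMMCALL);

        /* OR-merge: the nested guest can only add intercepts, never
         * remove any the host relies on */
        host->cr_read   |= guest->cr_read;
        host->cr_write  |= guest->cr_write;
        host->intercept |= guest->intercept;
    }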
@@ -1883,6 +2092,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) | |||
1883 | static int vmload_interception(struct vcpu_svm *svm) | 2092 | static int vmload_interception(struct vcpu_svm *svm) |
1884 | { | 2093 | { |
1885 | struct vmcb *nested_vmcb; | 2094 | struct vmcb *nested_vmcb; |
2095 | struct page *page; | ||
1886 | 2096 | ||
1887 | if (nested_svm_check_permissions(svm)) | 2097 | if (nested_svm_check_permissions(svm)) |
1888 | return 1; | 2098 | return 1; |
@@ -1890,12 +2100,12 @@ static int vmload_interception(struct vcpu_svm *svm) | |||
1890 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 2100 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
1891 | skip_emulated_instruction(&svm->vcpu); | 2101 | skip_emulated_instruction(&svm->vcpu); |
1892 | 2102 | ||
1893 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 2103 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); |
1894 | if (!nested_vmcb) | 2104 | if (!nested_vmcb) |
1895 | return 1; | 2105 | return 1; |
1896 | 2106 | ||
1897 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); | 2107 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); |
1898 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2108 | nested_svm_unmap(page); |
1899 | 2109 | ||
1900 | return 1; | 2110 | return 1; |
1901 | } | 2111 | } |
@@ -1903,6 +2113,7 @@ static int vmload_interception(struct vcpu_svm *svm) | |||
1903 | static int vmsave_interception(struct vcpu_svm *svm) | 2113 | static int vmsave_interception(struct vcpu_svm *svm) |
1904 | { | 2114 | { |
1905 | struct vmcb *nested_vmcb; | 2115 | struct vmcb *nested_vmcb; |
2116 | struct page *page; | ||
1906 | 2117 | ||
1907 | if (nested_svm_check_permissions(svm)) | 2118 | if (nested_svm_check_permissions(svm)) |
1908 | return 1; | 2119 | return 1; |
@@ -1910,12 +2121,12 @@ static int vmsave_interception(struct vcpu_svm *svm) | |||
1910 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 2121 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
1911 | skip_emulated_instruction(&svm->vcpu); | 2122 | skip_emulated_instruction(&svm->vcpu); |
1912 | 2123 | ||
1913 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 2124 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); |
1914 | if (!nested_vmcb) | 2125 | if (!nested_vmcb) |
1915 | return 1; | 2126 | return 1; |
1916 | 2127 | ||
1917 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); | 2128 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); |
1918 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2129 | nested_svm_unmap(page); |
1919 | 2130 | ||
1920 | return 1; | 2131 | return 1; |
1921 | } | 2132 | } |
@@ -2018,6 +2229,8 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
2018 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; | 2229 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; |
2019 | uint32_t idt_v = | 2230 | uint32_t idt_v = |
2020 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; | 2231 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; |
2232 | bool has_error_code = false; | ||
2233 | u32 error_code = 0; | ||
2021 | 2234 | ||
2022 | tss_selector = (u16)svm->vmcb->control.exit_info_1; | 2235 | tss_selector = (u16)svm->vmcb->control.exit_info_1; |
2023 | 2236 | ||
@@ -2038,6 +2251,12 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
2038 | svm->vcpu.arch.nmi_injected = false; | 2251 | svm->vcpu.arch.nmi_injected = false; |
2039 | break; | 2252 | break; |
2040 | case SVM_EXITINTINFO_TYPE_EXEPT: | 2253 | case SVM_EXITINTINFO_TYPE_EXEPT: |
2254 | if (svm->vmcb->control.exit_info_2 & | ||
2255 | (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) { | ||
2256 | has_error_code = true; | ||
2257 | error_code = | ||
2258 | (u32)svm->vmcb->control.exit_info_2; | ||
2259 | } | ||
2041 | kvm_clear_exception_queue(&svm->vcpu); | 2260 | kvm_clear_exception_queue(&svm->vcpu); |
2042 | break; | 2261 | break; |
2043 | case SVM_EXITINTINFO_TYPE_INTR: | 2262 | case SVM_EXITINTINFO_TYPE_INTR: |
@@ -2054,7 +2273,14 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
2054 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) | 2273 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) |
2055 | skip_emulated_instruction(&svm->vcpu); | 2274 | skip_emulated_instruction(&svm->vcpu); |
2056 | 2275 | ||
2057 | return kvm_task_switch(&svm->vcpu, tss_selector, reason); | 2276 | if (kvm_task_switch(&svm->vcpu, tss_selector, reason, |
2277 | has_error_code, error_code) == EMULATE_FAIL) { | ||
2278 | svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
2279 | svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
2280 | svm->vcpu.run->internal.ndata = 0; | ||
2281 | return 0; | ||
2282 | } | ||
2283 | return 1; | ||
2058 | } | 2284 | } |
2059 | 2285 | ||
2060 | static int cpuid_interception(struct vcpu_svm *svm) | 2286 | static int cpuid_interception(struct vcpu_svm *svm) |
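The new error-code plumbing in task_switch_interception() can be read as: when the task switch was triggered by an exception that pushes an error code, EXITINFO2 carries both a flag bit and the code itself, and only the low 32 bits are forwarded to kvm_task_switch(). A hedged sketch of that decode step; the shift constant below is an assumed stand-in, not necessarily the real SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE value.

    /* Sketch only: the flag-bit position is taken as an assumption. */
    #include <stdint.h>
    #include <stdbool.h>

    #define TS_HAS_ERROR_CODE_SHIFT 44   /* assumed bit position */

    static void decode_ts_exitinfo2(uint64_t exit_info_2,
                                    bool *has_error_code,
                                    uint32_t *error_code)
    {
        *has_error_code = false;
        *error_code = 0;
        if (exit_info_2 & (1ULL << TS_HAS_ERROR_CODE_SHIFT)) {
            *has_error_code = true;
            *error_code = (uint32_t)exit_info_2;   /* low 32 bits */
        }
    }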
@@ -2067,7 +2293,7 @@ static int cpuid_interception(struct vcpu_svm *svm) | |||
2067 | static int iret_interception(struct vcpu_svm *svm) | 2293 | static int iret_interception(struct vcpu_svm *svm) |
2068 | { | 2294 | { |
2069 | ++svm->vcpu.stat.nmi_window_exits; | 2295 | ++svm->vcpu.stat.nmi_window_exits; |
2070 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); | 2296 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); |
2071 | svm->vcpu.arch.hflags |= HF_IRET_MASK; | 2297 | svm->vcpu.arch.hflags |= HF_IRET_MASK; |
2072 | return 1; | 2298 | return 1; |
2073 | } | 2299 | } |
@@ -2145,9 +2371,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2145 | case MSR_IA32_SYSENTER_ESP: | 2371 | case MSR_IA32_SYSENTER_ESP: |
2146 | *data = svm->sysenter_esp; | 2372 | *data = svm->sysenter_esp; |
2147 | break; | 2373 | break; |
2148 | /* Nobody will change the following 5 values in the VMCB so | 2374 | /* |
2149 | we can safely return them on rdmsr. They will always be 0 | 2375 | * Nobody will change the following 5 values in the VMCB so we can |
2150 | until LBRV is implemented. */ | 2376 | * safely return them on rdmsr. They will always be 0 until LBRV is |
2377 | * implemented. | ||
2378 | */ | ||
2151 | case MSR_IA32_DEBUGCTLMSR: | 2379 | case MSR_IA32_DEBUGCTLMSR: |
2152 | *data = svm->vmcb->save.dbgctl; | 2380 | *data = svm->vmcb->save.dbgctl; |
2153 | break; | 2381 | break; |
@@ -2167,7 +2395,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2167 | *data = svm->nested.hsave_msr; | 2395 | *data = svm->nested.hsave_msr; |
2168 | break; | 2396 | break; |
2169 | case MSR_VM_CR: | 2397 | case MSR_VM_CR: |
2170 | *data = 0; | 2398 | *data = svm->nested.vm_cr_msr; |
2171 | break; | 2399 | break; |
2172 | case MSR_IA32_UCODE_REV: | 2400 | case MSR_IA32_UCODE_REV: |
2173 | *data = 0x01000065; | 2401 | *data = 0x01000065; |
@@ -2197,6 +2425,31 @@ static int rdmsr_interception(struct vcpu_svm *svm) | |||
2197 | return 1; | 2425 | return 1; |
2198 | } | 2426 | } |
2199 | 2427 | ||
2428 | static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) | ||
2429 | { | ||
2430 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2431 | int svm_dis, chg_mask; | ||
2432 | |||
2433 | if (data & ~SVM_VM_CR_VALID_MASK) | ||
2434 | return 1; | ||
2435 | |||
2436 | chg_mask = SVM_VM_CR_VALID_MASK; | ||
2437 | |||
2438 | if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK) | ||
2439 | chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK); | ||
2440 | |||
2441 | svm->nested.vm_cr_msr &= ~chg_mask; | ||
2442 | svm->nested.vm_cr_msr |= (data & chg_mask); | ||
2443 | |||
2444 | svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK; | ||
2445 | |||
2446 | /* check for svm_disable while efer.svme is set */ | ||
2447 | if (svm_dis && (vcpu->arch.efer & EFER_SVME)) | ||
2448 | return 1; | ||
2449 | |||
2450 | return 0; | ||
2451 | } | ||
2452 | |||
2200 | static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | 2453 | static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) |
2201 | { | 2454 | { |
2202 | struct vcpu_svm *svm = to_svm(vcpu); | 2455 | struct vcpu_svm *svm = to_svm(vcpu); |
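The new svm_set_vm_cr() above models the lock semantics of MSR_VM_CR: only the defined bits may be written, once SVMDIS is latched the SVMDIS and LOCK bits become read-only, and a write that leaves SVMDIS set while EFER.SVME is still enabled is refused. The same mask arithmetic in isolation, as a stand-alone sketch; the bit layout is a reduced placeholder, not the full SVM_VM_CR_VALID_MASK.

    /* Sketch of the MSR_VM_CR write filter; bit layout is simplified. */
    #include <stdint.h>

    #define VM_CR_SVM_LOCK  (1u << 3)
    #define VM_CR_SVM_DIS   (1u << 4)
    #define VM_CR_VALID     (VM_CR_SVM_LOCK | VM_CR_SVM_DIS) /* subset */
    #define EFER_SVME_BIT   (1u << 12)

    /* returns 0 on success, 1 to signal a #GP to the guest */
    static int set_vm_cr(uint64_t *vm_cr, uint64_t data, uint64_t efer)
    {
        uint64_t chg_mask = VM_CR_VALID;

        if (data & ~(uint64_t)VM_CR_VALID)
            return 1;                       /* reserved bit set */

        if (*vm_cr & VM_CR_SVM_DIS)         /* SVMDIS already latched? */
            chg_mask &= ~(VM_CR_SVM_LOCK | VM_CR_SVM_DIS);

        *vm_cr = (*vm_cr & ~chg_mask) | (data & chg_mask);

        /* disabling SVM while EFER.SVME is set is not allowed */
        if ((*vm_cr & VM_CR_SVM_DIS) && (efer & EFER_SVME_BIT))
            return 1;

        return 0;
    }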
@@ -2263,6 +2516,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
2263 | svm->nested.hsave_msr = data; | 2516 | svm->nested.hsave_msr = data; |
2264 | break; | 2517 | break; |
2265 | case MSR_VM_CR: | 2518 | case MSR_VM_CR: |
2519 | return svm_set_vm_cr(vcpu, data); | ||
2266 | case MSR_VM_IGNNE: | 2520 | case MSR_VM_IGNNE: |
2267 | pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); | 2521 | pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); |
2268 | break; | 2522 | break; |
@@ -2326,16 +2580,16 @@ static int pause_interception(struct vcpu_svm *svm) | |||
2326 | } | 2580 | } |
2327 | 2581 | ||
2328 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | 2582 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { |
2329 | [SVM_EXIT_READ_CR0] = emulate_on_interception, | 2583 | [SVM_EXIT_READ_CR0] = emulate_on_interception, |
2330 | [SVM_EXIT_READ_CR3] = emulate_on_interception, | 2584 | [SVM_EXIT_READ_CR3] = emulate_on_interception, |
2331 | [SVM_EXIT_READ_CR4] = emulate_on_interception, | 2585 | [SVM_EXIT_READ_CR4] = emulate_on_interception, |
2332 | [SVM_EXIT_READ_CR8] = emulate_on_interception, | 2586 | [SVM_EXIT_READ_CR8] = emulate_on_interception, |
2333 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, | 2587 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, |
2334 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, | 2588 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, |
2335 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, | 2589 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, |
2336 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, | 2590 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, |
2337 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, | 2591 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, |
2338 | [SVM_EXIT_READ_DR0] = emulate_on_interception, | 2592 | [SVM_EXIT_READ_DR0] = emulate_on_interception, |
2339 | [SVM_EXIT_READ_DR1] = emulate_on_interception, | 2593 | [SVM_EXIT_READ_DR1] = emulate_on_interception, |
2340 | [SVM_EXIT_READ_DR2] = emulate_on_interception, | 2594 | [SVM_EXIT_READ_DR2] = emulate_on_interception, |
2341 | [SVM_EXIT_READ_DR3] = emulate_on_interception, | 2595 | [SVM_EXIT_READ_DR3] = emulate_on_interception, |
@@ -2354,15 +2608,14 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
2354 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, | 2608 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, |
2355 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, | 2609 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, |
2356 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, | 2610 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, |
2357 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, | 2611 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, |
2358 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, | 2612 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, |
2359 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, | 2613 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, |
2360 | [SVM_EXIT_INTR] = intr_interception, | 2614 | [SVM_EXIT_INTR] = intr_interception, |
2361 | [SVM_EXIT_NMI] = nmi_interception, | 2615 | [SVM_EXIT_NMI] = nmi_interception, |
2362 | [SVM_EXIT_SMI] = nop_on_interception, | 2616 | [SVM_EXIT_SMI] = nop_on_interception, |
2363 | [SVM_EXIT_INIT] = nop_on_interception, | 2617 | [SVM_EXIT_INIT] = nop_on_interception, |
2364 | [SVM_EXIT_VINTR] = interrupt_window_interception, | 2618 | [SVM_EXIT_VINTR] = interrupt_window_interception, |
2365 | /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ | ||
2366 | [SVM_EXIT_CPUID] = cpuid_interception, | 2619 | [SVM_EXIT_CPUID] = cpuid_interception, |
2367 | [SVM_EXIT_IRET] = iret_interception, | 2620 | [SVM_EXIT_IRET] = iret_interception, |
2368 | [SVM_EXIT_INVD] = emulate_on_interception, | 2621 | [SVM_EXIT_INVD] = emulate_on_interception, |
@@ -2370,7 +2623,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
2370 | [SVM_EXIT_HLT] = halt_interception, | 2623 | [SVM_EXIT_HLT] = halt_interception, |
2371 | [SVM_EXIT_INVLPG] = invlpg_interception, | 2624 | [SVM_EXIT_INVLPG] = invlpg_interception, |
2372 | [SVM_EXIT_INVLPGA] = invlpga_interception, | 2625 | [SVM_EXIT_INVLPGA] = invlpga_interception, |
2373 | [SVM_EXIT_IOIO] = io_interception, | 2626 | [SVM_EXIT_IOIO] = io_interception, |
2374 | [SVM_EXIT_MSR] = msr_interception, | 2627 | [SVM_EXIT_MSR] = msr_interception, |
2375 | [SVM_EXIT_TASK_SWITCH] = task_switch_interception, | 2628 | [SVM_EXIT_TASK_SWITCH] = task_switch_interception, |
2376 | [SVM_EXIT_SHUTDOWN] = shutdown_interception, | 2629 | [SVM_EXIT_SHUTDOWN] = shutdown_interception, |
@@ -2393,7 +2646,12 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
2393 | struct kvm_run *kvm_run = vcpu->run; | 2646 | struct kvm_run *kvm_run = vcpu->run; |
2394 | u32 exit_code = svm->vmcb->control.exit_code; | 2647 | u32 exit_code = svm->vmcb->control.exit_code; |
2395 | 2648 | ||
2396 | trace_kvm_exit(exit_code, svm->vmcb->save.rip); | 2649 | trace_kvm_exit(exit_code, vcpu); |
2650 | |||
2651 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) | ||
2652 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | ||
2653 | if (npt_enabled) | ||
2654 | vcpu->arch.cr3 = svm->vmcb->save.cr3; | ||
2397 | 2655 | ||
2398 | if (unlikely(svm->nested.exit_required)) { | 2656 | if (unlikely(svm->nested.exit_required)) { |
2399 | nested_svm_vmexit(svm); | 2657 | nested_svm_vmexit(svm); |
@@ -2422,11 +2680,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
2422 | 2680 | ||
2423 | svm_complete_interrupts(svm); | 2681 | svm_complete_interrupts(svm); |
2424 | 2682 | ||
2425 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) | ||
2426 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | ||
2427 | if (npt_enabled) | ||
2428 | vcpu->arch.cr3 = svm->vmcb->save.cr3; | ||
2429 | |||
2430 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { | 2683 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { |
2431 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 2684 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
2432 | kvm_run->fail_entry.hardware_entry_failure_reason | 2685 | kvm_run->fail_entry.hardware_entry_failure_reason |
@@ -2479,7 +2732,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) | |||
2479 | 2732 | ||
2480 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; | 2733 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; |
2481 | vcpu->arch.hflags |= HF_NMI_MASK; | 2734 | vcpu->arch.hflags |= HF_NMI_MASK; |
2482 | svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); | 2735 | svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); |
2483 | ++vcpu->stat.nmi_injections; | 2736 | ++vcpu->stat.nmi_injections; |
2484 | } | 2737 | } |
2485 | 2738 | ||
@@ -2511,6 +2764,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
2511 | { | 2764 | { |
2512 | struct vcpu_svm *svm = to_svm(vcpu); | 2765 | struct vcpu_svm *svm = to_svm(vcpu); |
2513 | 2766 | ||
2767 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
2768 | return; | ||
2769 | |||
2514 | if (irr == -1) | 2770 | if (irr == -1) |
2515 | return; | 2771 | return; |
2516 | 2772 | ||
@@ -2522,8 +2778,12 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | |||
2522 | { | 2778 | { |
2523 | struct vcpu_svm *svm = to_svm(vcpu); | 2779 | struct vcpu_svm *svm = to_svm(vcpu); |
2524 | struct vmcb *vmcb = svm->vmcb; | 2780 | struct vmcb *vmcb = svm->vmcb; |
2525 | return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && | 2781 | int ret; |
2526 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); | 2782 | ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && |
2783 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); | ||
2784 | ret = ret && gif_set(svm) && nested_svm_nmi(svm); | ||
2785 | |||
2786 | return ret; | ||
2527 | } | 2787 | } |
2528 | 2788 | ||
2529 | static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) | 2789 | static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) |
@@ -2539,10 +2799,10 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
2539 | 2799 | ||
2540 | if (masked) { | 2800 | if (masked) { |
2541 | svm->vcpu.arch.hflags |= HF_NMI_MASK; | 2801 | svm->vcpu.arch.hflags |= HF_NMI_MASK; |
2542 | svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); | 2802 | svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); |
2543 | } else { | 2803 | } else { |
2544 | svm->vcpu.arch.hflags &= ~HF_NMI_MASK; | 2804 | svm->vcpu.arch.hflags &= ~HF_NMI_MASK; |
2545 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); | 2805 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); |
2546 | } | 2806 | } |
2547 | } | 2807 | } |
2548 | 2808 | ||
@@ -2568,13 +2828,13 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) | |||
2568 | { | 2828 | { |
2569 | struct vcpu_svm *svm = to_svm(vcpu); | 2829 | struct vcpu_svm *svm = to_svm(vcpu); |
2570 | 2830 | ||
2571 | nested_svm_intr(svm); | 2831 | /* |
2572 | 2832 | * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes | |
2573 | /* In case GIF=0 we can't rely on the CPU to tell us when | 2833 | * 1, because that's a separate STGI/VMRUN intercept. The next time we |
2574 | * GIF becomes 1, because that's a separate STGI/VMRUN intercept. | 2834 | * get that intercept, this function will be called again though and |
2575 | * The next time we get that intercept, this function will be | 2835 | * we'll get the vintr intercept. |
2576 | * called again though and we'll get the vintr intercept. */ | 2836 | */ |
2577 | if (gif_set(svm)) { | 2837 | if (gif_set(svm) && nested_svm_intr(svm)) { |
2578 | svm_set_vintr(svm); | 2838 | svm_set_vintr(svm); |
2579 | svm_inject_irq(svm, 0x0); | 2839 | svm_inject_irq(svm, 0x0); |
2580 | } | 2840 | } |
@@ -2588,9 +2848,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
2588 | == HF_NMI_MASK) | 2848 | == HF_NMI_MASK) |
2589 | return; /* IRET will cause a vm exit */ | 2849 | return; /* IRET will cause a vm exit */ |
2590 | 2850 | ||
2591 | /* Something prevents NMI from been injected. Single step over | 2851 | /* |
2592 | possible problem (IRET or exception injection or interrupt | 2852 | * Something prevents NMI from been injected. Single step over possible |
2593 | shadow) */ | 2853 | * problem (IRET or exception injection or interrupt shadow) |
2854 | */ | ||
2594 | svm->nmi_singlestep = true; | 2855 | svm->nmi_singlestep = true; |
2595 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | 2856 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
2596 | update_db_intercept(vcpu); | 2857 | update_db_intercept(vcpu); |
@@ -2614,6 +2875,9 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) | |||
2614 | { | 2875 | { |
2615 | struct vcpu_svm *svm = to_svm(vcpu); | 2876 | struct vcpu_svm *svm = to_svm(vcpu); |
2616 | 2877 | ||
2878 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
2879 | return; | ||
2880 | |||
2617 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { | 2881 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { |
2618 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; | 2882 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; |
2619 | kvm_set_cr8(vcpu, cr8); | 2883 | kvm_set_cr8(vcpu, cr8); |
@@ -2625,6 +2889,9 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) | |||
2625 | struct vcpu_svm *svm = to_svm(vcpu); | 2889 | struct vcpu_svm *svm = to_svm(vcpu); |
2626 | u64 cr8; | 2890 | u64 cr8; |
2627 | 2891 | ||
2892 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
2893 | return; | ||
2894 | |||
2628 | cr8 = kvm_get_cr8(vcpu); | 2895 | cr8 = kvm_get_cr8(vcpu); |
2629 | svm->vmcb->control.int_ctl &= ~V_TPR_MASK; | 2896 | svm->vmcb->control.int_ctl &= ~V_TPR_MASK; |
2630 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; | 2897 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; |
@@ -2635,6 +2902,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
2635 | u8 vector; | 2902 | u8 vector; |
2636 | int type; | 2903 | int type; |
2637 | u32 exitintinfo = svm->vmcb->control.exit_int_info; | 2904 | u32 exitintinfo = svm->vmcb->control.exit_int_info; |
2905 | unsigned int3_injected = svm->int3_injected; | ||
2906 | |||
2907 | svm->int3_injected = 0; | ||
2638 | 2908 | ||
2639 | if (svm->vcpu.arch.hflags & HF_IRET_MASK) | 2909 | if (svm->vcpu.arch.hflags & HF_IRET_MASK) |
2640 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); | 2910 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); |
@@ -2654,18 +2924,25 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
2654 | svm->vcpu.arch.nmi_injected = true; | 2924 | svm->vcpu.arch.nmi_injected = true; |
2655 | break; | 2925 | break; |
2656 | case SVM_EXITINTINFO_TYPE_EXEPT: | 2926 | case SVM_EXITINTINFO_TYPE_EXEPT: |
2657 | /* In case of software exception do not reinject an exception | 2927 | /* |
2658 | vector, but re-execute and instruction instead */ | 2928 | * In case of software exceptions, do not reinject the vector, |
2659 | if (is_nested(svm)) | 2929 | * but re-execute the instruction instead. Rewind RIP first |
2660 | break; | 2930 | * if we emulated INT3 before. |
2661 | if (kvm_exception_is_soft(vector)) | 2931 | */ |
2932 | if (kvm_exception_is_soft(vector)) { | ||
2933 | if (vector == BP_VECTOR && int3_injected && | ||
2934 | kvm_is_linear_rip(&svm->vcpu, svm->int3_rip)) | ||
2935 | kvm_rip_write(&svm->vcpu, | ||
2936 | kvm_rip_read(&svm->vcpu) - | ||
2937 | int3_injected); | ||
2662 | break; | 2938 | break; |
2939 | } | ||
2663 | if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { | 2940 | if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { |
2664 | u32 err = svm->vmcb->control.exit_int_info_err; | 2941 | u32 err = svm->vmcb->control.exit_int_info_err; |
2665 | kvm_queue_exception_e(&svm->vcpu, vector, err); | 2942 | kvm_requeue_exception_e(&svm->vcpu, vector, err); |
2666 | 2943 | ||
2667 | } else | 2944 | } else |
2668 | kvm_queue_exception(&svm->vcpu, vector); | 2945 | kvm_requeue_exception(&svm->vcpu, vector); |
2669 | break; | 2946 | break; |
2670 | case SVM_EXITINTINFO_TYPE_INTR: | 2947 | case SVM_EXITINTINFO_TYPE_INTR: |
2671 | kvm_queue_interrupt(&svm->vcpu, vector, false); | 2948 | kvm_queue_interrupt(&svm->vcpu, vector, false); |
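The reworked soft-exception path above records how far the emulated INT3 injection advanced RIP (svm->int3_injected) together with the linear RIP it was injected at; if a #VMEXIT interrupts delivery, RIP is rewound by that amount so the guest simply re-executes the breakpoint instead of having the vector re-queued. A simplified model of just that rewind decision, under the assumption that int3_rip holds the post-injection RIP:

    /* Illustrative model of the INT3 rewind in svm_complete_interrupts(). */
    #include <stdint.h>

    #define BP_VECTOR 3

    struct soft_exc_state {
        unsigned int int3_injected;  /* bytes RIP was advanced, 0 if none */
        uint64_t     int3_rip;       /* linear RIP right after injection */
    };

    static uint64_t complete_soft_exception(struct soft_exc_state *s,
                                            unsigned int vector,
                                            uint64_t rip)
    {
        /* Soft exceptions are never re-injected; re-execute instead.
         * Rewind only if this really is our pending INT3 and the guest
         * has not moved to a different RIP in the meantime. */
        if (vector == BP_VECTOR && s->int3_injected && rip == s->int3_rip)
            rip -= s->int3_injected;

        s->int3_injected = 0;
        return rip;
    }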
@@ -2688,6 +2965,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
2688 | u16 gs_selector; | 2965 | u16 gs_selector; |
2689 | u16 ldt_selector; | 2966 | u16 ldt_selector; |
2690 | 2967 | ||
2968 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
2969 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
2970 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; | ||
2971 | |||
2691 | /* | 2972 | /* |
2692 | * A vmexit emulation is required before the vcpu can be executed | 2973 | * A vmexit emulation is required before the vcpu can be executed |
2693 | * again. | 2974 | * again. |
@@ -2695,10 +2976,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
2695 | if (unlikely(svm->nested.exit_required)) | 2976 | if (unlikely(svm->nested.exit_required)) |
2696 | return; | 2977 | return; |
2697 | 2978 | ||
2698 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
2699 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
2700 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; | ||
2701 | |||
2702 | pre_svm_run(svm); | 2979 | pre_svm_run(svm); |
2703 | 2980 | ||
2704 | sync_lapic_to_cr8(vcpu); | 2981 | sync_lapic_to_cr8(vcpu); |
@@ -2879,25 +3156,39 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) | |||
2879 | { | 3156 | { |
2880 | } | 3157 | } |
2881 | 3158 | ||
3159 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | ||
3160 | { | ||
3161 | switch (func) { | ||
3162 | case 0x8000000A: | ||
3163 | entry->eax = 1; /* SVM revision 1 */ | ||
3164 | entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper | ||
3165 | ASID emulation to nested SVM */ | ||
3166 | entry->ecx = 0; /* Reserved */ | ||
3167 | entry->edx = 0; /* Do not support any additional features */ | ||
3168 | |||
3169 | break; | ||
3170 | } | ||
3171 | } | ||
3172 | |||
2882 | static const struct trace_print_flags svm_exit_reasons_str[] = { | 3173 | static const struct trace_print_flags svm_exit_reasons_str[] = { |
2883 | { SVM_EXIT_READ_CR0, "read_cr0" }, | 3174 | { SVM_EXIT_READ_CR0, "read_cr0" }, |
2884 | { SVM_EXIT_READ_CR3, "read_cr3" }, | 3175 | { SVM_EXIT_READ_CR3, "read_cr3" }, |
2885 | { SVM_EXIT_READ_CR4, "read_cr4" }, | 3176 | { SVM_EXIT_READ_CR4, "read_cr4" }, |
2886 | { SVM_EXIT_READ_CR8, "read_cr8" }, | 3177 | { SVM_EXIT_READ_CR8, "read_cr8" }, |
2887 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, | 3178 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, |
2888 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, | 3179 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, |
2889 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, | 3180 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, |
2890 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, | 3181 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, |
2891 | { SVM_EXIT_READ_DR0, "read_dr0" }, | 3182 | { SVM_EXIT_READ_DR0, "read_dr0" }, |
2892 | { SVM_EXIT_READ_DR1, "read_dr1" }, | 3183 | { SVM_EXIT_READ_DR1, "read_dr1" }, |
2893 | { SVM_EXIT_READ_DR2, "read_dr2" }, | 3184 | { SVM_EXIT_READ_DR2, "read_dr2" }, |
2894 | { SVM_EXIT_READ_DR3, "read_dr3" }, | 3185 | { SVM_EXIT_READ_DR3, "read_dr3" }, |
2895 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, | 3186 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, |
2896 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, | 3187 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, |
2897 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, | 3188 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, |
2898 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, | 3189 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, |
2899 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, | 3190 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, |
2900 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, | 3191 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, |
2901 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, | 3192 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, |
2902 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, | 3193 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, |
2903 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, | 3194 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, |
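svm_set_supported_cpuid() above is what makes nested SVM discoverable from inside the guest: leaf 0x8000000A reports SVM revision 1 in EAX, 8 ASIDs in EBX and no optional features in EDX. A small user-space probe (GCC/Clang <cpuid.h>, run inside such a guest) might look roughly like this; checking the SVM feature bit in leaf 0x80000001 first is the usual precaution.

    /* Probe the SVM feature leaf; intended to run inside a KVM guest. */
    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* CPUID 0x80000001: ECX bit 2 advertises SVM support */
        if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx) ||
            !(ecx & (1u << 2))) {
            puts("SVM not advertised");
            return 1;
        }

        if (!__get_cpuid(0x8000000A, &eax, &ebx, &ecx, &edx)) {
            puts("SVM feature leaf not available");
            return 1;
        }

        printf("SVM revision: %u\n", eax & 0xff);
        printf("Number of ASIDs: %u\n", ebx);
        printf("Optional features (EDX): 0x%08x\n", edx);
        return 0;
    }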
@@ -2946,8 +3237,10 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | |||
2946 | { | 3237 | { |
2947 | struct vcpu_svm *svm = to_svm(vcpu); | 3238 | struct vcpu_svm *svm = to_svm(vcpu); |
2948 | 3239 | ||
2949 | update_cr0_intercept(svm); | ||
2950 | svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; | 3240 | svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; |
3241 | if (is_nested(svm)) | ||
3242 | svm->nested.hsave->control.intercept_exceptions |= 1 << NM_VECTOR; | ||
3243 | update_cr0_intercept(svm); | ||
2951 | } | 3244 | } |
2952 | 3245 | ||
2953 | static struct kvm_x86_ops svm_x86_ops = { | 3246 | static struct kvm_x86_ops svm_x86_ops = { |
@@ -2986,8 +3279,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
2986 | .set_idt = svm_set_idt, | 3279 | .set_idt = svm_set_idt, |
2987 | .get_gdt = svm_get_gdt, | 3280 | .get_gdt = svm_get_gdt, |
2988 | .set_gdt = svm_set_gdt, | 3281 | .set_gdt = svm_set_gdt, |
2989 | .get_dr = svm_get_dr, | 3282 | .set_dr7 = svm_set_dr7, |
2990 | .set_dr = svm_set_dr, | ||
2991 | .cache_reg = svm_cache_reg, | 3283 | .cache_reg = svm_cache_reg, |
2992 | .get_rflags = svm_get_rflags, | 3284 | .get_rflags = svm_get_rflags, |
2993 | .set_rflags = svm_set_rflags, | 3285 | .set_rflags = svm_set_rflags, |
@@ -3023,12 +3315,14 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
3023 | .cpuid_update = svm_cpuid_update, | 3315 | .cpuid_update = svm_cpuid_update, |
3024 | 3316 | ||
3025 | .rdtscp_supported = svm_rdtscp_supported, | 3317 | .rdtscp_supported = svm_rdtscp_supported, |
3318 | |||
3319 | .set_supported_cpuid = svm_set_supported_cpuid, | ||
3026 | }; | 3320 | }; |
3027 | 3321 | ||
3028 | static int __init svm_init(void) | 3322 | static int __init svm_init(void) |
3029 | { | 3323 | { |
3030 | return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), | 3324 | return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), |
3031 | THIS_MODULE); | 3325 | __alignof__(struct vcpu_svm), THIS_MODULE); |
3032 | } | 3326 | } |
3033 | 3327 | ||
3034 | static void __exit svm_exit(void) | 3328 | static void __exit svm_exit(void) |
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index eea40439066c..4ddadb1a5ffe 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c | |||
@@ -12,7 +12,8 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | |||
12 | /* | 12 | /* |
13 | * There is a race window between reading and incrementing, but we do | 13 | * There is a race window between reading and incrementing, but we do |
14 | * not care about potentially loosing timer events in the !reinject | 14 | * not care about potentially loosing timer events in the !reinject |
15 | * case anyway. | 15 | * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked |
16 | * in vcpu_enter_guest. | ||
16 | */ | 17 | */ |
17 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { | 18 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { |
18 | atomic_inc(&ktimer->pending); | 19 | atomic_inc(&ktimer->pending); |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 6ad30a29f044..a6544b8e7c0f 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -5,8 +5,6 @@ | |||
5 | 5 | ||
6 | #undef TRACE_SYSTEM | 6 | #undef TRACE_SYSTEM |
7 | #define TRACE_SYSTEM kvm | 7 | #define TRACE_SYSTEM kvm |
8 | #define TRACE_INCLUDE_PATH arch/x86/kvm | ||
9 | #define TRACE_INCLUDE_FILE trace | ||
10 | 8 | ||
11 | /* | 9 | /* |
12 | * Tracepoint for guest mode entry. | 10 | * Tracepoint for guest mode entry. |
@@ -184,8 +182,8 @@ TRACE_EVENT(kvm_apic, | |||
184 | * Tracepoint for kvm guest exit: | 182 | * Tracepoint for kvm guest exit: |
185 | */ | 183 | */ |
186 | TRACE_EVENT(kvm_exit, | 184 | TRACE_EVENT(kvm_exit, |
187 | TP_PROTO(unsigned int exit_reason, unsigned long guest_rip), | 185 | TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu), |
188 | TP_ARGS(exit_reason, guest_rip), | 186 | TP_ARGS(exit_reason, vcpu), |
189 | 187 | ||
190 | TP_STRUCT__entry( | 188 | TP_STRUCT__entry( |
191 | __field( unsigned int, exit_reason ) | 189 | __field( unsigned int, exit_reason ) |
@@ -194,7 +192,7 @@ TRACE_EVENT(kvm_exit, | |||
194 | 192 | ||
195 | TP_fast_assign( | 193 | TP_fast_assign( |
196 | __entry->exit_reason = exit_reason; | 194 | __entry->exit_reason = exit_reason; |
197 | __entry->guest_rip = guest_rip; | 195 | __entry->guest_rip = kvm_rip_read(vcpu); |
198 | ), | 196 | ), |
199 | 197 | ||
200 | TP_printk("reason %s rip 0x%lx", | 198 | TP_printk("reason %s rip 0x%lx", |
@@ -221,6 +219,38 @@ TRACE_EVENT(kvm_inj_virq, | |||
221 | TP_printk("irq %u", __entry->irq) | 219 | TP_printk("irq %u", __entry->irq) |
222 | ); | 220 | ); |
223 | 221 | ||
222 | #define EXS(x) { x##_VECTOR, "#" #x } | ||
223 | |||
224 | #define kvm_trace_sym_exc \ | ||
225 | EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ | ||
226 | EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ | ||
227 | EXS(MF), EXS(MC) | ||
228 | |||
229 | /* | ||
230 | * Tracepoint for kvm interrupt injection: | ||
231 | */ | ||
232 | TRACE_EVENT(kvm_inj_exception, | ||
233 | TP_PROTO(unsigned exception, bool has_error, unsigned error_code), | ||
234 | TP_ARGS(exception, has_error, error_code), | ||
235 | |||
236 | TP_STRUCT__entry( | ||
237 | __field( u8, exception ) | ||
238 | __field( u8, has_error ) | ||
239 | __field( u32, error_code ) | ||
240 | ), | ||
241 | |||
242 | TP_fast_assign( | ||
243 | __entry->exception = exception; | ||
244 | __entry->has_error = has_error; | ||
245 | __entry->error_code = error_code; | ||
246 | ), | ||
247 | |||
248 | TP_printk("%s (0x%x)", | ||
249 | __print_symbolic(__entry->exception, kvm_trace_sym_exc), | ||
250 | /* FIXME: don't print error_code if not present */ | ||
251 | __entry->has_error ? __entry->error_code : 0) | ||
252 | ); | ||
253 | |||
224 | /* | 254 | /* |
225 | * Tracepoint for page fault. | 255 | * Tracepoint for page fault. |
226 | */ | 256 | */ |
@@ -413,12 +443,34 @@ TRACE_EVENT(kvm_nested_vmrun, | |||
413 | ), | 443 | ), |
414 | 444 | ||
415 | TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " | 445 | TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " |
416 | "event_inj: 0x%08x npt: %s\n", | 446 | "event_inj: 0x%08x npt: %s", |
417 | __entry->rip, __entry->vmcb, __entry->nested_rip, | 447 | __entry->rip, __entry->vmcb, __entry->nested_rip, |
418 | __entry->int_ctl, __entry->event_inj, | 448 | __entry->int_ctl, __entry->event_inj, |
419 | __entry->npt ? "on" : "off") | 449 | __entry->npt ? "on" : "off") |
420 | ); | 450 | ); |
421 | 451 | ||
452 | TRACE_EVENT(kvm_nested_intercepts, | ||
453 | TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions, __u64 intercept), | ||
454 | TP_ARGS(cr_read, cr_write, exceptions, intercept), | ||
455 | |||
456 | TP_STRUCT__entry( | ||
457 | __field( __u16, cr_read ) | ||
458 | __field( __u16, cr_write ) | ||
459 | __field( __u32, exceptions ) | ||
460 | __field( __u64, intercept ) | ||
461 | ), | ||
462 | |||
463 | TP_fast_assign( | ||
464 | __entry->cr_read = cr_read; | ||
465 | __entry->cr_write = cr_write; | ||
466 | __entry->exceptions = exceptions; | ||
467 | __entry->intercept = intercept; | ||
468 | ), | ||
469 | |||
470 | TP_printk("cr_read: %04x cr_write: %04x excp: %08x intercept: %016llx", | ||
471 | __entry->cr_read, __entry->cr_write, __entry->exceptions, | ||
472 | __entry->intercept) | ||
473 | ); | ||
422 | /* | 474 | /* |
423 | * Tracepoint for #VMEXIT while nested | 475 | * Tracepoint for #VMEXIT while nested |
424 | */ | 476 | */ |
@@ -447,7 +499,7 @@ TRACE_EVENT(kvm_nested_vmexit, | |||
447 | __entry->exit_int_info_err = exit_int_info_err; | 499 | __entry->exit_int_info_err = exit_int_info_err; |
448 | ), | 500 | ), |
449 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " | 501 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " |
450 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | 502 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", |
451 | __entry->rip, | 503 | __entry->rip, |
452 | ftrace_print_symbols_seq(p, __entry->exit_code, | 504 | ftrace_print_symbols_seq(p, __entry->exit_code, |
453 | kvm_x86_ops->exit_reasons_str), | 505 | kvm_x86_ops->exit_reasons_str), |
@@ -482,7 +534,7 @@ TRACE_EVENT(kvm_nested_vmexit_inject, | |||
482 | ), | 534 | ), |
483 | 535 | ||
484 | TP_printk("reason: %s ext_inf1: 0x%016llx " | 536 | TP_printk("reason: %s ext_inf1: 0x%016llx " |
485 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | 537 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", |
486 | ftrace_print_symbols_seq(p, __entry->exit_code, | 538 | ftrace_print_symbols_seq(p, __entry->exit_code, |
487 | kvm_x86_ops->exit_reasons_str), | 539 | kvm_x86_ops->exit_reasons_str), |
488 | __entry->exit_info1, __entry->exit_info2, | 540 | __entry->exit_info1, __entry->exit_info2, |
@@ -504,7 +556,7 @@ TRACE_EVENT(kvm_nested_intr_vmexit, | |||
504 | __entry->rip = rip | 556 | __entry->rip = rip |
505 | ), | 557 | ), |
506 | 558 | ||
507 | TP_printk("rip: 0x%016llx\n", __entry->rip) | 559 | TP_printk("rip: 0x%016llx", __entry->rip) |
508 | ); | 560 | ); |
509 | 561 | ||
510 | /* | 562 | /* |
@@ -526,7 +578,7 @@ TRACE_EVENT(kvm_invlpga, | |||
526 | __entry->address = address; | 578 | __entry->address = address; |
527 | ), | 579 | ), |
528 | 580 | ||
529 | TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", | 581 | TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx", |
530 | __entry->rip, __entry->asid, __entry->address) | 582 | __entry->rip, __entry->asid, __entry->address) |
531 | ); | 583 | ); |
532 | 584 | ||
@@ -547,11 +599,102 @@ TRACE_EVENT(kvm_skinit, | |||
547 | __entry->slb = slb; | 599 | __entry->slb = slb; |
548 | ), | 600 | ), |
549 | 601 | ||
550 | TP_printk("rip: 0x%016llx slb: 0x%08x\n", | 602 | TP_printk("rip: 0x%016llx slb: 0x%08x", |
551 | __entry->rip, __entry->slb) | 603 | __entry->rip, __entry->slb) |
552 | ); | 604 | ); |
553 | 605 | ||
606 | #define __print_insn(insn, ilen) ({ \ | ||
607 | int i; \ | ||
608 | const char *ret = p->buffer + p->len; \ | ||
609 | \ | ||
610 | for (i = 0; i < ilen; ++i) \ | ||
611 | trace_seq_printf(p, " %02x", insn[i]); \ | ||
612 | trace_seq_printf(p, "%c", 0); \ | ||
613 | ret; \ | ||
614 | }) | ||
615 | |||
616 | #define KVM_EMUL_INSN_F_CR0_PE (1 << 0) | ||
617 | #define KVM_EMUL_INSN_F_EFL_VM (1 << 1) | ||
618 | #define KVM_EMUL_INSN_F_CS_D (1 << 2) | ||
619 | #define KVM_EMUL_INSN_F_CS_L (1 << 3) | ||
620 | |||
621 | #define kvm_trace_symbol_emul_flags \ | ||
622 | { 0, "real" }, \ | ||
623 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
624 | | KVM_EMUL_INSN_F_EFL_VM, "vm16" }, \ | ||
625 | { KVM_EMUL_INSN_F_CR0_PE, "prot16" }, \ | ||
626 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
627 | | KVM_EMUL_INSN_F_CS_D, "prot32" }, \ | ||
628 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
629 | | KVM_EMUL_INSN_F_CS_L, "prot64" } | ||
630 | |||
631 | #define kei_decode_mode(mode) ({ \ | ||
632 | u8 flags = 0xff; \ | ||
633 | switch (mode) { \ | ||
634 | case X86EMUL_MODE_REAL: \ | ||
635 | flags = 0; \ | ||
636 | break; \ | ||
637 | case X86EMUL_MODE_VM86: \ | ||
638 | flags = KVM_EMUL_INSN_F_EFL_VM; \ | ||
639 | break; \ | ||
640 | case X86EMUL_MODE_PROT16: \ | ||
641 | flags = KVM_EMUL_INSN_F_CR0_PE; \ | ||
642 | break; \ | ||
643 | case X86EMUL_MODE_PROT32: \ | ||
644 | flags = KVM_EMUL_INSN_F_CR0_PE \ | ||
645 | | KVM_EMUL_INSN_F_CS_D; \ | ||
646 | break; \ | ||
647 | case X86EMUL_MODE_PROT64: \ | ||
648 | flags = KVM_EMUL_INSN_F_CR0_PE \ | ||
649 | | KVM_EMUL_INSN_F_CS_L; \ | ||
650 | break; \ | ||
651 | } \ | ||
652 | flags; \ | ||
653 | }) | ||
654 | |||
655 | TRACE_EVENT(kvm_emulate_insn, | ||
656 | TP_PROTO(struct kvm_vcpu *vcpu, __u8 failed), | ||
657 | TP_ARGS(vcpu, failed), | ||
658 | |||
659 | TP_STRUCT__entry( | ||
660 | __field( __u64, rip ) | ||
661 | __field( __u32, csbase ) | ||
662 | __field( __u8, len ) | ||
663 | __array( __u8, insn, 15 ) | ||
664 | __field( __u8, flags ) | ||
665 | __field( __u8, failed ) | ||
666 | ), | ||
667 | |||
668 | TP_fast_assign( | ||
669 | __entry->rip = vcpu->arch.emulate_ctxt.decode.fetch.start; | ||
670 | __entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS); | ||
671 | __entry->len = vcpu->arch.emulate_ctxt.decode.eip | ||
672 | - vcpu->arch.emulate_ctxt.decode.fetch.start; | ||
673 | memcpy(__entry->insn, | ||
674 | vcpu->arch.emulate_ctxt.decode.fetch.data, | ||
675 | 15); | ||
676 | __entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt.mode); | ||
677 | __entry->failed = failed; | ||
678 | ), | ||
679 | |||
680 | TP_printk("%x:%llx:%s (%s)%s", | ||
681 | __entry->csbase, __entry->rip, | ||
682 | __print_insn(__entry->insn, __entry->len), | ||
683 | __print_symbolic(__entry->flags, | ||
684 | kvm_trace_symbol_emul_flags), | ||
685 | __entry->failed ? " failed" : "" | ||
686 | ) | ||
687 | ); | ||
688 | |||
689 | #define trace_kvm_emulate_insn_start(vcpu) trace_kvm_emulate_insn(vcpu, 0) | ||
690 | #define trace_kvm_emulate_insn_failed(vcpu) trace_kvm_emulate_insn(vcpu, 1) | ||
691 | |||
554 | #endif /* _TRACE_KVM_H */ | 692 | #endif /* _TRACE_KVM_H */ |
555 | 693 | ||
694 | #undef TRACE_INCLUDE_PATH | ||
695 | #define TRACE_INCLUDE_PATH arch/x86/kvm | ||
696 | #undef TRACE_INCLUDE_FILE | ||
697 | #define TRACE_INCLUDE_FILE trace | ||
698 | |||
556 | /* This part must be outside protection */ | 699 | /* This part must be outside protection */ |
557 | #include <trace/define_trace.h> | 700 | #include <trace/define_trace.h> |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bc933cfb4e66..859a01a07dbf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/moduleparam.h> | 27 | #include <linux/moduleparam.h> |
28 | #include <linux/ftrace_event.h> | 28 | #include <linux/ftrace_event.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/tboot.h> | ||
30 | #include "kvm_cache_regs.h" | 31 | #include "kvm_cache_regs.h" |
31 | #include "x86.h" | 32 | #include "x86.h" |
32 | 33 | ||
@@ -98,6 +99,8 @@ module_param(ple_gap, int, S_IRUGO); | |||
98 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; | 99 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; |
99 | module_param(ple_window, int, S_IRUGO); | 100 | module_param(ple_window, int, S_IRUGO); |
100 | 101 | ||
102 | #define NR_AUTOLOAD_MSRS 1 | ||
103 | |||
101 | struct vmcs { | 104 | struct vmcs { |
102 | u32 revision_id; | 105 | u32 revision_id; |
103 | u32 abort; | 106 | u32 abort; |
@@ -125,6 +128,11 @@ struct vcpu_vmx { | |||
125 | u64 msr_guest_kernel_gs_base; | 128 | u64 msr_guest_kernel_gs_base; |
126 | #endif | 129 | #endif |
127 | struct vmcs *vmcs; | 130 | struct vmcs *vmcs; |
131 | struct msr_autoload { | ||
132 | unsigned nr; | ||
133 | struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS]; | ||
134 | struct vmx_msr_entry host[NR_AUTOLOAD_MSRS]; | ||
135 | } msr_autoload; | ||
128 | struct { | 136 | struct { |
129 | int loaded; | 137 | int loaded; |
130 | u16 fs_sel, gs_sel, ldt_sel; | 138 | u16 fs_sel, gs_sel, ldt_sel; |
@@ -234,56 +242,56 @@ static const u32 vmx_msr_index[] = { | |||
234 | }; | 242 | }; |
235 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | 243 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) |
236 | 244 | ||
237 | static inline int is_page_fault(u32 intr_info) | 245 | static inline bool is_page_fault(u32 intr_info) |
238 | { | 246 | { |
239 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 247 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
240 | INTR_INFO_VALID_MASK)) == | 248 | INTR_INFO_VALID_MASK)) == |
241 | (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); | 249 | (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); |
242 | } | 250 | } |
243 | 251 | ||
244 | static inline int is_no_device(u32 intr_info) | 252 | static inline bool is_no_device(u32 intr_info) |
245 | { | 253 | { |
246 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 254 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
247 | INTR_INFO_VALID_MASK)) == | 255 | INTR_INFO_VALID_MASK)) == |
248 | (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); | 256 | (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); |
249 | } | 257 | } |
250 | 258 | ||
251 | static inline int is_invalid_opcode(u32 intr_info) | 259 | static inline bool is_invalid_opcode(u32 intr_info) |
252 | { | 260 | { |
253 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 261 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
254 | INTR_INFO_VALID_MASK)) == | 262 | INTR_INFO_VALID_MASK)) == |
255 | (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); | 263 | (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); |
256 | } | 264 | } |
257 | 265 | ||
258 | static inline int is_external_interrupt(u32 intr_info) | 266 | static inline bool is_external_interrupt(u32 intr_info) |
259 | { | 267 | { |
260 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) | 268 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) |
261 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); | 269 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); |
262 | } | 270 | } |
263 | 271 | ||
264 | static inline int is_machine_check(u32 intr_info) | 272 | static inline bool is_machine_check(u32 intr_info) |
265 | { | 273 | { |
266 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 274 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
267 | INTR_INFO_VALID_MASK)) == | 275 | INTR_INFO_VALID_MASK)) == |
268 | (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); | 276 | (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); |
269 | } | 277 | } |
270 | 278 | ||
271 | static inline int cpu_has_vmx_msr_bitmap(void) | 279 | static inline bool cpu_has_vmx_msr_bitmap(void) |
272 | { | 280 | { |
273 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; | 281 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; |
274 | } | 282 | } |
275 | 283 | ||
276 | static inline int cpu_has_vmx_tpr_shadow(void) | 284 | static inline bool cpu_has_vmx_tpr_shadow(void) |
277 | { | 285 | { |
278 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; | 286 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; |
279 | } | 287 | } |
280 | 288 | ||
281 | static inline int vm_need_tpr_shadow(struct kvm *kvm) | 289 | static inline bool vm_need_tpr_shadow(struct kvm *kvm) |
282 | { | 290 | { |
283 | return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); | 291 | return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); |
284 | } | 292 | } |
285 | 293 | ||
286 | static inline int cpu_has_secondary_exec_ctrls(void) | 294 | static inline bool cpu_has_secondary_exec_ctrls(void) |
287 | { | 295 | { |
288 | return vmcs_config.cpu_based_exec_ctrl & | 296 | return vmcs_config.cpu_based_exec_ctrl & |
289 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 297 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
@@ -303,80 +311,80 @@ static inline bool cpu_has_vmx_flexpriority(void) | |||
303 | 311 | ||
304 | static inline bool cpu_has_vmx_ept_execute_only(void) | 312 | static inline bool cpu_has_vmx_ept_execute_only(void) |
305 | { | 313 | { |
306 | return !!(vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT); | 314 | return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT; |
307 | } | 315 | } |
308 | 316 | ||
309 | static inline bool cpu_has_vmx_eptp_uncacheable(void) | 317 | static inline bool cpu_has_vmx_eptp_uncacheable(void) |
310 | { | 318 | { |
311 | return !!(vmx_capability.ept & VMX_EPTP_UC_BIT); | 319 | return vmx_capability.ept & VMX_EPTP_UC_BIT; |
312 | } | 320 | } |
313 | 321 | ||
314 | static inline bool cpu_has_vmx_eptp_writeback(void) | 322 | static inline bool cpu_has_vmx_eptp_writeback(void) |
315 | { | 323 | { |
316 | return !!(vmx_capability.ept & VMX_EPTP_WB_BIT); | 324 | return vmx_capability.ept & VMX_EPTP_WB_BIT; |
317 | } | 325 | } |
318 | 326 | ||
319 | static inline bool cpu_has_vmx_ept_2m_page(void) | 327 | static inline bool cpu_has_vmx_ept_2m_page(void) |
320 | { | 328 | { |
321 | return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); | 329 | return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT; |
322 | } | 330 | } |
323 | 331 | ||
324 | static inline bool cpu_has_vmx_ept_1g_page(void) | 332 | static inline bool cpu_has_vmx_ept_1g_page(void) |
325 | { | 333 | { |
326 | return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT); | 334 | return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; |
327 | } | 335 | } |
328 | 336 | ||
329 | static inline int cpu_has_vmx_invept_individual_addr(void) | 337 | static inline bool cpu_has_vmx_invept_individual_addr(void) |
330 | { | 338 | { |
331 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); | 339 | return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; |
332 | } | 340 | } |
333 | 341 | ||
334 | static inline int cpu_has_vmx_invept_context(void) | 342 | static inline bool cpu_has_vmx_invept_context(void) |
335 | { | 343 | { |
336 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT); | 344 | return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT; |
337 | } | 345 | } |
338 | 346 | ||
339 | static inline int cpu_has_vmx_invept_global(void) | 347 | static inline bool cpu_has_vmx_invept_global(void) |
340 | { | 348 | { |
341 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT); | 349 | return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; |
342 | } | 350 | } |
343 | 351 | ||
344 | static inline int cpu_has_vmx_ept(void) | 352 | static inline bool cpu_has_vmx_ept(void) |
345 | { | 353 | { |
346 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 354 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
347 | SECONDARY_EXEC_ENABLE_EPT; | 355 | SECONDARY_EXEC_ENABLE_EPT; |
348 | } | 356 | } |
349 | 357 | ||
350 | static inline int cpu_has_vmx_unrestricted_guest(void) | 358 | static inline bool cpu_has_vmx_unrestricted_guest(void) |
351 | { | 359 | { |
352 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 360 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
353 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 361 | SECONDARY_EXEC_UNRESTRICTED_GUEST; |
354 | } | 362 | } |
355 | 363 | ||
356 | static inline int cpu_has_vmx_ple(void) | 364 | static inline bool cpu_has_vmx_ple(void) |
357 | { | 365 | { |
358 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 366 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
359 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 367 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; |
360 | } | 368 | } |
361 | 369 | ||
362 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 370 | static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm) |
363 | { | 371 | { |
364 | return flexpriority_enabled && irqchip_in_kernel(kvm); | 372 | return flexpriority_enabled && irqchip_in_kernel(kvm); |
365 | } | 373 | } |
366 | 374 | ||
367 | static inline int cpu_has_vmx_vpid(void) | 375 | static inline bool cpu_has_vmx_vpid(void) |
368 | { | 376 | { |
369 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 377 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
370 | SECONDARY_EXEC_ENABLE_VPID; | 378 | SECONDARY_EXEC_ENABLE_VPID; |
371 | } | 379 | } |
372 | 380 | ||
373 | static inline int cpu_has_vmx_rdtscp(void) | 381 | static inline bool cpu_has_vmx_rdtscp(void) |
374 | { | 382 | { |
375 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 383 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
376 | SECONDARY_EXEC_RDTSCP; | 384 | SECONDARY_EXEC_RDTSCP; |
377 | } | 385 | } |
378 | 386 | ||
379 | static inline int cpu_has_virtual_nmis(void) | 387 | static inline bool cpu_has_virtual_nmis(void) |
380 | { | 388 | { |
381 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | 389 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; |
382 | } | 390 | } |
@@ -595,16 +603,56 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
595 | vmcs_write32(EXCEPTION_BITMAP, eb); | 603 | vmcs_write32(EXCEPTION_BITMAP, eb); |
596 | } | 604 | } |
597 | 605 | ||
606 | static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | ||
607 | { | ||
608 | unsigned i; | ||
609 | struct msr_autoload *m = &vmx->msr_autoload; | ||
610 | |||
611 | for (i = 0; i < m->nr; ++i) | ||
612 | if (m->guest[i].index == msr) | ||
613 | break; | ||
614 | |||
615 | if (i == m->nr) | ||
616 | return; | ||
617 | --m->nr; | ||
618 | m->guest[i] = m->guest[m->nr]; | ||
619 | m->host[i] = m->host[m->nr]; | ||
620 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr); | ||
621 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); | ||
622 | } | ||
623 | |||
624 | static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | ||
625 | u64 guest_val, u64 host_val) | ||
626 | { | ||
627 | unsigned i; | ||
628 | struct msr_autoload *m = &vmx->msr_autoload; | ||
629 | |||
630 | for (i = 0; i < m->nr; ++i) | ||
631 | if (m->guest[i].index == msr) | ||
632 | break; | ||
633 | |||
634 | if (i == m->nr) { | ||
635 | ++m->nr; | ||
636 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr); | ||
637 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); | ||
638 | } | ||
639 | |||
640 | m->guest[i].index = msr; | ||
641 | m->guest[i].value = guest_val; | ||
642 | m->host[i].index = msr; | ||
643 | m->host[i].value = host_val; | ||
644 | } | ||
645 | |||
598 | static void reload_tss(void) | 646 | static void reload_tss(void) |
599 | { | 647 | { |
600 | /* | 648 | /* |
601 | * VT restores TR but not its size. Useless. | 649 | * VT restores TR but not its size. Useless. |
602 | */ | 650 | */ |
603 | struct descriptor_table gdt; | 651 | struct desc_ptr gdt; |
604 | struct desc_struct *descs; | 652 | struct desc_struct *descs; |
605 | 653 | ||
606 | kvm_get_gdt(&gdt); | 654 | native_store_gdt(&gdt); |
607 | descs = (void *)gdt.base; | 655 | descs = (void *)gdt.address; |
608 | descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ | 656 | descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ |
609 | load_TR_desc(); | 657 | load_TR_desc(); |
610 | } | 658 | } |
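add_atomic_switch_msr()/clear_atomic_switch_msr() above keep a small parallel pair of guest/host MSR arrays whose length is mirrored into the VMCS entry/exit MSR-load counts; removal swaps the last element into the freed slot so the arrays stay dense. The same bookkeeping in isolation, with the VMCS writes stubbed out and a capacity chosen for the sketch:

    /* Stand-alone model of the VMX atomic-switch MSR list bookkeeping. */
    #include <stdint.h>

    #define NR_AUTOLOAD_MSRS 8            /* capacity for the sketch */

    struct msr_entry { uint32_t index; uint64_t value; };

    struct msr_autoload {
        unsigned nr;
        struct msr_entry guest[NR_AUTOLOAD_MSRS];
        struct msr_entry host[NR_AUTOLOAD_MSRS];
    };

    static void update_vmcs_counts(unsigned nr) { (void)nr; /* vmcs_write32() in the kernel */ }

    static void add_switch_msr(struct msr_autoload *m, uint32_t msr,
                               uint64_t guest_val, uint64_t host_val)
    {
        unsigned i;

        for (i = 0; i < m->nr; ++i)       /* reuse an existing slot */
            if (m->guest[i].index == msr)
                break;

        if (i == m->nr) {                 /* otherwise append */
            ++m->nr;
            update_vmcs_counts(m->nr);
        }
        m->guest[i].index = msr; m->guest[i].value = guest_val;
        m->host[i].index  = msr; m->host[i].value  = host_val;
    }

    static void clear_switch_msr(struct msr_autoload *m, uint32_t msr)
    {
        unsigned i;

        for (i = 0; i < m->nr; ++i)
            if (m->guest[i].index == msr)
                break;
        if (i == m->nr)
            return;                       /* not on the list */

        --m->nr;                          /* swap last entry into the hole */
        m->guest[i] = m->guest[m->nr];
        m->host[i]  = m->host[m->nr];
        update_vmcs_counts(m->nr);
    }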
@@ -631,9 +679,57 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) | |||
631 | guest_efer |= host_efer & ignore_bits; | 679 | guest_efer |= host_efer & ignore_bits; |
632 | vmx->guest_msrs[efer_offset].data = guest_efer; | 680 | vmx->guest_msrs[efer_offset].data = guest_efer; |
633 | vmx->guest_msrs[efer_offset].mask = ~ignore_bits; | 681 | vmx->guest_msrs[efer_offset].mask = ~ignore_bits; |
682 | |||
683 | clear_atomic_switch_msr(vmx, MSR_EFER); | ||
684 | /* On ept, can't emulate nx, and must switch nx atomically */ | ||
685 | if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) { | ||
686 | guest_efer = vmx->vcpu.arch.efer; | ||
687 | if (!(guest_efer & EFER_LMA)) | ||
688 | guest_efer &= ~EFER_LME; | ||
689 | add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer); | ||
690 | return false; | ||
691 | } | ||
692 | |||
634 | return true; | 693 | return true; |
635 | } | 694 | } |
636 | 695 | ||
696 | static unsigned long segment_base(u16 selector) | ||
697 | { | ||
698 | struct desc_ptr gdt; | ||
699 | struct desc_struct *d; | ||
700 | unsigned long table_base; | ||
701 | unsigned long v; | ||
702 | |||
703 | if (!(selector & ~3)) | ||
704 | return 0; | ||
705 | |||
706 | native_store_gdt(&gdt); | ||
707 | table_base = gdt.address; | ||
708 | |||
709 | if (selector & 4) { /* from ldt */ | ||
710 | u16 ldt_selector = kvm_read_ldt(); | ||
711 | |||
712 | if (!(ldt_selector & ~3)) | ||
713 | return 0; | ||
714 | |||
715 | table_base = segment_base(ldt_selector); | ||
716 | } | ||
717 | d = (struct desc_struct *)(table_base + (selector & ~7)); | ||
718 | v = get_desc_base(d); | ||
719 | #ifdef CONFIG_X86_64 | ||
720 | if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) | ||
721 | v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; | ||
722 | #endif | ||
723 | return v; | ||
724 | } | ||
725 | |||
726 | static inline unsigned long kvm_read_tr_base(void) | ||
727 | { | ||
728 | u16 tr; | ||
729 | asm("str %0" : "=g"(tr)); | ||
730 | return segment_base(tr); | ||
731 | } | ||
732 | |||
637 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) | 733 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) |
638 | { | 734 | { |
639 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 735 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
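The new local segment_base() above walks the GDT (or, for selectors with the TI bit set, the LDT reached through the GDT) and reassembles the segment base from the three base fields of the descriptor; for 64-bit system descriptors such as the TSS used by kvm_read_tr_base(), the upper 32 bits live in the following 8 bytes. A compact sketch of just the descriptor decoding, using an explicit byte layout instead of the kernel's desc_struct and omitting the LDT lookup:

    /* Decode the base address out of a raw x86 segment descriptor. */
    #include <stdint.h>

    struct raw_desc {                 /* one 8-byte GDT/LDT entry */
        uint16_t limit0;
        uint16_t base0;               /* base bits 15:0  */
        uint8_t  base1;               /* base bits 23:16 */
        uint8_t  type_attr;           /* type:4 s:1 dpl:2 p:1 */
        uint8_t  limit1_flags;
        uint8_t  base2;               /* base bits 31:24 */
    };

    static uint64_t desc_base(const struct raw_desc *d)
    {
        uint64_t base = d->base0 | ((uint64_t)d->base1 << 16) |
                        ((uint64_t)d->base2 << 24);
        int s    = (d->type_attr >> 4) & 1;
        int type = d->type_attr & 0xf;

        /* System descriptors (s == 0) of type LDT or 64-bit TSS occupy
         * 16 bytes in long mode; base bits 63:32 follow in the next
         * 8-byte slot. */
        if (s == 0 && (type == 2 || type == 9 || type == 11)) {
            const uint32_t *hi = (const uint32_t *)(d + 1);
            base |= (uint64_t)hi[0] << 32;
        }
        return base;
    }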
@@ -758,7 +854,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
758 | } | 854 | } |
759 | 855 | ||
760 | if (vcpu->cpu != cpu) { | 856 | if (vcpu->cpu != cpu) { |
761 | struct descriptor_table dt; | 857 | struct desc_ptr dt; |
762 | unsigned long sysenter_esp; | 858 | unsigned long sysenter_esp; |
763 | 859 | ||
764 | vcpu->cpu = cpu; | 860 | vcpu->cpu = cpu; |
@@ -767,8 +863,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
767 | * processors. | 863 | * processors. |
768 | */ | 864 | */ |
769 | vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ | 865 | vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ |
770 | kvm_get_gdt(&dt); | 866 | native_store_gdt(&dt); |
771 | vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */ | 867 | vmcs_writel(HOST_GDTR_BASE, dt.address); /* 22.2.4 */ |
772 | 868 | ||
773 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); | 869 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); |
774 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ | 870 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ |
@@ -846,9 +942,9 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
846 | int ret = 0; | 942 | int ret = 0; |
847 | 943 | ||
848 | if (interruptibility & GUEST_INTR_STATE_STI) | 944 | if (interruptibility & GUEST_INTR_STATE_STI) |
849 | ret |= X86_SHADOW_INT_STI; | 945 | ret |= KVM_X86_SHADOW_INT_STI; |
850 | if (interruptibility & GUEST_INTR_STATE_MOV_SS) | 946 | if (interruptibility & GUEST_INTR_STATE_MOV_SS) |
851 | ret |= X86_SHADOW_INT_MOV_SS; | 947 | ret |= KVM_X86_SHADOW_INT_MOV_SS; |
852 | 948 | ||
853 | return ret & mask; | 949 | return ret & mask; |
854 | } | 950 | } |
@@ -860,9 +956,9 @@ static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
860 | 956 | ||
861 | interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); | 957 | interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); |
862 | 958 | ||
863 | if (mask & X86_SHADOW_INT_MOV_SS) | 959 | if (mask & KVM_X86_SHADOW_INT_MOV_SS) |
864 | interruptibility |= GUEST_INTR_STATE_MOV_SS; | 960 | interruptibility |= GUEST_INTR_STATE_MOV_SS; |
865 | if (mask & X86_SHADOW_INT_STI) | 961 | else if (mask & KVM_X86_SHADOW_INT_STI) |
866 | interruptibility |= GUEST_INTR_STATE_STI; | 962 | interruptibility |= GUEST_INTR_STATE_STI; |
867 | 963 | ||
868 | if ((interruptibility != interruptibility_old)) | 964 | if ((interruptibility != interruptibility_old)) |
@@ -882,7 +978,8 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
882 | } | 978 | } |
883 | 979 | ||
884 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | 980 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, |
885 | bool has_error_code, u32 error_code) | 981 | bool has_error_code, u32 error_code, |
982 | bool reinject) | ||
886 | { | 983 | { |
887 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 984 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
888 | u32 intr_info = nr | INTR_INFO_VALID_MASK; | 985 | u32 intr_info = nr | INTR_INFO_VALID_MASK; |
@@ -1176,9 +1273,16 @@ static __init int vmx_disabled_by_bios(void) | |||
1176 | u64 msr; | 1273 | u64 msr; |
1177 | 1274 | ||
1178 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); | 1275 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); |
1179 | return (msr & (FEATURE_CONTROL_LOCKED | | 1276 | if (msr & FEATURE_CONTROL_LOCKED) { |
1180 | FEATURE_CONTROL_VMXON_ENABLED)) | 1277 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) |
1181 | == FEATURE_CONTROL_LOCKED; | 1278 | && tboot_enabled()) |
1279 | return 1; | ||
1280 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) | ||
1281 | && !tboot_enabled()) | ||
1282 | return 1; | ||
1283 | } | ||
1284 | |||
1285 | return 0; | ||
1182 | /* locked but not enabled */ | 1286 | /* locked but not enabled */ |
1183 | } | 1287 | } |
1184 | 1288 | ||
@@ -1186,21 +1290,23 @@ static int hardware_enable(void *garbage) | |||
1186 | { | 1290 | { |
1187 | int cpu = raw_smp_processor_id(); | 1291 | int cpu = raw_smp_processor_id(); |
1188 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); | 1292 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); |
1189 | u64 old; | 1293 | u64 old, test_bits; |
1190 | 1294 | ||
1191 | if (read_cr4() & X86_CR4_VMXE) | 1295 | if (read_cr4() & X86_CR4_VMXE) |
1192 | return -EBUSY; | 1296 | return -EBUSY; |
1193 | 1297 | ||
1194 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); | 1298 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); |
1195 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); | 1299 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); |
1196 | if ((old & (FEATURE_CONTROL_LOCKED | | 1300 | |
1197 | FEATURE_CONTROL_VMXON_ENABLED)) | 1301 | test_bits = FEATURE_CONTROL_LOCKED; |
1198 | != (FEATURE_CONTROL_LOCKED | | 1302 | test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; |
1199 | FEATURE_CONTROL_VMXON_ENABLED)) | 1303 | if (tboot_enabled()) |
1304 | test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX; | ||
1305 | |||
1306 | if ((old & test_bits) != test_bits) { | ||
1200 | /* enable and lock */ | 1307 | /* enable and lock */ |
1201 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | | 1308 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); |
1202 | FEATURE_CONTROL_LOCKED | | 1309 | } |
1203 | FEATURE_CONTROL_VMXON_ENABLED); | ||
1204 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ | 1310 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ |
1205 | asm volatile (ASM_VMX_VMXON_RAX | 1311 | asm volatile (ASM_VMX_VMXON_RAX |
1206 | : : "a"(&phys_addr), "m"(phys_addr) | 1312 | : : "a"(&phys_addr), "m"(phys_addr) |
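hardware_enable() now demands the lock bit plus whichever VMXON-enable bit matches the boot environment: inside SMX when tboot is active, outside SMX otherwise. A stand-alone sketch of that test; the bit positions follow the conventional IA32_FEATURE_CONTROL layout and are spelled out here only for illustration.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define FC_LOCKED                 (1ULL << 0)
    #define FC_VMXON_INSIDE_SMX       (1ULL << 1)
    #define FC_VMXON_OUTSIDE_SMX      (1ULL << 2)

    /* Bits that must be present before VMXON can succeed in this environment. */
    static uint64_t required_bits(bool tboot_enabled)
    {
        uint64_t test_bits = FC_LOCKED | FC_VMXON_OUTSIDE_SMX;

        if (tboot_enabled)
            test_bits |= FC_VMXON_INSIDE_SMX;
        return test_bits;
    }

    int main(void)
    {
        uint64_t msr = 0;                   /* firmware left the MSR untouched */
        uint64_t need = required_bits(false);

        if ((msr & need) != need)
            printf("would write MSR_IA32_FEATURE_CONTROL |= %#llx\n",
                   (unsigned long long)need);
        else
            printf("VMXON already enabled and locked for this environment\n");
        return 0;
    }

When the required bits are missing, the kernel writes old | test_bits, enabling and locking VMXON in one go, as in the wrmsrl() above.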
@@ -1521,7 +1627,7 @@ static gva_t rmode_tss_base(struct kvm *kvm) | |||
1521 | struct kvm_memslots *slots; | 1627 | struct kvm_memslots *slots; |
1522 | gfn_t base_gfn; | 1628 | gfn_t base_gfn; |
1523 | 1629 | ||
1524 | slots = rcu_dereference(kvm->memslots); | 1630 | slots = kvm_memslots(kvm); |
1525 | base_gfn = kvm->memslots->memslots[0].base_gfn + | 1631 | base_gfn = kvm->memslots->memslots[0].base_gfn + |
1526 | kvm->memslots->memslots[0].npages - 3; | 1632 | kvm->memslots->memslots[0].npages - 3; |
1527 | return base_gfn << PAGE_SHIFT; | 1633 | return base_gfn << PAGE_SHIFT; |
@@ -1649,6 +1755,7 @@ static void exit_lmode(struct kvm_vcpu *vcpu) | |||
1649 | vmcs_write32(VM_ENTRY_CONTROLS, | 1755 | vmcs_write32(VM_ENTRY_CONTROLS, |
1650 | vmcs_read32(VM_ENTRY_CONTROLS) | 1756 | vmcs_read32(VM_ENTRY_CONTROLS) |
1651 | & ~VM_ENTRY_IA32E_MODE); | 1757 | & ~VM_ENTRY_IA32E_MODE); |
1758 | vmx_set_efer(vcpu, vcpu->arch.efer); | ||
1652 | } | 1759 | } |
1653 | 1760 | ||
1654 | #endif | 1761 | #endif |
@@ -1934,28 +2041,28 @@ static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | |||
1934 | *l = (ar >> 13) & 1; | 2041 | *l = (ar >> 13) & 1; |
1935 | } | 2042 | } |
1936 | 2043 | ||
1937 | static void vmx_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2044 | static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
1938 | { | 2045 | { |
1939 | dt->limit = vmcs_read32(GUEST_IDTR_LIMIT); | 2046 | dt->size = vmcs_read32(GUEST_IDTR_LIMIT); |
1940 | dt->base = vmcs_readl(GUEST_IDTR_BASE); | 2047 | dt->address = vmcs_readl(GUEST_IDTR_BASE); |
1941 | } | 2048 | } |
1942 | 2049 | ||
1943 | static void vmx_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2050 | static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
1944 | { | 2051 | { |
1945 | vmcs_write32(GUEST_IDTR_LIMIT, dt->limit); | 2052 | vmcs_write32(GUEST_IDTR_LIMIT, dt->size); |
1946 | vmcs_writel(GUEST_IDTR_BASE, dt->base); | 2053 | vmcs_writel(GUEST_IDTR_BASE, dt->address); |
1947 | } | 2054 | } |
1948 | 2055 | ||
1949 | static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2056 | static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
1950 | { | 2057 | { |
1951 | dt->limit = vmcs_read32(GUEST_GDTR_LIMIT); | 2058 | dt->size = vmcs_read32(GUEST_GDTR_LIMIT); |
1952 | dt->base = vmcs_readl(GUEST_GDTR_BASE); | 2059 | dt->address = vmcs_readl(GUEST_GDTR_BASE); |
1953 | } | 2060 | } |
1954 | 2061 | ||
1955 | static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2062 | static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
1956 | { | 2063 | { |
1957 | vmcs_write32(GUEST_GDTR_LIMIT, dt->limit); | 2064 | vmcs_write32(GUEST_GDTR_LIMIT, dt->size); |
1958 | vmcs_writel(GUEST_GDTR_BASE, dt->base); | 2065 | vmcs_writel(GUEST_GDTR_BASE, dt->address); |
1959 | } | 2066 | } |
1960 | 2067 | ||
1961 | static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) | 2068 | static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) |
@@ -2296,6 +2403,16 @@ static void allocate_vpid(struct vcpu_vmx *vmx) | |||
2296 | spin_unlock(&vmx_vpid_lock); | 2403 | spin_unlock(&vmx_vpid_lock); |
2297 | } | 2404 | } |
2298 | 2405 | ||
2406 | static void free_vpid(struct vcpu_vmx *vmx) | ||
2407 | { | ||
2408 | if (!enable_vpid) | ||
2409 | return; | ||
2410 | spin_lock(&vmx_vpid_lock); | ||
2411 | if (vmx->vpid != 0) | ||
2412 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | ||
2413 | spin_unlock(&vmx_vpid_lock); | ||
2414 | } | ||
2415 | |||
2299 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) | 2416 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) |
2300 | { | 2417 | { |
2301 | int f = sizeof(unsigned long); | 2418 | int f = sizeof(unsigned long); |
@@ -2334,7 +2451,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2334 | u32 junk; | 2451 | u32 junk; |
2335 | u64 host_pat, tsc_this, tsc_base; | 2452 | u64 host_pat, tsc_this, tsc_base; |
2336 | unsigned long a; | 2453 | unsigned long a; |
2337 | struct descriptor_table dt; | 2454 | struct desc_ptr dt; |
2338 | int i; | 2455 | int i; |
2339 | unsigned long kvm_vmx_return; | 2456 | unsigned long kvm_vmx_return; |
2340 | u32 exec_control; | 2457 | u32 exec_control; |
@@ -2415,14 +2532,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2415 | 2532 | ||
2416 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ | 2533 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ |
2417 | 2534 | ||
2418 | kvm_get_idt(&dt); | 2535 | native_store_idt(&dt); |
2419 | vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ | 2536 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ |
2420 | 2537 | ||
2421 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); | 2538 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); |
2422 | vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ | 2539 | vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ |
2423 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); | 2540 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); |
2424 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); | 2541 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); |
2542 | vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); | ||
2425 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); | 2543 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); |
2544 | vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest)); | ||
2426 | 2545 | ||
2427 | rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); | 2546 | rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); |
2428 | vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); | 2547 | vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); |
@@ -2703,8 +2822,7 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | |||
2703 | return 0; | 2822 | return 0; |
2704 | 2823 | ||
2705 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | 2824 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & |
2706 | (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS | | 2825 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI)); |
2707 | GUEST_INTR_STATE_NMI)); | ||
2708 | } | 2826 | } |
2709 | 2827 | ||
2710 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | 2828 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) |
@@ -2948,22 +3066,20 @@ static int handle_io(struct kvm_vcpu *vcpu) | |||
2948 | int size, in, string; | 3066 | int size, in, string; |
2949 | unsigned port; | 3067 | unsigned port; |
2950 | 3068 | ||
2951 | ++vcpu->stat.io_exits; | ||
2952 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3069 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
2953 | string = (exit_qualification & 16) != 0; | 3070 | string = (exit_qualification & 16) != 0; |
3071 | in = (exit_qualification & 8) != 0; | ||
2954 | 3072 | ||
2955 | if (string) { | 3073 | ++vcpu->stat.io_exits; |
2956 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO) | ||
2957 | return 0; | ||
2958 | return 1; | ||
2959 | } | ||
2960 | 3074 | ||
2961 | size = (exit_qualification & 7) + 1; | 3075 | if (string || in) |
2962 | in = (exit_qualification & 8) != 0; | 3076 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); |
2963 | port = exit_qualification >> 16; | ||
2964 | 3077 | ||
3078 | port = exit_qualification >> 16; | ||
3079 | size = (exit_qualification & 7) + 1; | ||
2965 | skip_emulated_instruction(vcpu); | 3080 | skip_emulated_instruction(vcpu); |
2966 | return kvm_emulate_pio(vcpu, in, size, port); | 3081 | |
3082 | return kvm_fast_pio_out(vcpu, size, port); | ||
2967 | } | 3083 | } |
2968 | 3084 | ||
2969 | static void | 3085 | static void |
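The reworked handle_io() keeps string and IN accesses on the emulator path and sends only plain OUT through kvm_fast_pio_out(). The fields it extracts from the VM-exit qualification are simple bit fields; a user-space sketch of the decoding, using the same masks as the code above:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct io_exit {
        unsigned size;      /* access size in bytes: (qual & 7) + 1 */
        bool in;            /* bit 3: 1 = IN, 0 = OUT */
        bool string;        /* bit 4: INS/OUTS */
        uint16_t port;      /* bits 31:16 */
    };

    static struct io_exit decode_io_qualification(uint64_t qual)
    {
        struct io_exit io;
        io.size   = (qual & 7) + 1;
        io.in     = (qual & 8) != 0;
        io.string = (qual & 16) != 0;
        io.port   = (uint16_t)(qual >> 16);
        return io;
    }

    int main(void)
    {
        /* Example qualification: a one-byte, non-string OUT to port 0x3f8. */
        struct io_exit io = decode_io_qualification(0x03f80000ULL);

        printf("%s%s port %#x, %u byte(s)\n",
               io.string ? "string " : "", io.in ? "IN" : "OUT",
               io.port, io.size);
        return 0;
    }

Only the non-string OUT case reaches kvm_fast_pio_out(); everything else falls back to emulate_instruction(), as the new handle_io() shows.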
@@ -3054,19 +3170,9 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
3054 | return 0; | 3170 | return 0; |
3055 | } | 3171 | } |
3056 | 3172 | ||
3057 | static int check_dr_alias(struct kvm_vcpu *vcpu) | ||
3058 | { | ||
3059 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
3060 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3061 | return -1; | ||
3062 | } | ||
3063 | return 0; | ||
3064 | } | ||
3065 | |||
3066 | static int handle_dr(struct kvm_vcpu *vcpu) | 3173 | static int handle_dr(struct kvm_vcpu *vcpu) |
3067 | { | 3174 | { |
3068 | unsigned long exit_qualification; | 3175 | unsigned long exit_qualification; |
3069 | unsigned long val; | ||
3070 | int dr, reg; | 3176 | int dr, reg; |
3071 | 3177 | ||
3072 | /* Do not handle if the CPL > 0, will trigger GP on re-entry */ | 3178 | /* Do not handle if the CPL > 0, will trigger GP on re-entry */ |
@@ -3101,67 +3207,20 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
3101 | dr = exit_qualification & DEBUG_REG_ACCESS_NUM; | 3207 | dr = exit_qualification & DEBUG_REG_ACCESS_NUM; |
3102 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); | 3208 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); |
3103 | if (exit_qualification & TYPE_MOV_FROM_DR) { | 3209 | if (exit_qualification & TYPE_MOV_FROM_DR) { |
3104 | switch (dr) { | 3210 | unsigned long val; |
3105 | case 0 ... 3: | 3211 | if (!kvm_get_dr(vcpu, dr, &val)) |
3106 | val = vcpu->arch.db[dr]; | 3212 | kvm_register_write(vcpu, reg, val); |
3107 | break; | 3213 | } else |
3108 | case 4: | 3214 | kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]); |
3109 | if (check_dr_alias(vcpu) < 0) | ||
3110 | return 1; | ||
3111 | /* fall through */ | ||
3112 | case 6: | ||
3113 | val = vcpu->arch.dr6; | ||
3114 | break; | ||
3115 | case 5: | ||
3116 | if (check_dr_alias(vcpu) < 0) | ||
3117 | return 1; | ||
3118 | /* fall through */ | ||
3119 | default: /* 7 */ | ||
3120 | val = vcpu->arch.dr7; | ||
3121 | break; | ||
3122 | } | ||
3123 | kvm_register_write(vcpu, reg, val); | ||
3124 | } else { | ||
3125 | val = vcpu->arch.regs[reg]; | ||
3126 | switch (dr) { | ||
3127 | case 0 ... 3: | ||
3128 | vcpu->arch.db[dr] = val; | ||
3129 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
3130 | vcpu->arch.eff_db[dr] = val; | ||
3131 | break; | ||
3132 | case 4: | ||
3133 | if (check_dr_alias(vcpu) < 0) | ||
3134 | return 1; | ||
3135 | /* fall through */ | ||
3136 | case 6: | ||
3137 | if (val & 0xffffffff00000000ULL) { | ||
3138 | kvm_inject_gp(vcpu, 0); | ||
3139 | return 1; | ||
3140 | } | ||
3141 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | ||
3142 | break; | ||
3143 | case 5: | ||
3144 | if (check_dr_alias(vcpu) < 0) | ||
3145 | return 1; | ||
3146 | /* fall through */ | ||
3147 | default: /* 7 */ | ||
3148 | if (val & 0xffffffff00000000ULL) { | ||
3149 | kvm_inject_gp(vcpu, 0); | ||
3150 | return 1; | ||
3151 | } | ||
3152 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | ||
3153 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
3154 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); | ||
3155 | vcpu->arch.switch_db_regs = | ||
3156 | (val & DR7_BP_EN_MASK); | ||
3157 | } | ||
3158 | break; | ||
3159 | } | ||
3160 | } | ||
3161 | skip_emulated_instruction(vcpu); | 3215 | skip_emulated_instruction(vcpu); |
3162 | return 1; | 3216 | return 1; |
3163 | } | 3217 | } |
3164 | 3218 | ||
3219 | static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) | ||
3220 | { | ||
3221 | vmcs_writel(GUEST_DR7, val); | ||
3222 | } | ||
3223 | |||
3165 | static int handle_cpuid(struct kvm_vcpu *vcpu) | 3224 | static int handle_cpuid(struct kvm_vcpu *vcpu) |
3166 | { | 3225 | { |
3167 | kvm_emulate_cpuid(vcpu); | 3226 | kvm_emulate_cpuid(vcpu); |
@@ -3293,6 +3352,8 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
3293 | { | 3352 | { |
3294 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3353 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3295 | unsigned long exit_qualification; | 3354 | unsigned long exit_qualification; |
3355 | bool has_error_code = false; | ||
3356 | u32 error_code = 0; | ||
3296 | u16 tss_selector; | 3357 | u16 tss_selector; |
3297 | int reason, type, idt_v; | 3358 | int reason, type, idt_v; |
3298 | 3359 | ||
@@ -3315,6 +3376,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
3315 | kvm_clear_interrupt_queue(vcpu); | 3376 | kvm_clear_interrupt_queue(vcpu); |
3316 | break; | 3377 | break; |
3317 | case INTR_TYPE_HARD_EXCEPTION: | 3378 | case INTR_TYPE_HARD_EXCEPTION: |
3379 | if (vmx->idt_vectoring_info & | ||
3380 | VECTORING_INFO_DELIVER_CODE_MASK) { | ||
3381 | has_error_code = true; | ||
3382 | error_code = | ||
3383 | vmcs_read32(IDT_VECTORING_ERROR_CODE); | ||
3384 | } | ||
3385 | /* fall through */ | ||
3318 | case INTR_TYPE_SOFT_EXCEPTION: | 3386 | case INTR_TYPE_SOFT_EXCEPTION: |
3319 | kvm_clear_exception_queue(vcpu); | 3387 | kvm_clear_exception_queue(vcpu); |
3320 | break; | 3388 | break; |
@@ -3329,8 +3397,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
3329 | type != INTR_TYPE_NMI_INTR)) | 3397 | type != INTR_TYPE_NMI_INTR)) |
3330 | skip_emulated_instruction(vcpu); | 3398 | skip_emulated_instruction(vcpu); |
3331 | 3399 | ||
3332 | if (!kvm_task_switch(vcpu, tss_selector, reason)) | 3400 | if (kvm_task_switch(vcpu, tss_selector, reason, |
3401 | has_error_code, error_code) == EMULATE_FAIL) { | ||
3402 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
3403 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
3404 | vcpu->run->internal.ndata = 0; | ||
3333 | return 0; | 3405 | return 0; |
3406 | } | ||
3334 | 3407 | ||
3335 | /* clear all local breakpoint enable flags */ | 3408 | /* clear all local breakpoint enable flags */ |
3336 | vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); | 3409 | vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); |
@@ -3575,7 +3648,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
3575 | u32 exit_reason = vmx->exit_reason; | 3648 | u32 exit_reason = vmx->exit_reason; |
3576 | u32 vectoring_info = vmx->idt_vectoring_info; | 3649 | u32 vectoring_info = vmx->idt_vectoring_info; |
3577 | 3650 | ||
3578 | trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); | 3651 | trace_kvm_exit(exit_reason, vcpu); |
3579 | 3652 | ||
3580 | /* If guest state is invalid, start emulating */ | 3653 | /* If guest state is invalid, start emulating */ |
3581 | if (vmx->emulation_required && emulate_invalid_guest_state) | 3654 | if (vmx->emulation_required && emulate_invalid_guest_state) |
@@ -3660,8 +3733,11 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | |||
3660 | 3733 | ||
3661 | /* We need to handle NMIs before interrupts are enabled */ | 3734 | /* We need to handle NMIs before interrupts are enabled */ |
3662 | if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && | 3735 | if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && |
3663 | (exit_intr_info & INTR_INFO_VALID_MASK)) | 3736 | (exit_intr_info & INTR_INFO_VALID_MASK)) { |
3737 | kvm_before_handle_nmi(&vmx->vcpu); | ||
3664 | asm("int $2"); | 3738 | asm("int $2"); |
3739 | kvm_after_handle_nmi(&vmx->vcpu); | ||
3740 | } | ||
3665 | 3741 | ||
3666 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | 3742 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; |
3667 | 3743 | ||
@@ -3921,10 +3997,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
3921 | { | 3997 | { |
3922 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3998 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3923 | 3999 | ||
3924 | spin_lock(&vmx_vpid_lock); | 4000 | free_vpid(vmx); |
3925 | if (vmx->vpid != 0) | ||
3926 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | ||
3927 | spin_unlock(&vmx_vpid_lock); | ||
3928 | vmx_free_vmcs(vcpu); | 4001 | vmx_free_vmcs(vcpu); |
3929 | kfree(vmx->guest_msrs); | 4002 | kfree(vmx->guest_msrs); |
3930 | kvm_vcpu_uninit(vcpu); | 4003 | kvm_vcpu_uninit(vcpu); |
@@ -3986,6 +4059,7 @@ free_msrs: | |||
3986 | uninit_vcpu: | 4059 | uninit_vcpu: |
3987 | kvm_vcpu_uninit(&vmx->vcpu); | 4060 | kvm_vcpu_uninit(&vmx->vcpu); |
3988 | free_vcpu: | 4061 | free_vcpu: |
4062 | free_vpid(vmx); | ||
3989 | kmem_cache_free(kvm_vcpu_cache, vmx); | 4063 | kmem_cache_free(kvm_vcpu_cache, vmx); |
3990 | return ERR_PTR(err); | 4064 | return ERR_PTR(err); |
3991 | } | 4065 | } |
@@ -4116,6 +4190,10 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | |||
4116 | } | 4190 | } |
4117 | } | 4191 | } |
4118 | 4192 | ||
4193 | static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | ||
4194 | { | ||
4195 | } | ||
4196 | |||
4119 | static struct kvm_x86_ops vmx_x86_ops = { | 4197 | static struct kvm_x86_ops vmx_x86_ops = { |
4120 | .cpu_has_kvm_support = cpu_has_kvm_support, | 4198 | .cpu_has_kvm_support = cpu_has_kvm_support, |
4121 | .disabled_by_bios = vmx_disabled_by_bios, | 4199 | .disabled_by_bios = vmx_disabled_by_bios, |
@@ -4152,6 +4230,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4152 | .set_idt = vmx_set_idt, | 4230 | .set_idt = vmx_set_idt, |
4153 | .get_gdt = vmx_get_gdt, | 4231 | .get_gdt = vmx_get_gdt, |
4154 | .set_gdt = vmx_set_gdt, | 4232 | .set_gdt = vmx_set_gdt, |
4233 | .set_dr7 = vmx_set_dr7, | ||
4155 | .cache_reg = vmx_cache_reg, | 4234 | .cache_reg = vmx_cache_reg, |
4156 | .get_rflags = vmx_get_rflags, | 4235 | .get_rflags = vmx_get_rflags, |
4157 | .set_rflags = vmx_set_rflags, | 4236 | .set_rflags = vmx_set_rflags, |
@@ -4187,6 +4266,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4187 | .cpuid_update = vmx_cpuid_update, | 4266 | .cpuid_update = vmx_cpuid_update, |
4188 | 4267 | ||
4189 | .rdtscp_supported = vmx_rdtscp_supported, | 4268 | .rdtscp_supported = vmx_rdtscp_supported, |
4269 | |||
4270 | .set_supported_cpuid = vmx_set_supported_cpuid, | ||
4190 | }; | 4271 | }; |
4191 | 4272 | ||
4192 | static int __init vmx_init(void) | 4273 | static int __init vmx_init(void) |
@@ -4234,7 +4315,8 @@ static int __init vmx_init(void) | |||
4234 | 4315 | ||
4235 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ | 4316 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ |
4236 | 4317 | ||
4237 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); | 4318 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), |
4319 | __alignof__(struct vcpu_vmx), THIS_MODULE); | ||
4238 | if (r) | 4320 | if (r) |
4239 | goto out3; | 4321 | goto out3; |
4240 | 4322 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3c4ca98ad27f..05d571f6f196 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -40,8 +40,9 @@ | |||
40 | #include <linux/user-return-notifier.h> | 40 | #include <linux/user-return-notifier.h> |
41 | #include <linux/srcu.h> | 41 | #include <linux/srcu.h> |
42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
43 | #include <linux/perf_event.h> | ||
43 | #include <trace/events/kvm.h> | 44 | #include <trace/events/kvm.h> |
44 | #undef TRACE_INCLUDE_FILE | 45 | |
45 | #define CREATE_TRACE_POINTS | 46 | #define CREATE_TRACE_POINTS |
46 | #include "trace.h" | 47 | #include "trace.h" |
47 | 48 | ||
@@ -223,34 +224,6 @@ static void drop_user_return_notifiers(void *ignore) | |||
223 | kvm_on_user_return(&smsr->urn); | 224 | kvm_on_user_return(&smsr->urn); |
224 | } | 225 | } |
225 | 226 | ||
226 | unsigned long segment_base(u16 selector) | ||
227 | { | ||
228 | struct descriptor_table gdt; | ||
229 | struct desc_struct *d; | ||
230 | unsigned long table_base; | ||
231 | unsigned long v; | ||
232 | |||
233 | if (selector == 0) | ||
234 | return 0; | ||
235 | |||
236 | kvm_get_gdt(&gdt); | ||
237 | table_base = gdt.base; | ||
238 | |||
239 | if (selector & 4) { /* from ldt */ | ||
240 | u16 ldt_selector = kvm_read_ldt(); | ||
241 | |||
242 | table_base = segment_base(ldt_selector); | ||
243 | } | ||
244 | d = (struct desc_struct *)(table_base + (selector & ~7)); | ||
245 | v = get_desc_base(d); | ||
246 | #ifdef CONFIG_X86_64 | ||
247 | if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) | ||
248 | v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; | ||
249 | #endif | ||
250 | return v; | ||
251 | } | ||
252 | EXPORT_SYMBOL_GPL(segment_base); | ||
253 | |||
254 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) | 227 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) |
255 | { | 228 | { |
256 | if (irqchip_in_kernel(vcpu->kvm)) | 229 | if (irqchip_in_kernel(vcpu->kvm)) |
@@ -292,7 +265,8 @@ static int exception_class(int vector) | |||
292 | } | 265 | } |
293 | 266 | ||
294 | static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | 267 | static void kvm_multiple_exception(struct kvm_vcpu *vcpu, |
295 | unsigned nr, bool has_error, u32 error_code) | 268 | unsigned nr, bool has_error, u32 error_code, |
269 | bool reinject) | ||
296 | { | 270 | { |
297 | u32 prev_nr; | 271 | u32 prev_nr; |
298 | int class1, class2; | 272 | int class1, class2; |
@@ -303,6 +277,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
303 | vcpu->arch.exception.has_error_code = has_error; | 277 | vcpu->arch.exception.has_error_code = has_error; |
304 | vcpu->arch.exception.nr = nr; | 278 | vcpu->arch.exception.nr = nr; |
305 | vcpu->arch.exception.error_code = error_code; | 279 | vcpu->arch.exception.error_code = error_code; |
280 | vcpu->arch.exception.reinject = reinject; | ||
306 | return; | 281 | return; |
307 | } | 282 | } |
308 | 283 | ||
@@ -331,10 +306,16 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
331 | 306 | ||
332 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) | 307 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) |
333 | { | 308 | { |
334 | kvm_multiple_exception(vcpu, nr, false, 0); | 309 | kvm_multiple_exception(vcpu, nr, false, 0, false); |
335 | } | 310 | } |
336 | EXPORT_SYMBOL_GPL(kvm_queue_exception); | 311 | EXPORT_SYMBOL_GPL(kvm_queue_exception); |
337 | 312 | ||
313 | void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr) | ||
314 | { | ||
315 | kvm_multiple_exception(vcpu, nr, false, 0, true); | ||
316 | } | ||
317 | EXPORT_SYMBOL_GPL(kvm_requeue_exception); | ||
318 | |||
338 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, | 319 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, |
339 | u32 error_code) | 320 | u32 error_code) |
340 | { | 321 | { |
@@ -351,10 +332,16 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi); | |||
351 | 332 | ||
352 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) | 333 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) |
353 | { | 334 | { |
354 | kvm_multiple_exception(vcpu, nr, true, error_code); | 335 | kvm_multiple_exception(vcpu, nr, true, error_code, false); |
355 | } | 336 | } |
356 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); | 337 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); |
357 | 338 | ||
339 | void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) | ||
340 | { | ||
341 | kvm_multiple_exception(vcpu, nr, true, error_code, true); | ||
342 | } | ||
343 | EXPORT_SYMBOL_GPL(kvm_requeue_exception_e); | ||
344 | |||
358 | /* | 345 | /* |
359 | * Checks if cpl <= required_cpl; if true, return true. Otherwise queue | 346 | * Checks if cpl <= required_cpl; if true, return true. Otherwise queue |
360 | * a #GP and return false. | 347 | * a #GP and return false. |
@@ -475,7 +462,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
475 | } | 462 | } |
476 | 463 | ||
477 | kvm_x86_ops->set_cr0(vcpu, cr0); | 464 | kvm_x86_ops->set_cr0(vcpu, cr0); |
478 | vcpu->arch.cr0 = cr0; | ||
479 | 465 | ||
480 | kvm_mmu_reset_context(vcpu); | 466 | kvm_mmu_reset_context(vcpu); |
481 | return; | 467 | return; |
@@ -484,7 +470,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0); | |||
484 | 470 | ||
485 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) | 471 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) |
486 | { | 472 | { |
487 | kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f)); | 473 | kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); |
488 | } | 474 | } |
489 | EXPORT_SYMBOL_GPL(kvm_lmsw); | 475 | EXPORT_SYMBOL_GPL(kvm_lmsw); |
490 | 476 | ||
@@ -516,7 +502,6 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
516 | } | 502 | } |
517 | kvm_x86_ops->set_cr4(vcpu, cr4); | 503 | kvm_x86_ops->set_cr4(vcpu, cr4); |
518 | vcpu->arch.cr4 = cr4; | 504 | vcpu->arch.cr4 = cr4; |
519 | vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; | ||
520 | kvm_mmu_reset_context(vcpu); | 505 | kvm_mmu_reset_context(vcpu); |
521 | } | 506 | } |
522 | EXPORT_SYMBOL_GPL(kvm_set_cr4); | 507 | EXPORT_SYMBOL_GPL(kvm_set_cr4); |
@@ -591,6 +576,80 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | |||
591 | } | 576 | } |
592 | EXPORT_SYMBOL_GPL(kvm_get_cr8); | 577 | EXPORT_SYMBOL_GPL(kvm_get_cr8); |
593 | 578 | ||
579 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | ||
580 | { | ||
581 | switch (dr) { | ||
582 | case 0 ... 3: | ||
583 | vcpu->arch.db[dr] = val; | ||
584 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
585 | vcpu->arch.eff_db[dr] = val; | ||
586 | break; | ||
587 | case 4: | ||
588 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
589 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
590 | return 1; | ||
591 | } | ||
592 | /* fall through */ | ||
593 | case 6: | ||
594 | if (val & 0xffffffff00000000ULL) { | ||
595 | kvm_inject_gp(vcpu, 0); | ||
596 | return 1; | ||
597 | } | ||
598 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | ||
599 | break; | ||
600 | case 5: | ||
601 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
602 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
603 | return 1; | ||
604 | } | ||
605 | /* fall through */ | ||
606 | default: /* 7 */ | ||
607 | if (val & 0xffffffff00000000ULL) { | ||
608 | kvm_inject_gp(vcpu, 0); | ||
609 | return 1; | ||
610 | } | ||
611 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | ||
612 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
613 | kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); | ||
614 | vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK); | ||
615 | } | ||
616 | break; | ||
617 | } | ||
618 | |||
619 | return 0; | ||
620 | } | ||
621 | EXPORT_SYMBOL_GPL(kvm_set_dr); | ||
622 | |||
623 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | ||
624 | { | ||
625 | switch (dr) { | ||
626 | case 0 ... 3: | ||
627 | *val = vcpu->arch.db[dr]; | ||
628 | break; | ||
629 | case 4: | ||
630 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
631 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
632 | return 1; | ||
633 | } | ||
634 | /* fall through */ | ||
635 | case 6: | ||
636 | *val = vcpu->arch.dr6; | ||
637 | break; | ||
638 | case 5: | ||
639 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
640 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
641 | return 1; | ||
642 | } | ||
643 | /* fall through */ | ||
644 | default: /* 7 */ | ||
645 | *val = vcpu->arch.dr7; | ||
646 | break; | ||
647 | } | ||
648 | |||
649 | return 0; | ||
650 | } | ||
651 | EXPORT_SYMBOL_GPL(kvm_get_dr); | ||
652 | |||
594 | static inline u32 bit(int bitno) | 653 | static inline u32 bit(int bitno) |
595 | { | 654 | { |
596 | return 1 << (bitno & 31); | 655 | return 1 << (bitno & 31); |
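kvm_get_dr()/kvm_set_dr() centralize the debug-register rules that handle_dr() previously open-coded: DR4/DR5 raise #UD when CR4.DE is set and otherwise alias to DR6/DR7, and 64-bit writes to DR6/DR7 must keep the upper 32 bits clear. A stand-alone sketch of those two checks (the return convention here is illustrative, not the kernel's):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Map a debug-register number to its canonical register, or fail. */
    static int canonical_dr(int dr, bool cr4_de, int *out)
    {
        switch (dr) {
        case 0: case 1: case 2: case 3:
        case 6: case 7:
            *out = dr;
            return 0;
        case 4: case 5:
            if (cr4_de)
                return -1;                  /* the kernel queues #UD here */
            *out = dr + 2;                  /* DR4 -> DR6, DR5 -> DR7 */
            return 0;
        default:
            return -1;
        }
    }

    /* DR6/DR7 writes must fit in 32 bits; otherwise the kernel injects #GP. */
    static bool dr67_value_ok(uint64_t val)
    {
        return (val & 0xffffffff00000000ULL) == 0;
    }

    int main(void)
    {
        int target;

        if (canonical_dr(5, false, &target) == 0)
            printf("DR5 aliases to DR%d\n", target);
        printf("64-bit write to DR7 %s\n",
               dr67_value_ok(0x100000000ULL) ? "accepted" : "rejected (#GP)");
        return 0;
    }

The kernel helper additionally pushes an accepted DR7 value into the VMCS through the new set_dr7 hook when guest debugging is not using hardware breakpoints.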
@@ -605,9 +664,10 @@ static inline u32 bit(int bitno) | |||
605 | * kvm-specific. Those are put in the beginning of the list. | 664 | * kvm-specific. Those are put in the beginning of the list. |
606 | */ | 665 | */ |
607 | 666 | ||
608 | #define KVM_SAVE_MSRS_BEGIN 5 | 667 | #define KVM_SAVE_MSRS_BEGIN 7 |
609 | static u32 msrs_to_save[] = { | 668 | static u32 msrs_to_save[] = { |
610 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 669 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
670 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | ||
611 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 671 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
612 | HV_X64_MSR_APIC_ASSIST_PAGE, | 672 | HV_X64_MSR_APIC_ASSIST_PAGE, |
613 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 673 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
@@ -624,48 +684,42 @@ static u32 emulated_msrs[] = { | |||
624 | MSR_IA32_MISC_ENABLE, | 684 | MSR_IA32_MISC_ENABLE, |
625 | }; | 685 | }; |
626 | 686 | ||
627 | static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | 687 | static int set_efer(struct kvm_vcpu *vcpu, u64 efer) |
628 | { | 688 | { |
629 | if (efer & efer_reserved_bits) { | 689 | if (efer & efer_reserved_bits) |
630 | kvm_inject_gp(vcpu, 0); | 690 | return 1; |
631 | return; | ||
632 | } | ||
633 | 691 | ||
634 | if (is_paging(vcpu) | 692 | if (is_paging(vcpu) |
635 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { | 693 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) |
636 | kvm_inject_gp(vcpu, 0); | 694 | return 1; |
637 | return; | ||
638 | } | ||
639 | 695 | ||
640 | if (efer & EFER_FFXSR) { | 696 | if (efer & EFER_FFXSR) { |
641 | struct kvm_cpuid_entry2 *feat; | 697 | struct kvm_cpuid_entry2 *feat; |
642 | 698 | ||
643 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 699 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
644 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { | 700 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) |
645 | kvm_inject_gp(vcpu, 0); | 701 | return 1; |
646 | return; | ||
647 | } | ||
648 | } | 702 | } |
649 | 703 | ||
650 | if (efer & EFER_SVME) { | 704 | if (efer & EFER_SVME) { |
651 | struct kvm_cpuid_entry2 *feat; | 705 | struct kvm_cpuid_entry2 *feat; |
652 | 706 | ||
653 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 707 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
654 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { | 708 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) |
655 | kvm_inject_gp(vcpu, 0); | 709 | return 1; |
656 | return; | ||
657 | } | ||
658 | } | 710 | } |
659 | 711 | ||
660 | kvm_x86_ops->set_efer(vcpu, efer); | ||
661 | |||
662 | efer &= ~EFER_LMA; | 712 | efer &= ~EFER_LMA; |
663 | efer |= vcpu->arch.efer & EFER_LMA; | 713 | efer |= vcpu->arch.efer & EFER_LMA; |
664 | 714 | ||
715 | kvm_x86_ops->set_efer(vcpu, efer); | ||
716 | |||
665 | vcpu->arch.efer = efer; | 717 | vcpu->arch.efer = efer; |
666 | 718 | ||
667 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | 719 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; |
668 | kvm_mmu_reset_context(vcpu); | 720 | kvm_mmu_reset_context(vcpu); |
721 | |||
722 | return 0; | ||
669 | } | 723 | } |
670 | 724 | ||
671 | void kvm_enable_efer_bits(u64 mask) | 725 | void kvm_enable_efer_bits(u64 mask) |
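set_efer() now returns an error instead of injecting #GP itself; kvm_set_msr_common simply propagates that result (see the "return set_efer(vcpu, data);" change further down). The validation itself is unchanged; a minimal sketch of the LME-while-paging rule, with the EFER bit values written out for illustration:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define EFER_LME (1ULL << 8)            /* long mode enable */
    #define EFER_LMA (1ULL << 10)           /* long mode active (managed by the host) */

    /* Returns nonzero when the write must be rejected, as set_efer() now does. */
    static int check_efer_write(uint64_t old_efer, uint64_t new_efer,
                                uint64_t reserved_bits, bool paging)
    {
        if (new_efer & reserved_bits)
            return 1;
        if (paging && ((old_efer ^ new_efer) & EFER_LME))
            return 1;                       /* cannot toggle LME while paging is on */
        return 0;
    }

    int main(void)
    {
        uint64_t old_efer = EFER_LME | EFER_LMA;

        printf("toggle LME with paging on: %s\n",
               check_efer_write(old_efer, old_efer & ~EFER_LME, 0, true)
                   ? "rejected" : "allowed");
        return 0;
    }

The kernel helper also preserves the guest's current LMA bit before handing the value to kvm_x86_ops->set_efer().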
@@ -695,14 +749,22 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) | |||
695 | 749 | ||
696 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | 750 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) |
697 | { | 751 | { |
698 | static int version; | 752 | int version; |
753 | int r; | ||
699 | struct pvclock_wall_clock wc; | 754 | struct pvclock_wall_clock wc; |
700 | struct timespec boot; | 755 | struct timespec boot; |
701 | 756 | ||
702 | if (!wall_clock) | 757 | if (!wall_clock) |
703 | return; | 758 | return; |
704 | 759 | ||
705 | version++; | 760 | r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version)); |
761 | if (r) | ||
762 | return; | ||
763 | |||
764 | if (version & 1) | ||
765 | ++version; /* first time write, random junk */ | ||
766 | |||
767 | ++version; | ||
706 | 768 | ||
707 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); | 769 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); |
708 | 770 | ||
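kvm_write_wall_clock() now derives the version from whatever the guest page already contains instead of a host-side static counter, preserving the seqcount-style convention that an odd version means an update is in flight. A user-space sketch of the matching reader loop a guest would run; the structure layout is simplified for the example.

    #include <stdint.h>
    #include <stdio.h>

    struct wall_clock_page {
        volatile uint32_t version;          /* odd while the host is updating */
        uint32_t sec;
        uint32_t nsec;
    };

    /* Seqcount-style read: retry until the same even version is seen twice. */
    static void read_wall_clock(const struct wall_clock_page *wc,
                                uint32_t *sec, uint32_t *nsec)
    {
        uint32_t v1, v2;

        do {
            do {
                v1 = wc->version;
            } while (v1 & 1);               /* update in progress */
            *sec  = wc->sec;
            *nsec = wc->nsec;
            v2 = wc->version;
        } while (v1 != v2);                 /* host rewrote it meanwhile */
    }

    int main(void)
    {
        struct wall_clock_page wc = { .version = 2, .sec = 1275375469, .nsec = 0 };
        uint32_t sec, nsec;

        read_wall_clock(&wc, &sec, &nsec);
        printf("wall clock: %u.%09u\n", sec, nsec);
        return 0;
    }

The writer side bumps the version to an odd value, stores the data, then bumps it again, which is the "even number signaling that the update is finished" mentioned in the kvm_write_guest_time() comment below.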
@@ -795,6 +857,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
795 | vcpu->hv_clock.system_time = ts.tv_nsec + | 857 | vcpu->hv_clock.system_time = ts.tv_nsec + |
796 | (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; | 858 | (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; |
797 | 859 | ||
860 | vcpu->hv_clock.flags = 0; | ||
861 | |||
798 | /* | 862 | /* |
799 | * The interface expects us to write an even number signaling that the | 863 | * The interface expects us to write an even number signaling that the |
800 | * update is finished. Since the guest won't see the intermediate | 864 | * update is finished. Since the guest won't see the intermediate |
@@ -1086,10 +1150,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1086 | { | 1150 | { |
1087 | switch (msr) { | 1151 | switch (msr) { |
1088 | case MSR_EFER: | 1152 | case MSR_EFER: |
1089 | set_efer(vcpu, data); | 1153 | return set_efer(vcpu, data); |
1090 | break; | ||
1091 | case MSR_K7_HWCR: | 1154 | case MSR_K7_HWCR: |
1092 | data &= ~(u64)0x40; /* ignore flush filter disable */ | 1155 | data &= ~(u64)0x40; /* ignore flush filter disable */ |
1156 | data &= ~(u64)0x100; /* ignore ignne emulation enable */ | ||
1093 | if (data != 0) { | 1157 | if (data != 0) { |
1094 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", | 1158 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", |
1095 | data); | 1159 | data); |
@@ -1132,10 +1196,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1132 | case MSR_IA32_MISC_ENABLE: | 1196 | case MSR_IA32_MISC_ENABLE: |
1133 | vcpu->arch.ia32_misc_enable_msr = data; | 1197 | vcpu->arch.ia32_misc_enable_msr = data; |
1134 | break; | 1198 | break; |
1199 | case MSR_KVM_WALL_CLOCK_NEW: | ||
1135 | case MSR_KVM_WALL_CLOCK: | 1200 | case MSR_KVM_WALL_CLOCK: |
1136 | vcpu->kvm->arch.wall_clock = data; | 1201 | vcpu->kvm->arch.wall_clock = data; |
1137 | kvm_write_wall_clock(vcpu->kvm, data); | 1202 | kvm_write_wall_clock(vcpu->kvm, data); |
1138 | break; | 1203 | break; |
1204 | case MSR_KVM_SYSTEM_TIME_NEW: | ||
1139 | case MSR_KVM_SYSTEM_TIME: { | 1205 | case MSR_KVM_SYSTEM_TIME: { |
1140 | if (vcpu->arch.time_page) { | 1206 | if (vcpu->arch.time_page) { |
1141 | kvm_release_page_dirty(vcpu->arch.time_page); | 1207 | kvm_release_page_dirty(vcpu->arch.time_page); |
@@ -1407,9 +1473,11 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1407 | data = vcpu->arch.efer; | 1473 | data = vcpu->arch.efer; |
1408 | break; | 1474 | break; |
1409 | case MSR_KVM_WALL_CLOCK: | 1475 | case MSR_KVM_WALL_CLOCK: |
1476 | case MSR_KVM_WALL_CLOCK_NEW: | ||
1410 | data = vcpu->kvm->arch.wall_clock; | 1477 | data = vcpu->kvm->arch.wall_clock; |
1411 | break; | 1478 | break; |
1412 | case MSR_KVM_SYSTEM_TIME: | 1479 | case MSR_KVM_SYSTEM_TIME: |
1480 | case MSR_KVM_SYSTEM_TIME_NEW: | ||
1413 | data = vcpu->arch.time; | 1481 | data = vcpu->arch.time; |
1414 | break; | 1482 | break; |
1415 | case MSR_IA32_P5_MC_ADDR: | 1483 | case MSR_IA32_P5_MC_ADDR: |
@@ -1548,6 +1616,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1548 | case KVM_CAP_HYPERV_VAPIC: | 1616 | case KVM_CAP_HYPERV_VAPIC: |
1549 | case KVM_CAP_HYPERV_SPIN: | 1617 | case KVM_CAP_HYPERV_SPIN: |
1550 | case KVM_CAP_PCI_SEGMENT: | 1618 | case KVM_CAP_PCI_SEGMENT: |
1619 | case KVM_CAP_DEBUGREGS: | ||
1551 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | 1620 | case KVM_CAP_X86_ROBUST_SINGLESTEP: |
1552 | r = 1; | 1621 | r = 1; |
1553 | break; | 1622 | break; |
@@ -1712,6 +1781,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
1712 | if (copy_from_user(cpuid_entries, entries, | 1781 | if (copy_from_user(cpuid_entries, entries, |
1713 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) | 1782 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) |
1714 | goto out_free; | 1783 | goto out_free; |
1784 | vcpu_load(vcpu); | ||
1715 | for (i = 0; i < cpuid->nent; i++) { | 1785 | for (i = 0; i < cpuid->nent; i++) { |
1716 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; | 1786 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; |
1717 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; | 1787 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; |
@@ -1729,6 +1799,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
1729 | r = 0; | 1799 | r = 0; |
1730 | kvm_apic_set_version(vcpu); | 1800 | kvm_apic_set_version(vcpu); |
1731 | kvm_x86_ops->cpuid_update(vcpu); | 1801 | kvm_x86_ops->cpuid_update(vcpu); |
1802 | vcpu_put(vcpu); | ||
1732 | 1803 | ||
1733 | out_free: | 1804 | out_free: |
1734 | vfree(cpuid_entries); | 1805 | vfree(cpuid_entries); |
@@ -1749,9 +1820,11 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | |||
1749 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, | 1820 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, |
1750 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) | 1821 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) |
1751 | goto out; | 1822 | goto out; |
1823 | vcpu_load(vcpu); | ||
1752 | vcpu->arch.cpuid_nent = cpuid->nent; | 1824 | vcpu->arch.cpuid_nent = cpuid->nent; |
1753 | kvm_apic_set_version(vcpu); | 1825 | kvm_apic_set_version(vcpu); |
1754 | kvm_x86_ops->cpuid_update(vcpu); | 1826 | kvm_x86_ops->cpuid_update(vcpu); |
1827 | vcpu_put(vcpu); | ||
1755 | return 0; | 1828 | return 0; |
1756 | 1829 | ||
1757 | out: | 1830 | out: |
@@ -1764,6 +1837,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | |||
1764 | { | 1837 | { |
1765 | int r; | 1838 | int r; |
1766 | 1839 | ||
1840 | vcpu_load(vcpu); | ||
1767 | r = -E2BIG; | 1841 | r = -E2BIG; |
1768 | if (cpuid->nent < vcpu->arch.cpuid_nent) | 1842 | if (cpuid->nent < vcpu->arch.cpuid_nent) |
1769 | goto out; | 1843 | goto out; |
@@ -1775,6 +1849,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | |||
1775 | 1849 | ||
1776 | out: | 1850 | out: |
1777 | cpuid->nent = vcpu->arch.cpuid_nent; | 1851 | cpuid->nent = vcpu->arch.cpuid_nent; |
1852 | vcpu_put(vcpu); | ||
1778 | return r; | 1853 | return r; |
1779 | } | 1854 | } |
1780 | 1855 | ||
@@ -1905,6 +1980,24 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1905 | } | 1980 | } |
1906 | break; | 1981 | break; |
1907 | } | 1982 | } |
1983 | case KVM_CPUID_SIGNATURE: { | ||
1984 | char signature[12] = "KVMKVMKVM\0\0"; | ||
1985 | u32 *sigptr = (u32 *)signature; | ||
1986 | entry->eax = 0; | ||
1987 | entry->ebx = sigptr[0]; | ||
1988 | entry->ecx = sigptr[1]; | ||
1989 | entry->edx = sigptr[2]; | ||
1990 | break; | ||
1991 | } | ||
1992 | case KVM_CPUID_FEATURES: | ||
1993 | entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | | ||
1994 | (1 << KVM_FEATURE_NOP_IO_DELAY) | | ||
1995 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | ||
1996 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | ||
1997 | entry->ebx = 0; | ||
1998 | entry->ecx = 0; | ||
1999 | entry->edx = 0; | ||
2000 | break; | ||
1908 | case 0x80000000: | 2001 | case 0x80000000: |
1909 | entry->eax = min(entry->eax, 0x8000001a); | 2002 | entry->eax = min(entry->eax, 0x8000001a); |
1910 | break; | 2003 | break; |
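do_cpuid_ent() now synthesizes the KVM signature and feature leaves. A guest sees the same bytes come back in ebx/ecx/edx from the hypervisor signature leaf (0x40000000 in the usual KVM ABI; the leaf number is quoted here for context, not taken from the hunk). A user-space sketch of reassembling the string:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Rebuild the 12-byte vendor signature from the ebx/ecx/edx values a
     * hypervisor returns for its signature leaf. */
    static void signature_string(uint32_t ebx, uint32_t ecx, uint32_t edx,
                                 char out[13])
    {
        memcpy(out + 0, &ebx, 4);
        memcpy(out + 4, &ecx, 4);
        memcpy(out + 8, &edx, 4);
        out[12] = '\0';
    }

    int main(void)
    {
        /* The values the hunk above packs from "KVMKVMKVM\0\0\0". */
        char sig[13];
        const char kvm[12] = "KVMKVMKVM\0\0";
        uint32_t regs[3];

        memcpy(regs, kvm, sizeof(regs));
        signature_string(regs[0], regs[1], regs[2], sig);
        printf("hypervisor signature: \"%s\"\n", sig);
        return 0;
    }

The companion KVM_CPUID_FEATURES leaf advertises the clocksource MSRs, including the CLOCKSOURCE2 and CLOCKSOURCE_STABLE_BIT flags added above.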
@@ -1913,6 +2006,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1913 | entry->ecx &= kvm_supported_word6_x86_features; | 2006 | entry->ecx &= kvm_supported_word6_x86_features; |
1914 | break; | 2007 | break; |
1915 | } | 2008 | } |
2009 | |||
2010 | kvm_x86_ops->set_supported_cpuid(function, entry); | ||
2011 | |||
1916 | put_cpu(); | 2012 | put_cpu(); |
1917 | } | 2013 | } |
1918 | 2014 | ||
@@ -1948,6 +2044,23 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | |||
1948 | for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) | 2044 | for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) |
1949 | do_cpuid_ent(&cpuid_entries[nent], func, 0, | 2045 | do_cpuid_ent(&cpuid_entries[nent], func, 0, |
1950 | &nent, cpuid->nent); | 2046 | &nent, cpuid->nent); |
2047 | |||
2048 | |||
2049 | |||
2050 | r = -E2BIG; | ||
2051 | if (nent >= cpuid->nent) | ||
2052 | goto out_free; | ||
2053 | |||
2054 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, | ||
2055 | cpuid->nent); | ||
2056 | |||
2057 | r = -E2BIG; | ||
2058 | if (nent >= cpuid->nent) | ||
2059 | goto out_free; | ||
2060 | |||
2061 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent, | ||
2062 | cpuid->nent); | ||
2063 | |||
1951 | r = -E2BIG; | 2064 | r = -E2BIG; |
1952 | if (nent >= cpuid->nent) | 2065 | if (nent >= cpuid->nent) |
1953 | goto out_free; | 2066 | goto out_free; |
@@ -2027,6 +2140,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | |||
2027 | int r; | 2140 | int r; |
2028 | unsigned bank_num = mcg_cap & 0xff, bank; | 2141 | unsigned bank_num = mcg_cap & 0xff, bank; |
2029 | 2142 | ||
2143 | vcpu_load(vcpu); | ||
2030 | r = -EINVAL; | 2144 | r = -EINVAL; |
2031 | if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) | 2145 | if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) |
2032 | goto out; | 2146 | goto out; |
@@ -2041,6 +2155,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | |||
2041 | for (bank = 0; bank < bank_num; bank++) | 2155 | for (bank = 0; bank < bank_num; bank++) |
2042 | vcpu->arch.mce_banks[bank*4] = ~(u64)0; | 2156 | vcpu->arch.mce_banks[bank*4] = ~(u64)0; |
2043 | out: | 2157 | out: |
2158 | vcpu_put(vcpu); | ||
2044 | return r; | 2159 | return r; |
2045 | } | 2160 | } |
2046 | 2161 | ||
@@ -2100,14 +2215,20 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
2100 | { | 2215 | { |
2101 | vcpu_load(vcpu); | 2216 | vcpu_load(vcpu); |
2102 | 2217 | ||
2103 | events->exception.injected = vcpu->arch.exception.pending; | 2218 | events->exception.injected = |
2219 | vcpu->arch.exception.pending && | ||
2220 | !kvm_exception_is_soft(vcpu->arch.exception.nr); | ||
2104 | events->exception.nr = vcpu->arch.exception.nr; | 2221 | events->exception.nr = vcpu->arch.exception.nr; |
2105 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; | 2222 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; |
2106 | events->exception.error_code = vcpu->arch.exception.error_code; | 2223 | events->exception.error_code = vcpu->arch.exception.error_code; |
2107 | 2224 | ||
2108 | events->interrupt.injected = vcpu->arch.interrupt.pending; | 2225 | events->interrupt.injected = |
2226 | vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; | ||
2109 | events->interrupt.nr = vcpu->arch.interrupt.nr; | 2227 | events->interrupt.nr = vcpu->arch.interrupt.nr; |
2110 | events->interrupt.soft = vcpu->arch.interrupt.soft; | 2228 | events->interrupt.soft = 0; |
2229 | events->interrupt.shadow = | ||
2230 | kvm_x86_ops->get_interrupt_shadow(vcpu, | ||
2231 | KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); | ||
2111 | 2232 | ||
2112 | events->nmi.injected = vcpu->arch.nmi_injected; | 2233 | events->nmi.injected = vcpu->arch.nmi_injected; |
2113 | events->nmi.pending = vcpu->arch.nmi_pending; | 2234 | events->nmi.pending = vcpu->arch.nmi_pending; |
@@ -2116,7 +2237,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
2116 | events->sipi_vector = vcpu->arch.sipi_vector; | 2237 | events->sipi_vector = vcpu->arch.sipi_vector; |
2117 | 2238 | ||
2118 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | 2239 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING |
2119 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR); | 2240 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
2241 | | KVM_VCPUEVENT_VALID_SHADOW); | ||
2120 | 2242 | ||
2121 | vcpu_put(vcpu); | 2243 | vcpu_put(vcpu); |
2122 | } | 2244 | } |
@@ -2125,7 +2247,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2125 | struct kvm_vcpu_events *events) | 2247 | struct kvm_vcpu_events *events) |
2126 | { | 2248 | { |
2127 | if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING | 2249 | if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING |
2128 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) | 2250 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
2251 | | KVM_VCPUEVENT_VALID_SHADOW)) | ||
2129 | return -EINVAL; | 2252 | return -EINVAL; |
2130 | 2253 | ||
2131 | vcpu_load(vcpu); | 2254 | vcpu_load(vcpu); |
@@ -2140,6 +2263,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2140 | vcpu->arch.interrupt.soft = events->interrupt.soft; | 2263 | vcpu->arch.interrupt.soft = events->interrupt.soft; |
2141 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) | 2264 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) |
2142 | kvm_pic_clear_isr_ack(vcpu->kvm); | 2265 | kvm_pic_clear_isr_ack(vcpu->kvm); |
2266 | if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) | ||
2267 | kvm_x86_ops->set_interrupt_shadow(vcpu, | ||
2268 | events->interrupt.shadow); | ||
2143 | 2269 | ||
2144 | vcpu->arch.nmi_injected = events->nmi.injected; | 2270 | vcpu->arch.nmi_injected = events->nmi.injected; |
2145 | if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) | 2271 | if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) |
@@ -2154,6 +2280,36 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2154 | return 0; | 2280 | return 0; |
2155 | } | 2281 | } |
2156 | 2282 | ||
2283 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, | ||
2284 | struct kvm_debugregs *dbgregs) | ||
2285 | { | ||
2286 | vcpu_load(vcpu); | ||
2287 | |||
2288 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); | ||
2289 | dbgregs->dr6 = vcpu->arch.dr6; | ||
2290 | dbgregs->dr7 = vcpu->arch.dr7; | ||
2291 | dbgregs->flags = 0; | ||
2292 | |||
2293 | vcpu_put(vcpu); | ||
2294 | } | ||
2295 | |||
2296 | static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | ||
2297 | struct kvm_debugregs *dbgregs) | ||
2298 | { | ||
2299 | if (dbgregs->flags) | ||
2300 | return -EINVAL; | ||
2301 | |||
2302 | vcpu_load(vcpu); | ||
2303 | |||
2304 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); | ||
2305 | vcpu->arch.dr6 = dbgregs->dr6; | ||
2306 | vcpu->arch.dr7 = dbgregs->dr7; | ||
2307 | |||
2308 | vcpu_put(vcpu); | ||
2309 | |||
2310 | return 0; | ||
2311 | } | ||
2312 | |||
2157 | long kvm_arch_vcpu_ioctl(struct file *filp, | 2313 | long kvm_arch_vcpu_ioctl(struct file *filp, |
2158 | unsigned int ioctl, unsigned long arg) | 2314 | unsigned int ioctl, unsigned long arg) |
2159 | { | 2315 | { |
@@ -2308,7 +2464,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
2308 | r = -EFAULT; | 2464 | r = -EFAULT; |
2309 | if (copy_from_user(&mce, argp, sizeof mce)) | 2465 | if (copy_from_user(&mce, argp, sizeof mce)) |
2310 | goto out; | 2466 | goto out; |
2467 | vcpu_load(vcpu); | ||
2311 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); | 2468 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); |
2469 | vcpu_put(vcpu); | ||
2312 | break; | 2470 | break; |
2313 | } | 2471 | } |
2314 | case KVM_GET_VCPU_EVENTS: { | 2472 | case KVM_GET_VCPU_EVENTS: { |
@@ -2332,6 +2490,29 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
2332 | r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); | 2490 | r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); |
2333 | break; | 2491 | break; |
2334 | } | 2492 | } |
2493 | case KVM_GET_DEBUGREGS: { | ||
2494 | struct kvm_debugregs dbgregs; | ||
2495 | |||
2496 | kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); | ||
2497 | |||
2498 | r = -EFAULT; | ||
2499 | if (copy_to_user(argp, &dbgregs, | ||
2500 | sizeof(struct kvm_debugregs))) | ||
2501 | break; | ||
2502 | r = 0; | ||
2503 | break; | ||
2504 | } | ||
2505 | case KVM_SET_DEBUGREGS: { | ||
2506 | struct kvm_debugregs dbgregs; | ||
2507 | |||
2508 | r = -EFAULT; | ||
2509 | if (copy_from_user(&dbgregs, argp, | ||
2510 | sizeof(struct kvm_debugregs))) | ||
2511 | break; | ||
2512 | |||
2513 | r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); | ||
2514 | break; | ||
2515 | } | ||
2335 | default: | 2516 | default: |
2336 | r = -EINVAL; | 2517 | r = -EINVAL; |
2337 | } | 2518 | } |
@@ -2385,7 +2566,7 @@ gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) | |||
2385 | struct kvm_mem_alias *alias; | 2566 | struct kvm_mem_alias *alias; |
2386 | struct kvm_mem_aliases *aliases; | 2567 | struct kvm_mem_aliases *aliases; |
2387 | 2568 | ||
2388 | aliases = rcu_dereference(kvm->arch.aliases); | 2569 | aliases = kvm_aliases(kvm); |
2389 | 2570 | ||
2390 | for (i = 0; i < aliases->naliases; ++i) { | 2571 | for (i = 0; i < aliases->naliases; ++i) { |
2391 | alias = &aliases->aliases[i]; | 2572 | alias = &aliases->aliases[i]; |
@@ -2404,7 +2585,7 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | |||
2404 | struct kvm_mem_alias *alias; | 2585 | struct kvm_mem_alias *alias; |
2405 | struct kvm_mem_aliases *aliases; | 2586 | struct kvm_mem_aliases *aliases; |
2406 | 2587 | ||
2407 | aliases = rcu_dereference(kvm->arch.aliases); | 2588 | aliases = kvm_aliases(kvm); |
2408 | 2589 | ||
2409 | for (i = 0; i < aliases->naliases; ++i) { | 2590 | for (i = 0; i < aliases->naliases; ++i) { |
2410 | alias = &aliases->aliases[i]; | 2591 | alias = &aliases->aliases[i]; |
@@ -2799,11 +2980,13 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2799 | r = -EFAULT; | 2980 | r = -EFAULT; |
2800 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) | 2981 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) |
2801 | goto out; | 2982 | goto out; |
2983 | r = -ENXIO; | ||
2802 | if (irqchip_in_kernel(kvm)) { | 2984 | if (irqchip_in_kernel(kvm)) { |
2803 | __s32 status; | 2985 | __s32 status; |
2804 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 2986 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
2805 | irq_event.irq, irq_event.level); | 2987 | irq_event.irq, irq_event.level); |
2806 | if (ioctl == KVM_IRQ_LINE_STATUS) { | 2988 | if (ioctl == KVM_IRQ_LINE_STATUS) { |
2989 | r = -EFAULT; | ||
2807 | irq_event.status = status; | 2990 | irq_event.status = status; |
2808 | if (copy_to_user(argp, &irq_event, | 2991 | if (copy_to_user(argp, &irq_event, |
2809 | sizeof irq_event)) | 2992 | sizeof irq_event)) |
@@ -3019,6 +3202,18 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | |||
3019 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); | 3202 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); |
3020 | } | 3203 | } |
3021 | 3204 | ||
3205 | static void kvm_set_segment(struct kvm_vcpu *vcpu, | ||
3206 | struct kvm_segment *var, int seg) | ||
3207 | { | ||
3208 | kvm_x86_ops->set_segment(vcpu, var, seg); | ||
3209 | } | ||
3210 | |||
3211 | void kvm_get_segment(struct kvm_vcpu *vcpu, | ||
3212 | struct kvm_segment *var, int seg) | ||
3213 | { | ||
3214 | kvm_x86_ops->get_segment(vcpu, var, seg); | ||
3215 | } | ||
3216 | |||
3022 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3217 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) |
3023 | { | 3218 | { |
3024 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3219 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
@@ -3099,14 +3294,17 @@ static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, | |||
3099 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); | 3294 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); |
3100 | } | 3295 | } |
3101 | 3296 | ||
3102 | static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3297 | static int kvm_write_guest_virt_system(gva_t addr, void *val, |
3103 | struct kvm_vcpu *vcpu, u32 *error) | 3298 | unsigned int bytes, |
3299 | struct kvm_vcpu *vcpu, | ||
3300 | u32 *error) | ||
3104 | { | 3301 | { |
3105 | void *data = val; | 3302 | void *data = val; |
3106 | int r = X86EMUL_CONTINUE; | 3303 | int r = X86EMUL_CONTINUE; |
3107 | 3304 | ||
3108 | while (bytes) { | 3305 | while (bytes) { |
3109 | gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); | 3306 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, |
3307 | PFERR_WRITE_MASK, error); | ||
3110 | unsigned offset = addr & (PAGE_SIZE-1); | 3308 | unsigned offset = addr & (PAGE_SIZE-1); |
3111 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); | 3309 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); |
3112 | int ret; | 3310 | int ret; |
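The rewritten `kvm_write_guest_virt_system()` walks the buffer one page at a time, translating each guest-virtual page separately, since contiguous virtual addresses need not map to contiguous physical pages. A sketch of the same chunking arithmetic, with a dummy `translate()` standing in for the page-table walk (an assumption for illustration):

```c
/* Per-page chunking loop mirroring the hunk above; translate() is a dummy. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096u

/* Pretend translation: identity-map each page (illustrative only). */
static uint64_t translate(uint64_t vaddr)
{
	return vaddr;
}

static void write_across_pages(uint64_t addr, unsigned int bytes)
{
	while (bytes) {
		uint64_t paddr = translate(addr);
		unsigned int offset = addr & (PAGE_SIZE - 1);
		unsigned int towrite = bytes < PAGE_SIZE - offset
					? bytes : PAGE_SIZE - offset;

		printf("write %u bytes at phys 0x%llx\n",
		       towrite, (unsigned long long)paddr);

		bytes -= towrite;
		addr += towrite;
	}
}

int main(void)
{
	write_across_pages(0x1ff0, 40);	/* spans a page boundary: two chunks */
	return 0;
}
```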
@@ -3129,7 +3327,6 @@ out: | |||
3129 | return r; | 3327 | return r; |
3130 | } | 3328 | } |
3131 | 3329 | ||
3132 | |||
3133 | static int emulator_read_emulated(unsigned long addr, | 3330 | static int emulator_read_emulated(unsigned long addr, |
3134 | void *val, | 3331 | void *val, |
3135 | unsigned int bytes, | 3332 | unsigned int bytes, |
@@ -3232,9 +3429,9 @@ mmio: | |||
3232 | } | 3429 | } |
3233 | 3430 | ||
3234 | int emulator_write_emulated(unsigned long addr, | 3431 | int emulator_write_emulated(unsigned long addr, |
3235 | const void *val, | 3432 | const void *val, |
3236 | unsigned int bytes, | 3433 | unsigned int bytes, |
3237 | struct kvm_vcpu *vcpu) | 3434 | struct kvm_vcpu *vcpu) |
3238 | { | 3435 | { |
3239 | /* Crossing a page boundary? */ | 3436 | /* Crossing a page boundary? */ |
3240 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { | 3437 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { |
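The check visible at the end of this hunk relies on the fact that XOR-ing the addresses of the first and last byte leaves a bit set above the page-offset bits exactly when the access straddles two pages. A small demonstration:

```c
/* Demonstrates the page-crossing test from the line above. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096ull
#define PAGE_MASK (~(PAGE_SIZE - 1))

static int crosses_page(uint64_t addr, unsigned int bytes)
{
	return (((addr + bytes - 1) ^ addr) & PAGE_MASK) != 0;
}

int main(void)
{
	printf("%d\n", crosses_page(0x1ff8, 8));  /* 0: ends at 0x1fff, same page */
	printf("%d\n", crosses_page(0x1ff8, 9));  /* 1: last byte is on the next page */
	return 0;
}
```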
@@ -3252,45 +3449,150 @@ int emulator_write_emulated(unsigned long addr, | |||
3252 | } | 3449 | } |
3253 | EXPORT_SYMBOL_GPL(emulator_write_emulated); | 3450 | EXPORT_SYMBOL_GPL(emulator_write_emulated); |
3254 | 3451 | ||
3452 | #define CMPXCHG_TYPE(t, ptr, old, new) \ | ||
3453 | (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) | ||
3454 | |||
3455 | #ifdef CONFIG_X86_64 | ||
3456 | # define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new) | ||
3457 | #else | ||
3458 | # define CMPXCHG64(ptr, old, new) \ | ||
3459 | (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) | ||
3460 | #endif | ||
3461 | |||
3255 | static int emulator_cmpxchg_emulated(unsigned long addr, | 3462 | static int emulator_cmpxchg_emulated(unsigned long addr, |
3256 | const void *old, | 3463 | const void *old, |
3257 | const void *new, | 3464 | const void *new, |
3258 | unsigned int bytes, | 3465 | unsigned int bytes, |
3259 | struct kvm_vcpu *vcpu) | 3466 | struct kvm_vcpu *vcpu) |
3260 | { | 3467 | { |
3261 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); | 3468 | gpa_t gpa; |
3262 | #ifndef CONFIG_X86_64 | 3469 | struct page *page; |
3263 | /* guests cmpxchg8b have to be emulated atomically */ | 3470 | char *kaddr; |
3264 | if (bytes == 8) { | 3471 | bool exchanged; |
3265 | gpa_t gpa; | ||
3266 | struct page *page; | ||
3267 | char *kaddr; | ||
3268 | u64 val; | ||
3269 | 3472 | ||
3270 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); | 3473 | /* guests cmpxchg8b have to be emulated atomically */ |
3474 | if (bytes > 8 || (bytes & (bytes - 1))) | ||
3475 | goto emul_write; | ||
3271 | 3476 | ||
3272 | if (gpa == UNMAPPED_GVA || | 3477 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); |
3273 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | ||
3274 | goto emul_write; | ||
3275 | 3478 | ||
3276 | if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) | 3479 | if (gpa == UNMAPPED_GVA || |
3277 | goto emul_write; | 3480 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
3481 | goto emul_write; | ||
3278 | 3482 | ||
3279 | val = *(u64 *)new; | 3483 | if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) |
3484 | goto emul_write; | ||
3280 | 3485 | ||
3281 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 3486 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
3282 | 3487 | ||
3283 | kaddr = kmap_atomic(page, KM_USER0); | 3488 | kaddr = kmap_atomic(page, KM_USER0); |
3284 | set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); | 3489 | kaddr += offset_in_page(gpa); |
3285 | kunmap_atomic(kaddr, KM_USER0); | 3490 | switch (bytes) { |
3286 | kvm_release_page_dirty(page); | 3491 | case 1: |
3492 | exchanged = CMPXCHG_TYPE(u8, kaddr, old, new); | ||
3493 | break; | ||
3494 | case 2: | ||
3495 | exchanged = CMPXCHG_TYPE(u16, kaddr, old, new); | ||
3496 | break; | ||
3497 | case 4: | ||
3498 | exchanged = CMPXCHG_TYPE(u32, kaddr, old, new); | ||
3499 | break; | ||
3500 | case 8: | ||
3501 | exchanged = CMPXCHG64(kaddr, old, new); | ||
3502 | break; | ||
3503 | default: | ||
3504 | BUG(); | ||
3287 | } | 3505 | } |
3506 | kunmap_atomic(kaddr, KM_USER0); | ||
3507 | kvm_release_page_dirty(page); | ||
3508 | |||
3509 | if (!exchanged) | ||
3510 | return X86EMUL_CMPXCHG_FAILED; | ||
3511 | |||
3512 | kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1); | ||
3513 | |||
3514 | return X86EMUL_CONTINUE; | ||
3515 | |||
3288 | emul_write: | 3516 | emul_write: |
3289 | #endif | 3517 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); |
3290 | 3518 | ||
3291 | return emulator_write_emulated(addr, new, bytes, vcpu); | 3519 | return emulator_write_emulated(addr, new, bytes, vcpu); |
3292 | } | 3520 | } |
3293 | 3521 | ||
3522 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | ||
3523 | { | ||
3524 | /* TODO: String I/O for in kernel device */ | ||
3525 | int r; | ||
3526 | |||
3527 | if (vcpu->arch.pio.in) | ||
3528 | r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, | ||
3529 | vcpu->arch.pio.size, pd); | ||
3530 | else | ||
3531 | r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
3532 | vcpu->arch.pio.port, vcpu->arch.pio.size, | ||
3533 | pd); | ||
3534 | return r; | ||
3535 | } | ||
3536 | |||
3537 | |||
3538 | static int emulator_pio_in_emulated(int size, unsigned short port, void *val, | ||
3539 | unsigned int count, struct kvm_vcpu *vcpu) | ||
3540 | { | ||
3541 | if (vcpu->arch.pio.count) | ||
3542 | goto data_avail; | ||
3543 | |||
3544 | trace_kvm_pio(1, port, size, 1); | ||
3545 | |||
3546 | vcpu->arch.pio.port = port; | ||
3547 | vcpu->arch.pio.in = 1; | ||
3548 | vcpu->arch.pio.count = count; | ||
3549 | vcpu->arch.pio.size = size; | ||
3550 | |||
3551 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
3552 | data_avail: | ||
3553 | memcpy(val, vcpu->arch.pio_data, size * count); | ||
3554 | vcpu->arch.pio.count = 0; | ||
3555 | return 1; | ||
3556 | } | ||
3557 | |||
3558 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
3559 | vcpu->run->io.direction = KVM_EXIT_IO_IN; | ||
3560 | vcpu->run->io.size = size; | ||
3561 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
3562 | vcpu->run->io.count = count; | ||
3563 | vcpu->run->io.port = port; | ||
3564 | |||
3565 | return 0; | ||
3566 | } | ||
3567 | |||
3568 | static int emulator_pio_out_emulated(int size, unsigned short port, | ||
3569 | const void *val, unsigned int count, | ||
3570 | struct kvm_vcpu *vcpu) | ||
3571 | { | ||
3572 | trace_kvm_pio(0, port, size, 1); | ||
3573 | |||
3574 | vcpu->arch.pio.port = port; | ||
3575 | vcpu->arch.pio.in = 0; | ||
3576 | vcpu->arch.pio.count = count; | ||
3577 | vcpu->arch.pio.size = size; | ||
3578 | |||
3579 | memcpy(vcpu->arch.pio_data, val, size * count); | ||
3580 | |||
3581 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
3582 | vcpu->arch.pio.count = 0; | ||
3583 | return 1; | ||
3584 | } | ||
3585 | |||
3586 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
3587 | vcpu->run->io.direction = KVM_EXIT_IO_OUT; | ||
3588 | vcpu->run->io.size = size; | ||
3589 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
3590 | vcpu->run->io.count = count; | ||
3591 | vcpu->run->io.port = port; | ||
3592 | |||
3593 | return 0; | ||
3594 | } | ||
3595 | |||
3294 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | 3596 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) |
3295 | { | 3597 | { |
3296 | return kvm_x86_ops->get_segment_base(vcpu, seg); | 3598 | return kvm_x86_ops->get_segment_base(vcpu, seg); |
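The new `emulator_cmpxchg_emulated()` above maps the guest page and performs a real compare-and-exchange of the operand size (1, 2, 4 or 8 bytes) instead of degrading the instruction to a plain write, reporting `X86EMUL_CMPXCHG_FAILED` when the comparison loses. A user-space sketch of the same size dispatch, using GCC/Clang `__atomic` builtins rather than the kernel's `cmpxchg()`:

```c
/* Size-dispatched compare-and-exchange, mirroring CMPXCHG_TYPE()/CMPXCHG64()
 * in the hunk above; built on compiler builtins, not kernel primitives. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TRY_CMPXCHG(type, ptr, oldp, newp)                              \
	({                                                              \
		type expected, desired;                                 \
		memcpy(&expected, (oldp), sizeof(type));                \
		memcpy(&desired, (newp), sizeof(type));                 \
		__atomic_compare_exchange_n((type *)(ptr), &expected,   \
					    desired, false,             \
					    __ATOMIC_SEQ_CST,           \
					    __ATOMIC_SEQ_CST);          \
	})

static bool cmpxchg_sized(void *ptr, const void *old, const void *new_,
			  unsigned int bytes)
{
	switch (bytes) {
	case 1: return TRY_CMPXCHG(uint8_t,  ptr, old, new_);
	case 2: return TRY_CMPXCHG(uint16_t, ptr, old, new_);
	case 4: return TRY_CMPXCHG(uint32_t, ptr, old, new_);
	case 8: return TRY_CMPXCHG(uint64_t, ptr, old, new_);
	default: return false;	/* power-of-two sizes up to 8 only */
	}
}

int main(void)
{
	uint32_t mem = 5, old = 5, new_ = 9;

	printf("exchanged=%d mem=%u\n", cmpxchg_sized(&mem, &old, &new_, 4), mem);
	old = 7;	/* stale expectation: the exchange must fail */
	printf("exchanged=%d mem=%u\n", cmpxchg_sized(&mem, &old, &new_, 4), mem);
	return 0;
}
```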
@@ -3311,14 +3613,14 @@ int emulate_clts(struct kvm_vcpu *vcpu) | |||
3311 | 3613 | ||
3312 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) | 3614 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) |
3313 | { | 3615 | { |
3314 | return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest); | 3616 | return kvm_get_dr(ctxt->vcpu, dr, dest); |
3315 | } | 3617 | } |
3316 | 3618 | ||
3317 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | 3619 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) |
3318 | { | 3620 | { |
3319 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; | 3621 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; |
3320 | 3622 | ||
3321 | return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask); | 3623 | return kvm_set_dr(ctxt->vcpu, dr, value & mask); |
3322 | } | 3624 | } |
3323 | 3625 | ||
3324 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | 3626 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) |
@@ -3339,12 +3641,167 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | |||
3339 | } | 3641 | } |
3340 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); | 3642 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); |
3341 | 3643 | ||
3644 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | ||
3645 | { | ||
3646 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; | ||
3647 | } | ||
3648 | |||
3649 | static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | ||
3650 | { | ||
3651 | unsigned long value; | ||
3652 | |||
3653 | switch (cr) { | ||
3654 | case 0: | ||
3655 | value = kvm_read_cr0(vcpu); | ||
3656 | break; | ||
3657 | case 2: | ||
3658 | value = vcpu->arch.cr2; | ||
3659 | break; | ||
3660 | case 3: | ||
3661 | value = vcpu->arch.cr3; | ||
3662 | break; | ||
3663 | case 4: | ||
3664 | value = kvm_read_cr4(vcpu); | ||
3665 | break; | ||
3666 | case 8: | ||
3667 | value = kvm_get_cr8(vcpu); | ||
3668 | break; | ||
3669 | default: | ||
3670 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
3671 | return 0; | ||
3672 | } | ||
3673 | |||
3674 | return value; | ||
3675 | } | ||
3676 | |||
3677 | static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | ||
3678 | { | ||
3679 | switch (cr) { | ||
3680 | case 0: | ||
3681 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); | ||
3682 | break; | ||
3683 | case 2: | ||
3684 | vcpu->arch.cr2 = val; | ||
3685 | break; | ||
3686 | case 3: | ||
3687 | kvm_set_cr3(vcpu, val); | ||
3688 | break; | ||
3689 | case 4: | ||
3690 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | ||
3691 | break; | ||
3692 | case 8: | ||
3693 | kvm_set_cr8(vcpu, val & 0xfUL); | ||
3694 | break; | ||
3695 | default: | ||
3696 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
3697 | } | ||
3698 | } | ||
3699 | |||
3700 | static int emulator_get_cpl(struct kvm_vcpu *vcpu) | ||
3701 | { | ||
3702 | return kvm_x86_ops->get_cpl(vcpu); | ||
3703 | } | ||
3704 | |||
3705 | static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) | ||
3706 | { | ||
3707 | kvm_x86_ops->get_gdt(vcpu, dt); | ||
3708 | } | ||
3709 | |||
3710 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | ||
3711 | struct kvm_vcpu *vcpu) | ||
3712 | { | ||
3713 | struct kvm_segment var; | ||
3714 | |||
3715 | kvm_get_segment(vcpu, &var, seg); | ||
3716 | |||
3717 | if (var.unusable) | ||
3718 | return false; | ||
3719 | |||
3720 | if (var.g) | ||
3721 | var.limit >>= 12; | ||
3722 | set_desc_limit(desc, var.limit); | ||
3723 | set_desc_base(desc, (unsigned long)var.base); | ||
3724 | desc->type = var.type; | ||
3725 | desc->s = var.s; | ||
3726 | desc->dpl = var.dpl; | ||
3727 | desc->p = var.present; | ||
3728 | desc->avl = var.avl; | ||
3729 | desc->l = var.l; | ||
3730 | desc->d = var.db; | ||
3731 | desc->g = var.g; | ||
3732 | |||
3733 | return true; | ||
3734 | } | ||
3735 | |||
3736 | static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, | ||
3737 | struct kvm_vcpu *vcpu) | ||
3738 | { | ||
3739 | struct kvm_segment var; | ||
3740 | |||
3741 | /* needed to preserve selector */ | ||
3742 | kvm_get_segment(vcpu, &var, seg); | ||
3743 | |||
3744 | var.base = get_desc_base(desc); | ||
3745 | var.limit = get_desc_limit(desc); | ||
3746 | if (desc->g) | ||
3747 | var.limit = (var.limit << 12) | 0xfff; | ||
3748 | var.type = desc->type; | ||
3749 | var.present = desc->p; | ||
3750 | var.dpl = desc->dpl; | ||
3751 | var.db = desc->d; | ||
3752 | var.s = desc->s; | ||
3753 | var.l = desc->l; | ||
3754 | var.g = desc->g; | ||
3755 | var.avl = desc->avl; | ||
3756 | var.present = desc->p; | ||
3757 | var.unusable = !var.present; | ||
3758 | var.padding = 0; | ||
3759 | |||
3760 | kvm_set_segment(vcpu, &var, seg); | ||
3761 | return; | ||
3762 | } | ||
3763 | |||
3764 | static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) | ||
3765 | { | ||
3766 | struct kvm_segment kvm_seg; | ||
3767 | |||
3768 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
3769 | return kvm_seg.selector; | ||
3770 | } | ||
3771 | |||
3772 | static void emulator_set_segment_selector(u16 sel, int seg, | ||
3773 | struct kvm_vcpu *vcpu) | ||
3774 | { | ||
3775 | struct kvm_segment kvm_seg; | ||
3776 | |||
3777 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
3778 | kvm_seg.selector = sel; | ||
3779 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
3780 | } | ||
3781 | |||
3782 | static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | ||
3783 | { | ||
3784 | kvm_x86_ops->set_rflags(vcpu, rflags); | ||
3785 | } | ||
3786 | |||
3342 | static struct x86_emulate_ops emulate_ops = { | 3787 | static struct x86_emulate_ops emulate_ops = { |
3343 | .read_std = kvm_read_guest_virt_system, | 3788 | .read_std = kvm_read_guest_virt_system, |
3789 | .write_std = kvm_write_guest_virt_system, | ||
3344 | .fetch = kvm_fetch_guest_virt, | 3790 | .fetch = kvm_fetch_guest_virt, |
3345 | .read_emulated = emulator_read_emulated, | 3791 | .read_emulated = emulator_read_emulated, |
3346 | .write_emulated = emulator_write_emulated, | 3792 | .write_emulated = emulator_write_emulated, |
3347 | .cmpxchg_emulated = emulator_cmpxchg_emulated, | 3793 | .cmpxchg_emulated = emulator_cmpxchg_emulated, |
3794 | .pio_in_emulated = emulator_pio_in_emulated, | ||
3795 | .pio_out_emulated = emulator_pio_out_emulated, | ||
3796 | .get_cached_descriptor = emulator_get_cached_descriptor, | ||
3797 | .set_cached_descriptor = emulator_set_cached_descriptor, | ||
3798 | .get_segment_selector = emulator_get_segment_selector, | ||
3799 | .set_segment_selector = emulator_set_segment_selector, | ||
3800 | .get_gdt = emulator_get_gdt, | ||
3801 | .get_cr = emulator_get_cr, | ||
3802 | .set_cr = emulator_set_cr, | ||
3803 | .cpl = emulator_get_cpl, | ||
3804 | .set_rflags = emulator_set_rflags, | ||
3348 | }; | 3805 | }; |
3349 | 3806 | ||
3350 | static void cache_all_regs(struct kvm_vcpu *vcpu) | 3807 | static void cache_all_regs(struct kvm_vcpu *vcpu) |
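The cached-descriptor helpers added above translate between `struct kvm_segment` (byte-granular limit) and the architectural descriptor: with the granularity bit set, the stored limit is in 4 KiB units, so it is shifted down by 12 when reading and shifted up with the low 0xfff filled in when writing back; `mk_cr_64()` likewise splices a 32-bit value into the low half of a control register. A small round-trip sketch of the limit conversion:

```c
/* Granularity-bit limit conversion as done by the cached-descriptor
 * helpers above (G=1 means the descriptor stores the limit in 4 KiB units). */
#include <stdint.h>
#include <stdio.h>

/* descriptor-format limit -> byte-granular limit */
static uint32_t desc_limit_to_bytes(uint32_t limit, int g)
{
	return g ? (limit << 12) | 0xfff : limit;
}

/* byte-granular limit -> descriptor-format limit */
static uint32_t bytes_to_desc_limit(uint32_t limit, int g)
{
	return g ? limit >> 12 : limit;
}

int main(void)
{
	uint32_t byte_limit = 0xffffffff;	/* 4 GiB flat segment */
	uint32_t stored = bytes_to_desc_limit(byte_limit, 1);

	printf("stored=0x%x back=0x%x\n",
	       stored, desc_limit_to_bytes(stored, 1));
	return 0;
}
```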
@@ -3375,14 +3832,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3375 | cache_all_regs(vcpu); | 3832 | cache_all_regs(vcpu); |
3376 | 3833 | ||
3377 | vcpu->mmio_is_write = 0; | 3834 | vcpu->mmio_is_write = 0; |
3378 | vcpu->arch.pio.string = 0; | ||
3379 | 3835 | ||
3380 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 3836 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
3381 | int cs_db, cs_l; | 3837 | int cs_db, cs_l; |
3382 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 3838 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
3383 | 3839 | ||
3384 | vcpu->arch.emulate_ctxt.vcpu = vcpu; | 3840 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
3385 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); | 3841 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); |
3842 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); | ||
3386 | vcpu->arch.emulate_ctxt.mode = | 3843 | vcpu->arch.emulate_ctxt.mode = |
3387 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | 3844 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : |
3388 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | 3845 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) |
@@ -3391,6 +3848,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3391 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 3848 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
3392 | 3849 | ||
3393 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 3850 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
3851 | trace_kvm_emulate_insn_start(vcpu); | ||
3394 | 3852 | ||
3395 | /* Only allow emulation of specific instructions on #UD | 3853 | /* Only allow emulation of specific instructions on #UD |
3396 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ | 3854 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ |
@@ -3423,6 +3881,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3423 | ++vcpu->stat.insn_emulation; | 3881 | ++vcpu->stat.insn_emulation; |
3424 | if (r) { | 3882 | if (r) { |
3425 | ++vcpu->stat.insn_emulation_fail; | 3883 | ++vcpu->stat.insn_emulation_fail; |
3884 | trace_kvm_emulate_insn_failed(vcpu); | ||
3426 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | 3885 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) |
3427 | return EMULATE_DONE; | 3886 | return EMULATE_DONE; |
3428 | return EMULATE_FAIL; | 3887 | return EMULATE_FAIL; |
@@ -3434,16 +3893,20 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3434 | return EMULATE_DONE; | 3893 | return EMULATE_DONE; |
3435 | } | 3894 | } |
3436 | 3895 | ||
3896 | restart: | ||
3437 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 3897 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
3438 | shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; | 3898 | shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; |
3439 | 3899 | ||
3440 | if (r == 0) | 3900 | if (r == 0) |
3441 | kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); | 3901 | kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); |
3442 | 3902 | ||
3443 | if (vcpu->arch.pio.string) | 3903 | if (vcpu->arch.pio.count) { |
3904 | if (!vcpu->arch.pio.in) | ||
3905 | vcpu->arch.pio.count = 0; | ||
3444 | return EMULATE_DO_MMIO; | 3906 | return EMULATE_DO_MMIO; |
3907 | } | ||
3445 | 3908 | ||
3446 | if ((r || vcpu->mmio_is_write) && run) { | 3909 | if (r || vcpu->mmio_is_write) { |
3447 | run->exit_reason = KVM_EXIT_MMIO; | 3910 | run->exit_reason = KVM_EXIT_MMIO; |
3448 | run->mmio.phys_addr = vcpu->mmio_phys_addr; | 3911 | run->mmio.phys_addr = vcpu->mmio_phys_addr; |
3449 | memcpy(run->mmio.data, vcpu->mmio_data, 8); | 3912 | memcpy(run->mmio.data, vcpu->mmio_data, 8); |
@@ -3453,222 +3916,41 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3453 | 3916 | ||
3454 | if (r) { | 3917 | if (r) { |
3455 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | 3918 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) |
3456 | return EMULATE_DONE; | 3919 | goto done; |
3457 | if (!vcpu->mmio_needed) { | 3920 | if (!vcpu->mmio_needed) { |
3921 | ++vcpu->stat.insn_emulation_fail; | ||
3922 | trace_kvm_emulate_insn_failed(vcpu); | ||
3458 | kvm_report_emulation_failure(vcpu, "mmio"); | 3923 | kvm_report_emulation_failure(vcpu, "mmio"); |
3459 | return EMULATE_FAIL; | 3924 | return EMULATE_FAIL; |
3460 | } | 3925 | } |
3461 | return EMULATE_DO_MMIO; | 3926 | return EMULATE_DO_MMIO; |
3462 | } | 3927 | } |
3463 | 3928 | ||
3464 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | ||
3465 | |||
3466 | if (vcpu->mmio_is_write) { | 3929 | if (vcpu->mmio_is_write) { |
3467 | vcpu->mmio_needed = 0; | 3930 | vcpu->mmio_needed = 0; |
3468 | return EMULATE_DO_MMIO; | 3931 | return EMULATE_DO_MMIO; |
3469 | } | 3932 | } |
3470 | 3933 | ||
3471 | return EMULATE_DONE; | 3934 | done: |
3472 | } | 3935 | if (vcpu->arch.exception.pending) |
3473 | EXPORT_SYMBOL_GPL(emulate_instruction); | 3936 | vcpu->arch.emulate_ctxt.restart = false; |
3474 | |||
3475 | static int pio_copy_data(struct kvm_vcpu *vcpu) | ||
3476 | { | ||
3477 | void *p = vcpu->arch.pio_data; | ||
3478 | gva_t q = vcpu->arch.pio.guest_gva; | ||
3479 | unsigned bytes; | ||
3480 | int ret; | ||
3481 | u32 error_code; | ||
3482 | |||
3483 | bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; | ||
3484 | if (vcpu->arch.pio.in) | ||
3485 | ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code); | ||
3486 | else | ||
3487 | ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code); | ||
3488 | |||
3489 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
3490 | kvm_inject_page_fault(vcpu, q, error_code); | ||
3491 | |||
3492 | return ret; | ||
3493 | } | ||
3494 | |||
3495 | int complete_pio(struct kvm_vcpu *vcpu) | ||
3496 | { | ||
3497 | struct kvm_pio_request *io = &vcpu->arch.pio; | ||
3498 | long delta; | ||
3499 | int r; | ||
3500 | unsigned long val; | ||
3501 | |||
3502 | if (!io->string) { | ||
3503 | if (io->in) { | ||
3504 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
3505 | memcpy(&val, vcpu->arch.pio_data, io->size); | ||
3506 | kvm_register_write(vcpu, VCPU_REGS_RAX, val); | ||
3507 | } | ||
3508 | } else { | ||
3509 | if (io->in) { | ||
3510 | r = pio_copy_data(vcpu); | ||
3511 | if (r) | ||
3512 | goto out; | ||
3513 | } | ||
3514 | |||
3515 | delta = 1; | ||
3516 | if (io->rep) { | ||
3517 | delta *= io->cur_count; | ||
3518 | /* | ||
3519 | * The size of the register should really depend on | ||
3520 | * current address size. | ||
3521 | */ | ||
3522 | val = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
3523 | val -= delta; | ||
3524 | kvm_register_write(vcpu, VCPU_REGS_RCX, val); | ||
3525 | } | ||
3526 | if (io->down) | ||
3527 | delta = -delta; | ||
3528 | delta *= io->size; | ||
3529 | if (io->in) { | ||
3530 | val = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
3531 | val += delta; | ||
3532 | kvm_register_write(vcpu, VCPU_REGS_RDI, val); | ||
3533 | } else { | ||
3534 | val = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
3535 | val += delta; | ||
3536 | kvm_register_write(vcpu, VCPU_REGS_RSI, val); | ||
3537 | } | ||
3538 | } | ||
3539 | out: | ||
3540 | io->count -= io->cur_count; | ||
3541 | io->cur_count = 0; | ||
3542 | |||
3543 | return 0; | ||
3544 | } | ||
3545 | |||
3546 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | ||
3547 | { | ||
3548 | /* TODO: String I/O for in kernel device */ | ||
3549 | int r; | ||
3550 | |||
3551 | if (vcpu->arch.pio.in) | ||
3552 | r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, | ||
3553 | vcpu->arch.pio.size, pd); | ||
3554 | else | ||
3555 | r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
3556 | vcpu->arch.pio.port, vcpu->arch.pio.size, | ||
3557 | pd); | ||
3558 | return r; | ||
3559 | } | ||
3560 | |||
3561 | static int pio_string_write(struct kvm_vcpu *vcpu) | ||
3562 | { | ||
3563 | struct kvm_pio_request *io = &vcpu->arch.pio; | ||
3564 | void *pd = vcpu->arch.pio_data; | ||
3565 | int i, r = 0; | ||
3566 | |||
3567 | for (i = 0; i < io->cur_count; i++) { | ||
3568 | if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
3569 | io->port, io->size, pd)) { | ||
3570 | r = -EOPNOTSUPP; | ||
3571 | break; | ||
3572 | } | ||
3573 | pd += io->size; | ||
3574 | } | ||
3575 | return r; | ||
3576 | } | ||
3577 | |||
3578 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) | ||
3579 | { | ||
3580 | unsigned long val; | ||
3581 | 3937 | ||
3582 | trace_kvm_pio(!in, port, size, 1); | 3938 | if (vcpu->arch.emulate_ctxt.restart) |
3939 | goto restart; | ||
3583 | 3940 | ||
3584 | vcpu->run->exit_reason = KVM_EXIT_IO; | 3941 | return EMULATE_DONE; |
3585 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||
3586 | vcpu->run->io.size = vcpu->arch.pio.size = size; | ||
3587 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
3588 | vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1; | ||
3589 | vcpu->run->io.port = vcpu->arch.pio.port = port; | ||
3590 | vcpu->arch.pio.in = in; | ||
3591 | vcpu->arch.pio.string = 0; | ||
3592 | vcpu->arch.pio.down = 0; | ||
3593 | vcpu->arch.pio.rep = 0; | ||
3594 | |||
3595 | if (!vcpu->arch.pio.in) { | ||
3596 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
3597 | memcpy(vcpu->arch.pio_data, &val, 4); | ||
3598 | } | ||
3599 | |||
3600 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
3601 | complete_pio(vcpu); | ||
3602 | return 1; | ||
3603 | } | ||
3604 | return 0; | ||
3605 | } | 3942 | } |
3606 | EXPORT_SYMBOL_GPL(kvm_emulate_pio); | 3943 | EXPORT_SYMBOL_GPL(emulate_instruction); |
3607 | 3944 | ||
3608 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | 3945 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) |
3609 | int size, unsigned long count, int down, | ||
3610 | gva_t address, int rep, unsigned port) | ||
3611 | { | 3946 | { |
3612 | unsigned now, in_page; | 3947 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
3613 | int ret = 0; | 3948 | int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); |
3614 | 3949 | /* do not return to emulator after return from userspace */ | |
3615 | trace_kvm_pio(!in, port, size, count); | 3950 | vcpu->arch.pio.count = 0; |
3616 | |||
3617 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
3618 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||
3619 | vcpu->run->io.size = vcpu->arch.pio.size = size; | ||
3620 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
3621 | vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count; | ||
3622 | vcpu->run->io.port = vcpu->arch.pio.port = port; | ||
3623 | vcpu->arch.pio.in = in; | ||
3624 | vcpu->arch.pio.string = 1; | ||
3625 | vcpu->arch.pio.down = down; | ||
3626 | vcpu->arch.pio.rep = rep; | ||
3627 | |||
3628 | if (!count) { | ||
3629 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
3630 | return 1; | ||
3631 | } | ||
3632 | |||
3633 | if (!down) | ||
3634 | in_page = PAGE_SIZE - offset_in_page(address); | ||
3635 | else | ||
3636 | in_page = offset_in_page(address) + size; | ||
3637 | now = min(count, (unsigned long)in_page / size); | ||
3638 | if (!now) | ||
3639 | now = 1; | ||
3640 | if (down) { | ||
3641 | /* | ||
3642 | * String I/O in reverse. Yuck. Kill the guest, fix later. | ||
3643 | */ | ||
3644 | pr_unimpl(vcpu, "guest string pio down\n"); | ||
3645 | kvm_inject_gp(vcpu, 0); | ||
3646 | return 1; | ||
3647 | } | ||
3648 | vcpu->run->io.count = now; | ||
3649 | vcpu->arch.pio.cur_count = now; | ||
3650 | |||
3651 | if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count) | ||
3652 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
3653 | |||
3654 | vcpu->arch.pio.guest_gva = address; | ||
3655 | |||
3656 | if (!vcpu->arch.pio.in) { | ||
3657 | /* string PIO write */ | ||
3658 | ret = pio_copy_data(vcpu); | ||
3659 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
3660 | return 1; | ||
3661 | if (ret == 0 && !pio_string_write(vcpu)) { | ||
3662 | complete_pio(vcpu); | ||
3663 | if (vcpu->arch.pio.count == 0) | ||
3664 | ret = 1; | ||
3665 | } | ||
3666 | } | ||
3667 | /* no string PIO read support yet */ | ||
3668 | |||
3669 | return ret; | 3951 | return ret; |
3670 | } | 3952 | } |
3671 | EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); | 3953 | EXPORT_SYMBOL_GPL(kvm_fast_pio_out); |
3672 | 3954 | ||
3673 | static void bounce_off(void *info) | 3955 | static void bounce_off(void *info) |
3674 | { | 3956 | { |
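The PIO rework above replaces the old in-kernel string-PIO machinery with emulator-driven requests: the request is parked in `vcpu->arch.pio`, an in-kernel device gets first shot via `kernel_pio()`, and otherwise `kvm_run->io` is filled in and the vCPU exits to userspace; a completed "in" is picked up through `pio.count` on the next entry, while `kvm_fast_pio_out()` clears the count so control does not return to the emulator. A toy state machine of that defer-and-complete idea (names are illustrative, not KVM's):

```c
/* Toy deferred-PIO state machine: try an "in-kernel device" first,
 * otherwise park the request and let the caller complete it later. */
#include <stdio.h>
#include <string.h>

struct pio_state {
	int pending;			/* like vcpu->arch.pio.count */
	unsigned char data[4];
};

static int kernel_device_handles(unsigned short port)
{
	return port == 0x70;		/* pretend only this port has an in-kernel model */
}

/* returns 1 if finished "in kernel", 0 if the caller must complete it */
static int pio_in(struct pio_state *s, unsigned short port, void *val)
{
	if (s->pending) {		/* completion path: data arrived from the caller */
		memcpy(val, s->data, sizeof(s->data));
		s->pending = 0;
		return 1;
	}
	if (kernel_device_handles(port)) {
		memset(val, 0xab, sizeof(s->data));
		return 1;
	}
	s->pending = 1;			/* park the request and bounce out */
	return 0;
}

int main(void)
{
	struct pio_state s = { 0 };
	unsigned char buf[4];

	if (!pio_in(&s, 0x3f8, buf)) {			/* deferred */
		memcpy(s.data, "\x11\x22\x33\x44", 4);	/* "userspace" fills it in */
		pio_in(&s, 0x3f8, buf);			/* re-entry completes the read */
	}
	printf("first byte: 0x%02x\n", buf[0]);
	return 0;
}
```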
@@ -3743,6 +4025,51 @@ static void kvm_timer_init(void) | |||
3743 | } | 4025 | } |
3744 | } | 4026 | } |
3745 | 4027 | ||
4028 | static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); | ||
4029 | |||
4030 | static int kvm_is_in_guest(void) | ||
4031 | { | ||
4032 | return percpu_read(current_vcpu) != NULL; | ||
4033 | } | ||
4034 | |||
4035 | static int kvm_is_user_mode(void) | ||
4036 | { | ||
4037 | int user_mode = 3; | ||
4038 | |||
4039 | if (percpu_read(current_vcpu)) | ||
4040 | user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu)); | ||
4041 | |||
4042 | return user_mode != 0; | ||
4043 | } | ||
4044 | |||
4045 | static unsigned long kvm_get_guest_ip(void) | ||
4046 | { | ||
4047 | unsigned long ip = 0; | ||
4048 | |||
4049 | if (percpu_read(current_vcpu)) | ||
4050 | ip = kvm_rip_read(percpu_read(current_vcpu)); | ||
4051 | |||
4052 | return ip; | ||
4053 | } | ||
4054 | |||
4055 | static struct perf_guest_info_callbacks kvm_guest_cbs = { | ||
4056 | .is_in_guest = kvm_is_in_guest, | ||
4057 | .is_user_mode = kvm_is_user_mode, | ||
4058 | .get_guest_ip = kvm_get_guest_ip, | ||
4059 | }; | ||
4060 | |||
4061 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu) | ||
4062 | { | ||
4063 | percpu_write(current_vcpu, vcpu); | ||
4064 | } | ||
4065 | EXPORT_SYMBOL_GPL(kvm_before_handle_nmi); | ||
4066 | |||
4067 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu) | ||
4068 | { | ||
4069 | percpu_write(current_vcpu, NULL); | ||
4070 | } | ||
4071 | EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); | ||
4072 | |||
3746 | int kvm_arch_init(void *opaque) | 4073 | int kvm_arch_init(void *opaque) |
3747 | { | 4074 | { |
3748 | int r; | 4075 | int r; |
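The hunk above lets perf attribute NMI samples to the guest: `kvm_before_handle_nmi()`/`kvm_after_handle_nmi()` publish the running vCPU in a per-CPU variable, and the registered callbacks read it to answer "is this a guest sample, at what IP, in what mode". A user-space sketch of the bracketing idea, using C11 thread-local storage as a stand-in for per-CPU data:

```c
/* "Publish the current guest context around NMI handling", with TLS
 * standing in for a per-CPU variable (an assumption for illustration). */
#include <stdio.h>

struct vcpu { unsigned long rip; };

static _Thread_local struct vcpu *current_vcpu;	/* per-CPU in the kernel */

static void before_handle_nmi(struct vcpu *v) { current_vcpu = v; }
static void after_handle_nmi(void)            { current_vcpu = NULL; }

/* What a profiler callback would ask: are we in a guest, and where? */
static int is_in_guest(void)        { return current_vcpu != NULL; }
static unsigned long guest_ip(void) { return current_vcpu ? current_vcpu->rip : 0; }

int main(void)
{
	struct vcpu v = { .rip = 0x401000ul };

	before_handle_nmi(&v);
	printf("in_guest=%d ip=0x%lx\n", is_in_guest(), guest_ip());
	after_handle_nmi();
	printf("in_guest=%d ip=0x%lx\n", is_in_guest(), guest_ip());
	return 0;
}
```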
@@ -3779,6 +4106,8 @@ int kvm_arch_init(void *opaque) | |||
3779 | 4106 | ||
3780 | kvm_timer_init(); | 4107 | kvm_timer_init(); |
3781 | 4108 | ||
4109 | perf_register_guest_info_callbacks(&kvm_guest_cbs); | ||
4110 | |||
3782 | return 0; | 4111 | return 0; |
3783 | 4112 | ||
3784 | out: | 4113 | out: |
@@ -3787,6 +4116,8 @@ out: | |||
3787 | 4116 | ||
3788 | void kvm_arch_exit(void) | 4117 | void kvm_arch_exit(void) |
3789 | { | 4118 | { |
4119 | perf_unregister_guest_info_callbacks(&kvm_guest_cbs); | ||
4120 | |||
3790 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | 4121 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) |
3791 | cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, | 4122 | cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, |
3792 | CPUFREQ_TRANSITION_NOTIFIER); | 4123 | CPUFREQ_TRANSITION_NOTIFIER); |
@@ -3942,85 +4273,20 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
3942 | return emulator_write_emulated(rip, instruction, 3, vcpu); | 4273 | return emulator_write_emulated(rip, instruction, 3, vcpu); |
3943 | } | 4274 | } |
3944 | 4275 | ||
3945 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | ||
3946 | { | ||
3947 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; | ||
3948 | } | ||
3949 | |||
3950 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | 4276 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) |
3951 | { | 4277 | { |
3952 | struct descriptor_table dt = { limit, base }; | 4278 | struct desc_ptr dt = { limit, base }; |
3953 | 4279 | ||
3954 | kvm_x86_ops->set_gdt(vcpu, &dt); | 4280 | kvm_x86_ops->set_gdt(vcpu, &dt); |
3955 | } | 4281 | } |
3956 | 4282 | ||
3957 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | 4283 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) |
3958 | { | 4284 | { |
3959 | struct descriptor_table dt = { limit, base }; | 4285 | struct desc_ptr dt = { limit, base }; |
3960 | 4286 | ||
3961 | kvm_x86_ops->set_idt(vcpu, &dt); | 4287 | kvm_x86_ops->set_idt(vcpu, &dt); |
3962 | } | 4288 | } |
3963 | 4289 | ||
3964 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | ||
3965 | unsigned long *rflags) | ||
3966 | { | ||
3967 | kvm_lmsw(vcpu, msw); | ||
3968 | *rflags = kvm_get_rflags(vcpu); | ||
3969 | } | ||
3970 | |||
3971 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | ||
3972 | { | ||
3973 | unsigned long value; | ||
3974 | |||
3975 | switch (cr) { | ||
3976 | case 0: | ||
3977 | value = kvm_read_cr0(vcpu); | ||
3978 | break; | ||
3979 | case 2: | ||
3980 | value = vcpu->arch.cr2; | ||
3981 | break; | ||
3982 | case 3: | ||
3983 | value = vcpu->arch.cr3; | ||
3984 | break; | ||
3985 | case 4: | ||
3986 | value = kvm_read_cr4(vcpu); | ||
3987 | break; | ||
3988 | case 8: | ||
3989 | value = kvm_get_cr8(vcpu); | ||
3990 | break; | ||
3991 | default: | ||
3992 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
3993 | return 0; | ||
3994 | } | ||
3995 | |||
3996 | return value; | ||
3997 | } | ||
3998 | |||
3999 | void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | ||
4000 | unsigned long *rflags) | ||
4001 | { | ||
4002 | switch (cr) { | ||
4003 | case 0: | ||
4004 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); | ||
4005 | *rflags = kvm_get_rflags(vcpu); | ||
4006 | break; | ||
4007 | case 2: | ||
4008 | vcpu->arch.cr2 = val; | ||
4009 | break; | ||
4010 | case 3: | ||
4011 | kvm_set_cr3(vcpu, val); | ||
4012 | break; | ||
4013 | case 4: | ||
4014 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | ||
4015 | break; | ||
4016 | case 8: | ||
4017 | kvm_set_cr8(vcpu, val & 0xfUL); | ||
4018 | break; | ||
4019 | default: | ||
4020 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
4021 | } | ||
4022 | } | ||
4023 | |||
4024 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | 4290 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) |
4025 | { | 4291 | { |
4026 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; | 4292 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; |
@@ -4084,9 +4350,13 @@ int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | |||
4084 | { | 4350 | { |
4085 | struct kvm_cpuid_entry2 *best; | 4351 | struct kvm_cpuid_entry2 *best; |
4086 | 4352 | ||
4353 | best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); | ||
4354 | if (!best || best->eax < 0x80000008) | ||
4355 | goto not_found; | ||
4087 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | 4356 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); |
4088 | if (best) | 4357 | if (best) |
4089 | return best->eax & 0xff; | 4358 | return best->eax & 0xff; |
4359 | not_found: | ||
4090 | return 36; | 4360 | return 36; |
4091 | } | 4361 | } |
4092 | 4362 | ||
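The `cpuid_maxphyaddr()` fix above first confirms that the maximum extended leaf (0x80000000) reaches 0x80000008 before trusting that leaf for the physical-address width, falling back to 36 bits otherwise. The same precaution in a user-space sketch, using GCC/Clang's `<cpuid.h>` (x86 only):

```c
/* "Check the maximum extended leaf before using it" in user space. */
#include <cpuid.h>
#include <stdio.h>

static unsigned int max_phys_addr_bits(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* __get_cpuid() refuses leaves above the CPU's reported maximum. */
	if (!__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx))
		return 36;		/* conservative default */

	return eax & 0xff;		/* bits [7:0] = physical address width */
}

int main(void)
{
	printf("MAXPHYADDR = %u bits\n", max_phys_addr_bits());
	return 0;
}
```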
@@ -4200,9 +4470,13 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) | |||
4200 | { | 4470 | { |
4201 | /* try to reinject previous events if any */ | 4471 | /* try to reinject previous events if any */ |
4202 | if (vcpu->arch.exception.pending) { | 4472 | if (vcpu->arch.exception.pending) { |
4473 | trace_kvm_inj_exception(vcpu->arch.exception.nr, | ||
4474 | vcpu->arch.exception.has_error_code, | ||
4475 | vcpu->arch.exception.error_code); | ||
4203 | kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, | 4476 | kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, |
4204 | vcpu->arch.exception.has_error_code, | 4477 | vcpu->arch.exception.has_error_code, |
4205 | vcpu->arch.exception.error_code); | 4478 | vcpu->arch.exception.error_code, |
4479 | vcpu->arch.exception.reinject); | ||
4206 | return; | 4480 | return; |
4207 | } | 4481 | } |
4208 | 4482 | ||
@@ -4432,7 +4706,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
4432 | } | 4706 | } |
4433 | 4707 | ||
4434 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 4708 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
4435 | post_kvm_run_save(vcpu); | ||
4436 | 4709 | ||
4437 | vapic_exit(vcpu); | 4710 | vapic_exit(vcpu); |
4438 | 4711 | ||
@@ -4460,26 +4733,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
4460 | if (!irqchip_in_kernel(vcpu->kvm)) | 4733 | if (!irqchip_in_kernel(vcpu->kvm)) |
4461 | kvm_set_cr8(vcpu, kvm_run->cr8); | 4734 | kvm_set_cr8(vcpu, kvm_run->cr8); |
4462 | 4735 | ||
4463 | if (vcpu->arch.pio.cur_count) { | 4736 | if (vcpu->arch.pio.count || vcpu->mmio_needed || |
4464 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 4737 | vcpu->arch.emulate_ctxt.restart) { |
4465 | r = complete_pio(vcpu); | 4738 | if (vcpu->mmio_needed) { |
4466 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 4739 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); |
4467 | if (r) | 4740 | vcpu->mmio_read_completed = 1; |
4468 | goto out; | 4741 | vcpu->mmio_needed = 0; |
4469 | } | 4742 | } |
4470 | if (vcpu->mmio_needed) { | ||
4471 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | ||
4472 | vcpu->mmio_read_completed = 1; | ||
4473 | vcpu->mmio_needed = 0; | ||
4474 | |||
4475 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 4743 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
4476 | r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, | 4744 | r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); |
4477 | EMULTYPE_NO_DECODE); | ||
4478 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 4745 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
4479 | if (r == EMULATE_DO_MMIO) { | 4746 | if (r == EMULATE_DO_MMIO) { |
4480 | /* | ||
4481 | * Read-modify-write. Back to userspace. | ||
4482 | */ | ||
4483 | r = 0; | 4747 | r = 0; |
4484 | goto out; | 4748 | goto out; |
4485 | } | 4749 | } |
@@ -4491,6 +4755,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
4491 | r = __vcpu_run(vcpu); | 4755 | r = __vcpu_run(vcpu); |
4492 | 4756 | ||
4493 | out: | 4757 | out: |
4758 | post_kvm_run_save(vcpu); | ||
4494 | if (vcpu->sigset_active) | 4759 | if (vcpu->sigset_active) |
4495 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 4760 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
4496 | 4761 | ||
@@ -4562,12 +4827,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
4562 | return 0; | 4827 | return 0; |
4563 | } | 4828 | } |
4564 | 4829 | ||
4565 | void kvm_get_segment(struct kvm_vcpu *vcpu, | ||
4566 | struct kvm_segment *var, int seg) | ||
4567 | { | ||
4568 | kvm_x86_ops->get_segment(vcpu, var, seg); | ||
4569 | } | ||
4570 | |||
4571 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | 4830 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) |
4572 | { | 4831 | { |
4573 | struct kvm_segment cs; | 4832 | struct kvm_segment cs; |
@@ -4581,7 +4840,7 @@ EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); | |||
4581 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | 4840 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, |
4582 | struct kvm_sregs *sregs) | 4841 | struct kvm_sregs *sregs) |
4583 | { | 4842 | { |
4584 | struct descriptor_table dt; | 4843 | struct desc_ptr dt; |
4585 | 4844 | ||
4586 | vcpu_load(vcpu); | 4845 | vcpu_load(vcpu); |
4587 | 4846 | ||
@@ -4596,11 +4855,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
4596 | kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); | 4855 | kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); |
4597 | 4856 | ||
4598 | kvm_x86_ops->get_idt(vcpu, &dt); | 4857 | kvm_x86_ops->get_idt(vcpu, &dt); |
4599 | sregs->idt.limit = dt.limit; | 4858 | sregs->idt.limit = dt.size; |
4600 | sregs->idt.base = dt.base; | 4859 | sregs->idt.base = dt.address; |
4601 | kvm_x86_ops->get_gdt(vcpu, &dt); | 4860 | kvm_x86_ops->get_gdt(vcpu, &dt); |
4602 | sregs->gdt.limit = dt.limit; | 4861 | sregs->gdt.limit = dt.size; |
4603 | sregs->gdt.base = dt.base; | 4862 | sregs->gdt.base = dt.address; |
4604 | 4863 | ||
4605 | sregs->cr0 = kvm_read_cr0(vcpu); | 4864 | sregs->cr0 = kvm_read_cr0(vcpu); |
4606 | sregs->cr2 = vcpu->arch.cr2; | 4865 | sregs->cr2 = vcpu->arch.cr2; |
@@ -4639,563 +4898,33 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | |||
4639 | return 0; | 4898 | return 0; |
4640 | } | 4899 | } |
4641 | 4900 | ||
4642 | static void kvm_set_segment(struct kvm_vcpu *vcpu, | 4901 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, |
4643 | struct kvm_segment *var, int seg) | 4902 | bool has_error_code, u32 error_code) |
4644 | { | ||
4645 | kvm_x86_ops->set_segment(vcpu, var, seg); | ||
4646 | } | ||
4647 | |||
4648 | static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, | ||
4649 | struct kvm_segment *kvm_desct) | ||
4650 | { | ||
4651 | kvm_desct->base = get_desc_base(seg_desc); | ||
4652 | kvm_desct->limit = get_desc_limit(seg_desc); | ||
4653 | if (seg_desc->g) { | ||
4654 | kvm_desct->limit <<= 12; | ||
4655 | kvm_desct->limit |= 0xfff; | ||
4656 | } | ||
4657 | kvm_desct->selector = selector; | ||
4658 | kvm_desct->type = seg_desc->type; | ||
4659 | kvm_desct->present = seg_desc->p; | ||
4660 | kvm_desct->dpl = seg_desc->dpl; | ||
4661 | kvm_desct->db = seg_desc->d; | ||
4662 | kvm_desct->s = seg_desc->s; | ||
4663 | kvm_desct->l = seg_desc->l; | ||
4664 | kvm_desct->g = seg_desc->g; | ||
4665 | kvm_desct->avl = seg_desc->avl; | ||
4666 | if (!selector) | ||
4667 | kvm_desct->unusable = 1; | ||
4668 | else | ||
4669 | kvm_desct->unusable = 0; | ||
4670 | kvm_desct->padding = 0; | ||
4671 | } | ||
4672 | |||
4673 | static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu, | ||
4674 | u16 selector, | ||
4675 | struct descriptor_table *dtable) | ||
4676 | { | ||
4677 | if (selector & 1 << 2) { | ||
4678 | struct kvm_segment kvm_seg; | ||
4679 | |||
4680 | kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); | ||
4681 | |||
4682 | if (kvm_seg.unusable) | ||
4683 | dtable->limit = 0; | ||
4684 | else | ||
4685 | dtable->limit = kvm_seg.limit; | ||
4686 | dtable->base = kvm_seg.base; | ||
4687 | } | ||
4688 | else | ||
4689 | kvm_x86_ops->get_gdt(vcpu, dtable); | ||
4690 | } | ||
4691 | |||
4692 | /* allowed just for 8 bytes segments */ | ||
4693 | static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | ||
4694 | struct desc_struct *seg_desc) | ||
4695 | { | ||
4696 | struct descriptor_table dtable; | ||
4697 | u16 index = selector >> 3; | ||
4698 | int ret; | ||
4699 | u32 err; | ||
4700 | gva_t addr; | ||
4701 | |||
4702 | get_segment_descriptor_dtable(vcpu, selector, &dtable); | ||
4703 | |||
4704 | if (dtable.limit < index * 8 + 7) { | ||
4705 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); | ||
4706 | return X86EMUL_PROPAGATE_FAULT; | ||
4707 | } | ||
4708 | addr = dtable.base + index * 8; | ||
4709 | ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc), | ||
4710 | vcpu, &err); | ||
4711 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
4712 | kvm_inject_page_fault(vcpu, addr, err); | ||
4713 | |||
4714 | return ret; | ||
4715 | } | ||
4716 | |||
4717 | /* allowed just for 8 bytes segments */ | ||
4718 | static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | ||
4719 | struct desc_struct *seg_desc) | ||
4720 | { | ||
4721 | struct descriptor_table dtable; | ||
4722 | u16 index = selector >> 3; | ||
4723 | |||
4724 | get_segment_descriptor_dtable(vcpu, selector, &dtable); | ||
4725 | |||
4726 | if (dtable.limit < index * 8 + 7) | ||
4727 | return 1; | ||
4728 | return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL); | ||
4729 | } | ||
4730 | |||
4731 | static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu, | ||
4732 | struct desc_struct *seg_desc) | ||
4733 | { | ||
4734 | u32 base_addr = get_desc_base(seg_desc); | ||
4735 | |||
4736 | return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL); | ||
4737 | } | ||
4738 | |||
4739 | static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu, | ||
4740 | struct desc_struct *seg_desc) | ||
4741 | { | ||
4742 | u32 base_addr = get_desc_base(seg_desc); | ||
4743 | |||
4744 | return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL); | ||
4745 | } | ||
4746 | |||
4747 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | ||
4748 | { | ||
4749 | struct kvm_segment kvm_seg; | ||
4750 | |||
4751 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
4752 | return kvm_seg.selector; | ||
4753 | } | ||
4754 | |||
4755 | static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) | ||
4756 | { | ||
4757 | struct kvm_segment segvar = { | ||
4758 | .base = selector << 4, | ||
4759 | .limit = 0xffff, | ||
4760 | .selector = selector, | ||
4761 | .type = 3, | ||
4762 | .present = 1, | ||
4763 | .dpl = 3, | ||
4764 | .db = 0, | ||
4765 | .s = 1, | ||
4766 | .l = 0, | ||
4767 | .g = 0, | ||
4768 | .avl = 0, | ||
4769 | .unusable = 0, | ||
4770 | }; | ||
4771 | kvm_x86_ops->set_segment(vcpu, &segvar, seg); | ||
4772 | return X86EMUL_CONTINUE; | ||
4773 | } | ||
4774 | |||
4775 | static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) | ||
4776 | { | 4903 | { |
4777 | return (seg != VCPU_SREG_LDTR) && | 4904 | int cs_db, cs_l, ret; |
4778 | (seg != VCPU_SREG_TR) && | 4905 | cache_all_regs(vcpu); |
4779 | (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); | ||
4780 | } | ||
4781 | |||
4782 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg) | ||
4783 | { | ||
4784 | struct kvm_segment kvm_seg; | ||
4785 | struct desc_struct seg_desc; | ||
4786 | u8 dpl, rpl, cpl; | ||
4787 | unsigned err_vec = GP_VECTOR; | ||
4788 | u32 err_code = 0; | ||
4789 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | ||
4790 | int ret; | ||
4791 | 4906 | ||
4792 | if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu)) | 4907 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
4793 | return kvm_load_realmode_segment(vcpu, selector, seg); | ||
4794 | 4908 | ||
4795 | /* NULL selector is not valid for TR, CS and SS */ | 4909 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
4796 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) | 4910 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); |
4797 | && null_selector) | 4911 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); |
4798 | goto exception; | 4912 | vcpu->arch.emulate_ctxt.mode = |
4913 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | ||
4914 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | ||
4915 | ? X86EMUL_MODE_VM86 : cs_l | ||
4916 | ? X86EMUL_MODE_PROT64 : cs_db | ||
4917 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | ||
4799 | 4918 | ||
4800 | /* TR should be in GDT only */ | 4919 | ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, |
4801 | if (seg == VCPU_SREG_TR && (selector & (1 << 2))) | 4920 | tss_selector, reason, has_error_code, |
4802 | goto exception; | 4921 | error_code); |
4803 | 4922 | ||
4804 | ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
4805 | if (ret) | 4923 | if (ret) |
4806 | return ret; | 4924 | return EMULATE_FAIL; |
4807 | |||
4808 | seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg); | ||
4809 | |||
4810 | if (null_selector) { /* for NULL selector skip all following checks */ | ||
4811 | kvm_seg.unusable = 1; | ||
4812 | goto load; | ||
4813 | } | ||
4814 | |||
4815 | err_code = selector & 0xfffc; | ||
4816 | err_vec = GP_VECTOR; | ||
4817 | |||
4818 | /* can't load system descriptor into segment selector */ ||
4819 | if (seg <= VCPU_SREG_GS && !kvm_seg.s) | ||
4820 | goto exception; | ||
4821 | |||
4822 | if (!kvm_seg.present) { | ||
4823 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; | ||
4824 | goto exception; | ||
4825 | } | ||
4826 | |||
4827 | rpl = selector & 3; | ||
4828 | dpl = kvm_seg.dpl; | ||
4829 | cpl = kvm_x86_ops->get_cpl(vcpu); | ||
4830 | |||
4831 | switch (seg) { | ||
4832 | case VCPU_SREG_SS: | ||
4833 | /* | ||
4834 | * segment is not a writable data segment or segment | ||
4835 | * selector's RPL != CPL or segment selector's RPL != CPL | ||
4836 | */ | ||
4837 | if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl) | ||
4838 | goto exception; | ||
4839 | break; | ||
4840 | case VCPU_SREG_CS: | ||
4841 | if (!(kvm_seg.type & 8)) | ||
4842 | goto exception; | ||
4843 | |||
4844 | if (kvm_seg.type & 4) { | ||
4845 | /* conforming */ | ||
4846 | if (dpl > cpl) | ||
4847 | goto exception; | ||
4848 | } else { | ||
4849 | /* nonconforming */ | ||
4850 | if (rpl > cpl || dpl != cpl) | ||
4851 | goto exception; | ||
4852 | } | ||
4853 | /* CS(RPL) <- CPL */ | ||
4854 | selector = (selector & 0xfffc) | cpl; | ||
4855 | break; | ||
4856 | case VCPU_SREG_TR: | ||
4857 | if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9)) | ||
4858 | goto exception; | ||
4859 | break; | ||
4860 | case VCPU_SREG_LDTR: | ||
4861 | if (kvm_seg.s || kvm_seg.type != 2) | ||
4862 | goto exception; | ||
4863 | break; | ||
4864 | default: /* DS, ES, FS, or GS */ | ||
4865 | /* | ||
4866 | * segment is not a data or readable code segment or | ||
4867 | * ((segment is a data or nonconforming code segment) | ||
4868 | * and (both RPL and CPL > DPL)) | ||
4869 | */ | ||
4870 | if ((kvm_seg.type & 0xa) == 0x8 || | ||
4871 | (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl))) | ||
4872 | goto exception; | ||
4873 | break; | ||
4874 | } | ||
4875 | |||
4876 | if (!kvm_seg.unusable && kvm_seg.s) { | ||
4877 | /* mark segment as accessed */ | ||
4878 | kvm_seg.type |= 1; | ||
4879 | seg_desc.type |= 1; | ||
4880 | save_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
4881 | } | ||
4882 | load: | ||
4883 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
4884 | return X86EMUL_CONTINUE; | ||
4885 | exception: | ||
4886 | kvm_queue_exception_e(vcpu, err_vec, err_code); | ||
4887 | return X86EMUL_PROPAGATE_FAULT; | ||
4888 | } | ||
4889 | |||
4890 | static void save_state_to_tss32(struct kvm_vcpu *vcpu, | ||
4891 | struct tss_segment_32 *tss) | ||
4892 | { | ||
4893 | tss->cr3 = vcpu->arch.cr3; | ||
4894 | tss->eip = kvm_rip_read(vcpu); | ||
4895 | tss->eflags = kvm_get_rflags(vcpu); | ||
4896 | tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
4897 | tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
4898 | tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); | ||
4899 | tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX); | ||
4900 | tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
4901 | tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP); | ||
4902 | tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
4903 | tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
4904 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | ||
4905 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | ||
4906 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | ||
4907 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | ||
4908 | tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); | ||
4909 | tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); | ||
4910 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); | ||
4911 | } | ||
4912 | |||
4913 | static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg) | ||
4914 | { | ||
4915 | struct kvm_segment kvm_seg; | ||
4916 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
4917 | kvm_seg.selector = sel; | ||
4918 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
4919 | } | ||
4920 | |||
4921 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, | ||
4922 | struct tss_segment_32 *tss) | ||
4923 | { | ||
4924 | kvm_set_cr3(vcpu, tss->cr3); | ||
4925 | |||
4926 | kvm_rip_write(vcpu, tss->eip); | ||
4927 | kvm_set_rflags(vcpu, tss->eflags | 2); | ||
4928 | |||
4929 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); | ||
4930 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); | ||
4931 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx); | ||
4932 | kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx); | ||
4933 | kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp); | ||
4934 | kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp); | ||
4935 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); | ||
4936 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); | ||
4937 | |||
4938 | /* | ||
4939 | * SDM says that segment selectors are loaded before segment | ||
4940 | * descriptors | ||
4941 | */ | ||
4942 | kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR); | ||
4943 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
4944 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
4945 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
4946 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
4947 | kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS); | ||
4948 | kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS); | ||
4949 | |||
4950 | /* | ||
4951 | * Now load segment descriptors. If fault happens at this stage | ||
4952 | * it is handled in a context of new task | ||
4953 | */ | ||
4954 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR)) | ||
4955 | return 1; | ||
4956 | |||
4957 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) | ||
4958 | return 1; | ||
4959 | |||
4960 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) | ||
4961 | return 1; | ||
4962 | |||
4963 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) | ||
4964 | return 1; | ||
4965 | |||
4966 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) | ||
4967 | return 1; | ||
4968 | |||
4969 | if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS)) | ||
4970 | return 1; | ||
4971 | |||
4972 | if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS)) | ||
4973 | return 1; | ||
4974 | return 0; | ||
4975 | } | ||
4976 | |||
4977 | static void save_state_to_tss16(struct kvm_vcpu *vcpu, | ||
4978 | struct tss_segment_16 *tss) | ||
4979 | { | ||
4980 | tss->ip = kvm_rip_read(vcpu); | ||
4981 | tss->flag = kvm_get_rflags(vcpu); | ||
4982 | tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
4983 | tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
4984 | tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); | ||
4985 | tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX); | ||
4986 | tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
4987 | tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP); | ||
4988 | tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
4989 | tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
4990 | |||
4991 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | ||
4992 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | ||
4993 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | ||
4994 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | ||
4995 | tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); | ||
4996 | } | ||
4997 | |||
4998 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, | ||
4999 | struct tss_segment_16 *tss) | ||
5000 | { | ||
5001 | kvm_rip_write(vcpu, tss->ip); | ||
5002 | kvm_set_rflags(vcpu, tss->flag | 2); | ||
5003 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); | ||
5004 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); | ||
5005 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); | ||
5006 | kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx); | ||
5007 | kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp); | ||
5008 | kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp); | ||
5009 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); | ||
5010 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); | ||
5011 | |||
5012 | /* | ||
5013 | * SDM says that segment selectors are loaded before segment | ||
5014 | * descriptors | ||
5015 | */ | ||
5016 | kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR); | ||
5017 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
5018 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
5019 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
5020 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
5021 | |||
5022 | /* | ||
5023 | * Now load segment descriptors. If fault happens at this stage | ||
5024 | * it is handled in a context of new task | ||
5025 | */ | ||
5026 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR)) | ||
5027 | return 1; | ||
5028 | |||
5029 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) | ||
5030 | return 1; | ||
5031 | |||
5032 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) | ||
5033 | return 1; | ||
5034 | |||
5035 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) | ||
5036 | return 1; | ||
5037 | |||
5038 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) | ||
5039 | return 1; | ||
5040 | return 0; | ||
5041 | } | ||
5042 | |||
5043 | static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | ||
5044 | u16 old_tss_sel, u32 old_tss_base, | ||
5045 | struct desc_struct *nseg_desc) | ||
5046 | { | ||
5047 | struct tss_segment_16 tss_segment_16; | ||
5048 | int ret = 0; | ||
5049 | |||
5050 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16, | ||
5051 | sizeof tss_segment_16)) | ||
5052 | goto out; | ||
5053 | |||
5054 | save_state_to_tss16(vcpu, &tss_segment_16); | ||
5055 | |||
5056 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16, | ||
5057 | sizeof tss_segment_16)) | ||
5058 | goto out; | ||
5059 | |||
5060 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), | ||
5061 | &tss_segment_16, sizeof tss_segment_16)) | ||
5062 | goto out; | ||
5063 | |||
5064 | if (old_tss_sel != 0xffff) { | ||
5065 | tss_segment_16.prev_task_link = old_tss_sel; | ||
5066 | 4925 | ||
5067 | if (kvm_write_guest(vcpu->kvm, | 4926 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
5068 | get_tss_base_addr_write(vcpu, nseg_desc), | 4927 | return EMULATE_DONE; |
5069 | &tss_segment_16.prev_task_link, | ||
5070 | sizeof tss_segment_16.prev_task_link)) | ||
5071 | goto out; | ||
5072 | } | ||
5073 | |||
5074 | if (load_state_from_tss16(vcpu, &tss_segment_16)) | ||
5075 | goto out; | ||
5076 | |||
5077 | ret = 1; | ||
5078 | out: | ||
5079 | return ret; | ||
5080 | } | ||
5081 | |||
5082 | static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | ||
5083 | u16 old_tss_sel, u32 old_tss_base, | ||
5084 | struct desc_struct *nseg_desc) | ||
5085 | { | ||
5086 | struct tss_segment_32 tss_segment_32; | ||
5087 | int ret = 0; | ||
5088 | |||
5089 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32, | ||
5090 | sizeof tss_segment_32)) | ||
5091 | goto out; | ||
5092 | |||
5093 | save_state_to_tss32(vcpu, &tss_segment_32); | ||
5094 | |||
5095 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32, | ||
5096 | sizeof tss_segment_32)) | ||
5097 | goto out; | ||
5098 | |||
5099 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), | ||
5100 | &tss_segment_32, sizeof tss_segment_32)) | ||
5101 | goto out; | ||
5102 | |||
5103 | if (old_tss_sel != 0xffff) { | ||
5104 | tss_segment_32.prev_task_link = old_tss_sel; | ||
5105 | |||
5106 | if (kvm_write_guest(vcpu->kvm, | ||
5107 | get_tss_base_addr_write(vcpu, nseg_desc), | ||
5108 | &tss_segment_32.prev_task_link, | ||
5109 | sizeof tss_segment_32.prev_task_link)) | ||
5110 | goto out; | ||
5111 | } | ||
5112 | |||
5113 | if (load_state_from_tss32(vcpu, &tss_segment_32)) | ||
5114 | goto out; | ||
5115 | |||
5116 | ret = 1; | ||
5117 | out: | ||
5118 | return ret; | ||
5119 | } | ||
5120 | |||
5121 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | ||
5122 | { | ||
5123 | struct kvm_segment tr_seg; | ||
5124 | struct desc_struct cseg_desc; | ||
5125 | struct desc_struct nseg_desc; | ||
5126 | int ret = 0; | ||
5127 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); | ||
5128 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
5129 | u32 desc_limit; | ||
5130 | |||
5131 | old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); | ||
5132 | |||
5133 | /* FIXME: Handle errors. Failure to read either TSS or their | ||
5134 | * descriptors should generate a pagefault. | ||
5135 | */ | ||
5136 | if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) | ||
5137 | goto out; | ||
5138 | |||
5139 | if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc)) | ||
5140 | goto out; | ||
5141 | |||
5142 | if (reason != TASK_SWITCH_IRET) { | ||
5143 | int cpl; | ||
5144 | |||
5145 | cpl = kvm_x86_ops->get_cpl(vcpu); | ||
5146 | if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) { | ||
5147 | kvm_queue_exception_e(vcpu, GP_VECTOR, 0); | ||
5148 | return 1; | ||
5149 | } | ||
5150 | } | ||
5151 | |||
5152 | desc_limit = get_desc_limit(&nseg_desc); | ||
5153 | if (!nseg_desc.p || | ||
5154 | ((desc_limit < 0x67 && (nseg_desc.type & 8)) || | ||
5155 | desc_limit < 0x2b)) { | ||
5156 | kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); | ||
5157 | return 1; | ||
5158 | } | ||
5159 | |||
5160 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | ||
5161 | cseg_desc.type &= ~(1 << 1); /* clear the B flag */ | ||
5162 | save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc); | ||
5163 | } | ||
5164 | |||
5165 | if (reason == TASK_SWITCH_IRET) { | ||
5166 | u32 eflags = kvm_get_rflags(vcpu); | ||
5167 | kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); | ||
5168 | } | ||
5169 | |||
5170 | /* set back link to prev task only if NT bit is set in eflags | ||
5171 | note that old_tss_sel is not used after this point */ | ||
5172 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
5173 | old_tss_sel = 0xffff; | ||
5174 | |||
5175 | if (nseg_desc.type & 8) | ||
5176 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, | ||
5177 | old_tss_base, &nseg_desc); | ||
5178 | else | ||
5179 | ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel, | ||
5180 | old_tss_base, &nseg_desc); | ||
5181 | |||
5182 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { | ||
5183 | u32 eflags = kvm_get_rflags(vcpu); | ||
5184 | kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT); | ||
5185 | } | ||
5186 | |||
5187 | if (reason != TASK_SWITCH_IRET) { | ||
5188 | nseg_desc.type |= (1 << 1); | ||
5189 | save_guest_segment_descriptor(vcpu, tss_selector, | ||
5190 | &nseg_desc); | ||
5191 | } | ||
5192 | |||
5193 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS); | ||
5194 | seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); | ||
5195 | tr_seg.type = 11; | ||
5196 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); | ||
5197 | out: | ||
5198 | return ret; | ||
5199 | } | 4928 | } |
5200 | EXPORT_SYMBOL_GPL(kvm_task_switch); | 4929 | EXPORT_SYMBOL_GPL(kvm_task_switch); |
5201 | 4930 | ||
@@ -5204,15 +4933,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5204 | { | 4933 | { |
5205 | int mmu_reset_needed = 0; | 4934 | int mmu_reset_needed = 0; |
5206 | int pending_vec, max_bits; | 4935 | int pending_vec, max_bits; |
5207 | struct descriptor_table dt; | 4936 | struct desc_ptr dt; |
5208 | 4937 | ||
5209 | vcpu_load(vcpu); | 4938 | vcpu_load(vcpu); |
5210 | 4939 | ||
5211 | dt.limit = sregs->idt.limit; | 4940 | dt.size = sregs->idt.limit; |
5212 | dt.base = sregs->idt.base; | 4941 | dt.address = sregs->idt.base; |
5213 | kvm_x86_ops->set_idt(vcpu, &dt); | 4942 | kvm_x86_ops->set_idt(vcpu, &dt); |
5214 | dt.limit = sregs->gdt.limit; | 4943 | dt.size = sregs->gdt.limit; |
5215 | dt.base = sregs->gdt.base; | 4944 | dt.address = sregs->gdt.base; |
5216 | kvm_x86_ops->set_gdt(vcpu, &dt); | 4945 | kvm_x86_ops->set_gdt(vcpu, &dt); |
5217 | 4946 | ||
5218 | vcpu->arch.cr2 = sregs->cr2; | 4947 | vcpu->arch.cr2 = sregs->cr2; |
@@ -5311,11 +5040,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
5311 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); | 5040 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); |
5312 | } | 5041 | } |
5313 | 5042 | ||
5314 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { | 5043 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
5315 | vcpu->arch.singlestep_cs = | 5044 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + |
5316 | get_segment_selector(vcpu, VCPU_SREG_CS); | 5045 | get_segment_base(vcpu, VCPU_SREG_CS); |
5317 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu); | ||
5318 | } | ||
5319 | 5046 | ||
5320 | /* | 5047 | /* |
5321 | * Trigger an rflags update that will inject or remove the trace | 5048 | * Trigger an rflags update that will inject or remove the trace |
@@ -5806,13 +5533,22 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
5806 | return kvm_x86_ops->interrupt_allowed(vcpu); | 5533 | return kvm_x86_ops->interrupt_allowed(vcpu); |
5807 | } | 5534 | } |
5808 | 5535 | ||
5536 | bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) | ||
5537 | { | ||
5538 | unsigned long current_rip = kvm_rip_read(vcpu) + | ||
5539 | get_segment_base(vcpu, VCPU_SREG_CS); | ||
5540 | |||
5541 | return current_rip == linear_rip; | ||
5542 | } | ||
5543 | EXPORT_SYMBOL_GPL(kvm_is_linear_rip); | ||
5544 | |||
5809 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) | 5545 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) |
5810 | { | 5546 | { |
5811 | unsigned long rflags; | 5547 | unsigned long rflags; |
5812 | 5548 | ||
5813 | rflags = kvm_x86_ops->get_rflags(vcpu); | 5549 | rflags = kvm_x86_ops->get_rflags(vcpu); |
5814 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 5550 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
5815 | rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); | 5551 | rflags &= ~X86_EFLAGS_TF; |
5816 | return rflags; | 5552 | return rflags; |
5817 | } | 5553 | } |
5818 | EXPORT_SYMBOL_GPL(kvm_get_rflags); | 5554 | EXPORT_SYMBOL_GPL(kvm_get_rflags); |
@@ -5820,10 +5556,8 @@ EXPORT_SYMBOL_GPL(kvm_get_rflags); | |||
5820 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 5556 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
5821 | { | 5557 | { |
5822 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && | 5558 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && |
5823 | vcpu->arch.singlestep_cs == | 5559 | kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) |
5824 | get_segment_selector(vcpu, VCPU_SREG_CS) && | 5560 | rflags |= X86_EFLAGS_TF; |
5825 | vcpu->arch.singlestep_rip == kvm_rip_read(vcpu)) | ||
5826 | rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
5827 | kvm_x86_ops->set_rflags(vcpu, rflags); | 5561 | kvm_x86_ops->set_rflags(vcpu, rflags); |
5828 | } | 5562 | } |
5829 | EXPORT_SYMBOL_GPL(kvm_set_rflags); | 5563 | EXPORT_SYMBOL_GPL(kvm_set_rflags); |
@@ -5839,3 +5573,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); | |||
5839 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); | 5573 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); |
5840 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); | 5574 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); |
5841 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); | 5575 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); |
5576 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); | ||
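A condensed restatement of the single-step change in the x86.c hunks above (not the full kvm code): the guest-debug path now records one linear RIP, and kvm_set_rflags() re-arms the trap flag only while the guest is still at that exact linear address.

	/* sketch: condensed from the hunks above, error paths and locking omitted */
	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
		vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
			get_segment_base(vcpu, VCPU_SREG_CS);

	/* ...later, when rflags are written back... */
	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
	    kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
		rflags |= X86_EFLAGS_TF;
	kvm_x86_ops->set_rflags(vcpu, rflags);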
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 2d101639bd8d..f4b54458285b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -65,4 +65,14 @@ static inline int is_paging(struct kvm_vcpu *vcpu) | |||
65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); | 65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); |
66 | } | 66 | } |
67 | 67 | ||
68 | static inline struct kvm_mem_aliases *kvm_aliases(struct kvm *kvm) | ||
69 | { | ||
70 | return rcu_dereference_check(kvm->arch.aliases, | ||
71 | srcu_read_lock_held(&kvm->srcu) | ||
72 | || lockdep_is_held(&kvm->slots_lock)); | ||
73 | } | ||
74 | |||
75 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | ||
76 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | ||
77 | |||
68 | #endif | 78 | #endif |
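The kvm_aliases() helper added above spells out its locking contract in rcu_dereference_check(); a minimal reader sketch that satisfies the srcu_read_lock_held() arm (illustrative only, no struct fields assumed):

	/* sketch: a read-side caller of kvm_aliases(), as permitted by the
	 * rcu_dereference_check() condition above */
	int idx = srcu_read_lock(&kvm->srcu);
	struct kvm_mem_aliases *aliases = kvm_aliases(kvm);

	/* ... read-only walk of the alias table ... */

	srcu_read_unlock(&kvm->srcu, idx);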
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 2bdf628066bd..9257510b4836 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -1390,7 +1390,6 @@ __init void lguest_init(void) | |||
1390 | #endif | 1390 | #endif |
1391 | #ifdef CONFIG_ACPI | 1391 | #ifdef CONFIG_ACPI |
1392 | acpi_disabled = 1; | 1392 | acpi_disabled = 1; |
1393 | acpi_ht = 0; | ||
1394 | #endif | 1393 | #endif |
1395 | 1394 | ||
1396 | /* | 1395 | /* |
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 419386c24b82..f871e04b6965 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -20,17 +20,18 @@ lib-y := delay.o | |||
20 | lib-y += thunk_$(BITS).o | 20 | lib-y += thunk_$(BITS).o |
21 | lib-y += usercopy_$(BITS).o getuser.o putuser.o | 21 | lib-y += usercopy_$(BITS).o getuser.o putuser.o |
22 | lib-y += memcpy_$(BITS).o | 22 | lib-y += memcpy_$(BITS).o |
23 | lib-$(CONFIG_KPROBES) += insn.o inat.o | 23 | lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o |
24 | 24 | ||
25 | obj-y += msr.o msr-reg.o msr-reg-export.o | 25 | obj-y += msr.o msr-reg.o msr-reg-export.o |
26 | 26 | ||
27 | ifeq ($(CONFIG_X86_32),y) | 27 | ifeq ($(CONFIG_X86_32),y) |
28 | obj-y += atomic64_32.o | 28 | obj-y += atomic64_32.o |
29 | lib-y += atomic64_cx8_32.o | ||
29 | lib-y += checksum_32.o | 30 | lib-y += checksum_32.o |
30 | lib-y += strstr_32.o | 31 | lib-y += strstr_32.o |
31 | lib-y += semaphore_32.o string_32.o | 32 | lib-y += semaphore_32.o string_32.o |
32 | ifneq ($(CONFIG_X86_CMPXCHG64),y) | 33 | ifneq ($(CONFIG_X86_CMPXCHG64),y) |
33 | lib-y += cmpxchg8b_emu.o | 34 | lib-y += cmpxchg8b_emu.o atomic64_386_32.o |
34 | endif | 35 | endif |
35 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o | 36 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o |
36 | else | 37 | else |
diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c index 824fa0be55a3..540179e8e9fa 100644 --- a/arch/x86/lib/atomic64_32.c +++ b/arch/x86/lib/atomic64_32.c | |||
@@ -6,225 +6,54 @@ | |||
6 | #include <asm/cmpxchg.h> | 6 | #include <asm/cmpxchg.h> |
7 | #include <asm/atomic.h> | 7 | #include <asm/atomic.h> |
8 | 8 | ||
9 | static noinline u64 cmpxchg8b(u64 *ptr, u64 old, u64 new) | 9 | long long atomic64_read_cx8(long long, const atomic64_t *v); |
10 | { | 10 | EXPORT_SYMBOL(atomic64_read_cx8); |
11 | u32 low = new; | 11 | long long atomic64_set_cx8(long long, const atomic64_t *v); |
12 | u32 high = new >> 32; | 12 | EXPORT_SYMBOL(atomic64_set_cx8); |
13 | 13 | long long atomic64_xchg_cx8(long long, unsigned high); | |
14 | asm volatile( | 14 | EXPORT_SYMBOL(atomic64_xchg_cx8); |
15 | LOCK_PREFIX "cmpxchg8b %1\n" | 15 | long long atomic64_add_return_cx8(long long a, atomic64_t *v); |
16 | : "+A" (old), "+m" (*ptr) | 16 | EXPORT_SYMBOL(atomic64_add_return_cx8); |
17 | : "b" (low), "c" (high) | 17 | long long atomic64_sub_return_cx8(long long a, atomic64_t *v); |
18 | ); | 18 | EXPORT_SYMBOL(atomic64_sub_return_cx8); |
19 | return old; | 19 | long long atomic64_inc_return_cx8(long long a, atomic64_t *v); |
20 | } | 20 | EXPORT_SYMBOL(atomic64_inc_return_cx8); |
21 | 21 | long long atomic64_dec_return_cx8(long long a, atomic64_t *v); | |
22 | u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val) | 22 | EXPORT_SYMBOL(atomic64_dec_return_cx8); |
23 | { | 23 | long long atomic64_dec_if_positive_cx8(atomic64_t *v); |
24 | return cmpxchg8b(&ptr->counter, old_val, new_val); | 24 | EXPORT_SYMBOL(atomic64_dec_if_positive_cx8); |
25 | } | 25 | int atomic64_inc_not_zero_cx8(atomic64_t *v); |
26 | EXPORT_SYMBOL(atomic64_cmpxchg); | 26 | EXPORT_SYMBOL(atomic64_inc_not_zero_cx8); |
27 | 27 | int atomic64_add_unless_cx8(atomic64_t *v, long long a, long long u); | |
28 | /** | 28 | EXPORT_SYMBOL(atomic64_add_unless_cx8); |
29 | * atomic64_xchg - xchg atomic64 variable | 29 | |
30 | * @ptr: pointer to type atomic64_t | 30 | #ifndef CONFIG_X86_CMPXCHG64 |
31 | * @new_val: value to assign | 31 | long long atomic64_read_386(long long, const atomic64_t *v); |
32 | * | 32 | EXPORT_SYMBOL(atomic64_read_386); |
33 | * Atomically xchgs the value of @ptr to @new_val and returns | 33 | long long atomic64_set_386(long long, const atomic64_t *v); |
34 | * the old value. | 34 | EXPORT_SYMBOL(atomic64_set_386); |
35 | */ | 35 | long long atomic64_xchg_386(long long, unsigned high); |
36 | u64 atomic64_xchg(atomic64_t *ptr, u64 new_val) | 36 | EXPORT_SYMBOL(atomic64_xchg_386); |
37 | { | 37 | long long atomic64_add_return_386(long long a, atomic64_t *v); |
38 | /* | 38 | EXPORT_SYMBOL(atomic64_add_return_386); |
39 | * Try first with a (possibly incorrect) assumption about | 39 | long long atomic64_sub_return_386(long long a, atomic64_t *v); |
40 | * what we have there. We'll do two loops most likely, | 40 | EXPORT_SYMBOL(atomic64_sub_return_386); |
41 | * but we'll get an ownership MESI transaction straight away | 41 | long long atomic64_inc_return_386(long long a, atomic64_t *v); |
42 | * instead of a read transaction followed by a | 42 | EXPORT_SYMBOL(atomic64_inc_return_386); |
43 | * flush-for-ownership transaction: | 43 | long long atomic64_dec_return_386(long long a, atomic64_t *v); |
44 | */ | 44 | EXPORT_SYMBOL(atomic64_dec_return_386); |
45 | u64 old_val, real_val = 0; | 45 | long long atomic64_add_386(long long a, atomic64_t *v); |
46 | 46 | EXPORT_SYMBOL(atomic64_add_386); | |
47 | do { | 47 | long long atomic64_sub_386(long long a, atomic64_t *v); |
48 | old_val = real_val; | 48 | EXPORT_SYMBOL(atomic64_sub_386); |
49 | 49 | long long atomic64_inc_386(long long a, atomic64_t *v); | |
50 | real_val = atomic64_cmpxchg(ptr, old_val, new_val); | 50 | EXPORT_SYMBOL(atomic64_inc_386); |
51 | 51 | long long atomic64_dec_386(long long a, atomic64_t *v); | |
52 | } while (real_val != old_val); | 52 | EXPORT_SYMBOL(atomic64_dec_386); |
53 | 53 | long long atomic64_dec_if_positive_386(atomic64_t *v); | |
54 | return old_val; | 54 | EXPORT_SYMBOL(atomic64_dec_if_positive_386); |
55 | } | 55 | int atomic64_inc_not_zero_386(atomic64_t *v); |
56 | EXPORT_SYMBOL(atomic64_xchg); | 56 | EXPORT_SYMBOL(atomic64_inc_not_zero_386); |
57 | 57 | int atomic64_add_unless_386(atomic64_t *v, long long a, long long u); | |
58 | /** | 58 | EXPORT_SYMBOL(atomic64_add_unless_386); |
59 | * atomic64_set - set atomic64 variable | 59 | #endif |
60 | * @ptr: pointer to type atomic64_t | ||
61 | * @new_val: value to assign | ||
62 | * | ||
63 | * Atomically sets the value of @ptr to @new_val. | ||
64 | */ | ||
65 | void atomic64_set(atomic64_t *ptr, u64 new_val) | ||
66 | { | ||
67 | atomic64_xchg(ptr, new_val); | ||
68 | } | ||
69 | EXPORT_SYMBOL(atomic64_set); | ||
70 | |||
71 | /** | ||
72 | EXPORT_SYMBOL(atomic64_read); | ||
73 | * atomic64_add_return - add and return | ||
74 | * @delta: integer value to add | ||
75 | * @ptr: pointer to type atomic64_t | ||
76 | * | ||
77 | * Atomically adds @delta to @ptr and returns @delta + *@ptr | ||
78 | */ | ||
79 | noinline u64 atomic64_add_return(u64 delta, atomic64_t *ptr) | ||
80 | { | ||
81 | /* | ||
82 | * Try first with a (possibly incorrect) assumption about | ||
83 | * what we have there. We'll do two loops most likely, | ||
84 | * but we'll get an ownership MESI transaction straight away | ||
85 | * instead of a read transaction followed by a | ||
86 | * flush-for-ownership transaction: | ||
87 | */ | ||
88 | u64 old_val, new_val, real_val = 0; | ||
89 | |||
90 | do { | ||
91 | old_val = real_val; | ||
92 | new_val = old_val + delta; | ||
93 | |||
94 | real_val = atomic64_cmpxchg(ptr, old_val, new_val); | ||
95 | |||
96 | } while (real_val != old_val); | ||
97 | |||
98 | return new_val; | ||
99 | } | ||
100 | EXPORT_SYMBOL(atomic64_add_return); | ||
101 | |||
102 | u64 atomic64_sub_return(u64 delta, atomic64_t *ptr) | ||
103 | { | ||
104 | return atomic64_add_return(-delta, ptr); | ||
105 | } | ||
106 | EXPORT_SYMBOL(atomic64_sub_return); | ||
107 | |||
108 | u64 atomic64_inc_return(atomic64_t *ptr) | ||
109 | { | ||
110 | return atomic64_add_return(1, ptr); | ||
111 | } | ||
112 | EXPORT_SYMBOL(atomic64_inc_return); | ||
113 | |||
114 | u64 atomic64_dec_return(atomic64_t *ptr) | ||
115 | { | ||
116 | return atomic64_sub_return(1, ptr); | ||
117 | } | ||
118 | EXPORT_SYMBOL(atomic64_dec_return); | ||
119 | |||
120 | /** | ||
121 | * atomic64_add - add integer to atomic64 variable | ||
122 | * @delta: integer value to add | ||
123 | * @ptr: pointer to type atomic64_t | ||
124 | * | ||
125 | * Atomically adds @delta to @ptr. | ||
126 | */ | ||
127 | void atomic64_add(u64 delta, atomic64_t *ptr) | ||
128 | { | ||
129 | atomic64_add_return(delta, ptr); | ||
130 | } | ||
131 | EXPORT_SYMBOL(atomic64_add); | ||
132 | |||
133 | /** | ||
134 | * atomic64_sub - subtract the atomic64 variable | ||
135 | * @delta: integer value to subtract | ||
136 | * @ptr: pointer to type atomic64_t | ||
137 | * | ||
138 | * Atomically subtracts @delta from @ptr. | ||
139 | */ | ||
140 | void atomic64_sub(u64 delta, atomic64_t *ptr) | ||
141 | { | ||
142 | atomic64_add(-delta, ptr); | ||
143 | } | ||
144 | EXPORT_SYMBOL(atomic64_sub); | ||
145 | |||
146 | /** | ||
147 | * atomic64_sub_and_test - subtract value from variable and test result | ||
148 | * @delta: integer value to subtract | ||
149 | * @ptr: pointer to type atomic64_t | ||
150 | * | ||
151 | * Atomically subtracts @delta from @ptr and returns | ||
152 | * true if the result is zero, or false for all | ||
153 | * other cases. | ||
154 | */ | ||
155 | int atomic64_sub_and_test(u64 delta, atomic64_t *ptr) | ||
156 | { | ||
157 | u64 new_val = atomic64_sub_return(delta, ptr); | ||
158 | |||
159 | return new_val == 0; | ||
160 | } | ||
161 | EXPORT_SYMBOL(atomic64_sub_and_test); | ||
162 | |||
163 | /** | ||
164 | * atomic64_inc - increment atomic64 variable | ||
165 | * @ptr: pointer to type atomic64_t | ||
166 | * | ||
167 | * Atomically increments @ptr by 1. | ||
168 | */ | ||
169 | void atomic64_inc(atomic64_t *ptr) | ||
170 | { | ||
171 | atomic64_add(1, ptr); | ||
172 | } | ||
173 | EXPORT_SYMBOL(atomic64_inc); | ||
174 | |||
175 | /** | ||
176 | * atomic64_dec - decrement atomic64 variable | ||
177 | * @ptr: pointer to type atomic64_t | ||
178 | * | ||
179 | * Atomically decrements @ptr by 1. | ||
180 | */ | ||
181 | void atomic64_dec(atomic64_t *ptr) | ||
182 | { | ||
183 | atomic64_sub(1, ptr); | ||
184 | } | ||
185 | EXPORT_SYMBOL(atomic64_dec); | ||
186 | |||
187 | /** | ||
188 | * atomic64_dec_and_test - decrement and test | ||
189 | * @ptr: pointer to type atomic64_t | ||
190 | * | ||
191 | * Atomically decrements @ptr by 1 and | ||
192 | * returns true if the result is 0, or false for all other | ||
193 | * cases. | ||
194 | */ | ||
195 | int atomic64_dec_and_test(atomic64_t *ptr) | ||
196 | { | ||
197 | return atomic64_sub_and_test(1, ptr); | ||
198 | } | ||
199 | EXPORT_SYMBOL(atomic64_dec_and_test); | ||
200 | |||
201 | /** | ||
202 | * atomic64_inc_and_test - increment and test | ||
203 | * @ptr: pointer to type atomic64_t | ||
204 | * | ||
205 | * Atomically increments @ptr by 1 | ||
206 | * and returns true if the result is zero, or false for all | ||
207 | * other cases. | ||
208 | */ | ||
209 | int atomic64_inc_and_test(atomic64_t *ptr) | ||
210 | { | ||
211 | return atomic64_sub_and_test(-1, ptr); | ||
212 | } | ||
213 | EXPORT_SYMBOL(atomic64_inc_and_test); | ||
214 | |||
215 | /** | ||
216 | * atomic64_add_negative - add and test if negative | ||
217 | * @delta: integer value to add | ||
218 | * @ptr: pointer to type atomic64_t | ||
219 | * | ||
220 | * Atomically adds @delta to @ptr and returns true | ||
221 | * if the result is negative, or false when | ||
222 | * result is greater than or equal to zero. | ||
223 | */ | ||
224 | int atomic64_add_negative(u64 delta, atomic64_t *ptr) | ||
225 | { | ||
226 | s64 new_val = atomic64_add_return(delta, ptr); | ||
227 | |||
228 | return new_val < 0; | ||
229 | } | ||
230 | EXPORT_SYMBOL(atomic64_add_negative); | ||
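Every helper deleted above reduces to the same compare-and-exchange retry loop; a self-contained sketch of that pattern using the GCC/Clang __atomic builtins (a model of the logic, not the kernel implementation):

	#include <stdint.h>

	/* Same shape as the removed atomic64_add_return(): keep retrying the
	 * 64-bit compare-exchange until nobody raced with the update. */
	static int64_t add_return_sketch(int64_t delta, int64_t *ptr)
	{
		int64_t old = __atomic_load_n(ptr, __ATOMIC_RELAXED);
		int64_t newval;

		do {
			newval = old + delta;
			/* on failure, 'old' is refreshed with the current value */
		} while (!__atomic_compare_exchange_n(ptr, &old, newval, 0,
						      __ATOMIC_SEQ_CST,
						      __ATOMIC_SEQ_CST));
		return newval;
	}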
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S new file mode 100644 index 000000000000..4a5979aa6883 --- /dev/null +++ b/arch/x86/lib/atomic64_386_32.S | |||
@@ -0,0 +1,174 @@ | |||
1 | /* | ||
2 | * atomic64_t for 386/486 | ||
3 | * | ||
4 | * Copyright © 2010 Luca Barbieri | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/linkage.h> | ||
13 | #include <asm/alternative-asm.h> | ||
14 | #include <asm/dwarf2.h> | ||
15 | |||
16 | /* if you want SMP support, implement these with real spinlocks */ | ||
17 | .macro LOCK reg | ||
18 | pushfl | ||
19 | CFI_ADJUST_CFA_OFFSET 4 | ||
20 | cli | ||
21 | .endm | ||
22 | |||
23 | .macro UNLOCK reg | ||
24 | popfl | ||
25 | CFI_ADJUST_CFA_OFFSET -4 | ||
26 | .endm | ||
27 | |||
28 | .macro BEGIN func reg | ||
29 | $v = \reg | ||
30 | |||
31 | ENTRY(atomic64_\func\()_386) | ||
32 | CFI_STARTPROC | ||
33 | LOCK $v | ||
34 | |||
35 | .macro RETURN | ||
36 | UNLOCK $v | ||
37 | ret | ||
38 | .endm | ||
39 | |||
40 | .macro END_ | ||
41 | CFI_ENDPROC | ||
42 | ENDPROC(atomic64_\func\()_386) | ||
43 | .purgem RETURN | ||
44 | .purgem END_ | ||
45 | .purgem END | ||
46 | .endm | ||
47 | |||
48 | .macro END | ||
49 | RETURN | ||
50 | END_ | ||
51 | .endm | ||
52 | .endm | ||
53 | |||
54 | BEGIN read %ecx | ||
55 | movl ($v), %eax | ||
56 | movl 4($v), %edx | ||
57 | END | ||
58 | |||
59 | BEGIN set %esi | ||
60 | movl %ebx, ($v) | ||
61 | movl %ecx, 4($v) | ||
62 | END | ||
63 | |||
64 | BEGIN xchg %esi | ||
65 | movl ($v), %eax | ||
66 | movl 4($v), %edx | ||
67 | movl %ebx, ($v) | ||
68 | movl %ecx, 4($v) | ||
69 | END | ||
70 | |||
71 | BEGIN add %ecx | ||
72 | addl %eax, ($v) | ||
73 | adcl %edx, 4($v) | ||
74 | END | ||
75 | |||
76 | BEGIN add_return %ecx | ||
77 | addl ($v), %eax | ||
78 | adcl 4($v), %edx | ||
79 | movl %eax, ($v) | ||
80 | movl %edx, 4($v) | ||
81 | END | ||
82 | |||
83 | BEGIN sub %ecx | ||
84 | subl %eax, ($v) | ||
85 | sbbl %edx, 4($v) | ||
86 | END | ||
87 | |||
88 | BEGIN sub_return %ecx | ||
89 | negl %edx | ||
90 | negl %eax | ||
91 | sbbl $0, %edx | ||
92 | addl ($v), %eax | ||
93 | adcl 4($v), %edx | ||
94 | movl %eax, ($v) | ||
95 | movl %edx, 4($v) | ||
96 | END | ||
97 | |||
98 | BEGIN inc %esi | ||
99 | addl $1, ($v) | ||
100 | adcl $0, 4($v) | ||
101 | END | ||
102 | |||
103 | BEGIN inc_return %esi | ||
104 | movl ($v), %eax | ||
105 | movl 4($v), %edx | ||
106 | addl $1, %eax | ||
107 | adcl $0, %edx | ||
108 | movl %eax, ($v) | ||
109 | movl %edx, 4($v) | ||
110 | END | ||
111 | |||
112 | BEGIN dec %esi | ||
113 | subl $1, ($v) | ||
114 | sbbl $0, 4($v) | ||
115 | END | ||
116 | |||
117 | BEGIN dec_return %esi | ||
118 | movl ($v), %eax | ||
119 | movl 4($v), %edx | ||
120 | subl $1, %eax | ||
121 | sbbl $0, %edx | ||
122 | movl %eax, ($v) | ||
123 | movl %edx, 4($v) | ||
124 | END | ||
125 | |||
126 | BEGIN add_unless %ecx | ||
127 | addl %eax, %esi | ||
128 | adcl %edx, %edi | ||
129 | addl ($v), %eax | ||
130 | adcl 4($v), %edx | ||
131 | cmpl %eax, %esi | ||
132 | je 3f | ||
133 | 1: | ||
134 | movl %eax, ($v) | ||
135 | movl %edx, 4($v) | ||
136 | movl $1, %eax | ||
137 | 2: | ||
138 | RETURN | ||
139 | 3: | ||
140 | cmpl %edx, %edi | ||
141 | jne 1b | ||
142 | xorl %eax, %eax | ||
143 | jmp 2b | ||
144 | END_ | ||
145 | |||
146 | BEGIN inc_not_zero %esi | ||
147 | movl ($v), %eax | ||
148 | movl 4($v), %edx | ||
149 | testl %eax, %eax | ||
150 | je 3f | ||
151 | 1: | ||
152 | addl $1, %eax | ||
153 | adcl $0, %edx | ||
154 | movl %eax, ($v) | ||
155 | movl %edx, 4($v) | ||
156 | movl $1, %eax | ||
157 | 2: | ||
158 | RETURN | ||
159 | 3: | ||
160 | testl %edx, %edx | ||
161 | jne 1b | ||
162 | jmp 2b | ||
163 | END_ | ||
164 | |||
165 | BEGIN dec_if_positive %esi | ||
166 | movl ($v), %eax | ||
167 | movl 4($v), %edx | ||
168 | subl $1, %eax | ||
169 | sbbl $0, %edx | ||
170 | js 1f | ||
171 | movl %eax, ($v) | ||
172 | movl %edx, 4($v) | ||
173 | 1: | ||
174 | END | ||
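The least obvious entry point above is add_unless; a plain-C model of the contract it implements, under the file's own assumption that the LOCK/UNLOCK macros (pushfl/cli ... popfl) make the body atomic on UP 386/486:

	/* model only, not kernel code: what atomic64_add_unless_386 returns */
	static int add_unless_model(long long *v, long long a, long long u)
	{
		long long old = *v;

		if (old == u)
			return 0;	/* value already equals @u: nothing added */

		*v = old + a;
		return 1;		/* add was performed */
	}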
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S new file mode 100644 index 000000000000..71e080de3352 --- /dev/null +++ b/arch/x86/lib/atomic64_cx8_32.S | |||
@@ -0,0 +1,224 @@ | |||
1 | /* | ||
2 | * atomic64_t for 586+ | ||
3 | * | ||
4 | * Copyright © 2010 Luca Barbieri | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/linkage.h> | ||
13 | #include <asm/alternative-asm.h> | ||
14 | #include <asm/dwarf2.h> | ||
15 | |||
16 | .macro SAVE reg | ||
17 | pushl %\reg | ||
18 | CFI_ADJUST_CFA_OFFSET 4 | ||
19 | CFI_REL_OFFSET \reg, 0 | ||
20 | .endm | ||
21 | |||
22 | .macro RESTORE reg | ||
23 | popl %\reg | ||
24 | CFI_ADJUST_CFA_OFFSET -4 | ||
25 | CFI_RESTORE \reg | ||
26 | .endm | ||
27 | |||
28 | .macro read64 reg | ||
29 | movl %ebx, %eax | ||
30 | movl %ecx, %edx | ||
31 | /* we need LOCK_PREFIX since otherwise cmpxchg8b always does the write */ | ||
32 | LOCK_PREFIX | ||
33 | cmpxchg8b (\reg) | ||
34 | .endm | ||
35 | |||
36 | ENTRY(atomic64_read_cx8) | ||
37 | CFI_STARTPROC | ||
38 | |||
39 | read64 %ecx | ||
40 | ret | ||
41 | CFI_ENDPROC | ||
42 | ENDPROC(atomic64_read_cx8) | ||
43 | |||
44 | ENTRY(atomic64_set_cx8) | ||
45 | CFI_STARTPROC | ||
46 | |||
47 | 1: | ||
48 | /* we don't need LOCK_PREFIX since aligned 64-bit writes | ||
49 | * are atomic on 586 and newer */ | ||
50 | cmpxchg8b (%esi) | ||
51 | jne 1b | ||
52 | |||
53 | ret | ||
54 | CFI_ENDPROC | ||
55 | ENDPROC(atomic64_set_cx8) | ||
56 | |||
57 | ENTRY(atomic64_xchg_cx8) | ||
58 | CFI_STARTPROC | ||
59 | |||
60 | movl %ebx, %eax | ||
61 | movl %ecx, %edx | ||
62 | 1: | ||
63 | LOCK_PREFIX | ||
64 | cmpxchg8b (%esi) | ||
65 | jne 1b | ||
66 | |||
67 | ret | ||
68 | CFI_ENDPROC | ||
69 | ENDPROC(atomic64_xchg_cx8) | ||
70 | |||
71 | .macro addsub_return func ins insc | ||
72 | ENTRY(atomic64_\func\()_return_cx8) | ||
73 | CFI_STARTPROC | ||
74 | SAVE ebp | ||
75 | SAVE ebx | ||
76 | SAVE esi | ||
77 | SAVE edi | ||
78 | |||
79 | movl %eax, %esi | ||
80 | movl %edx, %edi | ||
81 | movl %ecx, %ebp | ||
82 | |||
83 | read64 %ebp | ||
84 | 1: | ||
85 | movl %eax, %ebx | ||
86 | movl %edx, %ecx | ||
87 | \ins\()l %esi, %ebx | ||
88 | \insc\()l %edi, %ecx | ||
89 | LOCK_PREFIX | ||
90 | cmpxchg8b (%ebp) | ||
91 | jne 1b | ||
92 | |||
93 | 10: | ||
94 | movl %ebx, %eax | ||
95 | movl %ecx, %edx | ||
96 | RESTORE edi | ||
97 | RESTORE esi | ||
98 | RESTORE ebx | ||
99 | RESTORE ebp | ||
100 | ret | ||
101 | CFI_ENDPROC | ||
102 | ENDPROC(atomic64_\func\()_return_cx8) | ||
103 | .endm | ||
104 | |||
105 | addsub_return add add adc | ||
106 | addsub_return sub sub sbb | ||
107 | |||
108 | .macro incdec_return func ins insc | ||
109 | ENTRY(atomic64_\func\()_return_cx8) | ||
110 | CFI_STARTPROC | ||
111 | SAVE ebx | ||
112 | |||
113 | read64 %esi | ||
114 | 1: | ||
115 | movl %eax, %ebx | ||
116 | movl %edx, %ecx | ||
117 | \ins\()l $1, %ebx | ||
118 | \insc\()l $0, %ecx | ||
119 | LOCK_PREFIX | ||
120 | cmpxchg8b (%esi) | ||
121 | jne 1b | ||
122 | |||
123 | 10: | ||
124 | movl %ebx, %eax | ||
125 | movl %ecx, %edx | ||
126 | RESTORE ebx | ||
127 | ret | ||
128 | CFI_ENDPROC | ||
129 | ENDPROC(atomic64_\func\()_return_cx8) | ||
130 | .endm | ||
131 | |||
132 | incdec_return inc add adc | ||
133 | incdec_return dec sub sbb | ||
134 | |||
135 | ENTRY(atomic64_dec_if_positive_cx8) | ||
136 | CFI_STARTPROC | ||
137 | SAVE ebx | ||
138 | |||
139 | read64 %esi | ||
140 | 1: | ||
141 | movl %eax, %ebx | ||
142 | movl %edx, %ecx | ||
143 | subl $1, %ebx | ||
144 | sbb $0, %ecx | ||
145 | js 2f | ||
146 | LOCK_PREFIX | ||
147 | cmpxchg8b (%esi) | ||
148 | jne 1b | ||
149 | |||
150 | 2: | ||
151 | movl %ebx, %eax | ||
152 | movl %ecx, %edx | ||
153 | RESTORE ebx | ||
154 | ret | ||
155 | CFI_ENDPROC | ||
156 | ENDPROC(atomic64_dec_if_positive_cx8) | ||
157 | |||
158 | ENTRY(atomic64_add_unless_cx8) | ||
159 | CFI_STARTPROC | ||
160 | SAVE ebp | ||
161 | SAVE ebx | ||
162 | /* these just push these two parameters on the stack */ | ||
163 | SAVE edi | ||
164 | SAVE esi | ||
165 | |||
166 | movl %ecx, %ebp | ||
167 | movl %eax, %esi | ||
168 | movl %edx, %edi | ||
169 | |||
170 | read64 %ebp | ||
171 | 1: | ||
172 | cmpl %eax, 0(%esp) | ||
173 | je 4f | ||
174 | 2: | ||
175 | movl %eax, %ebx | ||
176 | movl %edx, %ecx | ||
177 | addl %esi, %ebx | ||
178 | adcl %edi, %ecx | ||
179 | LOCK_PREFIX | ||
180 | cmpxchg8b (%ebp) | ||
181 | jne 1b | ||
182 | |||
183 | movl $1, %eax | ||
184 | 3: | ||
185 | addl $8, %esp | ||
186 | CFI_ADJUST_CFA_OFFSET -8 | ||
187 | RESTORE ebx | ||
188 | RESTORE ebp | ||
189 | ret | ||
190 | 4: | ||
191 | cmpl %edx, 4(%esp) | ||
192 | jne 2b | ||
193 | xorl %eax, %eax | ||
194 | jmp 3b | ||
195 | CFI_ENDPROC | ||
196 | ENDPROC(atomic64_add_unless_cx8) | ||
197 | |||
198 | ENTRY(atomic64_inc_not_zero_cx8) | ||
199 | CFI_STARTPROC | ||
200 | SAVE ebx | ||
201 | |||
202 | read64 %esi | ||
203 | 1: | ||
204 | testl %eax, %eax | ||
205 | je 4f | ||
206 | 2: | ||
207 | movl %eax, %ebx | ||
208 | movl %edx, %ecx | ||
209 | addl $1, %ebx | ||
210 | adcl $0, %ecx | ||
211 | LOCK_PREFIX | ||
212 | cmpxchg8b (%esi) | ||
213 | jne 1b | ||
214 | |||
215 | movl $1, %eax | ||
216 | 3: | ||
217 | RESTORE ebx | ||
218 | ret | ||
219 | 4: | ||
220 | testl %edx, %edx | ||
221 | jne 2b | ||
222 | jmp 3b | ||
223 | CFI_ENDPROC | ||
224 | ENDPROC(atomic64_inc_not_zero_cx8) | ||
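Of the cmpxchg8b-based routines above, dec_if_positive has the subtlest return value; a hedged C model of what both the _cx8 and _386 versions compute:

	/* model only: decrement unless the result would go negative, and
	 * return the decremented value either way, so a negative return
	 * means the store was skipped */
	static long long dec_if_positive_model(long long *v)
	{
		long long newval = *v - 1;

		if (newval >= 0)
			*v = newval;

		return newval;
	}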
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c index aa0987088774..dc8adad10a2f 100644 --- a/arch/x86/math-emu/fpu_aux.c +++ b/arch/x86/math-emu/fpu_aux.c | |||
@@ -30,10 +30,10 @@ static void fclex(void) | |||
30 | } | 30 | } |
31 | 31 | ||
32 | /* Needs to be externally visible */ | 32 | /* Needs to be externally visible */ |
33 | void finit_task(struct task_struct *tsk) | 33 | void finit_soft_fpu(struct i387_soft_struct *soft) |
34 | { | 34 | { |
35 | struct i387_soft_struct *soft = &tsk->thread.xstate->soft; | ||
36 | struct address *oaddr, *iaddr; | 35 | struct address *oaddr, *iaddr; |
36 | memset(soft, 0, sizeof(*soft)); | ||
37 | soft->cwd = 0x037f; | 37 | soft->cwd = 0x037f; |
38 | soft->swd = 0; | 38 | soft->swd = 0; |
39 | soft->ftop = 0; /* We don't keep top in the status word internally. */ | 39 | soft->ftop = 0; /* We don't keep top in the status word internally. */ |
@@ -52,7 +52,7 @@ void finit_task(struct task_struct *tsk) | |||
52 | 52 | ||
53 | void finit(void) | 53 | void finit(void) |
54 | { | 54 | { |
55 | finit_task(current); | 55 | finit_soft_fpu(¤t->thread.fpu.state->soft); |
56 | } | 56 | } |
57 | 57 | ||
58 | /* | 58 | /* |
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 5d87f586f8d7..7718541541d4 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c | |||
@@ -681,7 +681,7 @@ int fpregs_soft_set(struct task_struct *target, | |||
681 | unsigned int pos, unsigned int count, | 681 | unsigned int pos, unsigned int count, |
682 | const void *kbuf, const void __user *ubuf) | 682 | const void *kbuf, const void __user *ubuf) |
683 | { | 683 | { |
684 | struct i387_soft_struct *s387 = &target->thread.xstate->soft; | 684 | struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; |
685 | void *space = s387->st_space; | 685 | void *space = s387->st_space; |
686 | int ret; | 686 | int ret; |
687 | int offset, other, i, tags, regnr, tag, newtop; | 687 | int offset, other, i, tags, regnr, tag, newtop; |
@@ -733,7 +733,7 @@ int fpregs_soft_get(struct task_struct *target, | |||
733 | unsigned int pos, unsigned int count, | 733 | unsigned int pos, unsigned int count, |
734 | void *kbuf, void __user *ubuf) | 734 | void *kbuf, void __user *ubuf) |
735 | { | 735 | { |
736 | struct i387_soft_struct *s387 = &target->thread.xstate->soft; | 736 | struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; |
737 | const void *space = s387->st_space; | 737 | const void *space = s387->st_space; |
738 | int ret; | 738 | int ret; |
739 | int offset = (S387->ftop & 7) * 10, other = 80 - offset; | 739 | int offset = (S387->ftop & 7) * 10, other = 80 - offset; |
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 50fa0ec2c8a5..2c614410a5f3 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h | |||
@@ -31,7 +31,7 @@ | |||
31 | #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ | 31 | #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ |
32 | == (1 << 10)) | 32 | == (1 << 10)) |
33 | 33 | ||
34 | #define I387 (current->thread.xstate) | 34 | #define I387 (current->thread.fpu.state) |
35 | #define FPU_info (I387->soft.info) | 35 | #define FPU_info (I387->soft.info) |
36 | 36 | ||
37 | #define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) | 37 | #define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) |
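These math-emu hunks are mechanical renames from thread.xstate to the new per-thread fpu container; condensed from the hunks above, the access pattern after the change looks like:

	/* condensed restatement of the hunks above */
	struct i387_soft_struct *soft = &current->thread.fpu.state->soft;

	finit_soft_fpu(soft);	/* replaces the old finit_task(current) */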
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 06630d26e56d..a4c768397baa 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -6,6 +6,7 @@ nostackp := $(call cc-option, -fno-stack-protector) | |||
6 | CFLAGS_physaddr.o := $(nostackp) | 6 | CFLAGS_physaddr.o := $(nostackp) |
7 | CFLAGS_setup_nx.o := $(nostackp) | 7 | CFLAGS_setup_nx.o := $(nostackp) |
8 | 8 | ||
9 | obj-$(CONFIG_X86_PAT) += pat_rbtree.o | ||
9 | obj-$(CONFIG_SMP) += tlb.o | 10 | obj-$(CONFIG_SMP) += tlb.o |
10 | 11 | ||
11 | obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o | 12 | obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 8948f47fde05..a7bcc23ef96c 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -33,9 +33,6 @@ int numa_off __initdata; | |||
33 | static unsigned long __initdata nodemap_addr; | 33 | static unsigned long __initdata nodemap_addr; |
34 | static unsigned long __initdata nodemap_size; | 34 | static unsigned long __initdata nodemap_size; |
35 | 35 | ||
36 | DEFINE_PER_CPU(int, node_number) = 0; | ||
37 | EXPORT_PER_CPU_SYMBOL(node_number); | ||
38 | |||
39 | /* | 36 | /* |
40 | * Map cpu index to node index | 37 | * Map cpu index to node index |
41 | */ | 38 | */ |
@@ -809,7 +806,7 @@ void __cpuinit numa_set_node(int cpu, int node) | |||
809 | per_cpu(x86_cpu_to_node_map, cpu) = node; | 806 | per_cpu(x86_cpu_to_node_map, cpu) = node; |
810 | 807 | ||
811 | if (node != NUMA_NO_NODE) | 808 | if (node != NUMA_NO_NODE) |
812 | per_cpu(node_number, cpu) = node; | 809 | set_cpu_numa_node(cpu, node); |
813 | } | 810 | } |
814 | 811 | ||
815 | void __cpuinit numa_clear_node(int cpu) | 812 | void __cpuinit numa_clear_node(int cpu) |
@@ -867,7 +864,7 @@ void __cpuinit numa_remove_cpu(int cpu) | |||
867 | numa_set_cpumask(cpu, 0); | 864 | numa_set_cpumask(cpu, 0); |
868 | } | 865 | } |
869 | 866 | ||
870 | int cpu_to_node(int cpu) | 867 | int __cpu_to_node(int cpu) |
871 | { | 868 | { |
872 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) { | 869 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) { |
873 | printk(KERN_WARNING | 870 | printk(KERN_WARNING |
@@ -877,7 +874,7 @@ int cpu_to_node(int cpu) | |||
877 | } | 874 | } |
878 | return per_cpu(x86_cpu_to_node_map, cpu); | 875 | return per_cpu(x86_cpu_to_node_map, cpu); |
879 | } | 876 | } |
880 | EXPORT_SYMBOL(cpu_to_node); | 877 | EXPORT_SYMBOL(__cpu_to_node); |
881 | 878 | ||
882 | /* | 879 | /* |
883 | * Same function as cpu_to_node() but used if called before the | 880 | * Same function as cpu_to_node() but used if called before the |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 28195c350b97..532e7933d606 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -997,7 +997,8 @@ out_err: | |||
997 | } | 997 | } |
998 | EXPORT_SYMBOL(set_memory_uc); | 998 | EXPORT_SYMBOL(set_memory_uc); |
999 | 999 | ||
1000 | int set_memory_array_uc(unsigned long *addr, int addrinarray) | 1000 | int _set_memory_array(unsigned long *addr, int addrinarray, |
1001 | unsigned long new_type) | ||
1001 | { | 1002 | { |
1002 | int i, j; | 1003 | int i, j; |
1003 | int ret; | 1004 | int ret; |
@@ -1007,13 +1008,19 @@ int set_memory_array_uc(unsigned long *addr, int addrinarray) | |||
1007 | */ | 1008 | */ |
1008 | for (i = 0; i < addrinarray; i++) { | 1009 | for (i = 0; i < addrinarray; i++) { |
1009 | ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE, | 1010 | ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE, |
1010 | _PAGE_CACHE_UC_MINUS, NULL); | 1011 | new_type, NULL); |
1011 | if (ret) | 1012 | if (ret) |
1012 | goto out_free; | 1013 | goto out_free; |
1013 | } | 1014 | } |
1014 | 1015 | ||
1015 | ret = change_page_attr_set(addr, addrinarray, | 1016 | ret = change_page_attr_set(addr, addrinarray, |
1016 | __pgprot(_PAGE_CACHE_UC_MINUS), 1); | 1017 | __pgprot(_PAGE_CACHE_UC_MINUS), 1); |
1018 | |||
1019 | if (!ret && new_type == _PAGE_CACHE_WC) | ||
1020 | ret = change_page_attr_set_clr(addr, addrinarray, | ||
1021 | __pgprot(_PAGE_CACHE_WC), | ||
1022 | __pgprot(_PAGE_CACHE_MASK), | ||
1023 | 0, CPA_ARRAY, NULL); | ||
1017 | if (ret) | 1024 | if (ret) |
1018 | goto out_free; | 1025 | goto out_free; |
1019 | 1026 | ||
@@ -1025,8 +1032,19 @@ out_free: | |||
1025 | 1032 | ||
1026 | return ret; | 1033 | return ret; |
1027 | } | 1034 | } |
1035 | |||
1036 | int set_memory_array_uc(unsigned long *addr, int addrinarray) | ||
1037 | { | ||
1038 | return _set_memory_array(addr, addrinarray, _PAGE_CACHE_UC_MINUS); | ||
1039 | } | ||
1028 | EXPORT_SYMBOL(set_memory_array_uc); | 1040 | EXPORT_SYMBOL(set_memory_array_uc); |
1029 | 1041 | ||
1042 | int set_memory_array_wc(unsigned long *addr, int addrinarray) | ||
1043 | { | ||
1044 | return _set_memory_array(addr, addrinarray, _PAGE_CACHE_WC); | ||
1045 | } | ||
1046 | EXPORT_SYMBOL(set_memory_array_wc); | ||
1047 | |||
1030 | int _set_memory_wc(unsigned long addr, int numpages) | 1048 | int _set_memory_wc(unsigned long addr, int numpages) |
1031 | { | 1049 | { |
1032 | int ret; | 1050 | int ret; |
@@ -1153,26 +1171,34 @@ int set_pages_uc(struct page *page, int numpages) | |||
1153 | } | 1171 | } |
1154 | EXPORT_SYMBOL(set_pages_uc); | 1172 | EXPORT_SYMBOL(set_pages_uc); |
1155 | 1173 | ||
1156 | int set_pages_array_uc(struct page **pages, int addrinarray) | 1174 | static int _set_pages_array(struct page **pages, int addrinarray, |
1175 | unsigned long new_type) | ||
1157 | { | 1176 | { |
1158 | unsigned long start; | 1177 | unsigned long start; |
1159 | unsigned long end; | 1178 | unsigned long end; |
1160 | int i; | 1179 | int i; |
1161 | int free_idx; | 1180 | int free_idx; |
1181 | int ret; | ||
1162 | 1182 | ||
1163 | for (i = 0; i < addrinarray; i++) { | 1183 | for (i = 0; i < addrinarray; i++) { |
1164 | if (PageHighMem(pages[i])) | 1184 | if (PageHighMem(pages[i])) |
1165 | continue; | 1185 | continue; |
1166 | start = page_to_pfn(pages[i]) << PAGE_SHIFT; | 1186 | start = page_to_pfn(pages[i]) << PAGE_SHIFT; |
1167 | end = start + PAGE_SIZE; | 1187 | end = start + PAGE_SIZE; |
1168 | if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) | 1188 | if (reserve_memtype(start, end, new_type, NULL)) |
1169 | goto err_out; | 1189 | goto err_out; |
1170 | } | 1190 | } |
1171 | 1191 | ||
1172 | if (cpa_set_pages_array(pages, addrinarray, | 1192 | ret = cpa_set_pages_array(pages, addrinarray, |
1173 | __pgprot(_PAGE_CACHE_UC_MINUS)) == 0) { | 1193 | __pgprot(_PAGE_CACHE_UC_MINUS)); |
1174 | return 0; /* Success */ | 1194 | if (!ret && new_type == _PAGE_CACHE_WC) |
1175 | } | 1195 | ret = change_page_attr_set_clr(NULL, addrinarray, |
1196 | __pgprot(_PAGE_CACHE_WC), | ||
1197 | __pgprot(_PAGE_CACHE_MASK), | ||
1198 | 0, CPA_PAGES_ARRAY, pages); | ||
1199 | if (ret) | ||
1200 | goto err_out; | ||
1201 | return 0; /* Success */ | ||
1176 | err_out: | 1202 | err_out: |
1177 | free_idx = i; | 1203 | free_idx = i; |
1178 | for (i = 0; i < free_idx; i++) { | 1204 | for (i = 0; i < free_idx; i++) { |
@@ -1184,8 +1210,19 @@ err_out: | |||
1184 | } | 1210 | } |
1185 | return -EINVAL; | 1211 | return -EINVAL; |
1186 | } | 1212 | } |
1213 | |||
1214 | int set_pages_array_uc(struct page **pages, int addrinarray) | ||
1215 | { | ||
1216 | return _set_pages_array(pages, addrinarray, _PAGE_CACHE_UC_MINUS); | ||
1217 | } | ||
1187 | EXPORT_SYMBOL(set_pages_array_uc); | 1218 | EXPORT_SYMBOL(set_pages_array_uc); |
1188 | 1219 | ||
1220 | int set_pages_array_wc(struct page **pages, int addrinarray) | ||
1221 | { | ||
1222 | return _set_pages_array(pages, addrinarray, _PAGE_CACHE_WC); | ||
1223 | } | ||
1224 | EXPORT_SYMBOL(set_pages_array_wc); | ||
1225 | |||
1189 | int set_pages_wb(struct page *page, int numpages) | 1226 | int set_pages_wb(struct page *page, int numpages) |
1190 | { | 1227 | { |
1191 | unsigned long addr = (unsigned long)page_address(page); | 1228 | unsigned long addr = (unsigned long)page_address(page); |
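The new *_array_wc entry points mirror the existing *_array_uc ones; a hedged usage sketch for a caller that maps a page array write-combining ('pages' and 'count' are assumed to come from the caller, and set_pages_array_wb() is assumed to remain the inverse operation):

	/* sketch only: error handling trimmed to the essentials */
	if (set_pages_array_wc(pages, count))
		return -EINVAL;	/* PAT reservation or attribute change failed */

	/* ... use the pages through a write-combining mapping ... */

	set_pages_array_wb(pages, count);	/* restore write-back before freeing */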
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index edc8b95afc1a..acc15b23b743 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -30,6 +30,8 @@ | |||
30 | #include <asm/pat.h> | 30 | #include <asm/pat.h> |
31 | #include <asm/io.h> | 31 | #include <asm/io.h> |
32 | 32 | ||
33 | #include "pat_internal.h" | ||
34 | |||
33 | #ifdef CONFIG_X86_PAT | 35 | #ifdef CONFIG_X86_PAT |
34 | int __read_mostly pat_enabled = 1; | 36 | int __read_mostly pat_enabled = 1; |
35 | 37 | ||
@@ -53,19 +55,15 @@ static inline void pat_disable(const char *reason) | |||
53 | #endif | 55 | #endif |
54 | 56 | ||
55 | 57 | ||
56 | static int debug_enable; | 58 | int pat_debug_enable; |
57 | 59 | ||
58 | static int __init pat_debug_setup(char *str) | 60 | static int __init pat_debug_setup(char *str) |
59 | { | 61 | { |
60 | debug_enable = 1; | 62 | pat_debug_enable = 1; |
61 | return 0; | 63 | return 0; |
62 | } | 64 | } |
63 | __setup("debugpat", pat_debug_setup); | 65 | __setup("debugpat", pat_debug_setup); |
64 | 66 | ||
65 | #define dprintk(fmt, arg...) \ | ||
66 | do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) | ||
67 | |||
68 | |||
69 | static u64 __read_mostly boot_pat_state; | 67 | static u64 __read_mostly boot_pat_state; |
70 | 68 | ||
71 | enum { | 69 | enum { |
@@ -132,84 +130,7 @@ void pat_init(void) | |||
132 | 130 | ||
133 | #undef PAT | 131 | #undef PAT |
134 | 132 | ||
135 | static char *cattr_name(unsigned long flags) | 133 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */ |
136 | { | ||
137 | switch (flags & _PAGE_CACHE_MASK) { | ||
138 | case _PAGE_CACHE_UC: return "uncached"; | ||
139 | case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; | ||
140 | case _PAGE_CACHE_WB: return "write-back"; | ||
141 | case _PAGE_CACHE_WC: return "write-combining"; | ||
142 | default: return "broken"; | ||
143 | } | ||
144 | } | ||
145 | |||
146 | /* | ||
147 | * The global memtype list keeps track of memory type for specific | ||
148 | * physical memory areas. Conflicting memory types in different | ||
149 | * mappings can cause CPU cache corruption. To avoid this we keep track. | ||
150 | * | ||
151 | * The list is sorted based on starting address and can contain multiple | ||
152 | * entries for each address (this allows reference counting for overlapping | ||
153 | * areas). All the aliases have the same cache attributes of course. | ||
154 | * Zero attributes are represented as holes. | ||
155 | * | ||
156 | * The data structure is a list that is also organized as an rbtree | ||
157 | * sorted on the start address of memtype range. | ||
158 | * | ||
159 | * memtype_lock protects both the linear list and rbtree. | ||
160 | */ | ||
161 | |||
162 | struct memtype { | ||
163 | u64 start; | ||
164 | u64 end; | ||
165 | unsigned long type; | ||
166 | struct list_head nd; | ||
167 | struct rb_node rb; | ||
168 | }; | ||
169 | |||
170 | static struct rb_root memtype_rbroot = RB_ROOT; | ||
171 | static LIST_HEAD(memtype_list); | ||
172 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ | ||
173 | |||
174 | static struct memtype *memtype_rb_search(struct rb_root *root, u64 start) | ||
175 | { | ||
176 | struct rb_node *node = root->rb_node; | ||
177 | struct memtype *last_lower = NULL; | ||
178 | |||
179 | while (node) { | ||
180 | struct memtype *data = container_of(node, struct memtype, rb); | ||
181 | |||
182 | if (data->start < start) { | ||
183 | last_lower = data; | ||
184 | node = node->rb_right; | ||
185 | } else if (data->start > start) { | ||
186 | node = node->rb_left; | ||
187 | } else | ||
188 | return data; | ||
189 | } | ||
190 | |||
191 | /* Will return NULL if there is no entry with its start <= start */ | ||
192 | return last_lower; | ||
193 | } | ||
194 | |||
195 | static void memtype_rb_insert(struct rb_root *root, struct memtype *data) | ||
196 | { | ||
197 | struct rb_node **new = &(root->rb_node); | ||
198 | struct rb_node *parent = NULL; | ||
199 | |||
200 | while (*new) { | ||
201 | struct memtype *this = container_of(*new, struct memtype, rb); | ||
202 | |||
203 | parent = *new; | ||
204 | if (data->start <= this->start) | ||
205 | new = &((*new)->rb_left); | ||
206 | else if (data->start > this->start) | ||
207 | new = &((*new)->rb_right); | ||
208 | } | ||
209 | |||
210 | rb_link_node(&data->rb, parent, new); | ||
211 | rb_insert_color(&data->rb, root); | ||
212 | } | ||
213 | 134 | ||
214 | /* | 135 | /* |
215 | * Does intersection of PAT memory type and MTRR memory type and returns | 136 | * Does intersection of PAT memory type and MTRR memory type and returns |
@@ -237,33 +158,6 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type) | |||
237 | return req_type; | 158 | return req_type; |
238 | } | 159 | } |
239 | 160 | ||
240 | static int | ||
241 | chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) | ||
242 | { | ||
243 | if (new->type != entry->type) { | ||
244 | if (type) { | ||
245 | new->type = entry->type; | ||
246 | *type = entry->type; | ||
247 | } else | ||
248 | goto conflict; | ||
249 | } | ||
250 | |||
251 | /* check overlaps with more than one entry in the list */ | ||
252 | list_for_each_entry_continue(entry, &memtype_list, nd) { | ||
253 | if (new->end <= entry->start) | ||
254 | break; | ||
255 | else if (new->type != entry->type) | ||
256 | goto conflict; | ||
257 | } | ||
258 | return 0; | ||
259 | |||
260 | conflict: | ||
261 | printk(KERN_INFO "%s:%d conflicting memory types " | ||
262 | "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start, | ||
263 | new->end, cattr_name(new->type), cattr_name(entry->type)); | ||
264 | return -EBUSY; | ||
265 | } | ||
266 | |||
267 | static int pat_pagerange_is_ram(unsigned long start, unsigned long end) | 161 | static int pat_pagerange_is_ram(unsigned long start, unsigned long end) |
268 | { | 162 | { |
269 | int ram_page = 0, not_rampage = 0; | 163 | int ram_page = 0, not_rampage = 0; |
@@ -296,8 +190,6 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end) | |||
296 | * Here we do two pass: | 190 | * Here we do two pass: |
297 | * - Find the memtype of all the pages in the range, look for any conflicts | 191 | * - Find the memtype of all the pages in the range, look for any conflicts |
298 | * - In case of no conflicts, set the new memtype for pages in the range | 192 | * - In case of no conflicts, set the new memtype for pages in the range |
299 | * | ||
300 | * Caller must hold memtype_lock for atomicity. | ||
301 | */ | 193 | */ |
302 | static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, | 194 | static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, |
303 | unsigned long *new_type) | 195 | unsigned long *new_type) |
@@ -364,9 +256,8 @@ static int free_ram_pages_type(u64 start, u64 end) | |||
364 | int reserve_memtype(u64 start, u64 end, unsigned long req_type, | 256 | int reserve_memtype(u64 start, u64 end, unsigned long req_type, |
365 | unsigned long *new_type) | 257 | unsigned long *new_type) |
366 | { | 258 | { |
367 | struct memtype *new, *entry; | 259 | struct memtype *new; |
368 | unsigned long actual_type; | 260 | unsigned long actual_type; |
369 | struct list_head *where; | ||
370 | int is_range_ram; | 261 | int is_range_ram; |
371 | int err = 0; | 262 | int err = 0; |
372 | 263 | ||
@@ -404,9 +295,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
404 | is_range_ram = pat_pagerange_is_ram(start, end); | 295 | is_range_ram = pat_pagerange_is_ram(start, end); |
405 | if (is_range_ram == 1) { | 296 | if (is_range_ram == 1) { |
406 | 297 | ||
407 | spin_lock(&memtype_lock); | ||
408 | err = reserve_ram_pages_type(start, end, req_type, new_type); | 298 | err = reserve_ram_pages_type(start, end, req_type, new_type); |
409 | spin_unlock(&memtype_lock); | ||
410 | 299 | ||
411 | return err; | 300 | return err; |
412 | } else if (is_range_ram < 0) { | 301 | } else if (is_range_ram < 0) { |
@@ -423,42 +312,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
423 | 312 | ||
424 | spin_lock(&memtype_lock); | 313 | spin_lock(&memtype_lock); |
425 | 314 | ||
426 | /* Search for existing mapping that overlaps the current range */ | 315 | err = rbt_memtype_check_insert(new, new_type); |
427 | where = NULL; | ||
428 | list_for_each_entry(entry, &memtype_list, nd) { | ||
429 | if (end <= entry->start) { | ||
430 | where = entry->nd.prev; | ||
431 | break; | ||
432 | } else if (start <= entry->start) { /* end > entry->start */ | ||
433 | err = chk_conflict(new, entry, new_type); | ||
434 | if (!err) { | ||
435 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | ||
436 | entry->start, entry->end); | ||
437 | where = entry->nd.prev; | ||
438 | } | ||
439 | break; | ||
440 | } else if (start < entry->end) { /* start > entry->start */ | ||
441 | err = chk_conflict(new, entry, new_type); | ||
442 | if (!err) { | ||
443 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | ||
444 | entry->start, entry->end); | ||
445 | |||
446 | /* | ||
447 | * Move to right position in the linked | ||
448 | * list to add this new entry | ||
449 | */ | ||
450 | list_for_each_entry_continue(entry, | ||
451 | &memtype_list, nd) { | ||
452 | if (start <= entry->start) { | ||
453 | where = entry->nd.prev; | ||
454 | break; | ||
455 | } | ||
456 | } | ||
457 | } | ||
458 | break; | ||
459 | } | ||
460 | } | ||
461 | |||
462 | if (err) { | 316 | if (err) { |
463 | printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " | 317 | printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " |
464 | "track %s, req %s\n", | 318 | "track %s, req %s\n", |
@@ -469,13 +323,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
469 | return err; | 323 | return err; |
470 | } | 324 | } |
471 | 325 | ||
472 | if (where) | ||
473 | list_add(&new->nd, where); | ||
474 | else | ||
475 | list_add_tail(&new->nd, &memtype_list); | ||
476 | |||
477 | memtype_rb_insert(&memtype_rbroot, new); | ||
478 | |||
479 | spin_unlock(&memtype_lock); | 326 | spin_unlock(&memtype_lock); |
480 | 327 | ||
481 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", | 328 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", |
@@ -487,9 +334,9 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
487 | 334 | ||
488 | int free_memtype(u64 start, u64 end) | 335 | int free_memtype(u64 start, u64 end) |
489 | { | 336 | { |
490 | struct memtype *entry, *saved_entry; | ||
491 | int err = -EINVAL; | 337 | int err = -EINVAL; |
492 | int is_range_ram; | 338 | int is_range_ram; |
339 | struct memtype *entry; | ||
493 | 340 | ||
494 | if (!pat_enabled) | 341 | if (!pat_enabled) |
495 | return 0; | 342 | return 0; |
@@ -501,9 +348,7 @@ int free_memtype(u64 start, u64 end) | |||
501 | is_range_ram = pat_pagerange_is_ram(start, end); | 348 | is_range_ram = pat_pagerange_is_ram(start, end); |
502 | if (is_range_ram == 1) { | 349 | if (is_range_ram == 1) { |
503 | 350 | ||
504 | spin_lock(&memtype_lock); | ||
505 | err = free_ram_pages_type(start, end); | 351 | err = free_ram_pages_type(start, end); |
506 | spin_unlock(&memtype_lock); | ||
507 | 352 | ||
508 | return err; | 353 | return err; |
509 | } else if (is_range_ram < 0) { | 354 | } else if (is_range_ram < 0) { |
@@ -511,56 +356,20 @@ int free_memtype(u64 start, u64 end) | |||
511 | } | 356 | } |
512 | 357 | ||
513 | spin_lock(&memtype_lock); | 358 | spin_lock(&memtype_lock); |
514 | 359 | entry = rbt_memtype_erase(start, end); | |
515 | entry = memtype_rb_search(&memtype_rbroot, start); | ||
516 | if (unlikely(entry == NULL)) | ||
517 | goto unlock_ret; | ||
518 | |||
519 | /* | ||
520 | * Saved entry points to an entry with start same or less than what | ||
521 | * we searched for. Now go through the list in both directions to look | ||
522 | * for the entry that matches with both start and end, with list stored | ||
523 | * in sorted start address | ||
524 | */ | ||
525 | saved_entry = entry; | ||
526 | list_for_each_entry_from(entry, &memtype_list, nd) { | ||
527 | if (entry->start == start && entry->end == end) { | ||
528 | rb_erase(&entry->rb, &memtype_rbroot); | ||
529 | list_del(&entry->nd); | ||
530 | kfree(entry); | ||
531 | err = 0; | ||
532 | break; | ||
533 | } else if (entry->start > start) { | ||
534 | break; | ||
535 | } | ||
536 | } | ||
537 | |||
538 | if (!err) | ||
539 | goto unlock_ret; | ||
540 | |||
541 | entry = saved_entry; | ||
542 | list_for_each_entry_reverse(entry, &memtype_list, nd) { | ||
543 | if (entry->start == start && entry->end == end) { | ||
544 | rb_erase(&entry->rb, &memtype_rbroot); | ||
545 | list_del(&entry->nd); | ||
546 | kfree(entry); | ||
547 | err = 0; | ||
548 | break; | ||
549 | } else if (entry->start < start) { | ||
550 | break; | ||
551 | } | ||
552 | } | ||
553 | unlock_ret: | ||
554 | spin_unlock(&memtype_lock); | 360 | spin_unlock(&memtype_lock); |
555 | 361 | ||
556 | if (err) { | 362 | if (!entry) { |
557 | printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n", | 363 | printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n", |
558 | current->comm, current->pid, start, end); | 364 | current->comm, current->pid, start, end); |
365 | return -EINVAL; | ||
559 | } | 366 | } |
560 | 367 | ||
368 | kfree(entry); | ||
369 | |||
561 | dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); | 370 | dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); |
562 | 371 | ||
563 | return err; | 372 | return 0; |
564 | } | 373 | } |
565 | 374 | ||
566 | 375 | ||
@@ -583,10 +392,8 @@ static unsigned long lookup_memtype(u64 paddr) | |||
583 | 392 | ||
584 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { | 393 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { |
585 | struct page *page; | 394 | struct page *page; |
586 | spin_lock(&memtype_lock); | ||
587 | page = pfn_to_page(paddr >> PAGE_SHIFT); | 395 | page = pfn_to_page(paddr >> PAGE_SHIFT); |
588 | rettype = get_page_memtype(page); | 396 | rettype = get_page_memtype(page); |
589 | spin_unlock(&memtype_lock); | ||
590 | /* | 397 | /* |
591 | * -1 from get_page_memtype() implies RAM page is in its | 398 | * -1 from get_page_memtype() implies RAM page is in its |
592 | * default state and not reserved, and hence of type WB | 399 | * default state and not reserved, and hence of type WB |
@@ -599,7 +406,7 @@ static unsigned long lookup_memtype(u64 paddr) | |||
599 | 406 | ||
600 | spin_lock(&memtype_lock); | 407 | spin_lock(&memtype_lock); |
601 | 408 | ||
602 | entry = memtype_rb_search(&memtype_rbroot, paddr); | 409 | entry = rbt_memtype_lookup(paddr); |
603 | if (entry != NULL) | 410 | if (entry != NULL) |
604 | rettype = entry->type; | 411 | rettype = entry->type; |
605 | else | 412 | else |
@@ -936,29 +743,25 @@ EXPORT_SYMBOL_GPL(pgprot_writecombine); | |||
936 | 743 | ||
937 | #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) | 744 | #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) |
938 | 745 | ||
939 | /* get Nth element of the linked list */ | ||
940 | static struct memtype *memtype_get_idx(loff_t pos) | 746 | static struct memtype *memtype_get_idx(loff_t pos) |
941 | { | 747 | { |
942 | struct memtype *list_node, *print_entry; | 748 | struct memtype *print_entry; |
943 | int i = 1; | 749 | int ret; |
944 | 750 | ||
945 | print_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL); | 751 | print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL); |
946 | if (!print_entry) | 752 | if (!print_entry) |
947 | return NULL; | 753 | return NULL; |
948 | 754 | ||
949 | spin_lock(&memtype_lock); | 755 | spin_lock(&memtype_lock); |
950 | list_for_each_entry(list_node, &memtype_list, nd) { | 756 | ret = rbt_memtype_copy_nth_element(print_entry, pos); |
951 | if (pos == i) { | ||
952 | *print_entry = *list_node; | ||
953 | spin_unlock(&memtype_lock); | ||
954 | return print_entry; | ||
955 | } | ||
956 | ++i; | ||
957 | } | ||
958 | spin_unlock(&memtype_lock); | 757 | spin_unlock(&memtype_lock); |
959 | kfree(print_entry); | ||
960 | 758 | ||
961 | return NULL; | 759 | if (!ret) { |
760 | return print_entry; | ||
761 | } else { | ||
762 | kfree(print_entry); | ||
763 | return NULL; | ||
764 | } | ||
962 | } | 765 | } |
963 | 766 | ||
964 | static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) | 767 | static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) |
diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h new file mode 100644 index 000000000000..77e5ba153fac --- /dev/null +++ b/arch/x86/mm/pat_internal.h | |||
@@ -0,0 +1,46 @@ | |||
1 | #ifndef __PAT_INTERNAL_H_ | ||
2 | #define __PAT_INTERNAL_H_ | ||
3 | |||
4 | extern int pat_debug_enable; | ||
5 | |||
6 | #define dprintk(fmt, arg...) \ | ||
7 | do { if (pat_debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) | ||
8 | |||
9 | struct memtype { | ||
10 | u64 start; | ||
11 | u64 end; | ||
12 | u64 subtree_max_end; | ||
13 | unsigned long type; | ||
14 | struct rb_node rb; | ||
15 | }; | ||
16 | |||
17 | static inline char *cattr_name(unsigned long flags) | ||
18 | { | ||
19 | switch (flags & _PAGE_CACHE_MASK) { | ||
20 | case _PAGE_CACHE_UC: return "uncached"; | ||
21 | case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; | ||
22 | case _PAGE_CACHE_WB: return "write-back"; | ||
23 | case _PAGE_CACHE_WC: return "write-combining"; | ||
24 | default: return "broken"; | ||
25 | } | ||
26 | } | ||
27 | |||
28 | #ifdef CONFIG_X86_PAT | ||
29 | extern int rbt_memtype_check_insert(struct memtype *new, | ||
30 | unsigned long *new_type); | ||
31 | extern struct memtype *rbt_memtype_erase(u64 start, u64 end); | ||
32 | extern struct memtype *rbt_memtype_lookup(u64 addr); | ||
33 | extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos); | ||
34 | #else | ||
35 | static inline int rbt_memtype_check_insert(struct memtype *new, | ||
36 | unsigned long *new_type) | ||
37 | { return 0; } | ||
38 | static inline struct memtype *rbt_memtype_erase(u64 start, u64 end) | ||
39 | { return NULL; } | ||
40 | static inline struct memtype *rbt_memtype_lookup(u64 addr) | ||
41 | { return NULL; } | ||
42 | static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) | ||
43 | { return 0; } | ||
44 | #endif | ||
45 | |||
46 | #endif /* __PAT_INTERNAL_H_ */ | ||
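The rbt_memtype_* interface declared above carries a specific caller contract that the pat.c hunks earlier in this diff rely on: rbt_memtype_check_insert() may adjust the requested type before linking the entry, and rbt_memtype_erase() unlinks the exact [start, end) entry and hands it back so the caller can kfree() it outside the lock. A minimal userspace sketch of that contract, with the augmented rbtree replaced by a flat array purely for illustration (the stub bodies are placeholders, not the kernel implementation):

#include <stdio.h>
#include <stdlib.h>

struct memtype { unsigned long long start, end; unsigned long type; };

/* toy backing store standing in for the augmented rbtree */
static struct memtype *slot[8];

static int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
{
        for (int i = 0; i < 8; i++)
                if (!slot[i]) {
                        if (ret_type)
                                *ret_type = new->type;  /* no conflict handling in this sketch */
                        slot[i] = new;
                        return 0;
                }
        return -1;                                      /* "tree" full */
}

static struct memtype *rbt_memtype_erase(unsigned long long start, unsigned long long end)
{
        for (int i = 0; i < 8; i++)
                if (slot[i] && slot[i]->start == start && slot[i]->end == end) {
                        struct memtype *e = slot[i];
                        slot[i] = NULL;                 /* unlink, like rb_erase() */
                        return e;                       /* caller owns and frees it */
                }
        return NULL;                                    /* no exact match */
}

int main(void)
{
        struct memtype *new = malloc(sizeof(*new));
        unsigned long actual;

        *new = (struct memtype){ .start = 0x1000, .end = 0x2000, .type = 0 };
        if (rbt_memtype_check_insert(new, &actual))     /* reserve_memtype() path */
                free(new);                              /* insert failed: caller cleans up */

        struct memtype *old = rbt_memtype_erase(0x1000, 0x2000);  /* free_memtype() path */
        if (!old)
                printf("freeing invalid memtype\n");    /* pat.c logs this and returns -EINVAL */
        else
                free(old);                              /* erase returned the entry; free it */
        return 0;
}

The only point of the sketch is the ownership rule: the erase helper never frees the entry, the caller does, which is why the reworked free_memtype() above can drop memtype_lock before calling kfree().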
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c new file mode 100644 index 000000000000..f537087bb740 --- /dev/null +++ b/arch/x86/mm/pat_rbtree.c | |||
@@ -0,0 +1,274 @@ | |||
1 | /* | ||
2 | * Handle caching attributes in page tables (PAT) | ||
3 | * | ||
4 | * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> | ||
5 | * Suresh B Siddha <suresh.b.siddha@intel.com> | ||
6 | * | ||
7 | * Interval tree (augmented rbtree) used to store the PAT memory type | ||
8 | * reservations. | ||
9 | */ | ||
10 | |||
11 | #include <linux/seq_file.h> | ||
12 | #include <linux/debugfs.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/rbtree.h> | ||
16 | #include <linux/sched.h> | ||
17 | #include <linux/gfp.h> | ||
18 | |||
19 | #include <asm/pgtable.h> | ||
20 | #include <asm/pat.h> | ||
21 | |||
22 | #include "pat_internal.h" | ||
23 | |||
24 | /* | ||
25 | * The memtype tree keeps track of memory type for specific | ||
26 | * physical memory areas. Without proper tracking, conflicting memory | ||
27 | * types in different mappings can cause CPU cache corruption. | ||
28 | * | ||
29 | * The tree is an interval tree (augmented rbtree) with tree ordered | ||
30 | * on starting address. Tree can contain multiple entries for | ||
31 | * different regions which overlap. All the aliases have the same | ||
32 | * cache attributes of course. | ||
33 | * | ||
34 | * memtype_lock protects the rbtree. | ||
35 | */ | ||
36 | |||
37 | static void memtype_rb_augment_cb(struct rb_node *node); | ||
38 | static struct rb_root memtype_rbroot = RB_AUGMENT_ROOT(&memtype_rb_augment_cb); | ||
39 | |||
40 | static int is_node_overlap(struct memtype *node, u64 start, u64 end) | ||
41 | { | ||
42 | if (node->start >= end || node->end <= start) | ||
43 | return 0; | ||
44 | |||
45 | return 1; | ||
46 | } | ||
47 | |||
48 | static u64 get_subtree_max_end(struct rb_node *node) | ||
49 | { | ||
50 | u64 ret = 0; | ||
51 | if (node) { | ||
52 | struct memtype *data = container_of(node, struct memtype, rb); | ||
53 | ret = data->subtree_max_end; | ||
54 | } | ||
55 | return ret; | ||
56 | } | ||
57 | |||
58 | /* Update 'subtree_max_end' for a node, based on node and its children */ | ||
59 | static void update_node_max_end(struct rb_node *node) | ||
60 | { | ||
61 | struct memtype *data; | ||
62 | u64 max_end, child_max_end; | ||
63 | |||
64 | if (!node) | ||
65 | return; | ||
66 | |||
67 | data = container_of(node, struct memtype, rb); | ||
68 | max_end = data->end; | ||
69 | |||
70 | child_max_end = get_subtree_max_end(node->rb_right); | ||
71 | if (child_max_end > max_end) | ||
72 | max_end = child_max_end; | ||
73 | |||
74 | child_max_end = get_subtree_max_end(node->rb_left); | ||
75 | if (child_max_end > max_end) | ||
76 | max_end = child_max_end; | ||
77 | |||
78 | data->subtree_max_end = max_end; | ||
79 | } | ||
80 | |||
81 | /* Update 'subtree_max_end' for a node and all its ancestors */ | ||
82 | static void update_path_max_end(struct rb_node *node) | ||
83 | { | ||
84 | u64 old_max_end, new_max_end; | ||
85 | |||
86 | while (node) { | ||
87 | struct memtype *data = container_of(node, struct memtype, rb); | ||
88 | |||
89 | old_max_end = data->subtree_max_end; | ||
90 | update_node_max_end(node); | ||
91 | new_max_end = data->subtree_max_end; | ||
92 | |||
93 | if (new_max_end == old_max_end) | ||
94 | break; | ||
95 | |||
96 | node = rb_parent(node); | ||
97 | } | ||
98 | } | ||
99 | |||
100 | /* Find the first (lowest start addr) overlapping range from rb tree */ | ||
101 | static struct memtype *memtype_rb_lowest_match(struct rb_root *root, | ||
102 | u64 start, u64 end) | ||
103 | { | ||
104 | struct rb_node *node = root->rb_node; | ||
105 | struct memtype *last_lower = NULL; | ||
106 | |||
107 | while (node) { | ||
108 | struct memtype *data = container_of(node, struct memtype, rb); | ||
109 | |||
110 | if (get_subtree_max_end(node->rb_left) > start) { | ||
111 | /* Lowest overlap if any must be on left side */ | ||
112 | node = node->rb_left; | ||
113 | } else if (is_node_overlap(data, start, end)) { | ||
114 | last_lower = data; | ||
115 | break; | ||
116 | } else if (start >= data->start) { | ||
117 | /* Lowest overlap if any must be on right side */ | ||
118 | node = node->rb_right; | ||
119 | } else { | ||
120 | break; | ||
121 | } | ||
122 | } | ||
123 | return last_lower; /* Returns NULL if there is no overlap */ | ||
124 | } | ||
125 | |||
126 | static struct memtype *memtype_rb_exact_match(struct rb_root *root, | ||
127 | u64 start, u64 end) | ||
128 | { | ||
129 | struct memtype *match; | ||
130 | |||
131 | match = memtype_rb_lowest_match(root, start, end); | ||
132 | while (match != NULL && match->start < end) { | ||
133 | struct rb_node *node; | ||
134 | |||
135 | if (match->start == start && match->end == end) | ||
136 | return match; | ||
137 | |||
138 | node = rb_next(&match->rb); | ||
139 | if (node) | ||
140 | match = container_of(node, struct memtype, rb); | ||
141 | else | ||
142 | match = NULL; | ||
143 | } | ||
144 | |||
145 | return NULL; /* Returns NULL if there is no exact match */ | ||
146 | } | ||
147 | |||
148 | static int memtype_rb_check_conflict(struct rb_root *root, | ||
149 | u64 start, u64 end, | ||
150 | unsigned long reqtype, unsigned long *newtype) | ||
151 | { | ||
152 | struct rb_node *node; | ||
153 | struct memtype *match; | ||
154 | int found_type = reqtype; | ||
155 | |||
156 | match = memtype_rb_lowest_match(&memtype_rbroot, start, end); | ||
157 | if (match == NULL) | ||
158 | goto success; | ||
159 | |||
160 | if (match->type != found_type && newtype == NULL) | ||
161 | goto failure; | ||
162 | |||
163 | dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); | ||
164 | found_type = match->type; | ||
165 | |||
166 | node = rb_next(&match->rb); | ||
167 | while (node) { | ||
168 | match = container_of(node, struct memtype, rb); | ||
169 | |||
170 | if (match->start >= end) /* Checked all possible matches */ | ||
171 | goto success; | ||
172 | |||
173 | if (is_node_overlap(match, start, end) && | ||
174 | match->type != found_type) { | ||
175 | goto failure; | ||
176 | } | ||
177 | |||
178 | node = rb_next(&match->rb); | ||
179 | } | ||
180 | success: | ||
181 | if (newtype) | ||
182 | *newtype = found_type; | ||
183 | |||
184 | return 0; | ||
185 | |||
186 | failure: | ||
187 | printk(KERN_INFO "%s:%d conflicting memory types " | ||
188 | "%Lx-%Lx %s<->%s\n", current->comm, current->pid, start, | ||
189 | end, cattr_name(found_type), cattr_name(match->type)); | ||
190 | return -EBUSY; | ||
191 | } | ||
192 | |||
193 | static void memtype_rb_augment_cb(struct rb_node *node) | ||
194 | { | ||
195 | if (node) | ||
196 | update_path_max_end(node); | ||
197 | } | ||
198 | |||
199 | static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) | ||
200 | { | ||
201 | struct rb_node **node = &(root->rb_node); | ||
202 | struct rb_node *parent = NULL; | ||
203 | |||
204 | while (*node) { | ||
205 | struct memtype *data = container_of(*node, struct memtype, rb); | ||
206 | |||
207 | parent = *node; | ||
208 | if (newdata->start <= data->start) | ||
209 | node = &((*node)->rb_left); | ||
210 | else if (newdata->start > data->start) | ||
211 | node = &((*node)->rb_right); | ||
212 | } | ||
213 | |||
214 | rb_link_node(&newdata->rb, parent, node); | ||
215 | rb_insert_color(&newdata->rb, root); | ||
216 | } | ||
217 | |||
218 | int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) | ||
219 | { | ||
220 | int err = 0; | ||
221 | |||
222 | err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end, | ||
223 | new->type, ret_type); | ||
224 | |||
225 | if (!err) { | ||
226 | if (ret_type) | ||
227 | new->type = *ret_type; | ||
228 | |||
229 | memtype_rb_insert(&memtype_rbroot, new); | ||
230 | } | ||
231 | return err; | ||
232 | } | ||
233 | |||
234 | struct memtype *rbt_memtype_erase(u64 start, u64 end) | ||
235 | { | ||
236 | struct memtype *data; | ||
237 | |||
238 | data = memtype_rb_exact_match(&memtype_rbroot, start, end); | ||
239 | if (!data) | ||
240 | goto out; | ||
241 | |||
242 | rb_erase(&data->rb, &memtype_rbroot); | ||
243 | out: | ||
244 | return data; | ||
245 | } | ||
246 | |||
247 | struct memtype *rbt_memtype_lookup(u64 addr) | ||
248 | { | ||
249 | struct memtype *data; | ||
250 | data = memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE); | ||
251 | return data; | ||
252 | } | ||
253 | |||
254 | #if defined(CONFIG_DEBUG_FS) | ||
255 | int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) | ||
256 | { | ||
257 | struct rb_node *node; | ||
258 | int i = 1; | ||
259 | |||
260 | node = rb_first(&memtype_rbroot); | ||
261 | while (node && pos != i) { | ||
262 | node = rb_next(node); | ||
263 | i++; | ||
264 | } | ||
265 | |||
266 | if (node) { /* pos == i */ | ||
267 | struct memtype *this = container_of(node, struct memtype, rb); | ||
268 | *out = *this; | ||
269 | return 0; | ||
270 | } else { | ||
271 | return 1; | ||
272 | } | ||
273 | } | ||
274 | #endif | ||
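Two details of the new tree code above are easy to miss when reading the search: is_node_overlap() treats ranges as half-open, so a reservation ending exactly where a query starts does not conflict, and memtype_rb_lowest_match() descends a left subtree only when its cached subtree_max_end reaches past the query start. A small standalone sketch of just those two checks, with the addresses invented for illustration:

#include <stdio.h>

typedef unsigned long long u64;

/* half-open [start, end): touching ranges do not overlap */
static int is_node_overlap(u64 node_start, u64 node_end, u64 start, u64 end)
{
        if (node_start >= end || node_end <= start)
                return 0;
        return 1;
}

int main(void)
{
        /* two reservations: [0x1000,0x2000) and [0x3000,0x4000) */
        u64 starts[] = { 0x1000, 0x3000 };
        u64 ends[]   = { 0x2000, 0x4000 };

        /* subtree_max_end cached for a subtree holding both entries */
        u64 subtree_max_end = ends[1];          /* 0x4000 */

        /* query [0x2000, 0x2800): touches the first range, overlaps neither */
        u64 qs = 0x2000, qe = 0x2800;

        /* pruning test used on the left child: nothing there can overlap
         * unless its largest end extends past the query start */
        if (subtree_max_end > qs)
                printf("left subtree must be descended\n");
        else
                printf("left subtree skipped entirely\n");

        for (int i = 0; i < 2; i++)
                printf("[%llx,%llx) vs [%llx,%llx): %s\n",
                       starts[i], ends[i], qs, qe,
                       is_node_overlap(starts[i], ends[i], qs, qe) ?
                       "overlap" : "no overlap");
        return 0;
}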
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c index df3d5c861cda..308e32570d84 100644 --- a/arch/x86/mm/pf_in.c +++ b/arch/x86/mm/pf_in.c | |||
@@ -34,7 +34,7 @@ | |||
34 | /* IA32 Manual 3, 2-1 */ | 34 | /* IA32 Manual 3, 2-1 */ |
35 | static unsigned char prefix_codes[] = { | 35 | static unsigned char prefix_codes[] = { |
36 | 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64, | 36 | 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64, |
37 | 0x65, 0x2E, 0x3E, 0x66, 0x67 | 37 | 0x65, 0x66, 0x67 |
38 | }; | 38 | }; |
39 | /* IA32 Manual 3, 3-432*/ | 39 | /* IA32 Manual 3, 3-432*/ |
40 | static unsigned int reg_rop[] = { | 40 | static unsigned int reg_rop[] = { |
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 792854003ed3..cac718499256 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c | |||
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/pagemap.h> | 9 | #include <linux/pagemap.h> |
10 | #include <linux/spinlock.h> | 10 | #include <linux/spinlock.h> |
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/quicklist.h> | ||
13 | 12 | ||
14 | #include <asm/system.h> | 13 | #include <asm/system.h> |
15 | #include <asm/pgtable.h> | 14 | #include <asm/pgtable.h> |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 28c68762648f..f9897f7a9ef1 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -363,6 +363,54 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
363 | for (i = 0; i < MAX_NUMNODES; i++) | 363 | for (i = 0; i < MAX_NUMNODES; i++) |
364 | cutoff_node(i, start, end); | 364 | cutoff_node(i, start, end); |
365 | 365 | ||
366 | /* | ||
367 | * Join together blocks on the same node, holes between | ||
368 | * which don't overlap with memory on other nodes. | ||
369 | */ | ||
370 | for (i = 0; i < num_node_memblks; ++i) { | ||
371 | int j, k; | ||
372 | |||
373 | for (j = i + 1; j < num_node_memblks; ++j) { | ||
374 | unsigned long start, end; | ||
375 | |||
376 | if (memblk_nodeid[i] != memblk_nodeid[j]) | ||
377 | continue; | ||
378 | start = min(node_memblk_range[i].end, | ||
379 | node_memblk_range[j].end); | ||
380 | end = max(node_memblk_range[i].start, | ||
381 | node_memblk_range[j].start); | ||
382 | for (k = 0; k < num_node_memblks; ++k) { | ||
383 | if (memblk_nodeid[i] == memblk_nodeid[k]) | ||
384 | continue; | ||
385 | if (start < node_memblk_range[k].end && | ||
386 | end > node_memblk_range[k].start) | ||
387 | break; | ||
388 | } | ||
389 | if (k < num_node_memblks) | ||
390 | continue; | ||
391 | start = min(node_memblk_range[i].start, | ||
392 | node_memblk_range[j].start); | ||
393 | end = max(node_memblk_range[i].end, | ||
394 | node_memblk_range[j].end); | ||
395 | printk(KERN_INFO "SRAT: Node %d " | ||
396 | "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n", | ||
397 | memblk_nodeid[i], | ||
398 | node_memblk_range[i].start, | ||
399 | node_memblk_range[i].end, | ||
400 | node_memblk_range[j].start, | ||
401 | node_memblk_range[j].end, | ||
402 | start, end); | ||
403 | node_memblk_range[i].start = start; | ||
404 | node_memblk_range[i].end = end; | ||
405 | k = --num_node_memblks - j; | ||
406 | memmove(memblk_nodeid + j, memblk_nodeid + j+1, | ||
407 | k * sizeof(*memblk_nodeid)); | ||
408 | memmove(node_memblk_range + j, node_memblk_range + j+1, | ||
409 | k * sizeof(*node_memblk_range)); | ||
410 | --j; | ||
411 | } | ||
412 | } | ||
413 | |||
366 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, | 414 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, |
367 | memblk_nodeid); | 415 | memblk_nodeid); |
368 | if (memnode_shift < 0) { | 416 | if (memnode_shift < 0) { |
@@ -461,7 +509,8 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) | |||
461 | * node, it must now point to the fake node ID. | 509 | * node, it must now point to the fake node ID. |
462 | */ | 510 | */ |
463 | for (j = 0; j < MAX_LOCAL_APIC; j++) | 511 | for (j = 0; j < MAX_LOCAL_APIC; j++) |
464 | if (apicid_to_node[j] == nid) | 512 | if (apicid_to_node[j] == nid && |
513 | fake_apicid_to_node[j] == NUMA_NO_NODE) | ||
465 | fake_apicid_to_node[j] = i; | 514 | fake_apicid_to_node[j] = i; |
466 | } | 515 | } |
467 | for (i = 0; i < num_nodes; i++) | 516 | for (i = 0; i < num_nodes; i++) |
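The loop added to acpi_scan_nodes() merges two memory blocks of the same node only when the hole between them does not overlap memory belonging to any other node; note the deliberate variable reuse where 'start' becomes the lower of the two ends and 'end' the higher of the two starts, i.e. the hole itself. A standalone numeric sketch of that test (block addresses are made up for the demo):

#include <stdio.h>

struct memblk { unsigned long start, end; int nid; };

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
        /* node 0 owns two blocks with a hole between them;
         * node 1 owns one block elsewhere (illustrative values) */
        struct memblk blk[] = {
                { 0x00000000, 0x10000000, 0 },
                { 0x20000000, 0x30000000, 0 },
                { 0x40000000, 0x50000000, 1 },
        };
        int nblks = 3;

        struct memblk i = blk[0], j = blk[1];

        /* the hole that merging i and j would cover */
        unsigned long hole_start = MIN(i.end, j.end);     /* 0x10000000 */
        unsigned long hole_end   = MAX(i.start, j.start); /* 0x20000000 */

        int conflict = 0;
        for (int k = 0; k < nblks; k++) {
                if (blk[k].nid == i.nid)
                        continue;                         /* only other nodes matter */
                if (hole_start < blk[k].end && hole_end > blk[k].start) {
                        conflict = 1;                     /* hole overlaps foreign memory */
                        break;
                }
        }

        if (!conflict)
                printf("SRAT: Node %d [%lx,%lx) + [%lx,%lx) -> [%lx,%lx)\n",
                       i.nid, i.start, i.end, j.start, j.end,
                       MIN(i.start, j.start), MAX(i.end, j.end));
        else
                printf("hole overlaps another node, blocks kept separate\n");
        return 0;
}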
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 2c505ee71014..b28d2f1253bb 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -31,8 +31,9 @@ static struct op_x86_model_spec *model; | |||
31 | static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); | 31 | static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); |
32 | static DEFINE_PER_CPU(unsigned long, saved_lvtpc); | 32 | static DEFINE_PER_CPU(unsigned long, saved_lvtpc); |
33 | 33 | ||
34 | /* 0 == registered but off, 1 == registered and on */ | 34 | /* must be protected with get_online_cpus()/put_online_cpus(): */ |
35 | static int nmi_enabled = 0; | 35 | static int nmi_enabled; |
36 | static int ctr_running; | ||
36 | 37 | ||
37 | struct op_counter_config counter_config[OP_MAX_COUNTER]; | 38 | struct op_counter_config counter_config[OP_MAX_COUNTER]; |
38 | 39 | ||
@@ -61,12 +62,16 @@ static int profile_exceptions_notify(struct notifier_block *self, | |||
61 | { | 62 | { |
62 | struct die_args *args = (struct die_args *)data; | 63 | struct die_args *args = (struct die_args *)data; |
63 | int ret = NOTIFY_DONE; | 64 | int ret = NOTIFY_DONE; |
64 | int cpu = smp_processor_id(); | ||
65 | 65 | ||
66 | switch (val) { | 66 | switch (val) { |
67 | case DIE_NMI: | 67 | case DIE_NMI: |
68 | case DIE_NMI_IPI: | 68 | case DIE_NMI_IPI: |
69 | model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)); | 69 | if (ctr_running) |
70 | model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); | ||
71 | else if (!nmi_enabled) | ||
72 | break; | ||
73 | else | ||
74 | model->stop(&__get_cpu_var(cpu_msrs)); | ||
70 | ret = NOTIFY_STOP; | 75 | ret = NOTIFY_STOP; |
71 | break; | 76 | break; |
72 | default: | 77 | default: |
@@ -95,24 +100,36 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs) | |||
95 | static void nmi_cpu_start(void *dummy) | 100 | static void nmi_cpu_start(void *dummy) |
96 | { | 101 | { |
97 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); | 102 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); |
98 | model->start(msrs); | 103 | if (!msrs->controls) |
104 | WARN_ON_ONCE(1); | ||
105 | else | ||
106 | model->start(msrs); | ||
99 | } | 107 | } |
100 | 108 | ||
101 | static int nmi_start(void) | 109 | static int nmi_start(void) |
102 | { | 110 | { |
111 | get_online_cpus(); | ||
103 | on_each_cpu(nmi_cpu_start, NULL, 1); | 112 | on_each_cpu(nmi_cpu_start, NULL, 1); |
113 | ctr_running = 1; | ||
114 | put_online_cpus(); | ||
104 | return 0; | 115 | return 0; |
105 | } | 116 | } |
106 | 117 | ||
107 | static void nmi_cpu_stop(void *dummy) | 118 | static void nmi_cpu_stop(void *dummy) |
108 | { | 119 | { |
109 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); | 120 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); |
110 | model->stop(msrs); | 121 | if (!msrs->controls) |
122 | WARN_ON_ONCE(1); | ||
123 | else | ||
124 | model->stop(msrs); | ||
111 | } | 125 | } |
112 | 126 | ||
113 | static void nmi_stop(void) | 127 | static void nmi_stop(void) |
114 | { | 128 | { |
129 | get_online_cpus(); | ||
115 | on_each_cpu(nmi_cpu_stop, NULL, 1); | 130 | on_each_cpu(nmi_cpu_stop, NULL, 1); |
131 | ctr_running = 0; | ||
132 | put_online_cpus(); | ||
116 | } | 133 | } |
117 | 134 | ||
118 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | 135 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX |
@@ -252,7 +269,10 @@ static int nmi_switch_event(void) | |||
252 | if (nmi_multiplex_on() < 0) | 269 | if (nmi_multiplex_on() < 0) |
253 | return -EINVAL; /* not necessary */ | 270 | return -EINVAL; /* not necessary */ |
254 | 271 | ||
255 | on_each_cpu(nmi_cpu_switch, NULL, 1); | 272 | get_online_cpus(); |
273 | if (ctr_running) | ||
274 | on_each_cpu(nmi_cpu_switch, NULL, 1); | ||
275 | put_online_cpus(); | ||
256 | 276 | ||
257 | return 0; | 277 | return 0; |
258 | } | 278 | } |
@@ -295,6 +315,7 @@ static void free_msrs(void) | |||
295 | kfree(per_cpu(cpu_msrs, i).controls); | 315 | kfree(per_cpu(cpu_msrs, i).controls); |
296 | per_cpu(cpu_msrs, i).controls = NULL; | 316 | per_cpu(cpu_msrs, i).controls = NULL; |
297 | } | 317 | } |
318 | nmi_shutdown_mux(); | ||
298 | } | 319 | } |
299 | 320 | ||
300 | static int allocate_msrs(void) | 321 | static int allocate_msrs(void) |
@@ -307,14 +328,21 @@ static int allocate_msrs(void) | |||
307 | per_cpu(cpu_msrs, i).counters = kzalloc(counters_size, | 328 | per_cpu(cpu_msrs, i).counters = kzalloc(counters_size, |
308 | GFP_KERNEL); | 329 | GFP_KERNEL); |
309 | if (!per_cpu(cpu_msrs, i).counters) | 330 | if (!per_cpu(cpu_msrs, i).counters) |
310 | return 0; | 331 | goto fail; |
311 | per_cpu(cpu_msrs, i).controls = kzalloc(controls_size, | 332 | per_cpu(cpu_msrs, i).controls = kzalloc(controls_size, |
312 | GFP_KERNEL); | 333 | GFP_KERNEL); |
313 | if (!per_cpu(cpu_msrs, i).controls) | 334 | if (!per_cpu(cpu_msrs, i).controls) |
314 | return 0; | 335 | goto fail; |
315 | } | 336 | } |
316 | 337 | ||
338 | if (!nmi_setup_mux()) | ||
339 | goto fail; | ||
340 | |||
317 | return 1; | 341 | return 1; |
342 | |||
343 | fail: | ||
344 | free_msrs(); | ||
345 | return 0; | ||
318 | } | 346 | } |
319 | 347 | ||
320 | static void nmi_cpu_setup(void *dummy) | 348 | static void nmi_cpu_setup(void *dummy) |
@@ -336,49 +364,6 @@ static struct notifier_block profile_exceptions_nb = { | |||
336 | .priority = 2 | 364 | .priority = 2 |
337 | }; | 365 | }; |
338 | 366 | ||
339 | static int nmi_setup(void) | ||
340 | { | ||
341 | int err = 0; | ||
342 | int cpu; | ||
343 | |||
344 | if (!allocate_msrs()) | ||
345 | err = -ENOMEM; | ||
346 | else if (!nmi_setup_mux()) | ||
347 | err = -ENOMEM; | ||
348 | else | ||
349 | err = register_die_notifier(&profile_exceptions_nb); | ||
350 | |||
351 | if (err) { | ||
352 | free_msrs(); | ||
353 | nmi_shutdown_mux(); | ||
354 | return err; | ||
355 | } | ||
356 | |||
357 | /* We need to serialize save and setup for HT because the subset | ||
358 | * of msrs are distinct for save and setup operations | ||
359 | */ | ||
360 | |||
361 | /* Assume saved/restored counters are the same on all CPUs */ | ||
362 | model->fill_in_addresses(&per_cpu(cpu_msrs, 0)); | ||
363 | for_each_possible_cpu(cpu) { | ||
364 | if (!cpu) | ||
365 | continue; | ||
366 | |||
367 | memcpy(per_cpu(cpu_msrs, cpu).counters, | ||
368 | per_cpu(cpu_msrs, 0).counters, | ||
369 | sizeof(struct op_msr) * model->num_counters); | ||
370 | |||
371 | memcpy(per_cpu(cpu_msrs, cpu).controls, | ||
372 | per_cpu(cpu_msrs, 0).controls, | ||
373 | sizeof(struct op_msr) * model->num_controls); | ||
374 | |||
375 | mux_clone(cpu); | ||
376 | } | ||
377 | on_each_cpu(nmi_cpu_setup, NULL, 1); | ||
378 | nmi_enabled = 1; | ||
379 | return 0; | ||
380 | } | ||
381 | |||
382 | static void nmi_cpu_restore_registers(struct op_msrs *msrs) | 367 | static void nmi_cpu_restore_registers(struct op_msrs *msrs) |
383 | { | 368 | { |
384 | struct op_msr *counters = msrs->counters; | 369 | struct op_msr *counters = msrs->counters; |
@@ -412,20 +397,24 @@ static void nmi_cpu_shutdown(void *dummy) | |||
412 | apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); | 397 | apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); |
413 | apic_write(APIC_LVTERR, v); | 398 | apic_write(APIC_LVTERR, v); |
414 | nmi_cpu_restore_registers(msrs); | 399 | nmi_cpu_restore_registers(msrs); |
400 | if (model->cpu_down) | ||
401 | model->cpu_down(); | ||
415 | } | 402 | } |
416 | 403 | ||
417 | static void nmi_shutdown(void) | 404 | static void nmi_cpu_up(void *dummy) |
418 | { | 405 | { |
419 | struct op_msrs *msrs; | 406 | if (nmi_enabled) |
407 | nmi_cpu_setup(dummy); | ||
408 | if (ctr_running) | ||
409 | nmi_cpu_start(dummy); | ||
410 | } | ||
420 | 411 | ||
421 | nmi_enabled = 0; | 412 | static void nmi_cpu_down(void *dummy) |
422 | on_each_cpu(nmi_cpu_shutdown, NULL, 1); | 413 | { |
423 | unregister_die_notifier(&profile_exceptions_nb); | 414 | if (ctr_running) |
424 | nmi_shutdown_mux(); | 415 | nmi_cpu_stop(dummy); |
425 | msrs = &get_cpu_var(cpu_msrs); | 416 | if (nmi_enabled) |
426 | model->shutdown(msrs); | 417 | nmi_cpu_shutdown(dummy); |
427 | free_msrs(); | ||
428 | put_cpu_var(cpu_msrs); | ||
429 | } | 418 | } |
430 | 419 | ||
431 | static int nmi_create_files(struct super_block *sb, struct dentry *root) | 420 | static int nmi_create_files(struct super_block *sb, struct dentry *root) |
@@ -457,7 +446,6 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) | |||
457 | return 0; | 446 | return 0; |
458 | } | 447 | } |
459 | 448 | ||
460 | #ifdef CONFIG_SMP | ||
461 | static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, | 449 | static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, |
462 | void *data) | 450 | void *data) |
463 | { | 451 | { |
@@ -465,10 +453,10 @@ static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, | |||
465 | switch (action) { | 453 | switch (action) { |
466 | case CPU_DOWN_FAILED: | 454 | case CPU_DOWN_FAILED: |
467 | case CPU_ONLINE: | 455 | case CPU_ONLINE: |
468 | smp_call_function_single(cpu, nmi_cpu_start, NULL, 0); | 456 | smp_call_function_single(cpu, nmi_cpu_up, NULL, 0); |
469 | break; | 457 | break; |
470 | case CPU_DOWN_PREPARE: | 458 | case CPU_DOWN_PREPARE: |
471 | smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1); | 459 | smp_call_function_single(cpu, nmi_cpu_down, NULL, 1); |
472 | break; | 460 | break; |
473 | } | 461 | } |
474 | return NOTIFY_DONE; | 462 | return NOTIFY_DONE; |
@@ -477,7 +465,75 @@ static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, | |||
477 | static struct notifier_block oprofile_cpu_nb = { | 465 | static struct notifier_block oprofile_cpu_nb = { |
478 | .notifier_call = oprofile_cpu_notifier | 466 | .notifier_call = oprofile_cpu_notifier |
479 | }; | 467 | }; |
480 | #endif | 468 | |
469 | static int nmi_setup(void) | ||
470 | { | ||
471 | int err = 0; | ||
472 | int cpu; | ||
473 | |||
474 | if (!allocate_msrs()) | ||
475 | return -ENOMEM; | ||
476 | |||
477 | /* We need to serialize save and setup for HT because the subset | ||
478 | * of msrs are distinct for save and setup operations | ||
479 | */ | ||
480 | |||
481 | /* Assume saved/restored counters are the same on all CPUs */ | ||
482 | err = model->fill_in_addresses(&per_cpu(cpu_msrs, 0)); | ||
483 | if (err) | ||
484 | goto fail; | ||
485 | |||
486 | for_each_possible_cpu(cpu) { | ||
487 | if (!cpu) | ||
488 | continue; | ||
489 | |||
490 | memcpy(per_cpu(cpu_msrs, cpu).counters, | ||
491 | per_cpu(cpu_msrs, 0).counters, | ||
492 | sizeof(struct op_msr) * model->num_counters); | ||
493 | |||
494 | memcpy(per_cpu(cpu_msrs, cpu).controls, | ||
495 | per_cpu(cpu_msrs, 0).controls, | ||
496 | sizeof(struct op_msr) * model->num_controls); | ||
497 | |||
498 | mux_clone(cpu); | ||
499 | } | ||
500 | |||
501 | nmi_enabled = 0; | ||
502 | ctr_running = 0; | ||
503 | barrier(); | ||
504 | err = register_die_notifier(&profile_exceptions_nb); | ||
505 | if (err) | ||
506 | goto fail; | ||
507 | |||
508 | get_online_cpus(); | ||
509 | register_cpu_notifier(&oprofile_cpu_nb); | ||
510 | on_each_cpu(nmi_cpu_setup, NULL, 1); | ||
511 | nmi_enabled = 1; | ||
512 | put_online_cpus(); | ||
513 | |||
514 | return 0; | ||
515 | fail: | ||
516 | free_msrs(); | ||
517 | return err; | ||
518 | } | ||
519 | |||
520 | static void nmi_shutdown(void) | ||
521 | { | ||
522 | struct op_msrs *msrs; | ||
523 | |||
524 | get_online_cpus(); | ||
525 | unregister_cpu_notifier(&oprofile_cpu_nb); | ||
526 | on_each_cpu(nmi_cpu_shutdown, NULL, 1); | ||
527 | nmi_enabled = 0; | ||
528 | ctr_running = 0; | ||
529 | put_online_cpus(); | ||
530 | barrier(); | ||
531 | unregister_die_notifier(&profile_exceptions_nb); | ||
532 | msrs = &get_cpu_var(cpu_msrs); | ||
533 | model->shutdown(msrs); | ||
534 | free_msrs(); | ||
535 | put_cpu_var(cpu_msrs); | ||
536 | } | ||
481 | 537 | ||
482 | #ifdef CONFIG_PM | 538 | #ifdef CONFIG_PM |
483 | 539 | ||
@@ -687,9 +743,6 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
687 | return -ENODEV; | 743 | return -ENODEV; |
688 | } | 744 | } |
689 | 745 | ||
690 | #ifdef CONFIG_SMP | ||
691 | register_cpu_notifier(&oprofile_cpu_nb); | ||
692 | #endif | ||
693 | /* default values, can be overwritten by model */ | 746 | /* default values, can be overwritten by model */ |
694 | ops->create_files = nmi_create_files; | 747 | ops->create_files = nmi_create_files; |
695 | ops->setup = nmi_setup; | 748 | ops->setup = nmi_setup; |
@@ -716,12 +769,6 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
716 | 769 | ||
717 | void op_nmi_exit(void) | 770 | void op_nmi_exit(void) |
718 | { | 771 | { |
719 | if (using_nmi) { | 772 | if (using_nmi) |
720 | exit_sysfs(); | 773 | exit_sysfs(); |
721 | #ifdef CONFIG_SMP | ||
722 | unregister_cpu_notifier(&oprofile_cpu_nb); | ||
723 | #endif | ||
724 | } | ||
725 | if (model->exit) | ||
726 | model->exit(); | ||
727 | } | 774 | } |
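With the CPU notifier now registered unconditionally from nmi_setup(), the hotplug callbacks no longer start or stop counters blindly; they consult nmi_enabled (per-CPU MSRs are set up) and ctr_running (counters are actually collecting) and redo only the applicable steps on the CPU coming up or going down. A compressed userspace sketch of how nmi_cpu_up() and nmi_cpu_down() combine the two flags, with the setup/start/stop/shutdown bodies stubbed out:

#include <stdio.h>

/* mirrors the two module-level flags in nmi_int.c */
static int nmi_enabled;   /* per-CPU MSRs have been set up */
static int ctr_running;   /* counters are currently started */

static void nmi_cpu_setup(void)    { printf("  setup MSRs\n");      }
static void nmi_cpu_shutdown(void) { printf("  restore MSRs\n");    }
static void nmi_cpu_start(void)    { printf("  start counters\n");  }
static void nmi_cpu_stop(void)     { printf("  stop counters\n");   }

/* CPU_ONLINE / CPU_DOWN_FAILED path */
static void nmi_cpu_up(void)
{
        if (nmi_enabled)
                nmi_cpu_setup();
        if (ctr_running)
                nmi_cpu_start();
}

/* CPU_DOWN_PREPARE path: tear down in the reverse order */
static void nmi_cpu_down(void)
{
        if (ctr_running)
                nmi_cpu_stop();
        if (nmi_enabled)
                nmi_cpu_shutdown();
}

int main(void)
{
        printf("hotplug while idle (nothing set up):\n");
        nmi_cpu_up();                     /* both flags clear: no-op */

        nmi_enabled = 1;
        ctr_running = 1;
        printf("hotplug while profiling:\n");
        nmi_cpu_up();                     /* re-setup and restart on the new CPU */
        printf("cpu going down while profiling:\n");
        nmi_cpu_down();
        return 0;
}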
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 090cbbec7dbd..b67a6b5aa8d4 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -30,13 +30,10 @@ | |||
30 | #include "op_counter.h" | 30 | #include "op_counter.h" |
31 | 31 | ||
32 | #define NUM_COUNTERS 4 | 32 | #define NUM_COUNTERS 4 |
33 | #define NUM_CONTROLS 4 | ||
34 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | 33 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX |
35 | #define NUM_VIRT_COUNTERS 32 | 34 | #define NUM_VIRT_COUNTERS 32 |
36 | #define NUM_VIRT_CONTROLS 32 | ||
37 | #else | 35 | #else |
38 | #define NUM_VIRT_COUNTERS NUM_COUNTERS | 36 | #define NUM_VIRT_COUNTERS NUM_COUNTERS |
39 | #define NUM_VIRT_CONTROLS NUM_CONTROLS | ||
40 | #endif | 37 | #endif |
41 | 38 | ||
42 | #define OP_EVENT_MASK 0x0FFF | 39 | #define OP_EVENT_MASK 0x0FFF |
@@ -105,102 +102,6 @@ static u32 get_ibs_caps(void) | |||
105 | return ibs_caps; | 102 | return ibs_caps; |
106 | } | 103 | } |
107 | 104 | ||
108 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
109 | |||
110 | static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | ||
111 | struct op_msrs const * const msrs) | ||
112 | { | ||
113 | u64 val; | ||
114 | int i; | ||
115 | |||
116 | /* enable active counters */ | ||
117 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
118 | int virt = op_x86_phys_to_virt(i); | ||
119 | if (!reset_value[virt]) | ||
120 | continue; | ||
121 | rdmsrl(msrs->controls[i].addr, val); | ||
122 | val &= model->reserved; | ||
123 | val |= op_x86_get_ctrl(model, &counter_config[virt]); | ||
124 | wrmsrl(msrs->controls[i].addr, val); | ||
125 | } | ||
126 | } | ||
127 | |||
128 | #endif | ||
129 | |||
130 | /* functions for op_amd_spec */ | ||
131 | |||
132 | static void op_amd_fill_in_addresses(struct op_msrs * const msrs) | ||
133 | { | ||
134 | int i; | ||
135 | |||
136 | for (i = 0; i < NUM_COUNTERS; i++) { | ||
137 | if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | ||
138 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; | ||
139 | } | ||
140 | |||
141 | for (i = 0; i < NUM_CONTROLS; i++) { | ||
142 | if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) | ||
143 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; | ||
144 | } | ||
145 | } | ||
146 | |||
147 | static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | ||
148 | struct op_msrs const * const msrs) | ||
149 | { | ||
150 | u64 val; | ||
151 | int i; | ||
152 | |||
153 | /* setup reset_value */ | ||
154 | for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { | ||
155 | if (counter_config[i].enabled | ||
156 | && msrs->counters[op_x86_virt_to_phys(i)].addr) | ||
157 | reset_value[i] = counter_config[i].count; | ||
158 | else | ||
159 | reset_value[i] = 0; | ||
160 | } | ||
161 | |||
162 | /* clear all counters */ | ||
163 | for (i = 0; i < NUM_CONTROLS; ++i) { | ||
164 | if (unlikely(!msrs->controls[i].addr)) { | ||
165 | if (counter_config[i].enabled && !smp_processor_id()) | ||
166 | /* | ||
167 | * counter is reserved, this is on all | ||
168 | * cpus, so report only for cpu #0 | ||
169 | */ | ||
170 | op_x86_warn_reserved(i); | ||
171 | continue; | ||
172 | } | ||
173 | rdmsrl(msrs->controls[i].addr, val); | ||
174 | if (val & ARCH_PERFMON_EVENTSEL_ENABLE) | ||
175 | op_x86_warn_in_use(i); | ||
176 | val &= model->reserved; | ||
177 | wrmsrl(msrs->controls[i].addr, val); | ||
178 | } | ||
179 | |||
180 | /* avoid a false detection of ctr overflows in NMI handler */ | ||
181 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
182 | if (unlikely(!msrs->counters[i].addr)) | ||
183 | continue; | ||
184 | wrmsrl(msrs->counters[i].addr, -1LL); | ||
185 | } | ||
186 | |||
187 | /* enable active counters */ | ||
188 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
189 | int virt = op_x86_phys_to_virt(i); | ||
190 | if (!reset_value[virt]) | ||
191 | continue; | ||
192 | |||
193 | /* setup counter registers */ | ||
194 | wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); | ||
195 | |||
196 | /* setup control registers */ | ||
197 | rdmsrl(msrs->controls[i].addr, val); | ||
198 | val &= model->reserved; | ||
199 | val |= op_x86_get_ctrl(model, &counter_config[virt]); | ||
200 | wrmsrl(msrs->controls[i].addr, val); | ||
201 | } | ||
202 | } | ||
203 | |||
204 | /* | 105 | /* |
205 | * 16-bit Linear Feedback Shift Register (LFSR) | 106 | * 16-bit Linear Feedback Shift Register (LFSR) |
206 | * | 107 | * |
@@ -365,6 +266,125 @@ static void op_amd_stop_ibs(void) | |||
365 | wrmsrl(MSR_AMD64_IBSOPCTL, 0); | 266 | wrmsrl(MSR_AMD64_IBSOPCTL, 0); |
366 | } | 267 | } |
367 | 268 | ||
269 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
270 | |||
271 | static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | ||
272 | struct op_msrs const * const msrs) | ||
273 | { | ||
274 | u64 val; | ||
275 | int i; | ||
276 | |||
277 | /* enable active counters */ | ||
278 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
279 | int virt = op_x86_phys_to_virt(i); | ||
280 | if (!reset_value[virt]) | ||
281 | continue; | ||
282 | rdmsrl(msrs->controls[i].addr, val); | ||
283 | val &= model->reserved; | ||
284 | val |= op_x86_get_ctrl(model, &counter_config[virt]); | ||
285 | wrmsrl(msrs->controls[i].addr, val); | ||
286 | } | ||
287 | } | ||
288 | |||
289 | #endif | ||
290 | |||
291 | /* functions for op_amd_spec */ | ||
292 | |||
293 | static void op_amd_shutdown(struct op_msrs const * const msrs) | ||
294 | { | ||
295 | int i; | ||
296 | |||
297 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
298 | if (!msrs->counters[i].addr) | ||
299 | continue; | ||
300 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
301 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
302 | } | ||
303 | } | ||
304 | |||
305 | static int op_amd_fill_in_addresses(struct op_msrs * const msrs) | ||
306 | { | ||
307 | int i; | ||
308 | |||
309 | for (i = 0; i < NUM_COUNTERS; i++) { | ||
310 | if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | ||
311 | goto fail; | ||
312 | if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) { | ||
313 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
314 | goto fail; | ||
315 | } | ||
316 | /* both registers must be reserved */ | ||
317 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; | ||
318 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; | ||
319 | continue; | ||
320 | fail: | ||
321 | if (!counter_config[i].enabled) | ||
322 | continue; | ||
323 | op_x86_warn_reserved(i); | ||
324 | op_amd_shutdown(msrs); | ||
325 | return -EBUSY; | ||
326 | } | ||
327 | |||
328 | return 0; | ||
329 | } | ||
330 | |||
331 | static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | ||
332 | struct op_msrs const * const msrs) | ||
333 | { | ||
334 | u64 val; | ||
335 | int i; | ||
336 | |||
337 | /* setup reset_value */ | ||
338 | for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { | ||
339 | if (counter_config[i].enabled | ||
340 | && msrs->counters[op_x86_virt_to_phys(i)].addr) | ||
341 | reset_value[i] = counter_config[i].count; | ||
342 | else | ||
343 | reset_value[i] = 0; | ||
344 | } | ||
345 | |||
346 | /* clear all counters */ | ||
347 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
348 | if (!msrs->controls[i].addr) | ||
349 | continue; | ||
350 | rdmsrl(msrs->controls[i].addr, val); | ||
351 | if (val & ARCH_PERFMON_EVENTSEL_ENABLE) | ||
352 | op_x86_warn_in_use(i); | ||
353 | val &= model->reserved; | ||
354 | wrmsrl(msrs->controls[i].addr, val); | ||
355 | /* | ||
356 | * avoid a false detection of ctr overflows in NMI | ||
357 | * handler | ||
358 | */ | ||
359 | wrmsrl(msrs->counters[i].addr, -1LL); | ||
360 | } | ||
361 | |||
362 | /* enable active counters */ | ||
363 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
364 | int virt = op_x86_phys_to_virt(i); | ||
365 | if (!reset_value[virt]) | ||
366 | continue; | ||
367 | |||
368 | /* setup counter registers */ | ||
369 | wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); | ||
370 | |||
371 | /* setup control registers */ | ||
372 | rdmsrl(msrs->controls[i].addr, val); | ||
373 | val &= model->reserved; | ||
374 | val |= op_x86_get_ctrl(model, &counter_config[virt]); | ||
375 | wrmsrl(msrs->controls[i].addr, val); | ||
376 | } | ||
377 | |||
378 | if (ibs_caps) | ||
379 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0); | ||
380 | } | ||
381 | |||
382 | static void op_amd_cpu_shutdown(void) | ||
383 | { | ||
384 | if (ibs_caps) | ||
385 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); | ||
386 | } | ||
387 | |||
368 | static int op_amd_check_ctrs(struct pt_regs * const regs, | 388 | static int op_amd_check_ctrs(struct pt_regs * const regs, |
369 | struct op_msrs const * const msrs) | 389 | struct op_msrs const * const msrs) |
370 | { | 390 | { |
@@ -425,42 +445,16 @@ static void op_amd_stop(struct op_msrs const * const msrs) | |||
425 | op_amd_stop_ibs(); | 445 | op_amd_stop_ibs(); |
426 | } | 446 | } |
427 | 447 | ||
428 | static void op_amd_shutdown(struct op_msrs const * const msrs) | 448 | static int __init_ibs_nmi(void) |
429 | { | ||
430 | int i; | ||
431 | |||
432 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
433 | if (msrs->counters[i].addr) | ||
434 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
435 | } | ||
436 | for (i = 0; i < NUM_CONTROLS; ++i) { | ||
437 | if (msrs->controls[i].addr) | ||
438 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
439 | } | ||
440 | } | ||
441 | |||
442 | static u8 ibs_eilvt_off; | ||
443 | |||
444 | static inline void apic_init_ibs_nmi_per_cpu(void *arg) | ||
445 | { | ||
446 | ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0); | ||
447 | } | ||
448 | |||
449 | static inline void apic_clear_ibs_nmi_per_cpu(void *arg) | ||
450 | { | ||
451 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); | ||
452 | } | ||
453 | |||
454 | static int init_ibs_nmi(void) | ||
455 | { | 449 | { |
456 | #define IBSCTL_LVTOFFSETVAL (1 << 8) | 450 | #define IBSCTL_LVTOFFSETVAL (1 << 8) |
457 | #define IBSCTL 0x1cc | 451 | #define IBSCTL 0x1cc |
458 | struct pci_dev *cpu_cfg; | 452 | struct pci_dev *cpu_cfg; |
459 | int nodes; | 453 | int nodes; |
460 | u32 value = 0; | 454 | u32 value = 0; |
455 | u8 ibs_eilvt_off; | ||
461 | 456 | ||
462 | /* per CPU setup */ | 457 | ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); |
463 | on_each_cpu(apic_init_ibs_nmi_per_cpu, NULL, 1); | ||
464 | 458 | ||
465 | nodes = 0; | 459 | nodes = 0; |
466 | cpu_cfg = NULL; | 460 | cpu_cfg = NULL; |
@@ -490,22 +484,15 @@ static int init_ibs_nmi(void) | |||
490 | return 0; | 484 | return 0; |
491 | } | 485 | } |
492 | 486 | ||
493 | /* uninitialize the APIC for the IBS interrupts if needed */ | ||
494 | static void clear_ibs_nmi(void) | ||
495 | { | ||
496 | if (ibs_caps) | ||
497 | on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); | ||
498 | } | ||
499 | |||
500 | /* initialize the APIC for the IBS interrupts if available */ | 487 | /* initialize the APIC for the IBS interrupts if available */ |
501 | static void ibs_init(void) | 488 | static void init_ibs(void) |
502 | { | 489 | { |
503 | ibs_caps = get_ibs_caps(); | 490 | ibs_caps = get_ibs_caps(); |
504 | 491 | ||
505 | if (!ibs_caps) | 492 | if (!ibs_caps) |
506 | return; | 493 | return; |
507 | 494 | ||
508 | if (init_ibs_nmi()) { | 495 | if (__init_ibs_nmi()) { |
509 | ibs_caps = 0; | 496 | ibs_caps = 0; |
510 | return; | 497 | return; |
511 | } | 498 | } |
@@ -514,14 +501,6 @@ static void ibs_init(void) | |||
514 | (unsigned)ibs_caps); | 501 | (unsigned)ibs_caps); |
515 | } | 502 | } |
516 | 503 | ||
517 | static void ibs_exit(void) | ||
518 | { | ||
519 | if (!ibs_caps) | ||
520 | return; | ||
521 | |||
522 | clear_ibs_nmi(); | ||
523 | } | ||
524 | |||
525 | static int (*create_arch_files)(struct super_block *sb, struct dentry *root); | 504 | static int (*create_arch_files)(struct super_block *sb, struct dentry *root); |
526 | 505 | ||
527 | static int setup_ibs_files(struct super_block *sb, struct dentry *root) | 506 | static int setup_ibs_files(struct super_block *sb, struct dentry *root) |
@@ -570,27 +549,22 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
570 | 549 | ||
571 | static int op_amd_init(struct oprofile_operations *ops) | 550 | static int op_amd_init(struct oprofile_operations *ops) |
572 | { | 551 | { |
573 | ibs_init(); | 552 | init_ibs(); |
574 | create_arch_files = ops->create_files; | 553 | create_arch_files = ops->create_files; |
575 | ops->create_files = setup_ibs_files; | 554 | ops->create_files = setup_ibs_files; |
576 | return 0; | 555 | return 0; |
577 | } | 556 | } |
578 | 557 | ||
579 | static void op_amd_exit(void) | ||
580 | { | ||
581 | ibs_exit(); | ||
582 | } | ||
583 | |||
584 | struct op_x86_model_spec op_amd_spec = { | 558 | struct op_x86_model_spec op_amd_spec = { |
585 | .num_counters = NUM_COUNTERS, | 559 | .num_counters = NUM_COUNTERS, |
586 | .num_controls = NUM_CONTROLS, | 560 | .num_controls = NUM_COUNTERS, |
587 | .num_virt_counters = NUM_VIRT_COUNTERS, | 561 | .num_virt_counters = NUM_VIRT_COUNTERS, |
588 | .reserved = MSR_AMD_EVENTSEL_RESERVED, | 562 | .reserved = MSR_AMD_EVENTSEL_RESERVED, |
589 | .event_mask = OP_EVENT_MASK, | 563 | .event_mask = OP_EVENT_MASK, |
590 | .init = op_amd_init, | 564 | .init = op_amd_init, |
591 | .exit = op_amd_exit, | ||
592 | .fill_in_addresses = &op_amd_fill_in_addresses, | 565 | .fill_in_addresses = &op_amd_fill_in_addresses, |
593 | .setup_ctrs = &op_amd_setup_ctrs, | 566 | .setup_ctrs = &op_amd_setup_ctrs, |
567 | .cpu_down = &op_amd_cpu_shutdown, | ||
594 | .check_ctrs = &op_amd_check_ctrs, | 568 | .check_ctrs = &op_amd_check_ctrs, |
595 | .start = &op_amd_start, | 569 | .start = &op_amd_start, |
596 | .stop = &op_amd_stop, | 570 | .stop = &op_amd_stop, |
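op_amd_fill_in_addresses() now reports failure instead of leaving holes: each counter must reserve both its PERFCTR and EVNTSEL MSR, a half-reserved pair is released on the spot, and -EBUSY is returned only when a counter the user actually enabled could not be claimed, after rolling back everything already reserved. A standalone sketch of that reserve-both-or-fail pattern with stubbed reservation calls; the "slot 2 already taken" scenario is invented for the demo:

#include <stdio.h>

#define NUM_COUNTERS 4
#define EBUSY 16

/* stand-ins for reserve_perfctr_nmi()/reserve_evntsel_nmi(); slot 2's
 * event-select register is pretended to be taken by someone else */
static int reserve_perfctr(int i)  { return 1; }
static int reserve_evntsel(int i)  { return i != 2; }
static void release_perfctr(int i) { printf("  release perfctr %d\n", i); }
static void release_evntsel(int i) { printf("  release evntsel %d\n", i); }

static int counter_enabled[NUM_COUNTERS] = { 1, 1, 1, 0 };
static int counter_addr[NUM_COUNTERS];

static void shutdown_all(void)
{
        for (int i = 0; i < NUM_COUNTERS; i++) {
                if (!counter_addr[i])
                        continue;
                release_perfctr(i);
                release_evntsel(i);
        }
}

static int fill_in_addresses(void)
{
        for (int i = 0; i < NUM_COUNTERS; i++) {
                if (!reserve_perfctr(i))
                        goto fail;
                if (!reserve_evntsel(i)) {
                        release_perfctr(i);       /* don't keep half a pair */
                        goto fail;
                }
                counter_addr[i] = 1;              /* both registers reserved */
                continue;
        fail:
                if (!counter_enabled[i])
                        continue;                 /* unused counter: not fatal */
                printf("counter %d is reserved elsewhere\n", i);
                shutdown_all();                   /* roll back what we did get */
                return -EBUSY;
        }
        return 0;
}

int main(void)
{
        printf("fill_in_addresses() -> %d\n", fill_in_addresses());
        return 0;
}

The same reserve-both-or-fail shape is applied to op_model_ppro.c and op_model_p4.c later in this diff, which is what allows nmi_setup() above to treat fill_in_addresses() as a fallible call.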
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index e6a160a4684a..182558dd5515 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c | |||
@@ -385,8 +385,26 @@ static unsigned int get_stagger(void) | |||
385 | 385 | ||
386 | static unsigned long reset_value[NUM_COUNTERS_NON_HT]; | 386 | static unsigned long reset_value[NUM_COUNTERS_NON_HT]; |
387 | 387 | ||
388 | static void p4_shutdown(struct op_msrs const * const msrs) | ||
389 | { | ||
390 | int i; | ||
388 | 391 | ||
389 | static void p4_fill_in_addresses(struct op_msrs * const msrs) | 392 | for (i = 0; i < num_counters; ++i) { |
393 | if (msrs->counters[i].addr) | ||
394 | release_perfctr_nmi(msrs->counters[i].addr); | ||
395 | } | ||
396 | /* | ||
397 | * some of the control registers are specially reserved in | ||
398 | * conjunction with the counter registers (hence the starting offset). | ||
399 | * This saves a few bits. | ||
400 | */ | ||
401 | for (i = num_counters; i < num_controls; ++i) { | ||
402 | if (msrs->controls[i].addr) | ||
403 | release_evntsel_nmi(msrs->controls[i].addr); | ||
404 | } | ||
405 | } | ||
406 | |||
407 | static int p4_fill_in_addresses(struct op_msrs * const msrs) | ||
390 | { | 408 | { |
391 | unsigned int i; | 409 | unsigned int i; |
392 | unsigned int addr, cccraddr, stag; | 410 | unsigned int addr, cccraddr, stag; |
@@ -468,6 +486,18 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs) | |||
468 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; | 486 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; |
469 | } | 487 | } |
470 | } | 488 | } |
489 | |||
490 | for (i = 0; i < num_counters; ++i) { | ||
491 | if (!counter_config[i].enabled) | ||
492 | continue; | ||
493 | if (msrs->controls[i].addr) | ||
494 | continue; | ||
495 | op_x86_warn_reserved(i); | ||
496 | p4_shutdown(msrs); | ||
497 | return -EBUSY; | ||
498 | } | ||
499 | |||
500 | return 0; | ||
471 | } | 501 | } |
472 | 502 | ||
473 | 503 | ||
@@ -668,26 +698,6 @@ static void p4_stop(struct op_msrs const * const msrs) | |||
668 | } | 698 | } |
669 | } | 699 | } |
670 | 700 | ||
671 | static void p4_shutdown(struct op_msrs const * const msrs) | ||
672 | { | ||
673 | int i; | ||
674 | |||
675 | for (i = 0; i < num_counters; ++i) { | ||
676 | if (msrs->counters[i].addr) | ||
677 | release_perfctr_nmi(msrs->counters[i].addr); | ||
678 | } | ||
679 | /* | ||
680 | * some of the control registers are specially reserved in | ||
681 | * conjunction with the counter registers (hence the starting offset). | ||
682 | * This saves a few bits. | ||
683 | */ | ||
684 | for (i = num_counters; i < num_controls; ++i) { | ||
685 | if (msrs->controls[i].addr) | ||
686 | release_evntsel_nmi(msrs->controls[i].addr); | ||
687 | } | ||
688 | } | ||
689 | |||
690 | |||
691 | #ifdef CONFIG_SMP | 701 | #ifdef CONFIG_SMP |
692 | struct op_x86_model_spec op_p4_ht2_spec = { | 702 | struct op_x86_model_spec op_p4_ht2_spec = { |
693 | .num_counters = NUM_COUNTERS_HT2, | 703 | .num_counters = NUM_COUNTERS_HT2, |
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 2bf90fafa7b5..d769cda54082 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
@@ -30,19 +30,46 @@ static int counter_width = 32; | |||
30 | 30 | ||
31 | static u64 *reset_value; | 31 | static u64 *reset_value; |
32 | 32 | ||
33 | static void ppro_fill_in_addresses(struct op_msrs * const msrs) | 33 | static void ppro_shutdown(struct op_msrs const * const msrs) |
34 | { | 34 | { |
35 | int i; | 35 | int i; |
36 | 36 | ||
37 | for (i = 0; i < num_counters; i++) { | 37 | for (i = 0; i < num_counters; ++i) { |
38 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) | 38 | if (!msrs->counters[i].addr) |
39 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; | 39 | continue; |
40 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); | ||
41 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); | ||
42 | } | ||
43 | if (reset_value) { | ||
44 | kfree(reset_value); | ||
45 | reset_value = NULL; | ||
40 | } | 46 | } |
47 | } | ||
48 | |||
49 | static int ppro_fill_in_addresses(struct op_msrs * const msrs) | ||
50 | { | ||
51 | int i; | ||
41 | 52 | ||
42 | for (i = 0; i < num_counters; i++) { | 53 | for (i = 0; i < num_counters; i++) { |
43 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) | 54 | if (!reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) |
44 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; | 55 | goto fail; |
56 | if (!reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) { | ||
57 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); | ||
58 | goto fail; | ||
59 | } | ||
60 | /* both registers must be reserved */ | ||
61 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; | ||
62 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; | ||
63 | continue; | ||
64 | fail: | ||
65 | if (!counter_config[i].enabled) | ||
66 | continue; | ||
67 | op_x86_warn_reserved(i); | ||
68 | ppro_shutdown(msrs); | ||
69 | return -EBUSY; | ||
45 | } | 70 | } |
71 | |||
72 | return 0; | ||
46 | } | 73 | } |
47 | 74 | ||
48 | 75 | ||
@@ -78,26 +105,17 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, | |||
78 | 105 | ||
79 | /* clear all counters */ | 106 | /* clear all counters */ |
80 | for (i = 0; i < num_counters; ++i) { | 107 | for (i = 0; i < num_counters; ++i) { |
81 | if (unlikely(!msrs->controls[i].addr)) { | 108 | if (!msrs->controls[i].addr) |
82 | if (counter_config[i].enabled && !smp_processor_id()) | ||
83 | /* | ||
84 | * counter is reserved, this is on all | ||
85 | * cpus, so report only for cpu #0 | ||
86 | */ | ||
87 | op_x86_warn_reserved(i); | ||
88 | continue; | 109 | continue; |
89 | } | ||
90 | rdmsrl(msrs->controls[i].addr, val); | 110 | rdmsrl(msrs->controls[i].addr, val); |
91 | if (val & ARCH_PERFMON_EVENTSEL_ENABLE) | 111 | if (val & ARCH_PERFMON_EVENTSEL_ENABLE) |
92 | op_x86_warn_in_use(i); | 112 | op_x86_warn_in_use(i); |
93 | val &= model->reserved; | 113 | val &= model->reserved; |
94 | wrmsrl(msrs->controls[i].addr, val); | 114 | wrmsrl(msrs->controls[i].addr, val); |
95 | } | 115 | /* |
96 | 116 | * avoid a false detection of ctr overflows in NMI * | |
97 | /* avoid a false detection of ctr overflows in NMI handler */ | 117 | * handler |
98 | for (i = 0; i < num_counters; ++i) { | 118 | */ |
99 | if (unlikely(!msrs->counters[i].addr)) | ||
100 | continue; | ||
101 | wrmsrl(msrs->counters[i].addr, -1LL); | 119 | wrmsrl(msrs->counters[i].addr, -1LL); |
102 | } | 120 | } |
103 | 121 | ||
@@ -189,25 +207,6 @@ static void ppro_stop(struct op_msrs const * const msrs) | |||
189 | } | 207 | } |
190 | } | 208 | } |
191 | 209 | ||
192 | static void ppro_shutdown(struct op_msrs const * const msrs) | ||
193 | { | ||
194 | int i; | ||
195 | |||
196 | for (i = 0; i < num_counters; ++i) { | ||
197 | if (msrs->counters[i].addr) | ||
198 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); | ||
199 | } | ||
200 | for (i = 0; i < num_counters; ++i) { | ||
201 | if (msrs->controls[i].addr) | ||
202 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); | ||
203 | } | ||
204 | if (reset_value) { | ||
205 | kfree(reset_value); | ||
206 | reset_value = NULL; | ||
207 | } | ||
208 | } | ||
209 | |||
210 | |||
211 | struct op_x86_model_spec op_ppro_spec = { | 210 | struct op_x86_model_spec op_ppro_spec = { |
212 | .num_counters = 2, | 211 | .num_counters = 2, |
213 | .num_controls = 2, | 212 | .num_controls = 2, |
@@ -239,11 +238,11 @@ static void arch_perfmon_setup_counters(void) | |||
239 | if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && | 238 | if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && |
240 | current_cpu_data.x86_model == 15) { | 239 | current_cpu_data.x86_model == 15) { |
241 | eax.split.version_id = 2; | 240 | eax.split.version_id = 2; |
242 | eax.split.num_events = 2; | 241 | eax.split.num_counters = 2; |
243 | eax.split.bit_width = 40; | 242 | eax.split.bit_width = 40; |
244 | } | 243 | } |
245 | 244 | ||
246 | num_counters = eax.split.num_events; | 245 | num_counters = eax.split.num_counters; |
247 | 246 | ||
248 | op_arch_perfmon_spec.num_counters = num_counters; | 247 | op_arch_perfmon_spec.num_counters = num_counters; |
249 | op_arch_perfmon_spec.num_controls = num_counters; | 248 | op_arch_perfmon_spec.num_controls = num_counters; |
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index ff82a755edd4..89017fa1fd63 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h | |||
@@ -40,10 +40,10 @@ struct op_x86_model_spec { | |||
40 | u64 reserved; | 40 | u64 reserved; |
41 | u16 event_mask; | 41 | u16 event_mask; |
42 | int (*init)(struct oprofile_operations *ops); | 42 | int (*init)(struct oprofile_operations *ops); |
43 | void (*exit)(void); | 43 | int (*fill_in_addresses)(struct op_msrs * const msrs); |
44 | void (*fill_in_addresses)(struct op_msrs * const msrs); | ||
45 | void (*setup_ctrs)(struct op_x86_model_spec const *model, | 44 | void (*setup_ctrs)(struct op_x86_model_spec const *model, |
46 | struct op_msrs const * const msrs); | 45 | struct op_msrs const * const msrs); |
46 | void (*cpu_down)(void); | ||
47 | int (*check_ctrs)(struct pt_regs * const regs, | 47 | int (*check_ctrs)(struct pt_regs * const regs, |
48 | struct op_msrs const * const msrs); | 48 | struct op_msrs const * const msrs); |
49 | void (*start)(struct op_msrs const * const msrs); | 49 | void (*start)(struct op_msrs const * const msrs); |
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index b110d97fb925..a0207a7fdf39 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile | |||
@@ -18,6 +18,8 @@ obj-$(CONFIG_X86_MRST) += mrst.o | |||
18 | obj-y += common.o early.o | 18 | obj-y += common.o early.o |
19 | obj-y += amd_bus.o bus_numa.o | 19 | obj-y += amd_bus.o bus_numa.o |
20 | 20 | ||
21 | obj-$(CONFIG_PCI_CNB20LE_QUIRK) += broadcom_bus.o | ||
22 | |||
21 | ifeq ($(CONFIG_PCI_DEBUG),y) | 23 | ifeq ($(CONFIG_PCI_DEBUG),y) |
22 | EXTRA_CFLAGS += -DDEBUG | 24 | EXTRA_CFLAGS += -DDEBUG |
23 | endif | 25 | endif |
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 31930fd30ea9..2ec04c424a62 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -207,10 +207,9 @@ get_current_resources(struct acpi_device *device, int busnum, | |||
207 | if (!info.res) | 207 | if (!info.res) |
208 | goto res_alloc_fail; | 208 | goto res_alloc_fail; |
209 | 209 | ||
210 | info.name = kmalloc(16, GFP_KERNEL); | 210 | info.name = kasprintf(GFP_KERNEL, "PCI Bus %04x:%02x", domain, busnum); |
211 | if (!info.name) | 211 | if (!info.name) |
212 | goto name_alloc_fail; | 212 | goto name_alloc_fail; |
213 | sprintf(info.name, "PCI Bus %04x:%02x", domain, busnum); | ||
214 | 213 | ||
215 | info.res_num = 0; | 214 | info.res_num = 0; |
216 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, | 215 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, |
@@ -224,8 +223,11 @@ res_alloc_fail: | |||
224 | return; | 223 | return; |
225 | } | 224 | } |
226 | 225 | ||
227 | struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum) | 226 | struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) |
228 | { | 227 | { |
228 | struct acpi_device *device = root->device; | ||
229 | int domain = root->segment; | ||
230 | int busnum = root->secondary.start; | ||
229 | struct pci_bus *bus; | 231 | struct pci_bus *bus; |
230 | struct pci_sysdata *sd; | 232 | struct pci_sysdata *sd; |
231 | int node; | 233 | int node; |
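The kasprintf() conversion above folds the kmalloc()-plus-sprintf() pair into one call that sizes the buffer from the format string, removing the hard-coded 16-byte guess. A tiny userspace analog using GNU asprintf(), only to show the shape of the change:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        int domain = 0, busnum = 3;
        char *name;

        /* before: kmalloc(16, ...) + sprintf(name, "PCI Bus %04x:%02x", ...)
         * after:  one kasprintf() call; asprintf() is the userspace analog */
        if (asprintf(&name, "PCI Bus %04x:%02x", domain, busnum) < 0)
                return 1;

        printf("%s\n", name);   /* "PCI Bus 0000:03" */
        free(name);
        return 0;
}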
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c new file mode 100644 index 000000000000..0846a5bbbfbd --- /dev/null +++ b/arch/x86/pci/broadcom_bus.c | |||
@@ -0,0 +1,101 @@ | |||
1 | /* | ||
2 | * Read address ranges from a Broadcom CNB20LE Host Bridge | ||
3 | * | ||
4 | * Copyright (c) 2010 Ira W. Snyder <iws@ovro.caltech.edu> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License as published by the | ||
8 | * Free Software Foundation; either version 2 of the License, or (at your | ||
9 | * option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/delay.h> | ||
13 | #include <linux/dmi.h> | ||
14 | #include <linux/pci.h> | ||
15 | #include <linux/init.h> | ||
16 | #include <asm/pci_x86.h> | ||
17 | |||
18 | #include "bus_numa.h" | ||
19 | |||
20 | static void __devinit cnb20le_res(struct pci_dev *dev) | ||
21 | { | ||
22 | struct pci_root_info *info; | ||
23 | struct resource res; | ||
24 | u16 word1, word2; | ||
25 | u8 fbus, lbus; | ||
26 | int i; | ||
27 | |||
28 | /* | ||
29 | * The x86_pci_root_bus_res_quirks() function already refuses to use | ||
30 | * this information if ACPI _CRS was used. Therefore, we don't bother | ||
31 | * checking if ACPI is enabled, and just generate the information | ||
32 | * for both the ACPI _CRS and no ACPI cases. | ||
33 | */ | ||
34 | |||
35 | info = &pci_root_info[pci_root_num]; | ||
36 | pci_root_num++; | ||
37 | |||
38 | /* read the PCI bus numbers */ | ||
39 | pci_read_config_byte(dev, 0x44, &fbus); | ||
40 | pci_read_config_byte(dev, 0x45, &lbus); | ||
41 | info->bus_min = fbus; | ||
42 | info->bus_max = lbus; | ||
43 | |||
44 | /* | ||
45 | * Add the legacy IDE ports on bus 0 | ||
46 | * | ||
47 | * These do not exist anywhere in the bridge registers, AFAICT. I do | ||
48 | * not have the datasheet, so this is the best I can do. | ||
49 | */ | ||
50 | if (fbus == 0) { | ||
51 | update_res(info, 0x01f0, 0x01f7, IORESOURCE_IO, 0); | ||
52 | update_res(info, 0x03f6, 0x03f6, IORESOURCE_IO, 0); | ||
53 | update_res(info, 0x0170, 0x0177, IORESOURCE_IO, 0); | ||
54 | update_res(info, 0x0376, 0x0376, IORESOURCE_IO, 0); | ||
55 | update_res(info, 0xffa0, 0xffaf, IORESOURCE_IO, 0); | ||
56 | } | ||
57 | |||
58 | /* read the non-prefetchable memory window */ | ||
59 | pci_read_config_word(dev, 0xc0, &word1); | ||
60 | pci_read_config_word(dev, 0xc2, &word2); | ||
61 | if (word1 != word2) { | ||
62 | res.start = (word1 << 16) | 0x0000; | ||
63 | res.end = (word2 << 16) | 0xffff; | ||
64 | res.flags = IORESOURCE_MEM; | ||
65 | update_res(info, res.start, res.end, res.flags, 0); | ||
66 | } | ||
67 | |||
68 | /* read the prefetchable memory window */ | ||
69 | pci_read_config_word(dev, 0xc4, &word1); | ||
70 | pci_read_config_word(dev, 0xc6, &word2); | ||
71 | if (word1 != word2) { | ||
72 | res.start = (word1 << 16) | 0x0000; | ||
73 | res.end = (word2 << 16) | 0xffff; | ||
74 | res.flags = IORESOURCE_MEM | IORESOURCE_PREFETCH; | ||
75 | update_res(info, res.start, res.end, res.flags, 0); | ||
76 | } | ||
77 | |||
78 | /* read the IO port window */ | ||
79 | pci_read_config_word(dev, 0xd0, &word1); | ||
80 | pci_read_config_word(dev, 0xd2, &word2); | ||
81 | if (word1 != word2) { | ||
82 | res.start = word1; | ||
83 | res.end = word2; | ||
84 | res.flags = IORESOURCE_IO; | ||
85 | update_res(info, res.start, res.end, res.flags, 0); | ||
86 | } | ||
87 | |||
88 | /* print information about this host bridge */ | ||
89 | res.start = fbus; | ||
90 | res.end = lbus; | ||
91 | res.flags = IORESOURCE_BUS; | ||
92 | dev_info(&dev->dev, "CNB20LE PCI Host Bridge (domain %04x %pR)\n", | ||
93 | pci_domain_nr(dev->bus), &res); | ||
94 | |||
95 | for (i = 0; i < info->res_num; i++) | ||
96 | dev_info(&dev->dev, "host bridge window %pR\n", &info->res[i]); | ||
97 | } | ||
98 | |||
99 | DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_LE, | ||
100 | cnb20le_res); | ||
101 | |||
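A note on the window decoding in cnb20le_res() above: the bridge registers read at 0xc0/0xc2 (and 0xc4/0xc6) hold only the upper 16 bits of the window base and limit, so the code pads the low 16 bits with 0x0000 for the start and 0xffff for the end. A stand-alone sketch of that arithmetic in plain C, with the register values made up for illustration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* hypothetical contents of the base/limit registers (address >> 16) */
	uint16_t word1 = 0xf000, word2 = 0xf7ff;

	uint64_t start = ((uint64_t)word1 << 16) | 0x0000;	/* 0xf0000000 */
	uint64_t end   = ((uint64_t)word2 << 16) | 0xffff;	/* 0xf7ffffff */

	printf("window %#llx-%#llx (%llu MB)\n",
	       (unsigned long long)start, (unsigned long long)end,
	       (unsigned long long)((end - start + 1) >> 20));
	return 0;
}

When the base and limit words are equal the driver treats the window as not configured, which is why each block above is guarded by a word1 != word2 check.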
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index cf2e93869c48..215a27ae050d 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -76,7 +76,7 @@ struct pci_ops pci_root_ops = { | |||
76 | * This interrupt-safe spinlock protects all accesses to PCI | 76 | * This interrupt-safe spinlock protects all accesses to PCI |
77 | * configuration space. | 77 | * configuration space. |
78 | */ | 78 | */ |
79 | DEFINE_SPINLOCK(pci_config_lock); | 79 | DEFINE_RAW_SPINLOCK(pci_config_lock); |
80 | 80 | ||
81 | static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d) | 81 | static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d) |
82 | { | 82 | { |
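Context for this and the following hunks (direct.c, mmconfig_32.c, numaq_32.c, pcbios.c): pci_config_lock becomes a raw_spinlock_t, presumably so the low-level config-space accessors keep a genuine spinning lock even on kernels where ordinary spinlocks may sleep, and every lock/unlock site switches to the raw_spin_* variants. A minimal sketch of the pattern with an illustrative lock of its own (not pci_config_lock):

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(demo_lock);

static void demo_critical_section(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&demo_lock, flags);
	/* touch the shared hardware state here */
	raw_spin_unlock_irqrestore(&demo_lock, flags);
}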
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 347d882b3bb3..bd33620b0071 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c | |||
@@ -27,7 +27,7 @@ static int pci_conf1_read(unsigned int seg, unsigned int bus, | |||
27 | return -EINVAL; | 27 | return -EINVAL; |
28 | } | 28 | } |
29 | 29 | ||
30 | spin_lock_irqsave(&pci_config_lock, flags); | 30 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
31 | 31 | ||
32 | outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); | 32 | outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); |
33 | 33 | ||
@@ -43,7 +43,7 @@ static int pci_conf1_read(unsigned int seg, unsigned int bus, | |||
43 | break; | 43 | break; |
44 | } | 44 | } |
45 | 45 | ||
46 | spin_unlock_irqrestore(&pci_config_lock, flags); | 46 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
47 | 47 | ||
48 | return 0; | 48 | return 0; |
49 | } | 49 | } |
@@ -56,7 +56,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus, | |||
56 | if ((bus > 255) || (devfn > 255) || (reg > 4095)) | 56 | if ((bus > 255) || (devfn > 255) || (reg > 4095)) |
57 | return -EINVAL; | 57 | return -EINVAL; |
58 | 58 | ||
59 | spin_lock_irqsave(&pci_config_lock, flags); | 59 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
60 | 60 | ||
61 | outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); | 61 | outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); |
62 | 62 | ||
@@ -72,7 +72,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus, | |||
72 | break; | 72 | break; |
73 | } | 73 | } |
74 | 74 | ||
75 | spin_unlock_irqrestore(&pci_config_lock, flags); | 75 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
76 | 76 | ||
77 | return 0; | 77 | return 0; |
78 | } | 78 | } |
@@ -108,7 +108,7 @@ static int pci_conf2_read(unsigned int seg, unsigned int bus, | |||
108 | if (dev & 0x10) | 108 | if (dev & 0x10) |
109 | return PCIBIOS_DEVICE_NOT_FOUND; | 109 | return PCIBIOS_DEVICE_NOT_FOUND; |
110 | 110 | ||
111 | spin_lock_irqsave(&pci_config_lock, flags); | 111 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
112 | 112 | ||
113 | outb((u8)(0xF0 | (fn << 1)), 0xCF8); | 113 | outb((u8)(0xF0 | (fn << 1)), 0xCF8); |
114 | outb((u8)bus, 0xCFA); | 114 | outb((u8)bus, 0xCFA); |
@@ -127,7 +127,7 @@ static int pci_conf2_read(unsigned int seg, unsigned int bus, | |||
127 | 127 | ||
128 | outb(0, 0xCF8); | 128 | outb(0, 0xCF8); |
129 | 129 | ||
130 | spin_unlock_irqrestore(&pci_config_lock, flags); | 130 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
131 | 131 | ||
132 | return 0; | 132 | return 0; |
133 | } | 133 | } |
@@ -147,7 +147,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus, | |||
147 | if (dev & 0x10) | 147 | if (dev & 0x10) |
148 | return PCIBIOS_DEVICE_NOT_FOUND; | 148 | return PCIBIOS_DEVICE_NOT_FOUND; |
149 | 149 | ||
150 | spin_lock_irqsave(&pci_config_lock, flags); | 150 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
151 | 151 | ||
152 | outb((u8)(0xF0 | (fn << 1)), 0xCF8); | 152 | outb((u8)(0xF0 | (fn << 1)), 0xCF8); |
153 | outb((u8)bus, 0xCFA); | 153 | outb((u8)bus, 0xCFA); |
@@ -166,7 +166,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus, | |||
166 | 166 | ||
167 | outb(0, 0xCF8); | 167 | outb(0, 0xCF8); |
168 | 168 | ||
169 | spin_unlock_irqrestore(&pci_config_lock, flags); | 169 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
170 | 170 | ||
171 | return 0; | 171 | return 0; |
172 | } | 172 | } |
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 5d362b5ba06f..9810a0f76c91 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
@@ -589,8 +589,6 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route | |||
589 | case PCI_DEVICE_ID_INTEL_ICH10_1: | 589 | case PCI_DEVICE_ID_INTEL_ICH10_1: |
590 | case PCI_DEVICE_ID_INTEL_ICH10_2: | 590 | case PCI_DEVICE_ID_INTEL_ICH10_2: |
591 | case PCI_DEVICE_ID_INTEL_ICH10_3: | 591 | case PCI_DEVICE_ID_INTEL_ICH10_3: |
592 | case PCI_DEVICE_ID_INTEL_CPT_LPC1: | ||
593 | case PCI_DEVICE_ID_INTEL_CPT_LPC2: | ||
594 | r->name = "PIIX/ICH"; | 592 | r->name = "PIIX/ICH"; |
595 | r->get = pirq_piix_get; | 593 | r->get = pirq_piix_get; |
596 | r->set = pirq_piix_set; | 594 | r->set = pirq_piix_set; |
@@ -605,6 +603,13 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route | |||
605 | return 1; | 603 | return 1; |
606 | } | 604 | } |
607 | 605 | ||
606 | if ((device >= PCI_DEVICE_ID_INTEL_CPT_LPC_MIN) && | ||
607 | (device <= PCI_DEVICE_ID_INTEL_CPT_LPC_MAX)) { | ||
608 | r->name = "PIIX/ICH"; | ||
609 | r->get = pirq_piix_get; | ||
610 | r->set = pirq_piix_set; | ||
611 | return 1; | ||
612 | } | ||
608 | return 0; | 613 | return 0; |
609 | } | 614 | } |
610 | 615 | ||
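Aside: the Cougar Point LPC bridge evidently shows up under a whole block of PCI device IDs, so the probe above matches the ID range rather than listing each ID as a switch case. The equivalent test in isolation might look like the sketch below; it assumes the PCI_DEVICE_ID_INTEL_CPT_LPC_MIN/MAX constants that this series adds to pci_ids.h.

#include <linux/types.h>
#include <linux/pci_ids.h>

static bool is_cpt_lpc(u16 device)
{
	return device >= PCI_DEVICE_ID_INTEL_CPT_LPC_MIN &&
	       device <= PCI_DEVICE_ID_INTEL_CPT_LPC_MAX;
}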
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 39b9ebe8f886..a918553ebc75 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
@@ -483,16 +483,17 @@ static void __init pci_mmcfg_reject_broken(int early) | |||
483 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { | 483 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { |
484 | int valid = 0; | 484 | int valid = 0; |
485 | 485 | ||
486 | if (!early && !acpi_disabled) | 486 | if (!early && !acpi_disabled) { |
487 | valid = is_mmconf_reserved(is_acpi_reserved, cfg, 0); | 487 | valid = is_mmconf_reserved(is_acpi_reserved, cfg, 0); |
488 | 488 | ||
489 | if (valid) | 489 | if (valid) |
490 | continue; | 490 | continue; |
491 | 491 | else | |
492 | if (!early) | 492 | printk(KERN_ERR FW_BUG PREFIX |
493 | printk(KERN_ERR FW_BUG PREFIX | 493 | "MMCONFIG at %pR not reserved in " |
494 | "MMCONFIG at %pR not reserved in " | 494 | "ACPI motherboard resources\n", |
495 | "ACPI motherboard resources\n", &cfg->res); | 495 | &cfg->res); |
496 | } | ||
496 | 497 | ||
497 | /* Don't try to do this check unless configuration | 498 | /* Don't try to do this check unless configuration |
498 | type 1 is available. how about type 2 ?*/ | 499 | type 1 is available. how about type 2 ?*/ |
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 90d5fd476ed4..a3d9c54792ae 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c | |||
@@ -64,7 +64,7 @@ err: *value = -1; | |||
64 | if (!base) | 64 | if (!base) |
65 | goto err; | 65 | goto err; |
66 | 66 | ||
67 | spin_lock_irqsave(&pci_config_lock, flags); | 67 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
68 | 68 | ||
69 | pci_exp_set_dev_base(base, bus, devfn); | 69 | pci_exp_set_dev_base(base, bus, devfn); |
70 | 70 | ||
@@ -79,7 +79,7 @@ err: *value = -1; | |||
79 | *value = mmio_config_readl(mmcfg_virt_addr + reg); | 79 | *value = mmio_config_readl(mmcfg_virt_addr + reg); |
80 | break; | 80 | break; |
81 | } | 81 | } |
82 | spin_unlock_irqrestore(&pci_config_lock, flags); | 82 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
83 | 83 | ||
84 | return 0; | 84 | return 0; |
85 | } | 85 | } |
@@ -97,7 +97,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, | |||
97 | if (!base) | 97 | if (!base) |
98 | return -EINVAL; | 98 | return -EINVAL; |
99 | 99 | ||
100 | spin_lock_irqsave(&pci_config_lock, flags); | 100 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
101 | 101 | ||
102 | pci_exp_set_dev_base(base, bus, devfn); | 102 | pci_exp_set_dev_base(base, bus, devfn); |
103 | 103 | ||
@@ -112,7 +112,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, | |||
112 | mmio_config_writel(mmcfg_virt_addr + reg, value); | 112 | mmio_config_writel(mmcfg_virt_addr + reg, value); |
113 | break; | 113 | break; |
114 | } | 114 | } |
115 | spin_unlock_irqrestore(&pci_config_lock, flags); | 115 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
116 | 116 | ||
117 | return 0; | 117 | return 0; |
118 | } | 118 | } |
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c index 8bf2fcb88d04..7ef3a2735df3 100644 --- a/arch/x86/pci/mrst.c +++ b/arch/x86/pci/mrst.c | |||
@@ -109,7 +109,7 @@ static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn, | |||
109 | decode++; | 109 | decode++; |
110 | decode = ~(decode - 1); | 110 | decode = ~(decode - 1); |
111 | } else { | 111 | } else { |
112 | decode = ~0; | 112 | decode = 0; |
113 | } | 113 | } |
114 | 114 | ||
115 | /* | 115 | /* |
@@ -247,6 +247,10 @@ static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev) | |||
247 | u32 size; | 247 | u32 size; |
248 | int i; | 248 | int i; |
249 | 249 | ||
250 | /* Must have extended configuration space */ | ||
251 | if (dev->cfg_size < PCIE_CAP_OFFSET + 4) | ||
252 | return; | ||
253 | |||
250 | /* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */ | 254 | /* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */ |
251 | offset = fixed_bar_cap(dev->bus, dev->devfn); | 255 | offset = fixed_bar_cap(dev->bus, dev->devfn); |
252 | if (!offset || PCI_DEVFN(2, 0) == dev->devfn || | 256 | if (!offset || PCI_DEVFN(2, 0) == dev->devfn || |
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 8223738ad806..5c9e2458df4e 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c | |||
@@ -37,7 +37,7 @@ static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, | |||
37 | if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) | 37 | if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) |
38 | return -EINVAL; | 38 | return -EINVAL; |
39 | 39 | ||
40 | spin_lock_irqsave(&pci_config_lock, flags); | 40 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
41 | 41 | ||
42 | write_cf8(bus, devfn, reg); | 42 | write_cf8(bus, devfn, reg); |
43 | 43 | ||
@@ -62,7 +62,7 @@ static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, | |||
62 | break; | 62 | break; |
63 | } | 63 | } |
64 | 64 | ||
65 | spin_unlock_irqrestore(&pci_config_lock, flags); | 65 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
66 | 66 | ||
67 | return 0; | 67 | return 0; |
68 | } | 68 | } |
@@ -76,7 +76,7 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, | |||
76 | if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) | 76 | if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) |
77 | return -EINVAL; | 77 | return -EINVAL; |
78 | 78 | ||
79 | spin_lock_irqsave(&pci_config_lock, flags); | 79 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
80 | 80 | ||
81 | write_cf8(bus, devfn, reg); | 81 | write_cf8(bus, devfn, reg); |
82 | 82 | ||
@@ -101,7 +101,7 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, | |||
101 | break; | 101 | break; |
102 | } | 102 | } |
103 | 103 | ||
104 | spin_unlock_irqrestore(&pci_config_lock, flags); | 104 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
105 | 105 | ||
106 | return 0; | 106 | return 0; |
107 | } | 107 | } |
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 59a225c17b84..2492d165096a 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c | |||
@@ -162,7 +162,7 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, | |||
162 | if (!value || (bus > 255) || (devfn > 255) || (reg > 255)) | 162 | if (!value || (bus > 255) || (devfn > 255) || (reg > 255)) |
163 | return -EINVAL; | 163 | return -EINVAL; |
164 | 164 | ||
165 | spin_lock_irqsave(&pci_config_lock, flags); | 165 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
166 | 166 | ||
167 | switch (len) { | 167 | switch (len) { |
168 | case 1: | 168 | case 1: |
@@ -213,7 +213,7 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, | |||
213 | break; | 213 | break; |
214 | } | 214 | } |
215 | 215 | ||
216 | spin_unlock_irqrestore(&pci_config_lock, flags); | 216 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
217 | 217 | ||
218 | return (int)((result & 0xff00) >> 8); | 218 | return (int)((result & 0xff00) >> 8); |
219 | } | 219 | } |
@@ -228,7 +228,7 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, | |||
228 | if ((bus > 255) || (devfn > 255) || (reg > 255)) | 228 | if ((bus > 255) || (devfn > 255) || (reg > 255)) |
229 | return -EINVAL; | 229 | return -EINVAL; |
230 | 230 | ||
231 | spin_lock_irqsave(&pci_config_lock, flags); | 231 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
232 | 232 | ||
233 | switch (len) { | 233 | switch (len) { |
234 | case 1: | 234 | case 1: |
@@ -269,7 +269,7 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, | |||
269 | break; | 269 | break; |
270 | } | 270 | } |
271 | 271 | ||
272 | spin_unlock_irqrestore(&pci_config_lock, flags); | 272 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
273 | 273 | ||
274 | return (int)((result & 0xff00) >> 8); | 274 | return (int)((result & 0xff00) >> 8); |
275 | } | 275 | } |
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 32764b8880b5..b3c6c59ed302 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -476,6 +476,7 @@ void xen_timer_resume(void) | |||
476 | __init void xen_time_init(void) | 476 | __init void xen_time_init(void) |
477 | { | 477 | { |
478 | int cpu = smp_processor_id(); | 478 | int cpu = smp_processor_id(); |
479 | struct timespec tp; | ||
479 | 480 | ||
480 | clocksource_register(&xen_clocksource); | 481 | clocksource_register(&xen_clocksource); |
481 | 482 | ||
@@ -487,9 +488,8 @@ __init void xen_time_init(void) | |||
487 | } | 488 | } |
488 | 489 | ||
489 | /* Set initial system time with full resolution */ | 490 | /* Set initial system time with full resolution */ |
490 | xen_read_wallclock(&xtime); | 491 | xen_read_wallclock(&tp); |
491 | set_normalized_timespec(&wall_to_monotonic, | 492 | do_settimeofday(&tp); |
492 | -xtime.tv_sec, -xtime.tv_nsec); | ||
493 | 493 | ||
494 | setup_force_cpu_cap(X86_FEATURE_TSC); | 494 | setup_force_cpu_cap(X86_FEATURE_TSC); |
495 | 495 | ||
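For context (not part of the diff): rather than poking xtime and wall_to_monotonic directly, xen_time_init() now reads the wallclock into a local struct timespec and lets do_settimeofday() hand it to the timekeeping core, which keeps both variables consistent. A minimal sketch of the pattern, with the function pointer standing in for xen_read_wallclock():

#include <linux/time.h>

static void demo_seed_system_time(void (*read_wallclock)(struct timespec *))
{
	struct timespec tp;

	read_wallclock(&tp);	/* e.g. xen_read_wallclock(&tp) in this file */
	do_settimeofday(&tp);	/* timekeeping core updates xtime and
				 * wall_to_monotonic together */
}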