Diffstat (limited to 'arch/x86')
237 files changed, 7003 insertions, 15647 deletions
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 0e103236b754..0e9dec6cadd1 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -15,3 +15,4 @@ obj-y += vdso/
 obj-$(CONFIG_IA32_EMULATION) += ia32/
 
 obj-y += platform/
+obj-y += net/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc6c53a95bfd..da349723d411 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -8,6 +8,7 @@ config 64BIT
 
 config X86_32
 	def_bool !64BIT
+	select CLKSRC_I8253
 
 config X86_64
 	def_bool 64BIT
@@ -16,8 +17,6 @@ config X86_64
 config X86
 	def_bool y
 	select HAVE_AOUT if X86_32
-	select HAVE_READQ
-	select HAVE_WRITEQ
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_IDE
 	select HAVE_OPROFILE
@@ -65,13 +64,12 @@ config X86
 	select HAVE_GENERIC_HARDIRQS
 	select HAVE_SPARSE_IRQ
 	select GENERIC_FIND_FIRST_BIT
-	select GENERIC_FIND_NEXT_BIT
 	select GENERIC_IRQ_PROBE
 	select GENERIC_PENDING_IRQ if SMP
 	select GENERIC_IRQ_SHOW
 	select IRQ_FORCED_THREADING
 	select USE_GENERIC_SMP_HELPERS if SMP
-	select ARCH_NO_SYSDEV_OPS
+	select HAVE_BPF_JIT if (X86_64 && NET)
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
@@ -112,7 +110,14 @@ config MMU
 	def_bool y
 
 config ZONE_DMA
-	def_bool y
+	bool "DMA memory allocation support" if EXPERT
+	default y
+	help
+	  DMA memory allocation support allows devices with less than 32-bit
+	  addressing to allocate within the first 16MB of address space.
+	  Disable if no such devices will be used.
+
+	  If unsure, say Y.
 
 config SBUS
 	bool
@@ -365,17 +370,6 @@ config X86_UV
 # Following is an alphabetically sorted list of 32 bit extended platforms
 # Please maintain the alphabetic order if and when there are additions
 
-config X86_ELAN
-	bool "AMD Elan"
-	depends on X86_32
-	depends on X86_EXTENDED_PLATFORM
-	---help---
-	  Select this for an AMD Elan processor.
-
-	  Do not use this option for K6/Athlon/Opteron processors!
-
-	  If unsure, choose "PC-compatible" instead.
-
 config X86_INTEL_CE
 	bool "CE4100 TV platform"
 	depends on PCI
@@ -690,6 +684,7 @@ config AMD_IOMMU
 	bool "AMD IOMMU support"
 	select SWIOTLB
 	select PCI_MSI
+	select PCI_IOV
 	depends on X86_64 && PCI && ACPI
 	---help---
 	  With this option you can enable support for AMD IOMMU hardware in
@@ -919,6 +914,7 @@ config TOSHIBA
 
 config I8K
 	tristate "Dell laptop support"
+	select HWMON
 	---help---
 	  This adds a driver to safely access the System Management Mode
 	  of the CPU on the Dell Inspiron 8000. The System Management Mode
@@ -1174,7 +1170,7 @@ comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
 config AMD_NUMA
 	def_bool y
 	prompt "Old style AMD Opteron NUMA detection"
-	depends on X86_64 && NUMA && PCI
+	depends on NUMA && PCI
 	---help---
 	  Enable AMD NUMA node topology detection. You should say Y here if
 	  you have a multi processor AMD system. This uses an old method to
@@ -1201,7 +1197,7 @@ config NODES_SPAN_OTHER_NODES
 
 config NUMA_EMU
 	bool "NUMA emulation"
-	depends on X86_64 && NUMA
+	depends on NUMA
 	---help---
 	  Enable NUMA emulation. A flat machine will be split
 	  into virtual nodes when booted with "numa=fake=N", where N is the
@@ -1223,6 +1219,10 @@ config HAVE_ARCH_BOOTMEM
 	def_bool y
 	depends on X86_32 && NUMA
 
+config HAVE_ARCH_ALLOC_REMAP
+	def_bool y
+	depends on X86_32 && NUMA
+
 config ARCH_HAVE_MEMORY_PRESENT
 	def_bool y
 	depends on X86_32 && DISCONTIGMEM
@@ -1231,13 +1231,9 @@ config NEED_NODE_MEMMAP_SIZE
 	def_bool y
 	depends on X86_32 && (DISCONTIGMEM || SPARSEMEM)
 
-config HAVE_ARCH_ALLOC_REMAP
-	def_bool y
-	depends on X86_32 && NUMA
-
 config ARCH_FLATMEM_ENABLE
 	def_bool y
-	depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA
+	depends on X86_32 && !NUMA
 
 config ARCH_DISCONTIGMEM_ENABLE
 	def_bool y
@@ -1247,20 +1243,16 @@ config ARCH_DISCONTIGMEM_DEFAULT
 	def_bool y
 	depends on NUMA && X86_32
 
-config ARCH_PROC_KCORE_TEXT
-	def_bool y
-	depends on X86_64 && PROC_KCORE
-
-config ARCH_SPARSEMEM_DEFAULT
-	def_bool y
-	depends on X86_64
-
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD
 	select SPARSEMEM_STATIC if X86_32
 	select SPARSEMEM_VMEMMAP_ENABLE if X86_64
 
+config ARCH_SPARSEMEM_DEFAULT
+	def_bool y
+	depends on X86_64
+
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 	depends on ARCH_SPARSEMEM_ENABLE
@@ -1269,6 +1261,10 @@ config ARCH_MEMORY_PROBE
 	def_bool X86_64
 	depends on MEMORY_HOTPLUG
 
+config ARCH_PROC_KCORE_TEXT
+	def_bool y
+	depends on X86_64 && PROC_KCORE
+
 config ILLEGAL_POINTER_VALUE
 	hex
 	default 0 if X86_32
@@ -1703,10 +1699,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE
 	def_bool y
 	depends on MEMORY_HOTPLUG
 
-config HAVE_ARCH_EARLY_PFN_TO_NID
-	def_bool X86_64
-	depends on NUMA
-
 config USE_PERCPU_NUMA_NODE_ID
 	def_bool y
 	depends on NUMA
@@ -1848,7 +1840,7 @@ config APM_ALLOW_INTS
 
 endif # APM
 
-source "arch/x86/kernel/cpu/cpufreq/Kconfig"
+source "drivers/cpufreq/Kconfig"
 
 source "drivers/cpuidle/Kconfig"
 
@@ -2076,7 +2068,7 @@ config OLPC
 	depends on !X86_PAE
 	select GPIOLIB
 	select OF
-	select OF_PROMTREE if PROC_DEVICETREE
+	select OF_PROMTREE
 	---help---
 	  Add support for detecting the unique features of the OLPC
 	  XO hardware.
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index d161e939df62..6a7cfdf8ff69 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -1,6 +1,4 @@
 # Put here option for CPU selection and depending optimization
-if !X86_ELAN
-
 choice
 	prompt "Processor family"
 	default M686 if X86_32
@@ -203,6 +201,14 @@ config MWINCHIP3D
 	  stores for this CPU, which can increase performance of some
 	  operations.
 
+config MELAN
+	bool "AMD Elan"
+	depends on X86_32
+	---help---
+	  Select this for an AMD Elan processor.
+
+	  Do not use this option for K6/Athlon/Opteron processors!
+
 config MGEODEGX1
 	bool "GeodeGX1"
 	depends on X86_32
@@ -292,8 +298,6 @@ config X86_GENERIC
 	  This is really intended for distributors who need more
 	  generic optimizations.
 
-endif
-
 #
 # Define implied options from the CPU selection here
 config X86_INTERNODE_CACHE_SHIFT
@@ -312,7 +316,7 @@ config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || MPSC
 	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
-	default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
+	default "4" if MELAN || M486 || M386 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
 
 config X86_XADD
@@ -358,7 +362,7 @@ config X86_POPAD_OK
 
 config X86_ALIGNMENT_16
 	def_bool y
-	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
+	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
 
 config X86_INTEL_USERCOPY
 	def_bool y
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 615e18810f48..c0f8a5c88910 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -66,26 +66,6 @@ config DEBUG_STACKOVERFLOW
 	  This option will cause messages to be printed if free stack space
 	  drops below a certain limit.
 
-config DEBUG_STACK_USAGE
-	bool "Stack utilization instrumentation"
-	depends on DEBUG_KERNEL
-	---help---
-	  Enables the display of the minimum amount of free stack which each
-	  task has ever had available in the sysrq-T and sysrq-P debug output.
-
-	  This option will slow down process creation somewhat.
-
-config DEBUG_PER_CPU_MAPS
-	bool "Debug access to per_cpu maps"
-	depends on DEBUG_KERNEL
-	depends on SMP
-	---help---
-	  Say Y to verify that the per_cpu map being accessed has
-	  been setup. Adds a fair amount of code to kernel memory
-	  and decreases performance.
-
-	  Say N if unsure.
-
 config X86_PTDUMP
 	bool "Export kernel pagetable layout to userspace via debugfs"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index f2ee1abb1df9..86cee7b749e1 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -37,7 +37,7 @@ cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=
 	$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
 
 # AMD Elan support
-cflags-$(CONFIG_X86_ELAN) += -march=i486
+cflags-$(CONFIG_MELAN) += -march=i486
 
 # Geode GX1 support
 cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index cae3feb1035e..db75d07c3645 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -91,7 +91,7 @@ static int detect_memory_e801(void)
 	if (oreg.ax > 15*1024) {
 		return -1;	/* Bogus! */
 	} else if (oreg.ax == 15*1024) {
-		boot_params.alt_mem_k = (oreg.dx << 6) + oreg.ax;
+		boot_params.alt_mem_k = (oreg.bx << 6) + oreg.ax;
 	} else {
 		/*
 		 * This ignores memory above 16MB if we have a memory
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 6f9872658dd2..2bf18059fbea 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -10,7 +10,6 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_AUDIT=y
 CONFIG_LOG_BUF_SHIFT=18
 CONFIG_CGROUPS=y
-CONFIG_CGROUP_NS=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CPUSETS=y
 CONFIG_CGROUP_CPUACCT=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index ee01a9d5d4f0..22a0dc8e51dd 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -11,7 +11,6 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_AUDIT=y
 CONFIG_LOG_BUF_SHIFT=18
 CONFIG_CGROUPS=y
-CONFIG_CGROUP_NS=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CPUSETS=y
 CONFIG_CGROUP_CPUACCT=y
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 1a58ad89fdf7..c04f1b7a9139 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,8 +2,6 @@
 # Arch-specific CryptoAPI modules.
 #
 
-obj-$(CONFIG_CRYPTO_FPU) += fpu.o
-
 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
 obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
 obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
@@ -24,6 +22,6 @@ aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
 
-aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
+aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
 
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 2577613fb32b..feee8ff1d05e 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -94,6 +94,10 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
 asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
+
+int crypto_fpu_init(void);
+void crypto_fpu_exit(void);
+
 #ifdef CONFIG_X86_64
 asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
@@ -1257,6 +1261,8 @@ static int __init aesni_init(void)
 		return -ENODEV;
 	}
 
+	if ((err = crypto_fpu_init()))
+		goto fpu_err;
 	if ((err = crypto_register_alg(&aesni_alg)))
 		goto aes_err;
 	if ((err = crypto_register_alg(&__aesni_alg)))
@@ -1334,6 +1340,7 @@ blk_ecb_err:
 __aes_err:
 	crypto_unregister_alg(&aesni_alg);
 aes_err:
+fpu_err:
 	return err;
 }
 
@@ -1363,6 +1370,8 @@ static void __exit aesni_exit(void)
 	crypto_unregister_alg(&blk_ecb_alg);
 	crypto_unregister_alg(&__aesni_alg);
 	crypto_unregister_alg(&aesni_alg);
+
+	crypto_fpu_exit();
 }
 
 module_init(aesni_init);
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
index 1a8f8649c035..98d7a188f46b 100644
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -150,18 +150,12 @@ static struct crypto_template crypto_fpu_tmpl = {
 	.module = THIS_MODULE,
 };
 
-static int __init crypto_fpu_module_init(void)
+int __init crypto_fpu_init(void)
 {
 	return crypto_register_template(&crypto_fpu_tmpl);
 }
 
-static void __exit crypto_fpu_module_exit(void)
+void __exit crypto_fpu_exit(void)
 {
 	crypto_unregister_template(&crypto_fpu_tmpl);
 }
-
-module_init(crypto_fpu_module_init);
-module_exit(crypto_fpu_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("FPU block cipher wrapper");
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 849a9d23c71d..95f5826be458 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -848,4 +848,5 @@ ia32_sys_call_table:
 	.quad compat_sys_open_by_handle_at
 	.quad compat_sys_clock_adjtime
 	.quad sys_syncfs
+	.quad compat_sys_sendmmsg	/* 345 */
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 12e0e7dd869c..416d865eae39 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -183,8 +183,6 @@ static inline void disable_acpi(void) { }
 
 #define ARCH_HAS_POWER_INIT 1
 
-struct bootnode;
-
 #ifdef CONFIG_ACPI_NUMA
 extern int acpi_numa;
 extern int x86_acpi_numa_init(void);
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index a63a68be1cce..94d420b360d1 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -15,4 +15,13 @@
 .endm
 #endif
 
+.macro altinstruction_entry orig alt feature orig_len alt_len
+	.align 8
+	.quad \orig
+	.quad \alt
+	.word \feature
+	.byte \orig_len
+	.byte \alt_len
+.endm
+
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 13009d1af99a..bf535f947e8c 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -4,7 +4,6 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/stringify.h>
-#include <linux/jump_label.h>
 #include <asm/asm.h>
 
 /*
@@ -191,12 +190,4 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
 extern void text_poke_smp_batch(struct text_poke_param *params, int n);
 
-#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
-#define IDEAL_NOP_SIZE_5 5
-extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
-extern void arch_init_ideal_nop5(void);
-#else
-static inline void arch_init_ideal_nop5(void) {}
-#endif
-
 #endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index 916bc8111a01..55d95eb789b3 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -19,13 +19,12 @@
 #ifndef _ASM_X86_AMD_IOMMU_PROTO_H
 #define _ASM_X86_AMD_IOMMU_PROTO_H
 
-struct amd_iommu;
+#include <asm/amd_iommu_types.h>
 
 extern int amd_iommu_init_dma_ops(void);
 extern int amd_iommu_init_passthrough(void);
+extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
 extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
-extern void amd_iommu_flush_all_domains(void);
-extern void amd_iommu_flush_all_devices(void);
 extern void amd_iommu_apply_erratum_63(u16 devid);
 extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
 extern int amd_iommu_init_devices(void);
@@ -44,4 +43,12 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev)
 	       (pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
 }
 
+static inline bool iommu_feature(struct amd_iommu *iommu, u64 f)
+{
+	if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
+		return false;
+
+	return !!(iommu->features & f);
+}
+
 #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index e3509fc303bf..4c9982995414 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -68,12 +68,25 @@
 #define MMIO_CONTROL_OFFSET 0x0018
 #define MMIO_EXCL_BASE_OFFSET 0x0020
 #define MMIO_EXCL_LIMIT_OFFSET 0x0028
+#define MMIO_EXT_FEATURES 0x0030
 #define MMIO_CMD_HEAD_OFFSET 0x2000
 #define MMIO_CMD_TAIL_OFFSET 0x2008
 #define MMIO_EVT_HEAD_OFFSET 0x2010
 #define MMIO_EVT_TAIL_OFFSET 0x2018
 #define MMIO_STATUS_OFFSET 0x2020
 
+
+/* Extended Feature Bits */
+#define FEATURE_PREFETCH (1ULL<<0)
+#define FEATURE_PPR (1ULL<<1)
+#define FEATURE_X2APIC (1ULL<<2)
+#define FEATURE_NX (1ULL<<3)
+#define FEATURE_GT (1ULL<<4)
+#define FEATURE_IA (1ULL<<6)
+#define FEATURE_GA (1ULL<<7)
+#define FEATURE_HE (1ULL<<8)
+#define FEATURE_PC (1ULL<<9)
+
 /* MMIO status bits */
 #define MMIO_STATUS_COM_WAIT_INT_MASK 0x04
 
@@ -113,7 +126,9 @@
 /* command specific defines */
 #define CMD_COMPL_WAIT 0x01
 #define CMD_INV_DEV_ENTRY 0x02
 #define CMD_INV_IOMMU_PAGES 0x03
+#define CMD_INV_IOTLB_PAGES 0x04
+#define CMD_INV_ALL 0x08
 
 #define CMD_COMPL_WAIT_STORE_MASK 0x01
 #define CMD_COMPL_WAIT_INT_MASK 0x02
@@ -215,6 +230,8 @@
 #define IOMMU_PTE_IR (1ULL << 61)
 #define IOMMU_PTE_IW (1ULL << 62)
 
+#define DTE_FLAG_IOTLB 0x01
+
 #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
 #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
 #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
@@ -227,6 +244,7 @@
 /* IOMMU capabilities */
 #define IOMMU_CAP_IOTLB 24
 #define IOMMU_CAP_NPCACHE 26
+#define IOMMU_CAP_EFR 27
 
 #define MAX_DOMAIN_ID 65536
 
@@ -249,6 +267,8 @@ extern bool amd_iommu_dump;
 
 /* global flag if IOMMUs cache non-present entries */
 extern bool amd_iommu_np_cache;
+/* Only true if all IOMMUs support device IOTLBs */
+extern bool amd_iommu_iotlb_sup;
 
 /*
  * Make iterating over all IOMMUs easier
@@ -371,6 +391,9 @@ struct amd_iommu {
 	/* flags read from acpi table */
 	u8 acpi_flags;
 
+	/* Extended features */
+	u64 features;
+
 	/*
 	 * Capability pointer. There could be more than one IOMMU per PCI
 	 * device function if there are more than one AMD IOMMU capability
@@ -409,9 +432,6 @@ struct amd_iommu {
 	/* if one, we need to send a completion wait command */
 	bool need_sync;
 
-	/* becomes true if a command buffer reset is running */
-	bool reset_in_progress;
-
 	/* default dma_ops domain for that IOMMU */
 	struct dma_ops_domain *default_dom;
 
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 331682231bb4..67f87f257611 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -11,7 +11,6 @@ struct amd_nb_bus_dev_range {
 
 extern const struct pci_device_id amd_nb_misc_ids[];
 extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
-struct bootnode;
 
 extern bool early_is_amd_nb(u32 value);
 extern int amd_cache_northbridges(void);
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 2b7d573be549..4a0b7c7e2cce 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -363,7 +363,12 @@ struct apic {
 	 */
 	int (*x86_32_early_logical_apicid)(int cpu);
 
-	/* determine CPU -> NUMA node mapping */
+	/*
+	 * Optional method called from setup_local_APIC() after logical
+	 * apicid is guaranteed to be known to initialize apicid -> node
+	 * mapping if NUMA initialization hasn't done so already. Don't
+	 * add new users.
+	 */
 	int (*x86_32_numa_cpu_node)(int cpu);
 #endif
 };
@@ -376,6 +381,26 @@ struct apic {
 extern struct apic *apic;
 
 /*
+ * APIC drivers are probed based on how they are listed in the .apicdrivers
+ * section. So the order is important and enforced by the ordering
+ * of different apic driver files in the Makefile.
+ *
+ * For the files having two apic drivers, we use apic_drivers()
+ * to enforce the order with in them.
+ */
+#define apic_driver(sym) \
+	static struct apic *__apicdrivers_##sym __used \
+	__aligned(sizeof(struct apic *)) \
+	__section(.apicdrivers) = { &sym }
+
+#define apic_drivers(sym1, sym2) \
+	static struct apic *__apicdrivers_##sym1##sym2[2] __used \
+	__aligned(sizeof(struct apic *)) \
+	__section(.apicdrivers) = { &sym1, &sym2 }
+
+extern struct apic *__apicdrivers[], *__apicdrivers_end[];
+
+/*
  * APIC functionality to boot other CPUs - only used on SMP:
  */
 #ifdef CONFIG_SMP
@@ -453,15 +478,10 @@ static inline unsigned default_get_apic_id(unsigned long x)
 #define DEFAULT_TRAMPOLINE_PHYS_HIGH 0x469
 
 #ifdef CONFIG_X86_64
-extern struct apic apic_flat;
-extern struct apic apic_physflat;
-extern struct apic apic_x2apic_cluster;
-extern struct apic apic_x2apic_phys;
 extern int default_acpi_madt_oem_check(char *, char *);
 
 extern void apic_send_IPI_self(int vector);
 
-extern struct apic apic_x2apic_uv_x;
 DECLARE_PER_CPU(int, x2apic_extra_bits);
 
 extern int default_cpu_present_to_apicid(int mps_cpu);
@@ -475,7 +495,7 @@ static inline void default_wait_for_init_deassert(atomic_t *deassert)
 	return;
 }
 
-extern void generic_bigsmp_probe(void);
+extern struct apic *generic_bigsmp_probe(void);
 
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -511,8 +531,6 @@ extern struct apic apic_noop;
 
 #ifdef CONFIG_X86_32
 
-extern struct apic apic_default;
-
 static inline int noop_x86_32_early_logical_apicid(int cpu)
 {
 	return BAD_APICID;
@@ -537,8 +555,6 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
 	return cpuid_apic >> index_msb;
 }
 
-extern int default_x86_32_numa_cpu_node(int cpu);
-
 #endif
 
 static inline unsigned int
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index d87988bacf3e..34595d5e1038 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -78,6 +78,7 @@
 #define APIC_DEST_LOGICAL 0x00800
 #define APIC_DEST_PHYSICAL 0x00000
 #define APIC_DM_FIXED 0x00000
+#define APIC_DM_FIXED_MASK 0x00700
 #define APIC_DM_LOWEST 0x00100
 #define APIC_DM_SMI 0x00200
 #define APIC_DM_REMRD 0x00300
diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h
index 3c7521063d3f..aa6a3170ab5a 100644
--- a/arch/x86/include/asm/bios_ebda.h
+++ b/arch/x86/include/asm/bios_ebda.h
@@ -4,16 +4,40 @@
 #include <asm/io.h>
 
 /*
- * there is a real-mode segmented pointer pointing to the
- * 4K EBDA area at 0x40E.
+ * Returns physical address of EBDA. Returns 0 if there is no EBDA.
  */
 static inline unsigned int get_bios_ebda(void)
 {
+	/*
+	 * There is a real-mode segmented pointer pointing to the
+	 * 4K EBDA area at 0x40E.
+	 */
 	unsigned int address = *(unsigned short *)phys_to_virt(0x40E);
 	address <<= 4;
 	return address;	/* 0 means none */
 }
 
+/*
+ * Return the sanitized length of the EBDA in bytes, if it exists.
+ */
+static inline unsigned int get_bios_ebda_length(void)
+{
+	unsigned int address;
+	unsigned int length;
+
+	address = get_bios_ebda();
+	if (!address)
+		return 0;
+
+	/* EBDA length is byte 0 of the EBDA (stored in KiB) */
+	length = *(unsigned char *)phys_to_virt(address);
+	length <<= 10;
+
+	/* Trim the length if it extends beyond 640KiB */
+	length = min_t(unsigned int, (640 * 1024) - address, length);
+	return length;
+}
+
 void reserve_ebda_region(void);
 
 #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 91f3e087cf21..5dc6acc98dbd 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -195,6 +195,8 @@
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
 #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+#define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
@@ -207,8 +209,7 @@ extern const char * const x86_power_flags[32];
 #define test_cpu_cap(c, bit) \
 	test_bit(bit, (unsigned long *)((c)->x86_capability))
 
-#define cpu_has(c, bit) \
-	(__builtin_constant_p(bit) && \
+#define REQUIRED_MASK_BIT_SET(bit) \
 	( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \
 	  (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \
 	  (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \
@@ -218,10 +219,16 @@ extern const char * const x86_power_flags[32];
 	  (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \
 	  (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) || \
 	  (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \
-	  (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) ) \
-	? 1 : \
+	  (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )
+
+#define cpu_has(c, bit) \
+	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
 	 test_cpu_cap(c, bit))
 
+#define this_cpu_has(bit) \
+	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
+	 x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
+
 #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
 
 #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
index 057099e5faba..0bdb0c54d9a1 100644
--- a/arch/x86/include/asm/dma.h
+++ b/arch/x86/include/asm/dma.h
@@ -69,22 +69,18 @@
 
 #define MAX_DMA_CHANNELS 8
 
-#ifdef CONFIG_X86_32
-
-/* The maximum address that we can perform a DMA transfer to on this platform */
-#define MAX_DMA_ADDRESS (PAGE_OFFSET + 0x1000000)
-
-#else
-
 /* 16MB ISA DMA zone */
 #define MAX_DMA_PFN ((16 * 1024 * 1024) >> PAGE_SHIFT)
 
 /* 4GB broken PCI/AGP hardware bus master zone */
 #define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
 
+#ifdef CONFIG_X86_32
+/* The maximum address that we can perform a DMA transfer to on this platform */
+#define MAX_DMA_ADDRESS (PAGE_OFFSET + 0x1000000)
+#else
 /* Compat define for old dma zone */
 #define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT))
-
 #endif
 
 /* 8237 DMA controllers */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 8e4a16508d4e..7093e4a6a0bc 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
 #endif /* CONFIG_X86_32 */
 
 extern int add_efi_memmap;
+extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
 extern void efi_memblock_x86_reserve_range(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index db24c2278be0..268c783ab1c0 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -38,11 +38,10 @@ extern void mcount(void);
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
 	/*
-	 * call mcount is "e8 <4 byte offset>"
-	 * The addr points to the 4 byte offset and the caller of this
-	 * function wants the pointer to e8. Simply subtract one.
+	 * addr is the address of the mcount call instruction.
+	 * recordmcount does the necessary offset calculation.
 	 */
-	return addr - 1;
+	return addr;
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
index 43085bfc99c3..156cd5d18d2a 100644
--- a/arch/x86/include/asm/gart.h
+++ b/arch/x86/include/asm/gart.h
@@ -66,7 +66,7 @@ static inline void gart_set_size_and_enable(struct pci_dev *dev, u32 order)
 	 * Don't enable translation but enable GART IO and CPU accesses.
 	 * Also, set DISTLBWALKPRB since GART tables memory is UC.
 	 */
-	ctl = DISTLBWALKPRB | order << 1;
+	ctl = order << 1;
 
 	pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
 }
@@ -75,17 +75,17 @@ static inline void enable_gart_translation(struct pci_dev *dev, u64 addr)
 {
 	u32 tmp, ctl;
 
 	/* address of the mappings table */
 	addr >>= 12;
 	tmp = (u32) addr<<4;
 	tmp &= ~0xf;
 	pci_write_config_dword(dev, AMD64_GARTTABLEBASE, tmp);
 
 	/* Enable GART translation for this hammer. */
 	pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
-	ctl |= GARTEN;
+	ctl |= GARTEN | DISTLBWALKPRB;
 	ctl &= ~(DISGARTCPU | DISGARTIO);
 	pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
 }
 
 static inline int aperture_valid(u64 aper_base, u32 aper_size, u32 min_size)
diff --git a/arch/x86/include/asm/i8253.h b/arch/x86/include/asm/i8253.h
index fc1f579fb965..65aaa91d5850 100644
--- a/arch/x86/include/asm/i8253.h
+++ b/arch/x86/include/asm/i8253.h
@@ -6,6 +6,8 @@
 #define PIT_CH0 0x40
 #define PIT_CH2 0x42
 
+#define PIT_LATCH LATCH
+
 extern raw_spinlock_t i8253_lock;
 
 extern struct clock_event_device *global_clock_event;
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 072273082528..d02804d650c4 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -38,7 +38,6 @@
 
 #include <linux/string.h>
 #include <linux/compiler.h>
-#include <asm-generic/int-ll64.h>
 #include <asm/page.h>
 
 #include <xen/xen.h>
@@ -87,27 +86,6 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
 build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
 build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
 
-#else
-
-static inline __u64 readq(const volatile void __iomem *addr)
-{
-	const volatile u32 __iomem *p = addr;
-	u32 low, high;
-
-	low = readl(p);
-	high = readl(p + 1);
-
-	return low + ((u64)high << 32);
-}
-
-static inline void writeq(__u64 val, volatile void __iomem *addr)
-{
-	writel(val, addr);
-	writel(val >> 32, addr+4);
-}
-
-#endif
-
 #define readq_relaxed(a) readq(a)
 
 #define __raw_readq(a) readq(a)
@@ -117,6 +95,8 @@ static inline void writeq(__u64 val, volatile void __iomem *addr)
 #define readq readq
 #define writeq writeq
 
+#endif
+
 /**
  * virt_to_phys - map virtual addresses to physical
  * @address: address to remap
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index c4bd267dfc50..690d1cc9a877 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -105,12 +105,12 @@ struct IR_IO_APIC_route_entry {
  * # of IO-APICs and # of IRQ routing registers
  */
 extern int nr_ioapics;
-extern int nr_ioapic_registers[MAX_IO_APICS];
 
-#define MP_MAX_IOAPIC_PIN 127
+extern int mpc_ioapic_id(int ioapic);
+extern unsigned int mpc_ioapic_addr(int ioapic);
+extern struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic);
 
-/* I/O APIC entries */
-extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
+#define MP_MAX_IOAPIC_PIN 127
 
 /* # of MP IRQ source entries */
 extern int mp_irq_entries;
@@ -150,13 +150,11 @@ void setup_IO_APIC_irq_extra(u32 gsi);
 extern void ioapic_and_gsi_init(void);
 extern void ioapic_insert_resources(void);
 
-int io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr);
+int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
 
-extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
-extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
-extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
-extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
-extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
+extern int save_ioapic_entries(void);
+extern void mask_ioapic_entries(void);
+extern int restore_ioapic_entries(void);
 
 extern int get_nr_irqs_gsi(void);
 
@@ -192,19 +190,13 @@ struct io_apic_irq_attr;
 static inline int io_apic_set_pci_routing(struct device *dev, int irq,
 					  struct io_apic_irq_attr *irq_attr) { return 0; }
 
-static inline struct IO_APIC_route_entry **alloc_ioapic_entries(void)
-{
-	return NULL;
-}
-
-static inline void free_ioapic_entries(struct IO_APIC_route_entry **ent) { }
-static inline int save_IO_APIC_setup(struct IO_APIC_route_entry **ent)
+static inline int save_ioapic_entries(void)
 {
 	return -ENOMEM;
 }
 
-static inline void mask_IO_APIC_setup(struct IO_APIC_route_entry **ent) { }
-static inline int restore_IO_APIC_setup(struct IO_APIC_route_entry **ent)
+static inline void mask_ioapic_entries(void) { }
+static inline int restore_ioapic_entries(void)
 {
 	return -ENOMEM;
 }
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 574dbc22893a..a32b18ce6ead 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -5,20 +5,25 @@
 
 #include <linux/types.h>
 #include <asm/nops.h>
+#include <asm/asm.h>
 
 #define JUMP_LABEL_NOP_SIZE 5
 
-# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
+#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
 
-# define JUMP_LABEL(key, label) \
-	do { \
-		asm goto("1:" \
-			JUMP_LABEL_INITIAL_NOP \
-			".pushsection __jump_table, \"aw\" \n\t" \
-			_ASM_PTR "1b, %l[" #label "], %c0 \n\t" \
-			".popsection \n\t" \
-			: : "i" (key) : : label); \
-	} while (0)
+static __always_inline bool arch_static_branch(struct jump_label_key *key)
+{
+	asm goto("1:"
+		JUMP_LABEL_INITIAL_NOP
+		".pushsection __jump_table, \"aw\" \n\t"
+		_ASM_ALIGN "\n\t"
+		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
+		".popsection \n\t"
+		: : "i" (key) : : l_yes);
+	return false;
+l_yes:
+	return true;
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h
index 396f5b5fc4d7..77e95f54570a 100644
--- a/arch/x86/include/asm/kgdb.h
+++ b/arch/x86/include/asm/kgdb.h
@@ -77,6 +77,7 @@ static inline void arch_kgdb_breakpoint(void)
 }
 #define BREAK_INSTR_SIZE 1
 #define CACHE_FLUSH_IS_SAFE 1
+#define GDB_ADJUSTS_BREAK_OFFSET
 
 extern int kgdb_ll_trap(int cmd, const char *str,
 			struct pt_regs *regs, long err, int trap, int sig);
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 0f5213564326..0049211959c0 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -14,6 +14,8 @@
 #include <asm/desc_defs.h>
 
 struct x86_emulate_ctxt;
+enum x86_intercept;
+enum x86_intercept_stage;
 
 struct x86_exception {
 	u8 vector;
@@ -24,6 +26,24 @@ struct x86_exception {
 };
 
 /*
+ * This struct is used to carry enough information from the instruction
+ * decoder to main KVM so that a decision can be made whether the
+ * instruction needs to be intercepted or not.
+ */
+struct x86_instruction_info {
+	u8 intercept;	/* which intercept */
+	u8 rep_prefix;	/* rep prefix? */
+	u8 modrm_mod;	/* mod part of modrm */
+	u8 modrm_reg;	/* index of register used */
+	u8 modrm_rm;	/* rm part of modrm */
+	u64 src_val;	/* value of source operand */
+	u8 src_bytes;	/* size of source operand */
+	u8 dst_bytes;	/* size of destination operand */
+	u8 ad_bytes;	/* size of src/dst address */
+	u64 next_rip;	/* rip following the instruction */
+};
+
+/*
  * x86_emulate_ops:
  *
  * These operations represent the instruction emulator's interface to memory.
@@ -62,6 +82,7 @@ struct x86_exception {
 #define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */
 #define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */
 #define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */
+#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */
 
 struct x86_emulate_ops {
 	/*
@@ -71,8 +92,9 @@ struct x86_emulate_ops {
 	 * @val: [OUT] Value read from memory, zero-extended to 'u_long'.
 	 * @bytes: [IN ] Number of bytes to read from memory.
 	 */
-	int (*read_std)(unsigned long addr, void *val,
-			unsigned int bytes, struct kvm_vcpu *vcpu,
+	int (*read_std)(struct x86_emulate_ctxt *ctxt,
+			unsigned long addr, void *val,
+			unsigned int bytes,
 			struct x86_exception *fault);
 
 	/*
@@ -82,8 +104,8 @@ struct x86_emulate_ops {
 	 * @val: [OUT] Value write to memory, zero-extended to 'u_long'.
 	 * @bytes: [IN ] Number of bytes to write to memory.
 	 */
-	int (*write_std)(unsigned long addr, void *val,
-			 unsigned int bytes, struct kvm_vcpu *vcpu,
+	int (*write_std)(struct x86_emulate_ctxt *ctxt,
+			 unsigned long addr, void *val, unsigned int bytes,
 			 struct x86_exception *fault);
 	/*
 	 * fetch: Read bytes of standard (non-emulated/special) memory.
@@ -92,8 +114,8 @@ struct x86_emulate_ops {
 	 * @val: [OUT] Value read from memory, zero-extended to 'u_long'.
 	 * @bytes: [IN ] Number of bytes to read from memory.
 	 */
-	int (*fetch)(unsigned long addr, void *val,
+	int (*fetch)(struct x86_emulate_ctxt *ctxt,
96 | unsigned int bytes, struct kvm_vcpu *vcpu, | 118 | unsigned long addr, void *val, unsigned int bytes, |
97 | struct x86_exception *fault); | 119 | struct x86_exception *fault); |
98 | 120 | ||
99 | /* | 121 | /* |
@@ -102,11 +124,9 @@ struct x86_emulate_ops { | |||
102 | * @val: [OUT] Value read from memory, zero-extended to 'u_long'. | 124 | * @val: [OUT] Value read from memory, zero-extended to 'u_long'. |
103 | * @bytes: [IN ] Number of bytes to read from memory. | 125 | * @bytes: [IN ] Number of bytes to read from memory. |
104 | */ | 126 | */ |
105 | int (*read_emulated)(unsigned long addr, | 127 | int (*read_emulated)(struct x86_emulate_ctxt *ctxt, |
106 | void *val, | 128 | unsigned long addr, void *val, unsigned int bytes, |
107 | unsigned int bytes, | 129 | struct x86_exception *fault); |
108 | struct x86_exception *fault, | ||
109 | struct kvm_vcpu *vcpu); | ||
110 | 130 | ||
111 | /* | 131 | /* |
112 | * write_emulated: Write bytes to emulated/special memory area. | 132 | * write_emulated: Write bytes to emulated/special memory area. |
@@ -115,11 +135,10 @@ struct x86_emulate_ops { | |||
115 | * required). | 135 | * required). |
116 | * @bytes: [IN ] Number of bytes to write to memory. | 136 | * @bytes: [IN ] Number of bytes to write to memory. |
117 | */ | 137 | */ |
118 | int (*write_emulated)(unsigned long addr, | 138 | int (*write_emulated)(struct x86_emulate_ctxt *ctxt, |
119 | const void *val, | 139 | unsigned long addr, const void *val, |
120 | unsigned int bytes, | 140 | unsigned int bytes, |
121 | struct x86_exception *fault, | 141 | struct x86_exception *fault); |
122 | struct kvm_vcpu *vcpu); | ||
123 | 142 | ||
124 | /* | 143 | /* |
125 | * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an | 144 | * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an |
@@ -129,40 +148,54 @@ struct x86_emulate_ops { | |||
129 | * @new: [IN ] Value to write to @addr. | 148 | * @new: [IN ] Value to write to @addr. |
130 | * @bytes: [IN ] Number of bytes to access using CMPXCHG. | 149 | * @bytes: [IN ] Number of bytes to access using CMPXCHG. |
131 | */ | 150 | */ |
132 | int (*cmpxchg_emulated)(unsigned long addr, | 151 | int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt, |
152 | unsigned long addr, | ||
133 | const void *old, | 153 | const void *old, |
134 | const void *new, | 154 | const void *new, |
135 | unsigned int bytes, | 155 | unsigned int bytes, |
136 | struct x86_exception *fault, | 156 | struct x86_exception *fault); |
137 | struct kvm_vcpu *vcpu); | 157 | void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr); |
138 | 158 | ||
139 | int (*pio_in_emulated)(int size, unsigned short port, void *val, | 159 | int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt, |
140 | unsigned int count, struct kvm_vcpu *vcpu); | 160 | int size, unsigned short port, void *val, |
141 | 161 | unsigned int count); | |
142 | int (*pio_out_emulated)(int size, unsigned short port, const void *val, | 162 | |
143 | unsigned int count, struct kvm_vcpu *vcpu); | 163 | int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt, |
144 | 164 | int size, unsigned short port, const void *val, | |
145 | bool (*get_cached_descriptor)(struct desc_struct *desc, u32 *base3, | 165 | unsigned int count); |
146 | int seg, struct kvm_vcpu *vcpu); | 166 | |
147 | void (*set_cached_descriptor)(struct desc_struct *desc, u32 base3, | 167 | bool (*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector, |
148 | int seg, struct kvm_vcpu *vcpu); | 168 | struct desc_struct *desc, u32 *base3, int seg); |
149 | u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); | 169 | void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector, |
150 | void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); | 170 | struct desc_struct *desc, u32 base3, int seg); |
151 | unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu); | 171 | unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt, |
152 | void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); | 172 | int seg); |
153 | void (*get_idt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); | 173 | void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); |
154 | ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); | 174 | void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); |
155 | int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); | 175 | void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); |
156 | int (*cpl)(struct kvm_vcpu *vcpu); | 176 | void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); |
157 | int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu); | 177 | ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); |
158 | int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); | 178 | int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); |
159 | int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | 179 | int (*cpl)(struct x86_emulate_ctxt *ctxt); |
160 | int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); | 180 | int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); |
181 | int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); | ||
182 | int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); | ||
183 | int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); | ||
184 | void (*halt)(struct x86_emulate_ctxt *ctxt); | ||
185 | void (*wbinvd)(struct x86_emulate_ctxt *ctxt); | ||
186 | int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); | ||
187 | void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ | ||
188 | void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ | ||
189 | int (*intercept)(struct x86_emulate_ctxt *ctxt, | ||
190 | struct x86_instruction_info *info, | ||
191 | enum x86_intercept_stage stage); | ||
161 | }; | 192 | }; |
162 | 193 | ||
194 | typedef u32 __attribute__((vector_size(16))) sse128_t; | ||
195 | |||
163 | /* Type, address-of, and value of an instruction's operand. */ | 196 | /* Type, address-of, and value of an instruction's operand. */ |
164 | struct operand { | 197 | struct operand { |
165 | enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; | 198 | enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type; |
166 | unsigned int bytes; | 199 | unsigned int bytes; |
167 | union { | 200 | union { |
168 | unsigned long orig_val; | 201 | unsigned long orig_val; |
@@ -174,11 +207,13 @@ struct operand { | |||
174 | ulong ea; | 207 | ulong ea; |
175 | unsigned seg; | 208 | unsigned seg; |
176 | } mem; | 209 | } mem; |
210 | unsigned xmm; | ||
177 | } addr; | 211 | } addr; |
178 | union { | 212 | union { |
179 | unsigned long val; | 213 | unsigned long val; |
180 | u64 val64; | 214 | u64 val64; |
181 | char valptr[sizeof(unsigned long) + 2]; | 215 | char valptr[sizeof(unsigned long) + 2]; |
216 | sse128_t vec_val; | ||
182 | }; | 217 | }; |
183 | }; | 218 | }; |
184 | 219 | ||
@@ -197,6 +232,7 @@ struct read_cache { | |||
197 | struct decode_cache { | 232 | struct decode_cache { |
198 | u8 twobyte; | 233 | u8 twobyte; |
199 | u8 b; | 234 | u8 b; |
235 | u8 intercept; | ||
200 | u8 lock_prefix; | 236 | u8 lock_prefix; |
201 | u8 rep_prefix; | 237 | u8 rep_prefix; |
202 | u8 op_bytes; | 238 | u8 op_bytes; |
@@ -209,6 +245,7 @@ struct decode_cache { | |||
209 | u8 seg_override; | 245 | u8 seg_override; |
210 | unsigned int d; | 246 | unsigned int d; |
211 | int (*execute)(struct x86_emulate_ctxt *ctxt); | 247 | int (*execute)(struct x86_emulate_ctxt *ctxt); |
248 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); | ||
212 | unsigned long regs[NR_VCPU_REGS]; | 249 | unsigned long regs[NR_VCPU_REGS]; |
213 | unsigned long eip; | 250 | unsigned long eip; |
214 | /* modrm */ | 251 | /* modrm */ |
@@ -227,17 +264,15 @@ struct x86_emulate_ctxt { | |||
227 | struct x86_emulate_ops *ops; | 264 | struct x86_emulate_ops *ops; |
228 | 265 | ||
229 | /* Register state before/after emulation. */ | 266 | /* Register state before/after emulation. */ |
230 | struct kvm_vcpu *vcpu; | ||
231 | |||
232 | unsigned long eflags; | 267 | unsigned long eflags; |
233 | unsigned long eip; /* eip before instruction emulation */ | 268 | unsigned long eip; /* eip before instruction emulation */ |
234 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ | 269 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ |
235 | int mode; | 270 | int mode; |
236 | u32 cs_base; | ||
237 | 271 | ||
238 | /* interruptibility state, as a result of execution of STI or MOV SS */ | 272 | /* interruptibility state, as a result of execution of STI or MOV SS */ |
239 | int interruptibility; | 273 | int interruptibility; |
240 | 274 | ||
275 | bool guest_mode; /* guest running a nested guest */ | ||
241 | bool perm_ok; /* do not check permissions if true */ | 276 | bool perm_ok; /* do not check permissions if true */ |
242 | bool only_vendor_specific_insn; | 277 | bool only_vendor_specific_insn; |
243 | 278 | ||
@@ -249,8 +284,8 @@ struct x86_emulate_ctxt { | |||
249 | }; | 284 | }; |
250 | 285 | ||
251 | /* Repeat String Operation Prefix */ | 286 | /* Repeat String Operation Prefix */ |
252 | #define REPE_PREFIX 1 | 287 | #define REPE_PREFIX 0xf3 |
253 | #define REPNE_PREFIX 2 | 288 | #define REPNE_PREFIX 0xf2 |
254 | 289 | ||
255 | /* Execution mode, passed to the emulator. */ | 290 | /* Execution mode, passed to the emulator. */ |
256 | #define X86EMUL_MODE_REAL 0 /* Real mode. */ | 291 | #define X86EMUL_MODE_REAL 0 /* Real mode. */ |
@@ -259,6 +294,69 @@ struct x86_emulate_ctxt { | |||
259 | #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ | 294 | #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ |
260 | #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ | 295 | #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ |
261 | 296 | ||
297 | /* any protected mode */ | ||
298 | #define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \ | ||
299 | X86EMUL_MODE_PROT64) | ||
300 | |||
301 | enum x86_intercept_stage { | ||
302 | X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */ | ||
303 | X86_ICPT_PRE_EXCEPT, | ||
304 | X86_ICPT_POST_EXCEPT, | ||
305 | X86_ICPT_POST_MEMACCESS, | ||
306 | }; | ||
307 | |||
308 | enum x86_intercept { | ||
309 | x86_intercept_none, | ||
310 | x86_intercept_cr_read, | ||
311 | x86_intercept_cr_write, | ||
312 | x86_intercept_clts, | ||
313 | x86_intercept_lmsw, | ||
314 | x86_intercept_smsw, | ||
315 | x86_intercept_dr_read, | ||
316 | x86_intercept_dr_write, | ||
317 | x86_intercept_lidt, | ||
318 | x86_intercept_sidt, | ||
319 | x86_intercept_lgdt, | ||
320 | x86_intercept_sgdt, | ||
321 | x86_intercept_lldt, | ||
322 | x86_intercept_sldt, | ||
323 | x86_intercept_ltr, | ||
324 | x86_intercept_str, | ||
325 | x86_intercept_rdtsc, | ||
326 | x86_intercept_rdpmc, | ||
327 | x86_intercept_pushf, | ||
328 | x86_intercept_popf, | ||
329 | x86_intercept_cpuid, | ||
330 | x86_intercept_rsm, | ||
331 | x86_intercept_iret, | ||
332 | x86_intercept_intn, | ||
333 | x86_intercept_invd, | ||
334 | x86_intercept_pause, | ||
335 | x86_intercept_hlt, | ||
336 | x86_intercept_invlpg, | ||
337 | x86_intercept_invlpga, | ||
338 | x86_intercept_vmrun, | ||
339 | x86_intercept_vmload, | ||
340 | x86_intercept_vmsave, | ||
341 | x86_intercept_vmmcall, | ||
342 | x86_intercept_stgi, | ||
343 | x86_intercept_clgi, | ||
344 | x86_intercept_skinit, | ||
345 | x86_intercept_rdtscp, | ||
346 | x86_intercept_icebp, | ||
347 | x86_intercept_wbinvd, | ||
348 | x86_intercept_monitor, | ||
349 | x86_intercept_mwait, | ||
350 | x86_intercept_rdmsr, | ||
351 | x86_intercept_wrmsr, | ||
352 | x86_intercept_in, | ||
353 | x86_intercept_ins, | ||
354 | x86_intercept_out, | ||
355 | x86_intercept_outs, | ||
356 | |||
357 | nr_x86_intercepts | ||
358 | }; | ||
359 | |||
262 | /* Host execution mode. */ | 360 | /* Host execution mode. */ |
263 | #if defined(CONFIG_X86_32) | 361 | #if defined(CONFIG_X86_32) |
264 | #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 | 362 | #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 |
@@ -270,6 +368,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); | |||
270 | #define EMULATION_FAILED -1 | 368 | #define EMULATION_FAILED -1 |
271 | #define EMULATION_OK 0 | 369 | #define EMULATION_OK 0 |
272 | #define EMULATION_RESTART 1 | 370 | #define EMULATION_RESTART 1 |
371 | #define EMULATION_INTERCEPTED 2 | ||
273 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); | 372 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); |
274 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | 373 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, |
275 | u16 tss_selector, int reason, | 374 | u16 tss_selector, int reason, |
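Every x86_emulate_ops callback now takes the emulation context as its first argument instead of a struct kvm_vcpu, so the emulator core no longer depends on KVM's vcpu type. A hedged sketch of a backend implementing the new read_std signature (toy behaviour; X86EMUL_CONTINUE is assumed to be the success code defined earlier in this header):

        static int toy_read_std(struct x86_emulate_ctxt *ctxt, unsigned long addr,
                                void *val, unsigned int bytes,
                                struct x86_exception *fault)
        {
                memset(val, 0, bytes);  /* toy backend: every read returns zero */
                return X86EMUL_CONTINUE;
        }

        static struct x86_emulate_ops toy_ops = {
                .read_std = toy_read_std,
        };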
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c8af0991fdf0..d2ac8e2ee897 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -30,14 +30,30 @@ | |||
30 | #define KVM_MEMORY_SLOTS 32 | 30 | #define KVM_MEMORY_SLOTS 32 |
31 | /* memory slots that does not exposed to userspace */ | 31 | /* memory slots that does not exposed to userspace */ |
32 | #define KVM_PRIVATE_MEM_SLOTS 4 | 32 | #define KVM_PRIVATE_MEM_SLOTS 4 |
33 | #define KVM_MMIO_SIZE 16 | ||
33 | 34 | ||
34 | #define KVM_PIO_PAGE_OFFSET 1 | 35 | #define KVM_PIO_PAGE_OFFSET 1 |
35 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 | 36 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 |
36 | 37 | ||
38 | #define CR0_RESERVED_BITS \ | ||
39 | (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | ||
40 | | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | ||
41 | | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) | ||
42 | |||
37 | #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) | 43 | #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) |
38 | #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) | 44 | #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) |
39 | #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ | 45 | #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ |
40 | 0xFFFFFF0000000000ULL) | 46 | 0xFFFFFF0000000000ULL) |
47 | #define CR4_RESERVED_BITS \ | ||
48 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | ||
49 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | ||
50 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ | ||
51 | | X86_CR4_OSXSAVE \ | ||
52 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) | ||
53 | |||
54 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) | ||
55 | |||
56 | |||
41 | 57 | ||
42 | #define INVALID_PAGE (~(hpa_t)0) | 58 | #define INVALID_PAGE (~(hpa_t)0) |
43 | #define VALID_PAGE(x) ((x) != INVALID_PAGE) | 59 | #define VALID_PAGE(x) ((x) != INVALID_PAGE) |
@@ -118,6 +134,9 @@ enum kvm_reg { | |||
118 | enum kvm_reg_ex { | 134 | enum kvm_reg_ex { |
119 | VCPU_EXREG_PDPTR = NR_VCPU_REGS, | 135 | VCPU_EXREG_PDPTR = NR_VCPU_REGS, |
120 | VCPU_EXREG_CR3, | 136 | VCPU_EXREG_CR3, |
137 | VCPU_EXREG_RFLAGS, | ||
138 | VCPU_EXREG_CPL, | ||
139 | VCPU_EXREG_SEGMENTS, | ||
121 | }; | 140 | }; |
122 | 141 | ||
123 | enum { | 142 | enum { |
@@ -256,7 +275,7 @@ struct kvm_mmu { | |||
256 | struct kvm_mmu_page *sp); | 275 | struct kvm_mmu_page *sp); |
257 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); | 276 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); |
258 | void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 277 | void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
259 | u64 *spte, const void *pte, unsigned long mmu_seq); | 278 | u64 *spte, const void *pte); |
260 | hpa_t root_hpa; | 279 | hpa_t root_hpa; |
261 | int root_level; | 280 | int root_level; |
262 | int shadow_root_level; | 281 | int shadow_root_level; |
@@ -340,7 +359,6 @@ struct kvm_vcpu_arch { | |||
340 | struct fpu guest_fpu; | 359 | struct fpu guest_fpu; |
341 | u64 xcr0; | 360 | u64 xcr0; |
342 | 361 | ||
343 | gva_t mmio_fault_cr2; | ||
344 | struct kvm_pio_request pio; | 362 | struct kvm_pio_request pio; |
345 | void *pio_data; | 363 | void *pio_data; |
346 | 364 | ||
@@ -367,18 +385,22 @@ struct kvm_vcpu_arch { | |||
367 | /* emulate context */ | 385 | /* emulate context */ |
368 | 386 | ||
369 | struct x86_emulate_ctxt emulate_ctxt; | 387 | struct x86_emulate_ctxt emulate_ctxt; |
388 | bool emulate_regs_need_sync_to_vcpu; | ||
389 | bool emulate_regs_need_sync_from_vcpu; | ||
370 | 390 | ||
371 | gpa_t time; | 391 | gpa_t time; |
372 | struct pvclock_vcpu_time_info hv_clock; | 392 | struct pvclock_vcpu_time_info hv_clock; |
373 | unsigned int hw_tsc_khz; | 393 | unsigned int hw_tsc_khz; |
374 | unsigned int time_offset; | 394 | unsigned int time_offset; |
375 | struct page *time_page; | 395 | struct page *time_page; |
376 | u64 last_host_tsc; | ||
377 | u64 last_guest_tsc; | 396 | u64 last_guest_tsc; |
378 | u64 last_kernel_ns; | 397 | u64 last_kernel_ns; |
379 | u64 last_tsc_nsec; | 398 | u64 last_tsc_nsec; |
380 | u64 last_tsc_write; | 399 | u64 last_tsc_write; |
400 | u32 virtual_tsc_khz; | ||
381 | bool tsc_catchup; | 401 | bool tsc_catchup; |
402 | u32 tsc_catchup_mult; | ||
403 | s8 tsc_catchup_shift; | ||
382 | 404 | ||
383 | bool nmi_pending; | 405 | bool nmi_pending; |
384 | bool nmi_injected; | 406 | bool nmi_injected; |
@@ -448,9 +470,6 @@ struct kvm_arch { | |||
448 | u64 last_tsc_nsec; | 470 | u64 last_tsc_nsec; |
449 | u64 last_tsc_offset; | 471 | u64 last_tsc_offset; |
450 | u64 last_tsc_write; | 472 | u64 last_tsc_write; |
451 | u32 virtual_tsc_khz; | ||
452 | u32 virtual_tsc_mult; | ||
453 | s8 virtual_tsc_shift; | ||
454 | 473 | ||
455 | struct kvm_xen_hvm_config xen_hvm_config; | 474 | struct kvm_xen_hvm_config xen_hvm_config; |
456 | 475 | ||
@@ -502,6 +521,8 @@ struct kvm_vcpu_stat { | |||
502 | u32 nmi_injections; | 521 | u32 nmi_injections; |
503 | }; | 522 | }; |
504 | 523 | ||
524 | struct x86_instruction_info; | ||
525 | |||
505 | struct kvm_x86_ops { | 526 | struct kvm_x86_ops { |
506 | int (*cpu_has_kvm_support)(void); /* __init */ | 527 | int (*cpu_has_kvm_support)(void); /* __init */ |
507 | int (*disabled_by_bios)(void); /* __init */ | 528 | int (*disabled_by_bios)(void); /* __init */ |
@@ -586,9 +607,17 @@ struct kvm_x86_ops { | |||
586 | 607 | ||
587 | bool (*has_wbinvd_exit)(void); | 608 | bool (*has_wbinvd_exit)(void); |
588 | 609 | ||
610 | void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz); | ||
589 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); | 611 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); |
590 | 612 | ||
613 | u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); | ||
614 | |||
591 | void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); | 615 | void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); |
616 | |||
617 | int (*check_intercept)(struct kvm_vcpu *vcpu, | ||
618 | struct x86_instruction_info *info, | ||
619 | enum x86_intercept_stage stage); | ||
620 | |||
592 | const struct trace_print_flags *exit_reasons_str; | 621 | const struct trace_print_flags *exit_reasons_str; |
593 | }; | 622 | }; |
594 | 623 | ||
@@ -627,6 +656,13 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); | |||
627 | 656 | ||
628 | extern bool tdp_enabled; | 657 | extern bool tdp_enabled; |
629 | 658 | ||
659 | /* control of guest tsc rate supported? */ | ||
660 | extern bool kvm_has_tsc_control; | ||
661 | /* minimum supported tsc_khz for guests */ | ||
662 | extern u32 kvm_min_guest_tsc_khz; | ||
663 | /* maximum supported tsc_khz for guests */ | ||
664 | extern u32 kvm_max_guest_tsc_khz; | ||
665 | |||
630 | enum emulation_result { | 666 | enum emulation_result { |
631 | EMULATE_DONE, /* no further processing */ | 667 | EMULATE_DONE, /* no further processing */ |
632 | EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ | 668 | EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ |
@@ -645,9 +681,6 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu, | |||
645 | return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); | 681 | return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); |
646 | } | 682 | } |
647 | 683 | ||
648 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | ||
649 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | ||
650 | |||
651 | void kvm_enable_efer_bits(u64); | 684 | void kvm_enable_efer_bits(u64); |
652 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); | 685 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); |
653 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | 686 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); |
@@ -657,8 +690,6 @@ struct x86_emulate_ctxt; | |||
657 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); | 690 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); |
658 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); | 691 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); |
659 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); | 692 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); |
660 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); | ||
661 | int emulate_clts(struct kvm_vcpu *vcpu); | ||
662 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); | 693 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); |
663 | 694 | ||
664 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); | 695 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
@@ -721,8 +752,6 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, | |||
721 | 752 | ||
722 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); | 753 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); |
723 | 754 | ||
724 | int kvm_fix_hypercall(struct kvm_vcpu *vcpu); | ||
725 | |||
726 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, | 755 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, |
727 | void *insn, int insn_len); | 756 | void *insn, int insn_len); |
728 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); | 757 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); |
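The CR0/CR4/CR8 reserved-bit masks move into this header so the emulator and MMU code can share them. A hedged sketch of the kind of validation they enable when a guest writes a control register (mirrors the usual kvm_set_cr4()-style check; the function name is illustrative):

        static int toy_check_cr4(unsigned long cr4)
        {
                if (cr4 & CR4_RESERVED_BITS)
                        return 1;       /* caller injects #GP: reserved bit set */
                return 0;
        }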
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 12d55e773eb6..48142971b25d 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h | |||
@@ -8,11 +8,6 @@ | |||
8 | 8 | ||
9 | #ifdef CONFIG_X86_32 | 9 | #ifdef CONFIG_X86_32 |
10 | #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) | 10 | #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) |
11 | /* | ||
12 | * For 32-bit UML - mark functions implemented in assembly that use | ||
13 | * regparm input parameters: | ||
14 | */ | ||
15 | #define asmregparm __attribute__((regparm(3))) | ||
16 | 11 | ||
17 | /* | 12 | /* |
18 | * Make sure the compiler doesn't do anything stupid with the | 13 | * Make sure the compiler doesn't do anything stupid with the |
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index eb16e94ae04f..021979a6e23f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -142,8 +142,6 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} | |||
142 | static inline void enable_p5_mce(void) {} | 142 | static inline void enable_p5_mce(void) {} |
143 | #endif | 143 | #endif |
144 | 144 | ||
145 | extern void (*x86_mce_decode_callback)(struct mce *m); | ||
146 | |||
147 | void mce_setup(struct mce *m); | 145 | void mce_setup(struct mce *m); |
148 | void mce_log(struct mce *m); | 146 | void mce_log(struct mce *m); |
149 | DECLARE_PER_CPU(struct sys_device, mce_dev); | 147 | DECLARE_PER_CPU(struct sys_device, mce_dev); |
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 91df7c51806c..5e83a416eca8 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h | |||
@@ -13,31 +13,11 @@ extern struct pglist_data *node_data[]; | |||
13 | #define NODE_DATA(nid) (node_data[nid]) | 13 | #define NODE_DATA(nid) (node_data[nid]) |
14 | 14 | ||
15 | #include <asm/numaq.h> | 15 | #include <asm/numaq.h> |
16 | /* summit or generic arch */ | ||
17 | #include <asm/srat.h> | ||
18 | |||
19 | extern int get_memcfg_numa_flat(void); | ||
20 | /* | ||
21 | * This allows any one NUMA architecture to be compiled | ||
22 | * for, and still fall back to the flat function if it | ||
23 | * fails. | ||
24 | */ | ||
25 | static inline void get_memcfg_numa(void) | ||
26 | { | ||
27 | |||
28 | if (get_memcfg_numaq()) | ||
29 | return; | ||
30 | if (get_memcfg_from_srat()) | ||
31 | return; | ||
32 | get_memcfg_numa_flat(); | ||
33 | } | ||
34 | 16 | ||
35 | extern void resume_map_numa_kva(pgd_t *pgd); | 17 | extern void resume_map_numa_kva(pgd_t *pgd); |
36 | 18 | ||
37 | #else /* !CONFIG_NUMA */ | 19 | #else /* !CONFIG_NUMA */ |
38 | 20 | ||
39 | #define get_memcfg_numa get_memcfg_numa_flat | ||
40 | |||
41 | static inline void resume_map_numa_kva(pgd_t *pgd) {} | 21 | static inline void resume_map_numa_kva(pgd_t *pgd) {} |
42 | 22 | ||
43 | #endif /* CONFIG_NUMA */ | 23 | #endif /* CONFIG_NUMA */ |
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h index 288b96f815a6..b3f88d7867c7 100644 --- a/arch/x86/include/asm/mmzone_64.h +++ b/arch/x86/include/asm/mmzone_64.h | |||
@@ -4,36 +4,13 @@ | |||
4 | #ifndef _ASM_X86_MMZONE_64_H | 4 | #ifndef _ASM_X86_MMZONE_64_H |
5 | #define _ASM_X86_MMZONE_64_H | 5 | #define _ASM_X86_MMZONE_64_H |
6 | 6 | ||
7 | |||
8 | #ifdef CONFIG_NUMA | 7 | #ifdef CONFIG_NUMA |
9 | 8 | ||
10 | #include <linux/mmdebug.h> | 9 | #include <linux/mmdebug.h> |
11 | |||
12 | #include <asm/smp.h> | 10 | #include <asm/smp.h> |
13 | 11 | ||
14 | /* Simple perfect hash to map physical addresses to node numbers */ | ||
15 | struct memnode { | ||
16 | int shift; | ||
17 | unsigned int mapsize; | ||
18 | s16 *map; | ||
19 | s16 embedded_map[64 - 8]; | ||
20 | } ____cacheline_aligned; /* total size = 128 bytes */ | ||
21 | extern struct memnode memnode; | ||
22 | #define memnode_shift memnode.shift | ||
23 | #define memnodemap memnode.map | ||
24 | #define memnodemapsize memnode.mapsize | ||
25 | |||
26 | extern struct pglist_data *node_data[]; | 12 | extern struct pglist_data *node_data[]; |
27 | 13 | ||
28 | static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) | ||
29 | { | ||
30 | unsigned nid; | ||
31 | VIRTUAL_BUG_ON(!memnodemap); | ||
32 | nid = memnodemap[addr >> memnode_shift]; | ||
33 | VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); | ||
34 | return nid; | ||
35 | } | ||
36 | |||
37 | #define NODE_DATA(nid) (node_data[nid]) | 14 | #define NODE_DATA(nid) (node_data[nid]) |
38 | 15 | ||
39 | #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) | 16 | #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) |
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 67763c5d8b4e..9eae7752ae9b 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h | |||
@@ -35,7 +35,7 @@ | |||
35 | #define MODULE_PROC_FAMILY "K7 " | 35 | #define MODULE_PROC_FAMILY "K7 " |
36 | #elif defined CONFIG_MK8 | 36 | #elif defined CONFIG_MK8 |
37 | #define MODULE_PROC_FAMILY "K8 " | 37 | #define MODULE_PROC_FAMILY "K8 " |
38 | #elif defined CONFIG_X86_ELAN | 38 | #elif defined CONFIG_MELAN |
39 | #define MODULE_PROC_FAMILY "ELAN " | 39 | #define MODULE_PROC_FAMILY "ELAN " |
40 | #elif defined CONFIG_MCRUSOE | 40 | #elif defined CONFIG_MCRUSOE |
41 | #define MODULE_PROC_FAMILY "CRUSOE " | 41 | #define MODULE_PROC_FAMILY "CRUSOE " |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index fd5a1f365c95..485b4f1f079b 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -96,11 +96,15 @@ | |||
96 | #define MSR_IA32_MC0_ADDR 0x00000402 | 96 | #define MSR_IA32_MC0_ADDR 0x00000402 |
97 | #define MSR_IA32_MC0_MISC 0x00000403 | 97 | #define MSR_IA32_MC0_MISC 0x00000403 |
98 | 98 | ||
99 | #define MSR_AMD64_MC0_MASK 0xc0010044 | ||
100 | |||
99 | #define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) | 101 | #define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) |
100 | #define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x)) | 102 | #define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x)) |
101 | #define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) | 103 | #define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) |
102 | #define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) | 104 | #define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) |
103 | 105 | ||
106 | #define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x)) | ||
107 | |||
104 | /* These are consecutive and not in the normal 4er MCE bank block */ | 108 | /* These are consecutive and not in the normal 4er MCE bank block */ |
105 | #define MSR_IA32_MC0_CTL2 0x00000280 | 109 | #define MSR_IA32_MC0_CTL2 0x00000280 |
106 | #define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) | 110 | #define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) |
@@ -114,6 +118,7 @@ | |||
114 | complete list. */ | 118 | complete list. */ |
115 | 119 | ||
116 | #define MSR_AMD64_PATCH_LEVEL 0x0000008b | 120 | #define MSR_AMD64_PATCH_LEVEL 0x0000008b |
121 | #define MSR_AMD64_TSC_RATIO 0xc0000104 | ||
117 | #define MSR_AMD64_NB_CFG 0xc001001f | 122 | #define MSR_AMD64_NB_CFG 0xc001001f |
118 | #define MSR_AMD64_PATCH_LOADER 0xc0010020 | 123 | #define MSR_AMD64_PATCH_LOADER 0xc0010020 |
119 | #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 | 124 | #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 |
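Unlike the architectural MCi registers, which stride by four MSRs per bank, the AMD mask MSRs are consecutive, hence the plain + (x) in MSR_AMD64_MCx_MASK(). A hedged sketch of accessing one of them (bank and bit values are illustrative):

        static void toy_clear_mci_mask_bit(unsigned int bank, unsigned int bit)
        {
                u64 mask;

                rdmsrl(MSR_AMD64_MCx_MASK(bank), mask);   /* 0xc0010044 + bank */
                mask &= ~(1ULL << bit);
                wrmsrl(MSR_AMD64_MCx_MASK(bank), mask);
        }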
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h index af788496020b..405b4032a60b 100644 --- a/arch/x86/include/asm/nops.h +++ b/arch/x86/include/asm/nops.h | |||
@@ -1,7 +1,13 @@ | |||
1 | #ifndef _ASM_X86_NOPS_H | 1 | #ifndef _ASM_X86_NOPS_H |
2 | #define _ASM_X86_NOPS_H | 2 | #define _ASM_X86_NOPS_H |
3 | 3 | ||
4 | /* Define nops for use with alternative() */ | 4 | /* |
5 | * Define nops for use with alternative() and for tracing. | ||
6 | * | ||
7 | * *_NOP5_ATOMIC must be a single instruction. | ||
8 | */ | ||
9 | |||
10 | #define NOP_DS_PREFIX 0x3e | ||
5 | 11 | ||
6 | /* generic versions from gas | 12 | /* generic versions from gas |
7 | 1: nop | 13 | 1: nop |
@@ -13,14 +19,15 @@ | |||
13 | 6: leal 0x00000000(%esi),%esi | 19 | 6: leal 0x00000000(%esi),%esi |
14 | 7: leal 0x00000000(,%esi,1),%esi | 20 | 7: leal 0x00000000(,%esi,1),%esi |
15 | */ | 21 | */ |
16 | #define GENERIC_NOP1 ".byte 0x90\n" | 22 | #define GENERIC_NOP1 0x90 |
17 | #define GENERIC_NOP2 ".byte 0x89,0xf6\n" | 23 | #define GENERIC_NOP2 0x89,0xf6 |
18 | #define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n" | 24 | #define GENERIC_NOP3 0x8d,0x76,0x00 |
19 | #define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n" | 25 | #define GENERIC_NOP4 0x8d,0x74,0x26,0x00 |
20 | #define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4 | 26 | #define GENERIC_NOP5 GENERIC_NOP1,GENERIC_NOP4 |
21 | #define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n" | 27 | #define GENERIC_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00 |
22 | #define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n" | 28 | #define GENERIC_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00 |
23 | #define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7 | 29 | #define GENERIC_NOP8 GENERIC_NOP1,GENERIC_NOP7 |
30 | #define GENERIC_NOP5_ATOMIC NOP_DS_PREFIX,GENERIC_NOP4 | ||
24 | 31 | ||
25 | /* Opteron 64bit nops | 32 | /* Opteron 64bit nops |
26 | 1: nop | 33 | 1: nop |
@@ -29,13 +36,14 @@ | |||
29 | 4: osp osp osp nop | 36 | 4: osp osp osp nop |
30 | */ | 37 | */ |
31 | #define K8_NOP1 GENERIC_NOP1 | 38 | #define K8_NOP1 GENERIC_NOP1 |
32 | #define K8_NOP2 ".byte 0x66,0x90\n" | 39 | #define K8_NOP2 0x66,K8_NOP1 |
33 | #define K8_NOP3 ".byte 0x66,0x66,0x90\n" | 40 | #define K8_NOP3 0x66,K8_NOP2 |
34 | #define K8_NOP4 ".byte 0x66,0x66,0x66,0x90\n" | 41 | #define K8_NOP4 0x66,K8_NOP3 |
35 | #define K8_NOP5 K8_NOP3 K8_NOP2 | 42 | #define K8_NOP5 K8_NOP3,K8_NOP2 |
36 | #define K8_NOP6 K8_NOP3 K8_NOP3 | 43 | #define K8_NOP6 K8_NOP3,K8_NOP3 |
37 | #define K8_NOP7 K8_NOP4 K8_NOP3 | 44 | #define K8_NOP7 K8_NOP4,K8_NOP3 |
38 | #define K8_NOP8 K8_NOP4 K8_NOP4 | 45 | #define K8_NOP8 K8_NOP4,K8_NOP4 |
46 | #define K8_NOP5_ATOMIC 0x66,K8_NOP4 | ||
39 | 47 | ||
40 | /* K7 nops | 48 | /* K7 nops |
41 | uses eax dependencies (arbitrary choice) | 49 | uses eax dependencies (arbitrary choice) |
@@ -47,13 +55,14 @@ | |||
47 | 7: leal 0x00000000(,%eax,1),%eax | 55 | 7: leal 0x00000000(,%eax,1),%eax |
48 | */ | 56 | */ |
49 | #define K7_NOP1 GENERIC_NOP1 | 57 | #define K7_NOP1 GENERIC_NOP1 |
50 | #define K7_NOP2 ".byte 0x8b,0xc0\n" | 58 | #define K7_NOP2 0x8b,0xc0 |
51 | #define K7_NOP3 ".byte 0x8d,0x04,0x20\n" | 59 | #define K7_NOP3 0x8d,0x04,0x20 |
52 | #define K7_NOP4 ".byte 0x8d,0x44,0x20,0x00\n" | 60 | #define K7_NOP4 0x8d,0x44,0x20,0x00 |
53 | #define K7_NOP5 K7_NOP4 ASM_NOP1 | 61 | #define K7_NOP5 K7_NOP4,K7_NOP1 |
54 | #define K7_NOP6 ".byte 0x8d,0x80,0,0,0,0\n" | 62 | #define K7_NOP6 0x8d,0x80,0,0,0,0 |
55 | #define K7_NOP7 ".byte 0x8D,0x04,0x05,0,0,0,0\n" | 63 | #define K7_NOP7 0x8D,0x04,0x05,0,0,0,0 |
56 | #define K7_NOP8 K7_NOP7 ASM_NOP1 | 64 | #define K7_NOP8 K7_NOP7,K7_NOP1 |
65 | #define K7_NOP5_ATOMIC NOP_DS_PREFIX,K7_NOP4 | ||
57 | 66 | ||
58 | /* P6 nops | 67 | /* P6 nops |
59 | uses eax dependencies (Intel-recommended choice) | 68 | uses eax dependencies (Intel-recommended choice) |
@@ -69,52 +78,65 @@ | |||
69 | There is kernel code that depends on this. | 78 | There is kernel code that depends on this. |
70 | */ | 79 | */ |
71 | #define P6_NOP1 GENERIC_NOP1 | 80 | #define P6_NOP1 GENERIC_NOP1 |
72 | #define P6_NOP2 ".byte 0x66,0x90\n" | 81 | #define P6_NOP2 0x66,0x90 |
73 | #define P6_NOP3 ".byte 0x0f,0x1f,0x00\n" | 82 | #define P6_NOP3 0x0f,0x1f,0x00 |
74 | #define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n" | 83 | #define P6_NOP4 0x0f,0x1f,0x40,0 |
75 | #define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n" | 84 | #define P6_NOP5 0x0f,0x1f,0x44,0x00,0 |
76 | #define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n" | 85 | #define P6_NOP6 0x66,0x0f,0x1f,0x44,0x00,0 |
77 | #define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n" | 86 | #define P6_NOP7 0x0f,0x1f,0x80,0,0,0,0 |
78 | #define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n" | 87 | #define P6_NOP8 0x0f,0x1f,0x84,0x00,0,0,0,0 |
88 | #define P6_NOP5_ATOMIC P6_NOP5 | ||
89 | |||
90 | #define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n" | ||
79 | 91 | ||
80 | #if defined(CONFIG_MK7) | 92 | #if defined(CONFIG_MK7) |
81 | #define ASM_NOP1 K7_NOP1 | 93 | #define ASM_NOP1 _ASM_MK_NOP(K7_NOP1) |
82 | #define ASM_NOP2 K7_NOP2 | 94 | #define ASM_NOP2 _ASM_MK_NOP(K7_NOP2) |
83 | #define ASM_NOP3 K7_NOP3 | 95 | #define ASM_NOP3 _ASM_MK_NOP(K7_NOP3) |
84 | #define ASM_NOP4 K7_NOP4 | 96 | #define ASM_NOP4 _ASM_MK_NOP(K7_NOP4) |
85 | #define ASM_NOP5 K7_NOP5 | 97 | #define ASM_NOP5 _ASM_MK_NOP(K7_NOP5) |
86 | #define ASM_NOP6 K7_NOP6 | 98 | #define ASM_NOP6 _ASM_MK_NOP(K7_NOP6) |
87 | #define ASM_NOP7 K7_NOP7 | 99 | #define ASM_NOP7 _ASM_MK_NOP(K7_NOP7) |
88 | #define ASM_NOP8 K7_NOP8 | 100 | #define ASM_NOP8 _ASM_MK_NOP(K7_NOP8) |
101 | #define ASM_NOP5_ATOMIC _ASM_MK_NOP(K7_NOP5_ATOMIC) | ||
89 | #elif defined(CONFIG_X86_P6_NOP) | 102 | #elif defined(CONFIG_X86_P6_NOP) |
90 | #define ASM_NOP1 P6_NOP1 | 103 | #define ASM_NOP1 _ASM_MK_NOP(P6_NOP1) |
91 | #define ASM_NOP2 P6_NOP2 | 104 | #define ASM_NOP2 _ASM_MK_NOP(P6_NOP2) |
92 | #define ASM_NOP3 P6_NOP3 | 105 | #define ASM_NOP3 _ASM_MK_NOP(P6_NOP3) |
93 | #define ASM_NOP4 P6_NOP4 | 106 | #define ASM_NOP4 _ASM_MK_NOP(P6_NOP4) |
94 | #define ASM_NOP5 P6_NOP5 | 107 | #define ASM_NOP5 _ASM_MK_NOP(P6_NOP5) |
95 | #define ASM_NOP6 P6_NOP6 | 108 | #define ASM_NOP6 _ASM_MK_NOP(P6_NOP6) |
96 | #define ASM_NOP7 P6_NOP7 | 109 | #define ASM_NOP7 _ASM_MK_NOP(P6_NOP7) |
97 | #define ASM_NOP8 P6_NOP8 | 110 | #define ASM_NOP8 _ASM_MK_NOP(P6_NOP8) |
111 | #define ASM_NOP5_ATOMIC _ASM_MK_NOP(P6_NOP5_ATOMIC) | ||
98 | #elif defined(CONFIG_X86_64) | 112 | #elif defined(CONFIG_X86_64) |
99 | #define ASM_NOP1 K8_NOP1 | 113 | #define ASM_NOP1 _ASM_MK_NOP(K8_NOP1) |
100 | #define ASM_NOP2 K8_NOP2 | 114 | #define ASM_NOP2 _ASM_MK_NOP(K8_NOP2) |
101 | #define ASM_NOP3 K8_NOP3 | 115 | #define ASM_NOP3 _ASM_MK_NOP(K8_NOP3) |
102 | #define ASM_NOP4 K8_NOP4 | 116 | #define ASM_NOP4 _ASM_MK_NOP(K8_NOP4) |
103 | #define ASM_NOP5 K8_NOP5 | 117 | #define ASM_NOP5 _ASM_MK_NOP(K8_NOP5) |
104 | #define ASM_NOP6 K8_NOP6 | 118 | #define ASM_NOP6 _ASM_MK_NOP(K8_NOP6) |
105 | #define ASM_NOP7 K8_NOP7 | 119 | #define ASM_NOP7 _ASM_MK_NOP(K8_NOP7) |
106 | #define ASM_NOP8 K8_NOP8 | 120 | #define ASM_NOP8 _ASM_MK_NOP(K8_NOP8) |
121 | #define ASM_NOP5_ATOMIC _ASM_MK_NOP(K8_NOP5_ATOMIC) | ||
107 | #else | 122 | #else |
108 | #define ASM_NOP1 GENERIC_NOP1 | 123 | #define ASM_NOP1 _ASM_MK_NOP(GENERIC_NOP1) |
109 | #define ASM_NOP2 GENERIC_NOP2 | 124 | #define ASM_NOP2 _ASM_MK_NOP(GENERIC_NOP2) |
110 | #define ASM_NOP3 GENERIC_NOP3 | 125 | #define ASM_NOP3 _ASM_MK_NOP(GENERIC_NOP3) |
111 | #define ASM_NOP4 GENERIC_NOP4 | 126 | #define ASM_NOP4 _ASM_MK_NOP(GENERIC_NOP4) |
112 | #define ASM_NOP5 GENERIC_NOP5 | 127 | #define ASM_NOP5 _ASM_MK_NOP(GENERIC_NOP5) |
113 | #define ASM_NOP6 GENERIC_NOP6 | 128 | #define ASM_NOP6 _ASM_MK_NOP(GENERIC_NOP6) |
114 | #define ASM_NOP7 GENERIC_NOP7 | 129 | #define ASM_NOP7 _ASM_MK_NOP(GENERIC_NOP7) |
115 | #define ASM_NOP8 GENERIC_NOP8 | 130 | #define ASM_NOP8 _ASM_MK_NOP(GENERIC_NOP8) |
131 | #define ASM_NOP5_ATOMIC _ASM_MK_NOP(GENERIC_NOP5_ATOMIC) | ||
116 | #endif | 132 | #endif |
117 | 133 | ||
118 | #define ASM_NOP_MAX 8 | 134 | #define ASM_NOP_MAX 8 |
135 | #define NOP_ATOMIC5 (ASM_NOP_MAX+1) /* Entry for the 5-byte atomic NOP */ | ||
136 | |||
137 | #ifndef __ASSEMBLY__ | ||
138 | extern const unsigned char * const *ideal_nops; | ||
139 | extern void arch_init_ideal_nops(void); | ||
140 | #endif | ||
119 | 141 | ||
120 | #endif /* _ASM_X86_NOPS_H */ | 142 | #endif /* _ASM_X86_NOPS_H */ |
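The NOP macros become bare byte lists and _ASM_MK_NOP() stringifies them back into the ".byte ..." form used in inline assembly. The same lists can therefore also seed C byte arrays, which is presumably what the new ideal_nops table is built from; a small illustration:

        #include <linux/stringify.h>    /* __stringify(), used by _ASM_MK_NOP() */

        /* _ASM_MK_NOP(K8_NOP2)  expands to  ".byte 0x66,0x90\n"   (asm string)    */
        /* { K8_NOP2 }           expands to  { 0x66, 0x90 }        (C initializer) */
        static const unsigned char k8_nop2[] = { K8_NOP2 };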
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 3d4dab43c994..bfacd2ccf651 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h | |||
@@ -1,12 +1,24 @@ | |||
1 | #ifndef _ASM_X86_NUMA_H | 1 | #ifndef _ASM_X86_NUMA_H |
2 | #define _ASM_X86_NUMA_H | 2 | #define _ASM_X86_NUMA_H |
3 | 3 | ||
4 | #include <linux/nodemask.h> | ||
5 | |||
4 | #include <asm/topology.h> | 6 | #include <asm/topology.h> |
5 | #include <asm/apicdef.h> | 7 | #include <asm/apicdef.h> |
6 | 8 | ||
7 | #ifdef CONFIG_NUMA | 9 | #ifdef CONFIG_NUMA |
8 | 10 | ||
9 | #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) | 11 | #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) |
12 | #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) | ||
13 | |||
14 | /* | ||
15 | * Too small node sizes may confuse the VM badly. Usually they | ||
16 | * result from BIOS bugs. So dont recognize nodes as standalone | ||
17 | * NUMA entities that have less than this amount of RAM listed: | ||
18 | */ | ||
19 | #define NODE_MIN_SIZE (4*1024*1024) | ||
20 | |||
21 | extern int numa_off; | ||
10 | 22 | ||
11 | /* | 23 | /* |
12 | * __apicid_to_node[] stores the raw mapping between physical apicid and | 24 | * __apicid_to_node[] stores the raw mapping between physical apicid and |
@@ -17,15 +29,27 @@ | |||
17 | * numa_cpu_node(). | 29 | * numa_cpu_node(). |
18 | */ | 30 | */ |
19 | extern s16 __apicid_to_node[MAX_LOCAL_APIC]; | 31 | extern s16 __apicid_to_node[MAX_LOCAL_APIC]; |
32 | extern nodemask_t numa_nodes_parsed __initdata; | ||
33 | |||
34 | extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); | ||
35 | extern void __init numa_set_distance(int from, int to, int distance); | ||
20 | 36 | ||
21 | static inline void set_apicid_to_node(int apicid, s16 node) | 37 | static inline void set_apicid_to_node(int apicid, s16 node) |
22 | { | 38 | { |
23 | __apicid_to_node[apicid] = node; | 39 | __apicid_to_node[apicid] = node; |
24 | } | 40 | } |
41 | |||
42 | extern int __cpuinit numa_cpu_node(int cpu); | ||
43 | |||
25 | #else /* CONFIG_NUMA */ | 44 | #else /* CONFIG_NUMA */ |
26 | static inline void set_apicid_to_node(int apicid, s16 node) | 45 | static inline void set_apicid_to_node(int apicid, s16 node) |
27 | { | 46 | { |
28 | } | 47 | } |
48 | |||
49 | static inline int numa_cpu_node(int cpu) | ||
50 | { | ||
51 | return NUMA_NO_NODE; | ||
52 | } | ||
29 | #endif /* CONFIG_NUMA */ | 53 | #endif /* CONFIG_NUMA */ |
30 | 54 | ||
31 | #ifdef CONFIG_X86_32 | 55 | #ifdef CONFIG_X86_32 |
@@ -37,21 +61,25 @@ static inline void set_apicid_to_node(int apicid, s16 node) | |||
37 | #ifdef CONFIG_NUMA | 61 | #ifdef CONFIG_NUMA |
38 | extern void __cpuinit numa_set_node(int cpu, int node); | 62 | extern void __cpuinit numa_set_node(int cpu, int node); |
39 | extern void __cpuinit numa_clear_node(int cpu); | 63 | extern void __cpuinit numa_clear_node(int cpu); |
40 | extern void __init numa_init_array(void); | ||
41 | extern void __init init_cpu_to_node(void); | 64 | extern void __init init_cpu_to_node(void); |
42 | extern void __cpuinit numa_add_cpu(int cpu); | 65 | extern void __cpuinit numa_add_cpu(int cpu); |
43 | extern void __cpuinit numa_remove_cpu(int cpu); | 66 | extern void __cpuinit numa_remove_cpu(int cpu); |
44 | #else /* CONFIG_NUMA */ | 67 | #else /* CONFIG_NUMA */ |
45 | static inline void numa_set_node(int cpu, int node) { } | 68 | static inline void numa_set_node(int cpu, int node) { } |
46 | static inline void numa_clear_node(int cpu) { } | 69 | static inline void numa_clear_node(int cpu) { } |
47 | static inline void numa_init_array(void) { } | ||
48 | static inline void init_cpu_to_node(void) { } | 70 | static inline void init_cpu_to_node(void) { } |
49 | static inline void numa_add_cpu(int cpu) { } | 71 | static inline void numa_add_cpu(int cpu) { } |
50 | static inline void numa_remove_cpu(int cpu) { } | 72 | static inline void numa_remove_cpu(int cpu) { } |
51 | #endif /* CONFIG_NUMA */ | 73 | #endif /* CONFIG_NUMA */ |
52 | 74 | ||
53 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | 75 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS |
54 | struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable); | 76 | void debug_cpumask_set_cpu(int cpu, int node, bool enable); |
55 | #endif | 77 | #endif |
56 | 78 | ||
79 | #ifdef CONFIG_NUMA_EMU | ||
80 | #define FAKE_NODE_MIN_SIZE ((u64)32 << 20) | ||
81 | #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) | ||
82 | void numa_emu_cmdline(char *); | ||
83 | #endif /* CONFIG_NUMA_EMU */ | ||
84 | |||
57 | #endif /* _ASM_X86_NUMA_H */ | 85 | #endif /* _ASM_X86_NUMA_H */ |
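numa_nodes_parsed, numa_add_memblk() and numa_set_distance() are now declared in the common header rather than the 64-bit one, so any memory-configuration backend can report its findings the same way. A hedged sketch of a minimal backend (node layout and distance values are purely illustrative):

        static int __init toy_numa_init(void)
        {
                node_set(0, numa_nodes_parsed);
                numa_add_memblk(0, 0, 512ULL << 20);    /* node 0: first 512 MB */
                numa_set_distance(0, 0, 10);            /* local distance */
                return 0;
        }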
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h index c6beed1ef103..e7d6b8254742 100644 --- a/arch/x86/include/asm/numa_32.h +++ b/arch/x86/include/asm/numa_32.h | |||
@@ -1,16 +1,6 @@ | |||
1 | #ifndef _ASM_X86_NUMA_32_H | 1 | #ifndef _ASM_X86_NUMA_32_H |
2 | #define _ASM_X86_NUMA_32_H | 2 | #define _ASM_X86_NUMA_32_H |
3 | 3 | ||
4 | extern int numa_off; | ||
5 | |||
6 | extern int pxm_to_nid(int pxm); | ||
7 | |||
8 | #ifdef CONFIG_NUMA | ||
9 | extern int __cpuinit numa_cpu_node(int cpu); | ||
10 | #else /* CONFIG_NUMA */ | ||
11 | static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; } | ||
12 | #endif /* CONFIG_NUMA */ | ||
13 | |||
14 | #ifdef CONFIG_HIGHMEM | 4 | #ifdef CONFIG_HIGHMEM |
15 | extern void set_highmem_pages_init(void); | 5 | extern void set_highmem_pages_init(void); |
16 | #else | 6 | #else |
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 344eb1790b46..0c05f7ae46e8 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h | |||
@@ -1,42 +1,6 @@ | |||
1 | #ifndef _ASM_X86_NUMA_64_H | 1 | #ifndef _ASM_X86_NUMA_64_H |
2 | #define _ASM_X86_NUMA_64_H | 2 | #define _ASM_X86_NUMA_64_H |
3 | 3 | ||
4 | #include <linux/nodemask.h> | ||
5 | |||
6 | struct bootnode { | ||
7 | u64 start; | ||
8 | u64 end; | ||
9 | }; | ||
10 | |||
11 | #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) | ||
12 | |||
13 | extern int numa_off; | ||
14 | |||
15 | extern unsigned long numa_free_all_bootmem(void); | 4 | extern unsigned long numa_free_all_bootmem(void); |
16 | extern void setup_node_bootmem(int nodeid, unsigned long start, | ||
17 | unsigned long end); | ||
18 | |||
19 | #ifdef CONFIG_NUMA | ||
20 | /* | ||
21 | * Too small node sizes may confuse the VM badly. Usually they | ||
22 | * result from BIOS bugs. So dont recognize nodes as standalone | ||
23 | * NUMA entities that have less than this amount of RAM listed: | ||
24 | */ | ||
25 | #define NODE_MIN_SIZE (4*1024*1024) | ||
26 | |||
27 | extern nodemask_t numa_nodes_parsed __initdata; | ||
28 | |||
29 | extern int __cpuinit numa_cpu_node(int cpu); | ||
30 | extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); | ||
31 | extern void __init numa_set_distance(int from, int to, int distance); | ||
32 | |||
33 | #ifdef CONFIG_NUMA_EMU | ||
34 | #define FAKE_NODE_MIN_SIZE ((u64)32 << 20) | ||
35 | #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) | ||
36 | void numa_emu_cmdline(char *); | ||
37 | #endif /* CONFIG_NUMA_EMU */ | ||
38 | #else | ||
39 | static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; } | ||
40 | #endif | ||
41 | 5 | ||
42 | #endif /* _ASM_X86_NUMA_64_H */ | 6 | #endif /* _ASM_X86_NUMA_64_H */ |
diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h index 37c516545ec8..c3b3c322fd87 100644 --- a/arch/x86/include/asm/numaq.h +++ b/arch/x86/include/asm/numaq.h | |||
@@ -29,7 +29,7 @@ | |||
29 | #ifdef CONFIG_X86_NUMAQ | 29 | #ifdef CONFIG_X86_NUMAQ |
30 | 30 | ||
31 | extern int found_numaq; | 31 | extern int found_numaq; |
32 | extern int get_memcfg_numaq(void); | 32 | extern int numaq_numa_init(void); |
33 | extern int pci_numaq_init(void); | 33 | extern int pci_numaq_init(void); |
34 | 34 | ||
35 | extern void *xquad_portio; | 35 | extern void *xquad_portio; |
@@ -166,11 +166,6 @@ struct sys_cfg_data { | |||
166 | 166 | ||
167 | void numaq_tsc_disable(void); | 167 | void numaq_tsc_disable(void); |
168 | 168 | ||
169 | #else | ||
170 | static inline int get_memcfg_numaq(void) | ||
171 | { | ||
172 | return 0; | ||
173 | } | ||
174 | #endif /* CONFIG_X86_NUMAQ */ | 169 | #endif /* CONFIG_X86_NUMAQ */ |
175 | #endif /* _ASM_X86_NUMAQ_H */ | 170 | #endif /* _ASM_X86_NUMAQ_H */ |
176 | 171 | ||
diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h index c5d3a5abbb9f..24487712e0b1 100644 --- a/arch/x86/include/asm/olpc_ofw.h +++ b/arch/x86/include/asm/olpc_ofw.h | |||
@@ -26,15 +26,12 @@ extern void setup_olpc_ofw_pgd(void); | |||
26 | /* check if OFW was detected during boot */ | 26 | /* check if OFW was detected during boot */ |
27 | extern bool olpc_ofw_present(void); | 27 | extern bool olpc_ofw_present(void); |
28 | 28 | ||
29 | extern void olpc_dt_build_devicetree(void); | ||
30 | |||
29 | #else /* !CONFIG_OLPC */ | 31 | #else /* !CONFIG_OLPC */ |
30 | static inline void olpc_ofw_detect(void) { } | 32 | static inline void olpc_ofw_detect(void) { } |
31 | static inline void setup_olpc_ofw_pgd(void) { } | 33 | static inline void setup_olpc_ofw_pgd(void) { } |
32 | #endif /* !CONFIG_OLPC */ | ||
33 | |||
34 | #ifdef CONFIG_OF_PROMTREE | ||
35 | extern void olpc_dt_build_devicetree(void); | ||
36 | #else | ||
37 | static inline void olpc_dt_build_devicetree(void) { } | 34 | static inline void olpc_dt_build_devicetree(void) { } |
38 | #endif | 35 | #endif /* !CONFIG_OLPC */ |
39 | 36 | ||
40 | #endif /* _ASM_X86_OLPC_OFW_H */ | 37 | #endif /* _ASM_X86_OLPC_OFW_H */ |
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 676129229630..d498943b906c 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h | |||
@@ -135,8 +135,6 @@ void default_teardown_msi_irqs(struct pci_dev *dev); | |||
135 | #include "pci_64.h" | 135 | #include "pci_64.h" |
136 | #endif | 136 | #endif |
137 | 137 | ||
138 | void dma32_reserve_bootmem(void); | ||
139 | |||
140 | /* implement the pci_ DMA API in terms of the generic device dma_ one */ | 138 | /* implement the pci_ DMA API in terms of the generic device dma_ one */ |
141 | #include <asm-generic/pci-dma-compat.h> | 139 | #include <asm-generic/pci-dma-compat.h> |
142 | 140 | ||
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index d475b4398d8b..a0a9779084d1 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -509,6 +509,11 @@ do { \ | |||
509 | * it in software. The address used in the cmpxchg16 instruction must be | 509 | * it in software. The address used in the cmpxchg16 instruction must be |
510 | * aligned to a 16 byte boundary. | 510 | * aligned to a 16 byte boundary. |
511 | */ | 511 | */ |
512 | #ifdef CONFIG_SMP | ||
513 | #define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP3 | ||
514 | #else | ||
515 | #define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP2 | ||
516 | #endif | ||
512 | #define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \ | 517 | #define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \ |
513 | ({ \ | 518 | ({ \ |
514 | char __ret; \ | 519 | char __ret; \ |
@@ -517,7 +522,7 @@ do { \ | |||
517 | typeof(o2) __o2 = o2; \ | 522 | typeof(o2) __o2 = o2; \ |
518 | typeof(o2) __n2 = n2; \ | 523 | typeof(o2) __n2 = n2; \ |
519 | typeof(o2) __dummy; \ | 524 | typeof(o2) __dummy; \ |
520 | alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \ | 525 | alternative_io(CMPXCHG16B_EMU_CALL, \ |
521 | "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \ | 526 | "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \ |
522 | X86_FEATURE_CX16, \ | 527 | X86_FEATURE_CX16, \ |
523 | ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ | 528 | ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ |
@@ -542,6 +547,33 @@ do { \ | |||
542 | old__; \ | 547 | old__; \ |
543 | }) | 548 | }) |
544 | 549 | ||
550 | static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, | ||
551 | const unsigned long __percpu *addr) | ||
552 | { | ||
553 | unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG; | ||
554 | |||
555 | return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0; | ||
556 | } | ||
557 | |||
558 | static inline int x86_this_cpu_variable_test_bit(int nr, | ||
559 | const unsigned long __percpu *addr) | ||
560 | { | ||
561 | int oldbit; | ||
562 | |||
563 | asm volatile("bt "__percpu_arg(2)",%1\n\t" | ||
564 | "sbb %0,%0" | ||
565 | : "=r" (oldbit) | ||
566 | : "m" (*(unsigned long *)addr), "Ir" (nr)); | ||
567 | |||
568 | return oldbit; | ||
569 | } | ||
570 | |||
571 | #define x86_this_cpu_test_bit(nr, addr) \ | ||
572 | (__builtin_constant_p((nr)) \ | ||
573 | ? x86_this_cpu_constant_test_bit((nr), (addr)) \ | ||
574 | : x86_this_cpu_variable_test_bit((nr), (addr))) | ||
575 | |||
576 | |||
545 | #include <asm-generic/percpu.h> | 577 | #include <asm-generic/percpu.h> |
546 | 578 | ||
547 | /* We can use this directly for local CPU (faster). */ | 579 | /* We can use this directly for local CPU (faster). */ |
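x86_this_cpu_test_bit() picks between the two helpers with __builtin_constant_p(): a constant bit number folds to a percpu_read() plus mask, a variable one uses the bt instruction. A hedged usage sketch (the per-cpu bitmap is hypothetical):

        static DEFINE_PER_CPU(unsigned long, toy_cpu_flags);

        static int toy_flag_set(unsigned int nr)
        {
                /* nr is not a compile-time constant here, so this takes the
                   bt-based x86_this_cpu_variable_test_bit() path */
                return x86_this_cpu_test_bit(nr, &toy_cpu_flags);
        }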
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 7db7723d1f32..d56187c6b838 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -299,6 +299,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, | |||
299 | /* Install a pte for a particular vaddr in kernel space. */ | 299 | /* Install a pte for a particular vaddr in kernel space. */ |
300 | void set_pte_vaddr(unsigned long vaddr, pte_t pte); | 300 | void set_pte_vaddr(unsigned long vaddr, pte_t pte); |
301 | 301 | ||
302 | extern void native_pagetable_reserve(u64 start, u64 end); | ||
302 | #ifdef CONFIG_X86_32 | 303 | #ifdef CONFIG_X86_32 |
303 | extern void native_pagetable_setup_start(pgd_t *base); | 304 | extern void native_pagetable_setup_start(pgd_t *base); |
304 | extern void native_pagetable_setup_done(pgd_t *base); | 305 | extern void native_pagetable_setup_done(pgd_t *base); |
diff --git a/arch/x86/include/asm/probe_roms.h b/arch/x86/include/asm/probe_roms.h new file mode 100644 index 000000000000..4950a0b1d09c --- /dev/null +++ b/arch/x86/include/asm/probe_roms.h | |||
@@ -0,0 +1,8 @@ | |||
1 | #ifndef _PROBE_ROMS_H_ | ||
2 | #define _PROBE_ROMS_H_ | ||
3 | struct pci_dev; | ||
4 | |||
5 | extern void __iomem *pci_map_biosrom(struct pci_dev *pdev); | ||
6 | extern void pci_unmap_biosrom(void __iomem *rom); | ||
7 | extern size_t pci_biosrom_size(struct pci_dev *pdev); | ||
8 | #endif | ||
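The new header exports helpers for mapping a PCI device's BIOS/option ROM found during the early ROM scan. A hedged sketch of a driver copying its ROM contents out (function and buffer names are illustrative):

        static int toy_copy_biosrom(struct pci_dev *pdev, void *buf, size_t buf_len)
        {
                void __iomem *rom = pci_map_biosrom(pdev);
                size_t len;

                if (!rom)
                        return -ENODEV;
                len = min(pci_biosrom_size(pdev), buf_len);
                memcpy_fromio(buf, rom, len);
                pci_unmap_biosrom(rom);
                return 0;
        }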
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index a898a2b6e10c..59ab4dffa377 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h | |||
@@ -60,6 +60,7 @@ | |||
60 | #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ | 60 | #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ |
61 | #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ | 61 | #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ |
62 | #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ | 62 | #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ |
63 | #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ | ||
63 | 64 | ||
64 | /* | 65 | /* |
65 | * x86-64 Task Priority Register, CR8 | 66 | * x86-64 Task Priority Register, CR8 |
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 1babf8adecdf..94e7618fcac8 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -136,6 +136,7 @@ struct cpuinfo_x86; | |||
136 | struct task_struct; | 136 | struct task_struct; |
137 | 137 | ||
138 | extern unsigned long profile_pc(struct pt_regs *regs); | 138 | extern unsigned long profile_pc(struct pt_regs *regs); |
139 | #define profile_pc profile_pc | ||
139 | 140 | ||
140 | extern unsigned long | 141 | extern unsigned long |
141 | convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); | 142 | convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); |
@@ -202,20 +203,11 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) | |||
202 | #endif | 203 | #endif |
203 | } | 204 | } |
204 | 205 | ||
205 | static inline unsigned long instruction_pointer(struct pt_regs *regs) | 206 | #define GET_IP(regs) ((regs)->ip) |
206 | { | 207 | #define GET_FP(regs) ((regs)->bp) |
207 | return regs->ip; | 208 | #define GET_USP(regs) ((regs)->sp) |
208 | } | ||
209 | |||
210 | static inline unsigned long frame_pointer(struct pt_regs *regs) | ||
211 | { | ||
212 | return regs->bp; | ||
213 | } | ||
214 | 209 | ||
215 | static inline unsigned long user_stack_pointer(struct pt_regs *regs) | 210 | #include <asm-generic/ptrace.h> |
216 | { | ||
217 | return regs->sp; | ||
218 | } | ||
219 | 211 | ||
220 | /* Query offset/name of register from its name/offset */ | 212 | /* Query offset/name of register from its name/offset */ |
221 | extern int regs_query_register_offset(const char *name); | 213 | extern int regs_query_register_offset(const char *name); |
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index db8aa19a08a2..9756551ec760 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h | |||
@@ -88,7 +88,7 @@ void *extend_brk(size_t size, size_t align); | |||
88 | * executable.) | 88 | * executable.) |
89 | */ | 89 | */ |
90 | #define RESERVE_BRK(name,sz) \ | 90 | #define RESERVE_BRK(name,sz) \ |
91 | static void __section(.discard.text) __used \ | 91 | static void __section(.discard.text) __used notrace \ |
92 | __brk_reservation_fn_##name##__(void) { \ | 92 | __brk_reservation_fn_##name##__(void) { \ |
93 | asm volatile ( \ | 93 | asm volatile ( \ |
94 | ".pushsection .brk_reservation,\"aw\",@nobits;" \ | 94 | ".pushsection .brk_reservation,\"aw\",@nobits;" \ |
@@ -104,10 +104,10 @@ void *extend_brk(size_t size, size_t align); | |||
104 | type *name; \ | 104 | type *name; \ |
105 | RESERVE_BRK(name, sizeof(type) * entries) | 105 | RESERVE_BRK(name, sizeof(type) * entries) |
106 | 106 | ||
107 | extern void probe_roms(void); | ||
107 | #ifdef __i386__ | 108 | #ifdef __i386__ |
108 | 109 | ||
109 | void __init i386_start_kernel(void); | 110 | void __init i386_start_kernel(void); |
110 | extern void probe_roms(void); | ||
111 | 111 | ||
112 | #else | 112 | #else |
113 | void __init x86_64_start_kernel(char *real_mode); | 113 | void __init x86_64_start_kernel(char *real_mode); |
diff --git a/arch/x86/include/asm/srat.h b/arch/x86/include/asm/srat.h deleted file mode 100644 index b508d639d1a7..000000000000 --- a/arch/x86/include/asm/srat.h +++ /dev/null | |||
@@ -1,39 +0,0 @@ | |||
1 | /* | ||
2 | * Some of the code in this file has been gleaned from the 64 bit | ||
3 | * discontigmem support code base. | ||
4 | * | ||
5 | * Copyright (C) 2002, IBM Corp. | ||
6 | * | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, but | ||
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
17 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
18 | * details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
23 | * | ||
24 | * Send feedback to Pat Gaughen <gone@us.ibm.com> | ||
25 | */ | ||
26 | |||
27 | #ifndef _ASM_X86_SRAT_H | ||
28 | #define _ASM_X86_SRAT_H | ||
29 | |||
30 | #ifdef CONFIG_ACPI_NUMA | ||
31 | extern int get_memcfg_from_srat(void); | ||
32 | #else | ||
33 | static inline int get_memcfg_from_srat(void) | ||
34 | { | ||
35 | return 0; | ||
36 | } | ||
37 | #endif | ||
38 | |||
39 | #endif /* _ASM_X86_SRAT_H */ | ||
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index d7e89c83645d..70bbe39043a9 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h | |||
@@ -37,9 +37,6 @@ print_context_stack_bp(struct thread_info *tinfo, | |||
37 | /* Generic stack tracer with callbacks */ | 37 | /* Generic stack tracer with callbacks */ |
38 | 38 | ||
39 | struct stacktrace_ops { | 39 | struct stacktrace_ops { |
40 | void (*warning)(void *data, char *msg); | ||
41 | /* msg must contain %s for the symbol */ | ||
42 | void (*warning_symbol)(void *data, char *msg, unsigned long symbol); | ||
43 | void (*address)(void *data, unsigned long address, int reliable); | 40 | void (*address)(void *data, unsigned long address, int reliable); |
44 | /* On negative return stop dumping */ | 41 | /* On negative return stop dumping */ |
45 | int (*stack)(void *data, char *name); | 42 | int (*stack)(void *data, char *name); |
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index fd921c3a6841..487055c8c1aa 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h | |||
@@ -9,8 +9,6 @@ | |||
9 | #include <asm/desc.h> | 9 | #include <asm/desc.h> |
10 | #include <asm/i387.h> | 10 | #include <asm/i387.h> |
11 | 11 | ||
12 | static inline int arch_prepare_suspend(void) { return 0; } | ||
13 | |||
14 | /* image of the saved processor state */ | 12 | /* image of the saved processor state */ |
15 | struct saved_context { | 13 | struct saved_context { |
16 | u16 es, fs, gs, ss; | 14 | u16 es, fs, gs, ss; |
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 8d942afae681..09b0bf104156 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h | |||
@@ -9,11 +9,6 @@ | |||
9 | #include <asm/desc.h> | 9 | #include <asm/desc.h> |
10 | #include <asm/i387.h> | 10 | #include <asm/i387.h> |
11 | 11 | ||
12 | static inline int arch_prepare_suspend(void) | ||
13 | { | ||
14 | return 0; | ||
15 | } | ||
16 | |||
17 | /* | 12 | /* |
18 | * Image of the saved processor state, used by the low level ACPI suspend to | 13 | * Image of the saved processor state, used by the low level ACPI suspend to |
19 | * RAM code and by the low level hibernation code. | 14 | * RAM code and by the low level hibernation code. |
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 12569e691ce3..c2ff2a1d845e 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h | |||
@@ -303,24 +303,81 @@ static inline void native_wbinvd(void) | |||
303 | #ifdef CONFIG_PARAVIRT | 303 | #ifdef CONFIG_PARAVIRT |
304 | #include <asm/paravirt.h> | 304 | #include <asm/paravirt.h> |
305 | #else | 305 | #else |
306 | #define read_cr0() (native_read_cr0()) | 306 | |
307 | #define write_cr0(x) (native_write_cr0(x)) | 307 | static inline unsigned long read_cr0(void) |
308 | #define read_cr2() (native_read_cr2()) | 308 | { |
309 | #define write_cr2(x) (native_write_cr2(x)) | 309 | return native_read_cr0(); |
310 | #define read_cr3() (native_read_cr3()) | 310 | } |
311 | #define write_cr3(x) (native_write_cr3(x)) | 311 | |
312 | #define read_cr4() (native_read_cr4()) | 312 | static inline void write_cr0(unsigned long x) |
313 | #define read_cr4_safe() (native_read_cr4_safe()) | 313 | { |
314 | #define write_cr4(x) (native_write_cr4(x)) | 314 | native_write_cr0(x); |
315 | #define wbinvd() (native_wbinvd()) | 315 | } |
316 | |||
317 | static inline unsigned long read_cr2(void) | ||
318 | { | ||
319 | return native_read_cr2(); | ||
320 | } | ||
321 | |||
322 | static inline void write_cr2(unsigned long x) | ||
323 | { | ||
324 | native_write_cr2(x); | ||
325 | } | ||
326 | |||
327 | static inline unsigned long read_cr3(void) | ||
328 | { | ||
329 | return native_read_cr3(); | ||
330 | } | ||
331 | |||
332 | static inline void write_cr3(unsigned long x) | ||
333 | { | ||
334 | native_write_cr3(x); | ||
335 | } | ||
336 | |||
337 | static inline unsigned long read_cr4(void) | ||
338 | { | ||
339 | return native_read_cr4(); | ||
340 | } | ||
341 | |||
342 | static inline unsigned long read_cr4_safe(void) | ||
343 | { | ||
344 | return native_read_cr4_safe(); | ||
345 | } | ||
346 | |||
347 | static inline void write_cr4(unsigned long x) | ||
348 | { | ||
349 | native_write_cr4(x); | ||
350 | } | ||
351 | |||
352 | static inline void wbinvd(void) | ||
353 | { | ||
354 | native_wbinvd(); | ||
355 | } | ||
356 | |||
316 | #ifdef CONFIG_X86_64 | 357 | #ifdef CONFIG_X86_64 |
317 | #define read_cr8() (native_read_cr8()) | 358 | |
318 | #define write_cr8(x) (native_write_cr8(x)) | 359 | static inline unsigned long read_cr8(void) |
319 | #define load_gs_index native_load_gs_index | 360 | { |
361 | return native_read_cr8(); | ||
362 | } | ||
363 | |||
364 | static inline void write_cr8(unsigned long x) | ||
365 | { | ||
366 | native_write_cr8(x); | ||
367 | } | ||
368 | |||
369 | static inline void load_gs_index(unsigned selector) | ||
370 | { | ||
371 | native_load_gs_index(selector); | ||
372 | } | ||
373 | |||
320 | #endif | 374 | #endif |
321 | 375 | ||
322 | /* Clear the 'TS' bit */ | 376 | /* Clear the 'TS' bit */ |
323 | #define clts() (native_clts()) | 377 | static inline void clts(void) |
378 | { | ||
379 | native_clts(); | ||
380 | } | ||
324 | 381 | ||
325 | #endif/* CONFIG_PARAVIRT */ | 382 | #endif/* CONFIG_PARAVIRT */ |
326 | 383 | ||
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 910a7084f7f2..c00692476e9f 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
@@ -93,19 +93,11 @@ extern void setup_node_to_cpumask_map(void); | |||
93 | #define pcibus_to_node(bus) __pcibus_to_node(bus) | 93 | #define pcibus_to_node(bus) __pcibus_to_node(bus) |
94 | 94 | ||
95 | #ifdef CONFIG_X86_32 | 95 | #ifdef CONFIG_X86_32 |
96 | extern unsigned long node_start_pfn[]; | ||
97 | extern unsigned long node_end_pfn[]; | ||
98 | extern unsigned long node_remap_size[]; | ||
99 | #define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid]) | ||
100 | |||
101 | # define SD_CACHE_NICE_TRIES 1 | 96 | # define SD_CACHE_NICE_TRIES 1 |
102 | # define SD_IDLE_IDX 1 | 97 | # define SD_IDLE_IDX 1 |
103 | |||
104 | #else | 98 | #else |
105 | |||
106 | # define SD_CACHE_NICE_TRIES 2 | 99 | # define SD_CACHE_NICE_TRIES 2 |
107 | # define SD_IDLE_IDX 2 | 100 | # define SD_IDLE_IDX 2 |
108 | |||
109 | #endif | 101 | #endif |
110 | 102 | ||
111 | /* sched_domains SD_NODE_INIT for NUMA machines */ | 103 | /* sched_domains SD_NODE_INIT for NUMA machines */ |
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 83e2efd181e2..9db5583b6d38 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h | |||
@@ -51,6 +51,10 @@ extern int unsynchronized_tsc(void); | |||
51 | extern int check_tsc_unstable(void); | 51 | extern int check_tsc_unstable(void); |
52 | extern unsigned long native_calibrate_tsc(void); | 52 | extern unsigned long native_calibrate_tsc(void); |
53 | 53 | ||
54 | #ifdef CONFIG_X86_64 | ||
55 | extern cycles_t vread_tsc(void); | ||
56 | #endif | ||
57 | |||
54 | /* | 58 | /* |
55 | * Boot-time check whether the TSCs are synchronized across | 59 | * Boot-time check whether the TSCs are synchronized across |
56 | * all CPUs/cores: | 60 | * all CPUs/cores: |
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index abd3e0ea762a..99ddd148a760 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -6,7 +6,6 @@ | |||
6 | #include <linux/errno.h> | 6 | #include <linux/errno.h> |
7 | #include <linux/compiler.h> | 7 | #include <linux/compiler.h> |
8 | #include <linux/thread_info.h> | 8 | #include <linux/thread_info.h> |
9 | #include <linux/prefetch.h> | ||
10 | #include <linux/string.h> | 9 | #include <linux/string.h> |
11 | #include <asm/asm.h> | 10 | #include <asm/asm.h> |
12 | #include <asm/page.h> | 11 | #include <asm/page.h> |
@@ -42,7 +41,7 @@ | |||
42 | * Returns 0 if the range is valid, nonzero otherwise. | 41 | * Returns 0 if the range is valid, nonzero otherwise. |
43 | * | 42 | * |
44 | * This is equivalent to the following test: | 43 | * This is equivalent to the following test: |
45 | * (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64) | 44 | * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64) |
46 | * | 45 | * |
47 | * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... | 46 | * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... |
48 | */ | 47 | */ |
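The one-character comment fix above matters for the boundary case: access_ok() treats a range as bad only when addr + size strictly exceeds the segment limit, and the sum is formed with one extra bit so a wrapping range cannot slip through. A stand-alone sketch of that "u33" comparison (range_not_ok() and the 3 GB limit are made up for illustration, not the kernel's implementation):

#include <stdio.h>
#include <stdint.h>

/* Nonzero means the range is invalid, mirroring the ">" in the fixed comment */
static int range_not_ok(uint32_t addr, uint32_t size, uint32_t limit)
{
	/* 64-bit addition stands in for the 33-bit add-with-carry */
	return (uint64_t)addr + size > limit;
}

int main(void)
{
	uint32_t limit = 0xc0000000;	/* e.g. a 3 GB user address limit */

	printf("%d\n", range_not_ok(0x10000000, 0x1000, limit)); /* 0: inside */
	printf("%d\n", range_not_ok(0xbffff000, 0x1000, limit)); /* 0: ends exactly at the limit */
	printf("%d\n", range_not_ok(0xbffff000, 0x1001, limit)); /* 1: one byte past the limit */
	printf("%d\n", range_not_ok(0xffffffff, 0x10,   limit)); /* 1: would wrap in 32 bits */
	return 0;
}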
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 088d09fb1615..566e803cc602 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h | |||
@@ -6,7 +6,6 @@ | |||
6 | */ | 6 | */ |
7 | #include <linux/errno.h> | 7 | #include <linux/errno.h> |
8 | #include <linux/thread_info.h> | 8 | #include <linux/thread_info.h> |
9 | #include <linux/prefetch.h> | ||
10 | #include <linux/string.h> | 9 | #include <linux/string.h> |
11 | #include <asm/asm.h> | 10 | #include <asm/asm.h> |
12 | #include <asm/page.h> | 11 | #include <asm/page.h> |
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 316708d5af92..1c66d30971ad 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h | |||
@@ -6,7 +6,6 @@ | |||
6 | */ | 6 | */ |
7 | #include <linux/compiler.h> | 7 | #include <linux/compiler.h> |
8 | #include <linux/errno.h> | 8 | #include <linux/errno.h> |
9 | #include <linux/prefetch.h> | ||
10 | #include <linux/lockdep.h> | 9 | #include <linux/lockdep.h> |
11 | #include <asm/alternative.h> | 10 | #include <asm/alternative.h> |
12 | #include <asm/cpufeature.h> | 11 | #include <asm/cpufeature.h> |
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index a755ef5e5977..fb6a625c99bf 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h | |||
@@ -350,10 +350,11 @@ | |||
350 | #define __NR_open_by_handle_at 342 | 350 | #define __NR_open_by_handle_at 342 |
351 | #define __NR_clock_adjtime 343 | 351 | #define __NR_clock_adjtime 343 |
352 | #define __NR_syncfs 344 | 352 | #define __NR_syncfs 344 |
353 | #define __NR_sendmmsg 345 | ||
353 | 354 | ||
354 | #ifdef __KERNEL__ | 355 | #ifdef __KERNEL__ |
355 | 356 | ||
356 | #define NR_syscalls 345 | 357 | #define NR_syscalls 346 |
357 | 358 | ||
358 | #define __ARCH_WANT_IPC_PARSE_VERSION | 359 | #define __ARCH_WANT_IPC_PARSE_VERSION |
359 | #define __ARCH_WANT_OLD_READDIR | 360 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 160fa76bd578..79f90eb15aad 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
@@ -677,6 +677,8 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) | |||
677 | __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) | 677 | __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) |
678 | #define __NR_syncfs 306 | 678 | #define __NR_syncfs 306 |
679 | __SYSCALL(__NR_syncfs, sys_syncfs) | 679 | __SYSCALL(__NR_syncfs, sys_syncfs) |
680 | #define __NR_sendmmsg 307 | ||
681 | __SYSCALL(__NR_sendmmsg, sys_sendmmsg) | ||
680 | 682 | ||
681 | #ifndef __NO_STUBS | 683 | #ifndef __NO_STUBS |
682 | #define __ARCH_WANT_OLD_READDIR | 684 | #define __ARCH_WANT_OLD_READDIR |
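Both syscall tables above gain sendmmsg (number 345 on 32-bit, 307 on x86-64), which sends a batch of datagrams with a single kernel entry. A minimal user-space sketch, assuming a kernel and libc that already expose the sendmmsg() wrapper (the address, port and payloads are arbitrary):

#define _GNU_SOURCE
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_port   = htons(9999),	/* arbitrary test port */
	};
	struct iovec iov[2];
	struct mmsghdr msgs[2];
	int i, sent;

	if (fd < 0)
		return 1;
	inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);

	iov[0].iov_base = (void *)"one";
	iov[0].iov_len  = 3;
	iov[1].iov_base = (void *)"two";
	iov[1].iov_len  = 3;

	memset(msgs, 0, sizeof(msgs));
	for (i = 0; i < 2; i++) {
		msgs[i].msg_hdr.msg_name    = &dst;
		msgs[i].msg_hdr.msg_namelen = sizeof(dst);
		msgs[i].msg_hdr.msg_iov     = &iov[i];
		msgs[i].msg_hdr.msg_iovlen  = 1;
	}

	/* One syscall instead of two separate sendmsg() calls */
	sent = sendmmsg(fd, msgs, 2, 0);
	printf("sendmmsg() sent %d datagrams\n", sent);
	return 0;
}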
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 3e094af443c3..130f1eeee5fe 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h | |||
@@ -94,6 +94,8 @@ | |||
94 | /* after this # consecutive successes, bump up the throttle if it was lowered */ | 94 | /* after this # consecutive successes, bump up the throttle if it was lowered */ |
95 | #define COMPLETE_THRESHOLD 5 | 95 | #define COMPLETE_THRESHOLD 5 |
96 | 96 | ||
97 | #define UV_LB_SUBNODEID 0x10 | ||
98 | |||
97 | /* | 99 | /* |
98 | * number of entries in the destination side payload queue | 100 | * number of entries in the destination side payload queue |
99 | */ | 101 | */ |
@@ -124,7 +126,7 @@ | |||
124 | * The distribution specification (32 bytes) is interpreted as a 256-bit | 126 | * The distribution specification (32 bytes) is interpreted as a 256-bit |
125 | * distribution vector. Adjacent bits correspond to consecutive even numbered | 127 | * distribution vector. Adjacent bits correspond to consecutive even numbered |
126 | * nodeIDs. The result of adding the index of a given bit to the 15-bit | 128 | * nodeIDs. The result of adding the index of a given bit to the 15-bit |
127 | * 'base_dest_nodeid' field of the header corresponds to the | 129 | * 'base_dest_nasid' field of the header corresponds to the |
128 | * destination nodeID associated with that specified bit. | 130 | * destination nodeID associated with that specified bit. |
129 | */ | 131 | */ |
130 | struct bau_target_uvhubmask { | 132 | struct bau_target_uvhubmask { |
@@ -176,7 +178,7 @@ struct bau_msg_payload { | |||
176 | struct bau_msg_header { | 178 | struct bau_msg_header { |
177 | unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ | 179 | unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ |
178 | /* bits 5:0 */ | 180 | /* bits 5:0 */ |
179 | unsigned int base_dest_nodeid:15; /* nasid of the */ | 181 | unsigned int base_dest_nasid:15; /* nasid of the */ |
180 | /* bits 20:6 */ /* first bit in uvhub map */ | 182 | /* bits 20:6 */ /* first bit in uvhub map */ |
181 | unsigned int command:8; /* message type */ | 183 | unsigned int command:8; /* message type */ |
182 | /* bits 28:21 */ | 184 | /* bits 28:21 */ |
@@ -378,6 +380,10 @@ struct ptc_stats { | |||
378 | unsigned long d_rcanceled; /* number of messages canceled by resets */ | 380 | unsigned long d_rcanceled; /* number of messages canceled by resets */ |
379 | }; | 381 | }; |
380 | 382 | ||
383 | struct hub_and_pnode { | ||
384 | short uvhub; | ||
385 | short pnode; | ||
386 | }; | ||
381 | /* | 387 | /* |
382 | * one per-cpu; to locate the software tables | 388 | * one per-cpu; to locate the software tables |
383 | */ | 389 | */ |
@@ -399,10 +405,12 @@ struct bau_control { | |||
399 | int baudisabled; | 405 | int baudisabled; |
400 | int set_bau_off; | 406 | int set_bau_off; |
401 | short cpu; | 407 | short cpu; |
408 | short osnode; | ||
402 | short uvhub_cpu; | 409 | short uvhub_cpu; |
403 | short uvhub; | 410 | short uvhub; |
404 | short cpus_in_socket; | 411 | short cpus_in_socket; |
405 | short cpus_in_uvhub; | 412 | short cpus_in_uvhub; |
413 | short partition_base_pnode; | ||
406 | unsigned short message_number; | 414 | unsigned short message_number; |
407 | unsigned short uvhub_quiesce; | 415 | unsigned short uvhub_quiesce; |
408 | short socket_acknowledge_count[DEST_Q_SIZE]; | 416 | short socket_acknowledge_count[DEST_Q_SIZE]; |
@@ -422,15 +430,16 @@ struct bau_control { | |||
422 | int congested_period; | 430 | int congested_period; |
423 | cycles_t period_time; | 431 | cycles_t period_time; |
424 | long period_requests; | 432 | long period_requests; |
433 | struct hub_and_pnode *target_hub_and_pnode; | ||
425 | }; | 434 | }; |
426 | 435 | ||
427 | static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) | 436 | static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) |
428 | { | 437 | { |
429 | return constant_test_bit(uvhub, &dstp->bits[0]); | 438 | return constant_test_bit(uvhub, &dstp->bits[0]); |
430 | } | 439 | } |
431 | static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp) | 440 | static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp) |
432 | { | 441 | { |
433 | __set_bit(uvhub, &dstp->bits[0]); | 442 | __set_bit(pnode, &dstp->bits[0]); |
434 | } | 443 | } |
435 | static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, | 444 | static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, |
436 | int nbits) | 445 | int nbits) |
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index a501741c2335..4298002d0c83 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h | |||
@@ -398,6 +398,8 @@ struct uv_blade_info { | |||
398 | unsigned short nr_online_cpus; | 398 | unsigned short nr_online_cpus; |
399 | unsigned short pnode; | 399 | unsigned short pnode; |
400 | short memory_nid; | 400 | short memory_nid; |
401 | spinlock_t nmi_lock; | ||
402 | unsigned long nmi_count; | ||
401 | }; | 403 | }; |
402 | extern struct uv_blade_info *uv_blade_info; | 404 | extern struct uv_blade_info *uv_blade_info; |
403 | extern short *uv_node_to_blade; | 405 | extern short *uv_node_to_blade; |
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 20cafeac7455..f5bb64a823d7 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h | |||
@@ -5,7 +5,7 @@ | |||
5 | * | 5 | * |
6 | * SGI UV MMR definitions | 6 | * SGI UV MMR definitions |
7 | * | 7 | * |
8 | * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. | 8 | * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #ifndef _ASM_X86_UV_UV_MMRS_H | 11 | #ifndef _ASM_X86_UV_UV_MMRS_H |
@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u { | |||
1099 | } s; | 1099 | } s; |
1100 | }; | 1100 | }; |
1101 | 1101 | ||
1102 | /* ========================================================================= */ | ||
1103 | /* UVH_SCRATCH5 */ | ||
1104 | /* ========================================================================= */ | ||
1105 | #define UVH_SCRATCH5 0x2d0200UL | ||
1106 | #define UVH_SCRATCH5_32 0x00778 | ||
1107 | |||
1108 | #define UVH_SCRATCH5_SCRATCH5_SHFT 0 | ||
1109 | #define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL | ||
1110 | union uvh_scratch5_u { | ||
1111 | unsigned long v; | ||
1112 | struct uvh_scratch5_s { | ||
1113 | unsigned long scratch5 : 64; /* RW, W1CS */ | ||
1114 | } s; | ||
1115 | }; | ||
1102 | 1116 | ||
1103 | #endif /* __ASM_UV_MMRS_X86_H__ */ | 1117 | #endif /* __ASM_UV_MMRS_X86_H__ */ |
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 9064052b73de..bb0522850b74 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h | |||
@@ -1,20 +1,6 @@ | |||
1 | #ifndef _ASM_X86_VDSO_H | 1 | #ifndef _ASM_X86_VDSO_H |
2 | #define _ASM_X86_VDSO_H | 2 | #define _ASM_X86_VDSO_H |
3 | 3 | ||
4 | #ifdef CONFIG_X86_64 | ||
5 | extern const char VDSO64_PRELINK[]; | ||
6 | |||
7 | /* | ||
8 | * Given a pointer to the vDSO image, find the pointer to VDSO64_name | ||
9 | * as that symbol is defined in the vDSO sources or linker script. | ||
10 | */ | ||
11 | #define VDSO64_SYMBOL(base, name) \ | ||
12 | ({ \ | ||
13 | extern const char VDSO64_##name[]; \ | ||
14 | (void *)(VDSO64_##name - VDSO64_PRELINK + (unsigned long)(base)); \ | ||
15 | }) | ||
16 | #endif | ||
17 | |||
18 | #if defined CONFIG_X86_32 || defined CONFIG_COMPAT | 4 | #if defined CONFIG_X86_32 || defined CONFIG_COMPAT |
19 | extern const char VDSO32_PRELINK[]; | 5 | extern const char VDSO32_PRELINK[]; |
20 | 6 | ||
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 3d61e204826f..646b4c1ca695 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h | |||
@@ -23,8 +23,6 @@ struct vsyscall_gtod_data { | |||
23 | struct timespec wall_to_monotonic; | 23 | struct timespec wall_to_monotonic; |
24 | struct timespec wall_time_coarse; | 24 | struct timespec wall_time_coarse; |
25 | }; | 25 | }; |
26 | extern struct vsyscall_gtod_data __vsyscall_gtod_data | ||
27 | __section_vsyscall_gtod_data; | ||
28 | extern struct vsyscall_gtod_data vsyscall_gtod_data; | 26 | extern struct vsyscall_gtod_data vsyscall_gtod_data; |
29 | 27 | ||
30 | #endif /* _ASM_X86_VGTOD_H */ | 28 | #endif /* _ASM_X86_VGTOD_H */ |
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index d0983d255fbd..d55597351f6a 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h | |||
@@ -16,27 +16,19 @@ enum vsyscall_num { | |||
16 | #ifdef __KERNEL__ | 16 | #ifdef __KERNEL__ |
17 | #include <linux/seqlock.h> | 17 | #include <linux/seqlock.h> |
18 | 18 | ||
19 | #define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16))) | ||
20 | #define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16))) | ||
21 | |||
22 | /* Definitions for CONFIG_GENERIC_TIME definitions */ | 19 | /* Definitions for CONFIG_GENERIC_TIME definitions */ |
23 | #define __section_vsyscall_gtod_data __attribute__ \ | ||
24 | ((unused, __section__ (".vsyscall_gtod_data"),aligned(16))) | ||
25 | #define __section_vsyscall_clock __attribute__ \ | ||
26 | ((unused, __section__ (".vsyscall_clock"),aligned(16))) | ||
27 | #define __vsyscall_fn \ | 20 | #define __vsyscall_fn \ |
28 | __attribute__ ((unused, __section__(".vsyscall_fn"))) notrace | 21 | __attribute__ ((unused, __section__(".vsyscall_fn"))) notrace |
29 | 22 | ||
30 | #define VGETCPU_RDTSCP 1 | 23 | #define VGETCPU_RDTSCP 1 |
31 | #define VGETCPU_LSL 2 | 24 | #define VGETCPU_LSL 2 |
32 | 25 | ||
33 | extern int __vgetcpu_mode; | ||
34 | extern volatile unsigned long __jiffies; | ||
35 | |||
36 | /* kernel space (writeable) */ | 26 | /* kernel space (writeable) */ |
37 | extern int vgetcpu_mode; | 27 | extern int vgetcpu_mode; |
38 | extern struct timezone sys_tz; | 28 | extern struct timezone sys_tz; |
39 | 29 | ||
30 | #include <asm/vvar.h> | ||
31 | |||
40 | extern void map_vsyscall(void); | 32 | extern void map_vsyscall(void); |
41 | 33 | ||
42 | #endif /* __KERNEL__ */ | 34 | #endif /* __KERNEL__ */ |
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h new file mode 100644 index 000000000000..341b3559452b --- /dev/null +++ b/arch/x86/include/asm/vvar.h | |||
@@ -0,0 +1,52 @@ | |||
1 | /* | ||
2 | * vvar.h: Shared vDSO/kernel variable declarations | ||
3 | * Copyright (c) 2011 Andy Lutomirski | ||
4 | * Subject to the GNU General Public License, version 2 | ||
5 | * | ||
6 | * A handful of variables are accessible (read-only) from userspace | ||
7 | * code in the vsyscall page and the vdso. They are declared here. | ||
8 | * Some other file must define them with DEFINE_VVAR. | ||
9 | * | ||
10 | * In normal kernel code, they are used like any other variable. | ||
11 | * In user code, they are accessed through the VVAR macro. | ||
12 | * | ||
13 | * Each of these variables lives in the vsyscall page, and each | ||
14 | * one needs a unique offset within the little piece of the page | ||
15 | * reserved for vvars. Specify that offset in DECLARE_VVAR. | ||
16 | * (There are 896 bytes available. If you mess up, the linker will | ||
17 | * catch it.) | ||
18 | */ | ||
19 | |||
20 | /* Offset of vars within vsyscall page */ | ||
21 | #define VSYSCALL_VARS_OFFSET (3072 + 128) | ||
22 | |||
23 | #if defined(__VVAR_KERNEL_LDS) | ||
24 | |||
25 | /* The kernel linker script defines its own magic to put vvars in the | ||
26 | * right place. | ||
27 | */ | ||
28 | #define DECLARE_VVAR(offset, type, name) \ | ||
29 | EMIT_VVAR(name, VSYSCALL_VARS_OFFSET + offset) | ||
30 | |||
31 | #else | ||
32 | |||
33 | #define DECLARE_VVAR(offset, type, name) \ | ||
34 | static type const * const vvaraddr_ ## name = \ | ||
35 | (void *)(VSYSCALL_START + VSYSCALL_VARS_OFFSET + (offset)); | ||
36 | |||
37 | #define DEFINE_VVAR(type, name) \ | ||
38 | type __vvar_ ## name \ | ||
39 | __attribute__((section(".vsyscall_var_" #name), aligned(16))) | ||
40 | |||
41 | #define VVAR(name) (*vvaraddr_ ## name) | ||
42 | |||
43 | #endif | ||
44 | |||
45 | /* DECLARE_VVAR(offset, type, name) */ | ||
46 | |||
47 | DECLARE_VVAR(0, volatile unsigned long, jiffies) | ||
48 | DECLARE_VVAR(8, int, vgetcpu_mode) | ||
49 | DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) | ||
50 | |||
51 | #undef DECLARE_VVAR | ||
52 | #undef VSYSCALL_VARS_OFFSET | ||
diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h new file mode 100644 index 000000000000..6bf5b8e478c0 --- /dev/null +++ b/arch/x86/include/asm/x2apic.h | |||
@@ -0,0 +1,62 @@ | |||
1 | /* | ||
2 | * Common bits for X2APIC cluster/physical modes. | ||
3 | */ | ||
4 | |||
5 | #ifndef _ASM_X86_X2APIC_H | ||
6 | #define _ASM_X86_X2APIC_H | ||
7 | |||
8 | #include <asm/apic.h> | ||
9 | #include <asm/ipi.h> | ||
10 | #include <linux/cpumask.h> | ||
11 | |||
12 | /* | ||
13 | * Need to use more than cpu 0, because we need more vectors | ||
14 | * when MSI-X are used. | ||
15 | */ | ||
16 | static const struct cpumask *x2apic_target_cpus(void) | ||
17 | { | ||
18 | return cpu_online_mask; | ||
19 | } | ||
20 | |||
21 | static int x2apic_apic_id_registered(void) | ||
22 | { | ||
23 | return 1; | ||
24 | } | ||
25 | |||
26 | /* | ||
27 | * For now each logical cpu is in its own vector allocation domain. | ||
28 | */ | ||
29 | static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
30 | { | ||
31 | cpumask_clear(retmask); | ||
32 | cpumask_set_cpu(cpu, retmask); | ||
33 | } | ||
34 | |||
35 | static void | ||
36 | __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) | ||
37 | { | ||
38 | unsigned long cfg = __prepare_ICR(0, vector, dest); | ||
39 | native_x2apic_icr_write(cfg, apicid); | ||
40 | } | ||
41 | |||
42 | static unsigned int x2apic_get_apic_id(unsigned long id) | ||
43 | { | ||
44 | return id; | ||
45 | } | ||
46 | |||
47 | static unsigned long x2apic_set_apic_id(unsigned int id) | ||
48 | { | ||
49 | return id; | ||
50 | } | ||
51 | |||
52 | static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) | ||
53 | { | ||
54 | return initial_apicid >> index_msb; | ||
55 | } | ||
56 | |||
57 | static void x2apic_send_IPI_self(int vector) | ||
58 | { | ||
59 | apic_write(APIC_SELF_IPI, vector); | ||
60 | } | ||
61 | |||
62 | #endif /* _ASM_X86_X2APIC_H */ | ||
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 643ebf2e2ad8..d3d859035af9 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -68,6 +68,17 @@ struct x86_init_oem { | |||
68 | }; | 68 | }; |
69 | 69 | ||
70 | /** | 70 | /** |
71 | * struct x86_init_mapping - platform specific initial kernel pagetable setup | ||
72 | * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage | ||
73 | * | ||
74 | * For more details on the purpose of this hook, look in | ||
75 | * init_memory_mapping and the commit that added it. | ||
76 | */ | ||
77 | struct x86_init_mapping { | ||
78 | void (*pagetable_reserve)(u64 start, u64 end); | ||
79 | }; | ||
80 | |||
81 | /** | ||
71 | * struct x86_init_paging - platform specific paging functions | 82 | * struct x86_init_paging - platform specific paging functions |
72 | * @pagetable_setup_start: platform specific pre paging_init() call | 83 | * @pagetable_setup_start: platform specific pre paging_init() call |
73 | * @pagetable_setup_done: platform specific post paging_init() call | 84 | * @pagetable_setup_done: platform specific post paging_init() call |
@@ -123,6 +134,7 @@ struct x86_init_ops { | |||
123 | struct x86_init_mpparse mpparse; | 134 | struct x86_init_mpparse mpparse; |
124 | struct x86_init_irqs irqs; | 135 | struct x86_init_irqs irqs; |
125 | struct x86_init_oem oem; | 136 | struct x86_init_oem oem; |
137 | struct x86_init_mapping mapping; | ||
126 | struct x86_init_paging paging; | 138 | struct x86_init_paging paging; |
127 | struct x86_init_timers timers; | 139 | struct x86_init_timers timers; |
128 | struct x86_init_iommu iommu; | 140 | struct x86_init_iommu iommu; |
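The new x86_init.mapping.pagetable_reserve member above follows the existing x86_init_ops convention: the structure carries a native default that a platform can swap out before the hook is invoked (per the comment, from init_memory_mapping). A user-space sketch of that override pattern, with invented names standing in for the real kernel symbols:

#include <stdio.h>

struct init_mapping_ops {
	void (*pagetable_reserve)(unsigned long long start,
				  unsigned long long end);
};

/* Default policy: reserve the whole range */
static void native_reserve(unsigned long long start, unsigned long long end)
{
	printf("native: reserve 0x%llx-0x%llx\n", start, end);
}

/* A platform override with its own policy */
static void platform_reserve(unsigned long long start, unsigned long long end)
{
	printf("platform: reserve 0x%llx-0x%llx (custom policy)\n", start, end);
}

static struct init_mapping_ops mapping_ops = {
	.pagetable_reserve = native_reserve,	/* compiled-in default */
};

int main(void)
{
	mapping_ops.pagetable_reserve(0x100000, 0x200000);

	/* Platform setup code runs early and replaces the hook */
	mapping_ops.pagetable_reserve = platform_reserve;
	mapping_ops.pagetable_reserve(0x100000, 0x200000);
	return 0;
}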
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 8508bfe52296..d240ea950519 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h | |||
@@ -447,6 +447,13 @@ HYPERVISOR_hvm_op(int op, void *arg) | |||
447 | return _hypercall2(unsigned long, hvm_op, op, arg); | 447 | return _hypercall2(unsigned long, hvm_op, op, arg); |
448 | } | 448 | } |
449 | 449 | ||
450 | static inline int | ||
451 | HYPERVISOR_tmem_op( | ||
452 | struct tmem_op *op) | ||
453 | { | ||
454 | return _hypercall1(int, tmem_op, op); | ||
455 | } | ||
456 | |||
450 | static inline void | 457 | static inline void |
451 | MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) | 458 | MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) |
452 | { | 459 | { |
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index c61934fbf22a..64a619d47d34 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
@@ -47,8 +47,9 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); | |||
47 | extern unsigned long set_phys_range_identity(unsigned long pfn_s, | 47 | extern unsigned long set_phys_range_identity(unsigned long pfn_s, |
48 | unsigned long pfn_e); | 48 | unsigned long pfn_e); |
49 | 49 | ||
50 | extern int m2p_add_override(unsigned long mfn, struct page *page); | 50 | extern int m2p_add_override(unsigned long mfn, struct page *page, |
51 | extern int m2p_remove_override(struct page *page); | 51 | bool clear_pte); |
52 | extern int m2p_remove_override(struct page *page, bool clear_pte); | ||
52 | extern struct page *m2p_find_override(unsigned long mfn); | 53 | extern struct page *m2p_find_override(unsigned long mfn); |
53 | extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); | 54 | extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); |
54 | 55 | ||
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index aa8620989162..4fbda9a3f339 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h | |||
@@ -15,10 +15,26 @@ static inline int pci_xen_hvm_init(void) | |||
15 | #endif | 15 | #endif |
16 | #if defined(CONFIG_XEN_DOM0) | 16 | #if defined(CONFIG_XEN_DOM0) |
17 | void __init xen_setup_pirqs(void); | 17 | void __init xen_setup_pirqs(void); |
18 | int xen_find_device_domain_owner(struct pci_dev *dev); | ||
19 | int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); | ||
20 | int xen_unregister_device_domain_owner(struct pci_dev *dev); | ||
18 | #else | 21 | #else |
19 | static inline void __init xen_setup_pirqs(void) | 22 | static inline void __init xen_setup_pirqs(void) |
20 | { | 23 | { |
21 | } | 24 | } |
25 | static inline int xen_find_device_domain_owner(struct pci_dev *dev) | ||
26 | { | ||
27 | return -1; | ||
28 | } | ||
29 | static inline int xen_register_device_domain_owner(struct pci_dev *dev, | ||
30 | uint16_t domain) | ||
31 | { | ||
32 | return -1; | ||
33 | } | ||
34 | static inline int xen_unregister_device_domain_owner(struct pci_dev *dev) | ||
35 | { | ||
36 | return -1; | ||
37 | } | ||
22 | #endif | 38 | #endif |
23 | 39 | ||
24 | #if defined(CONFIG_PCI_MSI) | 40 | #if defined(CONFIG_PCI_MSI) |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 7338ef2218bc..f5abe3a245b8 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -8,7 +8,6 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) | |||
8 | 8 | ||
9 | ifdef CONFIG_FUNCTION_TRACER | 9 | ifdef CONFIG_FUNCTION_TRACER |
10 | # Do not profile debug and lowlevel utilities | 10 | # Do not profile debug and lowlevel utilities |
11 | CFLAGS_REMOVE_tsc.o = -pg | ||
12 | CFLAGS_REMOVE_rtc.o = -pg | 11 | CFLAGS_REMOVE_rtc.o = -pg |
13 | CFLAGS_REMOVE_paravirt-spinlocks.o = -pg | 12 | CFLAGS_REMOVE_paravirt-spinlocks.o = -pg |
14 | CFLAGS_REMOVE_pvclock.o = -pg | 13 | CFLAGS_REMOVE_pvclock.o = -pg |
@@ -24,22 +23,25 @@ endif | |||
24 | nostackp := $(call cc-option, -fno-stack-protector) | 23 | nostackp := $(call cc-option, -fno-stack-protector) |
25 | CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) | 24 | CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) |
26 | CFLAGS_hpet.o := $(nostackp) | 25 | CFLAGS_hpet.o := $(nostackp) |
27 | CFLAGS_tsc.o := $(nostackp) | 26 | CFLAGS_vread_tsc_64.o := $(nostackp) |
28 | CFLAGS_paravirt.o := $(nostackp) | 27 | CFLAGS_paravirt.o := $(nostackp) |
29 | GCOV_PROFILE_vsyscall_64.o := n | 28 | GCOV_PROFILE_vsyscall_64.o := n |
30 | GCOV_PROFILE_hpet.o := n | 29 | GCOV_PROFILE_hpet.o := n |
31 | GCOV_PROFILE_tsc.o := n | 30 | GCOV_PROFILE_tsc.o := n |
32 | GCOV_PROFILE_paravirt.o := n | 31 | GCOV_PROFILE_paravirt.o := n |
33 | 32 | ||
33 | # vread_tsc_64 is hot and should be fully optimized: | ||
34 | CFLAGS_REMOVE_vread_tsc_64.o = -pg -fno-optimize-sibling-calls | ||
35 | |||
34 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o | 36 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o |
35 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o | 37 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o |
36 | obj-y += time.o ioport.o ldt.o dumpstack.o | 38 | obj-y += time.o ioport.o ldt.o dumpstack.o |
37 | obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o | 39 | obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o |
38 | obj-$(CONFIG_IRQ_WORK) += irq_work.o | 40 | obj-$(CONFIG_IRQ_WORK) += irq_work.o |
39 | obj-$(CONFIG_X86_32) += probe_roms_32.o | 41 | obj-y += probe_roms.o |
40 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o | 42 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o |
41 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | 43 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o |
42 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o | 44 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o |
43 | obj-y += bootflag.o e820.o | 45 | obj-y += bootflag.o e820.o |
44 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o | 46 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o |
45 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o | 47 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o |
@@ -117,7 +119,7 @@ obj-$(CONFIG_OF) += devicetree.o | |||
117 | ifeq ($(CONFIG_X86_64),y) | 119 | ifeq ($(CONFIG_X86_64),y) |
118 | obj-$(CONFIG_AUDIT) += audit_64.o | 120 | obj-$(CONFIG_AUDIT) += audit_64.o |
119 | 121 | ||
120 | obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o | 122 | obj-$(CONFIG_GART_IOMMU) += amd_gart_64.o aperture_64.o |
121 | obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o | 123 | obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o |
122 | obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o | 124 | obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o |
123 | 125 | ||
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 9a966c579af5..4558f0d0822d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -970,7 +970,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) | |||
970 | mp_irq.irqflag = (trigger << 2) | polarity; | 970 | mp_irq.irqflag = (trigger << 2) | polarity; |
971 | mp_irq.srcbus = MP_ISA_BUS; | 971 | mp_irq.srcbus = MP_ISA_BUS; |
972 | mp_irq.srcbusirq = bus_irq; /* IRQ */ | 972 | mp_irq.srcbusirq = bus_irq; /* IRQ */ |
973 | mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */ | 973 | mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */ |
974 | mp_irq.dstirq = pin; /* INTIN# */ | 974 | mp_irq.dstirq = pin; /* INTIN# */ |
975 | 975 | ||
976 | mp_save_irq(&mp_irq); | 976 | mp_save_irq(&mp_irq); |
@@ -1021,7 +1021,7 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
1021 | if (ioapic < 0) | 1021 | if (ioapic < 0) |
1022 | continue; | 1022 | continue; |
1023 | pin = mp_find_ioapic_pin(ioapic, gsi); | 1023 | pin = mp_find_ioapic_pin(ioapic, gsi); |
1024 | dstapic = mp_ioapics[ioapic].apicid; | 1024 | dstapic = mpc_ioapic_id(ioapic); |
1025 | 1025 | ||
1026 | for (idx = 0; idx < mp_irq_entries; idx++) { | 1026 | for (idx = 0; idx < mp_irq_entries; idx++) { |
1027 | struct mpc_intsrc *irq = mp_irqs + idx; | 1027 | struct mpc_intsrc *irq = mp_irqs + idx; |
@@ -1082,7 +1082,7 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, | |||
1082 | mp_irq.srcbus = number; | 1082 | mp_irq.srcbus = number; |
1083 | mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); | 1083 | mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); |
1084 | ioapic = mp_find_ioapic(gsi); | 1084 | ioapic = mp_find_ioapic(gsi); |
1085 | mp_irq.dstapic = mp_ioapics[ioapic].apicid; | 1085 | mp_irq.dstapic = mpc_ioapic_id(ioapic); |
1086 | mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); | 1086 | mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); |
1087 | 1087 | ||
1088 | mp_save_irq(&mp_irq); | 1088 | mp_save_irq(&mp_irq); |
@@ -1113,7 +1113,7 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
1113 | 1113 | ||
1114 | if (ioapic_pin > MP_MAX_IOAPIC_PIN) { | 1114 | if (ioapic_pin > MP_MAX_IOAPIC_PIN) { |
1115 | printk(KERN_ERR "Invalid reference to IOAPIC pin " | 1115 | printk(KERN_ERR "Invalid reference to IOAPIC pin " |
1116 | "%d-%d\n", mp_ioapics[ioapic].apicid, | 1116 | "%d-%d\n", mpc_ioapic_id(ioapic), |
1117 | ioapic_pin); | 1117 | ioapic_pin); |
1118 | return gsi; | 1118 | return gsi; |
1119 | } | 1119 | } |
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index ff93bc1b09c3..18a857ba7a25 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -112,11 +112,6 @@ static int __init acpi_sleep_setup(char *str) | |||
112 | #ifdef CONFIG_HIBERNATION | 112 | #ifdef CONFIG_HIBERNATION |
113 | if (strncmp(str, "s4_nohwsig", 10) == 0) | 113 | if (strncmp(str, "s4_nohwsig", 10) == 0) |
114 | acpi_no_s4_hw_signature(); | 114 | acpi_no_s4_hw_signature(); |
115 | if (strncmp(str, "s4_nonvs", 8) == 0) { | ||
116 | pr_warning("ACPI: acpi_sleep=s4_nonvs is deprecated, " | ||
117 | "please use acpi_sleep=nonvs instead"); | ||
118 | acpi_nvs_nosave(); | ||
119 | } | ||
120 | #endif | 115 | #endif |
121 | if (strncmp(str, "nonvs", 5) == 0) | 116 | if (strncmp(str, "nonvs", 5) == 0) |
122 | acpi_nvs_nosave(); | 117 | acpi_nvs_nosave(); |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 4a234677e213..a81f2d52f869 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -67,17 +67,30 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt); | |||
67 | #define DPRINTK(fmt, args...) if (debug_alternative) \ | 67 | #define DPRINTK(fmt, args...) if (debug_alternative) \ |
68 | printk(KERN_DEBUG fmt, args) | 68 | printk(KERN_DEBUG fmt, args) |
69 | 69 | ||
70 | /* | ||
71 | * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes | ||
72 | * that correspond to that nop. Getting from one nop to the next, we | ||
73 | * add to the array the offset that is equal to the sum of all sizes of | ||
74 | * nops preceding the one we are after. | ||
75 | * | ||
76 | * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the | ||
77 | * nice symmetry of sizes of the previous nops. | ||
78 | */ | ||
70 | #if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64) | 79 | #if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64) |
71 | /* Use inline assembly to define this because the nops are defined | 80 | static const unsigned char intelnops[] = |
72 | as inline assembly strings in the include files and we cannot | 81 | { |
73 | get them easily into strings. */ | 82 | GENERIC_NOP1, |
74 | asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nintelnops: " | 83 | GENERIC_NOP2, |
75 | GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 | 84 | GENERIC_NOP3, |
76 | GENERIC_NOP7 GENERIC_NOP8 | 85 | GENERIC_NOP4, |
77 | "\t.previous"); | 86 | GENERIC_NOP5, |
78 | extern const unsigned char intelnops[]; | 87 | GENERIC_NOP6, |
79 | static const unsigned char *const __initconst_or_module | 88 | GENERIC_NOP7, |
80 | intel_nops[ASM_NOP_MAX+1] = { | 89 | GENERIC_NOP8, |
90 | GENERIC_NOP5_ATOMIC | ||
91 | }; | ||
92 | static const unsigned char * const intel_nops[ASM_NOP_MAX+2] = | ||
93 | { | ||
81 | NULL, | 94 | NULL, |
82 | intelnops, | 95 | intelnops, |
83 | intelnops + 1, | 96 | intelnops + 1, |
@@ -87,17 +100,25 @@ intel_nops[ASM_NOP_MAX+1] = { | |||
87 | intelnops + 1 + 2 + 3 + 4 + 5, | 100 | intelnops + 1 + 2 + 3 + 4 + 5, |
88 | intelnops + 1 + 2 + 3 + 4 + 5 + 6, | 101 | intelnops + 1 + 2 + 3 + 4 + 5 + 6, |
89 | intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | 102 | intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, |
103 | intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, | ||
90 | }; | 104 | }; |
91 | #endif | 105 | #endif |
92 | 106 | ||
93 | #ifdef K8_NOP1 | 107 | #ifdef K8_NOP1 |
94 | asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk8nops: " | 108 | static const unsigned char k8nops[] = |
95 | K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 | 109 | { |
96 | K8_NOP7 K8_NOP8 | 110 | K8_NOP1, |
97 | "\t.previous"); | 111 | K8_NOP2, |
98 | extern const unsigned char k8nops[]; | 112 | K8_NOP3, |
99 | static const unsigned char *const __initconst_or_module | 113 | K8_NOP4, |
100 | k8_nops[ASM_NOP_MAX+1] = { | 114 | K8_NOP5, |
115 | K8_NOP6, | ||
116 | K8_NOP7, | ||
117 | K8_NOP8, | ||
118 | K8_NOP5_ATOMIC | ||
119 | }; | ||
120 | static const unsigned char * const k8_nops[ASM_NOP_MAX+2] = | ||
121 | { | ||
101 | NULL, | 122 | NULL, |
102 | k8nops, | 123 | k8nops, |
103 | k8nops + 1, | 124 | k8nops + 1, |
@@ -107,17 +128,25 @@ k8_nops[ASM_NOP_MAX+1] = { | |||
107 | k8nops + 1 + 2 + 3 + 4 + 5, | 128 | k8nops + 1 + 2 + 3 + 4 + 5, |
108 | k8nops + 1 + 2 + 3 + 4 + 5 + 6, | 129 | k8nops + 1 + 2 + 3 + 4 + 5 + 6, |
109 | k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | 130 | k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, |
131 | k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, | ||
110 | }; | 132 | }; |
111 | #endif | 133 | #endif |
112 | 134 | ||
113 | #if defined(K7_NOP1) && !defined(CONFIG_X86_64) | 135 | #if defined(K7_NOP1) && !defined(CONFIG_X86_64) |
114 | asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk7nops: " | 136 | static const unsigned char k7nops[] = |
115 | K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 | 137 | { |
116 | K7_NOP7 K7_NOP8 | 138 | K7_NOP1, |
117 | "\t.previous"); | 139 | K7_NOP2, |
118 | extern const unsigned char k7nops[]; | 140 | K7_NOP3, |
119 | static const unsigned char *const __initconst_or_module | 141 | K7_NOP4, |
120 | k7_nops[ASM_NOP_MAX+1] = { | 142 | K7_NOP5, |
143 | K7_NOP6, | ||
144 | K7_NOP7, | ||
145 | K7_NOP8, | ||
146 | K7_NOP5_ATOMIC | ||
147 | }; | ||
148 | static const unsigned char * const k7_nops[ASM_NOP_MAX+2] = | ||
149 | { | ||
121 | NULL, | 150 | NULL, |
122 | k7nops, | 151 | k7nops, |
123 | k7nops + 1, | 152 | k7nops + 1, |
@@ -127,17 +156,25 @@ k7_nops[ASM_NOP_MAX+1] = { | |||
127 | k7nops + 1 + 2 + 3 + 4 + 5, | 156 | k7nops + 1 + 2 + 3 + 4 + 5, |
128 | k7nops + 1 + 2 + 3 + 4 + 5 + 6, | 157 | k7nops + 1 + 2 + 3 + 4 + 5 + 6, |
129 | k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | 158 | k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, |
159 | k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, | ||
130 | }; | 160 | }; |
131 | #endif | 161 | #endif |
132 | 162 | ||
133 | #ifdef P6_NOP1 | 163 | #ifdef P6_NOP1 |
134 | asm("\t" __stringify(__INITRODATA_OR_MODULE) "\np6nops: " | 164 | static const unsigned char __initconst_or_module p6nops[] = |
135 | P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 | 165 | { |
136 | P6_NOP7 P6_NOP8 | 166 | P6_NOP1, |
137 | "\t.previous"); | 167 | P6_NOP2, |
138 | extern const unsigned char p6nops[]; | 168 | P6_NOP3, |
139 | static const unsigned char *const __initconst_or_module | 169 | P6_NOP4, |
140 | p6_nops[ASM_NOP_MAX+1] = { | 170 | P6_NOP5, |
171 | P6_NOP6, | ||
172 | P6_NOP7, | ||
173 | P6_NOP8, | ||
174 | P6_NOP5_ATOMIC | ||
175 | }; | ||
176 | static const unsigned char * const p6_nops[ASM_NOP_MAX+2] = | ||
177 | { | ||
141 | NULL, | 178 | NULL, |
142 | p6nops, | 179 | p6nops, |
143 | p6nops + 1, | 180 | p6nops + 1, |
@@ -147,47 +184,65 @@ p6_nops[ASM_NOP_MAX+1] = { | |||
147 | p6nops + 1 + 2 + 3 + 4 + 5, | 184 | p6nops + 1 + 2 + 3 + 4 + 5, |
148 | p6nops + 1 + 2 + 3 + 4 + 5 + 6, | 185 | p6nops + 1 + 2 + 3 + 4 + 5 + 6, |
149 | p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | 186 | p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, |
187 | p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, | ||
150 | }; | 188 | }; |
151 | #endif | 189 | #endif |
152 | 190 | ||
191 | /* Initialize these to a safe default */ | ||
153 | #ifdef CONFIG_X86_64 | 192 | #ifdef CONFIG_X86_64 |
193 | const unsigned char * const *ideal_nops = p6_nops; | ||
194 | #else | ||
195 | const unsigned char * const *ideal_nops = intel_nops; | ||
196 | #endif | ||
154 | 197 | ||
155 | extern char __vsyscall_0; | 198 | void __init arch_init_ideal_nops(void) |
156 | static const unsigned char *const *__init_or_module find_nop_table(void) | ||
157 | { | 199 | { |
158 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && | 200 | switch (boot_cpu_data.x86_vendor) { |
159 | boot_cpu_has(X86_FEATURE_NOPL)) | 201 | case X86_VENDOR_INTEL: |
160 | return p6_nops; | 202 | /* |
161 | else | 203 | * Due to a decoder implementation quirk, some |
162 | return k8_nops; | 204 | * specific Intel CPUs actually perform better with |
163 | } | 205 | * the "k8_nops" than with the SDM-recommended NOPs. |
164 | 206 | */ | |
165 | #else /* CONFIG_X86_64 */ | 207 | if (boot_cpu_data.x86 == 6 && |
208 | boot_cpu_data.x86_model >= 0x0f && | ||
209 | boot_cpu_data.x86_model != 0x1c && | ||
210 | boot_cpu_data.x86_model != 0x26 && | ||
211 | boot_cpu_data.x86_model != 0x27 && | ||
212 | boot_cpu_data.x86_model < 0x30) { | ||
213 | ideal_nops = k8_nops; | ||
214 | } else if (boot_cpu_has(X86_FEATURE_NOPL)) { | ||
215 | ideal_nops = p6_nops; | ||
216 | } else { | ||
217 | #ifdef CONFIG_X86_64 | ||
218 | ideal_nops = k8_nops; | ||
219 | #else | ||
220 | ideal_nops = intel_nops; | ||
221 | #endif | ||
222 | } | ||
166 | 223 | ||
167 | static const unsigned char *const *__init_or_module find_nop_table(void) | 224 | default: |
168 | { | 225 | #ifdef CONFIG_X86_64 |
169 | if (boot_cpu_has(X86_FEATURE_K8)) | 226 | ideal_nops = k8_nops; |
170 | return k8_nops; | 227 | #else |
171 | else if (boot_cpu_has(X86_FEATURE_K7)) | 228 | if (boot_cpu_has(X86_FEATURE_K8)) |
172 | return k7_nops; | 229 | ideal_nops = k8_nops; |
173 | else if (boot_cpu_has(X86_FEATURE_NOPL)) | 230 | else if (boot_cpu_has(X86_FEATURE_K7)) |
174 | return p6_nops; | 231 | ideal_nops = k7_nops; |
175 | else | 232 | else |
176 | return intel_nops; | 233 | ideal_nops = intel_nops; |
234 | #endif | ||
235 | } | ||
177 | } | 236 | } |
178 | 237 | ||
179 | #endif /* CONFIG_X86_64 */ | ||
180 | |||
181 | /* Use this to add nops to a buffer, then text_poke the whole buffer. */ | 238 | /* Use this to add nops to a buffer, then text_poke the whole buffer. */ |
182 | static void __init_or_module add_nops(void *insns, unsigned int len) | 239 | static void __init_or_module add_nops(void *insns, unsigned int len) |
183 | { | 240 | { |
184 | const unsigned char *const *noptable = find_nop_table(); | ||
185 | |||
186 | while (len > 0) { | 241 | while (len > 0) { |
187 | unsigned int noplen = len; | 242 | unsigned int noplen = len; |
188 | if (noplen > ASM_NOP_MAX) | 243 | if (noplen > ASM_NOP_MAX) |
189 | noplen = ASM_NOP_MAX; | 244 | noplen = ASM_NOP_MAX; |
190 | memcpy(insns, noptable[noplen], noplen); | 245 | memcpy(insns, ideal_nops[noplen], noplen); |
191 | insns += noplen; | 246 | insns += noplen; |
192 | len -= noplen; | 247 | len -= noplen; |
193 | } | 248 | } |
@@ -195,6 +250,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) | |||
195 | 250 | ||
196 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | 251 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; |
197 | extern s32 __smp_locks[], __smp_locks_end[]; | 252 | extern s32 __smp_locks[], __smp_locks_end[]; |
253 | extern char __vsyscall_0; | ||
198 | void *text_poke_early(void *addr, const void *opcode, size_t len); | 254 | void *text_poke_early(void *addr, const void *opcode, size_t len); |
199 | 255 | ||
200 | /* Replace instructions with better alternatives for this CPU type. | 256 | /* Replace instructions with better alternatives for this CPU type. |
@@ -210,6 +266,15 @@ void __init_or_module apply_alternatives(struct alt_instr *start, | |||
210 | u8 insnbuf[MAX_PATCH_LEN]; | 266 | u8 insnbuf[MAX_PATCH_LEN]; |
211 | 267 | ||
212 | DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); | 268 | DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); |
269 | /* | ||
270 | * The scan order should be from start to end. A later scanned | ||
271 | * alternative code can overwrite a previous scanned alternative code. | ||
272 | * Some kernel functions (e.g. memcpy, memset, etc) use this order to | ||
273 | * patch code. | ||
274 | * | ||
275 | * So be careful if you want to change the scan order to any other | ||
276 | * order. | ||
277 | */ | ||
213 | for (a = start; a < end; a++) { | 278 | for (a = start; a < end; a++) { |
214 | u8 *instr = a->instr; | 279 | u8 *instr = a->instr; |
215 | BUG_ON(a->replacementlen > a->instrlen); | 280 | BUG_ON(a->replacementlen > a->instrlen); |
@@ -678,29 +743,3 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n) | |||
678 | wrote_text = 0; | 743 | wrote_text = 0; |
679 | __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); | 744 | __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); |
680 | } | 745 | } |
681 | |||
682 | #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) | ||
683 | |||
684 | #ifdef CONFIG_X86_64 | ||
685 | unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 }; | ||
686 | #else | ||
687 | unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 }; | ||
688 | #endif | ||
689 | |||
690 | void __init arch_init_ideal_nop5(void) | ||
691 | { | ||
692 | /* | ||
693 | * There is no good nop for all x86 archs. This selection | ||
694 | * algorithm should be unified with the one in find_nop_table(), | ||
695 | * but this should be good enough for now. | ||
696 | * | ||
697 | * For cases other than the ones below, use the safe (as in | ||
698 | * always functional) defaults above. | ||
699 | */ | ||
700 | #ifdef CONFIG_X86_64 | ||
701 | /* Don't use these on 32 bits due to broken virtualizers */ | ||
702 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | ||
703 | memcpy(ideal_nop5, p6_nops[5], 5); | ||
704 | #endif | ||
705 | } | ||
706 | #endif | ||
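The rewritten NOP tables above keep the long-standing layout trick: each intelnops/k8nops/k7nops/p6nops array is the 1- through 8-byte NOPs concatenated back to back (now followed by the 5-byte atomic NOP), and the per-length table points into it with cumulative offsets so add_nops() can copy ideal_nops[noplen] directly. A compilable user-space sketch of that scheme, cut down to three NOP lengths (the opcode bytes are common x86 NOP encodings chosen for illustration, not the kernel's exact tables):

#include <stdio.h>
#include <string.h>

/* 1-, 2- and 3-byte NOPs concatenated, following the kernel arrays' layout */
static const unsigned char nops[] = {
	0x90,			/* 1-byte: nop */
	0x66, 0x90,		/* 2-byte: osize-prefixed nop */
	0x0f, 0x1f, 0x00,	/* 3-byte: nopl (%eax) */
};

#define NOP_MAX 3

/* Entry n points at the n-byte sequence via cumulative offsets */
static const unsigned char * const nop_table[NOP_MAX + 1] = {
	NULL,
	nops,			/* offset 0	*/
	nops + 1,		/* offset 1	*/
	nops + 1 + 2,		/* offset 1+2	*/
};

/* Greedy padding, mirroring the kernel's add_nops() */
static void add_nops(unsigned char *insns, unsigned int len)
{
	while (len > 0) {
		unsigned int noplen = len > NOP_MAX ? NOP_MAX : len;

		memcpy(insns, nop_table[noplen], noplen);
		insns += noplen;
		len -= noplen;
	}
}

int main(void)
{
	unsigned char buf[7];
	unsigned int i;

	add_nops(buf, sizeof(buf));	/* filled as 3 + 3 + 1 bytes of NOPs */
	for (i = 0; i < sizeof(buf); i++)
		printf("%02x ", buf[i]);
	printf("\n");
	return 0;
}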
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/amd_gart_64.c index 82ada01625b9..b117efd24f71 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c | |||
@@ -81,6 +81,9 @@ static u32 gart_unmapped_entry; | |||
81 | #define AGPEXTERN | 81 | #define AGPEXTERN |
82 | #endif | 82 | #endif |
83 | 83 | ||
84 | /* GART can only remap to physical addresses < 1TB */ | ||
85 | #define GART_MAX_PHYS_ADDR (1ULL << 40) | ||
86 | |||
84 | /* backdoor interface to AGP driver */ | 87 | /* backdoor interface to AGP driver */ |
85 | AGPEXTERN int agp_memory_reserved; | 88 | AGPEXTERN int agp_memory_reserved; |
86 | AGPEXTERN __u32 *agp_gatt_table; | 89 | AGPEXTERN __u32 *agp_gatt_table; |
@@ -212,9 +215,13 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | |||
212 | size_t size, int dir, unsigned long align_mask) | 215 | size_t size, int dir, unsigned long align_mask) |
213 | { | 216 | { |
214 | unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE); | 217 | unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE); |
215 | unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); | 218 | unsigned long iommu_page; |
216 | int i; | 219 | int i; |
217 | 220 | ||
221 | if (unlikely(phys_mem + size > GART_MAX_PHYS_ADDR)) | ||
222 | return bad_dma_addr; | ||
223 | |||
224 | iommu_page = alloc_iommu(dev, npages, align_mask); | ||
218 | if (iommu_page == -1) { | 225 | if (iommu_page == -1) { |
219 | if (!nonforced_iommu(dev, phys_mem, size)) | 226 | if (!nonforced_iommu(dev, phys_mem, size)) |
220 | return phys_mem; | 227 | return phys_mem; |
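For context, the new GART_MAX_PHYS_ADDR check in dma_map_area() rejects any mapping that would end at or beyond 1TB before an aperture slot is allocated. A user-space sketch of that bounds check follows; bad_dma_addr is only mentioned in the comment, and the test addresses are invented.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define GART_MAX_PHYS_ADDR	(1ULL << 40)	/* GART can only remap below 1TB */

static bool gart_can_map(uint64_t phys_mem, uint64_t size)
{
	/* mirrors: if (unlikely(phys_mem + size > GART_MAX_PHYS_ADDR)) return bad_dma_addr; */
	return phys_mem + size <= GART_MAX_PHYS_ADDR;
}

int main(void)
{
	printf("%d\n", gart_can_map(0xffffff000ULL, 4096));	/* 1: ends below 1TB */
	printf("%d\n", gart_can_map(GART_MAX_PHYS_ADDR, 64));	/* 0: rejected */
	return 0;
}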
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 57ca77787220..cd8cbeb5fa34 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -18,6 +18,7 @@ | |||
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include <linux/pci.h> | 20 | #include <linux/pci.h> |
21 | #include <linux/pci-ats.h> | ||
21 | #include <linux/bitmap.h> | 22 | #include <linux/bitmap.h> |
22 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
23 | #include <linux/debugfs.h> | 24 | #include <linux/debugfs.h> |
@@ -25,6 +26,7 @@ | |||
25 | #include <linux/dma-mapping.h> | 26 | #include <linux/dma-mapping.h> |
26 | #include <linux/iommu-helper.h> | 27 | #include <linux/iommu-helper.h> |
27 | #include <linux/iommu.h> | 28 | #include <linux/iommu.h> |
29 | #include <linux/delay.h> | ||
28 | #include <asm/proto.h> | 30 | #include <asm/proto.h> |
29 | #include <asm/iommu.h> | 31 | #include <asm/iommu.h> |
30 | #include <asm/gart.h> | 32 | #include <asm/gart.h> |
@@ -34,7 +36,7 @@ | |||
34 | 36 | ||
35 | #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) | 37 | #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) |
36 | 38 | ||
37 | #define EXIT_LOOP_COUNT 10000000 | 39 | #define LOOP_TIMEOUT 100000 |
38 | 40 | ||
39 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); | 41 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); |
40 | 42 | ||
@@ -57,7 +59,6 @@ struct iommu_cmd { | |||
57 | u32 data[4]; | 59 | u32 data[4]; |
58 | }; | 60 | }; |
59 | 61 | ||
60 | static void reset_iommu_command_buffer(struct amd_iommu *iommu); | ||
61 | static void update_domain(struct protection_domain *domain); | 62 | static void update_domain(struct protection_domain *domain); |
62 | 63 | ||
63 | /**************************************************************************** | 64 | /**************************************************************************** |
@@ -322,8 +323,6 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) | |||
322 | break; | 323 | break; |
323 | case EVENT_TYPE_ILL_CMD: | 324 | case EVENT_TYPE_ILL_CMD: |
324 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); | 325 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); |
325 | iommu->reset_in_progress = true; | ||
326 | reset_iommu_command_buffer(iommu); | ||
327 | dump_command(address); | 326 | dump_command(address); |
328 | break; | 327 | break; |
329 | case EVENT_TYPE_CMD_HARD_ERR: | 328 | case EVENT_TYPE_CMD_HARD_ERR: |
@@ -367,7 +366,7 @@ static void iommu_poll_events(struct amd_iommu *iommu) | |||
367 | spin_unlock_irqrestore(&iommu->lock, flags); | 366 | spin_unlock_irqrestore(&iommu->lock, flags); |
368 | } | 367 | } |
369 | 368 | ||
370 | irqreturn_t amd_iommu_int_handler(int irq, void *data) | 369 | irqreturn_t amd_iommu_int_thread(int irq, void *data) |
371 | { | 370 | { |
372 | struct amd_iommu *iommu; | 371 | struct amd_iommu *iommu; |
373 | 372 | ||
@@ -377,192 +376,300 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data) | |||
377 | return IRQ_HANDLED; | 376 | return IRQ_HANDLED; |
378 | } | 377 | } |
379 | 378 | ||
379 | irqreturn_t amd_iommu_int_handler(int irq, void *data) | ||
380 | { | ||
381 | return IRQ_WAKE_THREAD; | ||
382 | } | ||
383 | |||
380 | /**************************************************************************** | 384 | /**************************************************************************** |
381 | * | 385 | * |
382 | * IOMMU command queuing functions | 386 | * IOMMU command queuing functions |
383 | * | 387 | * |
384 | ****************************************************************************/ | 388 | ****************************************************************************/ |
385 | 389 | ||
386 | /* | 390 | static int wait_on_sem(volatile u64 *sem) |
387 | * Writes the command to the IOMMUs command buffer and informs the | 391 | { |
388 | * hardware about the new command. Must be called with iommu->lock held. | 392 | int i = 0; |
389 | */ | 393 | |
390 | static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | 394 | while (*sem == 0 && i < LOOP_TIMEOUT) { |
395 | udelay(1); | ||
396 | i += 1; | ||
397 | } | ||
398 | |||
399 | if (i == LOOP_TIMEOUT) { | ||
400 | pr_alert("AMD-Vi: Completion-Wait loop timed out\n"); | ||
401 | return -EIO; | ||
402 | } | ||
403 | |||
404 | return 0; | ||
405 | } | ||
406 | |||
407 | static void copy_cmd_to_buffer(struct amd_iommu *iommu, | ||
408 | struct iommu_cmd *cmd, | ||
409 | u32 tail) | ||
391 | { | 410 | { |
392 | u32 tail, head; | ||
393 | u8 *target; | 411 | u8 *target; |
394 | 412 | ||
395 | WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); | ||
396 | tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | ||
397 | target = iommu->cmd_buf + tail; | 413 | target = iommu->cmd_buf + tail; |
398 | memcpy_toio(target, cmd, sizeof(*cmd)); | 414 | tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; |
399 | tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; | 415 | |
400 | head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | 416 | /* Copy command to buffer */ |
401 | if (tail == head) | 417 | memcpy(target, cmd, sizeof(*cmd)); |
402 | return -ENOMEM; | 418 | |
419 | /* Tell the IOMMU about it */ | ||
403 | writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | 420 | writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); |
421 | } | ||
404 | 422 | ||
405 | return 0; | 423 | static void build_completion_wait(struct iommu_cmd *cmd, u64 address) |
424 | { | ||
425 | WARN_ON(address & 0x7ULL); | ||
426 | |||
427 | memset(cmd, 0, sizeof(*cmd)); | ||
428 | cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK; | ||
429 | cmd->data[1] = upper_32_bits(__pa(address)); | ||
430 | cmd->data[2] = 1; | ||
431 | CMD_SET_TYPE(cmd, CMD_COMPL_WAIT); | ||
432 | } | ||
433 | |||
434 | static void build_inv_dte(struct iommu_cmd *cmd, u16 devid) | ||
435 | { | ||
436 | memset(cmd, 0, sizeof(*cmd)); | ||
437 | cmd->data[0] = devid; | ||
438 | CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY); | ||
439 | } | ||
440 | |||
441 | static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, | ||
442 | size_t size, u16 domid, int pde) | ||
443 | { | ||
444 | u64 pages; | ||
445 | int s; | ||
446 | |||
447 | pages = iommu_num_pages(address, size, PAGE_SIZE); | ||
448 | s = 0; | ||
449 | |||
450 | if (pages > 1) { | ||
451 | /* | ||
452 | * If we have to flush more than one page, flush all | ||
453 | * TLB entries for this domain | ||
454 | */ | ||
455 | address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | ||
456 | s = 1; | ||
457 | } | ||
458 | |||
459 | address &= PAGE_MASK; | ||
460 | |||
461 | memset(cmd, 0, sizeof(*cmd)); | ||
462 | cmd->data[1] |= domid; | ||
463 | cmd->data[2] = lower_32_bits(address); | ||
464 | cmd->data[3] = upper_32_bits(address); | ||
465 | CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); | ||
466 | if (s) /* size bit - we flush more than one 4kb page */ | ||
467 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; | ||
468 | if (pde) /* PDE bit - we want to flush everything, not only the PTEs */ | ||
469 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; | ||
470 | } | ||
471 | |||
472 | static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep, | ||
473 | u64 address, size_t size) | ||
474 | { | ||
475 | u64 pages; | ||
476 | int s; | ||
477 | |||
478 | pages = iommu_num_pages(address, size, PAGE_SIZE); | ||
479 | s = 0; | ||
480 | |||
481 | if (pages > 1) { | ||
482 | /* | ||
483 | * If we have to flush more than one page, flush all | ||
484 | * TLB entries for this domain | ||
485 | */ | ||
486 | address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | ||
487 | s = 1; | ||
488 | } | ||
489 | |||
490 | address &= PAGE_MASK; | ||
491 | |||
492 | memset(cmd, 0, sizeof(*cmd)); | ||
493 | cmd->data[0] = devid; | ||
494 | cmd->data[0] |= (qdep & 0xff) << 24; | ||
495 | cmd->data[1] = devid; | ||
496 | cmd->data[2] = lower_32_bits(address); | ||
497 | cmd->data[3] = upper_32_bits(address); | ||
498 | CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); | ||
499 | if (s) | ||
500 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; | ||
501 | } | ||
502 | |||
503 | static void build_inv_all(struct iommu_cmd *cmd) | ||
504 | { | ||
505 | memset(cmd, 0, sizeof(*cmd)); | ||
506 | CMD_SET_TYPE(cmd, CMD_INV_ALL); | ||
406 | } | 507 | } |
407 | 508 | ||
408 | /* | 509 | /* |
409 | * General queuing function for commands. Takes iommu->lock and calls | 510 | * Writes the command to the IOMMUs command buffer and informs the |
410 | * __iommu_queue_command(). | 511 | * hardware about the new command. |
411 | */ | 512 | */ |
412 | static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | 513 | static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) |
413 | { | 514 | { |
515 | u32 left, tail, head, next_tail; | ||
414 | unsigned long flags; | 516 | unsigned long flags; |
415 | int ret; | ||
416 | 517 | ||
518 | WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); | ||
519 | |||
520 | again: | ||
417 | spin_lock_irqsave(&iommu->lock, flags); | 521 | spin_lock_irqsave(&iommu->lock, flags); |
418 | ret = __iommu_queue_command(iommu, cmd); | ||
419 | if (!ret) | ||
420 | iommu->need_sync = true; | ||
421 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
422 | 522 | ||
423 | return ret; | 523 | head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); |
424 | } | 524 | tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); |
525 | next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; | ||
526 | left = (head - next_tail) % iommu->cmd_buf_size; | ||
425 | 527 | ||
426 | /* | 528 | if (left <= 2) { |
427 | * This function waits until an IOMMU has completed a completion | 529 | struct iommu_cmd sync_cmd; |
428 | * wait command | 530 | volatile u64 sem = 0; |
429 | */ | 531 | int ret; |
430 | static void __iommu_wait_for_completion(struct amd_iommu *iommu) | 532 | |
431 | { | 533 | build_completion_wait(&sync_cmd, (u64)&sem); |
432 | int ready = 0; | 534 | copy_cmd_to_buffer(iommu, &sync_cmd, tail); |
433 | unsigned status = 0; | ||
434 | unsigned long i = 0; | ||
435 | 535 | ||
436 | INC_STATS_COUNTER(compl_wait); | 536 | spin_unlock_irqrestore(&iommu->lock, flags); |
537 | |||
538 | if ((ret = wait_on_sem(&sem)) != 0) | ||
539 | return ret; | ||
437 | 540 | ||
438 | while (!ready && (i < EXIT_LOOP_COUNT)) { | 541 | goto again; |
439 | ++i; | ||
440 | /* wait for the bit to become one */ | ||
441 | status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); | ||
442 | ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; | ||
443 | } | 542 | } |
444 | 543 | ||
445 | /* set bit back to zero */ | 544 | copy_cmd_to_buffer(iommu, cmd, tail); |
446 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; | 545 | |
447 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); | 546 | /* We need to sync now to make sure all commands are processed */ |
547 | iommu->need_sync = true; | ||
548 | |||
549 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
448 | 550 | ||
449 | if (unlikely(i == EXIT_LOOP_COUNT)) | 551 | return 0; |
450 | iommu->reset_in_progress = true; | ||
451 | } | 552 | } |
452 | 553 | ||
453 | /* | 554 | /* |
454 | * This function queues a completion wait command into the command | 555 | * This function queues a completion wait command into the command |
455 | * buffer of an IOMMU | 556 | * buffer of an IOMMU |
456 | */ | 557 | */ |
457 | static int __iommu_completion_wait(struct amd_iommu *iommu) | 558 | static int iommu_completion_wait(struct amd_iommu *iommu) |
458 | { | 559 | { |
459 | struct iommu_cmd cmd; | 560 | struct iommu_cmd cmd; |
561 | volatile u64 sem = 0; | ||
562 | int ret; | ||
563 | |||
564 | if (!iommu->need_sync) | ||
565 | return 0; | ||
460 | 566 | ||
461 | memset(&cmd, 0, sizeof(cmd)); | 567 | build_completion_wait(&cmd, (u64)&sem); |
462 | cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; | ||
463 | CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); | ||
464 | 568 | ||
465 | return __iommu_queue_command(iommu, &cmd); | 569 | ret = iommu_queue_command(iommu, &cmd); |
570 | if (ret) | ||
571 | return ret; | ||
572 | |||
573 | return wait_on_sem(&sem); | ||
466 | } | 574 | } |
467 | 575 | ||
468 | /* | 576 | static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) |
469 | * This function is called whenever we need to ensure that the IOMMU has | ||
470 | * completed execution of all commands we sent. It sends a | ||
471 | * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs | ||
472 | * us about that by writing a value to a physical address we pass with | ||
473 | * the command. | ||
474 | */ | ||
475 | static int iommu_completion_wait(struct amd_iommu *iommu) | ||
476 | { | 577 | { |
477 | int ret = 0; | 578 | struct iommu_cmd cmd; |
478 | unsigned long flags; | ||
479 | |||
480 | spin_lock_irqsave(&iommu->lock, flags); | ||
481 | 579 | ||
482 | if (!iommu->need_sync) | 580 | build_inv_dte(&cmd, devid); |
483 | goto out; | ||
484 | 581 | ||
485 | ret = __iommu_completion_wait(iommu); | 582 | return iommu_queue_command(iommu, &cmd); |
583 | } | ||
486 | 584 | ||
487 | iommu->need_sync = false; | 585 | static void iommu_flush_dte_all(struct amd_iommu *iommu) |
586 | { | ||
587 | u32 devid; | ||
488 | 588 | ||
489 | if (ret) | 589 | for (devid = 0; devid <= 0xffff; ++devid) |
490 | goto out; | 590 | iommu_flush_dte(iommu, devid); |
491 | 591 | ||
492 | __iommu_wait_for_completion(iommu); | 592 | iommu_completion_wait(iommu); |
593 | } | ||
493 | 594 | ||
494 | out: | 595 | /* |
495 | spin_unlock_irqrestore(&iommu->lock, flags); | 596 | * This function uses heavy locking and may disable irqs for some time. But |
597 | * this is no issue because it is only called during resume. | ||
598 | */ | ||
599 | static void iommu_flush_tlb_all(struct amd_iommu *iommu) | ||
600 | { | ||
601 | u32 dom_id; | ||
496 | 602 | ||
497 | if (iommu->reset_in_progress) | 603 | for (dom_id = 0; dom_id <= 0xffff; ++dom_id) { |
498 | reset_iommu_command_buffer(iommu); | 604 | struct iommu_cmd cmd; |
605 | build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | ||
606 | dom_id, 1); | ||
607 | iommu_queue_command(iommu, &cmd); | ||
608 | } | ||
499 | 609 | ||
500 | return 0; | 610 | iommu_completion_wait(iommu); |
501 | } | 611 | } |
502 | 612 | ||
503 | static void iommu_flush_complete(struct protection_domain *domain) | 613 | static void iommu_flush_all(struct amd_iommu *iommu) |
504 | { | 614 | { |
505 | int i; | 615 | struct iommu_cmd cmd; |
506 | 616 | ||
507 | for (i = 0; i < amd_iommus_present; ++i) { | 617 | build_inv_all(&cmd); |
508 | if (!domain->dev_iommu[i]) | ||
509 | continue; | ||
510 | 618 | ||
511 | /* | 619 | iommu_queue_command(iommu, &cmd); |
512 | * Devices of this domain are behind this IOMMU | 620 | iommu_completion_wait(iommu); |
513 | * We need to wait for completion of all commands. | 621 | } |
514 | */ | 622 | |
515 | iommu_completion_wait(amd_iommus[i]); | 623 | void iommu_flush_all_caches(struct amd_iommu *iommu) |
624 | { | ||
625 | if (iommu_feature(iommu, FEATURE_IA)) { | ||
626 | iommu_flush_all(iommu); | ||
627 | } else { | ||
628 | iommu_flush_dte_all(iommu); | ||
629 | iommu_flush_tlb_all(iommu); | ||
516 | } | 630 | } |
517 | } | 631 | } |
518 | 632 | ||
519 | /* | 633 | /* |
520 | * Command send function for invalidating a device table entry | 634 | * Command send function for flushing on-device TLB |
521 | */ | 635 | */ |
522 | static int iommu_flush_device(struct device *dev) | 636 | static int device_flush_iotlb(struct device *dev, u64 address, size_t size) |
523 | { | 637 | { |
638 | struct pci_dev *pdev = to_pci_dev(dev); | ||
524 | struct amd_iommu *iommu; | 639 | struct amd_iommu *iommu; |
525 | struct iommu_cmd cmd; | 640 | struct iommu_cmd cmd; |
526 | u16 devid; | 641 | u16 devid; |
642 | int qdep; | ||
527 | 643 | ||
644 | qdep = pci_ats_queue_depth(pdev); | ||
528 | devid = get_device_id(dev); | 645 | devid = get_device_id(dev); |
529 | iommu = amd_iommu_rlookup_table[devid]; | 646 | iommu = amd_iommu_rlookup_table[devid]; |
530 | 647 | ||
531 | /* Build command */ | 648 | build_inv_iotlb_pages(&cmd, devid, qdep, address, size); |
532 | memset(&cmd, 0, sizeof(cmd)); | ||
533 | CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); | ||
534 | cmd.data[0] = devid; | ||
535 | 649 | ||
536 | return iommu_queue_command(iommu, &cmd); | 650 | return iommu_queue_command(iommu, &cmd); |
537 | } | 651 | } |
538 | 652 | ||
539 | static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, | ||
540 | u16 domid, int pde, int s) | ||
541 | { | ||
542 | memset(cmd, 0, sizeof(*cmd)); | ||
543 | address &= PAGE_MASK; | ||
544 | CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); | ||
545 | cmd->data[1] |= domid; | ||
546 | cmd->data[2] = lower_32_bits(address); | ||
547 | cmd->data[3] = upper_32_bits(address); | ||
548 | if (s) /* size bit - we flush more than one 4kb page */ | ||
549 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; | ||
550 | if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ | ||
551 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; | ||
552 | } | ||
553 | |||
554 | /* | 653 | /* |
555 | * Generic command send function for invalidaing TLB entries | 654 | * Command send function for invalidating a device table entry |
556 | */ | 655 | */ |
557 | static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, | 656 | static int device_flush_dte(struct device *dev) |
558 | u64 address, u16 domid, int pde, int s) | ||
559 | { | 657 | { |
560 | struct iommu_cmd cmd; | 658 | struct amd_iommu *iommu; |
659 | struct pci_dev *pdev; | ||
660 | u16 devid; | ||
561 | int ret; | 661 | int ret; |
562 | 662 | ||
563 | __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s); | 663 | pdev = to_pci_dev(dev); |
664 | devid = get_device_id(dev); | ||
665 | iommu = amd_iommu_rlookup_table[devid]; | ||
564 | 666 | ||
565 | ret = iommu_queue_command(iommu, &cmd); | 667 | ret = iommu_flush_dte(iommu, devid); |
668 | if (ret) | ||
669 | return ret; | ||
670 | |||
671 | if (pci_ats_enabled(pdev)) | ||
672 | ret = device_flush_iotlb(dev, 0, ~0UL); | ||
566 | 673 | ||
567 | return ret; | 674 | return ret; |
568 | } | 675 | } |
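The reworked iommu_queue_command() treats the command buffer as a byte ring: head and tail are MMIO offsets that wrap at cmd_buf_size, and "left" is the free space remaining once the new command is placed. When that drops to 2 or below, the code enqueues a COMPLETION_WAIT, drops the lock, and spins in wait_on_sem() until the IOMMU drains the ring, then retries. A user-space sketch of just the wrap-around arithmetic, with buffer and command sizes made up:

#include <stdio.h>
#include <stdint.h>

#define CMD_BUF_SIZE	256U	/* illustrative: 16 commands of 16 bytes */
#define CMD_SIZE	16U

static uint32_t left_after_enqueue(uint32_t head, uint32_t tail)
{
	uint32_t next_tail = (tail + CMD_SIZE) % CMD_BUF_SIZE;

	/* unsigned subtraction wraps; the modulo folds it back into the ring */
	return (head - next_tail) % CMD_BUF_SIZE;
}

int main(void)
{
	/* nearly empty ring: plenty of room (prints 224) */
	printf("left = %u\n", left_after_enqueue(0, 16));
	/* tail is about to catch head: prints 0, so the real code would
	 * issue a completion-wait first and then "goto again" */
	printf("left = %u\n", left_after_enqueue(32, 16));
	return 0;
}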
@@ -572,23 +679,14 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, | |||
572 | * It invalidates a single PTE if the range to flush is within a single | 679 | * It invalidates a single PTE if the range to flush is within a single |
573 | * page. Otherwise it flushes the whole TLB of the IOMMU. | 680 | * page. Otherwise it flushes the whole TLB of the IOMMU. |
574 | */ | 681 | */ |
575 | static void __iommu_flush_pages(struct protection_domain *domain, | 682 | static void __domain_flush_pages(struct protection_domain *domain, |
576 | u64 address, size_t size, int pde) | 683 | u64 address, size_t size, int pde) |
577 | { | 684 | { |
578 | int s = 0, i; | 685 | struct iommu_dev_data *dev_data; |
579 | unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE); | 686 | struct iommu_cmd cmd; |
580 | 687 | int ret = 0, i; | |
581 | address &= PAGE_MASK; | ||
582 | |||
583 | if (pages > 1) { | ||
584 | /* | ||
585 | * If we have to flush more than one page, flush all | ||
586 | * TLB entries for this domain | ||
587 | */ | ||
588 | address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | ||
589 | s = 1; | ||
590 | } | ||
591 | 688 | ||
689 | build_inv_iommu_pages(&cmd, address, size, domain->id, pde); | ||
592 | 690 | ||
593 | for (i = 0; i < amd_iommus_present; ++i) { | 691 | for (i = 0; i < amd_iommus_present; ++i) { |
594 | if (!domain->dev_iommu[i]) | 692 | if (!domain->dev_iommu[i]) |
@@ -598,101 +696,70 @@ static void __iommu_flush_pages(struct protection_domain *domain, | |||
598 | * Devices of this domain are behind this IOMMU | 696 | * Devices of this domain are behind this IOMMU |
599 | * We need a TLB flush | 697 | * We need a TLB flush |
600 | */ | 698 | */ |
601 | iommu_queue_inv_iommu_pages(amd_iommus[i], address, | 699 | ret |= iommu_queue_command(amd_iommus[i], &cmd); |
602 | domain->id, pde, s); | 700 | } |
701 | |||
702 | list_for_each_entry(dev_data, &domain->dev_list, list) { | ||
703 | struct pci_dev *pdev = to_pci_dev(dev_data->dev); | ||
704 | |||
705 | if (!pci_ats_enabled(pdev)) | ||
706 | continue; | ||
707 | |||
708 | ret |= device_flush_iotlb(dev_data->dev, address, size); | ||
603 | } | 709 | } |
604 | 710 | ||
605 | return; | 711 | WARN_ON(ret); |
606 | } | 712 | } |
607 | 713 | ||
608 | static void iommu_flush_pages(struct protection_domain *domain, | 714 | static void domain_flush_pages(struct protection_domain *domain, |
609 | u64 address, size_t size) | 715 | u64 address, size_t size) |
610 | { | 716 | { |
611 | __iommu_flush_pages(domain, address, size, 0); | 717 | __domain_flush_pages(domain, address, size, 0); |
612 | } | 718 | } |
613 | 719 | ||
614 | /* Flush the whole IO/TLB for a given protection domain */ | 720 | /* Flush the whole IO/TLB for a given protection domain */ |
615 | static void iommu_flush_tlb(struct protection_domain *domain) | 721 | static void domain_flush_tlb(struct protection_domain *domain) |
616 | { | 722 | { |
617 | __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); | 723 | __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); |
618 | } | 724 | } |
619 | 725 | ||
620 | /* Flush the whole IO/TLB for a given protection domain - including PDE */ | 726 | /* Flush the whole IO/TLB for a given protection domain - including PDE */ |
621 | static void iommu_flush_tlb_pde(struct protection_domain *domain) | 727 | static void domain_flush_tlb_pde(struct protection_domain *domain) |
622 | { | 728 | { |
623 | __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); | 729 | __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); |
624 | } | ||
625 | |||
626 | |||
627 | /* | ||
628 | * This function flushes the DTEs for all devices in domain | ||
629 | */ | ||
630 | static void iommu_flush_domain_devices(struct protection_domain *domain) | ||
631 | { | ||
632 | struct iommu_dev_data *dev_data; | ||
633 | unsigned long flags; | ||
634 | |||
635 | spin_lock_irqsave(&domain->lock, flags); | ||
636 | |||
637 | list_for_each_entry(dev_data, &domain->dev_list, list) | ||
638 | iommu_flush_device(dev_data->dev); | ||
639 | |||
640 | spin_unlock_irqrestore(&domain->lock, flags); | ||
641 | } | 730 | } |
642 | 731 | ||
643 | static void iommu_flush_all_domain_devices(void) | 732 | static void domain_flush_complete(struct protection_domain *domain) |
644 | { | 733 | { |
645 | struct protection_domain *domain; | 734 | int i; |
646 | unsigned long flags; | ||
647 | 735 | ||
648 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); | 736 | for (i = 0; i < amd_iommus_present; ++i) { |
737 | if (!domain->dev_iommu[i]) | ||
738 | continue; | ||
649 | 739 | ||
650 | list_for_each_entry(domain, &amd_iommu_pd_list, list) { | 740 | /* |
651 | iommu_flush_domain_devices(domain); | 741 | * Devices of this domain are behind this IOMMU |
652 | iommu_flush_complete(domain); | 742 | * We need to wait for completion of all commands. |
743 | */ | ||
744 | iommu_completion_wait(amd_iommus[i]); | ||
653 | } | 745 | } |
654 | |||
655 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
656 | } | 746 | } |
657 | 747 | ||
658 | void amd_iommu_flush_all_devices(void) | ||
659 | { | ||
660 | iommu_flush_all_domain_devices(); | ||
661 | } | ||
662 | 748 | ||
663 | /* | 749 | /* |
664 | * This function uses heavy locking and may disable irqs for some time. But | 750 | * This function flushes the DTEs for all devices in domain |
665 | * this is no issue because it is only called during resume. | ||
666 | */ | 751 | */ |
667 | void amd_iommu_flush_all_domains(void) | 752 | static void domain_flush_devices(struct protection_domain *domain) |
668 | { | 753 | { |
669 | struct protection_domain *domain; | 754 | struct iommu_dev_data *dev_data; |
670 | unsigned long flags; | 755 | unsigned long flags; |
671 | 756 | ||
672 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); | 757 | spin_lock_irqsave(&domain->lock, flags); |
673 | |||
674 | list_for_each_entry(domain, &amd_iommu_pd_list, list) { | ||
675 | spin_lock(&domain->lock); | ||
676 | iommu_flush_tlb_pde(domain); | ||
677 | iommu_flush_complete(domain); | ||
678 | spin_unlock(&domain->lock); | ||
679 | } | ||
680 | |||
681 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
682 | } | ||
683 | |||
684 | static void reset_iommu_command_buffer(struct amd_iommu *iommu) | ||
685 | { | ||
686 | pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); | ||
687 | |||
688 | if (iommu->reset_in_progress) | ||
689 | panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); | ||
690 | 758 | ||
691 | amd_iommu_reset_cmd_buffer(iommu); | 759 | list_for_each_entry(dev_data, &domain->dev_list, list) |
692 | amd_iommu_flush_all_devices(); | 760 | device_flush_dte(dev_data->dev); |
693 | amd_iommu_flush_all_domains(); | ||
694 | 761 | ||
695 | iommu->reset_in_progress = false; | 762 | spin_unlock_irqrestore(&domain->lock, flags); |
696 | } | 763 | } |
697 | 764 | ||
698 | /**************************************************************************** | 765 | /**************************************************************************** |
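Both build_inv_iommu_pages() and build_inv_iotlb_pages() above share the same size decision: a range spanning more than one 4k page becomes a whole-domain flush by substituting the ALL_PAGES address and setting the size bit. A user-space sketch of that decision; the page-count helper only approximates iommu_num_pages(), and ALL_PAGES stands in for CMD_INV_IOMMU_ALL_PAGES_ADDRESS.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE	4096ULL
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define ALL_PAGES	0x7fffffffffffffffULL	/* stand-in for CMD_INV_IOMMU_ALL_PAGES_ADDRESS */

static uint64_t num_pages(uint64_t addr, uint64_t size)
{
	uint64_t first = addr & PAGE_MASK;
	uint64_t last  = (addr + size - 1) & PAGE_MASK;

	return (last - first) / PAGE_SIZE + 1;
}

int main(void)
{
	uint64_t address = 0x1000, size = 2 * PAGE_SIZE;
	int s = 0;

	if (num_pages(address, size) > 1) {
		/* more than one page: flush all TLB entries for the domain */
		address = ALL_PAGES;
		s = 1;
	}
	address &= PAGE_MASK;

	printf("address=%#llx size_bit=%d\n", (unsigned long long)address, s);
	return 0;
}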
@@ -1410,17 +1477,22 @@ static bool dma_ops_domain(struct protection_domain *domain) | |||
1410 | return domain->flags & PD_DMA_OPS_MASK; | 1477 | return domain->flags & PD_DMA_OPS_MASK; |
1411 | } | 1478 | } |
1412 | 1479 | ||
1413 | static void set_dte_entry(u16 devid, struct protection_domain *domain) | 1480 | static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) |
1414 | { | 1481 | { |
1415 | u64 pte_root = virt_to_phys(domain->pt_root); | 1482 | u64 pte_root = virt_to_phys(domain->pt_root); |
1483 | u32 flags = 0; | ||
1416 | 1484 | ||
1417 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) | 1485 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) |
1418 | << DEV_ENTRY_MODE_SHIFT; | 1486 | << DEV_ENTRY_MODE_SHIFT; |
1419 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | 1487 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; |
1420 | 1488 | ||
1421 | amd_iommu_dev_table[devid].data[2] = domain->id; | 1489 | if (ats) |
1422 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | 1490 | flags |= DTE_FLAG_IOTLB; |
1423 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | 1491 | |
1492 | amd_iommu_dev_table[devid].data[3] |= flags; | ||
1493 | amd_iommu_dev_table[devid].data[2] = domain->id; | ||
1494 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | ||
1495 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | ||
1424 | } | 1496 | } |
1425 | 1497 | ||
1426 | static void clear_dte_entry(u16 devid) | 1498 | static void clear_dte_entry(u16 devid) |
@@ -1437,23 +1509,29 @@ static void do_attach(struct device *dev, struct protection_domain *domain) | |||
1437 | { | 1509 | { |
1438 | struct iommu_dev_data *dev_data; | 1510 | struct iommu_dev_data *dev_data; |
1439 | struct amd_iommu *iommu; | 1511 | struct amd_iommu *iommu; |
1512 | struct pci_dev *pdev; | ||
1513 | bool ats = false; | ||
1440 | u16 devid; | 1514 | u16 devid; |
1441 | 1515 | ||
1442 | devid = get_device_id(dev); | 1516 | devid = get_device_id(dev); |
1443 | iommu = amd_iommu_rlookup_table[devid]; | 1517 | iommu = amd_iommu_rlookup_table[devid]; |
1444 | dev_data = get_dev_data(dev); | 1518 | dev_data = get_dev_data(dev); |
1519 | pdev = to_pci_dev(dev); | ||
1520 | |||
1521 | if (amd_iommu_iotlb_sup) | ||
1522 | ats = pci_ats_enabled(pdev); | ||
1445 | 1523 | ||
1446 | /* Update data structures */ | 1524 | /* Update data structures */ |
1447 | dev_data->domain = domain; | 1525 | dev_data->domain = domain; |
1448 | list_add(&dev_data->list, &domain->dev_list); | 1526 | list_add(&dev_data->list, &domain->dev_list); |
1449 | set_dte_entry(devid, domain); | 1527 | set_dte_entry(devid, domain, ats); |
1450 | 1528 | ||
1451 | /* Do reference counting */ | 1529 | /* Do reference counting */ |
1452 | domain->dev_iommu[iommu->index] += 1; | 1530 | domain->dev_iommu[iommu->index] += 1; |
1453 | domain->dev_cnt += 1; | 1531 | domain->dev_cnt += 1; |
1454 | 1532 | ||
1455 | /* Flush the DTE entry */ | 1533 | /* Flush the DTE entry */ |
1456 | iommu_flush_device(dev); | 1534 | device_flush_dte(dev); |
1457 | } | 1535 | } |
1458 | 1536 | ||
1459 | static void do_detach(struct device *dev) | 1537 | static void do_detach(struct device *dev) |
@@ -1476,7 +1554,7 @@ static void do_detach(struct device *dev) | |||
1476 | clear_dte_entry(devid); | 1554 | clear_dte_entry(devid); |
1477 | 1555 | ||
1478 | /* Flush the DTE entry */ | 1556 | /* Flush the DTE entry */ |
1479 | iommu_flush_device(dev); | 1557 | device_flush_dte(dev); |
1480 | } | 1558 | } |
1481 | 1559 | ||
1482 | /* | 1560 | /* |
@@ -1539,9 +1617,13 @@ out_unlock: | |||
1539 | static int attach_device(struct device *dev, | 1617 | static int attach_device(struct device *dev, |
1540 | struct protection_domain *domain) | 1618 | struct protection_domain *domain) |
1541 | { | 1619 | { |
1620 | struct pci_dev *pdev = to_pci_dev(dev); | ||
1542 | unsigned long flags; | 1621 | unsigned long flags; |
1543 | int ret; | 1622 | int ret; |
1544 | 1623 | ||
1624 | if (amd_iommu_iotlb_sup) | ||
1625 | pci_enable_ats(pdev, PAGE_SHIFT); | ||
1626 | |||
1545 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 1627 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
1546 | ret = __attach_device(dev, domain); | 1628 | ret = __attach_device(dev, domain); |
1547 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1629 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
@@ -1551,7 +1633,7 @@ static int attach_device(struct device *dev, | |||
1551 | * left the caches in the IOMMU dirty. So we have to flush | 1633 | * left the caches in the IOMMU dirty. So we have to flush |
1552 | * here to evict all dirty stuff. | 1634 | * here to evict all dirty stuff. |
1553 | */ | 1635 | */ |
1554 | iommu_flush_tlb_pde(domain); | 1636 | domain_flush_tlb_pde(domain); |
1555 | 1637 | ||
1556 | return ret; | 1638 | return ret; |
1557 | } | 1639 | } |
@@ -1598,12 +1680,16 @@ static void __detach_device(struct device *dev) | |||
1598 | */ | 1680 | */ |
1599 | static void detach_device(struct device *dev) | 1681 | static void detach_device(struct device *dev) |
1600 | { | 1682 | { |
1683 | struct pci_dev *pdev = to_pci_dev(dev); | ||
1601 | unsigned long flags; | 1684 | unsigned long flags; |
1602 | 1685 | ||
1603 | /* lock device table */ | 1686 | /* lock device table */ |
1604 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 1687 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
1605 | __detach_device(dev); | 1688 | __detach_device(dev); |
1606 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1689 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1690 | |||
1691 | if (amd_iommu_iotlb_sup && pci_ats_enabled(pdev)) | ||
1692 | pci_disable_ats(pdev); | ||
1607 | } | 1693 | } |
1608 | 1694 | ||
1609 | /* | 1695 | /* |
@@ -1615,10 +1701,9 @@ static struct protection_domain *domain_for_device(struct device *dev) | |||
1615 | struct protection_domain *dom; | 1701 | struct protection_domain *dom; |
1616 | struct iommu_dev_data *dev_data, *alias_data; | 1702 | struct iommu_dev_data *dev_data, *alias_data; |
1617 | unsigned long flags; | 1703 | unsigned long flags; |
1618 | u16 devid, alias; | 1704 | u16 devid; |
1619 | 1705 | ||
1620 | devid = get_device_id(dev); | 1706 | devid = get_device_id(dev); |
1621 | alias = amd_iommu_alias_table[devid]; | ||
1622 | dev_data = get_dev_data(dev); | 1707 | dev_data = get_dev_data(dev); |
1623 | alias_data = get_dev_data(dev_data->alias); | 1708 | alias_data = get_dev_data(dev_data->alias); |
1624 | if (!alias_data) | 1709 | if (!alias_data) |
@@ -1692,7 +1777,7 @@ static int device_change_notifier(struct notifier_block *nb, | |||
1692 | goto out; | 1777 | goto out; |
1693 | } | 1778 | } |
1694 | 1779 | ||
1695 | iommu_flush_device(dev); | 1780 | device_flush_dte(dev); |
1696 | iommu_completion_wait(iommu); | 1781 | iommu_completion_wait(iommu); |
1697 | 1782 | ||
1698 | out: | 1783 | out: |
@@ -1753,8 +1838,9 @@ static void update_device_table(struct protection_domain *domain) | |||
1753 | struct iommu_dev_data *dev_data; | 1838 | struct iommu_dev_data *dev_data; |
1754 | 1839 | ||
1755 | list_for_each_entry(dev_data, &domain->dev_list, list) { | 1840 | list_for_each_entry(dev_data, &domain->dev_list, list) { |
1841 | struct pci_dev *pdev = to_pci_dev(dev_data->dev); | ||
1756 | u16 devid = get_device_id(dev_data->dev); | 1842 | u16 devid = get_device_id(dev_data->dev); |
1757 | set_dte_entry(devid, domain); | 1843 | set_dte_entry(devid, domain, pci_ats_enabled(pdev)); |
1758 | } | 1844 | } |
1759 | } | 1845 | } |
1760 | 1846 | ||
@@ -1764,8 +1850,9 @@ static void update_domain(struct protection_domain *domain) | |||
1764 | return; | 1850 | return; |
1765 | 1851 | ||
1766 | update_device_table(domain); | 1852 | update_device_table(domain); |
1767 | iommu_flush_domain_devices(domain); | 1853 | |
1768 | iommu_flush_tlb_pde(domain); | 1854 | domain_flush_devices(domain); |
1855 | domain_flush_tlb_pde(domain); | ||
1769 | 1856 | ||
1770 | domain->updated = false; | 1857 | domain->updated = false; |
1771 | } | 1858 | } |
@@ -1924,10 +2011,10 @@ retry: | |||
1924 | ADD_STATS_COUNTER(alloced_io_mem, size); | 2011 | ADD_STATS_COUNTER(alloced_io_mem, size); |
1925 | 2012 | ||
1926 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { | 2013 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { |
1927 | iommu_flush_tlb(&dma_dom->domain); | 2014 | domain_flush_tlb(&dma_dom->domain); |
1928 | dma_dom->need_flush = false; | 2015 | dma_dom->need_flush = false; |
1929 | } else if (unlikely(amd_iommu_np_cache)) | 2016 | } else if (unlikely(amd_iommu_np_cache)) |
1930 | iommu_flush_pages(&dma_dom->domain, address, size); | 2017 | domain_flush_pages(&dma_dom->domain, address, size); |
1931 | 2018 | ||
1932 | out: | 2019 | out: |
1933 | return address; | 2020 | return address; |
@@ -1976,7 +2063,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, | |||
1976 | dma_ops_free_addresses(dma_dom, dma_addr, pages); | 2063 | dma_ops_free_addresses(dma_dom, dma_addr, pages); |
1977 | 2064 | ||
1978 | if (amd_iommu_unmap_flush || dma_dom->need_flush) { | 2065 | if (amd_iommu_unmap_flush || dma_dom->need_flush) { |
1979 | iommu_flush_pages(&dma_dom->domain, flush_addr, size); | 2066 | domain_flush_pages(&dma_dom->domain, flush_addr, size); |
1980 | dma_dom->need_flush = false; | 2067 | dma_dom->need_flush = false; |
1981 | } | 2068 | } |
1982 | } | 2069 | } |
@@ -2012,7 +2099,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, | |||
2012 | if (addr == DMA_ERROR_CODE) | 2099 | if (addr == DMA_ERROR_CODE) |
2013 | goto out; | 2100 | goto out; |
2014 | 2101 | ||
2015 | iommu_flush_complete(domain); | 2102 | domain_flush_complete(domain); |
2016 | 2103 | ||
2017 | out: | 2104 | out: |
2018 | spin_unlock_irqrestore(&domain->lock, flags); | 2105 | spin_unlock_irqrestore(&domain->lock, flags); |
@@ -2039,7 +2126,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, | |||
2039 | 2126 | ||
2040 | __unmap_single(domain->priv, dma_addr, size, dir); | 2127 | __unmap_single(domain->priv, dma_addr, size, dir); |
2041 | 2128 | ||
2042 | iommu_flush_complete(domain); | 2129 | domain_flush_complete(domain); |
2043 | 2130 | ||
2044 | spin_unlock_irqrestore(&domain->lock, flags); | 2131 | spin_unlock_irqrestore(&domain->lock, flags); |
2045 | } | 2132 | } |
@@ -2104,7 +2191,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
2104 | goto unmap; | 2191 | goto unmap; |
2105 | } | 2192 | } |
2106 | 2193 | ||
2107 | iommu_flush_complete(domain); | 2194 | domain_flush_complete(domain); |
2108 | 2195 | ||
2109 | out: | 2196 | out: |
2110 | spin_unlock_irqrestore(&domain->lock, flags); | 2197 | spin_unlock_irqrestore(&domain->lock, flags); |
@@ -2150,7 +2237,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
2150 | s->dma_address = s->dma_length = 0; | 2237 | s->dma_address = s->dma_length = 0; |
2151 | } | 2238 | } |
2152 | 2239 | ||
2153 | iommu_flush_complete(domain); | 2240 | domain_flush_complete(domain); |
2154 | 2241 | ||
2155 | spin_unlock_irqrestore(&domain->lock, flags); | 2242 | spin_unlock_irqrestore(&domain->lock, flags); |
2156 | } | 2243 | } |
@@ -2200,7 +2287,7 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
2200 | goto out_free; | 2287 | goto out_free; |
2201 | } | 2288 | } |
2202 | 2289 | ||
2203 | iommu_flush_complete(domain); | 2290 | domain_flush_complete(domain); |
2204 | 2291 | ||
2205 | spin_unlock_irqrestore(&domain->lock, flags); | 2292 | spin_unlock_irqrestore(&domain->lock, flags); |
2206 | 2293 | ||
@@ -2232,7 +2319,7 @@ static void free_coherent(struct device *dev, size_t size, | |||
2232 | 2319 | ||
2233 | __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); | 2320 | __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); |
2234 | 2321 | ||
2235 | iommu_flush_complete(domain); | 2322 | domain_flush_complete(domain); |
2236 | 2323 | ||
2237 | spin_unlock_irqrestore(&domain->lock, flags); | 2324 | spin_unlock_irqrestore(&domain->lock, flags); |
2238 | 2325 | ||
@@ -2476,7 +2563,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, | |||
2476 | if (!iommu) | 2563 | if (!iommu) |
2477 | return; | 2564 | return; |
2478 | 2565 | ||
2479 | iommu_flush_device(dev); | 2566 | device_flush_dte(dev); |
2480 | iommu_completion_wait(iommu); | 2567 | iommu_completion_wait(iommu); |
2481 | } | 2568 | } |
2482 | 2569 | ||
@@ -2542,7 +2629,7 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, | |||
2542 | unmap_size = iommu_unmap_page(domain, iova, page_size); | 2629 | unmap_size = iommu_unmap_page(domain, iova, page_size); |
2543 | mutex_unlock(&domain->api_lock); | 2630 | mutex_unlock(&domain->api_lock); |
2544 | 2631 | ||
2545 | iommu_flush_tlb_pde(domain); | 2632 | domain_flush_tlb_pde(domain); |
2546 | 2633 | ||
2547 | return get_order(unmap_size); | 2634 | return get_order(unmap_size); |
2548 | } | 2635 | } |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 246d727b65b7..9179c21120a8 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -137,6 +137,7 @@ int amd_iommus_present; | |||
137 | 137 | ||
138 | /* IOMMUs have a non-present cache? */ | 138 | /* IOMMUs have a non-present cache? */ |
139 | bool amd_iommu_np_cache __read_mostly; | 139 | bool amd_iommu_np_cache __read_mostly; |
140 | bool amd_iommu_iotlb_sup __read_mostly = true; | ||
140 | 141 | ||
141 | /* | 142 | /* |
142 | * The ACPI table parsing functions set this variable on an error | 143 | * The ACPI table parsing functions set this variable on an error |
@@ -180,6 +181,12 @@ static u32 dev_table_size; /* size of the device table */ | |||
180 | static u32 alias_table_size; /* size of the alias table */ | 181 | static u32 alias_table_size; /* size of the alias table */ |
181 | static u32 rlookup_table_size; /* size if the rlookup table */ | 182 | static u32 rlookup_table_size; /* size if the rlookup table */ |
182 | 183 | ||
184 | /* | ||
185 | * This function flushes all internal caches of | ||
186 | * the IOMMU used by this driver. | ||
187 | */ | ||
188 | extern void iommu_flush_all_caches(struct amd_iommu *iommu); | ||
189 | |||
183 | static inline void update_last_devid(u16 devid) | 190 | static inline void update_last_devid(u16 devid) |
184 | { | 191 | { |
185 | if (devid > amd_iommu_last_bdf) | 192 | if (devid > amd_iommu_last_bdf) |
@@ -293,9 +300,23 @@ static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
293 | /* Function to enable the hardware */ | 300 | /* Function to enable the hardware */ |
294 | static void iommu_enable(struct amd_iommu *iommu) | 301 | static void iommu_enable(struct amd_iommu *iommu) |
295 | { | 302 | { |
296 | printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n", | 303 | static const char * const feat_str[] = { |
304 | "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", | ||
305 | "IA", "GA", "HE", "PC", NULL | ||
306 | }; | ||
307 | int i; | ||
308 | |||
309 | printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx", | ||
297 | dev_name(&iommu->dev->dev), iommu->cap_ptr); | 310 | dev_name(&iommu->dev->dev), iommu->cap_ptr); |
298 | 311 | ||
312 | if (iommu->cap & (1 << IOMMU_CAP_EFR)) { | ||
313 | printk(KERN_CONT " extended features: "); | ||
314 | for (i = 0; feat_str[i]; ++i) | ||
315 | if (iommu_feature(iommu, (1ULL << i))) | ||
316 | printk(KERN_CONT " %s", feat_str[i]); | ||
317 | } | ||
318 | printk(KERN_CONT "\n"); | ||
319 | |||
299 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); | 320 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); |
300 | } | 321 | } |
301 | 322 | ||
@@ -651,7 +672,7 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) | |||
651 | static void __init init_iommu_from_pci(struct amd_iommu *iommu) | 672 | static void __init init_iommu_from_pci(struct amd_iommu *iommu) |
652 | { | 673 | { |
653 | int cap_ptr = iommu->cap_ptr; | 674 | int cap_ptr = iommu->cap_ptr; |
654 | u32 range, misc; | 675 | u32 range, misc, low, high; |
655 | int i, j; | 676 | int i, j; |
656 | 677 | ||
657 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, | 678 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, |
@@ -667,6 +688,15 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) | |||
667 | MMIO_GET_LD(range)); | 688 | MMIO_GET_LD(range)); |
668 | iommu->evt_msi_num = MMIO_MSI_NUM(misc); | 689 | iommu->evt_msi_num = MMIO_MSI_NUM(misc); |
669 | 690 | ||
691 | if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) | ||
692 | amd_iommu_iotlb_sup = false; | ||
693 | |||
694 | /* read extended feature bits */ | ||
695 | low = readl(iommu->mmio_base + MMIO_EXT_FEATURES); | ||
696 | high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4); | ||
697 | |||
698 | iommu->features = ((u64)high << 32) | low; | ||
699 | |||
670 | if (!is_rd890_iommu(iommu->dev)) | 700 | if (!is_rd890_iommu(iommu->dev)) |
671 | return; | 701 | return; |
672 | 702 | ||
@@ -1004,10 +1034,11 @@ static int iommu_setup_msi(struct amd_iommu *iommu) | |||
1004 | if (pci_enable_msi(iommu->dev)) | 1034 | if (pci_enable_msi(iommu->dev)) |
1005 | return 1; | 1035 | return 1; |
1006 | 1036 | ||
1007 | r = request_irq(iommu->dev->irq, amd_iommu_int_handler, | 1037 | r = request_threaded_irq(iommu->dev->irq, |
1008 | IRQF_SAMPLE_RANDOM, | 1038 | amd_iommu_int_handler, |
1009 | "AMD-Vi", | 1039 | amd_iommu_int_thread, |
1010 | NULL); | 1040 | 0, "AMD-Vi", |
1041 | iommu->dev); | ||
1011 | 1042 | ||
1012 | if (r) { | 1043 | if (r) { |
1013 | pci_disable_msi(iommu->dev); | 1044 | pci_disable_msi(iommu->dev); |
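Switching from request_irq() to request_threaded_irq() splits the handler in two: amd_iommu_int_handler() now runs in hard-irq context and only returns IRQ_WAKE_THREAD, while amd_iommu_int_thread() does the event-log processing in process context, where it may sleep. A kernel-side sketch of the pattern (module context assumed; everything except the request_threaded_irq() API itself is illustrative):

#include <linux/interrupt.h>

static irqreturn_t demo_hard_handler(int irq, void *data)
{
	/* hard-irq context: do no real work, just wake the handler thread */
	return IRQ_WAKE_THREAD;
}

static irqreturn_t demo_thread_fn(int irq, void *data)
{
	/* process context: may sleep while draining the device's event log */
	return IRQ_HANDLED;
}

static int demo_request(unsigned int irq, void *dev)
{
	return request_threaded_irq(irq, demo_hard_handler, demo_thread_fn,
				    0, "demo-dev", dev);
}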
@@ -1244,6 +1275,7 @@ static void enable_iommus(void) | |||
1244 | iommu_set_exclusion_range(iommu); | 1275 | iommu_set_exclusion_range(iommu); |
1245 | iommu_init_msi(iommu); | 1276 | iommu_init_msi(iommu); |
1246 | iommu_enable(iommu); | 1277 | iommu_enable(iommu); |
1278 | iommu_flush_all_caches(iommu); | ||
1247 | } | 1279 | } |
1248 | } | 1280 | } |
1249 | 1281 | ||
@@ -1274,8 +1306,8 @@ static void amd_iommu_resume(void) | |||
1274 | * we have to flush after the IOMMUs are enabled because a | 1306 | * we have to flush after the IOMMUs are enabled because a |
1275 | * disabled IOMMU will never execute the commands we send | 1307 | * disabled IOMMU will never execute the commands we send |
1276 | */ | 1308 | */ |
1277 | amd_iommu_flush_all_devices(); | 1309 | for_each_iommu(iommu) |
1278 | amd_iommu_flush_all_domains(); | 1310 | iommu_flush_all_caches(iommu); |
1279 | } | 1311 | } |
1280 | 1312 | ||
1281 | static int amd_iommu_suspend(void) | 1313 | static int amd_iommu_suspend(void) |
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index cd1ffed4ee22..289e92862fd9 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c | |||
@@ -177,7 +177,6 @@ static struct clocksource clocksource_apbt = { | |||
177 | .rating = APBT_CLOCKSOURCE_RATING, | 177 | .rating = APBT_CLOCKSOURCE_RATING, |
178 | .read = apbt_read_clocksource, | 178 | .read = apbt_read_clocksource, |
179 | .mask = APBT_MASK, | 179 | .mask = APBT_MASK, |
180 | .shift = APBT_SHIFT, | ||
181 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 180 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
182 | .resume = apbt_restart_clocksource, | 181 | .resume = apbt_restart_clocksource, |
183 | }; | 182 | }; |
@@ -543,14 +542,7 @@ static int apbt_clocksource_register(void) | |||
543 | if (t1 == apbt_read_clocksource(&clocksource_apbt)) | 542 | if (t1 == apbt_read_clocksource(&clocksource_apbt)) |
544 | panic("APBT counter not counting. APBT disabled\n"); | 543 | panic("APBT counter not counting. APBT disabled\n"); |
545 | 544 | ||
546 | /* | 545 | clocksource_register_khz(&clocksource_apbt, (u32)apbt_freq*1000); |
547 | * initialize and register APBT clocksource | ||
548 | * convert that to ns/clock cycle | ||
549 | * mult = (ns/c) * 2^APBT_SHIFT | ||
550 | */ | ||
551 | clocksource_apbt.mult = div_sc(MSEC_PER_SEC, | ||
552 | (unsigned long) apbt_freq, APBT_SHIFT); | ||
553 | clocksource_register(&clocksource_apbt); | ||
554 | 546 | ||
555 | return 0; | 547 | return 0; |
556 | } | 548 | } |
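clocksource_register_khz() now derives the mult/shift pair internally, so the driver no longer calls div_sc() by hand. The relationship the core maintains is ns = (cycles * mult) >> shift, with mult roughly (10^6 << shift) / khz. A user-space sketch of that arithmetic with an invented frequency and shift:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t khz = 14318;		/* illustrative APB timer clock, in kHz */
	uint32_t shift = 20;		/* illustrative shift; the core picks its own */
	uint64_t mult = ((uint64_t)1000000 << shift) / khz;	/* ns per cycle, scaled up */
	uint64_t cycles = khz;		/* roughly one millisecond worth of cycles */

	printf("mult=%llu\n", (unsigned long long)mult);
	printf("~1ms reads back as %llu ns\n",
	       (unsigned long long)((cycles * mult) >> shift));
	return 0;
}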
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 86d1ad4962a7..3d2661ca6542 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -30,6 +30,22 @@ | |||
30 | #include <asm/amd_nb.h> | 30 | #include <asm/amd_nb.h> |
31 | #include <asm/x86_init.h> | 31 | #include <asm/x86_init.h> |
32 | 32 | ||
33 | /* | ||
34 | * Use 512M as the goal, in case kexec will load a kernel_big | ||
35 | * that will do in-place decompression and could overlap | ||
36 | * with the gart aperture that is used. | ||
37 | * Sequence: | ||
38 | * kernel_small | ||
39 | * ==> kexec (with kdump trigger path or gart still enabled) | ||
40 | * ==> kernel_small (gart area becomes e820_reserved) | ||
41 | * ==> kexec (with kdump trigger path or gart still enabled) | ||
42 | * ==> kernel_big (uncompressed size will be bigger than 64M or 128M) | ||
43 | * So don't use the space below 512M for the gart iommu; leave it | ||
44 | * for kernel code, to be safe. | ||
45 | */ | ||
46 | #define GART_MIN_ADDR (512ULL << 20) | ||
47 | #define GART_MAX_ADDR (1ULL << 32) | ||
48 | |||
33 | int gart_iommu_aperture; | 49 | int gart_iommu_aperture; |
34 | int gart_iommu_aperture_disabled __initdata; | 50 | int gart_iommu_aperture_disabled __initdata; |
35 | int gart_iommu_aperture_allowed __initdata; | 51 | int gart_iommu_aperture_allowed __initdata; |
@@ -70,21 +86,9 @@ static u32 __init allocate_aperture(void) | |||
70 | * memory. Unfortunately we cannot move it up because that would | 86 | * memory. Unfortunately we cannot move it up because that would |
71 | * make the IOMMU useless. | 87 | * make the IOMMU useless. |
72 | */ | 88 | */ |
73 | /* | 89 | addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR, |
74 | * using 512M as goal, in case kexec will load kernel_big | 90 | aper_size, aper_size); |
75 | * that will do the on position decompress, and could overlap with | 91 | if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) { |
76 | * that position with gart that is used. | ||
77 | * sequende: | ||
78 | * kernel_small | ||
79 | * ==> kexec (with kdump trigger path or previous doesn't shutdown gart) | ||
80 | * ==> kernel_small(gart area become e820_reserved) | ||
81 | * ==> kexec (with kdump trigger path or previous doesn't shutdown gart) | ||
82 | * ==> kerne_big (uncompressed size will be big than 64M or 128M) | ||
83 | * so don't use 512M below as gart iommu, leave the space for kernel | ||
84 | * code for safe | ||
85 | */ | ||
86 | addr = memblock_find_in_range(0, 1ULL<<32, aper_size, 512ULL<<20); | ||
87 | if (addr == MEMBLOCK_ERROR || addr + aper_size > 0xffffffff) { | ||
88 | printk(KERN_ERR | 92 | printk(KERN_ERR |
89 | "Cannot allocate aperture memory hole (%lx,%uK)\n", | 93 | "Cannot allocate aperture memory hole (%lx,%uK)\n", |
90 | addr, aper_size>>10); | 94 | addr, aper_size>>10); |
@@ -499,7 +503,7 @@ out: | |||
499 | * Don't enable translation yet but enable GART IO and CPU | 503 | * Don't enable translation yet but enable GART IO and CPU |
500 | * accesses and set DISTLBWALKPRB since GART table memory is UC. | 504 | * accesses and set DISTLBWALKPRB since GART table memory is UC. |
501 | */ | 505 | */ |
502 | u32 ctl = DISTLBWALKPRB | aper_order << 1; | 506 | u32 ctl = aper_order << 1; |
503 | 507 | ||
504 | bus = amd_nb_bus_dev_ranges[i].bus; | 508 | bus = amd_nb_bus_dev_ranges[i].bus; |
505 | dev_base = amd_nb_bus_dev_ranges[i].dev_base; | 509 | dev_base = amd_nb_bus_dev_ranges[i].dev_base; |
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 3966b564ea47..767fd04f2843 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile | |||
@@ -2,20 +2,25 @@ | |||
2 | # Makefile for local APIC drivers and for the IO-APIC code | 2 | # Makefile for local APIC drivers and for the IO-APIC code |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o | 5 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o |
6 | obj-y += hw_nmi.o | 6 | obj-y += hw_nmi.o |
7 | 7 | ||
8 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o | 8 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o |
9 | obj-$(CONFIG_SMP) += ipi.o | 9 | obj-$(CONFIG_SMP) += ipi.o |
10 | 10 | ||
11 | ifeq ($(CONFIG_X86_64),y) | 11 | ifeq ($(CONFIG_X86_64),y) |
12 | obj-y += apic_flat_64.o | 12 | # APIC probe will depend on the listing order here |
13 | obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o | ||
14 | obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o | ||
15 | obj-$(CONFIG_X86_UV) += x2apic_uv_x.o | 13 | obj-$(CONFIG_X86_UV) += x2apic_uv_x.o |
14 | obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o | ||
15 | obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o | ||
16 | obj-y += apic_flat_64.o | ||
16 | endif | 17 | endif |
17 | 18 | ||
18 | obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o | 19 | # APIC probe will depend on the listing order here |
19 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o | 20 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o |
20 | obj-$(CONFIG_X86_ES7000) += es7000_32.o | ||
21 | obj-$(CONFIG_X86_SUMMIT) += summit_32.o | 21 | obj-$(CONFIG_X86_SUMMIT) += summit_32.o |
22 | obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o | ||
23 | obj-$(CONFIG_X86_ES7000) += es7000_32.o | ||
24 | |||
25 | # For 32-bit, probe_32 needs to be listed last | ||
26 | obj-$(CONFIG_X86_LOCAL_APIC) += probe_$(BITS).o | ||
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index fabf01eff771..b961af86bfea 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -505,7 +505,7 @@ static void __cpuinit setup_APIC_timer(void) | |||
505 | { | 505 | { |
506 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); | 506 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); |
507 | 507 | ||
508 | if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) { | 508 | if (this_cpu_has(X86_FEATURE_ARAT)) { |
509 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; | 509 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; |
510 | /* Make LAPIC timer preferrable over percpu HPET */ | 510 | /* Make LAPIC timer preferrable over percpu HPET */ |
511 | lapic_clockevent.rating = 150; | 511 | lapic_clockevent.rating = 150; |
@@ -1237,6 +1237,17 @@ void __cpuinit setup_local_APIC(void) | |||
1237 | /* always use the value from LDR */ | 1237 | /* always use the value from LDR */ |
1238 | early_per_cpu(x86_cpu_to_logical_apicid, cpu) = | 1238 | early_per_cpu(x86_cpu_to_logical_apicid, cpu) = |
1239 | logical_smp_processor_id(); | 1239 | logical_smp_processor_id(); |
1240 | |||
1241 | /* | ||
1242 | * Some NUMA implementations (NUMAQ) don't initialize apicid to | ||
1243 | * node mapping during NUMA init. Now that logical apicid is | ||
1244 | * guaranteed to be known, give it another chance. This is already | ||
1245 | * a bit too late - percpu allocation has already happened without | ||
1246 | * proper NUMA affinity. | ||
1247 | */ | ||
1248 | if (apic->x86_32_numa_cpu_node) | ||
1249 | set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu), | ||
1250 | apic->x86_32_numa_cpu_node(cpu)); | ||
1240 | #endif | 1251 | #endif |
1241 | 1252 | ||
1242 | /* | 1253 | /* |
@@ -1450,7 +1461,6 @@ int __init enable_IR(void) | |||
1450 | void __init enable_IR_x2apic(void) | 1461 | void __init enable_IR_x2apic(void) |
1451 | { | 1462 | { |
1452 | unsigned long flags; | 1463 | unsigned long flags; |
1453 | struct IO_APIC_route_entry **ioapic_entries; | ||
1454 | int ret, x2apic_enabled = 0; | 1464 | int ret, x2apic_enabled = 0; |
1455 | int dmar_table_init_ret; | 1465 | int dmar_table_init_ret; |
1456 | 1466 | ||
@@ -1458,13 +1468,7 @@ void __init enable_IR_x2apic(void) | |||
1458 | if (dmar_table_init_ret && !x2apic_supported()) | 1468 | if (dmar_table_init_ret && !x2apic_supported()) |
1459 | return; | 1469 | return; |
1460 | 1470 | ||
1461 | ioapic_entries = alloc_ioapic_entries(); | 1471 | ret = save_ioapic_entries(); |
1462 | if (!ioapic_entries) { | ||
1463 | pr_err("Allocate ioapic_entries failed\n"); | ||
1464 | goto out; | ||
1465 | } | ||
1466 | |||
1467 | ret = save_IO_APIC_setup(ioapic_entries); | ||
1468 | if (ret) { | 1472 | if (ret) { |
1469 | pr_info("Saving IO-APIC state failed: %d\n", ret); | 1473 | pr_info("Saving IO-APIC state failed: %d\n", ret); |
1470 | goto out; | 1474 | goto out; |
@@ -1472,7 +1476,7 @@ void __init enable_IR_x2apic(void) | |||
1472 | 1476 | ||
1473 | local_irq_save(flags); | 1477 | local_irq_save(flags); |
1474 | legacy_pic->mask_all(); | 1478 | legacy_pic->mask_all(); |
1475 | mask_IO_APIC_setup(ioapic_entries); | 1479 | mask_ioapic_entries(); |
1476 | 1480 | ||
1477 | if (dmar_table_init_ret) | 1481 | if (dmar_table_init_ret) |
1478 | ret = 0; | 1482 | ret = 0; |
@@ -1503,14 +1507,11 @@ void __init enable_IR_x2apic(void) | |||
1503 | 1507 | ||
1504 | nox2apic: | 1508 | nox2apic: |
1505 | if (!ret) /* IR enabling failed */ | 1509 | if (!ret) /* IR enabling failed */ |
1506 | restore_IO_APIC_setup(ioapic_entries); | 1510 | restore_ioapic_entries(); |
1507 | legacy_pic->restore_mask(); | 1511 | legacy_pic->restore_mask(); |
1508 | local_irq_restore(flags); | 1512 | local_irq_restore(flags); |
1509 | 1513 | ||
1510 | out: | 1514 | out: |
1511 | if (ioapic_entries) | ||
1512 | free_ioapic_entries(ioapic_entries); | ||
1513 | |||
1514 | if (x2apic_enabled) | 1515 | if (x2apic_enabled) |
1515 | return; | 1516 | return; |
1516 | 1517 | ||
@@ -1812,30 +1813,41 @@ void smp_spurious_interrupt(struct pt_regs *regs) | |||
1812 | */ | 1813 | */ |
1813 | void smp_error_interrupt(struct pt_regs *regs) | 1814 | void smp_error_interrupt(struct pt_regs *regs) |
1814 | { | 1815 | { |
1815 | u32 v, v1; | 1816 | u32 v0, v1; |
1817 | u32 i = 0; | ||
1818 | static const char * const error_interrupt_reason[] = { | ||
1819 | "Send CS error", /* APIC Error Bit 0 */ | ||
1820 | "Receive CS error", /* APIC Error Bit 1 */ | ||
1821 | "Send accept error", /* APIC Error Bit 2 */ | ||
1822 | "Receive accept error", /* APIC Error Bit 3 */ | ||
1823 | "Redirectable IPI", /* APIC Error Bit 4 */ | ||
1824 | "Send illegal vector", /* APIC Error Bit 5 */ | ||
1825 | "Received illegal vector", /* APIC Error Bit 6 */ | ||
1826 | "Illegal register address", /* APIC Error Bit 7 */ | ||
1827 | }; | ||
1816 | 1828 | ||
1817 | exit_idle(); | 1829 | exit_idle(); |
1818 | irq_enter(); | 1830 | irq_enter(); |
1819 | /* First tickle the hardware, only then report what went on. -- REW */ | 1831 | /* First tickle the hardware, only then report what went on. -- REW */ |
1820 | v = apic_read(APIC_ESR); | 1832 | v0 = apic_read(APIC_ESR); |
1821 | apic_write(APIC_ESR, 0); | 1833 | apic_write(APIC_ESR, 0); |
1822 | v1 = apic_read(APIC_ESR); | 1834 | v1 = apic_read(APIC_ESR); |
1823 | ack_APIC_irq(); | 1835 | ack_APIC_irq(); |
1824 | atomic_inc(&irq_err_count); | 1836 | atomic_inc(&irq_err_count); |
1825 | 1837 | ||
1826 | /* | 1838 | apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)", |
1827 | * Here is what the APIC error bits mean: | 1839 | smp_processor_id(), v0 , v1); |
1828 | * 0: Send CS error | 1840 | |
1829 | * 1: Receive CS error | 1841 | v1 = v1 & 0xff; |
1830 | * 2: Send accept error | 1842 | while (v1) { |
1831 | * 3: Receive accept error | 1843 | if (v1 & 0x1) |
1832 | * 4: Reserved | 1844 | apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); |
1833 | * 5: Send illegal vector | 1845 | i++; |
1834 | * 6: Received illegal vector | 1846 | v1 >>= 1; |
1835 | * 7: Illegal register address | 1847 | }; |
1836 | */ | 1848 | |
1837 | pr_debug("APIC error on CPU%d: %02x(%02x)\n", | 1849 | apic_printk(APIC_DEBUG, KERN_CONT "\n"); |
1838 | smp_processor_id(), v , v1); | 1850 | |
1839 | irq_exit(); | 1851 | irq_exit(); |
1840 | } | 1852 | } |
1841 | 1853 | ||
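
The rewritten smp_error_interrupt() walks the latched ESR value bit by bit and appends the matching string from error_interrupt_reason[]. A stand-alone, user-space illustration of the same decode with a made-up sample value (not part of the patch):

#include <stdio.h>

int main(void)
{
	static const char * const reason[] = {
		"Send CS error", "Receive CS error", "Send accept error",
		"Receive accept error", "Redirectable IPI",
		"Send illegal vector", "Received illegal vector",
		"Illegal register address",
	};
	unsigned int v = 0x60;	/* sample ESR snapshot: bits 5 and 6 set */
	unsigned int i;

	for (i = 0; v; i++, v >>= 1)
		if (v & 1)
			printf(" : %s", reason[i]);
	printf("\n");	/* prints " : Send illegal vector : Received illegal vector" */
	return 0;
}
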
@@ -2003,21 +2015,6 @@ void default_init_apic_ldr(void) | |||
2003 | apic_write(APIC_LDR, val); | 2015 | apic_write(APIC_LDR, val); |
2004 | } | 2016 | } |
2005 | 2017 | ||
2006 | #ifdef CONFIG_X86_32 | ||
2007 | int default_x86_32_numa_cpu_node(int cpu) | ||
2008 | { | ||
2009 | #ifdef CONFIG_NUMA | ||
2010 | int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); | ||
2011 | |||
2012 | if (apicid != BAD_APICID) | ||
2013 | return __apicid_to_node[apicid]; | ||
2014 | return NUMA_NO_NODE; | ||
2015 | #else | ||
2016 | return 0; | ||
2017 | #endif | ||
2018 | } | ||
2019 | #endif | ||
2020 | |||
2021 | /* | 2018 | /* |
2022 | * Power management | 2019 | * Power management |
2023 | */ | 2020 | */ |
@@ -2088,28 +2085,20 @@ static void lapic_resume(void) | |||
2088 | { | 2085 | { |
2089 | unsigned int l, h; | 2086 | unsigned int l, h; |
2090 | unsigned long flags; | 2087 | unsigned long flags; |
2091 | int maxlvt, ret; | 2088 | int maxlvt; |
2092 | struct IO_APIC_route_entry **ioapic_entries = NULL; | ||
2093 | 2089 | ||
2094 | if (!apic_pm_state.active) | 2090 | if (!apic_pm_state.active) |
2095 | return; | 2091 | return; |
2096 | 2092 | ||
2097 | local_irq_save(flags); | 2093 | local_irq_save(flags); |
2098 | if (intr_remapping_enabled) { | 2094 | if (intr_remapping_enabled) { |
2099 | ioapic_entries = alloc_ioapic_entries(); | 2095 | /* |
2100 | if (!ioapic_entries) { | 2096 | * IO-APIC and PIC have their own resume routines. |
2101 | WARN(1, "Alloc ioapic_entries in lapic resume failed."); | 2097 | * We just mask them here to make sure the interrupt |
2102 | goto restore; | 2098 | * subsystem is completely quiet while we enable x2apic |
2103 | } | 2099 | * and interrupt-remapping. |
2104 | 2100 | */ | |
2105 | ret = save_IO_APIC_setup(ioapic_entries); | 2101 | mask_ioapic_entries(); |
2106 | if (ret) { | ||
2107 | WARN(1, "Saving IO-APIC state failed: %d\n", ret); | ||
2108 | free_ioapic_entries(ioapic_entries); | ||
2109 | goto restore; | ||
2110 | } | ||
2111 | |||
2112 | mask_IO_APIC_setup(ioapic_entries); | ||
2113 | legacy_pic->mask_all(); | 2102 | legacy_pic->mask_all(); |
2114 | } | 2103 | } |
2115 | 2104 | ||
@@ -2152,13 +2141,9 @@ static void lapic_resume(void) | |||
2152 | apic_write(APIC_ESR, 0); | 2141 | apic_write(APIC_ESR, 0); |
2153 | apic_read(APIC_ESR); | 2142 | apic_read(APIC_ESR); |
2154 | 2143 | ||
2155 | if (intr_remapping_enabled) { | 2144 | if (intr_remapping_enabled) |
2156 | reenable_intr_remapping(x2apic_mode); | 2145 | reenable_intr_remapping(x2apic_mode); |
2157 | legacy_pic->restore_mask(); | 2146 | |
2158 | restore_IO_APIC_setup(ioapic_entries); | ||
2159 | free_ioapic_entries(ioapic_entries); | ||
2160 | } | ||
2161 | restore: | ||
2162 | local_irq_restore(flags); | 2147 | local_irq_restore(flags); |
2163 | } | 2148 | } |
2164 | 2149 | ||
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 5652d31fe108..f7a41e4cae47 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/ctype.h> | 16 | #include <linux/ctype.h> |
17 | #include <linux/init.h> | 17 | #include <linux/init.h> |
18 | #include <linux/hardirq.h> | 18 | #include <linux/hardirq.h> |
19 | #include <linux/module.h> | ||
19 | #include <asm/smp.h> | 20 | #include <asm/smp.h> |
20 | #include <asm/apic.h> | 21 | #include <asm/apic.h> |
21 | #include <asm/ipi.h> | 22 | #include <asm/ipi.h> |
@@ -24,6 +25,12 @@ | |||
24 | #include <acpi/acpi_bus.h> | 25 | #include <acpi/acpi_bus.h> |
25 | #endif | 26 | #endif |
26 | 27 | ||
28 | static struct apic apic_physflat; | ||
29 | static struct apic apic_flat; | ||
30 | |||
31 | struct apic __read_mostly *apic = &apic_flat; | ||
32 | EXPORT_SYMBOL_GPL(apic); | ||
33 | |||
27 | static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 34 | static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
28 | { | 35 | { |
29 | return 1; | 36 | return 1; |
@@ -164,7 +171,7 @@ static int flat_phys_pkg_id(int initial_apic_id, int index_msb) | |||
164 | return initial_apic_id >> index_msb; | 171 | return initial_apic_id >> index_msb; |
165 | } | 172 | } |
166 | 173 | ||
167 | struct apic apic_flat = { | 174 | static struct apic apic_flat = { |
168 | .name = "flat", | 175 | .name = "flat", |
169 | .probe = NULL, | 176 | .probe = NULL, |
170 | .acpi_madt_oem_check = flat_acpi_madt_oem_check, | 177 | .acpi_madt_oem_check = flat_acpi_madt_oem_check, |
@@ -312,10 +319,18 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
312 | return per_cpu(x86_cpu_to_apicid, cpu); | 319 | return per_cpu(x86_cpu_to_apicid, cpu); |
313 | } | 320 | } |
314 | 321 | ||
315 | struct apic apic_physflat = { | 322 | static int physflat_probe(void) |
323 | { | ||
324 | if (apic == &apic_physflat || num_possible_cpus() > 8) | ||
325 | return 1; | ||
326 | |||
327 | return 0; | ||
328 | } | ||
329 | |||
330 | static struct apic apic_physflat = { | ||
316 | 331 | ||
317 | .name = "physical flat", | 332 | .name = "physical flat", |
318 | .probe = NULL, | 333 | .probe = physflat_probe, |
319 | .acpi_madt_oem_check = physflat_acpi_madt_oem_check, | 334 | .acpi_madt_oem_check = physflat_acpi_madt_oem_check, |
320 | .apic_id_registered = flat_apic_id_registered, | 335 | .apic_id_registered = flat_apic_id_registered, |
321 | 336 | ||
@@ -369,3 +384,8 @@ struct apic apic_physflat = { | |||
369 | .wait_icr_idle = native_apic_wait_icr_idle, | 384 | .wait_icr_idle = native_apic_wait_icr_idle, |
370 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | 385 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, |
371 | }; | 386 | }; |
387 | |||
388 | /* | ||
389 | * We need to check for physflat first, so this order is important. | ||
390 | */ | ||
391 | apic_drivers(apic_physflat, apic_flat); | ||
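
The flat and physflat drivers are now static and self-register through apic_drivers(). A registration sketch with a hypothetical driver pair follows; apic_drivers() itself is defined in <asm/apic.h> and is not shown in this diff, so the sketch only assumes it records the pointers in argument order for the probe loops later in the patch:

static int example_phys_probe(void)
{
	return num_possible_cpus() > 8;		/* prefer physical mode on large boxes */
}

static int example_flat_probe(void)
{
	return 1;				/* catch-all fallback */
}

static struct apic apic_example_phys = {
	.name	= "example physical",
	.probe	= example_phys_probe,
	/* ... remaining callbacks elided ... */
};

static struct apic apic_example_flat = {
	.name	= "example flat",
	.probe	= example_flat_probe,
	/* ... remaining callbacks elided ... */
};

/* The more specific driver goes first, mirroring physflat before flat. */
apic_drivers(apic_example_phys, apic_example_flat);
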
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index f1baa2dc087a..775b82bc655c 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c | |||
@@ -119,14 +119,6 @@ static void noop_apic_write(u32 reg, u32 v) | |||
119 | WARN_ON_ONCE(cpu_has_apic && !disable_apic); | 119 | WARN_ON_ONCE(cpu_has_apic && !disable_apic); |
120 | } | 120 | } |
121 | 121 | ||
122 | #ifdef CONFIG_X86_32 | ||
123 | static int noop_x86_32_numa_cpu_node(int cpu) | ||
124 | { | ||
125 | /* we're always on node 0 */ | ||
126 | return 0; | ||
127 | } | ||
128 | #endif | ||
129 | |||
130 | struct apic apic_noop = { | 122 | struct apic apic_noop = { |
131 | .name = "noop", | 123 | .name = "noop", |
132 | .probe = noop_probe, | 124 | .probe = noop_probe, |
@@ -195,6 +187,5 @@ struct apic apic_noop = { | |||
195 | 187 | ||
196 | #ifdef CONFIG_X86_32 | 188 | #ifdef CONFIG_X86_32 |
197 | .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid, | 189 | .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid, |
198 | .x86_32_numa_cpu_node = noop_x86_32_numa_cpu_node, | ||
199 | #endif | 190 | #endif |
200 | }; | 191 | }; |
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 541a2e431659..efd737e827f4 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c | |||
@@ -193,7 +193,7 @@ static int probe_bigsmp(void) | |||
193 | return dmi_bigsmp; | 193 | return dmi_bigsmp; |
194 | } | 194 | } |
195 | 195 | ||
196 | struct apic apic_bigsmp = { | 196 | static struct apic apic_bigsmp = { |
197 | 197 | ||
198 | .name = "bigsmp", | 198 | .name = "bigsmp", |
199 | .probe = probe_bigsmp, | 199 | .probe = probe_bigsmp, |
@@ -253,5 +253,14 @@ struct apic apic_bigsmp = { | |||
253 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | 253 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, |
254 | 254 | ||
255 | .x86_32_early_logical_apicid = bigsmp_early_logical_apicid, | 255 | .x86_32_early_logical_apicid = bigsmp_early_logical_apicid, |
256 | .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node, | ||
257 | }; | 256 | }; |
257 | |||
258 | struct apic * __init generic_bigsmp_probe(void) | ||
259 | { | ||
260 | if (probe_bigsmp()) | ||
261 | return &apic_bigsmp; | ||
262 | |||
263 | return NULL; | ||
264 | } | ||
265 | |||
266 | apic_driver(apic_bigsmp); | ||
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 3e9de4854c5b..9536b3fe43f8 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
@@ -510,11 +510,6 @@ static void es7000_setup_apic_routing(void) | |||
510 | nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); | 510 | nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); |
511 | } | 511 | } |
512 | 512 | ||
513 | static int es7000_numa_cpu_node(int cpu) | ||
514 | { | ||
515 | return 0; | ||
516 | } | ||
517 | |||
518 | static int es7000_cpu_present_to_apicid(int mps_cpu) | 513 | static int es7000_cpu_present_to_apicid(int mps_cpu) |
519 | { | 514 | { |
520 | if (!mps_cpu) | 515 | if (!mps_cpu) |
@@ -625,7 +620,7 @@ static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem, | |||
625 | } | 620 | } |
626 | 621 | ||
627 | /* We've been warned by a false positive warning. Use __refdata to keep calm. */ | 622 | /* We've been warned by a false positive warning. Use __refdata to keep calm. */ |
628 | struct apic __refdata apic_es7000_cluster = { | 623 | static struct apic __refdata apic_es7000_cluster = { |
629 | 624 | ||
630 | .name = "es7000", | 625 | .name = "es7000", |
631 | .probe = probe_es7000, | 626 | .probe = probe_es7000, |
@@ -688,10 +683,9 @@ struct apic __refdata apic_es7000_cluster = { | |||
688 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | 683 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, |
689 | 684 | ||
690 | .x86_32_early_logical_apicid = es7000_early_logical_apicid, | 685 | .x86_32_early_logical_apicid = es7000_early_logical_apicid, |
691 | .x86_32_numa_cpu_node = es7000_numa_cpu_node, | ||
692 | }; | 686 | }; |
693 | 687 | ||
694 | struct apic __refdata apic_es7000 = { | 688 | static struct apic __refdata apic_es7000 = { |
695 | 689 | ||
696 | .name = "es7000", | 690 | .name = "es7000", |
697 | .probe = probe_es7000, | 691 | .probe = probe_es7000, |
@@ -752,5 +746,10 @@ struct apic __refdata apic_es7000 = { | |||
752 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | 746 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, |
753 | 747 | ||
754 | .x86_32_early_logical_apicid = es7000_early_logical_apicid, | 748 | .x86_32_early_logical_apicid = es7000_early_logical_apicid, |
755 | .x86_32_numa_cpu_node = es7000_numa_cpu_node, | ||
756 | }; | 749 | }; |
750 | |||
751 | /* | ||
752 | * Need to check for es7000 followed by es7000_cluster, so this order | ||
753 | * in apic_drivers is important. | ||
754 | */ | ||
755 | apic_drivers(apic_es7000, apic_es7000_cluster); | ||
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 5260fe91bcb6..d5e57db0f7be 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c | |||
@@ -19,9 +19,9 @@ | |||
19 | #include <linux/delay.h> | 19 | #include <linux/delay.h> |
20 | 20 | ||
21 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | 21 | #ifdef CONFIG_HARDLOCKUP_DETECTOR |
22 | u64 hw_nmi_get_sample_period(void) | 22 | u64 hw_nmi_get_sample_period(int watchdog_thresh) |
23 | { | 23 | { |
24 | return (u64)(cpu_khz) * 1000 * 60; | 24 | return (u64)(cpu_khz) * 1000 * watchdog_thresh; |
25 | } | 25 | } |
26 | #endif | 26 | #endif |
27 | 27 | ||
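
hw_nmi_get_sample_period() now scales with the configurable watchdog_thresh instead of a hard-wired 60 seconds. A quick stand-alone check of the arithmetic with made-up values (not part of the patch):

#include <stdio.h>

int main(void)
{
	unsigned long long cpu_khz = 2400000;		/* hypothetical 2.4 GHz CPU */
	unsigned long long watchdog_thresh = 10;	/* seconds */
	unsigned long long period = cpu_khz * 1000 * watchdog_thresh;

	/* 24000000000 unhalted cycles, i.e. one PMU NMI sample roughly every 10 s */
	printf("%llu cycles\n", period);
	return 0;
}
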
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 68df09bba92e..e5293394b548 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -76,17 +76,40 @@ int sis_apic_bug = -1; | |||
76 | static DEFINE_RAW_SPINLOCK(ioapic_lock); | 76 | static DEFINE_RAW_SPINLOCK(ioapic_lock); |
77 | static DEFINE_RAW_SPINLOCK(vector_lock); | 77 | static DEFINE_RAW_SPINLOCK(vector_lock); |
78 | 78 | ||
79 | /* | 79 | static struct ioapic { |
80 | * # of IRQ routing registers | 80 | /* |
81 | */ | 81 | * # of IRQ routing registers |
82 | int nr_ioapic_registers[MAX_IO_APICS]; | 82 | */ |
83 | int nr_registers; | ||
84 | /* | ||
85 | * Saved state during suspend/resume, or while enabling intr-remap. | ||
86 | */ | ||
87 | struct IO_APIC_route_entry *saved_registers; | ||
88 | /* I/O APIC config */ | ||
89 | struct mpc_ioapic mp_config; | ||
90 | /* IO APIC gsi routing info */ | ||
91 | struct mp_ioapic_gsi gsi_config; | ||
92 | DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); | ||
93 | } ioapics[MAX_IO_APICS]; | ||
83 | 94 | ||
84 | /* I/O APIC entries */ | 95 | #define mpc_ioapic_ver(id) ioapics[id].mp_config.apicver |
85 | struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; | 96 | |
86 | int nr_ioapics; | 97 | int mpc_ioapic_id(int id) |
98 | { | ||
99 | return ioapics[id].mp_config.apicid; | ||
100 | } | ||
87 | 101 | ||
88 | /* IO APIC gsi routing info */ | 102 | unsigned int mpc_ioapic_addr(int id) |
89 | struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; | 103 | { |
104 | return ioapics[id].mp_config.apicaddr; | ||
105 | } | ||
106 | |||
107 | struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int id) | ||
108 | { | ||
109 | return &ioapics[id].gsi_config; | ||
110 | } | ||
111 | |||
112 | int nr_ioapics; | ||
90 | 113 | ||
91 | /* The one past the highest gsi number used */ | 114 | /* The one past the highest gsi number used */ |
92 | u32 gsi_top; | 115 | u32 gsi_top; |
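
With the MP-config and GSI data folded into the new ioapics[] array, other code reads it through the accessors rather than the removed mp_ioapics[]/mp_gsi_routing[] globals. A usage sketch (hypothetical helper, not in the patch):

static void sketch_dump_ioapics(void)
{
	int i;

	for (i = 0; i < nr_ioapics; i++) {
		struct mp_ioapic_gsi *gsi = mp_ioapic_gsi_routing(i);

		printk(KERN_DEBUG "IOAPIC %d: id %d, address %#x, GSI %u-%u\n",
		       i, mpc_ioapic_id(i), mpc_ioapic_addr(i),
		       gsi->gsi_base, gsi->gsi_end);
	}
}
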
@@ -128,8 +151,8 @@ static int __init parse_noapic(char *str) | |||
128 | } | 151 | } |
129 | early_param("noapic", parse_noapic); | 152 | early_param("noapic", parse_noapic); |
130 | 153 | ||
131 | static int io_apic_setup_irq_pin_once(unsigned int irq, int node, | 154 | static int io_apic_setup_irq_pin(unsigned int irq, int node, |
132 | struct io_apic_irq_attr *attr); | 155 | struct io_apic_irq_attr *attr); |
133 | 156 | ||
134 | /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ | 157 | /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ |
135 | void mp_save_irq(struct mpc_intsrc *m) | 158 | void mp_save_irq(struct mpc_intsrc *m) |
@@ -179,6 +202,14 @@ int __init arch_early_irq_init(void) | |||
179 | io_apic_irqs = ~0UL; | 202 | io_apic_irqs = ~0UL; |
180 | } | 203 | } |
181 | 204 | ||
205 | for (i = 0; i < nr_ioapics; i++) { | ||
206 | ioapics[i].saved_registers = | ||
207 | kzalloc(sizeof(struct IO_APIC_route_entry) * | ||
208 | ioapics[i].nr_registers, GFP_KERNEL); | ||
209 | if (!ioapics[i].saved_registers) | ||
210 | pr_err("IOAPIC %d: suspend/resume impossible!\n", i); | ||
211 | } | ||
212 | |||
182 | cfg = irq_cfgx; | 213 | cfg = irq_cfgx; |
183 | count = ARRAY_SIZE(irq_cfgx); | 214 | count = ARRAY_SIZE(irq_cfgx); |
184 | node = cpu_to_node(0); | 215 | node = cpu_to_node(0); |
@@ -297,7 +328,7 @@ struct io_apic { | |||
297 | static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) | 328 | static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) |
298 | { | 329 | { |
299 | return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) | 330 | return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) |
300 | + (mp_ioapics[idx].apicaddr & ~PAGE_MASK); | 331 | + (mpc_ioapic_addr(idx) & ~PAGE_MASK); |
301 | } | 332 | } |
302 | 333 | ||
303 | static inline void io_apic_eoi(unsigned int apic, unsigned int vector) | 334 | static inline void io_apic_eoi(unsigned int apic, unsigned int vector) |
@@ -573,7 +604,7 @@ static void clear_IO_APIC (void) | |||
573 | int apic, pin; | 604 | int apic, pin; |
574 | 605 | ||
575 | for (apic = 0; apic < nr_ioapics; apic++) | 606 | for (apic = 0; apic < nr_ioapics; apic++) |
576 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) | 607 | for (pin = 0; pin < ioapics[apic].nr_registers; pin++) |
577 | clear_IO_APIC_pin(apic, pin); | 608 | clear_IO_APIC_pin(apic, pin); |
578 | } | 609 | } |
579 | 610 | ||
@@ -615,74 +646,43 @@ static int __init ioapic_pirq_setup(char *str) | |||
615 | __setup("pirq=", ioapic_pirq_setup); | 646 | __setup("pirq=", ioapic_pirq_setup); |
616 | #endif /* CONFIG_X86_32 */ | 647 | #endif /* CONFIG_X86_32 */ |
617 | 648 | ||
618 | struct IO_APIC_route_entry **alloc_ioapic_entries(void) | ||
619 | { | ||
620 | int apic; | ||
621 | struct IO_APIC_route_entry **ioapic_entries; | ||
622 | |||
623 | ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics, | ||
624 | GFP_KERNEL); | ||
625 | if (!ioapic_entries) | ||
626 | return 0; | ||
627 | |||
628 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
629 | ioapic_entries[apic] = | ||
630 | kzalloc(sizeof(struct IO_APIC_route_entry) * | ||
631 | nr_ioapic_registers[apic], GFP_KERNEL); | ||
632 | if (!ioapic_entries[apic]) | ||
633 | goto nomem; | ||
634 | } | ||
635 | |||
636 | return ioapic_entries; | ||
637 | |||
638 | nomem: | ||
639 | while (--apic >= 0) | ||
640 | kfree(ioapic_entries[apic]); | ||
641 | kfree(ioapic_entries); | ||
642 | |||
643 | return 0; | ||
644 | } | ||
645 | |||
646 | /* | 649 | /* |
647 | * Saves all the IO-APIC RTE's | 650 | * Saves all the IO-APIC RTE's |
648 | */ | 651 | */ |
649 | int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) | 652 | int save_ioapic_entries(void) |
650 | { | 653 | { |
651 | int apic, pin; | 654 | int apic, pin; |
652 | 655 | int err = 0; | |
653 | if (!ioapic_entries) | ||
654 | return -ENOMEM; | ||
655 | 656 | ||
656 | for (apic = 0; apic < nr_ioapics; apic++) { | 657 | for (apic = 0; apic < nr_ioapics; apic++) { |
657 | if (!ioapic_entries[apic]) | 658 | if (!ioapics[apic].saved_registers) { |
658 | return -ENOMEM; | 659 | err = -ENOMEM; |
660 | continue; | ||
661 | } | ||
659 | 662 | ||
660 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) | 663 | for (pin = 0; pin < ioapics[apic].nr_registers; pin++) |
661 | ioapic_entries[apic][pin] = | 664 | ioapics[apic].saved_registers[pin] = |
662 | ioapic_read_entry(apic, pin); | 665 | ioapic_read_entry(apic, pin); |
663 | } | 666 | } |
664 | 667 | ||
665 | return 0; | 668 | return err; |
666 | } | 669 | } |
667 | 670 | ||
668 | /* | 671 | /* |
669 | * Mask all IO APIC entries. | 672 | * Mask all IO APIC entries. |
670 | */ | 673 | */ |
671 | void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) | 674 | void mask_ioapic_entries(void) |
672 | { | 675 | { |
673 | int apic, pin; | 676 | int apic, pin; |
674 | 677 | ||
675 | if (!ioapic_entries) | ||
676 | return; | ||
677 | |||
678 | for (apic = 0; apic < nr_ioapics; apic++) { | 678 | for (apic = 0; apic < nr_ioapics; apic++) { |
679 | if (!ioapic_entries[apic]) | 679 | if (!ioapics[apic].saved_registers) |
680 | break; | 680 | continue; |
681 | 681 | ||
682 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | 682 | for (pin = 0; pin < ioapics[apic].nr_registers; pin++) { |
683 | struct IO_APIC_route_entry entry; | 683 | struct IO_APIC_route_entry entry; |
684 | 684 | ||
685 | entry = ioapic_entries[apic][pin]; | 685 | entry = ioapics[apic].saved_registers[pin]; |
686 | if (!entry.mask) { | 686 | if (!entry.mask) { |
687 | entry.mask = 1; | 687 | entry.mask = 1; |
688 | ioapic_write_entry(apic, pin, entry); | 688 | ioapic_write_entry(apic, pin, entry); |
@@ -692,36 +692,23 @@ void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) | |||
692 | } | 692 | } |
693 | 693 | ||
694 | /* | 694 | /* |
695 | * Restore IO APIC entries which was saved in ioapic_entries. | 695 | * Restore IO APIC entries which were saved in the ioapic structure. |
696 | */ | 696 | */ |
697 | int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) | 697 | int restore_ioapic_entries(void) |
698 | { | 698 | { |
699 | int apic, pin; | 699 | int apic, pin; |
700 | 700 | ||
701 | if (!ioapic_entries) | ||
702 | return -ENOMEM; | ||
703 | |||
704 | for (apic = 0; apic < nr_ioapics; apic++) { | 701 | for (apic = 0; apic < nr_ioapics; apic++) { |
705 | if (!ioapic_entries[apic]) | 702 | if (!ioapics[apic].saved_registers) |
706 | return -ENOMEM; | 703 | continue; |
707 | 704 | ||
708 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) | 705 | for (pin = 0; pin < ioapics[apic].nr_registers; pin++) |
709 | ioapic_write_entry(apic, pin, | 706 | ioapic_write_entry(apic, pin, |
710 | ioapic_entries[apic][pin]); | 707 | ioapics[apic].saved_registers[pin]); |
711 | } | 708 | } |
712 | return 0; | 709 | return 0; |
713 | } | 710 | } |
714 | 711 | ||
715 | void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) | ||
716 | { | ||
717 | int apic; | ||
718 | |||
719 | for (apic = 0; apic < nr_ioapics; apic++) | ||
720 | kfree(ioapic_entries[apic]); | ||
721 | |||
722 | kfree(ioapic_entries); | ||
723 | } | ||
724 | |||
725 | /* | 712 | /* |
726 | * Find the IRQ entry number of a certain pin. | 713 | * Find the IRQ entry number of a certain pin. |
727 | */ | 714 | */ |
@@ -731,7 +718,7 @@ static int find_irq_entry(int apic, int pin, int type) | |||
731 | 718 | ||
732 | for (i = 0; i < mp_irq_entries; i++) | 719 | for (i = 0; i < mp_irq_entries; i++) |
733 | if (mp_irqs[i].irqtype == type && | 720 | if (mp_irqs[i].irqtype == type && |
734 | (mp_irqs[i].dstapic == mp_ioapics[apic].apicid || | 721 | (mp_irqs[i].dstapic == mpc_ioapic_id(apic) || |
735 | mp_irqs[i].dstapic == MP_APIC_ALL) && | 722 | mp_irqs[i].dstapic == MP_APIC_ALL) && |
736 | mp_irqs[i].dstirq == pin) | 723 | mp_irqs[i].dstirq == pin) |
737 | return i; | 724 | return i; |
@@ -773,7 +760,7 @@ static int __init find_isa_irq_apic(int irq, int type) | |||
773 | if (i < mp_irq_entries) { | 760 | if (i < mp_irq_entries) { |
774 | int apic; | 761 | int apic; |
775 | for(apic = 0; apic < nr_ioapics; apic++) { | 762 | for(apic = 0; apic < nr_ioapics; apic++) { |
776 | if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic) | 763 | if (mpc_ioapic_id(apic) == mp_irqs[i].dstapic) |
777 | return apic; | 764 | return apic; |
778 | } | 765 | } |
779 | } | 766 | } |
@@ -942,6 +929,7 @@ static int pin_2_irq(int idx, int apic, int pin) | |||
942 | { | 929 | { |
943 | int irq; | 930 | int irq; |
944 | int bus = mp_irqs[idx].srcbus; | 931 | int bus = mp_irqs[idx].srcbus; |
932 | struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(apic); | ||
945 | 933 | ||
946 | /* | 934 | /* |
947 | * Debugging check, we are in big trouble if this message pops up! | 935 | * Debugging check, we are in big trouble if this message pops up! |
@@ -952,7 +940,7 @@ static int pin_2_irq(int idx, int apic, int pin) | |||
952 | if (test_bit(bus, mp_bus_not_pci)) { | 940 | if (test_bit(bus, mp_bus_not_pci)) { |
953 | irq = mp_irqs[idx].srcbusirq; | 941 | irq = mp_irqs[idx].srcbusirq; |
954 | } else { | 942 | } else { |
955 | u32 gsi = mp_gsi_routing[apic].gsi_base + pin; | 943 | u32 gsi = gsi_cfg->gsi_base + pin; |
956 | 944 | ||
957 | if (gsi >= NR_IRQS_LEGACY) | 945 | if (gsi >= NR_IRQS_LEGACY) |
958 | irq = gsi; | 946 | irq = gsi; |
@@ -1003,7 +991,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, | |||
1003 | int lbus = mp_irqs[i].srcbus; | 991 | int lbus = mp_irqs[i].srcbus; |
1004 | 992 | ||
1005 | for (apic = 0; apic < nr_ioapics; apic++) | 993 | for (apic = 0; apic < nr_ioapics; apic++) |
1006 | if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || | 994 | if (mpc_ioapic_id(apic) == mp_irqs[i].dstapic || |
1007 | mp_irqs[i].dstapic == MP_APIC_ALL) | 995 | mp_irqs[i].dstapic == MP_APIC_ALL) |
1008 | break; | 996 | break; |
1009 | 997 | ||
@@ -1222,7 +1210,7 @@ static inline int IO_APIC_irq_trigger(int irq) | |||
1222 | int apic, idx, pin; | 1210 | int apic, idx, pin; |
1223 | 1211 | ||
1224 | for (apic = 0; apic < nr_ioapics; apic++) { | 1212 | for (apic = 0; apic < nr_ioapics; apic++) { |
1225 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | 1213 | for (pin = 0; pin < ioapics[apic].nr_registers; pin++) { |
1226 | idx = find_irq_entry(apic, pin, mp_INT); | 1214 | idx = find_irq_entry(apic, pin, mp_INT); |
1227 | if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) | 1215 | if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) |
1228 | return irq_trigger(idx); | 1216 | return irq_trigger(idx); |
@@ -1350,14 +1338,14 @@ static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq, | |||
1350 | apic_printk(APIC_VERBOSE,KERN_DEBUG | 1338 | apic_printk(APIC_VERBOSE,KERN_DEBUG |
1351 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " | 1339 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " |
1352 | "IRQ %d Mode:%i Active:%i)\n", | 1340 | "IRQ %d Mode:%i Active:%i)\n", |
1353 | apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector, | 1341 | apic_id, mpc_ioapic_id(apic_id), pin, cfg->vector, |
1354 | irq, trigger, polarity); | 1342 | irq, trigger, polarity); |
1355 | 1343 | ||
1356 | 1344 | ||
1357 | if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry, | 1345 | if (setup_ioapic_entry(mpc_ioapic_id(apic_id), irq, &entry, |
1358 | dest, trigger, polarity, cfg->vector, pin)) { | 1346 | dest, trigger, polarity, cfg->vector, pin)) { |
1359 | printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", | 1347 | printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", |
1360 | mp_ioapics[apic_id].apicid, pin); | 1348 | mpc_ioapic_id(apic_id), pin); |
1361 | __clear_irq_vector(irq, cfg); | 1349 | __clear_irq_vector(irq, cfg); |
1362 | return; | 1350 | return; |
1363 | } | 1351 | } |
@@ -1369,17 +1357,13 @@ static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq, | |||
1369 | ioapic_write_entry(apic_id, pin, entry); | 1357 | ioapic_write_entry(apic_id, pin, entry); |
1370 | } | 1358 | } |
1371 | 1359 | ||
1372 | static struct { | ||
1373 | DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); | ||
1374 | } mp_ioapic_routing[MAX_IO_APICS]; | ||
1375 | |||
1376 | static bool __init io_apic_pin_not_connected(int idx, int apic_id, int pin) | 1360 | static bool __init io_apic_pin_not_connected(int idx, int apic_id, int pin) |
1377 | { | 1361 | { |
1378 | if (idx != -1) | 1362 | if (idx != -1) |
1379 | return false; | 1363 | return false; |
1380 | 1364 | ||
1381 | apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n", | 1365 | apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n", |
1382 | mp_ioapics[apic_id].apicid, pin); | 1366 | mpc_ioapic_id(apic_id), pin); |
1383 | return true; | 1367 | return true; |
1384 | } | 1368 | } |
1385 | 1369 | ||
@@ -1389,7 +1373,7 @@ static void __init __io_apic_setup_irqs(unsigned int apic_id) | |||
1389 | struct io_apic_irq_attr attr; | 1373 | struct io_apic_irq_attr attr; |
1390 | unsigned int pin, irq; | 1374 | unsigned int pin, irq; |
1391 | 1375 | ||
1392 | for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { | 1376 | for (pin = 0; pin < ioapics[apic_id].nr_registers; pin++) { |
1393 | idx = find_irq_entry(apic_id, pin, mp_INT); | 1377 | idx = find_irq_entry(apic_id, pin, mp_INT); |
1394 | if (io_apic_pin_not_connected(idx, apic_id, pin)) | 1378 | if (io_apic_pin_not_connected(idx, apic_id, pin)) |
1395 | continue; | 1379 | continue; |
@@ -1511,7 +1495,7 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1511 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | 1495 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); |
1512 | for (i = 0; i < nr_ioapics; i++) | 1496 | for (i = 0; i < nr_ioapics; i++) |
1513 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", | 1497 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", |
1514 | mp_ioapics[i].apicid, nr_ioapic_registers[i]); | 1498 | mpc_ioapic_id(i), ioapics[i].nr_registers); |
1515 | 1499 | ||
1516 | /* | 1500 | /* |
1517 | * We are a bit conservative about what we expect. We have to | 1501 | * We are a bit conservative about what we expect. We have to |
@@ -1531,7 +1515,7 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1531 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 1515 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
1532 | 1516 | ||
1533 | printk("\n"); | 1517 | printk("\n"); |
1534 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); | 1518 | printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(apic)); |
1535 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); | 1519 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); |
1536 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | 1520 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); |
1537 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); | 1521 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); |
@@ -1825,7 +1809,7 @@ void __init enable_IO_APIC(void) | |||
1825 | for(apic = 0; apic < nr_ioapics; apic++) { | 1809 | for(apic = 0; apic < nr_ioapics; apic++) { |
1826 | int pin; | 1810 | int pin; |
1827 | /* See if any of the pins is in ExtINT mode */ | 1811 | /* See if any of the pins is in ExtINT mode */ |
1828 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | 1812 | for (pin = 0; pin < ioapics[apic].nr_registers; pin++) { |
1829 | struct IO_APIC_route_entry entry; | 1813 | struct IO_APIC_route_entry entry; |
1830 | entry = ioapic_read_entry(apic, pin); | 1814 | entry = ioapic_read_entry(apic, pin); |
1831 | 1815 | ||
@@ -1949,14 +1933,14 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) | |||
1949 | reg_00.raw = io_apic_read(apic_id, 0); | 1933 | reg_00.raw = io_apic_read(apic_id, 0); |
1950 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 1934 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
1951 | 1935 | ||
1952 | old_id = mp_ioapics[apic_id].apicid; | 1936 | old_id = mpc_ioapic_id(apic_id); |
1953 | 1937 | ||
1954 | if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) { | 1938 | if (mpc_ioapic_id(apic_id) >= get_physical_broadcast()) { |
1955 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", | 1939 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", |
1956 | apic_id, mp_ioapics[apic_id].apicid); | 1940 | apic_id, mpc_ioapic_id(apic_id)); |
1957 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | 1941 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", |
1958 | reg_00.bits.ID); | 1942 | reg_00.bits.ID); |
1959 | mp_ioapics[apic_id].apicid = reg_00.bits.ID; | 1943 | ioapics[apic_id].mp_config.apicid = reg_00.bits.ID; |
1960 | } | 1944 | } |
1961 | 1945 | ||
1962 | /* | 1946 | /* |
@@ -1965,9 +1949,9 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) | |||
1965 | * 'stuck on smp_invalidate_needed IPI wait' messages. | 1949 | * 'stuck on smp_invalidate_needed IPI wait' messages. |
1966 | */ | 1950 | */ |
1967 | if (apic->check_apicid_used(&phys_id_present_map, | 1951 | if (apic->check_apicid_used(&phys_id_present_map, |
1968 | mp_ioapics[apic_id].apicid)) { | 1952 | mpc_ioapic_id(apic_id))) { |
1969 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", | 1953 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", |
1970 | apic_id, mp_ioapics[apic_id].apicid); | 1954 | apic_id, mpc_ioapic_id(apic_id)); |
1971 | for (i = 0; i < get_physical_broadcast(); i++) | 1955 | for (i = 0; i < get_physical_broadcast(); i++) |
1972 | if (!physid_isset(i, phys_id_present_map)) | 1956 | if (!physid_isset(i, phys_id_present_map)) |
1973 | break; | 1957 | break; |
@@ -1976,13 +1960,14 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) | |||
1976 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | 1960 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", |
1977 | i); | 1961 | i); |
1978 | physid_set(i, phys_id_present_map); | 1962 | physid_set(i, phys_id_present_map); |
1979 | mp_ioapics[apic_id].apicid = i; | 1963 | ioapics[apic_id].mp_config.apicid = i; |
1980 | } else { | 1964 | } else { |
1981 | physid_mask_t tmp; | 1965 | physid_mask_t tmp; |
1982 | apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp); | 1966 | apic->apicid_to_cpu_present(mpc_ioapic_id(apic_id), |
1967 | &tmp); | ||
1983 | apic_printk(APIC_VERBOSE, "Setting %d in the " | 1968 | apic_printk(APIC_VERBOSE, "Setting %d in the " |
1984 | "phys_id_present_map\n", | 1969 | "phys_id_present_map\n", |
1985 | mp_ioapics[apic_id].apicid); | 1970 | mpc_ioapic_id(apic_id)); |
1986 | physids_or(phys_id_present_map, phys_id_present_map, tmp); | 1971 | physids_or(phys_id_present_map, phys_id_present_map, tmp); |
1987 | } | 1972 | } |
1988 | 1973 | ||
@@ -1990,24 +1975,24 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) | |||
1990 | * We need to adjust the IRQ routing table | 1975 | * We need to adjust the IRQ routing table |
1991 | * if the ID changed. | 1976 | * if the ID changed. |
1992 | */ | 1977 | */ |
1993 | if (old_id != mp_ioapics[apic_id].apicid) | 1978 | if (old_id != mpc_ioapic_id(apic_id)) |
1994 | for (i = 0; i < mp_irq_entries; i++) | 1979 | for (i = 0; i < mp_irq_entries; i++) |
1995 | if (mp_irqs[i].dstapic == old_id) | 1980 | if (mp_irqs[i].dstapic == old_id) |
1996 | mp_irqs[i].dstapic | 1981 | mp_irqs[i].dstapic |
1997 | = mp_ioapics[apic_id].apicid; | 1982 | = mpc_ioapic_id(apic_id); |
1998 | 1983 | ||
1999 | /* | 1984 | /* |
2000 | * Update the ID register according to the right value | 1985 | * Update the ID register according to the right value |
2001 | * from the MPC table if they are different. | 1986 | * from the MPC table if they are different. |
2002 | */ | 1987 | */ |
2003 | if (mp_ioapics[apic_id].apicid == reg_00.bits.ID) | 1988 | if (mpc_ioapic_id(apic_id) == reg_00.bits.ID) |
2004 | continue; | 1989 | continue; |
2005 | 1990 | ||
2006 | apic_printk(APIC_VERBOSE, KERN_INFO | 1991 | apic_printk(APIC_VERBOSE, KERN_INFO |
2007 | "...changing IO-APIC physical APIC ID to %d ...", | 1992 | "...changing IO-APIC physical APIC ID to %d ...", |
2008 | mp_ioapics[apic_id].apicid); | 1993 | mpc_ioapic_id(apic_id)); |
2009 | 1994 | ||
2010 | reg_00.bits.ID = mp_ioapics[apic_id].apicid; | 1995 | reg_00.bits.ID = mpc_ioapic_id(apic_id); |
2011 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 1996 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2012 | io_apic_write(apic_id, 0, reg_00.raw); | 1997 | io_apic_write(apic_id, 0, reg_00.raw); |
2013 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 1998 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
@@ -2018,7 +2003,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) | |||
2018 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 2003 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2019 | reg_00.raw = io_apic_read(apic_id, 0); | 2004 | reg_00.raw = io_apic_read(apic_id, 0); |
2020 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 2005 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2021 | if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) | 2006 | if (reg_00.bits.ID != mpc_ioapic_id(apic_id)) |
2022 | printk("could not set ID!\n"); | 2007 | printk("could not set ID!\n"); |
2023 | else | 2008 | else |
2024 | apic_printk(APIC_VERBOSE, " ok.\n"); | 2009 | apic_printk(APIC_VERBOSE, " ok.\n"); |
@@ -2404,7 +2389,7 @@ static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | |||
2404 | 2389 | ||
2405 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 2390 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2406 | for_each_irq_pin(entry, cfg->irq_2_pin) { | 2391 | for_each_irq_pin(entry, cfg->irq_2_pin) { |
2407 | if (mp_ioapics[entry->apic].apicver >= 0x20) { | 2392 | if (mpc_ioapic_ver(entry->apic) >= 0x20) { |
2408 | /* | 2393 | /* |
2409 | * Intr-remapping uses pin number as the virtual vector | 2394 | * Intr-remapping uses pin number as the virtual vector |
2410 | * in the RTE. Actual vector is programmed in | 2395 | * in the RTE. Actual vector is programmed in |
@@ -2918,49 +2903,19 @@ static int __init io_apic_bug_finalize(void) | |||
2918 | 2903 | ||
2919 | late_initcall(io_apic_bug_finalize); | 2904 | late_initcall(io_apic_bug_finalize); |
2920 | 2905 | ||
2921 | static struct IO_APIC_route_entry *ioapic_saved_data[MAX_IO_APICS]; | 2906 | static void resume_ioapic_id(int ioapic_id) |
2922 | |||
2923 | static void suspend_ioapic(int ioapic_id) | ||
2924 | { | 2907 | { |
2925 | struct IO_APIC_route_entry *saved_data = ioapic_saved_data[ioapic_id]; | ||
2926 | int i; | ||
2927 | |||
2928 | if (!saved_data) | ||
2929 | return; | ||
2930 | |||
2931 | for (i = 0; i < nr_ioapic_registers[ioapic_id]; i++) | ||
2932 | saved_data[i] = ioapic_read_entry(ioapic_id, i); | ||
2933 | } | ||
2934 | |||
2935 | static int ioapic_suspend(void) | ||
2936 | { | ||
2937 | int ioapic_id; | ||
2938 | |||
2939 | for (ioapic_id = 0; ioapic_id < nr_ioapics; ioapic_id++) | ||
2940 | suspend_ioapic(ioapic_id); | ||
2941 | |||
2942 | return 0; | ||
2943 | } | ||
2944 | |||
2945 | static void resume_ioapic(int ioapic_id) | ||
2946 | { | ||
2947 | struct IO_APIC_route_entry *saved_data = ioapic_saved_data[ioapic_id]; | ||
2948 | unsigned long flags; | 2908 | unsigned long flags; |
2949 | union IO_APIC_reg_00 reg_00; | 2909 | union IO_APIC_reg_00 reg_00; |
2950 | int i; | ||
2951 | 2910 | ||
2952 | if (!saved_data) | ||
2953 | return; | ||
2954 | 2911 | ||
2955 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 2912 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2956 | reg_00.raw = io_apic_read(ioapic_id, 0); | 2913 | reg_00.raw = io_apic_read(ioapic_id, 0); |
2957 | if (reg_00.bits.ID != mp_ioapics[ioapic_id].apicid) { | 2914 | if (reg_00.bits.ID != mpc_ioapic_id(ioapic_id)) { |
2958 | reg_00.bits.ID = mp_ioapics[ioapic_id].apicid; | 2915 | reg_00.bits.ID = mpc_ioapic_id(ioapic_id); |
2959 | io_apic_write(ioapic_id, 0, reg_00.raw); | 2916 | io_apic_write(ioapic_id, 0, reg_00.raw); |
2960 | } | 2917 | } |
2961 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 2918 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2962 | for (i = 0; i < nr_ioapic_registers[ioapic_id]; i++) | ||
2963 | ioapic_write_entry(ioapic_id, i, saved_data[i]); | ||
2964 | } | 2919 | } |
2965 | 2920 | ||
2966 | static void ioapic_resume(void) | 2921 | static void ioapic_resume(void) |
@@ -2968,28 +2923,18 @@ static void ioapic_resume(void) | |||
2968 | int ioapic_id; | 2923 | int ioapic_id; |
2969 | 2924 | ||
2970 | for (ioapic_id = nr_ioapics - 1; ioapic_id >= 0; ioapic_id--) | 2925 | for (ioapic_id = nr_ioapics - 1; ioapic_id >= 0; ioapic_id--) |
2971 | resume_ioapic(ioapic_id); | 2926 | resume_ioapic_id(ioapic_id); |
2927 | |||
2928 | restore_ioapic_entries(); | ||
2972 | } | 2929 | } |
2973 | 2930 | ||
2974 | static struct syscore_ops ioapic_syscore_ops = { | 2931 | static struct syscore_ops ioapic_syscore_ops = { |
2975 | .suspend = ioapic_suspend, | 2932 | .suspend = save_ioapic_entries, |
2976 | .resume = ioapic_resume, | 2933 | .resume = ioapic_resume, |
2977 | }; | 2934 | }; |
2978 | 2935 | ||
2979 | static int __init ioapic_init_ops(void) | 2936 | static int __init ioapic_init_ops(void) |
2980 | { | 2937 | { |
2981 | int i; | ||
2982 | |||
2983 | for (i = 0; i < nr_ioapics; i++) { | ||
2984 | unsigned int size; | ||
2985 | |||
2986 | size = nr_ioapic_registers[i] | ||
2987 | * sizeof(struct IO_APIC_route_entry); | ||
2988 | ioapic_saved_data[i] = kzalloc(size, GFP_KERNEL); | ||
2989 | if (!ioapic_saved_data[i]) | ||
2990 | pr_err("IOAPIC %d: suspend/resume impossible!\n", i); | ||
2991 | } | ||
2992 | |||
2993 | register_syscore_ops(&ioapic_syscore_ops); | 2938 | register_syscore_ops(&ioapic_syscore_ops); |
2994 | 2939 | ||
2995 | return 0; | 2940 | return 0; |
@@ -3570,7 +3515,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
3570 | } | 3515 | } |
3571 | #endif /* CONFIG_HT_IRQ */ | 3516 | #endif /* CONFIG_HT_IRQ */ |
3572 | 3517 | ||
3573 | int | 3518 | static int |
3574 | io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) | 3519 | io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) |
3575 | { | 3520 | { |
3576 | struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node); | 3521 | struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node); |
@@ -3585,21 +3530,21 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) | |||
3585 | return ret; | 3530 | return ret; |
3586 | } | 3531 | } |
3587 | 3532 | ||
3588 | static int io_apic_setup_irq_pin_once(unsigned int irq, int node, | 3533 | int io_apic_setup_irq_pin_once(unsigned int irq, int node, |
3589 | struct io_apic_irq_attr *attr) | 3534 | struct io_apic_irq_attr *attr) |
3590 | { | 3535 | { |
3591 | unsigned int id = attr->ioapic, pin = attr->ioapic_pin; | 3536 | unsigned int id = attr->ioapic, pin = attr->ioapic_pin; |
3592 | int ret; | 3537 | int ret; |
3593 | 3538 | ||
3594 | /* Avoid redundant programming */ | 3539 | /* Avoid redundant programming */ |
3595 | if (test_bit(pin, mp_ioapic_routing[id].pin_programmed)) { | 3540 | if (test_bit(pin, ioapics[id].pin_programmed)) { |
3596 | pr_debug("Pin %d-%d already programmed\n", | 3541 | pr_debug("Pin %d-%d already programmed\n", |
3597 | mp_ioapics[id].apicid, pin); | 3542 | mpc_ioapic_id(id), pin); |
3598 | return 0; | 3543 | return 0; |
3599 | } | 3544 | } |
3600 | ret = io_apic_setup_irq_pin(irq, node, attr); | 3545 | ret = io_apic_setup_irq_pin(irq, node, attr); |
3601 | if (!ret) | 3546 | if (!ret) |
3602 | set_bit(pin, mp_ioapic_routing[id].pin_programmed); | 3547 | set_bit(pin, ioapics[id].pin_programmed); |
3603 | return ret; | 3548 | return ret; |
3604 | } | 3549 | } |
3605 | 3550 | ||
@@ -3764,8 +3709,7 @@ static u8 __init io_apic_unique_id(u8 id) | |||
3764 | 3709 | ||
3765 | bitmap_zero(used, 256); | 3710 | bitmap_zero(used, 256); |
3766 | for (i = 0; i < nr_ioapics; i++) { | 3711 | for (i = 0; i < nr_ioapics; i++) { |
3767 | struct mpc_ioapic *ia = &mp_ioapics[i]; | 3712 | __set_bit(mpc_ioapic_id(i), used); |
3768 | __set_bit(ia->apicid, used); | ||
3769 | } | 3713 | } |
3770 | if (!test_bit(id, used)) | 3714 | if (!test_bit(id, used)) |
3771 | return id; | 3715 | return id; |
@@ -3825,7 +3769,7 @@ void __init setup_ioapic_dest(void) | |||
3825 | return; | 3769 | return; |
3826 | 3770 | ||
3827 | for (ioapic = 0; ioapic < nr_ioapics; ioapic++) | 3771 | for (ioapic = 0; ioapic < nr_ioapics; ioapic++) |
3828 | for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { | 3772 | for (pin = 0; pin < ioapics[ioapic].nr_registers; pin++) { |
3829 | irq_entry = find_irq_entry(ioapic, pin, mp_INT); | 3773 | irq_entry = find_irq_entry(ioapic, pin, mp_INT); |
3830 | if (irq_entry == -1) | 3774 | if (irq_entry == -1) |
3831 | continue; | 3775 | continue; |
@@ -3896,7 +3840,7 @@ void __init ioapic_and_gsi_init(void) | |||
3896 | ioapic_res = ioapic_setup_resources(nr_ioapics); | 3840 | ioapic_res = ioapic_setup_resources(nr_ioapics); |
3897 | for (i = 0; i < nr_ioapics; i++) { | 3841 | for (i = 0; i < nr_ioapics; i++) { |
3898 | if (smp_found_config) { | 3842 | if (smp_found_config) { |
3899 | ioapic_phys = mp_ioapics[i].apicaddr; | 3843 | ioapic_phys = mpc_ioapic_addr(i); |
3900 | #ifdef CONFIG_X86_32 | 3844 | #ifdef CONFIG_X86_32 |
3901 | if (!ioapic_phys) { | 3845 | if (!ioapic_phys) { |
3902 | printk(KERN_ERR | 3846 | printk(KERN_ERR |
@@ -3956,8 +3900,9 @@ int mp_find_ioapic(u32 gsi) | |||
3956 | 3900 | ||
3957 | /* Find the IOAPIC that manages this GSI. */ | 3901 | /* Find the IOAPIC that manages this GSI. */ |
3958 | for (i = 0; i < nr_ioapics; i++) { | 3902 | for (i = 0; i < nr_ioapics; i++) { |
3959 | if ((gsi >= mp_gsi_routing[i].gsi_base) | 3903 | struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i); |
3960 | && (gsi <= mp_gsi_routing[i].gsi_end)) | 3904 | if ((gsi >= gsi_cfg->gsi_base) |
3905 | && (gsi <= gsi_cfg->gsi_end)) | ||
3961 | return i; | 3906 | return i; |
3962 | } | 3907 | } |
3963 | 3908 | ||
@@ -3967,12 +3912,16 @@ int mp_find_ioapic(u32 gsi) | |||
3967 | 3912 | ||
3968 | int mp_find_ioapic_pin(int ioapic, u32 gsi) | 3913 | int mp_find_ioapic_pin(int ioapic, u32 gsi) |
3969 | { | 3914 | { |
3915 | struct mp_ioapic_gsi *gsi_cfg; | ||
3916 | |||
3970 | if (WARN_ON(ioapic == -1)) | 3917 | if (WARN_ON(ioapic == -1)) |
3971 | return -1; | 3918 | return -1; |
3972 | if (WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end)) | 3919 | |
3920 | gsi_cfg = mp_ioapic_gsi_routing(ioapic); | ||
3921 | if (WARN_ON(gsi > gsi_cfg->gsi_end)) | ||
3973 | return -1; | 3922 | return -1; |
3974 | 3923 | ||
3975 | return gsi - mp_gsi_routing[ioapic].gsi_base; | 3924 | return gsi - gsi_cfg->gsi_base; |
3976 | } | 3925 | } |
3977 | 3926 | ||
3978 | static __init int bad_ioapic(unsigned long address) | 3927 | static __init int bad_ioapic(unsigned long address) |
@@ -3994,40 +3943,42 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | |||
3994 | { | 3943 | { |
3995 | int idx = 0; | 3944 | int idx = 0; |
3996 | int entries; | 3945 | int entries; |
3946 | struct mp_ioapic_gsi *gsi_cfg; | ||
3997 | 3947 | ||
3998 | if (bad_ioapic(address)) | 3948 | if (bad_ioapic(address)) |
3999 | return; | 3949 | return; |
4000 | 3950 | ||
4001 | idx = nr_ioapics; | 3951 | idx = nr_ioapics; |
4002 | 3952 | ||
4003 | mp_ioapics[idx].type = MP_IOAPIC; | 3953 | ioapics[idx].mp_config.type = MP_IOAPIC; |
4004 | mp_ioapics[idx].flags = MPC_APIC_USABLE; | 3954 | ioapics[idx].mp_config.flags = MPC_APIC_USABLE; |
4005 | mp_ioapics[idx].apicaddr = address; | 3955 | ioapics[idx].mp_config.apicaddr = address; |
4006 | 3956 | ||
4007 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); | 3957 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); |
4008 | mp_ioapics[idx].apicid = io_apic_unique_id(id); | 3958 | ioapics[idx].mp_config.apicid = io_apic_unique_id(id); |
4009 | mp_ioapics[idx].apicver = io_apic_get_version(idx); | 3959 | ioapics[idx].mp_config.apicver = io_apic_get_version(idx); |
4010 | 3960 | ||
4011 | /* | 3961 | /* |
4012 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups | 3962 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups |
4013 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). | 3963 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). |
4014 | */ | 3964 | */ |
4015 | entries = io_apic_get_redir_entries(idx); | 3965 | entries = io_apic_get_redir_entries(idx); |
4016 | mp_gsi_routing[idx].gsi_base = gsi_base; | 3966 | gsi_cfg = mp_ioapic_gsi_routing(idx); |
4017 | mp_gsi_routing[idx].gsi_end = gsi_base + entries - 1; | 3967 | gsi_cfg->gsi_base = gsi_base; |
3968 | gsi_cfg->gsi_end = gsi_base + entries - 1; | ||
4018 | 3969 | ||
4019 | /* | 3970 | /* |
4020 | * The number of IO-APIC IRQ registers (== #pins): | 3971 | * The number of IO-APIC IRQ registers (== #pins): |
4021 | */ | 3972 | */ |
4022 | nr_ioapic_registers[idx] = entries; | 3973 | ioapics[idx].nr_registers = entries; |
4023 | 3974 | ||
4024 | if (mp_gsi_routing[idx].gsi_end >= gsi_top) | 3975 | if (gsi_cfg->gsi_end >= gsi_top) |
4025 | gsi_top = mp_gsi_routing[idx].gsi_end + 1; | 3976 | gsi_top = gsi_cfg->gsi_end + 1; |
4026 | 3977 | ||
4027 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " | 3978 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " |
4028 | "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, | 3979 | "GSI %d-%d\n", idx, mpc_ioapic_id(idx), |
4029 | mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, | 3980 | mpc_ioapic_ver(idx), mpc_ioapic_addr(idx), |
4030 | mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end); | 3981 | gsi_cfg->gsi_base, gsi_cfg->gsi_end); |
4031 | 3982 | ||
4032 | nr_ioapics++; | 3983 | nr_ioapics++; |
4033 | } | 3984 | } |
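
mp_find_ioapic() and mp_find_ioapic_pin() keep their signatures; only their internals now go through mp_ioapic_gsi_routing(). A caller sketch (hypothetical, assuming mp_find_ioapic() returns -1 when no IO-APIC manages the GSI):

static int sketch_gsi_to_pin(u32 gsi, int *pin)
{
	int ioapic = mp_find_ioapic(gsi);

	if (ioapic < 0)
		return -ENODEV;

	*pin = mp_find_ioapic_pin(ioapic, gsi);
	return ioapic;
}
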
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 6273eee5134b..c4a61ca1349a 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c | |||
@@ -48,8 +48,6 @@ | |||
48 | #include <asm/e820.h> | 48 | #include <asm/e820.h> |
49 | #include <asm/ipi.h> | 49 | #include <asm/ipi.h> |
50 | 50 | ||
51 | #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) | ||
52 | |||
53 | int found_numaq; | 51 | int found_numaq; |
54 | 52 | ||
55 | /* | 53 | /* |
@@ -79,31 +77,20 @@ int quad_local_to_mp_bus_id[NR_CPUS/4][4]; | |||
79 | static inline void numaq_register_node(int node, struct sys_cfg_data *scd) | 77 | static inline void numaq_register_node(int node, struct sys_cfg_data *scd) |
80 | { | 78 | { |
81 | struct eachquadmem *eq = scd->eq + node; | 79 | struct eachquadmem *eq = scd->eq + node; |
80 | u64 start = (u64)(eq->hi_shrd_mem_start - eq->priv_mem_size) << 20; | ||
81 | u64 end = (u64)(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size) << 20; | ||
82 | int ret; | ||
82 | 83 | ||
83 | node_set_online(node); | 84 | node_set(node, numa_nodes_parsed); |
84 | 85 | ret = numa_add_memblk(node, start, end); | |
85 | /* Convert to pages */ | 86 | BUG_ON(ret < 0); |
86 | node_start_pfn[node] = | ||
87 | MB_TO_PAGES(eq->hi_shrd_mem_start - eq->priv_mem_size); | ||
88 | |||
89 | node_end_pfn[node] = | ||
90 | MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); | ||
91 | |||
92 | memblock_x86_register_active_regions(node, node_start_pfn[node], | ||
93 | node_end_pfn[node]); | ||
94 | |||
95 | memory_present(node, node_start_pfn[node], node_end_pfn[node]); | ||
96 | |||
97 | node_remap_size[node] = node_memmap_size_bytes(node, | ||
98 | node_start_pfn[node], | ||
99 | node_end_pfn[node]); | ||
100 | } | 87 | } |
101 | 88 | ||
102 | /* | 89 | /* |
103 | * Function: smp_dump_qct() | 90 | * Function: smp_dump_qct() |
104 | * | 91 | * |
105 | * Description: gets memory layout from the quad config table. This | 92 | * Description: gets memory layout from the quad config table. This |
106 | * function also updates node_online_map with the nodes (quads) present. | 93 | * function also updates numa_nodes_parsed with the nodes (quads) present. |
107 | */ | 94 | */ |
108 | static void __init smp_dump_qct(void) | 95 | static void __init smp_dump_qct(void) |
109 | { | 96 | { |
@@ -112,7 +99,6 @@ static void __init smp_dump_qct(void) | |||
112 | 99 | ||
113 | scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR); | 100 | scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR); |
114 | 101 | ||
115 | nodes_clear(node_online_map); | ||
116 | for_each_node(node) { | 102 | for_each_node(node) { |
117 | if (scd->quads_present31_0 & (1 << node)) | 103 | if (scd->quads_present31_0 & (1 << node)) |
118 | numaq_register_node(node, scd); | 104 | numaq_register_node(node, scd); |
@@ -282,14 +268,14 @@ static __init void early_check_numaq(void) | |||
282 | } | 268 | } |
283 | } | 269 | } |
284 | 270 | ||
285 | int __init get_memcfg_numaq(void) | 271 | int __init numaq_numa_init(void) |
286 | { | 272 | { |
287 | early_check_numaq(); | 273 | early_check_numaq(); |
288 | if (!found_numaq) | 274 | if (!found_numaq) |
289 | return 0; | 275 | return -ENOENT; |
290 | smp_dump_qct(); | 276 | smp_dump_qct(); |
291 | 277 | ||
292 | return 1; | 278 | return 0; |
293 | } | 279 | } |
294 | 280 | ||
295 | #define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) | 281 | #define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) |
@@ -486,8 +472,8 @@ static void numaq_setup_portio_remap(void) | |||
486 | (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); | 472 | (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); |
487 | } | 473 | } |
488 | 474 | ||
489 | /* Use __refdata to keep false positive warning calm. */ | 475 | /* Use __refdata to keep false positive warning calm. */ |
490 | struct apic __refdata apic_numaq = { | 476 | static struct apic __refdata apic_numaq = { |
491 | 477 | ||
492 | .name = "NUMAQ", | 478 | .name = "NUMAQ", |
493 | .probe = probe_numaq, | 479 | .probe = probe_numaq, |
@@ -551,3 +537,5 @@ struct apic __refdata apic_numaq = { | |||
551 | .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid, | 537 | .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid, |
552 | .x86_32_numa_cpu_node = numaq_numa_cpu_node, | 538 | .x86_32_numa_cpu_node = numaq_numa_cpu_node, |
553 | }; | 539 | }; |
540 | |||
541 | apic_driver(apic_numaq); | ||
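
numaq_register_node() now hands byte ranges straight to numa_add_memblk() instead of filling the per-node pfn arrays. A stand-alone check of the MB-to-byte conversion with made-up quad values (not part of the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical quad: 64 MB private below a shared region at 1 GB, 2 GB shared */
	uint64_t hi_shrd_mem_start = 1024, priv_mem_size = 64, hi_shrd_mem_size = 2048; /* MB */
	uint64_t start = (hi_shrd_mem_start - priv_mem_size) << 20;
	uint64_t end   = (hi_shrd_mem_start + hi_shrd_mem_size) << 20;

	/* node spans [0x3c000000, 0xc0000000), i.e. 960 MiB .. 3 GiB */
	printf("[%#llx, %#llx)\n",
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}
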
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index fc84c7b61108..b5254ad044ab 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c | |||
@@ -52,31 +52,6 @@ static int __init print_ipi_mode(void) | |||
52 | } | 52 | } |
53 | late_initcall(print_ipi_mode); | 53 | late_initcall(print_ipi_mode); |
54 | 54 | ||
55 | void __init default_setup_apic_routing(void) | ||
56 | { | ||
57 | int version = apic_version[boot_cpu_physical_apicid]; | ||
58 | |||
59 | if (num_possible_cpus() > 8) { | ||
60 | switch (boot_cpu_data.x86_vendor) { | ||
61 | case X86_VENDOR_INTEL: | ||
62 | if (!APIC_XAPIC(version)) { | ||
63 | def_to_bigsmp = 0; | ||
64 | break; | ||
65 | } | ||
66 | /* If P4 and above fall through */ | ||
67 | case X86_VENDOR_AMD: | ||
68 | def_to_bigsmp = 1; | ||
69 | } | ||
70 | } | ||
71 | |||
72 | #ifdef CONFIG_X86_BIGSMP | ||
73 | generic_bigsmp_probe(); | ||
74 | #endif | ||
75 | |||
76 | if (apic->setup_apic_routing) | ||
77 | apic->setup_apic_routing(); | ||
78 | } | ||
79 | |||
80 | static int default_x86_32_early_logical_apicid(int cpu) | 55 | static int default_x86_32_early_logical_apicid(int cpu) |
81 | { | 56 | { |
82 | return 1 << cpu; | 57 | return 1 << cpu; |
@@ -112,7 +87,7 @@ static int probe_default(void) | |||
112 | return 1; | 87 | return 1; |
113 | } | 88 | } |
114 | 89 | ||
115 | struct apic apic_default = { | 90 | static struct apic apic_default = { |
116 | 91 | ||
117 | .name = "default", | 92 | .name = "default", |
118 | .probe = probe_default, | 93 | .probe = probe_default, |
@@ -172,47 +147,24 @@ struct apic apic_default = { | |||
172 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | 147 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, |
173 | 148 | ||
174 | .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid, | 149 | .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid, |
175 | .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node, | ||
176 | }; | 150 | }; |
177 | 151 | ||
178 | extern struct apic apic_numaq; | 152 | apic_driver(apic_default); |
179 | extern struct apic apic_summit; | ||
180 | extern struct apic apic_bigsmp; | ||
181 | extern struct apic apic_es7000; | ||
182 | extern struct apic apic_es7000_cluster; | ||
183 | 153 | ||
184 | struct apic *apic = &apic_default; | 154 | struct apic *apic = &apic_default; |
185 | EXPORT_SYMBOL_GPL(apic); | 155 | EXPORT_SYMBOL_GPL(apic); |
186 | 156 | ||
187 | static struct apic *apic_probe[] __initdata = { | ||
188 | #ifdef CONFIG_X86_NUMAQ | ||
189 | &apic_numaq, | ||
190 | #endif | ||
191 | #ifdef CONFIG_X86_SUMMIT | ||
192 | &apic_summit, | ||
193 | #endif | ||
194 | #ifdef CONFIG_X86_BIGSMP | ||
195 | &apic_bigsmp, | ||
196 | #endif | ||
197 | #ifdef CONFIG_X86_ES7000 | ||
198 | &apic_es7000, | ||
199 | &apic_es7000_cluster, | ||
200 | #endif | ||
201 | &apic_default, /* must be last */ | ||
202 | NULL, | ||
203 | }; | ||
204 | |||
205 | static int cmdline_apic __initdata; | 157 | static int cmdline_apic __initdata; |
206 | static int __init parse_apic(char *arg) | 158 | static int __init parse_apic(char *arg) |
207 | { | 159 | { |
208 | int i; | 160 | struct apic **drv; |
209 | 161 | ||
210 | if (!arg) | 162 | if (!arg) |
211 | return -EINVAL; | 163 | return -EINVAL; |
212 | 164 | ||
213 | for (i = 0; apic_probe[i]; i++) { | 165 | for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { |
214 | if (!strcmp(apic_probe[i]->name, arg)) { | 166 | if (!strcmp((*drv)->name, arg)) { |
215 | apic = apic_probe[i]; | 167 | apic = *drv; |
216 | cmdline_apic = 1; | 168 | cmdline_apic = 1; |
217 | return 0; | 169 | return 0; |
218 | } | 170 | } |
@@ -223,38 +175,58 @@ static int __init parse_apic(char *arg) | |||
223 | } | 175 | } |
224 | early_param("apic", parse_apic); | 176 | early_param("apic", parse_apic); |
225 | 177 | ||
226 | void __init generic_bigsmp_probe(void) | 178 | void __init default_setup_apic_routing(void) |
227 | { | 179 | { |
180 | int version = apic_version[boot_cpu_physical_apicid]; | ||
181 | |||
182 | if (num_possible_cpus() > 8) { | ||
183 | switch (boot_cpu_data.x86_vendor) { | ||
184 | case X86_VENDOR_INTEL: | ||
185 | if (!APIC_XAPIC(version)) { | ||
186 | def_to_bigsmp = 0; | ||
187 | break; | ||
188 | } | ||
189 | 			/* P4 and above: fall through */ | ||
190 | case X86_VENDOR_AMD: | ||
191 | def_to_bigsmp = 1; | ||
192 | } | ||
193 | } | ||
194 | |||
228 | #ifdef CONFIG_X86_BIGSMP | 195 | #ifdef CONFIG_X86_BIGSMP |
229 | /* | 196 | /* |
230 | * This routine is used to switch to bigsmp mode when | 197 | * This is used to switch to bigsmp mode when |
231 | * - There is no apic= option specified by the user | 198 | * - There is no apic= option specified by the user |
232 | * - generic_apic_probe() has chosen apic_default as the sub_arch | 199 | * - generic_apic_probe() has chosen apic_default as the sub_arch |
233 | * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support | 200 | * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support |
234 | */ | 201 | */ |
235 | 202 | ||
236 | if (!cmdline_apic && apic == &apic_default) { | 203 | if (!cmdline_apic && apic == &apic_default) { |
237 | if (apic_bigsmp.probe()) { | 204 | struct apic *bigsmp = generic_bigsmp_probe(); |
238 | apic = &apic_bigsmp; | 205 | if (bigsmp) { |
206 | apic = bigsmp; | ||
239 | printk(KERN_INFO "Overriding APIC driver with %s\n", | 207 | printk(KERN_INFO "Overriding APIC driver with %s\n", |
240 | apic->name); | 208 | apic->name); |
241 | } | 209 | } |
242 | } | 210 | } |
243 | #endif | 211 | #endif |
212 | |||
213 | if (apic->setup_apic_routing) | ||
214 | apic->setup_apic_routing(); | ||
244 | } | 215 | } |
245 | 216 | ||
246 | void __init generic_apic_probe(void) | 217 | void __init generic_apic_probe(void) |
247 | { | 218 | { |
248 | if (!cmdline_apic) { | 219 | if (!cmdline_apic) { |
249 | int i; | 220 | struct apic **drv; |
250 | for (i = 0; apic_probe[i]; i++) { | 221 | |
251 | if (apic_probe[i]->probe()) { | 222 | for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { |
252 | apic = apic_probe[i]; | 223 | if ((*drv)->probe()) { |
224 | apic = *drv; | ||
253 | break; | 225 | break; |
254 | } | 226 | } |
255 | } | 227 | } |
256 | /* Not visible without early console */ | 228 | /* Not visible without early console */ |
257 | if (!apic_probe[i]) | 229 | if (drv == __apicdrivers_end) |
258 | panic("Didn't find an APIC driver"); | 230 | panic("Didn't find an APIC driver"); |
259 | } | 231 | } |
260 | printk(KERN_INFO "Using APIC driver %s\n", apic->name); | 232 | printk(KERN_INFO "Using APIC driver %s\n", apic->name); |
@@ -265,16 +237,16 @@ void __init generic_apic_probe(void) | |||
265 | int __init | 237 | int __init |
266 | generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) | 238 | generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) |
267 | { | 239 | { |
268 | int i; | 240 | struct apic **drv; |
269 | 241 | ||
270 | for (i = 0; apic_probe[i]; ++i) { | 242 | for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { |
271 | if (!apic_probe[i]->mps_oem_check) | 243 | if (!((*drv)->mps_oem_check)) |
272 | continue; | 244 | continue; |
273 | if (!apic_probe[i]->mps_oem_check(mpc, oem, productid)) | 245 | if (!(*drv)->mps_oem_check(mpc, oem, productid)) |
274 | continue; | 246 | continue; |
275 | 247 | ||
276 | if (!cmdline_apic) { | 248 | if (!cmdline_apic) { |
277 | apic = apic_probe[i]; | 249 | apic = *drv; |
278 | printk(KERN_INFO "Switched to APIC driver `%s'.\n", | 250 | printk(KERN_INFO "Switched to APIC driver `%s'.\n", |
279 | apic->name); | 251 | apic->name); |
280 | } | 252 | } |
@@ -285,16 +257,16 @@ generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) | |||
285 | 257 | ||
286 | int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 258 | int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
287 | { | 259 | { |
288 | int i; | 260 | struct apic **drv; |
289 | 261 | ||
290 | for (i = 0; apic_probe[i]; ++i) { | 262 | for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { |
291 | if (!apic_probe[i]->acpi_madt_oem_check) | 263 | if (!(*drv)->acpi_madt_oem_check) |
292 | continue; | 264 | continue; |
293 | if (!apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) | 265 | if (!(*drv)->acpi_madt_oem_check(oem_id, oem_table_id)) |
294 | continue; | 266 | continue; |
295 | 267 | ||
296 | if (!cmdline_apic) { | 268 | if (!cmdline_apic) { |
297 | apic = apic_probe[i]; | 269 | apic = *drv; |
298 | printk(KERN_INFO "Switched to APIC driver `%s'.\n", | 270 | printk(KERN_INFO "Switched to APIC driver `%s'.\n", |
299 | apic->name); | 271 | apic->name); |
300 | } | 272 | } |
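
Note: the probe paths above no longer hard-code a NULL-terminated apic_probe[] array; they walk every pointer between the __apicdrivers and __apicdrivers_end linker symbols, and each driver file now registers itself with apic_driver(). A plausible sketch of that macro and the matching linker-script markers follows; the exact attribute spelling and section handling are assumptions inferred from the loop bounds used above, not shown by this patch.

	/* Hypothetical sketch of the registration macro used by the drivers
	 * above; each use emits one "struct apic *" into a dedicated section. */
	#define apic_driver(sym)					\
		static struct apic *__apicdrivers_##sym __used		\
		__aligned(sizeof(struct apic *))			\
		__section(.apicdrivers) = { &sym }

	/* and, in the vmlinux linker script, something along these lines: */
		. = ALIGN(8);
		.apicdrivers : AT(ADDR(.apicdrivers) - LOAD_OFFSET) {
			__apicdrivers = .;
			*(.apicdrivers);
			__apicdrivers_end = .;
		}

With drivers collected this way, link order determines probe order and new drivers need no central table edit.
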
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index d8c4a6feb286..3fe986698929 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c | |||
@@ -23,27 +23,6 @@ | |||
23 | #include <asm/ipi.h> | 23 | #include <asm/ipi.h> |
24 | #include <asm/setup.h> | 24 | #include <asm/setup.h> |
25 | 25 | ||
26 | extern struct apic apic_flat; | ||
27 | extern struct apic apic_physflat; | ||
28 | extern struct apic apic_x2xpic_uv_x; | ||
29 | extern struct apic apic_x2apic_phys; | ||
30 | extern struct apic apic_x2apic_cluster; | ||
31 | |||
32 | struct apic __read_mostly *apic = &apic_flat; | ||
33 | EXPORT_SYMBOL_GPL(apic); | ||
34 | |||
35 | static struct apic *apic_probe[] __initdata = { | ||
36 | #ifdef CONFIG_X86_UV | ||
37 | &apic_x2apic_uv_x, | ||
38 | #endif | ||
39 | #ifdef CONFIG_X86_X2APIC | ||
40 | &apic_x2apic_phys, | ||
41 | &apic_x2apic_cluster, | ||
42 | #endif | ||
43 | &apic_physflat, | ||
44 | NULL, | ||
45 | }; | ||
46 | |||
47 | static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) | 26 | static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) |
48 | { | 27 | { |
49 | return hard_smp_processor_id() >> index_msb; | 28 | return hard_smp_processor_id() >> index_msb; |
@@ -54,26 +33,20 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) | |||
54 | */ | 33 | */ |
55 | void __init default_setup_apic_routing(void) | 34 | void __init default_setup_apic_routing(void) |
56 | { | 35 | { |
36 | struct apic **drv; | ||
57 | 37 | ||
58 | enable_IR_x2apic(); | 38 | enable_IR_x2apic(); |
59 | 39 | ||
60 | #ifdef CONFIG_X86_X2APIC | 40 | for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { |
61 | if (x2apic_mode | 41 | if ((*drv)->probe && (*drv)->probe()) { |
62 | #ifdef CONFIG_X86_UV | 42 | if (apic != *drv) { |
63 | && apic != &apic_x2apic_uv_x | 43 | apic = *drv; |
64 | #endif | 44 | pr_info("Switched APIC routing to %s.\n", |
65 | ) { | 45 | apic->name); |
66 | if (x2apic_phys) | 46 | } |
67 | apic = &apic_x2apic_phys; | 47 | break; |
68 | else | 48 | } |
69 | apic = &apic_x2apic_cluster; | ||
70 | } | 49 | } |
71 | #endif | ||
72 | |||
73 | if (apic == &apic_flat && num_possible_cpus() > 8) | ||
74 | apic = &apic_physflat; | ||
75 | |||
76 | printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); | ||
77 | 50 | ||
78 | if (is_vsmp_box()) { | 51 | if (is_vsmp_box()) { |
79 | /* need to update phys_pkg_id */ | 52 | /* need to update phys_pkg_id */ |
@@ -90,13 +63,15 @@ void apic_send_IPI_self(int vector) | |||
90 | 63 | ||
91 | int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 64 | int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
92 | { | 65 | { |
93 | int i; | 66 | struct apic **drv; |
94 | 67 | ||
95 | for (i = 0; apic_probe[i]; ++i) { | 68 | for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { |
96 | if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { | 69 | if ((*drv)->acpi_madt_oem_check(oem_id, oem_table_id)) { |
97 | apic = apic_probe[i]; | 70 | if (apic != *drv) { |
98 | printk(KERN_INFO "Setting APIC routing to %s.\n", | 71 | apic = *drv; |
99 | apic->name); | 72 | pr_info("Setting APIC routing to %s.\n", |
73 | apic->name); | ||
74 | } | ||
100 | return 1; | 75 | return 1; |
101 | } | 76 | } |
102 | } | 77 | } |
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index e4b8059b414a..19114423c58c 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c | |||
@@ -491,7 +491,7 @@ void setup_summit(void) | |||
491 | } | 491 | } |
492 | #endif | 492 | #endif |
493 | 493 | ||
494 | struct apic apic_summit = { | 494 | static struct apic apic_summit = { |
495 | 495 | ||
496 | .name = "summit", | 496 | .name = "summit", |
497 | .probe = probe_summit, | 497 | .probe = probe_summit, |
@@ -551,5 +551,6 @@ struct apic apic_summit = { | |||
551 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | 551 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, |
552 | 552 | ||
553 | .x86_32_early_logical_apicid = summit_early_logical_apicid, | 553 | .x86_32_early_logical_apicid = summit_early_logical_apicid, |
554 | .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node, | ||
555 | }; | 554 | }; |
555 | |||
556 | apic_driver(apic_summit); | ||
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 90949bbd566d..500795875827 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
@@ -5,118 +5,95 @@ | |||
5 | #include <linux/ctype.h> | 5 | #include <linux/ctype.h> |
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/dmar.h> | 7 | #include <linux/dmar.h> |
8 | #include <linux/cpu.h> | ||
8 | 9 | ||
9 | #include <asm/smp.h> | 10 | #include <asm/smp.h> |
10 | #include <asm/apic.h> | 11 | #include <asm/x2apic.h> |
11 | #include <asm/ipi.h> | ||
12 | 12 | ||
13 | static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); | 13 | static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); |
14 | static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster); | ||
15 | static DEFINE_PER_CPU(cpumask_var_t, ipi_mask); | ||
14 | 16 | ||
15 | static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 17 | static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
16 | { | 18 | { |
17 | return x2apic_enabled(); | 19 | return x2apic_enabled(); |
18 | } | 20 | } |
19 | 21 | ||
20 | /* | 22 | static inline u32 x2apic_cluster(int cpu) |
21 | * need to use more than cpu 0, because we need more vectors when | ||
22 | * MSI-X are used. | ||
23 | */ | ||
24 | static const struct cpumask *x2apic_target_cpus(void) | ||
25 | { | 23 | { |
26 | return cpu_online_mask; | 24 | return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16; |
27 | } | ||
28 | |||
29 | /* | ||
30 | * for now each logical cpu is in its own vector allocation domain. | ||
31 | */ | ||
32 | static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
33 | { | ||
34 | cpumask_clear(retmask); | ||
35 | cpumask_set_cpu(cpu, retmask); | ||
36 | } | 25 | } |
37 | 26 | ||
38 | static void | 27 | static void |
39 | __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) | 28 | __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) |
40 | { | 29 | { |
41 | unsigned long cfg; | 30 | struct cpumask *cpus_in_cluster_ptr; |
31 | struct cpumask *ipi_mask_ptr; | ||
32 | unsigned int cpu, this_cpu; | ||
33 | unsigned long flags; | ||
34 | u32 dest; | ||
35 | |||
36 | x2apic_wrmsr_fence(); | ||
37 | |||
38 | local_irq_save(flags); | ||
42 | 39 | ||
43 | cfg = __prepare_ICR(0, vector, dest); | 40 | this_cpu = smp_processor_id(); |
44 | 41 | ||
45 | /* | 42 | /* |
46 | 	 * send the IPI. | 43 | 	 * We are going to modify the mask, so we need our own copy |
44 | 	 * and must be sure it is manipulated with irqs off. |
47 | */ | 45 | */ |
48 | native_x2apic_icr_write(cfg, apicid); | 46 | ipi_mask_ptr = __raw_get_cpu_var(ipi_mask); |
49 | } | 47 | cpumask_copy(ipi_mask_ptr, mask); |
50 | 48 | ||
51 | /* | 49 | /* |
52 | * for now, we send the IPI's one by one in the cpumask. | 50 | * The idea is to send one IPI per cluster. |
53 | * TBD: Based on the cpu mask, we can send the IPI's to the cluster group | 51 | */ |
54 | * at once. We have 16 cpu's in a cluster. This will minimize IPI register | 52 | for_each_cpu(cpu, ipi_mask_ptr) { |
55 | * writes. | 53 | unsigned long i; |
56 | */ | ||
57 | static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) | ||
58 | { | ||
59 | unsigned long query_cpu; | ||
60 | unsigned long flags; | ||
61 | 54 | ||
62 | x2apic_wrmsr_fence(); | 55 | cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu); |
56 | dest = 0; | ||
63 | 57 | ||
64 | local_irq_save(flags); | 58 | /* Collect cpus in cluster. */ |
65 | for_each_cpu(query_cpu, mask) { | 59 | for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) { |
66 | __x2apic_send_IPI_dest( | 60 | if (apic_dest == APIC_DEST_ALLINC || i != this_cpu) |
67 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), | 61 | dest |= per_cpu(x86_cpu_to_logical_apicid, i); |
68 | vector, apic->dest_logical); | 62 | } |
63 | |||
64 | if (!dest) | ||
65 | continue; | ||
66 | |||
67 | __x2apic_send_IPI_dest(dest, vector, apic->dest_logical); | ||
68 | /* | ||
69 | 		 * Cluster sibling cpus should be discarded now so | ||
70 | 		 * that we do not send them an IPI a second time. | ||
71 | */ | ||
72 | cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr); | ||
69 | } | 73 | } |
74 | |||
70 | local_irq_restore(flags); | 75 | local_irq_restore(flags); |
71 | } | 76 | } |
72 | 77 | ||
78 | static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) | ||
79 | { | ||
80 | __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLINC); | ||
81 | } | ||
82 | |||
73 | static void | 83 | static void |
74 | x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) | 84 | x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) |
75 | { | 85 | { |
76 | unsigned long this_cpu = smp_processor_id(); | 86 | __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); |
77 | unsigned long query_cpu; | ||
78 | unsigned long flags; | ||
79 | |||
80 | x2apic_wrmsr_fence(); | ||
81 | |||
82 | local_irq_save(flags); | ||
83 | for_each_cpu(query_cpu, mask) { | ||
84 | if (query_cpu == this_cpu) | ||
85 | continue; | ||
86 | __x2apic_send_IPI_dest( | ||
87 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), | ||
88 | vector, apic->dest_logical); | ||
89 | } | ||
90 | local_irq_restore(flags); | ||
91 | } | 87 | } |
92 | 88 | ||
93 | static void x2apic_send_IPI_allbutself(int vector) | 89 | static void x2apic_send_IPI_allbutself(int vector) |
94 | { | 90 | { |
95 | unsigned long this_cpu = smp_processor_id(); | 91 | __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT); |
96 | unsigned long query_cpu; | ||
97 | unsigned long flags; | ||
98 | |||
99 | x2apic_wrmsr_fence(); | ||
100 | |||
101 | local_irq_save(flags); | ||
102 | for_each_online_cpu(query_cpu) { | ||
103 | if (query_cpu == this_cpu) | ||
104 | continue; | ||
105 | __x2apic_send_IPI_dest( | ||
106 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), | ||
107 | vector, apic->dest_logical); | ||
108 | } | ||
109 | local_irq_restore(flags); | ||
110 | } | 92 | } |
111 | 93 | ||
112 | static void x2apic_send_IPI_all(int vector) | 94 | static void x2apic_send_IPI_all(int vector) |
113 | { | 95 | { |
114 | x2apic_send_IPI_mask(cpu_online_mask, vector); | 96 | __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); |
115 | } | ||
116 | |||
117 | static int x2apic_apic_id_registered(void) | ||
118 | { | ||
119 | return 1; | ||
120 | } | 97 | } |
121 | 98 | ||
122 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) | 99 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) |
@@ -151,43 +128,90 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
151 | return per_cpu(x86_cpu_to_logical_apicid, cpu); | 128 | return per_cpu(x86_cpu_to_logical_apicid, cpu); |
152 | } | 129 | } |
153 | 130 | ||
154 | static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x) | 131 | static void init_x2apic_ldr(void) |
155 | { | 132 | { |
156 | unsigned int id; | 133 | unsigned int this_cpu = smp_processor_id(); |
134 | unsigned int cpu; | ||
157 | 135 | ||
158 | id = x; | 136 | per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR); |
159 | return id; | 137 | |
138 | __cpu_set(this_cpu, per_cpu(cpus_in_cluster, this_cpu)); | ||
139 | for_each_online_cpu(cpu) { | ||
140 | if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) | ||
141 | continue; | ||
142 | __cpu_set(this_cpu, per_cpu(cpus_in_cluster, cpu)); | ||
143 | __cpu_set(cpu, per_cpu(cpus_in_cluster, this_cpu)); | ||
144 | } | ||
160 | } | 145 | } |
161 | 146 | ||
162 | static unsigned long set_apic_id(unsigned int id) | 147 | /* |
148 | * At CPU state changes, update the x2apic cluster sibling info. | ||
149 | */ | ||
150 | static int __cpuinit | ||
151 | update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu) | ||
163 | { | 152 | { |
164 | unsigned long x; | 153 | unsigned int this_cpu = (unsigned long)hcpu; |
154 | unsigned int cpu; | ||
155 | int err = 0; | ||
156 | |||
157 | switch (action) { | ||
158 | case CPU_UP_PREPARE: | ||
159 | if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu), | ||
160 | GFP_KERNEL)) { | ||
161 | err = -ENOMEM; | ||
162 | } else if (!zalloc_cpumask_var(&per_cpu(ipi_mask, this_cpu), | ||
163 | GFP_KERNEL)) { | ||
164 | free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); | ||
165 | err = -ENOMEM; | ||
166 | } | ||
167 | break; | ||
168 | case CPU_UP_CANCELED: | ||
169 | case CPU_UP_CANCELED_FROZEN: | ||
170 | case CPU_DEAD: | ||
171 | for_each_online_cpu(cpu) { | ||
172 | if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) | ||
173 | continue; | ||
174 | __cpu_clear(this_cpu, per_cpu(cpus_in_cluster, cpu)); | ||
175 | __cpu_clear(cpu, per_cpu(cpus_in_cluster, this_cpu)); | ||
176 | } | ||
177 | free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); | ||
178 | free_cpumask_var(per_cpu(ipi_mask, this_cpu)); | ||
179 | break; | ||
180 | } | ||
165 | 181 | ||
166 | x = id; | 182 | return notifier_from_errno(err); |
167 | return x; | ||
168 | } | 183 | } |
169 | 184 | ||
170 | static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) | 185 | static struct notifier_block __refdata x2apic_cpu_notifier = { |
171 | { | 186 | .notifier_call = update_clusterinfo, |
172 | return initial_apicid >> index_msb; | 187 | }; |
173 | } | ||
174 | 188 | ||
175 | static void x2apic_send_IPI_self(int vector) | 189 | static int x2apic_init_cpu_notifier(void) |
176 | { | 190 | { |
177 | apic_write(APIC_SELF_IPI, vector); | 191 | int cpu = smp_processor_id(); |
192 | |||
193 | zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL); | ||
194 | zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL); | ||
195 | |||
196 | BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu)); | ||
197 | |||
198 | __cpu_set(cpu, per_cpu(cpus_in_cluster, cpu)); | ||
199 | register_hotcpu_notifier(&x2apic_cpu_notifier); | ||
200 | return 1; | ||
178 | } | 201 | } |
179 | 202 | ||
180 | static void init_x2apic_ldr(void) | 203 | static int x2apic_cluster_probe(void) |
181 | { | 204 | { |
182 | int cpu = smp_processor_id(); | 205 | if (x2apic_mode) |
183 | 206 | return x2apic_init_cpu_notifier(); | |
184 | per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); | 207 | else |
208 | return 0; | ||
185 | } | 209 | } |
186 | 210 | ||
187 | struct apic apic_x2apic_cluster = { | 211 | static struct apic apic_x2apic_cluster = { |
188 | 212 | ||
189 | .name = "cluster x2apic", | 213 | .name = "cluster x2apic", |
190 | .probe = NULL, | 214 | .probe = x2apic_cluster_probe, |
191 | .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, | 215 | .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, |
192 | .apic_id_registered = x2apic_apic_id_registered, | 216 | .apic_id_registered = x2apic_apic_id_registered, |
193 | 217 | ||
@@ -211,11 +235,11 @@ struct apic apic_x2apic_cluster = { | |||
211 | .setup_portio_remap = NULL, | 235 | .setup_portio_remap = NULL, |
212 | .check_phys_apicid_present = default_check_phys_apicid_present, | 236 | .check_phys_apicid_present = default_check_phys_apicid_present, |
213 | .enable_apic_mode = NULL, | 237 | .enable_apic_mode = NULL, |
214 | .phys_pkg_id = x2apic_cluster_phys_pkg_id, | 238 | .phys_pkg_id = x2apic_phys_pkg_id, |
215 | .mps_oem_check = NULL, | 239 | .mps_oem_check = NULL, |
216 | 240 | ||
217 | .get_apic_id = x2apic_cluster_phys_get_apic_id, | 241 | .get_apic_id = x2apic_get_apic_id, |
218 | .set_apic_id = set_apic_id, | 242 | .set_apic_id = x2apic_set_apic_id, |
219 | .apic_id_mask = 0xFFFFFFFFu, | 243 | .apic_id_mask = 0xFFFFFFFFu, |
220 | 244 | ||
221 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | 245 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, |
@@ -240,3 +264,5 @@ struct apic apic_x2apic_cluster = { | |||
240 | .wait_icr_idle = native_x2apic_wait_icr_idle, | 264 | .wait_icr_idle = native_x2apic_wait_icr_idle, |
241 | .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, | 265 | .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, |
242 | }; | 266 | }; |
267 | |||
268 | apic_driver(apic_x2apic_cluster); | ||
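
Note: the rewritten __x2apic_send_IPI_mask() above issues one ICR write per cluster rather than one per cpu: for each cpu still left in the working mask it ORs the low logical-ID bits of every sibling in the same cluster into a single destination, sends once, then strips those siblings from the mask. The stand-alone program below re-creates that coalescing on plain bitmasks; the IDs, helper names, and cluster layout are made up for illustration only.

	/* Simplified user-space sketch of the one-IPI-per-cluster idea above:
	 * the upper 16 bits of a logical APIC ID name the cluster, the lower
	 * 16 bits are one-hot per cpu, and destinations within one cluster
	 * are OR-ed together so only one "IPI" is issued per cluster. */
	#include <stdio.h>

	#define NR_CPUS 8

	static unsigned int logical_apicid[NR_CPUS] = {
		0x00010001, 0x00010002, 0x00010004, 0x00010008,	/* cluster 1 */
		0x00020001, 0x00020002, 0x00020004, 0x00020008,	/* cluster 2 */
	};

	static unsigned int cluster_of(int cpu)
	{
		return logical_apicid[cpu] >> 16;
	}

	static void send_ipi_to_mask(unsigned long mask)
	{
		while (mask) {
			unsigned int dest = 0, cl;
			int cpu;

			/* pick the first remaining cpu and note its cluster */
			for (cpu = 0; cpu < NR_CPUS; cpu++)
				if (mask & (1UL << cpu))
					break;
			cl = cluster_of(cpu);

			/* collect all remaining cpus of that cluster */
			for (cpu = 0; cpu < NR_CPUS; cpu++) {
				if ((mask & (1UL << cpu)) && cluster_of(cpu) == cl) {
					dest |= logical_apicid[cpu] & 0xffff;
					mask &= ~(1UL << cpu);	/* don't IPI it twice */
				}
			}
			printf("one IPI to cluster %u, dest bits 0x%04x\n", cl, dest);
		}
	}

	int main(void)
	{
		send_ipi_to_mask(0xb5);	/* cpus 0,2,4,5,7 -> only two IPIs */
		return 0;
	}
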
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index c7e6d6645bf4..f5373dfde21e 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c | |||
@@ -7,11 +7,12 @@ | |||
7 | #include <linux/dmar.h> | 7 | #include <linux/dmar.h> |
8 | 8 | ||
9 | #include <asm/smp.h> | 9 | #include <asm/smp.h> |
10 | #include <asm/apic.h> | 10 | #include <asm/x2apic.h> |
11 | #include <asm/ipi.h> | ||
12 | 11 | ||
13 | int x2apic_phys; | 12 | int x2apic_phys; |
14 | 13 | ||
14 | static struct apic apic_x2apic_phys; | ||
15 | |||
15 | static int set_x2apic_phys_mode(char *arg) | 16 | static int set_x2apic_phys_mode(char *arg) |
16 | { | 17 | { |
17 | x2apic_phys = 1; | 18 | x2apic_phys = 1; |
@@ -27,94 +28,46 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
27 | return 0; | 28 | return 0; |
28 | } | 29 | } |
29 | 30 | ||
30 | /* | 31 | static void |
31 | * need to use more than cpu 0, because we need more vectors when | 32 | __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) |
32 | * MSI-X are used. | ||
33 | */ | ||
34 | static const struct cpumask *x2apic_target_cpus(void) | ||
35 | { | ||
36 | return cpu_online_mask; | ||
37 | } | ||
38 | |||
39 | static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
40 | { | ||
41 | cpumask_clear(retmask); | ||
42 | cpumask_set_cpu(cpu, retmask); | ||
43 | } | ||
44 | |||
45 | static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | ||
46 | unsigned int dest) | ||
47 | { | ||
48 | unsigned long cfg; | ||
49 | |||
50 | cfg = __prepare_ICR(0, vector, dest); | ||
51 | |||
52 | /* | ||
53 | * send the IPI. | ||
54 | */ | ||
55 | native_x2apic_icr_write(cfg, apicid); | ||
56 | } | ||
57 | |||
58 | static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) | ||
59 | { | 33 | { |
60 | unsigned long query_cpu; | 34 | unsigned long query_cpu; |
35 | unsigned long this_cpu; | ||
61 | unsigned long flags; | 36 | unsigned long flags; |
62 | 37 | ||
63 | x2apic_wrmsr_fence(); | 38 | x2apic_wrmsr_fence(); |
64 | 39 | ||
65 | local_irq_save(flags); | 40 | local_irq_save(flags); |
41 | |||
42 | this_cpu = smp_processor_id(); | ||
66 | for_each_cpu(query_cpu, mask) { | 43 | for_each_cpu(query_cpu, mask) { |
44 | if (apic_dest == APIC_DEST_ALLBUT && this_cpu == query_cpu) | ||
45 | continue; | ||
67 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), | 46 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), |
68 | vector, APIC_DEST_PHYSICAL); | 47 | vector, APIC_DEST_PHYSICAL); |
69 | } | 48 | } |
70 | local_irq_restore(flags); | 49 | local_irq_restore(flags); |
71 | } | 50 | } |
72 | 51 | ||
52 | static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) | ||
53 | { | ||
54 | __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLINC); | ||
55 | } | ||
56 | |||
73 | static void | 57 | static void |
74 | x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) | 58 | x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) |
75 | { | 59 | { |
76 | unsigned long this_cpu = smp_processor_id(); | 60 | __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); |
77 | unsigned long query_cpu; | ||
78 | unsigned long flags; | ||
79 | |||
80 | x2apic_wrmsr_fence(); | ||
81 | |||
82 | local_irq_save(flags); | ||
83 | for_each_cpu(query_cpu, mask) { | ||
84 | if (query_cpu != this_cpu) | ||
85 | __x2apic_send_IPI_dest( | ||
86 | per_cpu(x86_cpu_to_apicid, query_cpu), | ||
87 | vector, APIC_DEST_PHYSICAL); | ||
88 | } | ||
89 | local_irq_restore(flags); | ||
90 | } | 61 | } |
91 | 62 | ||
92 | static void x2apic_send_IPI_allbutself(int vector) | 63 | static void x2apic_send_IPI_allbutself(int vector) |
93 | { | 64 | { |
94 | unsigned long this_cpu = smp_processor_id(); | 65 | __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT); |
95 | unsigned long query_cpu; | ||
96 | unsigned long flags; | ||
97 | |||
98 | x2apic_wrmsr_fence(); | ||
99 | |||
100 | local_irq_save(flags); | ||
101 | for_each_online_cpu(query_cpu) { | ||
102 | if (query_cpu == this_cpu) | ||
103 | continue; | ||
104 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), | ||
105 | vector, APIC_DEST_PHYSICAL); | ||
106 | } | ||
107 | local_irq_restore(flags); | ||
108 | } | 66 | } |
109 | 67 | ||
110 | static void x2apic_send_IPI_all(int vector) | 68 | static void x2apic_send_IPI_all(int vector) |
111 | { | 69 | { |
112 | x2apic_send_IPI_mask(cpu_online_mask, vector); | 70 | __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); |
113 | } | ||
114 | |||
115 | static int x2apic_apic_id_registered(void) | ||
116 | { | ||
117 | return 1; | ||
118 | } | 71 | } |
119 | 72 | ||
120 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) | 73 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) |
@@ -149,34 +102,22 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
149 | return per_cpu(x86_cpu_to_apicid, cpu); | 102 | return per_cpu(x86_cpu_to_apicid, cpu); |
150 | } | 103 | } |
151 | 104 | ||
152 | static unsigned int x2apic_phys_get_apic_id(unsigned long x) | 105 | static void init_x2apic_ldr(void) |
153 | { | ||
154 | return x; | ||
155 | } | ||
156 | |||
157 | static unsigned long set_apic_id(unsigned int id) | ||
158 | { | ||
159 | return id; | ||
160 | } | ||
161 | |||
162 | static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) | ||
163 | { | 106 | { |
164 | return initial_apicid >> index_msb; | ||
165 | } | 107 | } |
166 | 108 | ||
167 | static void x2apic_send_IPI_self(int vector) | 109 | static int x2apic_phys_probe(void) |
168 | { | 110 | { |
169 | apic_write(APIC_SELF_IPI, vector); | 111 | if (x2apic_mode && x2apic_phys) |
170 | } | 112 | return 1; |
171 | 113 | ||
172 | static void init_x2apic_ldr(void) | 114 | return apic == &apic_x2apic_phys; |
173 | { | ||
174 | } | 115 | } |
175 | 116 | ||
176 | struct apic apic_x2apic_phys = { | 117 | static struct apic apic_x2apic_phys = { |
177 | 118 | ||
178 | .name = "physical x2apic", | 119 | .name = "physical x2apic", |
179 | .probe = NULL, | 120 | .probe = x2apic_phys_probe, |
180 | .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, | 121 | .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, |
181 | .apic_id_registered = x2apic_apic_id_registered, | 122 | .apic_id_registered = x2apic_apic_id_registered, |
182 | 123 | ||
@@ -203,8 +144,8 @@ struct apic apic_x2apic_phys = { | |||
203 | .phys_pkg_id = x2apic_phys_pkg_id, | 144 | .phys_pkg_id = x2apic_phys_pkg_id, |
204 | .mps_oem_check = NULL, | 145 | .mps_oem_check = NULL, |
205 | 146 | ||
206 | .get_apic_id = x2apic_phys_get_apic_id, | 147 | .get_apic_id = x2apic_get_apic_id, |
207 | .set_apic_id = set_apic_id, | 148 | .set_apic_id = x2apic_set_apic_id, |
208 | .apic_id_mask = 0xFFFFFFFFu, | 149 | .apic_id_mask = 0xFFFFFFFFu, |
209 | 150 | ||
210 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | 151 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, |
@@ -229,3 +170,5 @@ struct apic apic_x2apic_phys = { | |||
229 | .wait_icr_idle = native_x2apic_wait_icr_idle, | 170 | .wait_icr_idle = native_x2apic_wait_icr_idle, |
230 | .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, | 171 | .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, |
231 | }; | 172 | }; |
173 | |||
174 | apic_driver(apic_x2apic_phys); | ||
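
Note: both x2apic drivers now take their ID helpers from the new <asm/x2apic.h> instead of keeping private copies. Judging by the local functions deleted in the hunks above, the shared helpers are presumably trivial, roughly as sketched here (names come from the struct initializers above, bodies are inferred from the removed code):

	/* Inferred from the deleted per-driver copies; in x2apic mode the
	 * APIC ID register already holds the full 32-bit ID, so the get/set
	 * helpers are identity functions. */
	static unsigned int x2apic_get_apic_id(unsigned long id)
	{
		return id;
	}

	static unsigned long x2apic_set_apic_id(unsigned int id)
	{
		return id;
	}

	static int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
	{
		return initial_apicid >> index_msb;
	}

	static int x2apic_apic_id_registered(void)
	{
		return 1;
	}
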
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 33b10a0fc095..f450b683dfcf 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -37,6 +37,13 @@ | |||
37 | #include <asm/smp.h> | 37 | #include <asm/smp.h> |
38 | #include <asm/x86_init.h> | 38 | #include <asm/x86_init.h> |
39 | #include <asm/emergency-restart.h> | 39 | #include <asm/emergency-restart.h> |
40 | #include <asm/nmi.h> | ||
41 | |||
42 | /* The BMC sets a bit in this MMR before sending an NMI */ | ||
43 | #define UVH_NMI_MMR UVH_SCRATCH5 | ||
44 | #define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8) | ||
45 | #define UV_NMI_PENDING_MASK (1UL << 63) | ||
46 | DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count); | ||
40 | 47 | ||
41 | DEFINE_PER_CPU(int, x2apic_extra_bits); | 48 | DEFINE_PER_CPU(int, x2apic_extra_bits); |
42 | 49 | ||
@@ -51,6 +58,8 @@ unsigned int uv_apicid_hibits; | |||
51 | EXPORT_SYMBOL_GPL(uv_apicid_hibits); | 58 | EXPORT_SYMBOL_GPL(uv_apicid_hibits); |
52 | static DEFINE_SPINLOCK(uv_nmi_lock); | 59 | static DEFINE_SPINLOCK(uv_nmi_lock); |
53 | 60 | ||
61 | static struct apic apic_x2apic_uv_x; | ||
62 | |||
54 | static unsigned long __init uv_early_read_mmr(unsigned long addr) | 63 | static unsigned long __init uv_early_read_mmr(unsigned long addr) |
55 | { | 64 | { |
56 | unsigned long val, *mmr; | 65 | unsigned long val, *mmr; |
@@ -319,10 +328,15 @@ static void uv_send_IPI_self(int vector) | |||
319 | apic_write(APIC_SELF_IPI, vector); | 328 | apic_write(APIC_SELF_IPI, vector); |
320 | } | 329 | } |
321 | 330 | ||
322 | struct apic __refdata apic_x2apic_uv_x = { | 331 | static int uv_probe(void) |
332 | { | ||
333 | return apic == &apic_x2apic_uv_x; | ||
334 | } | ||
335 | |||
336 | static struct apic __refdata apic_x2apic_uv_x = { | ||
323 | 337 | ||
324 | .name = "UV large system", | 338 | .name = "UV large system", |
325 | .probe = NULL, | 339 | .probe = uv_probe, |
326 | .acpi_madt_oem_check = uv_acpi_madt_oem_check, | 340 | .acpi_madt_oem_check = uv_acpi_madt_oem_check, |
327 | .apic_id_registered = uv_apic_id_registered, | 341 | .apic_id_registered = uv_apic_id_registered, |
328 | 342 | ||
@@ -642,18 +656,46 @@ void __cpuinit uv_cpu_init(void) | |||
642 | */ | 656 | */ |
643 | int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) | 657 | int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) |
644 | { | 658 | { |
659 | unsigned long real_uv_nmi; | ||
660 | int bid; | ||
661 | |||
645 | if (reason != DIE_NMIUNKNOWN) | 662 | if (reason != DIE_NMIUNKNOWN) |
646 | return NOTIFY_OK; | 663 | return NOTIFY_OK; |
647 | 664 | ||
648 | if (in_crash_kexec) | 665 | if (in_crash_kexec) |
649 | /* do nothing if entering the crash kernel */ | 666 | /* do nothing if entering the crash kernel */ |
650 | return NOTIFY_OK; | 667 | return NOTIFY_OK; |
668 | |||
669 | /* | ||
670 | * Each blade has an MMR that indicates when an NMI has been sent | ||
671 | * to cpus on the blade. If an NMI is detected, atomically | ||
672 | * clear the MMR and update a per-blade NMI count used to | ||
673 | * cause each cpu on the blade to notice a new NMI. | ||
674 | */ | ||
675 | bid = uv_numa_blade_id(); | ||
676 | real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); | ||
677 | |||
678 | if (unlikely(real_uv_nmi)) { | ||
679 | spin_lock(&uv_blade_info[bid].nmi_lock); | ||
680 | real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); | ||
681 | if (real_uv_nmi) { | ||
682 | uv_blade_info[bid].nmi_count++; | ||
683 | uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK); | ||
684 | } | ||
685 | spin_unlock(&uv_blade_info[bid].nmi_lock); | ||
686 | } | ||
687 | |||
688 | if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count)) | ||
689 | return NOTIFY_DONE; | ||
690 | |||
691 | __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; | ||
692 | |||
651 | /* | 693 | /* |
652 | * Use a lock so only one cpu prints at a time | 694 | * Use a lock so only one cpu prints at a time. |
653 | * to prevent intermixed output. | 695 | * This prevents intermixed output. |
654 | */ | 696 | */ |
655 | spin_lock(&uv_nmi_lock); | 697 | spin_lock(&uv_nmi_lock); |
656 | pr_info("NMI stack dump cpu %u:\n", smp_processor_id()); | 698 | pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id()); |
657 | dump_stack(); | 699 | dump_stack(); |
658 | spin_unlock(&uv_nmi_lock); | 700 | spin_unlock(&uv_nmi_lock); |
659 | 701 | ||
@@ -661,7 +703,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) | |||
661 | } | 703 | } |
662 | 704 | ||
663 | static struct notifier_block uv_dump_stack_nmi_nb = { | 705 | static struct notifier_block uv_dump_stack_nmi_nb = { |
664 | .notifier_call = uv_handle_nmi | 706 | .notifier_call = uv_handle_nmi, |
707 | .priority = NMI_LOCAL_LOW_PRIOR - 1, | ||
665 | }; | 708 | }; |
666 | 709 | ||
667 | void uv_register_nmi_notifier(void) | 710 | void uv_register_nmi_notifier(void) |
@@ -720,8 +763,9 @@ void __init uv_system_init(void) | |||
720 | printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); | 763 | printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); |
721 | 764 | ||
722 | bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); | 765 | bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); |
723 | uv_blade_info = kmalloc(bytes, GFP_KERNEL); | 766 | uv_blade_info = kzalloc(bytes, GFP_KERNEL); |
724 | BUG_ON(!uv_blade_info); | 767 | BUG_ON(!uv_blade_info); |
768 | |||
725 | for (blade = 0; blade < uv_num_possible_blades(); blade++) | 769 | for (blade = 0; blade < uv_num_possible_blades(); blade++) |
726 | uv_blade_info[blade].memory_nid = -1; | 770 | uv_blade_info[blade].memory_nid = -1; |
727 | 771 | ||
@@ -747,6 +791,7 @@ void __init uv_system_init(void) | |||
747 | uv_blade_info[blade].pnode = pnode; | 791 | uv_blade_info[blade].pnode = pnode; |
748 | uv_blade_info[blade].nr_possible_cpus = 0; | 792 | uv_blade_info[blade].nr_possible_cpus = 0; |
749 | uv_blade_info[blade].nr_online_cpus = 0; | 793 | uv_blade_info[blade].nr_online_cpus = 0; |
794 | spin_lock_init(&uv_blade_info[blade].nmi_lock); | ||
750 | max_pnode = max(pnode, max_pnode); | 795 | max_pnode = max(pnode, max_pnode); |
751 | blade++; | 796 | blade++; |
752 | } | 797 | } |
@@ -821,3 +866,5 @@ void __init uv_system_init(void) | |||
821 | if (is_kdump_kernel()) | 866 | if (is_kdump_kernel()) |
822 | reboot_type = BOOT_ACPI; | 867 | reboot_type = BOOT_ACPI; |
823 | } | 868 | } |
869 | |||
870 | apic_driver(apic_x2apic_uv_x); | ||
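
Note: the new UV NMI path follows a check, lock, re-check pattern: the pending bit in UVH_NMI_MMR is sampled, re-read under the per-blade nmi_lock before the first cpu clears it and bumps nmi_count, and every cpu then compares its per-cpu cpu_last_nmi_count so that each cpu dumps its stack exactly once per NMI. The user-space sketch below mimics that flow with threads standing in for cpus; all names are illustrative and the locking is deliberately simplified.

	/* Stand-alone sketch (compile with -pthread) of the "check, lock,
	 * re-check, count" scheme used by the UV NMI handler above. */
	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t blade_lock = PTHREAD_MUTEX_INITIALIZER;
	static int blade_pending;              /* stands in for the UVH_NMI_MMR bit */
	static unsigned long blade_nmi_count;  /* bumped once per real NMI */

	static __thread unsigned long last_seen;

	static int should_handle(void)
	{
		if (blade_pending) {
			pthread_mutex_lock(&blade_lock);
			if (blade_pending) {            /* re-check under the lock */
				blade_nmi_count++;
				blade_pending = 0;      /* "clear the MMR" */
			}
			pthread_mutex_unlock(&blade_lock);
		}
		if (last_seen == blade_nmi_count)
			return 0;                       /* nothing new for this cpu */
		last_seen = blade_nmi_count;
		return 1;
	}

	static void *cpu_thread(void *arg)
	{
		if (should_handle())
			printf("cpu %ld handles the NMI\n", (long)arg);
		return NULL;
	}

	int main(void)
	{
		pthread_t t[4];
		long i;

		blade_pending = 1;                      /* one NMI arrives */
		for (i = 0; i < 4; i++)
			pthread_create(&t[i], NULL, cpu_thread, (void *)i);
		for (i = 0; i < 4; i++)
			pthread_join(t[i], NULL);
		return 0;
	}
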
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 0b4be431c620..3bfa02235965 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -228,6 +228,7 @@ | |||
228 | #include <linux/kthread.h> | 228 | #include <linux/kthread.h> |
229 | #include <linux/jiffies.h> | 229 | #include <linux/jiffies.h> |
230 | #include <linux/acpi.h> | 230 | #include <linux/acpi.h> |
231 | #include <linux/syscore_ops.h> | ||
231 | 232 | ||
232 | #include <asm/system.h> | 233 | #include <asm/system.h> |
233 | #include <asm/uaccess.h> | 234 | #include <asm/uaccess.h> |
@@ -1237,7 +1238,7 @@ static int suspend(int vetoable) | |||
1237 | dpm_suspend_noirq(PMSG_SUSPEND); | 1238 | dpm_suspend_noirq(PMSG_SUSPEND); |
1238 | 1239 | ||
1239 | local_irq_disable(); | 1240 | local_irq_disable(); |
1240 | sysdev_suspend(PMSG_SUSPEND); | 1241 | syscore_suspend(); |
1241 | 1242 | ||
1242 | local_irq_enable(); | 1243 | local_irq_enable(); |
1243 | 1244 | ||
@@ -1255,7 +1256,7 @@ static int suspend(int vetoable) | |||
1255 | apm_error("suspend", err); | 1256 | apm_error("suspend", err); |
1256 | err = (err == APM_SUCCESS) ? 0 : -EIO; | 1257 | err = (err == APM_SUCCESS) ? 0 : -EIO; |
1257 | 1258 | ||
1258 | sysdev_resume(); | 1259 | syscore_resume(); |
1259 | local_irq_enable(); | 1260 | local_irq_enable(); |
1260 | 1261 | ||
1261 | dpm_resume_noirq(PMSG_RESUME); | 1262 | dpm_resume_noirq(PMSG_RESUME); |
@@ -1279,7 +1280,7 @@ static void standby(void) | |||
1279 | dpm_suspend_noirq(PMSG_SUSPEND); | 1280 | dpm_suspend_noirq(PMSG_SUSPEND); |
1280 | 1281 | ||
1281 | local_irq_disable(); | 1282 | local_irq_disable(); |
1282 | sysdev_suspend(PMSG_SUSPEND); | 1283 | syscore_suspend(); |
1283 | local_irq_enable(); | 1284 | local_irq_enable(); |
1284 | 1285 | ||
1285 | err = set_system_power_state(APM_STATE_STANDBY); | 1286 | err = set_system_power_state(APM_STATE_STANDBY); |
@@ -1287,7 +1288,7 @@ static void standby(void) | |||
1287 | apm_error("standby", err); | 1288 | apm_error("standby", err); |
1288 | 1289 | ||
1289 | local_irq_disable(); | 1290 | local_irq_disable(); |
1290 | sysdev_resume(); | 1291 | syscore_resume(); |
1291 | local_irq_enable(); | 1292 | local_irq_enable(); |
1292 | 1293 | ||
1293 | dpm_resume_noirq(PMSG_RESUME); | 1294 | dpm_resume_noirq(PMSG_RESUME); |
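
Note: apm_32.c switches from the removed sysdev suspend/resume hooks to the syscore API, calling syscore_suspend()/syscore_resume() with interrupts disabled around the firmware call. For reference, code that previously relied on sysdev class ops registers a struct syscore_ops instead, along these lines (a minimal sketch, not part of this patch):

	#include <linux/init.h>
	#include <linux/syscore_ops.h>

	static int my_syscore_suspend(void)
	{
		/* runs late, with interrupts disabled and one cpu online */
		return 0;
	}

	static void my_syscore_resume(void)
	{
		/* undo whatever suspend did */
	}

	static struct syscore_ops my_syscore_ops = {
		.suspend = my_syscore_suspend,
		.resume  = my_syscore_resume,
	};

	static int __init my_init(void)
	{
		register_syscore_ops(&my_syscore_ops);
		return 0;
	}
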
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 3f0ebe429a01..6042981d0309 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -30,7 +30,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o | |||
30 | 30 | ||
31 | obj-$(CONFIG_X86_MCE) += mcheck/ | 31 | obj-$(CONFIG_X86_MCE) += mcheck/ |
32 | obj-$(CONFIG_MTRR) += mtrr/ | 32 | obj-$(CONFIG_MTRR) += mtrr/ |
33 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | ||
34 | 33 | ||
35 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o | 34 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o |
36 | 35 | ||
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 3ecece0217ef..8f5cabb3c5b0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -613,8 +613,30 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
613 | #endif | 613 | #endif |
614 | 614 | ||
615 | /* As a rule processors have APIC timer running in deep C states */ | 615 | /* As a rule processors have APIC timer running in deep C states */ |
616 | if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400)) | 616 | if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400)) |
617 | set_cpu_cap(c, X86_FEATURE_ARAT); | 617 | set_cpu_cap(c, X86_FEATURE_ARAT); |
618 | |||
619 | /* | ||
620 | * Disable GART TLB Walk Errors on Fam10h. We do this here | ||
621 | * because this is always needed when GART is enabled, even in a | ||
622 | * kernel which has no MCE support built in. | ||
623 | */ | ||
624 | if (c->x86 == 0x10) { | ||
625 | /* | ||
626 | 		 * The BIOS should disable GartTlbWlk Errors itself. If it | ||
627 | 		 * doesn't, do it here as suggested by the BKDG. | ||
628 | * | ||
629 | * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012 | ||
630 | */ | ||
631 | u64 mask; | ||
632 | int err; | ||
633 | |||
634 | err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask); | ||
635 | if (err == 0) { | ||
636 | mask |= (1 << 10); | ||
637 | checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask); | ||
638 | } | ||
639 | } | ||
618 | } | 640 | } |
619 | 641 | ||
620 | #ifdef CONFIG_X86_32 | 642 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e2ced0074a45..c8b41623377f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -254,6 +254,25 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | |||
254 | } | 254 | } |
255 | #endif | 255 | #endif |
256 | 256 | ||
257 | static int disable_smep __cpuinitdata; | ||
258 | static __init int setup_disable_smep(char *arg) | ||
259 | { | ||
260 | disable_smep = 1; | ||
261 | return 1; | ||
262 | } | ||
263 | __setup("nosmep", setup_disable_smep); | ||
264 | |||
265 | static __cpuinit void setup_smep(struct cpuinfo_x86 *c) | ||
266 | { | ||
267 | if (cpu_has(c, X86_FEATURE_SMEP)) { | ||
268 | if (unlikely(disable_smep)) { | ||
269 | setup_clear_cpu_cap(X86_FEATURE_SMEP); | ||
270 | clear_in_cr4(X86_CR4_SMEP); | ||
271 | } else | ||
272 | set_in_cr4(X86_CR4_SMEP); | ||
273 | } | ||
274 | } | ||
275 | |||
257 | /* | 276 | /* |
258 | * Some CPU features depend on higher CPUID levels, which may not always | 277 | * Some CPU features depend on higher CPUID levels, which may not always |
259 | * be available due to CPUID level capping or broken virtualization | 278 | * be available due to CPUID level capping or broken virtualization |
@@ -565,8 +584,7 @@ void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | |||
565 | 584 | ||
566 | cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); | 585 | cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); |
567 | 586 | ||
568 | if (eax > 0) | 587 | c->x86_capability[9] = ebx; |
569 | c->x86_capability[9] = ebx; | ||
570 | } | 588 | } |
571 | 589 | ||
572 | /* AMD-defined flags: level 0x80000001 */ | 590 | /* AMD-defined flags: level 0x80000001 */ |
@@ -668,6 +686,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) | |||
668 | c->cpu_index = 0; | 686 | c->cpu_index = 0; |
669 | #endif | 687 | #endif |
670 | filter_cpuid_features(c, false); | 688 | filter_cpuid_features(c, false); |
689 | |||
690 | setup_smep(c); | ||
671 | } | 691 | } |
672 | 692 | ||
673 | void __init early_cpu_init(void) | 693 | void __init early_cpu_init(void) |
@@ -753,6 +773,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | |||
753 | #endif | 773 | #endif |
754 | } | 774 | } |
755 | 775 | ||
776 | setup_smep(c); | ||
777 | |||
756 | get_model_name(c); /* Default name */ | 778 | get_model_name(c); /* Default name */ |
757 | 779 | ||
758 | detect_nopl(c); | 780 | detect_nopl(c); |
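
Note: setup_smep() above enables Supervisor Mode Execution Protection whenever the CPU advertises the SMEP feature, unless the user boots with "nosmep"; set_in_cr4()/clear_in_cr4() perform the read-modify-write of CR4. A minimal illustration of the policy follows (SMEP is CR4 bit 20 per the Intel SDM; the helper names here are hypothetical):

	#include <stdio.h>

	#define X86_CR4_SMEP_BIT (1UL << 20)	/* CR4.SMEP, per the Intel SDM */

	/* Pure-policy sketch: compute what the new CR4 value should be. */
	static unsigned long smep_policy(unsigned long cr4, int has_smep, int nosmep)
	{
		if (!has_smep)
			return cr4;
		return nosmep ? (cr4 & ~X86_CR4_SMEP_BIT) : (cr4 | X86_CR4_SMEP_BIT);
	}

	int main(void)
	{
		printf("%#lx\n", smep_policy(0x406f0UL, 1, 0));	/* SMEP bit set */
		printf("%#lx\n", smep_policy(0x406f0UL, 1, 1));	/* "nosmep": bit clear */
		return 0;
	}
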
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig deleted file mode 100644 index 870e6cc6ad28..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ /dev/null | |||
@@ -1,266 +0,0 @@ | |||
1 | # | ||
2 | # CPU Frequency scaling | ||
3 | # | ||
4 | |||
5 | menu "CPU Frequency scaling" | ||
6 | |||
7 | source "drivers/cpufreq/Kconfig" | ||
8 | |||
9 | if CPU_FREQ | ||
10 | |||
11 | comment "CPUFreq processor drivers" | ||
12 | |||
13 | config X86_PCC_CPUFREQ | ||
14 | tristate "Processor Clocking Control interface driver" | ||
15 | depends on ACPI && ACPI_PROCESSOR | ||
16 | help | ||
17 | This driver adds support for the PCC interface. | ||
18 | |||
19 | For details, take a look at: | ||
20 | <file:Documentation/cpu-freq/pcc-cpufreq.txt>. | ||
21 | |||
22 | To compile this driver as a module, choose M here: the | ||
23 | module will be called pcc-cpufreq. | ||
24 | |||
25 | If in doubt, say N. | ||
26 | |||
27 | config X86_ACPI_CPUFREQ | ||
28 | tristate "ACPI Processor P-States driver" | ||
29 | select CPU_FREQ_TABLE | ||
30 | depends on ACPI_PROCESSOR | ||
31 | help | ||
32 | This driver adds a CPUFreq driver which utilizes the ACPI | ||
33 | Processor Performance States. | ||
34 | This driver also supports Intel Enhanced Speedstep. | ||
35 | |||
36 | To compile this driver as a module, choose M here: the | ||
37 | module will be called acpi-cpufreq. | ||
38 | |||
39 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
40 | |||
41 | If in doubt, say N. | ||
42 | |||
43 | config ELAN_CPUFREQ | ||
44 | tristate "AMD Elan SC400 and SC410" | ||
45 | select CPU_FREQ_TABLE | ||
46 | depends on X86_ELAN | ||
47 | ---help--- | ||
48 | This adds the CPUFreq driver for AMD Elan SC400 and SC410 | ||
49 | processors. | ||
50 | |||
51 | You need to specify the processor maximum speed as boot | ||
52 | parameter: elanfreq=maxspeed (in kHz) or as module | ||
53 | parameter "max_freq". | ||
54 | |||
55 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
56 | |||
57 | If in doubt, say N. | ||
58 | |||
59 | config SC520_CPUFREQ | ||
60 | tristate "AMD Elan SC520" | ||
61 | select CPU_FREQ_TABLE | ||
62 | depends on X86_ELAN | ||
63 | ---help--- | ||
64 | This adds the CPUFreq driver for AMD Elan SC520 processor. | ||
65 | |||
66 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
67 | |||
68 | If in doubt, say N. | ||
69 | |||
70 | |||
71 | config X86_POWERNOW_K6 | ||
72 | tristate "AMD Mobile K6-2/K6-3 PowerNow!" | ||
73 | select CPU_FREQ_TABLE | ||
74 | depends on X86_32 | ||
75 | help | ||
76 | This adds the CPUFreq driver for mobile AMD K6-2+ and mobile | ||
77 | AMD K6-3+ processors. | ||
78 | |||
79 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
80 | |||
81 | If in doubt, say N. | ||
82 | |||
83 | config X86_POWERNOW_K7 | ||
84 | tristate "AMD Mobile Athlon/Duron PowerNow!" | ||
85 | select CPU_FREQ_TABLE | ||
86 | depends on X86_32 | ||
87 | help | ||
88 | This adds the CPUFreq driver for mobile AMD K7 mobile processors. | ||
89 | |||
90 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
91 | |||
92 | If in doubt, say N. | ||
93 | |||
94 | config X86_POWERNOW_K7_ACPI | ||
95 | bool | ||
96 | depends on X86_POWERNOW_K7 && ACPI_PROCESSOR | ||
97 | depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m) | ||
98 | depends on X86_32 | ||
99 | default y | ||
100 | |||
101 | config X86_POWERNOW_K8 | ||
102 | tristate "AMD Opteron/Athlon64 PowerNow!" | ||
103 | select CPU_FREQ_TABLE | ||
104 | depends on ACPI && ACPI_PROCESSOR | ||
105 | help | ||
106 | This adds the CPUFreq driver for K8/K10 Opteron/Athlon64 processors. | ||
107 | |||
108 | To compile this driver as a module, choose M here: the | ||
109 | module will be called powernow-k8. | ||
110 | |||
111 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
112 | |||
113 | config X86_GX_SUSPMOD | ||
114 | tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" | ||
115 | depends on X86_32 && PCI | ||
116 | help | ||
117 | This add the CPUFreq driver for NatSemi Geode processors which | ||
118 | support suspend modulation. | ||
119 | |||
120 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
121 | |||
122 | If in doubt, say N. | ||
123 | |||
124 | config X86_SPEEDSTEP_CENTRINO | ||
125 | tristate "Intel Enhanced SpeedStep (deprecated)" | ||
126 | select CPU_FREQ_TABLE | ||
127 | select X86_SPEEDSTEP_CENTRINO_TABLE if X86_32 | ||
128 | depends on X86_32 || (X86_64 && ACPI_PROCESSOR) | ||
129 | help | ||
130 | This is deprecated and this functionality is now merged into | ||
131 | acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of | ||
132 | speedstep_centrino. | ||
133 | This adds the CPUFreq driver for Enhanced SpeedStep enabled | ||
134 | mobile CPUs. This means Intel Pentium M (Centrino) CPUs | ||
135 | or 64bit enabled Intel Xeons. | ||
136 | |||
137 | To compile this driver as a module, choose M here: the | ||
138 | module will be called speedstep-centrino. | ||
139 | |||
140 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
141 | |||
142 | If in doubt, say N. | ||
143 | |||
144 | config X86_SPEEDSTEP_CENTRINO_TABLE | ||
145 | bool "Built-in tables for Banias CPUs" | ||
146 | depends on X86_32 && X86_SPEEDSTEP_CENTRINO | ||
147 | default y | ||
148 | help | ||
149 | Use built-in tables for Banias CPUs if ACPI encoding | ||
150 | is not available. | ||
151 | |||
152 | If in doubt, say N. | ||
153 | |||
154 | config X86_SPEEDSTEP_ICH | ||
155 | tristate "Intel Speedstep on ICH-M chipsets (ioport interface)" | ||
156 | select CPU_FREQ_TABLE | ||
157 | depends on X86_32 | ||
158 | help | ||
159 | This adds the CPUFreq driver for certain mobile Intel Pentium III | ||
160 | (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all | ||
161 | mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2, | ||
162 | ICH3 or ICH4 southbridge. | ||
163 | |||
164 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
165 | |||
166 | If in doubt, say N. | ||
167 | |||
168 | config X86_SPEEDSTEP_SMI | ||
169 | tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)" | ||
170 | select CPU_FREQ_TABLE | ||
171 | depends on X86_32 && EXPERIMENTAL | ||
172 | help | ||
173 | This adds the CPUFreq driver for certain mobile Intel Pentium III | ||
174 | (Coppermine), all mobile Intel Pentium III-M (Tualatin) | ||
175 | on systems which have an Intel 440BX/ZX/MX southbridge. | ||
176 | |||
177 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
178 | |||
179 | If in doubt, say N. | ||
180 | |||
181 | config X86_P4_CLOCKMOD | ||
182 | tristate "Intel Pentium 4 clock modulation" | ||
183 | select CPU_FREQ_TABLE | ||
184 | help | ||
185 | This adds the CPUFreq driver for Intel Pentium 4 / XEON | ||
186 | processors. When enabled it will lower CPU temperature by skipping | ||
187 | clocks. | ||
188 | |||
189 | This driver should be only used in exceptional | ||
190 | circumstances when very low power is needed because it causes severe | ||
191 | slowdowns and noticeable latencies. Normally Speedstep should be used | ||
192 | instead. | ||
193 | |||
194 | To compile this driver as a module, choose M here: the | ||
195 | module will be called p4-clockmod. | ||
196 | |||
197 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
198 | |||
199 | Unless you are absolutely sure say N. | ||
200 | |||
201 | config X86_CPUFREQ_NFORCE2 | ||
202 | tristate "nVidia nForce2 FSB changing" | ||
203 | depends on X86_32 && EXPERIMENTAL | ||
204 | help | ||
205 | This adds the CPUFreq driver for FSB changing on nVidia nForce2 | ||
206 | platforms. | ||
207 | |||
208 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
209 | |||
210 | If in doubt, say N. | ||
211 | |||
212 | config X86_LONGRUN | ||
213 | tristate "Transmeta LongRun" | ||
214 | depends on X86_32 | ||
215 | help | ||
216 | This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors | ||
217 | which support LongRun. | ||
218 | |||
219 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
220 | |||
221 | If in doubt, say N. | ||
222 | |||
223 | config X86_LONGHAUL | ||
224 | tristate "VIA Cyrix III Longhaul" | ||
225 | select CPU_FREQ_TABLE | ||
226 | depends on X86_32 && ACPI_PROCESSOR | ||
227 | help | ||
228 | This adds the CPUFreq driver for VIA Samuel/CyrixIII, | ||
229 | VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T | ||
230 | processors. | ||
231 | |||
232 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
233 | |||
234 | If in doubt, say N. | ||
235 | |||
236 | config X86_E_POWERSAVER | ||
237 | tristate "VIA C7 Enhanced PowerSaver (DANGEROUS)" | ||
238 | select CPU_FREQ_TABLE | ||
239 | depends on X86_32 && EXPERIMENTAL | ||
240 | help | ||
241 | This adds the CPUFreq driver for VIA C7 processors. However, this driver | ||
242 | does not have any safeguards to prevent operating the CPU out of spec | ||
243 | and is thus considered dangerous. Please use the regular ACPI cpufreq | ||
244 | driver, enabled by CONFIG_X86_ACPI_CPUFREQ. | ||
245 | |||
246 | If in doubt, say N. | ||
247 | |||
248 | comment "shared options" | ||
249 | |||
250 | config X86_SPEEDSTEP_LIB | ||
251 | tristate | ||
252 | default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD) | ||
253 | |||
254 | config X86_SPEEDSTEP_RELAXED_CAP_CHECK | ||
255 | bool "Relaxed speedstep capability checks" | ||
256 | depends on X86_32 && (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH) | ||
257 | help | ||
258 | Don't perform all checks for a speedstep capable system which would | ||
259 | normally be done. Some ancient or strange systems, though speedstep | ||
260 | capable, don't always indicate that they are speedstep capable. This | ||
261 | option lets the probing code bypass some of those checks if the | ||
262 | parameter "relaxed_check=1" is passed to the module. | ||
263 | |||
264 | endif # CPU_FREQ | ||
265 | |||
266 | endmenu | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile deleted file mode 100644 index bd54bf67e6fb..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/Makefile +++ /dev/null | |||
@@ -1,21 +0,0 @@ | |||
1 | # Link order matters. K8 is preferred to ACPI because of firmware bugs in early | ||
2 | # K8 systems. ACPI is preferred to all other hardware-specific drivers. | ||
3 | # speedstep-* is preferred over p4-clockmod. | ||
4 | |||
5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o | ||
6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o | ||
7 | obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o | ||
8 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o | ||
9 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o | ||
10 | obj-$(CONFIG_X86_LONGHAUL) += longhaul.o | ||
11 | obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o | ||
12 | obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o | ||
13 | obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o | ||
14 | obj-$(CONFIG_X86_LONGRUN) += longrun.o | ||
15 | obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o | ||
16 | obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o | ||
17 | obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o | ||
18 | obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o | ||
19 | obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o | ||
20 | obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o | ||
21 | obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c deleted file mode 100644 index a2baafb2fe6d..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ /dev/null | |||
@@ -1,776 +0,0 @@ | |||
1 | /* | ||
2 | * acpi-cpufreq.c - ACPI Processor P-States Driver | ||
3 | * | ||
4 | * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> | ||
5 | * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> | ||
6 | * Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de> | ||
7 | * Copyright (C) 2006 Denis Sadykov <denis.m.sadykov@intel.com> | ||
8 | * | ||
9 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2 of the License, or (at | ||
14 | * your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, but | ||
17 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
19 | * General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License along | ||
22 | * with this program; if not, write to the Free Software Foundation, Inc., | ||
23 | * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. | ||
24 | * | ||
25 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
26 | */ | ||
27 | |||
28 | #include <linux/kernel.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/init.h> | ||
31 | #include <linux/smp.h> | ||
32 | #include <linux/sched.h> | ||
33 | #include <linux/cpufreq.h> | ||
34 | #include <linux/compiler.h> | ||
35 | #include <linux/dmi.h> | ||
36 | #include <linux/slab.h> | ||
37 | |||
38 | #include <linux/acpi.h> | ||
39 | #include <linux/io.h> | ||
40 | #include <linux/delay.h> | ||
41 | #include <linux/uaccess.h> | ||
42 | |||
43 | #include <acpi/processor.h> | ||
44 | |||
45 | #include <asm/msr.h> | ||
46 | #include <asm/processor.h> | ||
47 | #include <asm/cpufeature.h> | ||
48 | #include "mperf.h" | ||
49 | |||
50 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
51 | "acpi-cpufreq", msg) | ||
52 | |||
53 | MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); | ||
54 | MODULE_DESCRIPTION("ACPI Processor P-States Driver"); | ||
55 | MODULE_LICENSE("GPL"); | ||
56 | |||
57 | enum { | ||
58 | UNDEFINED_CAPABLE = 0, | ||
59 | SYSTEM_INTEL_MSR_CAPABLE, | ||
60 | SYSTEM_IO_CAPABLE, | ||
61 | }; | ||
62 | |||
63 | #define INTEL_MSR_RANGE (0xffff) | ||
64 | |||
65 | struct acpi_cpufreq_data { | ||
66 | struct acpi_processor_performance *acpi_data; | ||
67 | struct cpufreq_frequency_table *freq_table; | ||
68 | unsigned int resume; | ||
69 | unsigned int cpu_feature; | ||
70 | }; | ||
71 | |||
72 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); | ||
73 | |||
74 | /* acpi_perf_data is a pointer to percpu data. */ | ||
75 | static struct acpi_processor_performance __percpu *acpi_perf_data; | ||
76 | |||
77 | static struct cpufreq_driver acpi_cpufreq_driver; | ||
78 | |||
79 | static unsigned int acpi_pstate_strict; | ||
80 | |||
81 | static int check_est_cpu(unsigned int cpuid) | ||
82 | { | ||
83 | struct cpuinfo_x86 *cpu = &cpu_data(cpuid); | ||
84 | |||
85 | return cpu_has(cpu, X86_FEATURE_EST); | ||
86 | } | ||
87 | |||
88 | static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) | ||
89 | { | ||
90 | struct acpi_processor_performance *perf; | ||
91 | int i; | ||
92 | |||
93 | perf = data->acpi_data; | ||
94 | |||
95 | for (i = 0; i < perf->state_count; i++) { | ||
96 | if (value == perf->states[i].status) | ||
97 | return data->freq_table[i].frequency; | ||
98 | } | ||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) | ||
103 | { | ||
104 | int i; | ||
105 | struct acpi_processor_performance *perf; | ||
106 | |||
107 | msr &= INTEL_MSR_RANGE; | ||
108 | perf = data->acpi_data; | ||
109 | |||
110 | for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { | ||
111 | if (msr == perf->states[data->freq_table[i].index].status) | ||
112 | return data->freq_table[i].frequency; | ||
113 | } | ||
114 | return data->freq_table[0].frequency; | ||
115 | } | ||
116 | |||
117 | static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data) | ||
118 | { | ||
119 | switch (data->cpu_feature) { | ||
120 | case SYSTEM_INTEL_MSR_CAPABLE: | ||
121 | return extract_msr(val, data); | ||
122 | case SYSTEM_IO_CAPABLE: | ||
123 | return extract_io(val, data); | ||
124 | default: | ||
125 | return 0; | ||
126 | } | ||
127 | } | ||
128 | |||
129 | struct msr_addr { | ||
130 | u32 reg; | ||
131 | }; | ||
132 | |||
133 | struct io_addr { | ||
134 | u16 port; | ||
135 | u8 bit_width; | ||
136 | }; | ||
137 | |||
138 | struct drv_cmd { | ||
139 | unsigned int type; | ||
140 | const struct cpumask *mask; | ||
141 | union { | ||
142 | struct msr_addr msr; | ||
143 | struct io_addr io; | ||
144 | } addr; | ||
145 | u32 val; | ||
146 | }; | ||
147 | |||
148 | /* Called via smp_call_function_single(), on the target CPU */ | ||
149 | static void do_drv_read(void *_cmd) | ||
150 | { | ||
151 | struct drv_cmd *cmd = _cmd; | ||
152 | u32 h; | ||
153 | |||
154 | switch (cmd->type) { | ||
155 | case SYSTEM_INTEL_MSR_CAPABLE: | ||
156 | rdmsr(cmd->addr.msr.reg, cmd->val, h); | ||
157 | break; | ||
158 | case SYSTEM_IO_CAPABLE: | ||
159 | acpi_os_read_port((acpi_io_address)cmd->addr.io.port, | ||
160 | &cmd->val, | ||
161 | (u32)cmd->addr.io.bit_width); | ||
162 | break; | ||
163 | default: | ||
164 | break; | ||
165 | } | ||
166 | } | ||
167 | |||
168 | /* Called via smp_call_function_many(), on the target CPUs */ | ||
169 | static void do_drv_write(void *_cmd) | ||
170 | { | ||
171 | struct drv_cmd *cmd = _cmd; | ||
172 | u32 lo, hi; | ||
173 | |||
174 | switch (cmd->type) { | ||
175 | case SYSTEM_INTEL_MSR_CAPABLE: | ||
176 | rdmsr(cmd->addr.msr.reg, lo, hi); | ||
177 | lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE); | ||
178 | wrmsr(cmd->addr.msr.reg, lo, hi); | ||
179 | break; | ||
180 | case SYSTEM_IO_CAPABLE: | ||
181 | acpi_os_write_port((acpi_io_address)cmd->addr.io.port, | ||
182 | cmd->val, | ||
183 | (u32)cmd->addr.io.bit_width); | ||
184 | break; | ||
185 | default: | ||
186 | break; | ||
187 | } | ||
188 | } | ||
189 | |||
190 | static void drv_read(struct drv_cmd *cmd) | ||
191 | { | ||
192 | int err; | ||
193 | cmd->val = 0; | ||
194 | |||
195 | err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1); | ||
196 | WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? */ | ||
197 | } | ||
198 | |||
199 | static void drv_write(struct drv_cmd *cmd) | ||
200 | { | ||
201 | int this_cpu; | ||
202 | |||
203 | this_cpu = get_cpu(); | ||
204 | if (cpumask_test_cpu(this_cpu, cmd->mask)) | ||
205 | do_drv_write(cmd); | ||
206 | smp_call_function_many(cmd->mask, do_drv_write, cmd, 1); | ||
207 | put_cpu(); | ||
208 | } | ||
209 | |||
210 | static u32 get_cur_val(const struct cpumask *mask) | ||
211 | { | ||
212 | struct acpi_processor_performance *perf; | ||
213 | struct drv_cmd cmd; | ||
214 | |||
215 | if (unlikely(cpumask_empty(mask))) | ||
216 | return 0; | ||
217 | |||
218 | switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) { | ||
219 | case SYSTEM_INTEL_MSR_CAPABLE: | ||
220 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; | ||
221 | cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; | ||
222 | break; | ||
223 | case SYSTEM_IO_CAPABLE: | ||
224 | cmd.type = SYSTEM_IO_CAPABLE; | ||
225 | perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data; | ||
226 | cmd.addr.io.port = perf->control_register.address; | ||
227 | cmd.addr.io.bit_width = perf->control_register.bit_width; | ||
228 | break; | ||
229 | default: | ||
230 | return 0; | ||
231 | } | ||
232 | |||
233 | cmd.mask = mask; | ||
234 | drv_read(&cmd); | ||
235 | |||
236 | dprintk("get_cur_val = %u\n", cmd.val); | ||
237 | |||
238 | return cmd.val; | ||
239 | } | ||
240 | |||
241 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) | ||
242 | { | ||
243 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); | ||
244 | unsigned int freq; | ||
245 | unsigned int cached_freq; | ||
246 | |||
247 | dprintk("get_cur_freq_on_cpu (%d)\n", cpu); | ||
248 | |||
249 | if (unlikely(data == NULL || | ||
250 | data->acpi_data == NULL || data->freq_table == NULL)) { | ||
251 | return 0; | ||
252 | } | ||
253 | |||
254 | cached_freq = data->freq_table[data->acpi_data->state].frequency; | ||
255 | freq = extract_freq(get_cur_val(cpumask_of(cpu)), data); | ||
256 | if (freq != cached_freq) { | ||
257 | /* | ||
258 | * The dreaded BIOS frequency change behind our back. | ||
259 | * Force set the frequency on next target call. | ||
260 | */ | ||
261 | data->resume = 1; | ||
262 | } | ||
263 | |||
264 | dprintk("cur freq = %u\n", freq); | ||
265 | |||
266 | return freq; | ||
267 | } | ||
268 | |||
269 | static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, | ||
270 | struct acpi_cpufreq_data *data) | ||
271 | { | ||
272 | unsigned int cur_freq; | ||
273 | unsigned int i; | ||
274 | |||
275 | for (i = 0; i < 100; i++) { | ||
276 | cur_freq = extract_freq(get_cur_val(mask), data); | ||
277 | if (cur_freq == freq) | ||
278 | return 1; | ||
279 | udelay(10); | ||
280 | } | ||
281 | return 0; | ||
282 | } | ||
283 | |||
284 | static int acpi_cpufreq_target(struct cpufreq_policy *policy, | ||
285 | unsigned int target_freq, unsigned int relation) | ||
286 | { | ||
287 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); | ||
288 | struct acpi_processor_performance *perf; | ||
289 | struct cpufreq_freqs freqs; | ||
290 | struct drv_cmd cmd; | ||
291 | unsigned int next_state = 0; /* Index into freq_table */ | ||
292 | unsigned int next_perf_state = 0; /* Index into perf table */ | ||
293 | unsigned int i; | ||
294 | int result = 0; | ||
295 | |||
296 | dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); | ||
297 | |||
298 | if (unlikely(data == NULL || | ||
299 | data->acpi_data == NULL || data->freq_table == NULL)) { | ||
300 | return -ENODEV; | ||
301 | } | ||
302 | |||
303 | perf = data->acpi_data; | ||
304 | result = cpufreq_frequency_table_target(policy, | ||
305 | data->freq_table, | ||
306 | target_freq, | ||
307 | relation, &next_state); | ||
308 | if (unlikely(result)) { | ||
309 | result = -ENODEV; | ||
310 | goto out; | ||
311 | } | ||
312 | |||
313 | next_perf_state = data->freq_table[next_state].index; | ||
314 | if (perf->state == next_perf_state) { | ||
315 | if (unlikely(data->resume)) { | ||
316 | dprintk("Called after resume, resetting to P%d\n", | ||
317 | next_perf_state); | ||
318 | data->resume = 0; | ||
319 | } else { | ||
320 | dprintk("Already at target state (P%d)\n", | ||
321 | next_perf_state); | ||
322 | goto out; | ||
323 | } | ||
324 | } | ||
325 | |||
326 | switch (data->cpu_feature) { | ||
327 | case SYSTEM_INTEL_MSR_CAPABLE: | ||
328 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; | ||
329 | cmd.addr.msr.reg = MSR_IA32_PERF_CTL; | ||
330 | cmd.val = (u32) perf->states[next_perf_state].control; | ||
331 | break; | ||
332 | case SYSTEM_IO_CAPABLE: | ||
333 | cmd.type = SYSTEM_IO_CAPABLE; | ||
334 | cmd.addr.io.port = perf->control_register.address; | ||
335 | cmd.addr.io.bit_width = perf->control_register.bit_width; | ||
336 | cmd.val = (u32) perf->states[next_perf_state].control; | ||
337 | break; | ||
338 | default: | ||
339 | result = -ENODEV; | ||
340 | goto out; | ||
341 | } | ||
342 | |||
343 | /* cpufreq holds the hotplug lock, so we are safe from here on */ | ||
344 | if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) | ||
345 | cmd.mask = policy->cpus; | ||
346 | else | ||
347 | cmd.mask = cpumask_of(policy->cpu); | ||
348 | |||
349 | freqs.old = perf->states[perf->state].core_frequency * 1000; | ||
350 | freqs.new = data->freq_table[next_state].frequency; | ||
351 | for_each_cpu(i, policy->cpus) { | ||
352 | freqs.cpu = i; | ||
353 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
354 | } | ||
355 | |||
356 | drv_write(&cmd); | ||
357 | |||
358 | if (acpi_pstate_strict) { | ||
359 | if (!check_freqs(cmd.mask, freqs.new, data)) { | ||
360 | dprintk("acpi_cpufreq_target failed (%d)\n", | ||
361 | policy->cpu); | ||
362 | result = -EAGAIN; | ||
363 | goto out; | ||
364 | } | ||
365 | } | ||
366 | |||
367 | for_each_cpu(i, policy->cpus) { | ||
368 | freqs.cpu = i; | ||
369 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
370 | } | ||
371 | perf->state = next_perf_state; | ||
372 | |||
373 | out: | ||
374 | return result; | ||
375 | } | ||
376 | |||
377 | static int acpi_cpufreq_verify(struct cpufreq_policy *policy) | ||
378 | { | ||
379 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); | ||
380 | |||
381 | dprintk("acpi_cpufreq_verify\n"); | ||
382 | |||
383 | return cpufreq_frequency_table_verify(policy, data->freq_table); | ||
384 | } | ||
385 | |||
386 | static unsigned long | ||
387 | acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) | ||
388 | { | ||
389 | struct acpi_processor_performance *perf = data->acpi_data; | ||
390 | |||
391 | if (cpu_khz) { | ||
392 | /* search the closest match to cpu_khz */ | ||
393 | unsigned int i; | ||
394 | unsigned long freq; | ||
395 | unsigned long freqn = perf->states[0].core_frequency * 1000; | ||
396 | |||
397 | for (i = 0; i < (perf->state_count-1); i++) { | ||
398 | freq = freqn; | ||
399 | freqn = perf->states[i+1].core_frequency * 1000; | ||
400 | if ((2 * cpu_khz) > (freqn + freq)) { | ||
401 | perf->state = i; | ||
402 | return freq; | ||
403 | } | ||
404 | } | ||
405 | perf->state = perf->state_count-1; | ||
406 | return freqn; | ||
407 | } else { | ||
408 | /* assume CPU is at P0... */ | ||
409 | perf->state = 0; | ||
410 | return perf->states[0].core_frequency * 1000; | ||
411 | } | ||
412 | } | ||
413 | |||
414 | static void free_acpi_perf_data(void) | ||
415 | { | ||
416 | unsigned int i; | ||
417 | |||
418 | /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */ | ||
419 | for_each_possible_cpu(i) | ||
420 | free_cpumask_var(per_cpu_ptr(acpi_perf_data, i) | ||
421 | ->shared_cpu_map); | ||
422 | free_percpu(acpi_perf_data); | ||
423 | } | ||
424 | |||
425 | /* | ||
426 | * acpi_cpufreq_early_init - initialize ACPI P-States library | ||
427 | * | ||
428 | * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c) | ||
429 | * in order to determine correct frequency and voltage pairings. We can | ||
430 | * do _PDC and _PSD and find out the processor dependency for the | ||
431 | * actual init that will happen later... | ||
432 | */ | ||
433 | static int __init acpi_cpufreq_early_init(void) | ||
434 | { | ||
435 | unsigned int i; | ||
436 | dprintk("acpi_cpufreq_early_init\n"); | ||
437 | |||
438 | acpi_perf_data = alloc_percpu(struct acpi_processor_performance); | ||
439 | if (!acpi_perf_data) { | ||
440 | dprintk("Memory allocation error for acpi_perf_data.\n"); | ||
441 | return -ENOMEM; | ||
442 | } | ||
443 | for_each_possible_cpu(i) { | ||
444 | if (!zalloc_cpumask_var_node( | ||
445 | &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map, | ||
446 | GFP_KERNEL, cpu_to_node(i))) { | ||
447 | |||
448 | /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */ | ||
449 | free_acpi_perf_data(); | ||
450 | return -ENOMEM; | ||
451 | } | ||
452 | } | ||
453 | |||
454 | /* Do initialization in ACPI core */ | ||
455 | acpi_processor_preregister_performance(acpi_perf_data); | ||
456 | return 0; | ||
457 | } | ||
458 | |||
459 | #ifdef CONFIG_SMP | ||
460 | /* | ||
461 | * Some BIOSes do SW_ANY coordination internally, either set it up in hw | ||
462 | * or do it in BIOS firmware and won't inform about it to OS. If not | ||
463 | * detected, this has a side effect of making CPU run at a different speed | ||
464 | * than OS intended it to run at. Detect it and handle it cleanly. | ||
465 | */ | ||
466 | static int bios_with_sw_any_bug; | ||
467 | |||
468 | static int sw_any_bug_found(const struct dmi_system_id *d) | ||
469 | { | ||
470 | bios_with_sw_any_bug = 1; | ||
471 | return 0; | ||
472 | } | ||
473 | |||
474 | static const struct dmi_system_id sw_any_bug_dmi_table[] = { | ||
475 | { | ||
476 | .callback = sw_any_bug_found, | ||
477 | .ident = "Supermicro Server X6DLP", | ||
478 | .matches = { | ||
479 | DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), | ||
480 | DMI_MATCH(DMI_BIOS_VERSION, "080010"), | ||
481 | DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), | ||
482 | }, | ||
483 | }, | ||
484 | { } | ||
485 | }; | ||
486 | |||
487 | static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) | ||
488 | { | ||
489 | /* Intel Xeon Processor 7100 Series Specification Update | ||
490 | * http://www.intel.com/Assets/PDF/specupdate/314554.pdf | ||
491 | * AL30: A Machine Check Exception (MCE) Occurring during an | ||
492 | * Enhanced Intel SpeedStep Technology Ratio Change May Cause | ||
493 | * Both Processor Cores to Lock Up. */ | ||
494 | if (c->x86_vendor == X86_VENDOR_INTEL) { | ||
495 | if ((c->x86 == 15) && | ||
496 | (c->x86_model == 6) && | ||
497 | (c->x86_mask == 8)) { | ||
498 | printk(KERN_INFO "acpi-cpufreq: Intel(R) " | ||
499 | "Xeon(R) 7100 Errata AL30, processors may " | ||
500 | "lock up on frequency changes: disabling " | ||
501 | "acpi-cpufreq.\n"); | ||
502 | return -ENODEV; | ||
503 | } | ||
504 | } | ||
505 | return 0; | ||
506 | } | ||
507 | #endif | ||
508 | |||
509 | static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | ||
510 | { | ||
511 | unsigned int i; | ||
512 | unsigned int valid_states = 0; | ||
513 | unsigned int cpu = policy->cpu; | ||
514 | struct acpi_cpufreq_data *data; | ||
515 | unsigned int result = 0; | ||
516 | struct cpuinfo_x86 *c = &cpu_data(policy->cpu); | ||
517 | struct acpi_processor_performance *perf; | ||
518 | #ifdef CONFIG_SMP | ||
519 | static int blacklisted; | ||
520 | #endif | ||
521 | |||
522 | dprintk("acpi_cpufreq_cpu_init\n"); | ||
523 | |||
524 | #ifdef CONFIG_SMP | ||
525 | if (blacklisted) | ||
526 | return blacklisted; | ||
527 | blacklisted = acpi_cpufreq_blacklist(c); | ||
528 | if (blacklisted) | ||
529 | return blacklisted; | ||
530 | #endif | ||
531 | |||
532 | data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); | ||
533 | if (!data) | ||
534 | return -ENOMEM; | ||
535 | |||
536 | data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu); | ||
537 | per_cpu(acfreq_data, cpu) = data; | ||
538 | |||
539 | if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) | ||
540 | acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; | ||
541 | |||
542 | result = acpi_processor_register_performance(data->acpi_data, cpu); | ||
543 | if (result) | ||
544 | goto err_free; | ||
545 | |||
546 | perf = data->acpi_data; | ||
547 | policy->shared_type = perf->shared_type; | ||
548 | |||
549 | /* | ||
550 | * Will let policy->cpus know about dependency only when software | ||
551 | * coordination is required. | ||
552 | */ | ||
553 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || | ||
554 | policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { | ||
555 | cpumask_copy(policy->cpus, perf->shared_cpu_map); | ||
556 | } | ||
557 | cpumask_copy(policy->related_cpus, perf->shared_cpu_map); | ||
558 | |||
559 | #ifdef CONFIG_SMP | ||
560 | dmi_check_system(sw_any_bug_dmi_table); | ||
561 | if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) { | ||
562 | policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; | ||
563 | cpumask_copy(policy->cpus, cpu_core_mask(cpu)); | ||
564 | } | ||
565 | #endif | ||
566 | |||
567 | /* capability check */ | ||
568 | if (perf->state_count <= 1) { | ||
569 | dprintk("No P-States\n"); | ||
570 | result = -ENODEV; | ||
571 | goto err_unreg; | ||
572 | } | ||
573 | |||
574 | if (perf->control_register.space_id != perf->status_register.space_id) { | ||
575 | result = -ENODEV; | ||
576 | goto err_unreg; | ||
577 | } | ||
578 | |||
579 | switch (perf->control_register.space_id) { | ||
580 | case ACPI_ADR_SPACE_SYSTEM_IO: | ||
581 | dprintk("SYSTEM IO addr space\n"); | ||
582 | data->cpu_feature = SYSTEM_IO_CAPABLE; | ||
583 | break; | ||
584 | case ACPI_ADR_SPACE_FIXED_HARDWARE: | ||
585 | dprintk("HARDWARE addr space\n"); | ||
586 | if (!check_est_cpu(cpu)) { | ||
587 | result = -ENODEV; | ||
588 | goto err_unreg; | ||
589 | } | ||
590 | data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; | ||
591 | break; | ||
592 | default: | ||
593 | dprintk("Unknown addr space %d\n", | ||
594 | (u32) (perf->control_register.space_id)); | ||
595 | result = -ENODEV; | ||
596 | goto err_unreg; | ||
597 | } | ||
598 | |||
599 | data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * | ||
600 | (perf->state_count+1), GFP_KERNEL); | ||
601 | if (!data->freq_table) { | ||
602 | result = -ENOMEM; | ||
603 | goto err_unreg; | ||
604 | } | ||
605 | |||
606 | /* detect transition latency */ | ||
607 | policy->cpuinfo.transition_latency = 0; | ||
608 | for (i = 0; i < perf->state_count; i++) { | ||
609 | if ((perf->states[i].transition_latency * 1000) > | ||
610 | policy->cpuinfo.transition_latency) | ||
611 | policy->cpuinfo.transition_latency = | ||
612 | perf->states[i].transition_latency * 1000; | ||
613 | } | ||
614 | |||
615 | /* Check for high latency (>20uS) from buggy BIOSes, like on T42 */ | ||
616 | if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE && | ||
617 | policy->cpuinfo.transition_latency > 20 * 1000) { | ||
618 | policy->cpuinfo.transition_latency = 20 * 1000; | ||
619 | printk_once(KERN_INFO | ||
620 | "P-state transition latency capped at 20 uS\n"); | ||
621 | } | ||
622 | |||
623 | /* table init */ | ||
624 | for (i = 0; i < perf->state_count; i++) { | ||
625 | if (i > 0 && perf->states[i].core_frequency >= | ||
626 | data->freq_table[valid_states-1].frequency / 1000) | ||
627 | continue; | ||
628 | |||
629 | data->freq_table[valid_states].index = i; | ||
630 | data->freq_table[valid_states].frequency = | ||
631 | perf->states[i].core_frequency * 1000; | ||
632 | valid_states++; | ||
633 | } | ||
634 | data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END; | ||
635 | perf->state = 0; | ||
636 | |||
637 | result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table); | ||
638 | if (result) | ||
639 | goto err_freqfree; | ||
640 | |||
641 | if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq) | ||
642 | printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n"); | ||
643 | |||
644 | switch (perf->control_register.space_id) { | ||
645 | case ACPI_ADR_SPACE_SYSTEM_IO: | ||
646 | /* Current speed is unknown and not detectable by IO port */ | ||
647 | policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu); | ||
648 | break; | ||
649 | case ACPI_ADR_SPACE_FIXED_HARDWARE: | ||
650 | acpi_cpufreq_driver.get = get_cur_freq_on_cpu; | ||
651 | policy->cur = get_cur_freq_on_cpu(cpu); | ||
652 | break; | ||
653 | default: | ||
654 | break; | ||
655 | } | ||
656 | |||
657 | /* notify BIOS that we exist */ | ||
658 | acpi_processor_notify_smm(THIS_MODULE); | ||
659 | |||
660 | /* Check for APERF/MPERF support in hardware */ | ||
661 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) | ||
662 | acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf; | ||
663 | |||
664 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); | ||
665 | for (i = 0; i < perf->state_count; i++) | ||
666 | dprintk(" %cP%d: %d MHz, %d mW, %d uS\n", | ||
667 | (i == perf->state ? '*' : ' '), i, | ||
668 | (u32) perf->states[i].core_frequency, | ||
669 | (u32) perf->states[i].power, | ||
670 | (u32) perf->states[i].transition_latency); | ||
671 | |||
672 | cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu); | ||
673 | |||
674 | /* | ||
675 | * the first call to ->target() should result in us actually | ||
676 | * writing something to the appropriate registers. | ||
677 | */ | ||
678 | data->resume = 1; | ||
679 | |||
680 | return result; | ||
681 | |||
682 | err_freqfree: | ||
683 | kfree(data->freq_table); | ||
684 | err_unreg: | ||
685 | acpi_processor_unregister_performance(perf, cpu); | ||
686 | err_free: | ||
687 | kfree(data); | ||
688 | per_cpu(acfreq_data, cpu) = NULL; | ||
689 | |||
690 | return result; | ||
691 | } | ||
692 | |||
693 | static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) | ||
694 | { | ||
695 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); | ||
696 | |||
697 | dprintk("acpi_cpufreq_cpu_exit\n"); | ||
698 | |||
699 | if (data) { | ||
700 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
701 | per_cpu(acfreq_data, policy->cpu) = NULL; | ||
702 | acpi_processor_unregister_performance(data->acpi_data, | ||
703 | policy->cpu); | ||
704 | kfree(data->freq_table); | ||
705 | kfree(data); | ||
706 | } | ||
707 | |||
708 | return 0; | ||
709 | } | ||
710 | |||
711 | static int acpi_cpufreq_resume(struct cpufreq_policy *policy) | ||
712 | { | ||
713 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); | ||
714 | |||
715 | dprintk("acpi_cpufreq_resume\n"); | ||
716 | |||
717 | data->resume = 1; | ||
718 | |||
719 | return 0; | ||
720 | } | ||
721 | |||
722 | static struct freq_attr *acpi_cpufreq_attr[] = { | ||
723 | &cpufreq_freq_attr_scaling_available_freqs, | ||
724 | NULL, | ||
725 | }; | ||
726 | |||
727 | static struct cpufreq_driver acpi_cpufreq_driver = { | ||
728 | .verify = acpi_cpufreq_verify, | ||
729 | .target = acpi_cpufreq_target, | ||
730 | .bios_limit = acpi_processor_get_bios_limit, | ||
731 | .init = acpi_cpufreq_cpu_init, | ||
732 | .exit = acpi_cpufreq_cpu_exit, | ||
733 | .resume = acpi_cpufreq_resume, | ||
734 | .name = "acpi-cpufreq", | ||
735 | .owner = THIS_MODULE, | ||
736 | .attr = acpi_cpufreq_attr, | ||
737 | }; | ||
738 | |||
739 | static int __init acpi_cpufreq_init(void) | ||
740 | { | ||
741 | int ret; | ||
742 | |||
743 | if (acpi_disabled) | ||
744 | return 0; | ||
745 | |||
746 | dprintk("acpi_cpufreq_init\n"); | ||
747 | |||
748 | ret = acpi_cpufreq_early_init(); | ||
749 | if (ret) | ||
750 | return ret; | ||
751 | |||
752 | ret = cpufreq_register_driver(&acpi_cpufreq_driver); | ||
753 | if (ret) | ||
754 | free_acpi_perf_data(); | ||
755 | |||
756 | return ret; | ||
757 | } | ||
758 | |||
759 | static void __exit acpi_cpufreq_exit(void) | ||
760 | { | ||
761 | dprintk("acpi_cpufreq_exit\n"); | ||
762 | |||
763 | cpufreq_unregister_driver(&acpi_cpufreq_driver); | ||
764 | |||
765 | free_percpu(acpi_perf_data); | ||
766 | } | ||
767 | |||
768 | module_param(acpi_pstate_strict, uint, 0644); | ||
769 | MODULE_PARM_DESC(acpi_pstate_strict, | ||
770 | "value 0 or non-zero. non-zero -> strict ACPI checks are " | ||
771 | "performed during frequency changes."); | ||
772 | |||
773 | late_initcall(acpi_cpufreq_init); | ||
774 | module_exit(acpi_cpufreq_exit); | ||
775 | |||
776 | MODULE_ALIAS("acpi"); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c deleted file mode 100644 index 141abebc4516..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ /dev/null | |||
@@ -1,446 +0,0 @@ | |||
1 | /* | ||
2 | * (C) 2004-2006 Sebastian Witt <se.witt@gmx.net> | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * Based upon reverse engineered information | ||
6 | * | ||
7 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
8 | */ | ||
9 | |||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/moduleparam.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/cpufreq.h> | ||
15 | #include <linux/pci.h> | ||
16 | #include <linux/delay.h> | ||
17 | |||
18 | #define NFORCE2_XTAL 25 | ||
19 | #define NFORCE2_BOOTFSB 0x48 | ||
20 | #define NFORCE2_PLLENABLE 0xa8 | ||
21 | #define NFORCE2_PLLREG 0xa4 | ||
22 | #define NFORCE2_PLLADR 0xa0 | ||
23 | #define NFORCE2_PLL(mul, div) (0x100000 | (mul << 8) | div) | ||
24 | |||
25 | #define NFORCE2_MIN_FSB 50 | ||
26 | #define NFORCE2_SAFE_DISTANCE 50 | ||
27 | |||
28 | /* Delay in ms between FSB changes */ | ||
29 | /* #define NFORCE2_DELAY 10 */ | ||
30 | |||
31 | /* | ||
32 | * nforce2_chipset: | ||
33 | * FSB is changed using the chipset | ||
34 | */ | ||
35 | static struct pci_dev *nforce2_dev; | ||
36 | |||
37 | /* fid: | ||
38 | * multiplier * 10 | ||
39 | */ | ||
40 | static int fid; | ||
41 | |||
42 | /* min_fsb, max_fsb: | ||
43 | * minimum and maximum FSB (= FSB at boot time) | ||
44 | */ | ||
45 | static int min_fsb; | ||
46 | static int max_fsb; | ||
47 | |||
48 | MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>"); | ||
49 | MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver"); | ||
50 | MODULE_LICENSE("GPL"); | ||
51 | |||
52 | module_param(fid, int, 0444); | ||
53 | module_param(min_fsb, int, 0444); | ||
54 | |||
55 | MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); | ||
56 | MODULE_PARM_DESC(min_fsb, | ||
57 | "Minimum FSB to use, if not defined: current FSB - 50"); | ||
58 | |||
59 | #define PFX "cpufreq-nforce2: " | ||
60 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
61 | "cpufreq-nforce2", msg) | ||
62 | |||
63 | /** | ||
64 | * nforce2_calc_fsb - calculate FSB | ||
65 | * @pll: PLL value | ||
66 | * | ||
67 | * Calculates FSB from PLL value | ||
68 | */ | ||
69 | static int nforce2_calc_fsb(int pll) | ||
70 | { | ||
71 | unsigned char mul, div; | ||
72 | |||
73 | mul = (pll >> 8) & 0xff; | ||
74 | div = pll & 0xff; | ||
75 | |||
76 | if (div > 0) | ||
77 | return NFORCE2_XTAL * mul / div; | ||
78 | |||
79 | return 0; | ||
80 | } | ||
81 | |||
82 | /** | ||
83 | * nforce2_calc_pll - calculate PLL value | ||
84 | * @fsb: FSB | ||
85 | * | ||
86 | * Calculate PLL value for given FSB | ||
87 | */ | ||
88 | static int nforce2_calc_pll(unsigned int fsb) | ||
89 | { | ||
90 | unsigned char xmul, xdiv; | ||
91 | unsigned char mul = 0, div = 0; | ||
92 | int tried = 0; | ||
93 | |||
94 | /* Try to calculate multiplier and divider up to 4 times */ | ||
95 | while (((mul == 0) || (div == 0)) && (tried <= 3)) { | ||
96 | for (xdiv = 2; xdiv <= 0x80; xdiv++) | ||
97 | for (xmul = 1; xmul <= 0xfe; xmul++) | ||
98 | if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) == | ||
99 | fsb + tried) { | ||
100 | mul = xmul; | ||
101 | div = xdiv; | ||
102 | } | ||
103 | tried++; | ||
104 | } | ||
105 | |||
106 | if ((mul == 0) || (div == 0)) | ||
107 | return -1; | ||
108 | |||
109 | return NFORCE2_PLL(mul, div); | ||
110 | } | ||
111 | |||
112 | /** | ||
113 | * nforce2_write_pll - write PLL value to chipset | ||
114 | * @pll: PLL value | ||
115 | * | ||
116 | * Writes new FSB PLL value to chipset | ||
117 | */ | ||
118 | static void nforce2_write_pll(int pll) | ||
119 | { | ||
120 | int temp; | ||
121 | |||
122 | /* Set the pll addr. to 0x00 */ | ||
123 | pci_write_config_dword(nforce2_dev, NFORCE2_PLLADR, 0); | ||
124 | |||
125 | /* Now write the value in all 64 registers */ | ||
126 | for (temp = 0; temp <= 0x3f; temp++) | ||
127 | pci_write_config_dword(nforce2_dev, NFORCE2_PLLREG, pll); | ||
128 | |||
129 | return; | ||
130 | } | ||
131 | |||
132 | /** | ||
133 | * nforce2_fsb_read - Read FSB | ||
134 | * | ||
135 | * Read FSB from chipset | ||
136 | * If bootfsb != 0, return FSB at boot-time | ||
137 | */ | ||
138 | static unsigned int nforce2_fsb_read(int bootfsb) | ||
139 | { | ||
140 | struct pci_dev *nforce2_sub5; | ||
141 | u32 fsb, temp = 0; | ||
142 | |||
143 | /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ | ||
144 | nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 0x01EF, | ||
145 | PCI_ANY_ID, PCI_ANY_ID, NULL); | ||
146 | if (!nforce2_sub5) | ||
147 | return 0; | ||
148 | |||
149 | pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb); | ||
150 | fsb /= 1000000; | ||
151 | |||
152 | /* Check if PLL register is already set */ | ||
153 | pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp); | ||
154 | |||
155 | if (bootfsb || !temp) | ||
156 | return fsb; | ||
157 | |||
158 | /* Use PLL register FSB value */ | ||
159 | pci_read_config_dword(nforce2_dev, NFORCE2_PLLREG, &temp); | ||
160 | fsb = nforce2_calc_fsb(temp); | ||
161 | |||
162 | return fsb; | ||
163 | } | ||
164 | |||
165 | /** | ||
166 | * nforce2_set_fsb - set new FSB | ||
167 | * @fsb: New FSB | ||
168 | * | ||
169 | * Sets new FSB | ||
170 | */ | ||
171 | static int nforce2_set_fsb(unsigned int fsb) | ||
172 | { | ||
173 | u32 temp = 0; | ||
174 | unsigned int tfsb; | ||
175 | int diff; | ||
176 | int pll = 0; | ||
177 | |||
178 | if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) { | ||
179 | printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb); | ||
180 | return -EINVAL; | ||
181 | } | ||
182 | |||
183 | tfsb = nforce2_fsb_read(0); | ||
184 | if (!tfsb) { | ||
185 | printk(KERN_ERR PFX "Error while reading the FSB\n"); | ||
186 | return -EINVAL; | ||
187 | } | ||
188 | |||
189 | /* First write? Then set actual value */ | ||
190 | pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp); | ||
191 | if (!temp) { | ||
192 | pll = nforce2_calc_pll(tfsb); | ||
193 | |||
194 | if (pll < 0) | ||
195 | return -EINVAL; | ||
196 | |||
197 | nforce2_write_pll(pll); | ||
198 | } | ||
199 | |||
200 | /* Enable write access */ | ||
201 | temp = 0x01; | ||
202 | pci_write_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8)temp); | ||
203 | |||
204 | diff = tfsb - fsb; | ||
205 | |||
206 | if (!diff) | ||
207 | return 0; | ||
208 | |||
209 | while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) { | ||
210 | if (diff < 0) | ||
211 | tfsb++; | ||
212 | else | ||
213 | tfsb--; | ||
214 | |||
215 | /* Calculate the PLL reg. value */ | ||
216 | pll = nforce2_calc_pll(tfsb); | ||
217 | if (pll == -1) | ||
218 | return -EINVAL; | ||
219 | |||
220 | nforce2_write_pll(pll); | ||
221 | #ifdef NFORCE2_DELAY | ||
222 | mdelay(NFORCE2_DELAY); | ||
223 | #endif | ||
224 | } | ||
225 | |||
226 | temp = 0x40; | ||
227 | pci_write_config_byte(nforce2_dev, NFORCE2_PLLADR, (u8)temp); | ||
228 | |||
229 | return 0; | ||
230 | } | ||
231 | |||
232 | /** | ||
233 | * nforce2_get - get the CPU frequency | ||
234 | * @cpu: CPU number | ||
235 | * | ||
236 | * Returns the CPU frequency | ||
237 | */ | ||
238 | static unsigned int nforce2_get(unsigned int cpu) | ||
239 | { | ||
240 | if (cpu) | ||
241 | return 0; | ||
242 | return nforce2_fsb_read(0) * fid * 100; | ||
243 | } | ||
244 | |||
245 | /** | ||
246 | * nforce2_target - set a new CPUFreq policy | ||
247 | * @policy: new policy | ||
248 | * @target_freq: the target frequency | ||
249 | * @relation: how that frequency relates to achieved frequency | ||
250 | * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | ||
251 | * | ||
252 | * Sets a new CPUFreq policy. | ||
253 | */ | ||
254 | static int nforce2_target(struct cpufreq_policy *policy, | ||
255 | unsigned int target_freq, unsigned int relation) | ||
256 | { | ||
257 | /* unsigned long flags; */ | ||
258 | struct cpufreq_freqs freqs; | ||
259 | unsigned int target_fsb; | ||
260 | |||
261 | if ((target_freq > policy->max) || (target_freq < policy->min)) | ||
262 | return -EINVAL; | ||
263 | |||
264 | target_fsb = target_freq / (fid * 100); | ||
265 | |||
266 | freqs.old = nforce2_get(policy->cpu); | ||
267 | freqs.new = target_fsb * fid * 100; | ||
268 | freqs.cpu = 0; /* Only one CPU on nForce2 platforms */ | ||
269 | |||
270 | if (freqs.old == freqs.new) | ||
271 | return 0; | ||
272 | |||
273 | dprintk("Old CPU frequency %d kHz, new %d kHz\n", | ||
274 | freqs.old, freqs.new); | ||
275 | |||
276 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
277 | |||
278 | /* Disable IRQs */ | ||
279 | /* local_irq_save(flags); */ | ||
280 | |||
281 | if (nforce2_set_fsb(target_fsb) < 0) | ||
282 | printk(KERN_ERR PFX "Changing FSB to %d failed\n", | ||
283 | target_fsb); | ||
284 | else | ||
285 | dprintk("Changed FSB successfully to %d\n", | ||
286 | target_fsb); | ||
287 | |||
288 | /* Enable IRQs */ | ||
289 | /* local_irq_restore(flags); */ | ||
290 | |||
291 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
292 | |||
293 | return 0; | ||
294 | } | ||
295 | |||
296 | /** | ||
297 | * nforce2_verify - verifies a new CPUFreq policy | ||
298 | * @policy: new policy | ||
299 | */ | ||
300 | static int nforce2_verify(struct cpufreq_policy *policy) | ||
301 | { | ||
302 | unsigned int fsb_pol_max; | ||
303 | |||
304 | fsb_pol_max = policy->max / (fid * 100); | ||
305 | |||
306 | if (policy->min < (fsb_pol_max * fid * 100)) | ||
307 | policy->max = (fsb_pol_max + 1) * fid * 100; | ||
308 | |||
309 | cpufreq_verify_within_limits(policy, | ||
310 | policy->cpuinfo.min_freq, | ||
311 | policy->cpuinfo.max_freq); | ||
312 | return 0; | ||
313 | } | ||
314 | |||
315 | static int nforce2_cpu_init(struct cpufreq_policy *policy) | ||
316 | { | ||
317 | unsigned int fsb; | ||
318 | unsigned int rfid; | ||
319 | |||
320 | /* capability check */ | ||
321 | if (policy->cpu != 0) | ||
322 | return -ENODEV; | ||
323 | |||
324 | /* Get current FSB */ | ||
325 | fsb = nforce2_fsb_read(0); | ||
326 | |||
327 | if (!fsb) | ||
328 | return -EIO; | ||
329 | |||
330 | /* FIX: Get FID from CPU */ | ||
331 | if (!fid) { | ||
332 | if (!cpu_khz) { | ||
333 | printk(KERN_WARNING PFX | ||
334 | "cpu_khz not set, can't calculate multiplier!\n"); | ||
335 | return -ENODEV; | ||
336 | } | ||
337 | |||
338 | fid = cpu_khz / (fsb * 100); | ||
339 | rfid = fid % 5; | ||
340 | |||
341 | if (rfid) { | ||
342 | if (rfid > 2) | ||
343 | fid += 5 - rfid; | ||
344 | else | ||
345 | fid -= rfid; | ||
346 | } | ||
347 | } | ||
348 | |||
349 | printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb, | ||
350 | fid / 10, fid % 10); | ||
351 | |||
352 | /* Set maximum FSB to FSB at boot time */ | ||
353 | max_fsb = nforce2_fsb_read(1); | ||
354 | |||
355 | if (!max_fsb) | ||
356 | return -EIO; | ||
357 | |||
358 | if (!min_fsb) | ||
359 | min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE; | ||
360 | |||
361 | if (min_fsb < NFORCE2_MIN_FSB) | ||
362 | min_fsb = NFORCE2_MIN_FSB; | ||
363 | |||
364 | /* cpuinfo and default policy values */ | ||
365 | policy->cpuinfo.min_freq = min_fsb * fid * 100; | ||
366 | policy->cpuinfo.max_freq = max_fsb * fid * 100; | ||
367 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
368 | policy->cur = nforce2_get(policy->cpu); | ||
369 | policy->min = policy->cpuinfo.min_freq; | ||
370 | policy->max = policy->cpuinfo.max_freq; | ||
371 | |||
372 | return 0; | ||
373 | } | ||
374 | |||
375 | static int nforce2_cpu_exit(struct cpufreq_policy *policy) | ||
376 | { | ||
377 | return 0; | ||
378 | } | ||
379 | |||
380 | static struct cpufreq_driver nforce2_driver = { | ||
381 | .name = "nforce2", | ||
382 | .verify = nforce2_verify, | ||
383 | .target = nforce2_target, | ||
384 | .get = nforce2_get, | ||
385 | .init = nforce2_cpu_init, | ||
386 | .exit = nforce2_cpu_exit, | ||
387 | .owner = THIS_MODULE, | ||
388 | }; | ||
389 | |||
390 | /** | ||
391 | * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic | ||
392 | * | ||
393 | * Detects nForce2 A2 and C1 stepping | ||
394 | * | ||
395 | */ | ||
396 | static int nforce2_detect_chipset(void) | ||
397 | { | ||
398 | nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, | ||
399 | PCI_DEVICE_ID_NVIDIA_NFORCE2, | ||
400 | PCI_ANY_ID, PCI_ANY_ID, NULL); | ||
401 | |||
402 | if (nforce2_dev == NULL) | ||
403 | return -ENODEV; | ||
404 | |||
405 | printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n", | ||
406 | nforce2_dev->revision); | ||
407 | printk(KERN_INFO PFX | ||
408 | "FSB changing is maybe unstable and can lead to " | ||
409 | "crashes and data loss.\n"); | ||
410 | |||
411 | return 0; | ||
412 | } | ||
413 | |||
414 | /** | ||
415 | * nforce2_init - initializes the nForce2 CPUFreq driver | ||
416 | * | ||
417 | * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported | ||
418 | * devices, -EINVAL on problems during initialization, and zero on | ||
419 | * success. | ||
420 | */ | ||
421 | static int __init nforce2_init(void) | ||
422 | { | ||
423 | /* TODO: do we need to detect the processor? */ | ||
424 | |||
425 | /* detect chipset */ | ||
426 | if (nforce2_detect_chipset()) { | ||
427 | printk(KERN_INFO PFX "No nForce2 chipset.\n"); | ||
428 | return -ENODEV; | ||
429 | } | ||
430 | |||
431 | return cpufreq_register_driver(&nforce2_driver); | ||
432 | } | ||
433 | |||
434 | /** | ||
435 | * nforce2_exit - unregisters cpufreq module | ||
436 | * | ||
437 | * Unregisters nForce2 FSB change support. | ||
438 | */ | ||
439 | static void __exit nforce2_exit(void) | ||
440 | { | ||
441 | cpufreq_unregister_driver(&nforce2_driver); | ||
442 | } | ||
443 | |||
444 | module_init(nforce2_init); | ||
445 | module_exit(nforce2_exit); | ||
446 | |||
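(Editorial aside, not part of the patch: the frequency arithmetic used by the nforce2 driver above is compact enough to restate. FSB in MHz is the 25 MHz crystal times mul/div, per NFORCE2_PLL() and nforce2_calc_fsb(), and the CPU frequency reported by nforce2_get() is FSB * fid * 100 kHz, fid being the multiplier times ten. The standalone check below uses made-up mul/div/fid values.)

#include <stdio.h>

#define NFORCE2_XTAL 25                          /* MHz crystal feeding the PLL */
#define NFORCE2_PLL(mul, div) (0x100000 | ((mul) << 8) | (div))

static int calc_fsb(int pll)                     /* mirrors nforce2_calc_fsb() */
{
	unsigned char mul = (pll >> 8) & 0xff;
	unsigned char div = pll & 0xff;

	return div ? NFORCE2_XTAL * mul / div : 0;
}

int main(void)
{
	int fid = 115;                           /* 11.5x multiplier, stored as x10 */
	int fsb = calc_fsb(NFORCE2_PLL(16, 4));  /* 25 * 16 / 4 = 100 MHz */

	/* nforce2_get(): FSB(MHz) * fid * 100 = kHz */
	printf("FSB %d MHz -> CPU %d kHz\n", fsb, fsb * fid * 100);
	return 0;
}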
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c deleted file mode 100644 index 35a257dd4bb7..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c +++ /dev/null | |||
@@ -1,367 +0,0 @@ | |||
1 | /* | ||
2 | * Based on documentation provided by Dave Jones. Thanks! | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * | ||
6 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
7 | */ | ||
8 | |||
9 | #include <linux/kernel.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/cpufreq.h> | ||
13 | #include <linux/ioport.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include <linux/timex.h> | ||
16 | #include <linux/io.h> | ||
17 | #include <linux/delay.h> | ||
18 | |||
19 | #include <asm/msr.h> | ||
20 | #include <asm/tsc.h> | ||
21 | |||
22 | #define EPS_BRAND_C7M 0 | ||
23 | #define EPS_BRAND_C7 1 | ||
24 | #define EPS_BRAND_EDEN 2 | ||
25 | #define EPS_BRAND_C3 3 | ||
26 | #define EPS_BRAND_C7D 4 | ||
27 | |||
28 | struct eps_cpu_data { | ||
29 | u32 fsb; | ||
30 | struct cpufreq_frequency_table freq_table[]; | ||
31 | }; | ||
32 | |||
33 | static struct eps_cpu_data *eps_cpu[NR_CPUS]; | ||
34 | |||
35 | |||
36 | static unsigned int eps_get(unsigned int cpu) | ||
37 | { | ||
38 | struct eps_cpu_data *centaur; | ||
39 | u32 lo, hi; | ||
40 | |||
41 | if (cpu) | ||
42 | return 0; | ||
43 | centaur = eps_cpu[cpu]; | ||
44 | if (centaur == NULL) | ||
45 | return 0; | ||
46 | |||
47 | /* Return current frequency */ | ||
48 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
49 | return centaur->fsb * ((lo >> 8) & 0xff); | ||
50 | } | ||
51 | |||
52 | static int eps_set_state(struct eps_cpu_data *centaur, | ||
53 | unsigned int cpu, | ||
54 | u32 dest_state) | ||
55 | { | ||
56 | struct cpufreq_freqs freqs; | ||
57 | u32 lo, hi; | ||
58 | int err = 0; | ||
59 | int i; | ||
60 | |||
61 | freqs.old = eps_get(cpu); | ||
62 | freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff); | ||
63 | freqs.cpu = cpu; | ||
64 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
65 | |||
66 | /* Wait while CPU is busy */ | ||
67 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
68 | i = 0; | ||
69 | while (lo & ((1 << 16) | (1 << 17))) { | ||
70 | udelay(16); | ||
71 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
72 | i++; | ||
73 | if (unlikely(i > 64)) { | ||
74 | err = -ENODEV; | ||
75 | goto postchange; | ||
76 | } | ||
77 | } | ||
78 | /* Set new multiplier and voltage */ | ||
79 | wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0); | ||
80 | /* Wait until transition end */ | ||
81 | i = 0; | ||
82 | do { | ||
83 | udelay(16); | ||
84 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
85 | i++; | ||
86 | if (unlikely(i > 64)) { | ||
87 | err = -ENODEV; | ||
88 | goto postchange; | ||
89 | } | ||
90 | } while (lo & ((1 << 16) | (1 << 17))); | ||
91 | |||
92 | /* Return current frequency */ | ||
93 | postchange: | ||
94 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
95 | freqs.new = centaur->fsb * ((lo >> 8) & 0xff); | ||
96 | |||
97 | #ifdef DEBUG | ||
98 | { | ||
99 | u8 current_multiplier, current_voltage; | ||
100 | |||
101 | /* Print voltage and multiplier */ | ||
102 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
103 | current_voltage = lo & 0xff; | ||
104 | printk(KERN_INFO "eps: Current voltage = %dmV\n", | ||
105 | current_voltage * 16 + 700); | ||
106 | current_multiplier = (lo >> 8) & 0xff; | ||
107 | printk(KERN_INFO "eps: Current multiplier = %d\n", | ||
108 | current_multiplier); | ||
109 | } | ||
110 | #endif | ||
111 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
112 | return err; | ||
113 | } | ||
114 | |||
115 | static int eps_target(struct cpufreq_policy *policy, | ||
116 | unsigned int target_freq, | ||
117 | unsigned int relation) | ||
118 | { | ||
119 | struct eps_cpu_data *centaur; | ||
120 | unsigned int newstate = 0; | ||
121 | unsigned int cpu = policy->cpu; | ||
122 | unsigned int dest_state; | ||
123 | int ret; | ||
124 | |||
125 | if (unlikely(eps_cpu[cpu] == NULL)) | ||
126 | return -ENODEV; | ||
127 | centaur = eps_cpu[cpu]; | ||
128 | |||
129 | if (unlikely(cpufreq_frequency_table_target(policy, | ||
130 | &eps_cpu[cpu]->freq_table[0], | ||
131 | target_freq, | ||
132 | relation, | ||
133 | &newstate))) { | ||
134 | return -EINVAL; | ||
135 | } | ||
136 | |||
137 | /* Make frequency transition */ | ||
138 | dest_state = centaur->freq_table[newstate].index & 0xffff; | ||
139 | ret = eps_set_state(centaur, cpu, dest_state); | ||
140 | if (ret) | ||
141 | printk(KERN_ERR "eps: Timeout!\n"); | ||
142 | return ret; | ||
143 | } | ||
144 | |||
145 | static int eps_verify(struct cpufreq_policy *policy) | ||
146 | { | ||
147 | return cpufreq_frequency_table_verify(policy, | ||
148 | &eps_cpu[policy->cpu]->freq_table[0]); | ||
149 | } | ||
150 | |||
151 | static int eps_cpu_init(struct cpufreq_policy *policy) | ||
152 | { | ||
153 | unsigned int i; | ||
154 | u32 lo, hi; | ||
155 | u64 val; | ||
156 | u8 current_multiplier, current_voltage; | ||
157 | u8 max_multiplier, max_voltage; | ||
158 | u8 min_multiplier, min_voltage; | ||
159 | u8 brand = 0; | ||
160 | u32 fsb; | ||
161 | struct eps_cpu_data *centaur; | ||
162 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
163 | struct cpufreq_frequency_table *f_table; | ||
164 | int k, step, voltage; | ||
165 | int ret; | ||
166 | int states; | ||
167 | |||
168 | if (policy->cpu != 0) | ||
169 | return -ENODEV; | ||
170 | |||
171 | /* Check brand */ | ||
172 | printk(KERN_INFO "eps: Detected VIA "); | ||
173 | |||
174 | switch (c->x86_model) { | ||
175 | case 10: | ||
176 | rdmsr(0x1153, lo, hi); | ||
177 | brand = (((lo >> 2) ^ lo) >> 18) & 3; | ||
178 | printk(KERN_CONT "Model A "); | ||
179 | break; | ||
180 | case 13: | ||
181 | rdmsr(0x1154, lo, hi); | ||
182 | brand = (((lo >> 4) ^ (lo >> 2))) & 0x000000ff; | ||
183 | printk(KERN_CONT "Model D "); | ||
184 | break; | ||
185 | } | ||
186 | |||
187 | switch (brand) { | ||
188 | case EPS_BRAND_C7M: | ||
189 | printk(KERN_CONT "C7-M\n"); | ||
190 | break; | ||
191 | case EPS_BRAND_C7: | ||
192 | printk(KERN_CONT "C7\n"); | ||
193 | break; | ||
194 | case EPS_BRAND_EDEN: | ||
195 | printk(KERN_CONT "Eden\n"); | ||
196 | break; | ||
197 | case EPS_BRAND_C7D: | ||
198 | printk(KERN_CONT "C7-D\n"); | ||
199 | break; | ||
200 | case EPS_BRAND_C3: | ||
201 | printk(KERN_CONT "C3\n"); | ||
202 | return -ENODEV; | ||
203 | break; | ||
204 | } | ||
205 | /* Enable Enhanced PowerSaver */ | ||
206 | rdmsrl(MSR_IA32_MISC_ENABLE, val); | ||
207 | if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { | ||
208 | val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; | ||
209 | wrmsrl(MSR_IA32_MISC_ENABLE, val); | ||
210 | /* Can be locked at 0 */ | ||
211 | rdmsrl(MSR_IA32_MISC_ENABLE, val); | ||
212 | if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { | ||
213 | printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n"); | ||
214 | return -ENODEV; | ||
215 | } | ||
216 | } | ||
217 | |||
218 | /* Print voltage and multiplier */ | ||
219 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
220 | current_voltage = lo & 0xff; | ||
221 | printk(KERN_INFO "eps: Current voltage = %dmV\n", | ||
222 | current_voltage * 16 + 700); | ||
223 | current_multiplier = (lo >> 8) & 0xff; | ||
224 | printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier); | ||
225 | |||
226 | /* Print limits */ | ||
227 | max_voltage = hi & 0xff; | ||
228 | printk(KERN_INFO "eps: Highest voltage = %dmV\n", | ||
229 | max_voltage * 16 + 700); | ||
230 | max_multiplier = (hi >> 8) & 0xff; | ||
231 | printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier); | ||
232 | min_voltage = (hi >> 16) & 0xff; | ||
233 | printk(KERN_INFO "eps: Lowest voltage = %dmV\n", | ||
234 | min_voltage * 16 + 700); | ||
235 | min_multiplier = (hi >> 24) & 0xff; | ||
236 | printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier); | ||
237 | |||
238 | /* Sanity checks */ | ||
239 | if (current_multiplier == 0 || max_multiplier == 0 | ||
240 | || min_multiplier == 0) | ||
241 | return -EINVAL; | ||
242 | if (current_multiplier > max_multiplier | ||
243 | || max_multiplier <= min_multiplier) | ||
244 | return -EINVAL; | ||
245 | if (current_voltage > 0x1f || max_voltage > 0x1f) | ||
246 | return -EINVAL; | ||
247 | if (max_voltage < min_voltage) | ||
248 | return -EINVAL; | ||
249 | |||
250 | /* Calc FSB speed */ | ||
251 | fsb = cpu_khz / current_multiplier; | ||
252 | /* Calc number of p-states supported */ | ||
253 | if (brand == EPS_BRAND_C7M) | ||
254 | states = max_multiplier - min_multiplier + 1; | ||
255 | else | ||
256 | states = 2; | ||
257 | |||
258 | /* Allocate private data and frequency table for current cpu */ | ||
259 | centaur = kzalloc(sizeof(struct eps_cpu_data) | ||
260 | + (states + 1) * sizeof(struct cpufreq_frequency_table), | ||
261 | GFP_KERNEL); | ||
262 | if (!centaur) | ||
263 | return -ENOMEM; | ||
264 | eps_cpu[0] = centaur; | ||
265 | |||
266 | /* Copy basic values */ | ||
267 | centaur->fsb = fsb; | ||
268 | |||
269 | /* Fill frequency and MSR value table */ | ||
270 | f_table = &centaur->freq_table[0]; | ||
271 | if (brand != EPS_BRAND_C7M) { | ||
272 | f_table[0].frequency = fsb * min_multiplier; | ||
273 | f_table[0].index = (min_multiplier << 8) | min_voltage; | ||
274 | f_table[1].frequency = fsb * max_multiplier; | ||
275 | f_table[1].index = (max_multiplier << 8) | max_voltage; | ||
276 | f_table[2].frequency = CPUFREQ_TABLE_END; | ||
277 | } else { | ||
278 | k = 0; | ||
279 | step = ((max_voltage - min_voltage) * 256) | ||
280 | / (max_multiplier - min_multiplier); | ||
281 | for (i = min_multiplier; i <= max_multiplier; i++) { | ||
282 | voltage = (k * step) / 256 + min_voltage; | ||
283 | f_table[k].frequency = fsb * i; | ||
284 | f_table[k].index = (i << 8) | voltage; | ||
285 | k++; | ||
286 | } | ||
287 | f_table[k].frequency = CPUFREQ_TABLE_END; | ||
288 | } | ||
289 | |||
290 | policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */ | ||
291 | policy->cur = fsb * current_multiplier; | ||
292 | |||
293 | ret = cpufreq_frequency_table_cpuinfo(policy, &centaur->freq_table[0]); | ||
294 | if (ret) { | ||
295 | kfree(centaur); | ||
296 | return ret; | ||
297 | } | ||
298 | |||
299 | cpufreq_frequency_table_get_attr(&centaur->freq_table[0], policy->cpu); | ||
300 | return 0; | ||
301 | } | ||
302 | |||
303 | static int eps_cpu_exit(struct cpufreq_policy *policy) | ||
304 | { | ||
305 | unsigned int cpu = policy->cpu; | ||
306 | struct eps_cpu_data *centaur; | ||
307 | u32 lo, hi; | ||
308 | |||
309 | if (eps_cpu[cpu] == NULL) | ||
310 | return -ENODEV; | ||
311 | centaur = eps_cpu[cpu]; | ||
312 | |||
313 | /* Get max frequency */ | ||
314 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
315 | /* Set max frequency */ | ||
316 | eps_set_state(centaur, cpu, hi & 0xffff); | ||
317 | /* Bye */ | ||
318 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
319 | kfree(eps_cpu[cpu]); | ||
320 | eps_cpu[cpu] = NULL; | ||
321 | return 0; | ||
322 | } | ||
323 | |||
324 | static struct freq_attr *eps_attr[] = { | ||
325 | &cpufreq_freq_attr_scaling_available_freqs, | ||
326 | NULL, | ||
327 | }; | ||
328 | |||
329 | static struct cpufreq_driver eps_driver = { | ||
330 | .verify = eps_verify, | ||
331 | .target = eps_target, | ||
332 | .init = eps_cpu_init, | ||
333 | .exit = eps_cpu_exit, | ||
334 | .get = eps_get, | ||
335 | .name = "e_powersaver", | ||
336 | .owner = THIS_MODULE, | ||
337 | .attr = eps_attr, | ||
338 | }; | ||
339 | |||
340 | static int __init eps_init(void) | ||
341 | { | ||
342 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
343 | |||
344 | /* This driver will work only on Centaur C7 processors with | ||
345 | * Enhanced SpeedStep/PowerSaver registers */ | ||
346 | if (c->x86_vendor != X86_VENDOR_CENTAUR | ||
347 | || c->x86 != 6 || c->x86_model < 10) | ||
348 | return -ENODEV; | ||
349 | if (!cpu_has(c, X86_FEATURE_EST)) | ||
350 | return -ENODEV; | ||
351 | |||
352 | if (cpufreq_register_driver(&eps_driver)) | ||
353 | return -EINVAL; | ||
354 | return 0; | ||
355 | } | ||
356 | |||
357 | static void __exit eps_exit(void) | ||
358 | { | ||
359 | cpufreq_unregister_driver(&eps_driver); | ||
360 | } | ||
361 | |||
362 | MODULE_AUTHOR("Rafal Bilski <rafalbilski@interia.pl>"); | ||
363 | MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's."); | ||
364 | MODULE_LICENSE("GPL"); | ||
365 | |||
366 | module_init(eps_init); | ||
367 | module_exit(eps_exit); | ||
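(Editorial aside, not part of the patch: as the eps_cpu_init() printouts above show, e_powersaver treats MSR_IA32_PERF_STATUS as packed voltage/multiplier bytes: millivolts are (VID byte * 16) + 700 and the multiplier is the next byte. The sketch below decodes an arbitrary, hypothetical register value the same way.)

#include <stdint.h>
#include <stdio.h>

struct eps_vid {
	unsigned int mv;                       /* core voltage in millivolts */
	unsigned int mult;                     /* FSB multiplier */
};

static struct eps_vid decode(uint32_t word)
{
	struct eps_vid v;

	v.mv   = (word & 0xff) * 16 + 700;     /* VID byte -> millivolts */
	v.mult = (word >> 8) & 0xff;           /* FID byte -> multiplier */
	return v;
}

int main(void)
{
	uint32_t lo = 0x0809;                  /* hypothetical current state */
	struct eps_vid cur = decode(lo);

	printf("current: %u mV, x%u\n", cur.mv, cur.mult);   /* 844 mV, x8 */
	return 0;
}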
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c deleted file mode 100644 index c587db472a75..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ /dev/null | |||
@@ -1,309 +0,0 @@ | |||
1 | /* | ||
2 | * elanfreq: cpufreq driver for the AMD ELAN family | ||
3 | * | ||
4 | * (c) Copyright 2002 Robert Schwebel <r.schwebel@pengutronix.de> | ||
5 | * | ||
6 | * Parts of this code are (c) Sven Geggus <sven@geggus.net> | ||
7 | * | ||
8 | * All Rights Reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * 2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel | ||
16 | * | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/init.h> | ||
22 | |||
23 | #include <linux/delay.h> | ||
24 | #include <linux/cpufreq.h> | ||
25 | |||
26 | #include <asm/msr.h> | ||
27 | #include <linux/timex.h> | ||
28 | #include <linux/io.h> | ||
29 | |||
30 | #define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ | ||
31 | #define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ | ||
32 | |||
33 | /* Module parameter */ | ||
34 | static int max_freq; | ||
35 | |||
36 | struct s_elan_multiplier { | ||
37 | int clock; /* frequency in kHz */ | ||
38 | int val40h; /* PMU Force Mode register */ | ||
39 | int val80h; /* CPU Clock Speed Register */ | ||
40 | }; | ||
41 | |||
42 | /* | ||
43 | * It is important that the frequencies | ||
44 | * are listed in ascending order here! | ||
45 | */ | ||
46 | static struct s_elan_multiplier elan_multiplier[] = { | ||
47 | {1000, 0x02, 0x18}, | ||
48 | {2000, 0x02, 0x10}, | ||
49 | {4000, 0x02, 0x08}, | ||
50 | {8000, 0x00, 0x00}, | ||
51 | {16000, 0x00, 0x02}, | ||
52 | {33000, 0x00, 0x04}, | ||
53 | {66000, 0x01, 0x04}, | ||
54 | {99000, 0x01, 0x05} | ||
55 | }; | ||
56 | |||
57 | static struct cpufreq_frequency_table elanfreq_table[] = { | ||
58 | {0, 1000}, | ||
59 | {1, 2000}, | ||
60 | {2, 4000}, | ||
61 | {3, 8000}, | ||
62 | {4, 16000}, | ||
63 | {5, 33000}, | ||
64 | {6, 66000}, | ||
65 | {7, 99000}, | ||
66 | {0, CPUFREQ_TABLE_END}, | ||
67 | }; | ||
68 | |||
69 | |||
70 | /** | ||
71 | * elanfreq_get_cpu_frequency: determine current cpu speed | ||
72 | * | ||
73 | * Finds out at which frequency the CPU of the Elan SOC runs | ||
74 | * at the moment. Frequencies from 1 to 33 MHz are generated | ||
75 | * the normal way; 66 and 99 MHz are called "Hyperspeed Mode" | ||
76 | * and have the rest of the chip running at 33 MHz. | ||
77 | */ | ||
78 | |||
79 | static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) | ||
80 | { | ||
81 | u8 clockspeed_reg; /* Clock Speed Register */ | ||
82 | |||
83 | local_irq_disable(); | ||
84 | outb_p(0x80, REG_CSCIR); | ||
85 | clockspeed_reg = inb_p(REG_CSCDR); | ||
86 | local_irq_enable(); | ||
87 | |||
88 | if ((clockspeed_reg & 0xE0) == 0xE0) | ||
89 | return 0; | ||
90 | |||
91 | /* Are we in CPU clock multiplied mode (66/99 MHz)? */ | ||
92 | if ((clockspeed_reg & 0xE0) == 0xC0) { | ||
93 | if ((clockspeed_reg & 0x01) == 0) | ||
94 | return 66000; | ||
95 | else | ||
96 | return 99000; | ||
97 | } | ||
98 | |||
99 | /* 33 MHz is not 32 MHz... */ | ||
100 | if ((clockspeed_reg & 0xE0) == 0xA0) | ||
101 | return 33000; | ||
102 | |||
103 | return (1<<((clockspeed_reg & 0xE0) >> 5)) * 1000; | ||
104 | } | ||
105 | |||
106 | |||
107 | /** | ||
108 | * elanfreq_set_cpu_frequency: Change the CPU core frequency | ||
109 | * @cpu: cpu number | ||
110 | * @freq: frequency in kHz | ||
111 | * | ||
112 | * This function takes a frequency value and changes the CPU frequency | ||
113 | * according to this. Note that the frequency has to be checked by | ||
114 | * elanfreq_validatespeed() for correctness! | ||
115 | * | ||
116 | * There is no return value. | ||
117 | */ | ||
118 | |||
119 | static void elanfreq_set_cpu_state(unsigned int state) | ||
120 | { | ||
121 | struct cpufreq_freqs freqs; | ||
122 | |||
123 | freqs.old = elanfreq_get_cpu_frequency(0); | ||
124 | freqs.new = elan_multiplier[state].clock; | ||
125 | freqs.cpu = 0; /* elanfreq.c is UP only driver */ | ||
126 | |||
127 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
128 | |||
129 | printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n", | ||
130 | elan_multiplier[state].clock); | ||
131 | |||
132 | |||
133 | /* | ||
134 | * Access to the Elan's internal registers is indexed via | ||
135 | * 0x22: Chip Setup & Control Register Index Register (CSCI) | ||
136 | * 0x23: Chip Setup & Control Register Data Register (CSCD) | ||
137 | * | ||
138 | */ | ||
139 | |||
140 | /* | ||
141 | * 0x40 is the Power Management Unit's Force Mode Register. | ||
142 | * Bit 6 enables Hyperspeed Mode (66/99 MHz core frequency) | ||
143 | */ | ||
144 | |||
145 | local_irq_disable(); | ||
146 | outb_p(0x40, REG_CSCIR); /* Disable hyperspeed mode */ | ||
147 | outb_p(0x00, REG_CSCDR); | ||
148 | local_irq_enable(); /* wait till internal pipelines and */ | ||
149 | udelay(1000); /* buffers have cleaned up */ | ||
150 | |||
151 | local_irq_disable(); | ||
152 | |||
153 | /* now, set the CPU clock speed register (0x80) */ | ||
154 | outb_p(0x80, REG_CSCIR); | ||
155 | outb_p(elan_multiplier[state].val80h, REG_CSCDR); | ||
156 | |||
157 | /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */ | ||
158 | outb_p(0x40, REG_CSCIR); | ||
159 | outb_p(elan_multiplier[state].val40h, REG_CSCDR); | ||
160 | udelay(10000); | ||
161 | local_irq_enable(); | ||
162 | |||
163 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
164 | }; | ||
165 | |||
166 | |||
167 | /** | ||
168 | * elanfreq_validatespeed: test if frequency range is valid | ||
169 | * @policy: the policy to validate | ||
170 | * | ||
171 | * This function checks if a given frequency range in kHz is valid | ||
172 | * for the hardware supported by the driver. | ||
173 | */ | ||
174 | |||
175 | static int elanfreq_verify(struct cpufreq_policy *policy) | ||
176 | { | ||
177 | return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); | ||
178 | } | ||
179 | |||
180 | static int elanfreq_target(struct cpufreq_policy *policy, | ||
181 | unsigned int target_freq, | ||
182 | unsigned int relation) | ||
183 | { | ||
184 | unsigned int newstate = 0; | ||
185 | |||
186 | if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], | ||
187 | target_freq, relation, &newstate)) | ||
188 | return -EINVAL; | ||
189 | |||
190 | elanfreq_set_cpu_state(newstate); | ||
191 | |||
192 | return 0; | ||
193 | } | ||
194 | |||
195 | |||
196 | /* | ||
197 | * Module init and exit code | ||
198 | */ | ||
199 | |||
200 | static int elanfreq_cpu_init(struct cpufreq_policy *policy) | ||
201 | { | ||
202 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
203 | unsigned int i; | ||
204 | int result; | ||
205 | |||
206 | /* capability check */ | ||
207 | if ((c->x86_vendor != X86_VENDOR_AMD) || | ||
208 | (c->x86 != 4) || (c->x86_model != 10)) | ||
209 | return -ENODEV; | ||
210 | |||
211 | /* max freq */ | ||
212 | if (!max_freq) | ||
213 | max_freq = elanfreq_get_cpu_frequency(0); | ||
214 | |||
215 | /* table init */ | ||
216 | for (i = 0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { | ||
217 | if (elanfreq_table[i].frequency > max_freq) | ||
218 | elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; | ||
219 | } | ||
220 | |||
221 | /* cpuinfo and default policy values */ | ||
222 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
223 | policy->cur = elanfreq_get_cpu_frequency(0); | ||
224 | |||
225 | result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table); | ||
226 | if (result) | ||
227 | return result; | ||
228 | |||
229 | cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); | ||
230 | return 0; | ||
231 | } | ||
232 | |||
233 | |||
234 | static int elanfreq_cpu_exit(struct cpufreq_policy *policy) | ||
235 | { | ||
236 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
237 | return 0; | ||
238 | } | ||
239 | |||
240 | |||
241 | #ifndef MODULE | ||
242 | /** | ||
243 | * elanfreq_setup - elanfreq command line parameter parsing | ||
244 | * | ||
245 | * elanfreq command line parameter. Use: | ||
246 | * elanfreq=66000 | ||
247 | * to set the maximum CPU frequency to 66 MHz. Note that if | ||
248 | * you do not give this boot parameter, the maximum | ||
249 | * frequency will fall back to the _current_ CPU frequency, which | ||
250 | * might be lower. If you build this as a module, use the | ||
251 | * max_freq module parameter instead. | ||
252 | */ | ||
253 | static int __init elanfreq_setup(char *str) | ||
254 | { | ||
255 | max_freq = simple_strtoul(str, &str, 0); | ||
256 | printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n"); | ||
257 | return 1; | ||
258 | } | ||
259 | __setup("elanfreq=", elanfreq_setup); | ||
260 | #endif | ||
261 | |||
262 | |||
263 | static struct freq_attr *elanfreq_attr[] = { | ||
264 | &cpufreq_freq_attr_scaling_available_freqs, | ||
265 | NULL, | ||
266 | }; | ||
267 | |||
268 | |||
269 | static struct cpufreq_driver elanfreq_driver = { | ||
270 | .get = elanfreq_get_cpu_frequency, | ||
271 | .verify = elanfreq_verify, | ||
272 | .target = elanfreq_target, | ||
273 | .init = elanfreq_cpu_init, | ||
274 | .exit = elanfreq_cpu_exit, | ||
275 | .name = "elanfreq", | ||
276 | .owner = THIS_MODULE, | ||
277 | .attr = elanfreq_attr, | ||
278 | }; | ||
279 | |||
280 | |||
281 | static int __init elanfreq_init(void) | ||
282 | { | ||
283 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
284 | |||
285 | /* Test if we have the right hardware */ | ||
286 | if ((c->x86_vendor != X86_VENDOR_AMD) || | ||
287 | (c->x86 != 4) || (c->x86_model != 10)) { | ||
288 | printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); | ||
289 | return -ENODEV; | ||
290 | } | ||
291 | return cpufreq_register_driver(&elanfreq_driver); | ||
292 | } | ||
293 | |||
294 | |||
295 | static void __exit elanfreq_exit(void) | ||
296 | { | ||
297 | cpufreq_unregister_driver(&elanfreq_driver); | ||
298 | } | ||
299 | |||
300 | |||
301 | module_param(max_freq, int, 0444); | ||
302 | |||
303 | MODULE_LICENSE("GPL"); | ||
304 | MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, " | ||
305 | "Sven Geggus <sven@geggus.net>"); | ||
306 | MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs"); | ||
307 | |||
308 | module_init(elanfreq_init); | ||
309 | module_exit(elanfreq_exit); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c deleted file mode 100644 index 32974cf84232..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ /dev/null | |||
@@ -1,517 +0,0 @@ | |||
1 | /* | ||
2 | * Cyrix MediaGX and NatSemi Geode Suspend Modulation | ||
3 | * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com> | ||
4 | * (C) 2002 Hiroshi Miura <miura@da-cha.org> | ||
5 | * All Rights Reserved | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * version 2 as published by the Free Software Foundation | ||
10 | * | ||
11 | * The author(s) of this software shall not be held liable for damages | ||
12 | * of any nature resulting due to the use of this software. This | ||
13 | * software is provided AS-IS with no warranties. | ||
14 | * | ||
15 | * Theoretical note: | ||
16 | * | ||
17 | * (see Geode(tm) CS5530 manual (rev.4.1) page.56) | ||
18 | * | ||
19 | * CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0 | ||
20 | * is based on Suspend Modulation. | ||
21 | * | ||
22 | * Suspend Modulation works by asserting and de-asserting the SUSP# pin | ||
23 | * to CPU(GX1/GXLV) for configurable durations. When asserting SUSP# | ||
24 | * the CPU enters an idle state. GX1 stops its core clock when SUSP# is | ||
25 | * asserted, which reduces power consumption. | ||
26 | * | ||
27 | * Suspend Modulation's OFF/ON durations are configurable | ||
28 | * with 'Suspend Modulation OFF Count Register' | ||
29 | * and 'Suspend Modulation ON Count Register'. | ||
30 | * These registers are 8-bit counters that represent the number of | ||
31 | * 32us intervals for which the SUSP# pin is asserted (ON)/de-asserted (OFF) | ||
32 | * to the processor. | ||
33 | * | ||
34 | * These counters define a ratio which is the effective frequency | ||
35 | * of operation of the system. | ||
36 | * | ||
37 | * OFF Count | ||
38 | * F_eff = Fgx * ---------------------- | ||
39 | * OFF Count + ON Count | ||
40 | * | ||
41 | * 0 <= On Count, Off Count <= 255 | ||
42 | * | ||
43 | * From these limits, we can get register values | ||
44 | * | ||
45 | * off_duration + on_duration <= MAX_DURATION | ||
46 | * on_duration = off_duration * (stock_freq - freq) / freq | ||
47 | * | ||
48 | * off_duration = (freq * DURATION) / stock_freq | ||
49 | * on_duration = DURATION - off_duration | ||
50 | * | ||
51 | * | ||
52 | *--------------------------------------------------------------------------- | ||
53 | * | ||
54 | * ChangeLog: | ||
55 | * Dec. 12, 2003 Hiroshi Miura <miura@da-cha.org> | ||
56 | * - fix on/off register mistake | ||
57 | * - fix cpu_khz calc when it stops cpu modulation. | ||
58 | * | ||
59 | * Dec. 11, 2002 Hiroshi Miura <miura@da-cha.org> | ||
60 | * - rewrite for Cyrix MediaGX Cx5510/5520 and | ||
61 | * NatSemi Geode Cs5530(A). | ||
62 | * | ||
63 | * Jul. ??, 2002 Zwane Mwaikambo <zwane@commfireservices.com> | ||
64 | * - cs5530_mod patch for 2.4.19-rc1. | ||
65 | * | ||
66 | *--------------------------------------------------------------------------- | ||
67 | * | ||
68 | * Todo | ||
69 | * Test on machines with 5510, 5530, 5530A | ||
70 | */ | ||
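
As a quick sanity check of the duration formula in the header comment above, here is a small standalone sketch (illustration only; the stock and target frequencies are assumed example values). gx_validate_speed() below performs the equivalent search over every duration up to max_duration instead of computing the durations directly.

#include <stdio.h>

int main(void)
{
	unsigned int stock_freq = 200000;   /* assumed stock core clock, kHz */
	unsigned int target     = 150000;   /* assumed requested speed, kHz  */
	unsigned int duration   = 255;      /* off_duration + on_duration    */

	/* off_duration = (freq * DURATION) / stock_freq, on = the remainder */
	unsigned int off = (target * duration) / stock_freq;
	unsigned int on  = duration - off;

	/* F_eff = Fgx * OFF / (OFF + ON), as in the header comment */
	unsigned int f_eff = (stock_freq * off) / (off + on);

	printf("off=%u on=%u -> F_eff=%u kHz\n", off, on, f_eff);
	return 0;
}
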
71 | |||
72 | /************************************************************************ | ||
73 | * Suspend Modulation - Definitions * | ||
74 | ************************************************************************/ | ||
75 | |||
76 | #include <linux/kernel.h> | ||
77 | #include <linux/module.h> | ||
78 | #include <linux/init.h> | ||
79 | #include <linux/smp.h> | ||
80 | #include <linux/cpufreq.h> | ||
81 | #include <linux/pci.h> | ||
82 | #include <linux/errno.h> | ||
83 | #include <linux/slab.h> | ||
84 | |||
85 | #include <asm/processor-cyrix.h> | ||
86 | |||
87 | /* PCI config registers, all at F0 */ | ||
88 | #define PCI_PMER1 0x80 /* power management enable register 1 */ | ||
89 | #define PCI_PMER2 0x81 /* power management enable register 2 */ | ||
90 | #define PCI_PMER3 0x82 /* power management enable register 3 */ | ||
91 | #define PCI_IRQTC 0x8c /* irq speedup timer counter register:typical 2 to 4ms */ | ||
92 | #define PCI_VIDTC 0x8d /* video speedup timer counter register: typical 50 to 100ms */ | ||
93 | #define PCI_MODOFF 0x94 /* suspend modulation OFF counter register, 1 = 32us */ | ||
94 | #define PCI_MODON 0x95 /* suspend modulation ON counter register */ | ||
95 | #define PCI_SUSCFG 0x96 /* suspend configuration register */ | ||
96 | |||
97 | /* PMER1 bits */ | ||
98 | #define GPM (1<<0) /* global power management */ | ||
99 | #define GIT (1<<1) /* globally enable PM device idle timers */ | ||
100 | #define GTR (1<<2) /* globally enable IO traps */ | ||
101 | #define IRQ_SPDUP (1<<3) /* disable clock throttle during interrupt handling */ | ||
102 | #define VID_SPDUP (1<<4) /* disable clock throttle during vga video handling */ | ||
103 | |||
104 | /* SUSCFG bits */ | ||
105 | #define SUSMOD (1<<0) /* enable/disable suspend modulation */ | ||
106 | /* the below is supported only with cs5530 (after rev.1.2)/cs5530A */ | ||
107 | #define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */ | ||
108 | /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */ | ||
109 | #define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */ | ||
110 | /* the below is supported only with cs5530A */ | ||
111 | #define PWRSVE_ISA (1<<3) /* stop ISA clock */ | ||
112 | #define PWRSVE (1<<4) /* active idle */ | ||
113 | |||
114 | struct gxfreq_params { | ||
115 | u8 on_duration; | ||
116 | u8 off_duration; | ||
117 | u8 pci_suscfg; | ||
118 | u8 pci_pmer1; | ||
119 | u8 pci_pmer2; | ||
120 | struct pci_dev *cs55x0; | ||
121 | }; | ||
122 | |||
123 | static struct gxfreq_params *gx_params; | ||
124 | static int stock_freq; | ||
125 | |||
126 | /* PCI bus clock in kHz - defaults to 30000 (30 MHz) if cpu_khz is not available */ | ||
127 | static int pci_busclk; | ||
128 | module_param(pci_busclk, int, 0444); | ||
129 | |||
130 | /* maximum duration for which the cpu may be suspended | ||
131 | * (32us * MAX_DURATION). If no parameter is given, this defaults | ||
132 | * to 255. | ||
133 | * Note that this leads to a maximum of 8 ms(!) where the CPU clock | ||
134 | * is suspended -- processing power is just 0.39% of what it used to be, | ||
135 | * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */ | ||
136 | static int max_duration = 255; | ||
137 | module_param(max_duration, int, 0444); | ||
138 | |||
139 | /* For the default policy, we want at least some processing power | ||
140 | * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV) | ||
141 | */ | ||
142 | #define POLICY_MIN_DIV 20 | ||
143 | |||
144 | |||
145 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
146 | "gx-suspmod", msg) | ||
147 | |||
148 | /** | ||
149 | * we can detect the core multiplier from dir0_lsb | ||
150 | * from GX1 datasheet p.56, | ||
151 | * MULT[3:0]: | ||
152 | * 0000 = SYSCLK multiplied by 4 (test only) | ||
153 | * 0001 = SYSCLK multiplied by 10 | ||
154 | * 0010 = SYSCLK multiplied by 4 | ||
155 | * 0011 = SYSCLK multiplied by 6 | ||
156 | * 0100 = SYSCLK multiplied by 9 | ||
157 | * 0101 = SYSCLK multiplied by 5 | ||
158 | * 0110 = SYSCLK multiplied by 7 | ||
159 | * 0111 = SYSCLK multiplied by 8 | ||
160 | * of 33.3MHz | ||
161 | **/ | ||
162 | static int gx_freq_mult[16] = { | ||
163 | 4, 10, 4, 6, 9, 5, 7, 8, | ||
164 | 0, 0, 0, 0, 0, 0, 0, 0 | ||
165 | }; | ||
166 | |||
167 | |||
168 | /**************************************************************** | ||
169 | * Low Level chipset interface * | ||
170 | ****************************************************************/ | ||
171 | static struct pci_device_id gx_chipset_tbl[] __initdata = { | ||
172 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY), }, | ||
173 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5520), }, | ||
174 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5510), }, | ||
175 | { 0, }, | ||
176 | }; | ||
177 | |||
178 | static void gx_write_byte(int reg, int value) | ||
179 | { | ||
180 | pci_write_config_byte(gx_params->cs55x0, reg, value); | ||
181 | } | ||
182 | |||
183 | /** | ||
184 | * gx_detect_chipset: | ||
185 | * | ||
186 | **/ | ||
187 | static __init struct pci_dev *gx_detect_chipset(void) | ||
188 | { | ||
189 | struct pci_dev *gx_pci = NULL; | ||
190 | |||
191 | /* check if CPU is a MediaGX or a Geode. */ | ||
192 | if ((boot_cpu_data.x86_vendor != X86_VENDOR_NSC) && | ||
193 | (boot_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) { | ||
194 | dprintk("error: no MediaGX/Geode processor found!\n"); | ||
195 | return NULL; | ||
196 | } | ||
197 | |||
198 | /* detect which companion chip is used */ | ||
199 | for_each_pci_dev(gx_pci) { | ||
200 | if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) | ||
201 | return gx_pci; | ||
202 | } | ||
203 | |||
204 | dprintk("error: no supported chipset found!\n"); | ||
205 | return NULL; | ||
206 | } | ||
207 | |||
208 | /** | ||
209 | * gx_get_cpuspeed: | ||
210 | * | ||
211 | * Finds out at which effective frequency the Cyrix MediaGX/NatSemi | ||
212 | * Geode CPU runs. | ||
213 | */ | ||
214 | static unsigned int gx_get_cpuspeed(unsigned int cpu) | ||
215 | { | ||
216 | if ((gx_params->pci_suscfg & SUSMOD) == 0) | ||
217 | return stock_freq; | ||
218 | |||
219 | return (stock_freq * gx_params->off_duration) | ||
220 | / (gx_params->on_duration + gx_params->off_duration); | ||
221 | } | ||
222 | |||
223 | /** | ||
224 | * gx_validate_speed: | ||
225 | * determine the closest achievable cpu speed and its on/off durations | ||
226 | * | ||
227 | **/ | ||
228 | |||
229 | static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, | ||
230 | u8 *off_duration) | ||
231 | { | ||
232 | unsigned int i; | ||
233 | u8 tmp_on, tmp_off; | ||
234 | int old_tmp_freq = stock_freq; | ||
235 | int tmp_freq; | ||
236 | |||
237 | *off_duration = 1; | ||
238 | *on_duration = 0; | ||
239 | |||
240 | for (i = max_duration; i > 0; i--) { | ||
241 | tmp_off = ((khz * i) / stock_freq) & 0xff; | ||
242 | tmp_on = i - tmp_off; | ||
243 | tmp_freq = (stock_freq * tmp_off) / i; | ||
244 | /* if this relation is closer to khz, use this. If it's equal, | ||
245 | * prefer it, too - lower latency */ | ||
246 | if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) { | ||
247 | *on_duration = tmp_on; | ||
248 | *off_duration = tmp_off; | ||
249 | old_tmp_freq = tmp_freq; | ||
250 | } | ||
251 | } | ||
252 | |||
253 | return old_tmp_freq; | ||
254 | } | ||
255 | |||
256 | |||
257 | /** | ||
258 | * gx_set_cpuspeed: | ||
259 | * set cpu speed in khz. | ||
260 | **/ | ||
261 | |||
262 | static void gx_set_cpuspeed(unsigned int khz) | ||
263 | { | ||
264 | u8 suscfg, pmer1; | ||
265 | unsigned int new_khz; | ||
266 | unsigned long flags; | ||
267 | struct cpufreq_freqs freqs; | ||
268 | |||
269 | freqs.cpu = 0; | ||
270 | freqs.old = gx_get_cpuspeed(0); | ||
271 | |||
272 | new_khz = gx_validate_speed(khz, &gx_params->on_duration, | ||
273 | &gx_params->off_duration); | ||
274 | |||
275 | freqs.new = new_khz; | ||
276 | |||
277 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
278 | local_irq_save(flags); | ||
279 | |||
280 | |||
281 | |||
282 | if (new_khz != stock_freq) { | ||
283 | /* if new khz == 100% of CPU speed, it is special case */ | ||
284 | switch (gx_params->cs55x0->device) { | ||
285 | case PCI_DEVICE_ID_CYRIX_5530_LEGACY: | ||
286 | pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP; | ||
287 | /* FIXME: need to test other values -- Zwane,Miura */ | ||
288 | /* typical 2 to 4ms */ | ||
289 | gx_write_byte(PCI_IRQTC, 4); | ||
290 | /* typical 50 to 100ms */ | ||
291 | gx_write_byte(PCI_VIDTC, 100); | ||
292 | gx_write_byte(PCI_PMER1, pmer1); | ||
293 | |||
294 | if (gx_params->cs55x0->revision < 0x10) { | ||
295 | /* CS5530(rev 1.2, 1.3) */ | ||
296 | suscfg = gx_params->pci_suscfg|SUSMOD; | ||
297 | } else { | ||
298 | /* CS5530A,B.. */ | ||
299 | suscfg = gx_params->pci_suscfg|SUSMOD|PWRSVE; | ||
300 | } | ||
301 | break; | ||
302 | case PCI_DEVICE_ID_CYRIX_5520: | ||
303 | case PCI_DEVICE_ID_CYRIX_5510: | ||
304 | suscfg = gx_params->pci_suscfg | SUSMOD; | ||
305 | break; | ||
306 | default: | ||
307 | local_irq_restore(flags); | ||
308 | dprintk("fatal: try to set unknown chipset.\n"); | ||
309 | return; | ||
310 | } | ||
311 | } else { | ||
312 | suscfg = gx_params->pci_suscfg & ~(SUSMOD); | ||
313 | gx_params->off_duration = 0; | ||
314 | gx_params->on_duration = 0; | ||
315 | dprintk("suspend modulation disabled: cpu runs 100%% speed.\n"); | ||
316 | } | ||
317 | |||
318 | gx_write_byte(PCI_MODOFF, gx_params->off_duration); | ||
319 | gx_write_byte(PCI_MODON, gx_params->on_duration); | ||
320 | |||
321 | gx_write_byte(PCI_SUSCFG, suscfg); | ||
322 | pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg); | ||
323 | |||
324 | local_irq_restore(flags); | ||
325 | |||
326 | gx_params->pci_suscfg = suscfg; | ||
327 | |||
328 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
329 | |||
330 | dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", | ||
331 | gx_params->on_duration * 32, gx_params->off_duration * 32); | ||
332 | dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); | ||
333 | } | ||
334 | |||
335 | /**************************************************************** | ||
336 | * High level functions * | ||
337 | ****************************************************************/ | ||
338 | |||
339 | /* | ||
340 | * cpufreq_gx_verify: test if frequency range is valid | ||
341 | * | ||
342 | * This function checks if a given frequency range in kHz is valid | ||
343 | * for the hardware supported by the driver. | ||
344 | */ | ||
345 | |||
346 | static int cpufreq_gx_verify(struct cpufreq_policy *policy) | ||
347 | { | ||
348 | unsigned int tmp_freq = 0; | ||
349 | u8 tmp1, tmp2; | ||
350 | |||
351 | if (!stock_freq || !policy) | ||
352 | return -EINVAL; | ||
353 | |||
354 | policy->cpu = 0; | ||
355 | cpufreq_verify_within_limits(policy, (stock_freq / max_duration), | ||
356 | stock_freq); | ||
357 | |||
358 | /* we need to ensure that at least one supported frequency is | ||
359 | * within policy->min and policy->max. If it is not, policy->max | ||
360 | * needs to be increased until one frequency is supported. | ||
361 | * policy->min may not be decreased, though. This way we guarantee a | ||
362 | * specific processing capacity. | ||
363 | */ | ||
364 | tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2); | ||
365 | if (tmp_freq < policy->min) | ||
366 | tmp_freq += stock_freq / max_duration; | ||
367 | policy->min = tmp_freq; | ||
368 | if (policy->min > policy->max) | ||
369 | policy->max = tmp_freq; | ||
370 | tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2); | ||
371 | if (tmp_freq > policy->max) | ||
372 | tmp_freq -= stock_freq / max_duration; | ||
373 | policy->max = tmp_freq; | ||
374 | if (policy->max < policy->min) | ||
375 | policy->max = policy->min; | ||
376 | cpufreq_verify_within_limits(policy, (stock_freq / max_duration), | ||
377 | stock_freq); | ||
378 | |||
379 | return 0; | ||
380 | } | ||
381 | |||
382 | /* | ||
383 | * cpufreq_gx_target: | ||
384 | * | ||
385 | */ | ||
386 | static int cpufreq_gx_target(struct cpufreq_policy *policy, | ||
387 | unsigned int target_freq, | ||
388 | unsigned int relation) | ||
389 | { | ||
390 | u8 tmp1, tmp2; | ||
391 | unsigned int tmp_freq; | ||
392 | |||
393 | if (!stock_freq || !policy) | ||
394 | return -EINVAL; | ||
395 | |||
396 | policy->cpu = 0; | ||
397 | |||
398 | tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2); | ||
399 | while (tmp_freq < policy->min) { | ||
400 | tmp_freq += stock_freq / max_duration; | ||
401 | tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); | ||
402 | } | ||
403 | while (tmp_freq > policy->max) { | ||
404 | tmp_freq -= stock_freq / max_duration; | ||
405 | tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); | ||
406 | } | ||
407 | |||
408 | gx_set_cpuspeed(tmp_freq); | ||
409 | |||
410 | return 0; | ||
411 | } | ||
412 | |||
413 | static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) | ||
414 | { | ||
415 | unsigned int maxfreq, curfreq; | ||
416 | |||
417 | if (!policy || policy->cpu != 0) | ||
418 | return -ENODEV; | ||
419 | |||
420 | /* determine maximum frequency */ | ||
421 | if (pci_busclk) | ||
422 | maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; | ||
423 | else if (cpu_khz) | ||
424 | maxfreq = cpu_khz; | ||
425 | else | ||
426 | maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; | ||
427 | |||
428 | stock_freq = maxfreq; | ||
429 | curfreq = gx_get_cpuspeed(0); | ||
430 | |||
431 | dprintk("cpu max frequency is %d.\n", maxfreq); | ||
432 | dprintk("cpu current frequency is %dkHz.\n", curfreq); | ||
433 | |||
434 | /* setup basic struct for cpufreq API */ | ||
435 | policy->cpu = 0; | ||
436 | |||
437 | if (max_duration < POLICY_MIN_DIV) | ||
438 | policy->min = maxfreq / max_duration; | ||
439 | else | ||
440 | policy->min = maxfreq / POLICY_MIN_DIV; | ||
441 | policy->max = maxfreq; | ||
442 | policy->cur = curfreq; | ||
443 | policy->cpuinfo.min_freq = maxfreq / max_duration; | ||
444 | policy->cpuinfo.max_freq = maxfreq; | ||
445 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
446 | |||
447 | return 0; | ||
448 | } | ||
449 | |||
450 | /* | ||
451 | * cpufreq_gx_init: | ||
452 | * MediaGX/Geode GX initialize cpufreq driver | ||
453 | */ | ||
454 | static struct cpufreq_driver gx_suspmod_driver = { | ||
455 | .get = gx_get_cpuspeed, | ||
456 | .verify = cpufreq_gx_verify, | ||
457 | .target = cpufreq_gx_target, | ||
458 | .init = cpufreq_gx_cpu_init, | ||
459 | .name = "gx-suspmod", | ||
460 | .owner = THIS_MODULE, | ||
461 | }; | ||
462 | |||
463 | static int __init cpufreq_gx_init(void) | ||
464 | { | ||
465 | int ret; | ||
466 | struct gxfreq_params *params; | ||
467 | struct pci_dev *gx_pci; | ||
468 | |||
469 | /* Test if we have the right hardware */ | ||
470 | gx_pci = gx_detect_chipset(); | ||
471 | if (gx_pci == NULL) | ||
472 | return -ENODEV; | ||
473 | |||
474 | /* check whether module parameters are sane */ | ||
475 | if (max_duration > 0xff) | ||
476 | max_duration = 0xff; | ||
477 | |||
478 | dprintk("geode suspend modulation available.\n"); | ||
479 | |||
480 | params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL); | ||
481 | if (params == NULL) | ||
482 | return -ENOMEM; | ||
483 | |||
484 | params->cs55x0 = gx_pci; | ||
485 | gx_params = params; | ||
486 | |||
487 | /* keep cs55x0 configurations */ | ||
488 | pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg)); | ||
489 | pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1)); | ||
490 | pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); | ||
491 | pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); | ||
492 | pci_read_config_byte(params->cs55x0, PCI_MODOFF, | ||
493 | &(params->off_duration)); | ||
494 | |||
495 | ret = cpufreq_register_driver(&gx_suspmod_driver); | ||
496 | if (ret) { | ||
497 | kfree(params); | ||
498 | return ret; /* register error! */ | ||
499 | } | ||
500 | |||
501 | return 0; | ||
502 | } | ||
503 | |||
504 | static void __exit cpufreq_gx_exit(void) | ||
505 | { | ||
506 | cpufreq_unregister_driver(&gx_suspmod_driver); | ||
507 | pci_dev_put(gx_params->cs55x0); | ||
508 | kfree(gx_params); | ||
509 | } | ||
510 | |||
511 | MODULE_AUTHOR("Hiroshi Miura <miura@da-cha.org>"); | ||
512 | MODULE_DESCRIPTION("Cpufreq driver for Cyrix MediaGX and NatSemi Geode"); | ||
513 | MODULE_LICENSE("GPL"); | ||
514 | |||
515 | module_init(cpufreq_gx_init); | ||
516 | module_exit(cpufreq_gx_exit); | ||
517 | |||
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c deleted file mode 100644 index cf48cdd6907d..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ /dev/null | |||
@@ -1,1029 +0,0 @@ | |||
1 | /* | ||
2 | * (C) 2001-2004 Dave Jones. <davej@redhat.com> | ||
3 | * (C) 2002 Padraig Brady. <padraig@antefacto.com> | ||
4 | * | ||
5 | * Licensed under the terms of the GNU GPL License version 2. | ||
6 | * Based upon datasheets & sample CPUs kindly provided by VIA. | ||
7 | * | ||
8 | * VIA currently have 3 different versions of Longhaul. | ||
9 | * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147. | ||
10 | * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0. | ||
11 | * Version 2 of longhaul is backward compatible with v1, but adds | ||
12 | * LONGHAUL MSR for purpose of both frequency and voltage scaling. | ||
13 | * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C). | ||
14 | * Version 3 of longhaul got renamed to Powersaver and redesigned | ||
15 | * to use only the POWERSAVER MSR at 0x110a. | ||
16 | * It is present in Ezra-T (C5M), Nehemiah (C5X) and above. | ||
17 | * It's pretty much the same, feature-wise, as longhaul v2, though | ||
18 | * there is provision for scaling the FSB too; this doesn't work | ||
19 | * well in practice, so we don't even try to use it. | ||
20 | * | ||
21 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
22 | */ | ||
23 | |||
24 | #include <linux/kernel.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/moduleparam.h> | ||
27 | #include <linux/init.h> | ||
28 | #include <linux/cpufreq.h> | ||
29 | #include <linux/pci.h> | ||
30 | #include <linux/slab.h> | ||
31 | #include <linux/string.h> | ||
32 | #include <linux/delay.h> | ||
33 | #include <linux/timex.h> | ||
34 | #include <linux/io.h> | ||
35 | #include <linux/acpi.h> | ||
36 | |||
37 | #include <asm/msr.h> | ||
38 | #include <acpi/processor.h> | ||
39 | |||
40 | #include "longhaul.h" | ||
41 | |||
42 | #define PFX "longhaul: " | ||
43 | |||
44 | #define TYPE_LONGHAUL_V1 1 | ||
45 | #define TYPE_LONGHAUL_V2 2 | ||
46 | #define TYPE_POWERSAVER 3 | ||
47 | |||
48 | #define CPU_SAMUEL 1 | ||
49 | #define CPU_SAMUEL2 2 | ||
50 | #define CPU_EZRA 3 | ||
51 | #define CPU_EZRA_T 4 | ||
52 | #define CPU_NEHEMIAH 5 | ||
53 | #define CPU_NEHEMIAH_C 6 | ||
54 | |||
55 | /* Flags */ | ||
56 | #define USE_ACPI_C3 (1 << 1) | ||
57 | #define USE_NORTHBRIDGE (1 << 2) | ||
58 | |||
59 | static int cpu_model; | ||
60 | static unsigned int numscales = 16; | ||
61 | static unsigned int fsb; | ||
62 | |||
63 | static const struct mV_pos *vrm_mV_table; | ||
64 | static const unsigned char *mV_vrm_table; | ||
65 | |||
66 | static unsigned int highest_speed, lowest_speed; /* kHz */ | ||
67 | static unsigned int minmult, maxmult; | ||
68 | static int can_scale_voltage; | ||
69 | static struct acpi_processor *pr; | ||
70 | static struct acpi_processor_cx *cx; | ||
71 | static u32 acpi_regs_addr; | ||
72 | static u8 longhaul_flags; | ||
73 | static unsigned int longhaul_index; | ||
74 | |||
75 | /* Module parameters */ | ||
76 | static int scale_voltage; | ||
77 | static int disable_acpi_c3; | ||
78 | static int revid_errata; | ||
79 | |||
80 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
81 | "longhaul", msg) | ||
82 | |||
83 | |||
84 | /* Clock ratios multiplied by 10 */ | ||
85 | static int mults[32]; | ||
86 | static int eblcr[32]; | ||
87 | static int longhaul_version; | ||
88 | static struct cpufreq_frequency_table *longhaul_table; | ||
89 | |||
90 | #ifdef CONFIG_CPU_FREQ_DEBUG | ||
91 | static char speedbuffer[8]; | ||
92 | |||
93 | static char *print_speed(int speed) | ||
94 | { | ||
95 | if (speed < 1000) { | ||
96 | snprintf(speedbuffer, sizeof(speedbuffer), "%dMHz", speed); | ||
97 | return speedbuffer; | ||
98 | } | ||
99 | |||
100 | if (speed%1000 == 0) | ||
101 | snprintf(speedbuffer, sizeof(speedbuffer), | ||
102 | "%dGHz", speed/1000); | ||
103 | else | ||
104 | snprintf(speedbuffer, sizeof(speedbuffer), | ||
105 | "%d.%dGHz", speed/1000, (speed%1000)/100); | ||
106 | |||
107 | return speedbuffer; | ||
108 | } | ||
109 | #endif | ||
110 | |||
111 | |||
112 | static unsigned int calc_speed(int mult) | ||
113 | { | ||
114 | int khz; | ||
115 | khz = (mult/10)*fsb; | ||
116 | if (mult%10) | ||
117 | khz += fsb/2; | ||
118 | khz *= 1000; | ||
119 | return khz; | ||
120 | } | ||
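
A short illustration of the "ratio * 10" encoding that calc_speed() expects (illustration only; the 133 MHz FSB and the 4.5x/6.0x ratios are assumed example values). Any non-zero last digit is treated as ".5", which is sufficient because the mults[] tables contain only .0 and .5 ratios.

#include <stdio.h>

/* Same arithmetic as calc_speed() above: mult is the ratio times 10. */
static unsigned int calc_speed_example(int mult, unsigned int fsb_mhz)
{
	unsigned int khz = (mult / 10) * fsb_mhz;   /* whole part of the ratio */
	if (mult % 10)
		khz += fsb_mhz / 2;                 /* ".5" part of the ratio  */
	return khz * 1000;                          /* MHz -> kHz              */
}

int main(void)
{
	printf("4.5 x 133 MHz -> %u kHz\n", calc_speed_example(45, 133));   /* 598000 */
	printf("6.0 x 133 MHz -> %u kHz\n", calc_speed_example(60, 133));   /* 798000 */
	return 0;
}
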
121 | |||
122 | |||
123 | static int longhaul_get_cpu_mult(void) | ||
124 | { | ||
125 | unsigned long invalue = 0, lo, hi; | ||
126 | |||
127 | rdmsr(MSR_IA32_EBL_CR_POWERON, lo, hi); | ||
128 | invalue = (lo & (1<<22|1<<23|1<<24|1<<25))>>22; | ||
129 | if (longhaul_version == TYPE_LONGHAUL_V2 || | ||
130 | longhaul_version == TYPE_POWERSAVER) { | ||
131 | if (lo & (1<<27)) | ||
132 | invalue += 16; | ||
133 | } | ||
134 | return eblcr[invalue]; | ||
135 | } | ||
136 | |||
137 | /* For processor with BCR2 MSR */ | ||
138 | |||
139 | static void do_longhaul1(unsigned int mults_index) | ||
140 | { | ||
141 | union msr_bcr2 bcr2; | ||
142 | |||
143 | rdmsrl(MSR_VIA_BCR2, bcr2.val); | ||
144 | /* Enable software clock multiplier */ | ||
145 | bcr2.bits.ESOFTBF = 1; | ||
146 | bcr2.bits.CLOCKMUL = mults_index & 0xff; | ||
147 | |||
148 | /* Sync to timer tick */ | ||
149 | safe_halt(); | ||
150 | /* Change frequency on next halt or sleep */ | ||
151 | wrmsrl(MSR_VIA_BCR2, bcr2.val); | ||
152 | /* Invoke transition */ | ||
153 | ACPI_FLUSH_CPU_CACHE(); | ||
154 | halt(); | ||
155 | |||
156 | /* Disable software clock multiplier */ | ||
157 | local_irq_disable(); | ||
158 | rdmsrl(MSR_VIA_BCR2, bcr2.val); | ||
159 | bcr2.bits.ESOFTBF = 0; | ||
160 | wrmsrl(MSR_VIA_BCR2, bcr2.val); | ||
161 | } | ||
162 | |||
163 | /* For processor with Longhaul MSR */ | ||
164 | |||
165 | static void do_powersaver(int cx_address, unsigned int mults_index, | ||
166 | unsigned int dir) | ||
167 | { | ||
168 | union msr_longhaul longhaul; | ||
169 | u32 t; | ||
170 | |||
171 | rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
172 | /* Setup new frequency */ | ||
173 | if (!revid_errata) | ||
174 | longhaul.bits.RevisionKey = longhaul.bits.RevisionID; | ||
175 | else | ||
176 | longhaul.bits.RevisionKey = 0; | ||
177 | longhaul.bits.SoftBusRatio = mults_index & 0xf; | ||
178 | longhaul.bits.SoftBusRatio4 = (mults_index & 0x10) >> 4; | ||
179 | /* Setup new voltage */ | ||
180 | if (can_scale_voltage) | ||
181 | longhaul.bits.SoftVID = (mults_index >> 8) & 0x1f; | ||
182 | /* Sync to timer tick */ | ||
183 | safe_halt(); | ||
184 | /* Raise voltage if necessary */ | ||
185 | if (can_scale_voltage && dir) { | ||
186 | longhaul.bits.EnableSoftVID = 1; | ||
187 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
188 | /* Change voltage */ | ||
189 | if (!cx_address) { | ||
190 | ACPI_FLUSH_CPU_CACHE(); | ||
191 | halt(); | ||
192 | } else { | ||
193 | ACPI_FLUSH_CPU_CACHE(); | ||
194 | /* Invoke C3 */ | ||
195 | inb(cx_address); | ||
196 | /* Dummy op - must do something useless after P_LVL3 | ||
197 | * read */ | ||
198 | t = inl(acpi_gbl_FADT.xpm_timer_block.address); | ||
199 | } | ||
200 | longhaul.bits.EnableSoftVID = 0; | ||
201 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
202 | } | ||
203 | |||
204 | /* Change frequency on next halt or sleep */ | ||
205 | longhaul.bits.EnableSoftBusRatio = 1; | ||
206 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
207 | if (!cx_address) { | ||
208 | ACPI_FLUSH_CPU_CACHE(); | ||
209 | halt(); | ||
210 | } else { | ||
211 | ACPI_FLUSH_CPU_CACHE(); | ||
212 | /* Invoke C3 */ | ||
213 | inb(cx_address); | ||
214 | /* Dummy op - must do something useless after P_LVL3 read */ | ||
215 | t = inl(acpi_gbl_FADT.xpm_timer_block.address); | ||
216 | } | ||
217 | /* Disable bus ratio bit */ | ||
218 | longhaul.bits.EnableSoftBusRatio = 0; | ||
219 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
220 | |||
221 | /* Reduce voltage if necessary */ | ||
222 | if (can_scale_voltage && !dir) { | ||
223 | longhaul.bits.EnableSoftVID = 1; | ||
224 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
225 | /* Change voltage */ | ||
226 | if (!cx_address) { | ||
227 | ACPI_FLUSH_CPU_CACHE(); | ||
228 | halt(); | ||
229 | } else { | ||
230 | ACPI_FLUSH_CPU_CACHE(); | ||
231 | /* Invoke C3 */ | ||
232 | inb(cx_address); | ||
233 | /* Dummy op - must do something useless after P_LVL3 | ||
234 | * read */ | ||
235 | t = inl(acpi_gbl_FADT.xpm_timer_block.address); | ||
236 | } | ||
237 | longhaul.bits.EnableSoftVID = 0; | ||
238 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
239 | } | ||
240 | } | ||
241 | |||
242 | /** | ||
243 | * longhaul_setstate() | ||
244 | * @table_index : index into longhaul_table selecting the new ratio. | ||
245 | * | ||
246 | * Sets a new clock ratio. | ||
247 | */ | ||
248 | |||
249 | static void longhaul_setstate(unsigned int table_index) | ||
250 | { | ||
251 | unsigned int mults_index; | ||
252 | int speed, mult; | ||
253 | struct cpufreq_freqs freqs; | ||
254 | unsigned long flags; | ||
255 | unsigned int pic1_mask, pic2_mask; | ||
256 | u16 bm_status = 0; | ||
257 | u32 bm_timeout = 1000; | ||
258 | unsigned int dir = 0; | ||
259 | |||
260 | mults_index = longhaul_table[table_index].index; | ||
261 | /* Safety precautions */ | ||
262 | mult = mults[mults_index & 0x1f]; | ||
263 | if (mult == -1) | ||
264 | return; | ||
265 | speed = calc_speed(mult); | ||
266 | if ((speed > highest_speed) || (speed < lowest_speed)) | ||
267 | return; | ||
268 | /* Voltage transition before frequency transition? */ | ||
269 | if (can_scale_voltage && longhaul_index < table_index) | ||
270 | dir = 1; | ||
271 | |||
272 | freqs.old = calc_speed(longhaul_get_cpu_mult()); | ||
273 | freqs.new = speed; | ||
274 | freqs.cpu = 0; /* longhaul.c is UP only driver */ | ||
275 | |||
276 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
277 | |||
278 | dprintk("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", | ||
279 | fsb, mult/10, mult%10, print_speed(speed/1000)); | ||
280 | retry_loop: | ||
281 | preempt_disable(); | ||
282 | local_irq_save(flags); | ||
283 | |||
284 | pic2_mask = inb(0xA1); | ||
285 | pic1_mask = inb(0x21); /* works on C3. save mask. */ | ||
286 | outb(0xFF, 0xA1); /* Overkill */ | ||
287 | outb(0xFE, 0x21); /* TMR0 only */ | ||
288 | |||
289 | /* Wait while PCI bus is busy. */ | ||
290 | if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE | ||
291 | || ((pr != NULL) && pr->flags.bm_control))) { | ||
292 | bm_status = inw(acpi_regs_addr); | ||
293 | bm_status &= 1 << 4; | ||
294 | while (bm_status && bm_timeout) { | ||
295 | outw(1 << 4, acpi_regs_addr); | ||
296 | bm_timeout--; | ||
297 | bm_status = inw(acpi_regs_addr); | ||
298 | bm_status &= 1 << 4; | ||
299 | } | ||
300 | } | ||
301 | |||
302 | if (longhaul_flags & USE_NORTHBRIDGE) { | ||
303 | /* Disable AGP and PCI arbiters */ | ||
304 | outb(3, 0x22); | ||
305 | } else if ((pr != NULL) && pr->flags.bm_control) { | ||
306 | /* Disable bus master arbitration */ | ||
307 | acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1); | ||
308 | } | ||
309 | switch (longhaul_version) { | ||
310 | |||
311 | /* | ||
312 | * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B]) | ||
313 | * Software controlled multipliers only. | ||
314 | */ | ||
315 | case TYPE_LONGHAUL_V1: | ||
316 | do_longhaul1(mults_index); | ||
317 | break; | ||
318 | |||
319 | /* | ||
320 | * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C] | ||
321 | * | ||
322 | * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N]) | ||
323 | * Nehemiah can do FSB scaling too, but this has never been proven | ||
324 | * to work in practice. | ||
325 | */ | ||
326 | case TYPE_LONGHAUL_V2: | ||
327 | case TYPE_POWERSAVER: | ||
328 | if (longhaul_flags & USE_ACPI_C3) { | ||
329 | /* Don't allow wakeup */ | ||
330 | acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 0); | ||
331 | do_powersaver(cx->address, mults_index, dir); | ||
332 | } else { | ||
333 | do_powersaver(0, mults_index, dir); | ||
334 | } | ||
335 | break; | ||
336 | } | ||
337 | |||
338 | if (longhaul_flags & USE_NORTHBRIDGE) { | ||
339 | /* Enable arbiters */ | ||
340 | outb(0, 0x22); | ||
341 | } else if ((pr != NULL) && pr->flags.bm_control) { | ||
342 | /* Enable bus master arbitration */ | ||
343 | acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0); | ||
344 | } | ||
345 | outb(pic2_mask, 0xA1); /* restore mask */ | ||
346 | outb(pic1_mask, 0x21); | ||
347 | |||
348 | local_irq_restore(flags); | ||
349 | preempt_enable(); | ||
350 | |||
351 | freqs.new = calc_speed(longhaul_get_cpu_mult()); | ||
352 | /* Check if requested frequency is set. */ | ||
353 | if (unlikely(freqs.new != speed)) { | ||
354 | printk(KERN_INFO PFX "Failed to set requested frequency!\n"); | ||
355 | /* Revision ID = 1 but processor is expecting revision key | ||
356 | * equal to 0. Jumpers at the bottom of processor will change | ||
357 | * multiplier and FSB, but will not change bits in Longhaul | ||
358 | * MSR nor enable voltage scaling. */ | ||
359 | if (!revid_errata) { | ||
360 | printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" " | ||
361 | "option.\n"); | ||
362 | revid_errata = 1; | ||
363 | msleep(200); | ||
364 | goto retry_loop; | ||
365 | } | ||
366 | /* Why ACPI C3 sometimes doesn't work is a mystery to me. | ||
367 | * But it does happen. Processor is entering ACPI C3 state, | ||
368 | * but it doesn't change frequency. I tried poking various | ||
369 | * bits in northbridge registers, but without success. */ | ||
370 | if (longhaul_flags & USE_ACPI_C3) { | ||
371 | printk(KERN_INFO PFX "Disabling ACPI C3 support.\n"); | ||
372 | longhaul_flags &= ~USE_ACPI_C3; | ||
373 | if (revid_errata) { | ||
374 | printk(KERN_INFO PFX "Disabling \"Ignore " | ||
375 | "Revision ID\" option.\n"); | ||
376 | revid_errata = 0; | ||
377 | } | ||
378 | msleep(200); | ||
379 | goto retry_loop; | ||
380 | } | ||
381 | /* This shouldn't happen. Longhaul ver. 2 was reported not | ||
382 | * working on processors without voltage scaling, but with | ||
383 | * RevID = 1. RevID errata will make things right. Just | ||
384 | * to be 100% sure. */ | ||
385 | if (longhaul_version == TYPE_LONGHAUL_V2) { | ||
386 | printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n"); | ||
387 | longhaul_version = TYPE_LONGHAUL_V1; | ||
388 | msleep(200); | ||
389 | goto retry_loop; | ||
390 | } | ||
391 | } | ||
392 | /* Report true CPU frequency */ | ||
393 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
394 | |||
395 | if (!bm_timeout) | ||
396 | printk(KERN_INFO PFX "Warning: Timeout while waiting for " | ||
397 | "idle PCI bus.\n"); | ||
398 | } | ||
399 | |||
400 | /* | ||
401 | * Centaur decided to make life a little more tricky. | ||
402 | * Only longhaul v1 is allowed to read EBLCR BSEL[0:1]. | ||
403 | * Samuel2 and above have to try and guess what the FSB is. | ||
404 | * We do this by assuming we booted at the maximum multiplier, and interpolating | ||
405 | * between that value multiplied by possible FSBs and cpu_mhz which | ||
406 | * was calculated at boot time. Really ugly, but no other way to do this. | ||
407 | */ | ||
408 | |||
409 | #define ROUNDING 0xf | ||
410 | |||
411 | static int guess_fsb(int mult) | ||
412 | { | ||
413 | int speed = cpu_khz / 1000; | ||
414 | int i; | ||
415 | int speeds[] = { 666, 1000, 1333, 2000 }; | ||
416 | int f_max, f_min; | ||
417 | |||
418 | for (i = 0; i < 4; i++) { | ||
419 | f_max = ((speeds[i] * mult) + 50) / 100; | ||
420 | f_max += (ROUNDING / 2); | ||
421 | f_min = f_max - ROUNDING; | ||
422 | if ((speed <= f_max) && (speed >= f_min)) | ||
423 | return speeds[i] / 10; | ||
424 | } | ||
425 | return 0; | ||
426 | } | ||
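
A worked example of the interpolation guess_fsb() performs (illustration only; the core clock and multiplier are assumed). For a CPU reporting roughly 1200 MHz at a 9.0x multiplier, only the 133 MHz FSB candidate (1333 in the table, i.e. FSB * 10) lands within the ROUNDING window, so 133 is returned.

#include <stdio.h>

#define ROUNDING 0xf

/* Same search as guess_fsb() above, with the core clock in MHz passed in. */
static int guess_fsb_example(int speed_mhz, int mult)
{
	int speeds[] = { 666, 1000, 1333, 2000 };   /* candidate FSBs * 10 */

	for (int i = 0; i < 4; i++) {
		int f_max = ((speeds[i] * mult) + 50) / 100 + ROUNDING / 2;
		int f_min = f_max - ROUNDING;
		if (speed_mhz <= f_max && speed_mhz >= f_min)
			return speeds[i] / 10;
	}
	return 0;
}

int main(void)
{
	/* assumed: 1200 MHz core clock, 9.0x multiplier -> guesses 133 MHz FSB */
	printf("guessed FSB: %d MHz\n", guess_fsb_example(1200, 90));
	return 0;
}
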
427 | |||
428 | |||
429 | static int __cpuinit longhaul_get_ranges(void) | ||
430 | { | ||
431 | unsigned int i, j, k = 0; | ||
432 | unsigned int ratio; | ||
433 | int mult; | ||
434 | |||
435 | /* Get current frequency */ | ||
436 | mult = longhaul_get_cpu_mult(); | ||
437 | if (mult == -1) { | ||
438 | printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n"); | ||
439 | return -EINVAL; | ||
440 | } | ||
441 | fsb = guess_fsb(mult); | ||
442 | if (fsb == 0) { | ||
443 | printk(KERN_INFO PFX "Invalid (reserved) FSB!\n"); | ||
444 | return -EINVAL; | ||
445 | } | ||
446 | /* Get max multiplier - as we always did. | ||
447 | * Longhaul MSR is useful only when voltage scaling is enabled. | ||
448 | * C3 is booting at max anyway. */ | ||
449 | maxmult = mult; | ||
450 | /* Get min multiplier */ | ||
451 | switch (cpu_model) { | ||
452 | case CPU_NEHEMIAH: | ||
453 | minmult = 50; | ||
454 | break; | ||
455 | case CPU_NEHEMIAH_C: | ||
456 | minmult = 40; | ||
457 | break; | ||
458 | default: | ||
459 | minmult = 30; | ||
460 | break; | ||
461 | } | ||
462 | |||
463 | dprintk("MinMult:%d.%dx MaxMult:%d.%dx\n", | ||
464 | minmult/10, minmult%10, maxmult/10, maxmult%10); | ||
465 | |||
466 | highest_speed = calc_speed(maxmult); | ||
467 | lowest_speed = calc_speed(minmult); | ||
468 | dprintk("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, | ||
469 | print_speed(lowest_speed/1000), | ||
470 | print_speed(highest_speed/1000)); | ||
471 | |||
472 | if (lowest_speed == highest_speed) { | ||
473 | printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n"); | ||
474 | return -EINVAL; | ||
475 | } | ||
476 | if (lowest_speed > highest_speed) { | ||
477 | printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n", | ||
478 | lowest_speed, highest_speed); | ||
479 | return -EINVAL; | ||
480 | } | ||
481 | |||
482 | longhaul_table = kmalloc((numscales + 1) * sizeof(*longhaul_table), | ||
483 | GFP_KERNEL); | ||
484 | if (!longhaul_table) | ||
485 | return -ENOMEM; | ||
486 | |||
487 | for (j = 0; j < numscales; j++) { | ||
488 | ratio = mults[j]; | ||
489 | if (ratio == -1) | ||
490 | continue; | ||
491 | if (ratio > maxmult || ratio < minmult) | ||
492 | continue; | ||
493 | longhaul_table[k].frequency = calc_speed(ratio); | ||
494 | longhaul_table[k].index = j; | ||
495 | k++; | ||
496 | } | ||
497 | if (k <= 1) { | ||
498 | kfree(longhaul_table); | ||
499 | return -ENODEV; | ||
500 | } | ||
501 | /* Sort */ | ||
502 | for (j = 0; j < k - 1; j++) { | ||
503 | unsigned int min_f, min_i; | ||
504 | min_f = longhaul_table[j].frequency; | ||
505 | min_i = j; | ||
506 | for (i = j + 1; i < k; i++) { | ||
507 | if (longhaul_table[i].frequency < min_f) { | ||
508 | min_f = longhaul_table[i].frequency; | ||
509 | min_i = i; | ||
510 | } | ||
511 | } | ||
512 | if (min_i != j) { | ||
513 | swap(longhaul_table[j].frequency, | ||
514 | longhaul_table[min_i].frequency); | ||
515 | swap(longhaul_table[j].index, | ||
516 | longhaul_table[min_i].index); | ||
517 | } | ||
518 | } | ||
519 | |||
520 | longhaul_table[k].frequency = CPUFREQ_TABLE_END; | ||
521 | |||
522 | /* Find index we are running on */ | ||
523 | for (j = 0; j < k; j++) { | ||
524 | if (mults[longhaul_table[j].index & 0x1f] == mult) { | ||
525 | longhaul_index = j; | ||
526 | break; | ||
527 | } | ||
528 | } | ||
529 | return 0; | ||
530 | } | ||
531 | |||
532 | |||
533 | static void __cpuinit longhaul_setup_voltagescaling(void) | ||
534 | { | ||
535 | union msr_longhaul longhaul; | ||
536 | struct mV_pos minvid, maxvid, vid; | ||
537 | unsigned int j, speed, pos, kHz_step, numvscales; | ||
538 | int min_vid_speed; | ||
539 | |||
540 | rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
541 | if (!(longhaul.bits.RevisionID & 1)) { | ||
542 | printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n"); | ||
543 | return; | ||
544 | } | ||
545 | |||
546 | if (!longhaul.bits.VRMRev) { | ||
547 | printk(KERN_INFO PFX "VRM 8.5\n"); | ||
548 | vrm_mV_table = &vrm85_mV[0]; | ||
549 | mV_vrm_table = &mV_vrm85[0]; | ||
550 | } else { | ||
551 | printk(KERN_INFO PFX "Mobile VRM\n"); | ||
552 | if (cpu_model < CPU_NEHEMIAH) | ||
553 | return; | ||
554 | vrm_mV_table = &mobilevrm_mV[0]; | ||
555 | mV_vrm_table = &mV_mobilevrm[0]; | ||
556 | } | ||
557 | |||
558 | minvid = vrm_mV_table[longhaul.bits.MinimumVID]; | ||
559 | maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; | ||
560 | |||
561 | if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { | ||
562 | printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " | ||
563 | "Voltage scaling disabled.\n", | ||
564 | minvid.mV/1000, minvid.mV%1000, | ||
565 | maxvid.mV/1000, maxvid.mV%1000); | ||
566 | return; | ||
567 | } | ||
568 | |||
569 | if (minvid.mV == maxvid.mV) { | ||
570 | printk(KERN_INFO PFX "Claims to support voltage scaling but " | ||
571 | "min & max are both %d.%03d. " | ||
572 | "Voltage scaling disabled\n", | ||
573 | maxvid.mV/1000, maxvid.mV%1000); | ||
574 | return; | ||
575 | } | ||
576 | |||
577 | /* How many voltage steps*/ | ||
578 | numvscales = maxvid.pos - minvid.pos + 1; | ||
579 | printk(KERN_INFO PFX | ||
580 | "Max VID=%d.%03d " | ||
581 | "Min VID=%d.%03d, " | ||
582 | "%d possible voltage scales\n", | ||
583 | maxvid.mV/1000, maxvid.mV%1000, | ||
584 | minvid.mV/1000, minvid.mV%1000, | ||
585 | numvscales); | ||
586 | |||
587 | /* Calculate max frequency at min voltage */ | ||
588 | j = longhaul.bits.MinMHzBR; | ||
589 | if (longhaul.bits.MinMHzBR4) | ||
590 | j += 16; | ||
591 | min_vid_speed = eblcr[j]; | ||
592 | if (min_vid_speed == -1) | ||
593 | return; | ||
594 | switch (longhaul.bits.MinMHzFSB) { | ||
595 | case 0: | ||
596 | min_vid_speed *= 13333; | ||
597 | break; | ||
598 | case 1: | ||
599 | min_vid_speed *= 10000; | ||
600 | break; | ||
601 | case 3: | ||
602 | min_vid_speed *= 6666; | ||
603 | break; | ||
604 | default: | ||
605 | return; | ||
606 | break; | ||
607 | } | ||
608 | if (min_vid_speed >= highest_speed) | ||
609 | return; | ||
610 | /* Calculate kHz for one voltage step */ | ||
611 | kHz_step = (highest_speed - min_vid_speed) / numvscales; | ||
612 | |||
613 | j = 0; | ||
614 | while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { | ||
615 | speed = longhaul_table[j].frequency; | ||
616 | if (speed > min_vid_speed) | ||
617 | pos = (speed - min_vid_speed) / kHz_step + minvid.pos; | ||
618 | else | ||
619 | pos = minvid.pos; | ||
620 | longhaul_table[j].index |= mV_vrm_table[pos] << 8; | ||
621 | vid = vrm_mV_table[mV_vrm_table[pos]]; | ||
622 | printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", | ||
623 | speed, j, vid.mV); | ||
624 | j++; | ||
625 | } | ||
626 | |||
627 | can_scale_voltage = 1; | ||
628 | printk(KERN_INFO PFX "Voltage scaling enabled.\n"); | ||
629 | } | ||
630 | |||
631 | |||
632 | static int longhaul_verify(struct cpufreq_policy *policy) | ||
633 | { | ||
634 | return cpufreq_frequency_table_verify(policy, longhaul_table); | ||
635 | } | ||
636 | |||
637 | |||
638 | static int longhaul_target(struct cpufreq_policy *policy, | ||
639 | unsigned int target_freq, unsigned int relation) | ||
640 | { | ||
641 | unsigned int table_index = 0; | ||
642 | unsigned int i; | ||
643 | unsigned int dir = 0; | ||
644 | u8 vid, current_vid; | ||
645 | |||
646 | if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, | ||
647 | relation, &table_index)) | ||
648 | return -EINVAL; | ||
649 | |||
650 | /* Don't set same frequency again */ | ||
651 | if (longhaul_index == table_index) | ||
652 | return 0; | ||
653 | |||
654 | if (!can_scale_voltage) | ||
655 | longhaul_setstate(table_index); | ||
656 | else { | ||
657 | /* On the test system, voltage transitions exceeding a single | ||
658 | * step up or down were turning the motherboard off. Both | ||
659 | * "ondemand" and "userspace" are unsafe. The C7 does this | ||
660 | * in hardware; the C3 is old and we need to do it | ||
661 | * in software. */ | ||
662 | i = longhaul_index; | ||
663 | current_vid = (longhaul_table[longhaul_index].index >> 8); | ||
664 | current_vid &= 0x1f; | ||
665 | if (table_index > longhaul_index) | ||
666 | dir = 1; | ||
667 | while (i != table_index) { | ||
668 | vid = (longhaul_table[i].index >> 8) & 0x1f; | ||
669 | if (vid != current_vid) { | ||
670 | longhaul_setstate(i); | ||
671 | current_vid = vid; | ||
672 | msleep(200); | ||
673 | } | ||
674 | if (dir) | ||
675 | i++; | ||
676 | else | ||
677 | i--; | ||
678 | } | ||
679 | longhaul_setstate(table_index); | ||
680 | } | ||
681 | longhaul_index = table_index; | ||
682 | return 0; | ||
683 | } | ||
684 | |||
685 | |||
686 | static unsigned int longhaul_get(unsigned int cpu) | ||
687 | { | ||
688 | if (cpu) | ||
689 | return 0; | ||
690 | return calc_speed(longhaul_get_cpu_mult()); | ||
691 | } | ||
692 | |||
693 | static acpi_status longhaul_walk_callback(acpi_handle obj_handle, | ||
694 | u32 nesting_level, | ||
695 | void *context, void **return_value) | ||
696 | { | ||
697 | struct acpi_device *d; | ||
698 | |||
699 | if (acpi_bus_get_device(obj_handle, &d)) | ||
700 | return 0; | ||
701 | |||
702 | *return_value = acpi_driver_data(d); | ||
703 | return 1; | ||
704 | } | ||
705 | |||
706 | /* VIA don't support PM2 reg, but have something similar */ | ||
707 | static int enable_arbiter_disable(void) | ||
708 | { | ||
709 | struct pci_dev *dev; | ||
710 | int status = 1; | ||
711 | int reg; | ||
712 | u8 pci_cmd; | ||
713 | |||
714 | /* Find PLE133 host bridge */ | ||
715 | reg = 0x78; | ||
716 | dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, | ||
717 | NULL); | ||
718 | /* Find PM133/VT8605 host bridge */ | ||
719 | if (dev == NULL) | ||
720 | dev = pci_get_device(PCI_VENDOR_ID_VIA, | ||
721 | PCI_DEVICE_ID_VIA_8605_0, NULL); | ||
722 | /* Find CLE266 host bridge */ | ||
723 | if (dev == NULL) { | ||
724 | reg = 0x76; | ||
725 | dev = pci_get_device(PCI_VENDOR_ID_VIA, | ||
726 | PCI_DEVICE_ID_VIA_862X_0, NULL); | ||
727 | /* Find CN400 V-Link host bridge */ | ||
728 | if (dev == NULL) | ||
729 | dev = pci_get_device(PCI_VENDOR_ID_VIA, 0x7259, NULL); | ||
730 | } | ||
731 | if (dev != NULL) { | ||
732 | /* Enable access to port 0x22 */ | ||
733 | pci_read_config_byte(dev, reg, &pci_cmd); | ||
734 | if (!(pci_cmd & 1<<7)) { | ||
735 | pci_cmd |= 1<<7; | ||
736 | pci_write_config_byte(dev, reg, pci_cmd); | ||
737 | pci_read_config_byte(dev, reg, &pci_cmd); | ||
738 | if (!(pci_cmd & 1<<7)) { | ||
739 | printk(KERN_ERR PFX | ||
740 | "Can't enable access to port 0x22.\n"); | ||
741 | status = 0; | ||
742 | } | ||
743 | } | ||
744 | pci_dev_put(dev); | ||
745 | return status; | ||
746 | } | ||
747 | return 0; | ||
748 | } | ||
749 | |||
750 | static int longhaul_setup_southbridge(void) | ||
751 | { | ||
752 | struct pci_dev *dev; | ||
753 | u8 pci_cmd; | ||
754 | |||
755 | /* Find VT8235 southbridge */ | ||
756 | dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL); | ||
757 | if (dev == NULL) | ||
758 | /* Find VT8237 southbridge */ | ||
759 | dev = pci_get_device(PCI_VENDOR_ID_VIA, | ||
760 | PCI_DEVICE_ID_VIA_8237, NULL); | ||
761 | if (dev != NULL) { | ||
762 | /* Set transition time to max */ | ||
763 | pci_read_config_byte(dev, 0xec, &pci_cmd); | ||
764 | pci_cmd &= ~(1 << 2); | ||
765 | pci_write_config_byte(dev, 0xec, pci_cmd); | ||
766 | pci_read_config_byte(dev, 0xe4, &pci_cmd); | ||
767 | pci_cmd &= ~(1 << 7); | ||
768 | pci_write_config_byte(dev, 0xe4, pci_cmd); | ||
769 | pci_read_config_byte(dev, 0xe5, &pci_cmd); | ||
770 | pci_cmd |= 1 << 7; | ||
771 | pci_write_config_byte(dev, 0xe5, pci_cmd); | ||
772 | /* Get address of ACPI registers block*/ | ||
773 | pci_read_config_byte(dev, 0x81, &pci_cmd); | ||
774 | if (pci_cmd & 1 << 7) { | ||
775 | pci_read_config_dword(dev, 0x88, &acpi_regs_addr); | ||
776 | acpi_regs_addr &= 0xff00; | ||
777 | printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", | ||
778 | acpi_regs_addr); | ||
779 | } | ||
780 | |||
781 | pci_dev_put(dev); | ||
782 | return 1; | ||
783 | } | ||
784 | return 0; | ||
785 | } | ||
786 | |||
787 | static int __cpuinit longhaul_cpu_init(struct cpufreq_policy *policy) | ||
788 | { | ||
789 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
790 | char *cpuname = NULL; | ||
791 | int ret; | ||
792 | u32 lo, hi; | ||
793 | |||
794 | /* Check what we have on this motherboard */ | ||
795 | switch (c->x86_model) { | ||
796 | case 6: | ||
797 | cpu_model = CPU_SAMUEL; | ||
798 | cpuname = "C3 'Samuel' [C5A]"; | ||
799 | longhaul_version = TYPE_LONGHAUL_V1; | ||
800 | memcpy(mults, samuel1_mults, sizeof(samuel1_mults)); | ||
801 | memcpy(eblcr, samuel1_eblcr, sizeof(samuel1_eblcr)); | ||
802 | break; | ||
803 | |||
804 | case 7: | ||
805 | switch (c->x86_mask) { | ||
806 | case 0: | ||
807 | longhaul_version = TYPE_LONGHAUL_V1; | ||
808 | cpu_model = CPU_SAMUEL2; | ||
809 | cpuname = "C3 'Samuel 2' [C5B]"; | ||
810 | /* Note, this is not a typo, early Samuel2's had | ||
811 | * Samuel1 ratios. */ | ||
812 | memcpy(mults, samuel1_mults, sizeof(samuel1_mults)); | ||
813 | memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr)); | ||
814 | break; | ||
815 | case 1 ... 15: | ||
816 | longhaul_version = TYPE_LONGHAUL_V2; | ||
817 | if (c->x86_mask < 8) { | ||
818 | cpu_model = CPU_SAMUEL2; | ||
819 | cpuname = "C3 'Samuel 2' [C5B]"; | ||
820 | } else { | ||
821 | cpu_model = CPU_EZRA; | ||
822 | cpuname = "C3 'Ezra' [C5C]"; | ||
823 | } | ||
824 | memcpy(mults, ezra_mults, sizeof(ezra_mults)); | ||
825 | memcpy(eblcr, ezra_eblcr, sizeof(ezra_eblcr)); | ||
826 | break; | ||
827 | } | ||
828 | break; | ||
829 | |||
830 | case 8: | ||
831 | cpu_model = CPU_EZRA_T; | ||
832 | cpuname = "C3 'Ezra-T' [C5M]"; | ||
833 | longhaul_version = TYPE_POWERSAVER; | ||
834 | numscales = 32; | ||
835 | memcpy(mults, ezrat_mults, sizeof(ezrat_mults)); | ||
836 | memcpy(eblcr, ezrat_eblcr, sizeof(ezrat_eblcr)); | ||
837 | break; | ||
838 | |||
839 | case 9: | ||
840 | longhaul_version = TYPE_POWERSAVER; | ||
841 | numscales = 32; | ||
842 | memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults)); | ||
843 | memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr)); | ||
844 | switch (c->x86_mask) { | ||
845 | case 0 ... 1: | ||
846 | cpu_model = CPU_NEHEMIAH; | ||
847 | cpuname = "C3 'Nehemiah A' [C5XLOE]"; | ||
848 | break; | ||
849 | case 2 ... 4: | ||
850 | cpu_model = CPU_NEHEMIAH; | ||
851 | cpuname = "C3 'Nehemiah B' [C5XLOH]"; | ||
852 | break; | ||
853 | case 5 ... 15: | ||
854 | cpu_model = CPU_NEHEMIAH_C; | ||
855 | cpuname = "C3 'Nehemiah C' [C5P]"; | ||
856 | break; | ||
857 | } | ||
858 | break; | ||
859 | |||
860 | default: | ||
861 | cpuname = "Unknown"; | ||
862 | break; | ||
863 | } | ||
864 | /* Check Longhaul ver. 2 */ | ||
865 | if (longhaul_version == TYPE_LONGHAUL_V2) { | ||
866 | rdmsr(MSR_VIA_LONGHAUL, lo, hi); | ||
867 | if (lo == 0 && hi == 0) | ||
868 | /* Looks like MSR isn't present */ | ||
869 | longhaul_version = TYPE_LONGHAUL_V1; | ||
870 | } | ||
871 | |||
872 | printk(KERN_INFO PFX "VIA %s CPU detected. ", cpuname); | ||
873 | switch (longhaul_version) { | ||
874 | case TYPE_LONGHAUL_V1: | ||
875 | case TYPE_LONGHAUL_V2: | ||
876 | printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version); | ||
877 | break; | ||
878 | case TYPE_POWERSAVER: | ||
879 | printk(KERN_CONT "Powersaver supported.\n"); | ||
880 | break; | ||
881 | } | ||
882 | |||
883 | /* Doesn't hurt */ | ||
884 | longhaul_setup_southbridge(); | ||
885 | |||
886 | /* Find ACPI data for processor */ | ||
887 | acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, | ||
888 | ACPI_UINT32_MAX, &longhaul_walk_callback, NULL, | ||
889 | NULL, (void *)&pr); | ||
890 | |||
891 | /* Check ACPI support for C3 state */ | ||
892 | if (pr != NULL && longhaul_version == TYPE_POWERSAVER) { | ||
893 | cx = &pr->power.states[ACPI_STATE_C3]; | ||
894 | if (cx->address > 0 && cx->latency <= 1000) | ||
895 | longhaul_flags |= USE_ACPI_C3; | ||
896 | } | ||
897 | /* Disable if it isn't working */ | ||
898 | if (disable_acpi_c3) | ||
899 | longhaul_flags &= ~USE_ACPI_C3; | ||
900 | /* Check if northbridge is friendly */ | ||
901 | if (enable_arbiter_disable()) | ||
902 | longhaul_flags |= USE_NORTHBRIDGE; | ||
903 | |||
904 | /* Check ACPI support for bus master arbiter disable */ | ||
905 | if (!(longhaul_flags & USE_ACPI_C3 | ||
906 | || longhaul_flags & USE_NORTHBRIDGE) | ||
907 | && ((pr == NULL) || !(pr->flags.bm_control))) { | ||
908 | printk(KERN_ERR PFX | ||
909 | "No ACPI support. Unsupported northbridge.\n"); | ||
910 | return -ENODEV; | ||
911 | } | ||
912 | |||
913 | if (longhaul_flags & USE_NORTHBRIDGE) | ||
914 | printk(KERN_INFO PFX "Using northbridge support.\n"); | ||
915 | if (longhaul_flags & USE_ACPI_C3) | ||
916 | printk(KERN_INFO PFX "Using ACPI support.\n"); | ||
917 | |||
918 | ret = longhaul_get_ranges(); | ||
919 | if (ret != 0) | ||
920 | return ret; | ||
921 | |||
922 | if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0)) | ||
923 | longhaul_setup_voltagescaling(); | ||
924 | |||
925 | policy->cpuinfo.transition_latency = 200000; /* nsec */ | ||
926 | policy->cur = calc_speed(longhaul_get_cpu_mult()); | ||
927 | |||
928 | ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table); | ||
929 | if (ret) | ||
930 | return ret; | ||
931 | |||
932 | cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu); | ||
933 | |||
934 | return 0; | ||
935 | } | ||
936 | |||
937 | static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy) | ||
938 | { | ||
939 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
940 | return 0; | ||
941 | } | ||
942 | |||
943 | static struct freq_attr *longhaul_attr[] = { | ||
944 | &cpufreq_freq_attr_scaling_available_freqs, | ||
945 | NULL, | ||
946 | }; | ||
947 | |||
948 | static struct cpufreq_driver longhaul_driver = { | ||
949 | .verify = longhaul_verify, | ||
950 | .target = longhaul_target, | ||
951 | .get = longhaul_get, | ||
952 | .init = longhaul_cpu_init, | ||
953 | .exit = __devexit_p(longhaul_cpu_exit), | ||
954 | .name = "longhaul", | ||
955 | .owner = THIS_MODULE, | ||
956 | .attr = longhaul_attr, | ||
957 | }; | ||
958 | |||
959 | |||
960 | static int __init longhaul_init(void) | ||
961 | { | ||
962 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
963 | |||
964 | if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6) | ||
965 | return -ENODEV; | ||
966 | |||
967 | #ifdef CONFIG_SMP | ||
968 | if (num_online_cpus() > 1) { | ||
969 | printk(KERN_ERR PFX "More than 1 CPU detected, " | ||
970 | "longhaul disabled.\n"); | ||
971 | return -ENODEV; | ||
972 | } | ||
973 | #endif | ||
974 | #ifdef CONFIG_X86_IO_APIC | ||
975 | if (cpu_has_apic) { | ||
976 | printk(KERN_ERR PFX "APIC detected. Longhaul is currently " | ||
977 | "broken in this configuration.\n"); | ||
978 | return -ENODEV; | ||
979 | } | ||
980 | #endif | ||
981 | switch (c->x86_model) { | ||
982 | case 6 ... 9: | ||
983 | return cpufreq_register_driver(&longhaul_driver); | ||
984 | case 10: | ||
985 | printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n"); | ||
986 | default: | ||
987 | ; | ||
988 | } | ||
989 | |||
990 | return -ENODEV; | ||
991 | } | ||
992 | |||
993 | |||
994 | static void __exit longhaul_exit(void) | ||
995 | { | ||
996 | int i; | ||
997 | |||
998 | for (i = 0; i < numscales; i++) { | ||
999 | if (mults[i] == maxmult) { | ||
1000 | longhaul_setstate(i); | ||
1001 | break; | ||
1002 | } | ||
1003 | } | ||
1004 | |||
1005 | cpufreq_unregister_driver(&longhaul_driver); | ||
1006 | kfree(longhaul_table); | ||
1007 | } | ||
1008 | |||
1009 | /* Even if the BIOS exports an ACPI C3 state and it is used | ||
1010 |  * successfully while the CPU is idle, in some cases this state | ||
1011 |  * does not trigger the frequency transition. */ | ||
1012 | module_param(disable_acpi_c3, int, 0644); | ||
1013 | MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); | ||
1014 | /* Change CPU voltage with frequency. Very useful to save | ||
1015 |  * power, but most VIA C3 processors don't support it. */ | ||
1016 | module_param(scale_voltage, int, 0644); | ||
1017 | MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); | ||
1018 | /* Force revision key to 0 for processors which don't | ||
1019 |  * support voltage scaling, but advertise themselves as | ||
1020 |  * such. */ | ||
1021 | module_param(revid_errata, int, 0644); | ||
1022 | MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); | ||
1023 | |||
1024 | MODULE_AUTHOR("Dave Jones <davej@redhat.com>"); | ||
1025 | MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors."); | ||
1026 | MODULE_LICENSE("GPL"); | ||
1027 | |||
1028 | late_initcall(longhaul_init); | ||
1029 | module_exit(longhaul_exit); | ||
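Editor's note: the driver builds each entry of longhaul_table from a bus-ratio multiplier and the front-side-bus clock; the ratio tables in longhaul.h (removed below) store the multiplier times ten, so the conversion is simply fsb * mult / 10, which is what calc_speed() relies on when setting policy->cur above. A minimal, self-contained sketch of that conversion, assuming a hypothetical FSB value and one table entry; this is an illustration, not the driver's calc_speed():

#include <stdio.h>

/* Illustrative only: convert a longhaul table entry (multiplier x10)
 * and a front-side-bus clock in kHz into a CPU frequency in kHz. */
static unsigned int mult_to_khz(unsigned int fsb_khz, int mult_x10)
{
	if (mult_x10 < 0)		/* -1 marks a RESERVED encoding */
		return 0;
	return (fsb_khz * mult_x10) / 10;
}

int main(void)
{
	/* e.g. a 133 MHz FSB and the 7.5x entry (stored as 75) */
	printf("%u kHz\n", mult_to_khz(133333, 75));	/* ~1000000 kHz */
	return 0;
}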
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h deleted file mode 100644 index cbf48fbca881..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.h +++ /dev/null | |||
@@ -1,353 +0,0 @@ | |||
1 | /* | ||
2 | * longhaul.h | ||
3 | * (C) 2003 Dave Jones. | ||
4 | * | ||
5 | * Licensed under the terms of the GNU GPL License version 2. | ||
6 | * | ||
7 | * VIA-specific information | ||
8 | */ | ||
9 | |||
10 | union msr_bcr2 { | ||
11 | struct { | ||
12 | unsigned Reseved:19, // 18:0 | ||
13 | ESOFTBF:1, // 19 | ||
14 | Reserved2:3, // 22:20 | ||
15 | CLOCKMUL:4, // 26:23 | ||
16 | Reserved3:5; // 31:27 | ||
17 | } bits; | ||
18 | unsigned long val; | ||
19 | }; | ||
20 | |||
21 | union msr_longhaul { | ||
22 | struct { | ||
23 | unsigned RevisionID:4, // 3:0 | ||
24 | RevisionKey:4, // 7:4 | ||
25 | EnableSoftBusRatio:1, // 8 | ||
26 | EnableSoftVID:1, // 9 | ||
27 | EnableSoftBSEL:1, // 10 | ||
28 | Reserved:3, // 13:11 | ||
29 | SoftBusRatio4:1, // 14 | ||
30 | VRMRev:1, // 15 | ||
31 | SoftBusRatio:4, // 19:16 | ||
32 | SoftVID:5, // 24:20 | ||
33 | Reserved2:3, // 27:25 | ||
34 | SoftBSEL:2, // 29:28 | ||
35 | Reserved3:2, // 31:30 | ||
36 | MaxMHzBR:4, // 35:32 | ||
37 | MaximumVID:5, // 40:36 | ||
38 | MaxMHzFSB:2, // 42:41 | ||
39 | MaxMHzBR4:1, // 43 | ||
40 | Reserved4:4, // 47:44 | ||
41 | MinMHzBR:4, // 51:48 | ||
42 | MinimumVID:5, // 56:52 | ||
43 | MinMHzFSB:2, // 58:57 | ||
44 | MinMHzBR4:1, // 59 | ||
45 | Reserved5:4; // 63:60 | ||
46 | } bits; | ||
47 | unsigned long long val; | ||
48 | }; | ||
49 | |||
50 | /* | ||
51 | * Clock ratio tables. Div/Mod by 10 to get ratio. | ||
52 | * The eblcr values specify the ratio read from the CPU. | ||
53 | * The mults values specify what to write to the CPU. | ||
54 | */ | ||
55 | |||
56 | /* | ||
57 | * VIA C3 Samuel 1 & Samuel 2 (stepping 0) | ||
58 | */ | ||
59 | static const int __cpuinitdata samuel1_mults[16] = { | ||
60 | -1, /* 0000 -> RESERVED */ | ||
61 | 30, /* 0001 -> 3.0x */ | ||
62 | 40, /* 0010 -> 4.0x */ | ||
63 | -1, /* 0011 -> RESERVED */ | ||
64 | -1, /* 0100 -> RESERVED */ | ||
65 | 35, /* 0101 -> 3.5x */ | ||
66 | 45, /* 0110 -> 4.5x */ | ||
67 | 55, /* 0111 -> 5.5x */ | ||
68 | 60, /* 1000 -> 6.0x */ | ||
69 | 70, /* 1001 -> 7.0x */ | ||
70 | 80, /* 1010 -> 8.0x */ | ||
71 | 50, /* 1011 -> 5.0x */ | ||
72 | 65, /* 1100 -> 6.5x */ | ||
73 | 75, /* 1101 -> 7.5x */ | ||
74 | -1, /* 1110 -> RESERVED */ | ||
75 | -1, /* 1111 -> RESERVED */ | ||
76 | }; | ||
77 | |||
78 | static const int __cpuinitdata samuel1_eblcr[16] = { | ||
79 | 50, /* 0000 -> RESERVED */ | ||
80 | 30, /* 0001 -> 3.0x */ | ||
81 | 40, /* 0010 -> 4.0x */ | ||
82 | -1, /* 0011 -> RESERVED */ | ||
83 | 55, /* 0100 -> 5.5x */ | ||
84 | 35, /* 0101 -> 3.5x */ | ||
85 | 45, /* 0110 -> 4.5x */ | ||
86 | -1, /* 0111 -> RESERVED */ | ||
87 | -1, /* 1000 -> RESERVED */ | ||
88 | 70, /* 1001 -> 7.0x */ | ||
89 | 80, /* 1010 -> 8.0x */ | ||
90 | 60, /* 1011 -> 6.0x */ | ||
91 | -1, /* 1100 -> RESERVED */ | ||
92 | 75, /* 1101 -> 7.5x */ | ||
93 | -1, /* 1110 -> RESERVED */ | ||
94 | 65, /* 1111 -> 6.5x */ | ||
95 | }; | ||
96 | |||
97 | /* | ||
98 | * VIA C3 Samuel2 Stepping 1->15 | ||
99 | */ | ||
100 | static const int __cpuinitdata samuel2_eblcr[16] = { | ||
101 | 50, /* 0000 -> 5.0x */ | ||
102 | 30, /* 0001 -> 3.0x */ | ||
103 | 40, /* 0010 -> 4.0x */ | ||
104 | 100, /* 0011 -> 10.0x */ | ||
105 | 55, /* 0100 -> 5.5x */ | ||
106 | 35, /* 0101 -> 3.5x */ | ||
107 | 45, /* 0110 -> 4.5x */ | ||
108 | 110, /* 0111 -> 11.0x */ | ||
109 | 90, /* 1000 -> 9.0x */ | ||
110 | 70, /* 1001 -> 7.0x */ | ||
111 | 80, /* 1010 -> 8.0x */ | ||
112 | 60, /* 1011 -> 6.0x */ | ||
113 | 120, /* 1100 -> 12.0x */ | ||
114 | 75, /* 1101 -> 7.5x */ | ||
115 | 130, /* 1110 -> 13.0x */ | ||
116 | 65, /* 1111 -> 6.5x */ | ||
117 | }; | ||
118 | |||
119 | /* | ||
120 | * VIA C3 Ezra | ||
121 | */ | ||
122 | static const int __cpuinitdata ezra_mults[16] = { | ||
123 | 100, /* 0000 -> 10.0x */ | ||
124 | 30, /* 0001 -> 3.0x */ | ||
125 | 40, /* 0010 -> 4.0x */ | ||
126 | 90, /* 0011 -> 9.0x */ | ||
127 | 95, /* 0100 -> 9.5x */ | ||
128 | 35, /* 0101 -> 3.5x */ | ||
129 | 45, /* 0110 -> 4.5x */ | ||
130 | 55, /* 0111 -> 5.5x */ | ||
131 | 60, /* 1000 -> 6.0x */ | ||
132 | 70, /* 1001 -> 7.0x */ | ||
133 | 80, /* 1010 -> 8.0x */ | ||
134 | 50, /* 1011 -> 5.0x */ | ||
135 | 65, /* 1100 -> 6.5x */ | ||
136 | 75, /* 1101 -> 7.5x */ | ||
137 | 85, /* 1110 -> 8.5x */ | ||
138 | 120, /* 1111 -> 12.0x */ | ||
139 | }; | ||
140 | |||
141 | static const int __cpuinitdata ezra_eblcr[16] = { | ||
142 | 50, /* 0000 -> 5.0x */ | ||
143 | 30, /* 0001 -> 3.0x */ | ||
144 | 40, /* 0010 -> 4.0x */ | ||
145 | 100, /* 0011 -> 10.0x */ | ||
146 | 55, /* 0100 -> 5.5x */ | ||
147 | 35, /* 0101 -> 3.5x */ | ||
148 | 45, /* 0110 -> 4.5x */ | ||
149 | 95, /* 0111 -> 9.5x */ | ||
150 | 90, /* 1000 -> 9.0x */ | ||
151 | 70, /* 1001 -> 7.0x */ | ||
152 | 80, /* 1010 -> 8.0x */ | ||
153 | 60, /* 1011 -> 6.0x */ | ||
154 | 120, /* 1100 -> 12.0x */ | ||
155 | 75, /* 1101 -> 7.5x */ | ||
156 | 85, /* 1110 -> 8.5x */ | ||
157 | 65, /* 1111 -> 6.5x */ | ||
158 | }; | ||
159 | |||
160 | /* | ||
161 | * VIA C3 (Ezra-T) [C5M]. | ||
162 | */ | ||
163 | static const int __cpuinitdata ezrat_mults[32] = { | ||
164 | 100, /* 0000 -> 10.0x */ | ||
165 | 30, /* 0001 -> 3.0x */ | ||
166 | 40, /* 0010 -> 4.0x */ | ||
167 | 90, /* 0011 -> 9.0x */ | ||
168 | 95, /* 0100 -> 9.5x */ | ||
169 | 35, /* 0101 -> 3.5x */ | ||
170 | 45, /* 0110 -> 4.5x */ | ||
171 | 55, /* 0111 -> 5.5x */ | ||
172 | 60, /* 1000 -> 6.0x */ | ||
173 | 70, /* 1001 -> 7.0x */ | ||
174 | 80, /* 1010 -> 8.0x */ | ||
175 | 50, /* 1011 -> 5.0x */ | ||
176 | 65, /* 1100 -> 6.5x */ | ||
177 | 75, /* 1101 -> 7.5x */ | ||
178 | 85, /* 1110 -> 8.5x */ | ||
179 | 120, /* 1111 -> 12.0x */ | ||
180 | |||
181 | -1, /* 0000 -> RESERVED (10.0x) */ | ||
182 | 110, /* 0001 -> 11.0x */ | ||
183 | -1, /* 0010 -> 12.0x */ | ||
184 | -1, /* 0011 -> RESERVED (9.0x)*/ | ||
185 | 105, /* 0100 -> 10.5x */ | ||
186 | 115, /* 0101 -> 11.5x */ | ||
187 | 125, /* 0110 -> 12.5x */ | ||
188 | 135, /* 0111 -> 13.5x */ | ||
189 | 140, /* 1000 -> 14.0x */ | ||
190 | 150, /* 1001 -> 15.0x */ | ||
191 | 160, /* 1010 -> 16.0x */ | ||
192 | 130, /* 1011 -> 13.0x */ | ||
193 | 145, /* 1100 -> 14.5x */ | ||
194 | 155, /* 1101 -> 15.5x */ | ||
195 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
196 | -1, /* 1111 -> RESERVED (12.0x) */ | ||
197 | }; | ||
198 | |||
199 | static const int __cpuinitdata ezrat_eblcr[32] = { | ||
200 | 50, /* 0000 -> 5.0x */ | ||
201 | 30, /* 0001 -> 3.0x */ | ||
202 | 40, /* 0010 -> 4.0x */ | ||
203 | 100, /* 0011 -> 10.0x */ | ||
204 | 55, /* 0100 -> 5.5x */ | ||
205 | 35, /* 0101 -> 3.5x */ | ||
206 | 45, /* 0110 -> 4.5x */ | ||
207 | 95, /* 0111 -> 9.5x */ | ||
208 | 90, /* 1000 -> 9.0x */ | ||
209 | 70, /* 1001 -> 7.0x */ | ||
210 | 80, /* 1010 -> 8.0x */ | ||
211 | 60, /* 1011 -> 6.0x */ | ||
212 | 120, /* 1100 -> 12.0x */ | ||
213 | 75, /* 1101 -> 7.5x */ | ||
214 | 85, /* 1110 -> 8.5x */ | ||
215 | 65, /* 1111 -> 6.5x */ | ||
216 | |||
217 | -1, /* 0000 -> RESERVED (9.0x) */ | ||
218 | 110, /* 0001 -> 11.0x */ | ||
219 | 120, /* 0010 -> 12.0x */ | ||
220 | -1, /* 0011 -> RESERVED (10.0x)*/ | ||
221 | 135, /* 0100 -> 13.5x */ | ||
222 | 115, /* 0101 -> 11.5x */ | ||
223 | 125, /* 0110 -> 12.5x */ | ||
224 | 105, /* 0111 -> 10.5x */ | ||
225 | 130, /* 1000 -> 13.0x */ | ||
226 | 150, /* 1001 -> 15.0x */ | ||
227 | 160, /* 1010 -> 16.0x */ | ||
228 | 140, /* 1011 -> 14.0x */ | ||
229 | -1, /* 1100 -> RESERVED (12.0x) */ | ||
230 | 155, /* 1101 -> 15.5x */ | ||
231 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
232 | 145, /* 1111 -> 14.5x */ | ||
233 | }; | ||
234 | |||
235 | /* | ||
236 | * VIA C3 Nehemiah | ||
237 | */ | ||
238 | static const int __cpuinitdata nehemiah_mults[32] = { | ||
239 | 100, /* 0000 -> 10.0x */ | ||
240 | -1, /* 0001 -> 16.0x */ | ||
241 | 40, /* 0010 -> 4.0x */ | ||
242 | 90, /* 0011 -> 9.0x */ | ||
243 | 95, /* 0100 -> 9.5x */ | ||
244 | -1, /* 0101 -> RESERVED */ | ||
245 | 45, /* 0110 -> 4.5x */ | ||
246 | 55, /* 0111 -> 5.5x */ | ||
247 | 60, /* 1000 -> 6.0x */ | ||
248 | 70, /* 1001 -> 7.0x */ | ||
249 | 80, /* 1010 -> 8.0x */ | ||
250 | 50, /* 1011 -> 5.0x */ | ||
251 | 65, /* 1100 -> 6.5x */ | ||
252 | 75, /* 1101 -> 7.5x */ | ||
253 | 85, /* 1110 -> 8.5x */ | ||
254 | 120, /* 1111 -> 12.0x */ | ||
255 | -1, /* 0000 -> 10.0x */ | ||
256 | 110, /* 0001 -> 11.0x */ | ||
257 | -1, /* 0010 -> 12.0x */ | ||
258 | -1, /* 0011 -> 9.0x */ | ||
259 | 105, /* 0100 -> 10.5x */ | ||
260 | 115, /* 0101 -> 11.5x */ | ||
261 | 125, /* 0110 -> 12.5x */ | ||
262 | 135, /* 0111 -> 13.5x */ | ||
263 | 140, /* 1000 -> 14.0x */ | ||
264 | 150, /* 1001 -> 15.0x */ | ||
265 | 160, /* 1010 -> 16.0x */ | ||
266 | 130, /* 1011 -> 13.0x */ | ||
267 | 145, /* 1100 -> 14.5x */ | ||
268 | 155, /* 1101 -> 15.5x */ | ||
269 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
270 | -1, /* 1111 -> 12.0x */ | ||
271 | }; | ||
272 | |||
273 | static const int __cpuinitdata nehemiah_eblcr[32] = { | ||
274 | 50, /* 0000 -> 5.0x */ | ||
275 | 160, /* 0001 -> 16.0x */ | ||
276 | 40, /* 0010 -> 4.0x */ | ||
277 | 100, /* 0011 -> 10.0x */ | ||
278 | 55, /* 0100 -> 5.5x */ | ||
279 | -1, /* 0101 -> RESERVED */ | ||
280 | 45, /* 0110 -> 4.5x */ | ||
281 | 95, /* 0111 -> 9.5x */ | ||
282 | 90, /* 1000 -> 9.0x */ | ||
283 | 70, /* 1001 -> 7.0x */ | ||
284 | 80, /* 1010 -> 8.0x */ | ||
285 | 60, /* 1011 -> 6.0x */ | ||
286 | 120, /* 1100 -> 12.0x */ | ||
287 | 75, /* 1101 -> 7.5x */ | ||
288 | 85, /* 1110 -> 8.5x */ | ||
289 | 65, /* 1111 -> 6.5x */ | ||
290 | 90, /* 0000 -> 9.0x */ | ||
291 | 110, /* 0001 -> 11.0x */ | ||
292 | 120, /* 0010 -> 12.0x */ | ||
293 | 100, /* 0011 -> 10.0x */ | ||
294 | 135, /* 0100 -> 13.5x */ | ||
295 | 115, /* 0101 -> 11.5x */ | ||
296 | 125, /* 0110 -> 12.5x */ | ||
297 | 105, /* 0111 -> 10.5x */ | ||
298 | 130, /* 1000 -> 13.0x */ | ||
299 | 150, /* 1001 -> 15.0x */ | ||
300 | 160, /* 1010 -> 16.0x */ | ||
301 | 140, /* 1011 -> 14.0x */ | ||
302 | 120, /* 1100 -> 12.0x */ | ||
303 | 155, /* 1101 -> 15.5x */ | ||
304 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
305 | 145 /* 1111 -> 14.5x */ | ||
306 | }; | ||
307 | |||
308 | /* | ||
309 | * Voltage scales. Div/Mod by 1000 to get actual voltage. | ||
310 | * Which scale to use depends on the VRM type in use. | ||
311 | */ | ||
312 | |||
313 | struct mV_pos { | ||
314 | unsigned short mV; | ||
315 | unsigned short pos; | ||
316 | }; | ||
317 | |||
318 | static const struct mV_pos __cpuinitdata vrm85_mV[32] = { | ||
319 | {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, | ||
320 | {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, | ||
321 | {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, | ||
322 | {1450, 16}, {1400, 14}, {1350, 12}, {1300, 10}, | ||
323 | {1275, 9}, {1225, 7}, {1175, 5}, {1125, 3}, | ||
324 | {1075, 1}, {1825, 31}, {1775, 29}, {1725, 27}, | ||
325 | {1675, 25}, {1625, 23}, {1575, 21}, {1525, 19}, | ||
326 | {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} | ||
327 | }; | ||
328 | |||
329 | static const unsigned char __cpuinitdata mV_vrm85[32] = { | ||
330 | 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, | ||
331 | 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, | ||
332 | 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, | ||
333 | 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 | ||
334 | }; | ||
335 | |||
336 | static const struct mV_pos __cpuinitdata mobilevrm_mV[32] = { | ||
337 | {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, | ||
338 | {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, | ||
339 | {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, | ||
340 | {1150, 19}, {1100, 18}, {1050, 17}, {1000, 16}, | ||
341 | {975, 15}, {950, 14}, {925, 13}, {900, 12}, | ||
342 | {875, 11}, {850, 10}, {825, 9}, {800, 8}, | ||
343 | {775, 7}, {750, 6}, {725, 5}, {700, 4}, | ||
344 | {675, 3}, {650, 2}, {625, 1}, {600, 0} | ||
345 | }; | ||
346 | |||
347 | static const unsigned char __cpuinitdata mV_mobilevrm[32] = { | ||
348 | 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, | ||
349 | 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, | ||
350 | 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, | ||
351 | 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 | ||
352 | }; | ||
353 | |||
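Editor's note: the voltage tables above use the same encode-by-scaling idea as the ratio tables ("Div/Mod by 1000 to get actual voltage"): each mV_pos entry pairs a voltage in millivolts with its position (VID code) in the VRM table, and which table applies depends on the VRM type in use. A hedged sketch of a lookup over a few entries copied from the mobilevrm_mV table above; the helper name is illustrative, not driver code:

#include <stdio.h>

struct mV_pos {
	unsigned short mV;	/* voltage in millivolts */
	unsigned short pos;	/* position (VID code) in the VRM table */
};

/* A few entries copied from the mobilevrm_mV table shown above. */
static const struct mV_pos mobilevrm_sample[] = {
	{1750, 31}, {1700, 30}, {700, 4}, {600, 0},
};

/* Illustrative only: find the VID code for a requested voltage. */
static int vid_for_mv(unsigned int mv)
{
	unsigned int i;

	for (i = 0; i < sizeof(mobilevrm_sample) / sizeof(mobilevrm_sample[0]); i++)
		if (mobilevrm_sample[i].mV == mv)
			return mobilevrm_sample[i].pos;
	return -1;		/* not a supported voltage */
}

int main(void)
{
	printf("VID for 1.700 V: %d\n", vid_for_mv(1700));	/* 30 */
	return 0;
}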
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c deleted file mode 100644 index d9f51367666b..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ /dev/null | |||
@@ -1,327 +0,0 @@ | |||
1 | /* | ||
2 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * | ||
6 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
7 | */ | ||
8 | |||
9 | #include <linux/kernel.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/cpufreq.h> | ||
13 | #include <linux/timex.h> | ||
14 | |||
15 | #include <asm/msr.h> | ||
16 | #include <asm/processor.h> | ||
17 | |||
18 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
19 | "longrun", msg) | ||
20 | |||
21 | static struct cpufreq_driver longrun_driver; | ||
22 | |||
23 | /** | ||
24 | * longrun_{low,high}_freq is needed for the conversion of cpufreq kHz | ||
25 | * values into per cent values. In TMTA microcode, the following is valid: | ||
26 | * performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) | ||
27 | */ | ||
28 | static unsigned int longrun_low_freq, longrun_high_freq; | ||
29 | |||
30 | |||
31 | /** | ||
32 | * longrun_get_policy - get the current LongRun policy | ||
33 | * @policy: struct cpufreq_policy where current policy is written into | ||
34 | * | ||
35 | * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS | ||
36 | * and MSR_TMTA_LONGRUN_CTRL | ||
37 | */ | ||
38 | static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy) | ||
39 | { | ||
40 | u32 msr_lo, msr_hi; | ||
41 | |||
42 | rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); | ||
43 | dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi); | ||
44 | if (msr_lo & 0x01) | ||
45 | policy->policy = CPUFREQ_POLICY_PERFORMANCE; | ||
46 | else | ||
47 | policy->policy = CPUFREQ_POLICY_POWERSAVE; | ||
48 | |||
49 | rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); | ||
50 | dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi); | ||
51 | msr_lo &= 0x0000007F; | ||
52 | msr_hi &= 0x0000007F; | ||
53 | |||
54 | if (longrun_high_freq <= longrun_low_freq) { | ||
55 | /* Assume degenerate Longrun table */ | ||
56 | policy->min = policy->max = longrun_high_freq; | ||
57 | } else { | ||
58 | policy->min = longrun_low_freq + msr_lo * | ||
59 | ((longrun_high_freq - longrun_low_freq) / 100); | ||
60 | policy->max = longrun_low_freq + msr_hi * | ||
61 | ((longrun_high_freq - longrun_low_freq) / 100); | ||
62 | } | ||
63 | policy->cpu = 0; | ||
64 | } | ||
65 | |||
66 | |||
67 | /** | ||
68 | * longrun_set_policy - sets a new CPUFreq policy | ||
69 | * @policy: new policy | ||
70 | * | ||
71 | * Sets a new CPUFreq policy on LongRun-capable processors. This function | ||
72 | * has to be called with cpufreq_driver locked. | ||
73 | */ | ||
74 | static int longrun_set_policy(struct cpufreq_policy *policy) | ||
75 | { | ||
76 | u32 msr_lo, msr_hi; | ||
77 | u32 pctg_lo, pctg_hi; | ||
78 | |||
79 | if (!policy) | ||
80 | return -EINVAL; | ||
81 | |||
82 | if (longrun_high_freq <= longrun_low_freq) { | ||
83 | /* Assume degenerate Longrun table */ | ||
84 | pctg_lo = pctg_hi = 100; | ||
85 | } else { | ||
86 | pctg_lo = (policy->min - longrun_low_freq) / | ||
87 | ((longrun_high_freq - longrun_low_freq) / 100); | ||
88 | pctg_hi = (policy->max - longrun_low_freq) / | ||
89 | ((longrun_high_freq - longrun_low_freq) / 100); | ||
90 | } | ||
91 | |||
92 | if (pctg_hi > 100) | ||
93 | pctg_hi = 100; | ||
94 | if (pctg_lo > pctg_hi) | ||
95 | pctg_lo = pctg_hi; | ||
96 | |||
97 | /* performance or economy mode */ | ||
98 | rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); | ||
99 | msr_lo &= 0xFFFFFFFE; | ||
100 | switch (policy->policy) { | ||
101 | case CPUFREQ_POLICY_PERFORMANCE: | ||
102 | msr_lo |= 0x00000001; | ||
103 | break; | ||
104 | case CPUFREQ_POLICY_POWERSAVE: | ||
105 | break; | ||
106 | } | ||
107 | wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); | ||
108 | |||
109 | /* lower and upper boundary */ | ||
110 | rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); | ||
111 | msr_lo &= 0xFFFFFF80; | ||
112 | msr_hi &= 0xFFFFFF80; | ||
113 | msr_lo |= pctg_lo; | ||
114 | msr_hi |= pctg_hi; | ||
115 | wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); | ||
116 | |||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | |||
121 | /** | ||
122 | * longrun_verify_policy - verifies a new CPUFreq policy | ||
123 | * @policy: the policy to verify | ||
124 | * | ||
125 | * Validates a new CPUFreq policy. This function has to be called with | ||
126 | * cpufreq_driver locked. | ||
127 | */ | ||
128 | static int longrun_verify_policy(struct cpufreq_policy *policy) | ||
129 | { | ||
130 | if (!policy) | ||
131 | return -EINVAL; | ||
132 | |||
133 | policy->cpu = 0; | ||
134 | cpufreq_verify_within_limits(policy, | ||
135 | policy->cpuinfo.min_freq, | ||
136 | policy->cpuinfo.max_freq); | ||
137 | |||
138 | if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) && | ||
139 | (policy->policy != CPUFREQ_POLICY_PERFORMANCE)) | ||
140 | return -EINVAL; | ||
141 | |||
142 | return 0; | ||
143 | } | ||
144 | |||
145 | static unsigned int longrun_get(unsigned int cpu) | ||
146 | { | ||
147 | u32 eax, ebx, ecx, edx; | ||
148 | |||
149 | if (cpu) | ||
150 | return 0; | ||
151 | |||
152 | cpuid(0x80860007, &eax, &ebx, &ecx, &edx); | ||
153 | dprintk("cpuid eax is %u\n", eax); | ||
154 | |||
155 | return eax * 1000; | ||
156 | } | ||
157 | |||
158 | /** | ||
159 | * longrun_determine_freqs - determines the lowest and highest possible core frequency | ||
160 | * @low_freq: an int to put the lowest frequency into | ||
161 | * @high_freq: an int to put the highest frequency into | ||
162 | * | ||
163 | * Determines the lowest and highest possible core frequencies on this CPU. | ||
164 | * This is necessary to calculate the performance percentage according to | ||
165 | * TMTA rules: | ||
166 | * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) | ||
167 | */ | ||
168 | static int __cpuinit longrun_determine_freqs(unsigned int *low_freq, | ||
169 | unsigned int *high_freq) | ||
170 | { | ||
171 | u32 msr_lo, msr_hi; | ||
172 | u32 save_lo, save_hi; | ||
173 | u32 eax, ebx, ecx, edx; | ||
174 | u32 try_hi; | ||
175 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
176 | |||
177 | if (!low_freq || !high_freq) | ||
178 | return -EINVAL; | ||
179 | |||
180 | if (cpu_has(c, X86_FEATURE_LRTI)) { | ||
181 | /* if the LongRun Table Interface is present, the | ||
182 | * detection is a bit easier: | ||
183 | * For minimum frequency, read out the maximum | ||
184 | * level (msr_hi), write that into "currently | ||
185 | * selected level", and read out the frequency. | ||
186 | * For maximum frequency, read out level zero. | ||
187 | */ | ||
188 | /* minimum */ | ||
189 | rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi); | ||
190 | wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi); | ||
191 | rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); | ||
192 | *low_freq = msr_lo * 1000; /* to kHz */ | ||
193 | |||
194 | /* maximum */ | ||
195 | wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi); | ||
196 | rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); | ||
197 | *high_freq = msr_lo * 1000; /* to kHz */ | ||
198 | |||
199 | dprintk("longrun table interface told %u - %u kHz\n", | ||
200 | *low_freq, *high_freq); | ||
201 | |||
202 | if (*low_freq > *high_freq) | ||
203 | *low_freq = *high_freq; | ||
204 | return 0; | ||
205 | } | ||
206 | |||
207 | /* set the upper border to the value determined during TSC init */ | ||
208 | *high_freq = (cpu_khz / 1000); | ||
209 | *high_freq = *high_freq * 1000; | ||
210 | dprintk("high frequency is %u kHz\n", *high_freq); | ||
211 | |||
212 | /* get current borders */ | ||
213 | rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); | ||
214 | save_lo = msr_lo & 0x0000007F; | ||
215 | save_hi = msr_hi & 0x0000007F; | ||
216 | |||
217 | /* if current perf_pctg is larger than 90%, we need to decrease the | ||
218 | * upper limit to make the calculation more accurate. | ||
219 | */ | ||
220 | cpuid(0x80860007, &eax, &ebx, &ecx, &edx); | ||
221 | /* try decreasing in 10% steps; some processors react only | ||
222 | * to certain threshold values */ | ||
223 | for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -= 10) { | ||
224 | /* set to 0 to try_hi perf_pctg */ | ||
225 | msr_lo &= 0xFFFFFF80; | ||
226 | msr_hi &= 0xFFFFFF80; | ||
227 | msr_hi |= try_hi; | ||
228 | wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); | ||
229 | |||
230 | /* read out current core MHz and current perf_pctg */ | ||
231 | cpuid(0x80860007, &eax, &ebx, &ecx, &edx); | ||
232 | |||
233 | /* restore values */ | ||
234 | wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi); | ||
235 | } | ||
236 | dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax); | ||
237 | |||
238 | /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) | ||
239 | * equals | ||
240 | * low_freq * (1 - perf_pctg) = (cur_freq - high_freq * perf_pctg) | ||
241 | * | ||
242 | * high_freq * perf_pctg is stored temporarily in "ebx". | ||
243 | */ | ||
244 | ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */ | ||
245 | |||
246 | if ((ecx > 95) || (ecx == 0) || (eax < ebx)) | ||
247 | return -EIO; | ||
248 | |||
249 | edx = ((eax - ebx) * 100) / (100 - ecx); | ||
250 | *low_freq = edx * 1000; /* back to kHz */ | ||
251 | |||
252 | dprintk("low frequency is %u kHz\n", *low_freq); | ||
253 | |||
254 | if (*low_freq > *high_freq) | ||
255 | *low_freq = *high_freq; | ||
256 | |||
257 | return 0; | ||
258 | } | ||
259 | |||
260 | |||
261 | static int __cpuinit longrun_cpu_init(struct cpufreq_policy *policy) | ||
262 | { | ||
263 | int result = 0; | ||
264 | |||
265 | /* capability check */ | ||
266 | if (policy->cpu != 0) | ||
267 | return -ENODEV; | ||
268 | |||
269 | /* detect low and high frequency */ | ||
270 | result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq); | ||
271 | if (result) | ||
272 | return result; | ||
273 | |||
274 | /* cpuinfo and default policy values */ | ||
275 | policy->cpuinfo.min_freq = longrun_low_freq; | ||
276 | policy->cpuinfo.max_freq = longrun_high_freq; | ||
277 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
278 | longrun_get_policy(policy); | ||
279 | |||
280 | return 0; | ||
281 | } | ||
282 | |||
283 | |||
284 | static struct cpufreq_driver longrun_driver = { | ||
285 | .flags = CPUFREQ_CONST_LOOPS, | ||
286 | .verify = longrun_verify_policy, | ||
287 | .setpolicy = longrun_set_policy, | ||
288 | .get = longrun_get, | ||
289 | .init = longrun_cpu_init, | ||
290 | .name = "longrun", | ||
291 | .owner = THIS_MODULE, | ||
292 | }; | ||
293 | |||
294 | |||
295 | /** | ||
296 | * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver | ||
297 | * | ||
298 | * Initializes the LongRun support. | ||
299 | */ | ||
300 | static int __init longrun_init(void) | ||
301 | { | ||
302 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
303 | |||
304 | if (c->x86_vendor != X86_VENDOR_TRANSMETA || | ||
305 | !cpu_has(c, X86_FEATURE_LONGRUN)) | ||
306 | return -ENODEV; | ||
307 | |||
308 | return cpufreq_register_driver(&longrun_driver); | ||
309 | } | ||
310 | |||
311 | |||
312 | /** | ||
313 | * longrun_exit - unregisters LongRun support | ||
314 | */ | ||
315 | static void __exit longrun_exit(void) | ||
316 | { | ||
317 | cpufreq_unregister_driver(&longrun_driver); | ||
318 | } | ||
319 | |||
320 | |||
321 | MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); | ||
322 | MODULE_DESCRIPTION("LongRun driver for Transmeta Crusoe and " | ||
323 | "Efficeon processors."); | ||
324 | MODULE_LICENSE("GPL"); | ||
325 | |||
326 | module_init(longrun_init); | ||
327 | module_exit(longrun_exit); | ||
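Editor's note: longrun_determine_freqs() above works backwards from the TMTA percentage formula quoted in the comments, performance_pctg = (current_freq - low_freq) / (high_freq - low_freq), which rearranges to low_freq = (current_freq - high_freq * perf_pctg) / (1 - perf_pctg). A worked sketch of that rearrangement, with made-up readings standing in for the cpuid 0x80860007 outputs (illustrative values only, not a driver fragment):

#include <stdio.h>

int main(void)
{
	/* Hypothetical readings: cpuid 0x80860007 reports the current
	 * frequency in MHz (eax) and the performance percentage (ecx);
	 * high_mhz comes from the TSC calibration, as in the driver. */
	unsigned int high_mhz = 1000;
	unsigned int cur_mhz  = 700;
	unsigned int pctg     = 50;	/* per cent */

	/* high_freq * perf_pctg, the value the driver keeps in "ebx" */
	unsigned int ebx = (high_mhz * pctg) / 100;

	/* low_freq = (cur - high * pctg) / (1 - pctg), in integer form */
	unsigned int low_mhz = ((cur_mhz - ebx) * 100) / (100 - pctg);

	printf("low frequency: %u MHz\n", low_mhz);	/* 400 MHz */
	return 0;
}

Checking the result against the original formula: (700 - 400) / (1000 - 400) is indeed 50 per cent.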
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.c b/arch/x86/kernel/cpu/cpufreq/mperf.c deleted file mode 100644 index 911e193018ae..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/mperf.c +++ /dev/null | |||
@@ -1,51 +0,0 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/smp.h> | ||
3 | #include <linux/module.h> | ||
4 | #include <linux/init.h> | ||
5 | #include <linux/cpufreq.h> | ||
6 | #include <linux/slab.h> | ||
7 | |||
8 | #include "mperf.h" | ||
9 | |||
10 | static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); | ||
11 | |||
12 | /* Called via smp_call_function_single(), on the target CPU */ | ||
13 | static void read_measured_perf_ctrs(void *_cur) | ||
14 | { | ||
15 | struct aperfmperf *am = _cur; | ||
16 | |||
17 | get_aperfmperf(am); | ||
18 | } | ||
19 | |||
20 | /* | ||
21 | * Return the measured active (C0) frequency on this CPU since last call | ||
22 | * to this function. | ||
23 | * Input: cpu number | ||
24 | * Return: Average CPU frequency in terms of max frequency (zero on error) | ||
25 | * | ||
26 | * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance | ||
27 | * over a period of time, while CPU is in C0 state. | ||
28 | * IA32_MPERF counts at the rate of max advertised frequency | ||
29 | * IA32_APERF counts at the rate of actual CPU frequency | ||
30 | * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and | ||
31 | * no meaning should be associated with absolute values of these MSRs. | ||
32 | */ | ||
33 | unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy, | ||
34 | unsigned int cpu) | ||
35 | { | ||
36 | struct aperfmperf perf; | ||
37 | unsigned long ratio; | ||
38 | unsigned int retval; | ||
39 | |||
40 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) | ||
41 | return 0; | ||
42 | |||
43 | ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); | ||
44 | per_cpu(acfreq_old_perf, cpu) = perf; | ||
45 | |||
46 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; | ||
47 | |||
48 | return retval; | ||
49 | } | ||
50 | EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf); | ||
51 | MODULE_LICENSE("GPL"); | ||
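Editor's note: the comment block above is the whole mechanism: APERF counts at the actual clock, MPERF at the maximum advertised clock, so the average C0 frequency over an interval is the maximum frequency scaled by the ratio of the two deltas. A hedged sketch of that arithmetic on two raw counter deltas, using plain integer math rather than the kernel's calc_aperfmperf_ratio() fixed-point helper referenced above:

#include <stdio.h>

/* Illustrative only: average frequency from APERF/MPERF deltas. */
static unsigned long long measured_khz(unsigned long long aperf_delta,
				       unsigned long long mperf_delta,
				       unsigned long long max_khz)
{
	if (!mperf_delta)
		return 0;	/* no time spent in C0, nothing to report */
	return (max_khz * aperf_delta) / mperf_delta;
}

int main(void)
{
	/* e.g. the core ran at roughly 80% of its 2.4 GHz maximum */
	printf("%llu kHz\n", measured_khz(800000, 1000000, 2400000));
	return 0;
}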
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.h b/arch/x86/kernel/cpu/cpufreq/mperf.h deleted file mode 100644 index 5dbf2950dc22..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/mperf.h +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | /* | ||
2 | * (c) 2010 Advanced Micro Devices, Inc. | ||
3 | * Your use of this code is subject to the terms and conditions of the | ||
4 | * GNU general public license version 2. See "COPYING" or | ||
5 | * http://www.gnu.org/licenses/gpl.html | ||
6 | */ | ||
7 | |||
8 | unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy, | ||
9 | unsigned int cpu); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c deleted file mode 100644 index 52c93648e492..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ /dev/null | |||
@@ -1,331 +0,0 @@ | |||
1 | /* | ||
2 | * Pentium 4/Xeon CPU on demand clock modulation/speed scaling | ||
3 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> | ||
4 | * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com> | ||
5 | * (C) 2002 Arjan van de Ven <arjanv@redhat.com> | ||
6 | * (C) 2002 Tora T. Engstad | ||
7 | * All Rights Reserved | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | * | ||
14 | * The author(s) of this software shall not be held liable for damages | ||
15 | * of any nature resulting due to the use of this software. This | ||
16 | * software is provided AS-IS with no warranties. | ||
17 | * | ||
18 | * Date Errata Description | ||
19 | * 20020525 N44, O17 12.5% or 25% DC causes lockup | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/module.h> | ||
25 | #include <linux/init.h> | ||
26 | #include <linux/smp.h> | ||
27 | #include <linux/cpufreq.h> | ||
28 | #include <linux/cpumask.h> | ||
29 | #include <linux/timex.h> | ||
30 | |||
31 | #include <asm/processor.h> | ||
32 | #include <asm/msr.h> | ||
33 | #include <asm/timer.h> | ||
34 | |||
35 | #include "speedstep-lib.h" | ||
36 | |||
37 | #define PFX "p4-clockmod: " | ||
38 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
39 | "p4-clockmod", msg) | ||
40 | |||
41 | /* | ||
42 | * Duty Cycle (3 bits); note that DC_DISABLE is not specified in | ||
43 | * the Intel docs, it is just used here to mean "disable" | ||
44 | */ | ||
45 | enum { | ||
46 | DC_RESV, DC_DFLT, DC_25PT, DC_38PT, DC_50PT, | ||
47 | DC_64PT, DC_75PT, DC_88PT, DC_DISABLE | ||
48 | }; | ||
49 | |||
50 | #define DC_ENTRIES 8 | ||
51 | |||
52 | |||
53 | static int has_N44_O17_errata[NR_CPUS]; | ||
54 | static unsigned int stock_freq; | ||
55 | static struct cpufreq_driver p4clockmod_driver; | ||
56 | static unsigned int cpufreq_p4_get(unsigned int cpu); | ||
57 | |||
58 | static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) | ||
59 | { | ||
60 | u32 l, h; | ||
61 | |||
62 | if (!cpu_online(cpu) || | ||
63 | (newstate > DC_DISABLE) || (newstate == DC_RESV)) | ||
64 | return -EINVAL; | ||
65 | |||
66 | rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h); | ||
67 | |||
68 | if (l & 0x01) | ||
69 | dprintk("CPU#%d currently thermal throttled\n", cpu); | ||
70 | |||
71 | if (has_N44_O17_errata[cpu] && | ||
72 | (newstate == DC_25PT || newstate == DC_DFLT)) | ||
73 | newstate = DC_38PT; | ||
74 | |||
75 | rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); | ||
76 | if (newstate == DC_DISABLE) { | ||
77 | dprintk("CPU#%d disabling modulation\n", cpu); | ||
78 | wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h); | ||
79 | } else { | ||
80 | dprintk("CPU#%d setting duty cycle to %d%%\n", | ||
81 | cpu, ((125 * newstate) / 10)); | ||
82 | /* bits 63 - 5 : reserved | ||
83 | * bit 4 : enable/disable | ||
84 | * bits 3-1 : duty cycle | ||
85 | * bit 0 : reserved | ||
86 | */ | ||
87 | l = (l & ~14); | ||
88 | l = l | (1<<4) | ((newstate & 0x7)<<1); | ||
89 | wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l, h); | ||
90 | } | ||
91 | |||
92 | return 0; | ||
93 | } | ||
94 | |||
95 | |||
96 | static struct cpufreq_frequency_table p4clockmod_table[] = { | ||
97 | {DC_RESV, CPUFREQ_ENTRY_INVALID}, | ||
98 | {DC_DFLT, 0}, | ||
99 | {DC_25PT, 0}, | ||
100 | {DC_38PT, 0}, | ||
101 | {DC_50PT, 0}, | ||
102 | {DC_64PT, 0}, | ||
103 | {DC_75PT, 0}, | ||
104 | {DC_88PT, 0}, | ||
105 | {DC_DISABLE, 0}, | ||
106 | {DC_RESV, CPUFREQ_TABLE_END}, | ||
107 | }; | ||
108 | |||
109 | |||
110 | static int cpufreq_p4_target(struct cpufreq_policy *policy, | ||
111 | unsigned int target_freq, | ||
112 | unsigned int relation) | ||
113 | { | ||
114 | unsigned int newstate = DC_RESV; | ||
115 | struct cpufreq_freqs freqs; | ||
116 | int i; | ||
117 | |||
118 | if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], | ||
119 | target_freq, relation, &newstate)) | ||
120 | return -EINVAL; | ||
121 | |||
122 | freqs.old = cpufreq_p4_get(policy->cpu); | ||
123 | freqs.new = stock_freq * p4clockmod_table[newstate].index / 8; | ||
124 | |||
125 | if (freqs.new == freqs.old) | ||
126 | return 0; | ||
127 | |||
128 | /* notifiers */ | ||
129 | for_each_cpu(i, policy->cpus) { | ||
130 | freqs.cpu = i; | ||
131 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
132 | } | ||
133 | |||
134 | /* run on each logical CPU, | ||
135 | * see section 13.15.3 of IA32 Intel Architecture Software | ||
136 | * Developer's Manual, Volume 3 | ||
137 | */ | ||
138 | for_each_cpu(i, policy->cpus) | ||
139 | cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); | ||
140 | |||
141 | /* notifiers */ | ||
142 | for_each_cpu(i, policy->cpus) { | ||
143 | freqs.cpu = i; | ||
144 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
145 | } | ||
146 | |||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | |||
151 | static int cpufreq_p4_verify(struct cpufreq_policy *policy) | ||
152 | { | ||
153 | return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]); | ||
154 | } | ||
155 | |||
156 | |||
157 | static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) | ||
158 | { | ||
159 | if (c->x86 == 0x06) { | ||
160 | if (cpu_has(c, X86_FEATURE_EST)) | ||
161 | printk_once(KERN_WARNING PFX "Warning: EST-capable " | ||
162 | "CPU detected. The acpi-cpufreq module offers " | ||
163 | "voltage scaling in addition to frequency " | ||
164 | "scaling. You should use that instead of " | ||
165 | "p4-clockmod, if possible.\n"); | ||
166 | switch (c->x86_model) { | ||
167 | case 0x0E: /* Core */ | ||
168 | case 0x0F: /* Core Duo */ | ||
169 | case 0x16: /* Celeron Core */ | ||
170 | case 0x1C: /* Atom */ | ||
171 | p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; | ||
172 | return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE); | ||
173 | case 0x0D: /* Pentium M (Dothan) */ | ||
174 | p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; | ||
175 | /* fall through */ | ||
176 | case 0x09: /* Pentium M (Banias) */ | ||
177 | return speedstep_get_frequency(SPEEDSTEP_CPU_PM); | ||
178 | } | ||
179 | } | ||
180 | |||
181 | if (c->x86 != 0xF) | ||
182 | return 0; | ||
183 | |||
184 | /* on P-4s, the TSC runs with constant frequency independently of | ||
185 | * whether throttling is active or not. */ | ||
186 | p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; | ||
187 | |||
188 | if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) { | ||
189 | printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. " | ||
190 | "The speedstep-ich or acpi cpufreq modules offer " | ||
191 | "voltage scaling in addition of frequency scaling. " | ||
192 | "You should use either one instead of p4-clockmod, " | ||
193 | "if possible.\n"); | ||
194 | return speedstep_get_frequency(SPEEDSTEP_CPU_P4M); | ||
195 | } | ||
196 | |||
197 | return speedstep_get_frequency(SPEEDSTEP_CPU_P4D); | ||
198 | } | ||
199 | |||
200 | |||
201 | |||
202 | static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) | ||
203 | { | ||
204 | struct cpuinfo_x86 *c = &cpu_data(policy->cpu); | ||
205 | int cpuid = 0; | ||
206 | unsigned int i; | ||
207 | |||
208 | #ifdef CONFIG_SMP | ||
209 | cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); | ||
210 | #endif | ||
211 | |||
212 | /* Errata workaround */ | ||
213 | cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask; | ||
214 | switch (cpuid) { | ||
215 | case 0x0f07: | ||
216 | case 0x0f0a: | ||
217 | case 0x0f11: | ||
218 | case 0x0f12: | ||
219 | has_N44_O17_errata[policy->cpu] = 1; | ||
220 | dprintk("has errata -- disabling low frequencies\n"); | ||
221 | } | ||
222 | |||
223 | if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D && | ||
224 | c->x86_model < 2) { | ||
225 | /* switch to maximum frequency and measure result */ | ||
226 | cpufreq_p4_setdc(policy->cpu, DC_DISABLE); | ||
227 | recalibrate_cpu_khz(); | ||
228 | } | ||
229 | /* get max frequency */ | ||
230 | stock_freq = cpufreq_p4_get_frequency(c); | ||
231 | if (!stock_freq) | ||
232 | return -EINVAL; | ||
233 | |||
234 | /* table init */ | ||
235 | for (i = 1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { | ||
236 | if ((i < 2) && (has_N44_O17_errata[policy->cpu])) | ||
237 | p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; | ||
238 | else | ||
239 | p4clockmod_table[i].frequency = (stock_freq * i)/8; | ||
240 | } | ||
241 | cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu); | ||
242 | |||
243 | /* cpuinfo and default policy values */ | ||
244 | |||
245 | /* the transition latency is set to be 1 higher than the maximum | ||
246 | * transition latency of the ondemand governor */ | ||
247 | policy->cpuinfo.transition_latency = 10000001; | ||
248 | policy->cur = stock_freq; | ||
249 | |||
250 | return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]); | ||
251 | } | ||
252 | |||
253 | |||
254 | static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy) | ||
255 | { | ||
256 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
257 | return 0; | ||
258 | } | ||
259 | |||
260 | static unsigned int cpufreq_p4_get(unsigned int cpu) | ||
261 | { | ||
262 | u32 l, h; | ||
263 | |||
264 | rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); | ||
265 | |||
266 | if (l & 0x10) { | ||
267 | l = l >> 1; | ||
268 | l &= 0x7; | ||
269 | } else | ||
270 | l = DC_DISABLE; | ||
271 | |||
272 | if (l != DC_DISABLE) | ||
273 | return stock_freq * l / 8; | ||
274 | |||
275 | return stock_freq; | ||
276 | } | ||
277 | |||
278 | static struct freq_attr *p4clockmod_attr[] = { | ||
279 | &cpufreq_freq_attr_scaling_available_freqs, | ||
280 | NULL, | ||
281 | }; | ||
282 | |||
283 | static struct cpufreq_driver p4clockmod_driver = { | ||
284 | .verify = cpufreq_p4_verify, | ||
285 | .target = cpufreq_p4_target, | ||
286 | .init = cpufreq_p4_cpu_init, | ||
287 | .exit = cpufreq_p4_cpu_exit, | ||
288 | .get = cpufreq_p4_get, | ||
289 | .name = "p4-clockmod", | ||
290 | .owner = THIS_MODULE, | ||
291 | .attr = p4clockmod_attr, | ||
292 | }; | ||
293 | |||
294 | |||
295 | static int __init cpufreq_p4_init(void) | ||
296 | { | ||
297 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
298 | int ret; | ||
299 | |||
300 | /* | ||
301 | * THERM_CONTROL is architectural for IA32 now, so | ||
302 | * we can rely on the capability checks | ||
303 | */ | ||
304 | if (c->x86_vendor != X86_VENDOR_INTEL) | ||
305 | return -ENODEV; | ||
306 | |||
307 | if (!test_cpu_cap(c, X86_FEATURE_ACPI) || | ||
308 | !test_cpu_cap(c, X86_FEATURE_ACC)) | ||
309 | return -ENODEV; | ||
310 | |||
311 | ret = cpufreq_register_driver(&p4clockmod_driver); | ||
312 | if (!ret) | ||
313 | printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock " | ||
314 | "Modulation available\n"); | ||
315 | |||
316 | return ret; | ||
317 | } | ||
318 | |||
319 | |||
320 | static void __exit cpufreq_p4_exit(void) | ||
321 | { | ||
322 | cpufreq_unregister_driver(&p4clockmod_driver); | ||
323 | } | ||
324 | |||
325 | |||
326 | MODULE_AUTHOR("Zwane Mwaikambo <zwane@commfireservices.com>"); | ||
327 | MODULE_DESCRIPTION("cpufreq driver for Pentium(TM) 4/Xeon(TM)"); | ||
328 | MODULE_LICENSE("GPL"); | ||
329 | |||
330 | late_initcall(cpufreq_p4_init); | ||
331 | module_exit(cpufreq_p4_exit); | ||
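Editor's note: the duty-cycle states above step in eighths of the stock frequency, which is why the table init computes stock_freq * i / 8 and cpufreq_p4_setdc() prints (125 * newstate) / 10 per cent; the THERM_CONTROL encoding described in the bit-layout comment is bit 4 = enable and bits 3..1 = the state. A small sketch of both conversions, mirroring those comments rather than reusing driver symbols; the stock frequency is a made-up example:

#include <stdio.h>

int main(void)
{
	unsigned int stock_khz = 2800000;	/* hypothetical 2.8 GHz part */
	unsigned int state;

	for (state = 1; state <= 7; state++) {
		/* state n throttles the clock to n/8 of stock frequency */
		unsigned int khz = stock_khz * state / 8;
		/* THERM_CONTROL low word: bit 4 enables on-demand
		 * modulation, bits 3..1 carry the duty-cycle state */
		unsigned int msr_lo = (1u << 4) | ((state & 0x7) << 1);

		printf("state %u: %u kHz, ~%u%%, msr_lo=0x%02x\n",
		       state, khz, (125 * state) / 10, msr_lo);
	}
	return 0;
}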
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c deleted file mode 100644 index 755a31e0f5b0..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ /dev/null | |||
@@ -1,624 +0,0 @@ | |||
1 | /* | ||
2 | * pcc-cpufreq.c - Processor Clocking Control firmware cpufreq interface | ||
3 | * | ||
4 | * Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com> | ||
5 | * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. | ||
6 | * Nagananda Chumbalkar <nagananda.chumbalkar@hp.com> | ||
7 | * | ||
8 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; version 2 of the License. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, but | ||
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON | ||
17 | * INFRINGEMENT. See the GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write to the Free Software Foundation, Inc., | ||
21 | * 675 Mass Ave, Cambridge, MA 02139, USA. | ||
22 | * | ||
23 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
24 | */ | ||
25 | |||
26 | #include <linux/kernel.h> | ||
27 | #include <linux/module.h> | ||
28 | #include <linux/init.h> | ||
29 | #include <linux/smp.h> | ||
30 | #include <linux/sched.h> | ||
31 | #include <linux/cpufreq.h> | ||
32 | #include <linux/compiler.h> | ||
33 | #include <linux/slab.h> | ||
34 | |||
35 | #include <linux/acpi.h> | ||
36 | #include <linux/io.h> | ||
37 | #include <linux/spinlock.h> | ||
38 | #include <linux/uaccess.h> | ||
39 | |||
40 | #include <acpi/processor.h> | ||
41 | |||
42 | #define PCC_VERSION "1.00.00" | ||
43 | #define POLL_LOOPS 300 | ||
44 | |||
45 | #define CMD_COMPLETE 0x1 | ||
46 | #define CMD_GET_FREQ 0x0 | ||
47 | #define CMD_SET_FREQ 0x1 | ||
48 | |||
49 | #define BUF_SZ 4 | ||
50 | |||
51 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
52 | "pcc-cpufreq", msg) | ||
53 | |||
54 | struct pcc_register_resource { | ||
55 | u8 descriptor; | ||
56 | u16 length; | ||
57 | u8 space_id; | ||
58 | u8 bit_width; | ||
59 | u8 bit_offset; | ||
60 | u8 access_size; | ||
61 | u64 address; | ||
62 | } __attribute__ ((packed)); | ||
63 | |||
64 | struct pcc_memory_resource { | ||
65 | u8 descriptor; | ||
66 | u16 length; | ||
67 | u8 space_id; | ||
68 | u8 resource_usage; | ||
69 | u8 type_specific; | ||
70 | u64 granularity; | ||
71 | u64 minimum; | ||
72 | u64 maximum; | ||
73 | u64 translation_offset; | ||
74 | u64 address_length; | ||
75 | } __attribute__ ((packed)); | ||
76 | |||
77 | static struct cpufreq_driver pcc_cpufreq_driver; | ||
78 | |||
79 | struct pcc_header { | ||
80 | u32 signature; | ||
81 | u16 length; | ||
82 | u8 major; | ||
83 | u8 minor; | ||
84 | u32 features; | ||
85 | u16 command; | ||
86 | u16 status; | ||
87 | u32 latency; | ||
88 | u32 minimum_time; | ||
89 | u32 maximum_time; | ||
90 | u32 nominal; | ||
91 | u32 throttled_frequency; | ||
92 | u32 minimum_frequency; | ||
93 | }; | ||
94 | |||
95 | static void __iomem *pcch_virt_addr; | ||
96 | static struct pcc_header __iomem *pcch_hdr; | ||
97 | |||
98 | static DEFINE_SPINLOCK(pcc_lock); | ||
99 | |||
100 | static struct acpi_generic_address doorbell; | ||
101 | |||
102 | static u64 doorbell_preserve; | ||
103 | static u64 doorbell_write; | ||
104 | |||
105 | static u8 OSC_UUID[16] = {0x63, 0x9B, 0x2C, 0x9F, 0x70, 0x91, 0x49, 0x1f, | ||
106 | 0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46}; | ||
107 | |||
108 | struct pcc_cpu { | ||
109 | u32 input_offset; | ||
110 | u32 output_offset; | ||
111 | }; | ||
112 | |||
113 | static struct pcc_cpu __percpu *pcc_cpu_info; | ||
114 | |||
115 | static int pcc_cpufreq_verify(struct cpufreq_policy *policy) | ||
116 | { | ||
117 | cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, | ||
118 | policy->cpuinfo.max_freq); | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | static inline void pcc_cmd(void) | ||
123 | { | ||
124 | u64 doorbell_value; | ||
125 | int i; | ||
126 | |||
127 | acpi_read(&doorbell_value, &doorbell); | ||
128 | acpi_write((doorbell_value & doorbell_preserve) | doorbell_write, | ||
129 | &doorbell); | ||
130 | |||
131 | for (i = 0; i < POLL_LOOPS; i++) { | ||
132 | if (ioread16(&pcch_hdr->status) & CMD_COMPLETE) | ||
133 | break; | ||
134 | } | ||
135 | } | ||
136 | |||
137 | static inline void pcc_clear_mapping(void) | ||
138 | { | ||
139 | if (pcch_virt_addr) | ||
140 | iounmap(pcch_virt_addr); | ||
141 | pcch_virt_addr = NULL; | ||
142 | } | ||
143 | |||
144 | static unsigned int pcc_get_freq(unsigned int cpu) | ||
145 | { | ||
146 | struct pcc_cpu *pcc_cpu_data; | ||
147 | unsigned int curr_freq; | ||
148 | unsigned int freq_limit; | ||
149 | u16 status; | ||
150 | u32 input_buffer; | ||
151 | u32 output_buffer; | ||
152 | |||
153 | spin_lock(&pcc_lock); | ||
154 | |||
155 | dprintk("get: get_freq for CPU %d\n", cpu); | ||
156 | pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); | ||
157 | |||
158 | input_buffer = 0x1; | ||
159 | iowrite32(input_buffer, | ||
160 | (pcch_virt_addr + pcc_cpu_data->input_offset)); | ||
161 | iowrite16(CMD_GET_FREQ, &pcch_hdr->command); | ||
162 | |||
163 | pcc_cmd(); | ||
164 | |||
165 | output_buffer = | ||
166 | ioread32(pcch_virt_addr + pcc_cpu_data->output_offset); | ||
167 | |||
168 | /* Clear the input buffer - we are done with the current command */ | ||
169 | memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); | ||
170 | |||
171 | status = ioread16(&pcch_hdr->status); | ||
172 | if (status != CMD_COMPLETE) { | ||
173 | dprintk("get: FAILED: for CPU %d, status is %d\n", | ||
174 | cpu, status); | ||
175 | goto cmd_incomplete; | ||
176 | } | ||
177 | iowrite16(0, &pcch_hdr->status); | ||
178 | curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff)) | ||
179 | / 100) * 1000); | ||
180 | |||
181 | dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is " | ||
182 | "0x%x, contains a value of: 0x%x. Speed is: %d MHz\n", | ||
183 | cpu, (pcch_virt_addr + pcc_cpu_data->output_offset), | ||
184 | output_buffer, curr_freq); | ||
185 | |||
186 | freq_limit = (output_buffer >> 8) & 0xff; | ||
187 | if (freq_limit != 0xff) { | ||
188 | dprintk("get: frequency for cpu %d is being temporarily" | ||
189 | " capped at %d\n", cpu, curr_freq); | ||
190 | } | ||
191 | |||
192 | spin_unlock(&pcc_lock); | ||
193 | return curr_freq; | ||
194 | |||
195 | cmd_incomplete: | ||
196 | iowrite16(0, &pcch_hdr->status); | ||
197 | spin_unlock(&pcc_lock); | ||
198 | return 0; | ||
199 | } | ||
200 | |||
201 | static int pcc_cpufreq_target(struct cpufreq_policy *policy, | ||
202 | unsigned int target_freq, | ||
203 | unsigned int relation) | ||
204 | { | ||
205 | struct pcc_cpu *pcc_cpu_data; | ||
206 | struct cpufreq_freqs freqs; | ||
207 | u16 status; | ||
208 | u32 input_buffer; | ||
209 | int cpu; | ||
210 | |||
211 | spin_lock(&pcc_lock); | ||
212 | cpu = policy->cpu; | ||
213 | pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); | ||
214 | |||
215 | dprintk("target: CPU %d should go to target freq: %d " | ||
216 | "(virtual) input_offset is 0x%x\n", | ||
217 | cpu, target_freq, | ||
218 | (pcch_virt_addr + pcc_cpu_data->input_offset)); | ||
219 | |||
220 | freqs.new = target_freq; | ||
221 | freqs.cpu = cpu; | ||
222 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
223 | |||
224 | input_buffer = 0x1 | (((target_freq * 100) | ||
225 | / (ioread32(&pcch_hdr->nominal) * 1000)) << 8); | ||
226 | iowrite32(input_buffer, | ||
227 | (pcch_virt_addr + pcc_cpu_data->input_offset)); | ||
228 | iowrite16(CMD_SET_FREQ, &pcch_hdr->command); | ||
229 | |||
230 | pcc_cmd(); | ||
231 | |||
232 | /* Clear the input buffer - we are done with the current command */ | ||
233 | memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); | ||
234 | |||
235 | status = ioread16(&pcch_hdr->status); | ||
236 | if (status != CMD_COMPLETE) { | ||
237 | dprintk("target: FAILED for cpu %d, with status: 0x%x\n", | ||
238 | cpu, status); | ||
239 | goto cmd_incomplete; | ||
240 | } | ||
241 | iowrite16(0, &pcch_hdr->status); | ||
242 | |||
243 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
244 | dprintk("target: was SUCCESSFUL for cpu %d\n", cpu); | ||
245 | spin_unlock(&pcc_lock); | ||
246 | |||
247 | return 0; | ||
248 | |||
249 | cmd_incomplete: | ||
250 | iowrite16(0, &pcch_hdr->status); | ||
251 | spin_unlock(&pcc_lock); | ||
252 | return -EINVAL; | ||
253 | } | ||
254 | |||
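Editor's note: pcc_get_freq() and pcc_cpufreq_target() above follow the same mailbox handshake: write the per-CPU input buffer in the shared PCCH region, write a command word into the header, ring the doorbell, then poll the status word for CMD_COMPLETE and clear it. The toy model below only simulates that sequence so it can be followed end to end; the firmware side and all values are invented, and none of this reuses the driver's I/O accessors:

#include <stdio.h>

#define CMD_GET_FREQ	0x0
#define CMD_COMPLETE	0x1

struct toy_pcch {
	unsigned short command;
	unsigned short status;
	unsigned int   nominal_mhz;
	unsigned int   input;
	unsigned int   output;
};

/* Stand-in for the platform firmware reacting to the doorbell write. */
static void toy_ring_doorbell(struct toy_pcch *hdr)
{
	if (hdr->command == CMD_GET_FREQ && (hdr->input & 0x1))
		hdr->output = 80;		/* running at 80% of nominal */
	hdr->status = CMD_COMPLETE;
}

int main(void)
{
	struct toy_pcch hdr = { .nominal_mhz = 2000 };
	unsigned int khz;

	hdr.input = 0x1;			/* request for this CPU */
	hdr.command = CMD_GET_FREQ;
	toy_ring_doorbell(&hdr);		/* doorbell write + poll */
	if (!(hdr.status & CMD_COMPLETE))
		return 1;
	hdr.status = 0;				/* ack, as the driver does */

	khz = hdr.nominal_mhz * (hdr.output & 0xff) / 100 * 1000;
	printf("current frequency: %u kHz\n", khz);	/* 1600000 */
	return 0;
}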
255 | static int pcc_get_offset(int cpu) | ||
256 | { | ||
257 | acpi_status status; | ||
258 | struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; | ||
259 | union acpi_object *pccp, *offset; | ||
260 | struct pcc_cpu *pcc_cpu_data; | ||
261 | struct acpi_processor *pr; | ||
262 | int ret = 0; | ||
263 | |||
264 | pr = per_cpu(processors, cpu); | ||
265 | pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); | ||
266 | |||
267 | status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer); | ||
268 | if (ACPI_FAILURE(status)) | ||
269 | return -ENODEV; | ||
270 | |||
271 | pccp = buffer.pointer; | ||
272 | if (!pccp || pccp->type != ACPI_TYPE_PACKAGE) { | ||
273 | ret = -ENODEV; | ||
274 | goto out_free; | ||
275 | } | ||
276 | |||
277 | offset = &(pccp->package.elements[0]); | ||
278 | if (!offset || offset->type != ACPI_TYPE_INTEGER) { | ||
279 | ret = -ENODEV; | ||
280 | goto out_free; | ||
281 | } | ||
282 | |||
283 | pcc_cpu_data->input_offset = offset->integer.value; | ||
284 | |||
285 | offset = &(pccp->package.elements[1]); | ||
286 | if (!offset || offset->type != ACPI_TYPE_INTEGER) { | ||
287 | ret = -ENODEV; | ||
288 | goto out_free; | ||
289 | } | ||
290 | |||
291 | pcc_cpu_data->output_offset = offset->integer.value; | ||
292 | |||
293 | memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); | ||
294 | memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ); | ||
295 | |||
296 | dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data " | ||
297 | "input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n", | ||
298 | cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset); | ||
299 | out_free: | ||
300 | kfree(buffer.pointer); | ||
301 | return ret; | ||
302 | } | ||
303 | |||
304 | static int __init pcc_cpufreq_do_osc(acpi_handle *handle) | ||
305 | { | ||
306 | acpi_status status; | ||
307 | struct acpi_object_list input; | ||
308 | struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; | ||
309 | union acpi_object in_params[4]; | ||
310 | union acpi_object *out_obj; | ||
311 | u32 capabilities[2]; | ||
312 | u32 errors; | ||
313 | u32 supported; | ||
314 | int ret = 0; | ||
315 | |||
316 | input.count = 4; | ||
317 | input.pointer = in_params; | ||
318 | in_params[0].type = ACPI_TYPE_BUFFER; | ||
319 | in_params[0].buffer.length = 16; | ||
320 | in_params[0].buffer.pointer = OSC_UUID; | ||
321 | in_params[1].type = ACPI_TYPE_INTEGER; | ||
322 | in_params[1].integer.value = 1; | ||
323 | in_params[2].type = ACPI_TYPE_INTEGER; | ||
324 | in_params[2].integer.value = 2; | ||
325 | in_params[3].type = ACPI_TYPE_BUFFER; | ||
326 | in_params[3].buffer.length = 8; | ||
327 | in_params[3].buffer.pointer = (u8 *)&capabilities; | ||
328 | |||
329 | capabilities[0] = OSC_QUERY_ENABLE; | ||
330 | capabilities[1] = 0x1; | ||
331 | |||
332 | status = acpi_evaluate_object(*handle, "_OSC", &input, &output); | ||
333 | if (ACPI_FAILURE(status)) | ||
334 | return -ENODEV; | ||
335 | |||
336 | if (!output.length) | ||
337 | return -ENODEV; | ||
338 | |||
339 | out_obj = output.pointer; | ||
340 | if (out_obj->type != ACPI_TYPE_BUFFER) { | ||
341 | ret = -ENODEV; | ||
342 | goto out_free; | ||
343 | } | ||
344 | |||
345 | errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); | ||
346 | if (errors) { | ||
347 | ret = -ENODEV; | ||
348 | goto out_free; | ||
349 | } | ||
350 | |||
351 | supported = *((u32 *)(out_obj->buffer.pointer + 4)); | ||
352 | if (!(supported & 0x1)) { | ||
353 | ret = -ENODEV; | ||
354 | goto out_free; | ||
355 | } | ||
356 | |||
357 | kfree(output.pointer); | ||
358 | capabilities[0] = 0x0; | ||
359 | capabilities[1] = 0x1; | ||
360 | |||
361 | status = acpi_evaluate_object(*handle, "_OSC", &input, &output); | ||
362 | if (ACPI_FAILURE(status)) | ||
363 | return -ENODEV; | ||
364 | |||
365 | if (!output.length) | ||
366 | return -ENODEV; | ||
367 | |||
368 | out_obj = output.pointer; | ||
369 | if (out_obj->type != ACPI_TYPE_BUFFER) { | ||
370 | ret = -ENODEV; | ||
371 | goto out_free; | ||
372 | } | ||
373 | |||
374 | errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); | ||
375 | if (errors) { | ||
376 | ret = -ENODEV; | ||
377 | goto out_free; | ||
378 | } | ||
379 | |||
380 | supported = *((u32 *)(out_obj->buffer.pointer + 4)); | ||
381 | if (!(supported & 0x1)) { | ||
382 | ret = -ENODEV; | ||
383 | goto out_free; | ||
384 | } | ||
385 | |||
386 | out_free: | ||
387 | kfree(output.pointer); | ||
388 | return ret; | ||
389 | } | ||
390 | |||
391 | static int __init pcc_cpufreq_probe(void) | ||
392 | { | ||
393 | acpi_status status; | ||
394 | struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; | ||
395 | struct pcc_memory_resource *mem_resource; | ||
396 | struct pcc_register_resource *reg_resource; | ||
397 | union acpi_object *out_obj, *member; | ||
398 | acpi_handle handle, osc_handle, pcch_handle; | ||
399 | int ret = 0; | ||
400 | |||
401 | status = acpi_get_handle(NULL, "\\_SB", &handle); | ||
402 | if (ACPI_FAILURE(status)) | ||
403 | return -ENODEV; | ||
404 | |||
405 | status = acpi_get_handle(handle, "PCCH", &pcch_handle); | ||
406 | if (ACPI_FAILURE(status)) | ||
407 | return -ENODEV; | ||
408 | |||
409 | status = acpi_get_handle(handle, "_OSC", &osc_handle); | ||
410 | if (ACPI_SUCCESS(status)) { | ||
411 | ret = pcc_cpufreq_do_osc(&osc_handle); | ||
412 | if (ret) | ||
413 | dprintk("probe: _OSC evaluation did not succeed\n"); | ||
414 | /* Firmware's use of _OSC is optional */ | ||
415 | ret = 0; | ||
416 | } | ||
417 | |||
418 | status = acpi_evaluate_object(handle, "PCCH", NULL, &output); | ||
419 | if (ACPI_FAILURE(status)) | ||
420 | return -ENODEV; | ||
421 | |||
422 | out_obj = output.pointer; | ||
423 | if (out_obj->type != ACPI_TYPE_PACKAGE) { | ||
424 | ret = -ENODEV; | ||
425 | goto out_free; | ||
426 | } | ||
427 | |||
428 | member = &out_obj->package.elements[0]; | ||
429 | if (member->type != ACPI_TYPE_BUFFER) { | ||
430 | ret = -ENODEV; | ||
431 | goto out_free; | ||
432 | } | ||
433 | |||
434 | mem_resource = (struct pcc_memory_resource *)member->buffer.pointer; | ||
435 | |||
436 | dprintk("probe: mem_resource descriptor: 0x%x," | ||
437 | " length: %d, space_id: %d, resource_usage: %d," | ||
438 | " type_specific: %d, granularity: 0x%llx," | ||
439 | " minimum: 0x%llx, maximum: 0x%llx," | ||
440 | " translation_offset: 0x%llx, address_length: 0x%llx\n", | ||
441 | mem_resource->descriptor, mem_resource->length, | ||
442 | mem_resource->space_id, mem_resource->resource_usage, | ||
443 | mem_resource->type_specific, mem_resource->granularity, | ||
444 | mem_resource->minimum, mem_resource->maximum, | ||
445 | mem_resource->translation_offset, | ||
446 | mem_resource->address_length); | ||
447 | |||
448 | if (mem_resource->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) { | ||
449 | ret = -ENODEV; | ||
450 | goto out_free; | ||
451 | } | ||
452 | |||
453 | pcch_virt_addr = ioremap_nocache(mem_resource->minimum, | ||
454 | mem_resource->address_length); | ||
455 | if (pcch_virt_addr == NULL) { | ||
456 | dprintk("probe: could not map shared mem region\n"); | ||
457 | goto out_free; | ||
458 | } | ||
459 | pcch_hdr = pcch_virt_addr; | ||
460 | |||
461 | dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr); | ||
462 | dprintk("probe: PCCH header is at physical address: 0x%llx," | ||
463 | " signature: 0x%x, length: %d bytes, major: %d, minor: %d," | ||
464 | " supported features: 0x%x, command field: 0x%x," | ||
465 | " status field: 0x%x, nominal latency: %d us\n", | ||
466 | mem_resource->minimum, ioread32(&pcch_hdr->signature), | ||
467 | ioread16(&pcch_hdr->length), ioread8(&pcch_hdr->major), | ||
468 | ioread8(&pcch_hdr->minor), ioread32(&pcch_hdr->features), | ||
469 | ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status), | ||
470 | ioread32(&pcch_hdr->latency)); | ||
471 | |||
472 | dprintk("probe: min time between commands: %d us," | ||
473 | " max time between commands: %d us," | ||
474 | " nominal CPU frequency: %d MHz," | ||
475 | " minimum CPU frequency: %d MHz," | ||
476 | " minimum CPU frequency without throttling: %d MHz\n", | ||
477 | ioread32(&pcch_hdr->minimum_time), | ||
478 | ioread32(&pcch_hdr->maximum_time), | ||
479 | ioread32(&pcch_hdr->nominal), | ||
480 | ioread32(&pcch_hdr->throttled_frequency), | ||
481 | ioread32(&pcch_hdr->minimum_frequency)); | ||
482 | |||
483 | member = &out_obj->package.elements[1]; | ||
484 | if (member->type != ACPI_TYPE_BUFFER) { | ||
485 | ret = -ENODEV; | ||
486 | goto pcch_free; | ||
487 | } | ||
488 | |||
489 | reg_resource = (struct pcc_register_resource *)member->buffer.pointer; | ||
490 | |||
491 | doorbell.space_id = reg_resource->space_id; | ||
492 | doorbell.bit_width = reg_resource->bit_width; | ||
493 | doorbell.bit_offset = reg_resource->bit_offset; | ||
494 | doorbell.access_width = 64; | ||
495 | doorbell.address = reg_resource->address; | ||
496 | |||
497 | dprintk("probe: doorbell: space_id is %d, bit_width is %d, " | ||
498 | "bit_offset is %d, access_width is %d, address is 0x%llx\n", | ||
499 | doorbell.space_id, doorbell.bit_width, doorbell.bit_offset, | ||
500 | doorbell.access_width, reg_resource->address); | ||
501 | |||
502 | member = &out_obj->package.elements[2]; | ||
503 | if (member->type != ACPI_TYPE_INTEGER) { | ||
504 | ret = -ENODEV; | ||
505 | goto pcch_free; | ||
506 | } | ||
507 | |||
508 | doorbell_preserve = member->integer.value; | ||
509 | |||
510 | member = &out_obj->package.elements[3]; | ||
511 | if (member->type != ACPI_TYPE_INTEGER) { | ||
512 | ret = -ENODEV; | ||
513 | goto pcch_free; | ||
514 | } | ||
515 | |||
516 | doorbell_write = member->integer.value; | ||
517 | |||
518 | dprintk("probe: doorbell_preserve: 0x%llx," | ||
519 | " doorbell_write: 0x%llx\n", | ||
520 | doorbell_preserve, doorbell_write); | ||
521 | |||
522 | pcc_cpu_info = alloc_percpu(struct pcc_cpu); | ||
523 | if (!pcc_cpu_info) { | ||
524 | ret = -ENOMEM; | ||
525 | goto pcch_free; | ||
526 | } | ||
527 | |||
528 | printk(KERN_DEBUG "pcc-cpufreq: (v%s) driver loaded with frequency" | ||
529 | " limits: %d MHz, %d MHz\n", PCC_VERSION, | ||
530 | ioread32(&pcch_hdr->minimum_frequency), | ||
531 | ioread32(&pcch_hdr->nominal)); | ||
532 | kfree(output.pointer); | ||
533 | return ret; | ||
534 | pcch_free: | ||
535 | pcc_clear_mapping(); | ||
536 | out_free: | ||
537 | kfree(output.pointer); | ||
538 | return ret; | ||
539 | } | ||
540 | |||
541 | static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) | ||
542 | { | ||
543 | unsigned int cpu = policy->cpu; | ||
544 | unsigned int result = 0; | ||
545 | |||
546 | if (!pcch_virt_addr) { | ||
547 | result = -1; | ||
548 | goto out; | ||
549 | } | ||
550 | |||
551 | result = pcc_get_offset(cpu); | ||
552 | if (result) { | ||
553 | dprintk("init: PCCP evaluation failed\n"); | ||
554 | goto out; | ||
555 | } | ||
556 | |||
557 | policy->max = policy->cpuinfo.max_freq = | ||
558 | ioread32(&pcch_hdr->nominal) * 1000; | ||
559 | policy->min = policy->cpuinfo.min_freq = | ||
560 | ioread32(&pcch_hdr->minimum_frequency) * 1000; | ||
561 | policy->cur = pcc_get_freq(cpu); | ||
562 | |||
563 | if (!policy->cur) { | ||
564 | dprintk("init: Unable to get current CPU frequency\n"); | ||
565 | result = -EINVAL; | ||
566 | goto out; | ||
567 | } | ||
568 | |||
569 | dprintk("init: policy->max is %d, policy->min is %d\n", | ||
570 | policy->max, policy->min); | ||
571 | out: | ||
572 | return result; | ||
573 | } | ||
574 | |||
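For orientation: the PCCH header reports frequencies in MHz, so the multiplication by 1000 above yields the kHz values cpufreq expects; e.g. a nominal frequency of 2000 MHz becomes policy->max = 2000000 kHz.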
575 | static int pcc_cpufreq_cpu_exit(struct cpufreq_policy *policy) | ||
576 | { | ||
577 | return 0; | ||
578 | } | ||
579 | |||
580 | static struct cpufreq_driver pcc_cpufreq_driver = { | ||
581 | .flags = CPUFREQ_CONST_LOOPS, | ||
582 | .get = pcc_get_freq, | ||
583 | .verify = pcc_cpufreq_verify, | ||
584 | .target = pcc_cpufreq_target, | ||
585 | .init = pcc_cpufreq_cpu_init, | ||
586 | .exit = pcc_cpufreq_cpu_exit, | ||
587 | .name = "pcc-cpufreq", | ||
588 | .owner = THIS_MODULE, | ||
589 | }; | ||
590 | |||
591 | static int __init pcc_cpufreq_init(void) | ||
592 | { | ||
593 | int ret; | ||
594 | |||
595 | if (acpi_disabled) | ||
596 | return 0; | ||
597 | |||
598 | ret = pcc_cpufreq_probe(); | ||
599 | if (ret) { | ||
600 | dprintk("pcc_cpufreq_init: PCCH evaluation failed\n"); | ||
601 | return ret; | ||
602 | } | ||
603 | |||
604 | ret = cpufreq_register_driver(&pcc_cpufreq_driver); | ||
605 | |||
606 | return ret; | ||
607 | } | ||
608 | |||
609 | static void __exit pcc_cpufreq_exit(void) | ||
610 | { | ||
611 | cpufreq_unregister_driver(&pcc_cpufreq_driver); | ||
612 | |||
613 | pcc_clear_mapping(); | ||
614 | |||
615 | free_percpu(pcc_cpu_info); | ||
616 | } | ||
617 | |||
618 | MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar"); | ||
619 | MODULE_VERSION(PCC_VERSION); | ||
620 | MODULE_DESCRIPTION("Processor Clocking Control interface driver"); | ||
621 | MODULE_LICENSE("GPL"); | ||
622 | |||
623 | late_initcall(pcc_cpufreq_init); | ||
624 | module_exit(pcc_cpufreq_exit); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c deleted file mode 100644 index b3379d6a5c57..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ /dev/null | |||
@@ -1,261 +0,0 @@ | |||
1 | /* | ||
2 | * This file was based upon code in Powertweak Linux (http://powertweak.sf.net) | ||
3 | * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, | ||
4 | * Dominik Brodowski. | ||
5 | * | ||
6 | * Licensed under the terms of the GNU GPL License version 2. | ||
7 | * | ||
8 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
9 | */ | ||
10 | |||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/cpufreq.h> | ||
15 | #include <linux/ioport.h> | ||
16 | #include <linux/timex.h> | ||
17 | #include <linux/io.h> | ||
18 | |||
19 | #include <asm/msr.h> | ||
20 | |||
21 | #define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long | ||
22 | as it is unused */ | ||
23 | |||
24 | #define PFX "powernow-k6: " | ||
25 | static unsigned int busfreq; /* FSB, in 10 kHz */ | ||
26 | static unsigned int max_multiplier; | ||
27 | |||
28 | |||
29 | /* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */ | ||
30 | static struct cpufreq_frequency_table clock_ratio[] = { | ||
31 | {45, /* 000 -> 4.5x */ 0}, | ||
32 | {50, /* 001 -> 5.0x */ 0}, | ||
33 | {40, /* 010 -> 4.0x */ 0}, | ||
34 | {55, /* 011 -> 5.5x */ 0}, | ||
35 | {20, /* 100 -> 2.0x */ 0}, | ||
36 | {30, /* 101 -> 3.0x */ 0}, | ||
37 | {60, /* 110 -> 6.0x */ 0}, | ||
38 | {35, /* 111 -> 3.5x */ 0}, | ||
39 | {0, CPUFREQ_TABLE_END} | ||
40 | }; | ||
41 | |||
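Both factors in this driver carry an implicit x10: busfreq holds the FSB in units of 10 kHz, and each table index is the multiplier times ten, so their product is already a frequency in kHz. A minimal sketch of that arithmetic (hypothetical helper name), assuming a 100 MHz FSB for the example:

/* Hypothetical illustration of the x10 encodings used above. */
static unsigned int k6_freq_khz(unsigned int busfreq_10khz,
				unsigned int ratio_x10)
{
	/* e.g. 100 MHz FSB -> busfreq_10khz = 10000; 4.5x -> ratio_x10 = 45;
	 * 10000 * 45 = 450000 kHz = 450 MHz. */
	return busfreq_10khz * ratio_x10;
}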
42 | |||
43 | /** | ||
44 | * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier | ||
45 | * | ||
46 | * Returns the current setting of the frequency multiplier. Core clock | ||
47 | * speed is the Front-Side Bus frequency multiplied by this value. | ||
48 | */ | ||
49 | static int powernow_k6_get_cpu_multiplier(void) | ||
50 | { | ||
51 | u64 invalue = 0; | ||
52 | u32 msrval; | ||
53 | |||
54 | msrval = POWERNOW_IOPORT + 0x1; | ||
55 | wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ | ||
56 | invalue = inl(POWERNOW_IOPORT + 0x8); | ||
57 | msrval = POWERNOW_IOPORT + 0x0; | ||
58 | wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ | ||
59 | |||
60 | return clock_ratio[(invalue >> 5)&7].index; | ||
61 | } | ||
62 | |||
63 | |||
64 | /** | ||
65 | * powernow_k6_set_state - set the PowerNow! multiplier | ||
66 | * @best_i: clock_ratio[best_i] is the target multiplier | ||
67 | * | ||
68 | * Tries to change the PowerNow! multiplier | ||
69 | */ | ||
70 | static void powernow_k6_set_state(unsigned int best_i) | ||
71 | { | ||
72 | unsigned long outvalue = 0, invalue = 0; | ||
73 | unsigned long msrval; | ||
74 | struct cpufreq_freqs freqs; | ||
75 | |||
76 | if (clock_ratio[best_i].index > max_multiplier) { | ||
77 | printk(KERN_ERR PFX "invalid target frequency\n"); | ||
78 | return; | ||
79 | } | ||
80 | |||
81 | freqs.old = busfreq * powernow_k6_get_cpu_multiplier(); | ||
82 | freqs.new = busfreq * clock_ratio[best_i].index; | ||
83 | freqs.cpu = 0; /* powernow-k6.c is a UP-only driver */ | ||
84 | |||
85 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
86 | |||
87 | /* we now need to transform best_i to the BVC format, see AMD#23446 */ | ||
88 | |||
89 | outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5); | ||
90 | |||
91 | msrval = POWERNOW_IOPORT + 0x1; | ||
92 | wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ | ||
93 | invalue = inl(POWERNOW_IOPORT + 0x8); | ||
94 | invalue = invalue & 0xf; | ||
95 | outvalue = outvalue | invalue; | ||
96 | outl(outvalue , (POWERNOW_IOPORT + 0x8)); | ||
97 | msrval = POWERNOW_IOPORT + 0x0; | ||
98 | wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ | ||
99 | |||
100 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
101 | |||
102 | return; | ||
103 | } | ||
104 | |||
105 | |||
106 | /** | ||
107 | * powernow_k6_verify - verifies a new CPUfreq policy | ||
108 | * @policy: new policy | ||
109 | * | ||
110 | * Policy must be within lowest and highest possible CPU Frequency, | ||
111 | * and at least one possible state must be within min and max. | ||
112 | */ | ||
113 | static int powernow_k6_verify(struct cpufreq_policy *policy) | ||
114 | { | ||
115 | return cpufreq_frequency_table_verify(policy, &clock_ratio[0]); | ||
116 | } | ||
117 | |||
118 | |||
119 | /** | ||
120 | * powernow_k6_setpolicy - sets a new CPUFreq policy | ||
121 | * @policy: new policy | ||
122 | * @target_freq: the target frequency | ||
123 | * @relation: how that frequency relates to achieved frequency | ||
124 | * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | ||
125 | * | ||
126 | * sets a new CPUFreq policy | ||
127 | */ | ||
128 | static int powernow_k6_target(struct cpufreq_policy *policy, | ||
129 | unsigned int target_freq, | ||
130 | unsigned int relation) | ||
131 | { | ||
132 | unsigned int newstate = 0; | ||
133 | |||
134 | if (cpufreq_frequency_table_target(policy, &clock_ratio[0], | ||
135 | target_freq, relation, &newstate)) | ||
136 | return -EINVAL; | ||
137 | |||
138 | powernow_k6_set_state(newstate); | ||
139 | |||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | |||
144 | static int powernow_k6_cpu_init(struct cpufreq_policy *policy) | ||
145 | { | ||
146 | unsigned int i, f; | ||
147 | int result; | ||
148 | |||
149 | if (policy->cpu != 0) | ||
150 | return -ENODEV; | ||
151 | |||
152 | /* get frequencies */ | ||
153 | max_multiplier = powernow_k6_get_cpu_multiplier(); | ||
154 | busfreq = cpu_khz / max_multiplier; | ||
155 | |||
156 | /* table init */ | ||
157 | for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { | ||
158 | f = clock_ratio[i].index; | ||
159 | if (f > max_multiplier) | ||
160 | clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; | ||
161 | else | ||
162 | clock_ratio[i].frequency = busfreq * f; | ||
163 | } | ||
164 | |||
165 | /* cpuinfo and default policy values */ | ||
166 | policy->cpuinfo.transition_latency = 200000; | ||
167 | policy->cur = busfreq * max_multiplier; | ||
168 | |||
169 | result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); | ||
170 | if (result) | ||
171 | return result; | ||
172 | |||
173 | cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu); | ||
174 | |||
175 | return 0; | ||
176 | } | ||
177 | |||
178 | |||
179 | static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) | ||
180 | { | ||
181 | unsigned int i; | ||
182 | for (i = 0; i < 8; i++) { | ||
183 | if (i == max_multiplier) | ||
184 | powernow_k6_set_state(i); | ||
185 | } | ||
186 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | static unsigned int powernow_k6_get(unsigned int cpu) | ||
191 | { | ||
192 | unsigned int ret; | ||
193 | ret = (busfreq * powernow_k6_get_cpu_multiplier()); | ||
194 | return ret; | ||
195 | } | ||
196 | |||
197 | static struct freq_attr *powernow_k6_attr[] = { | ||
198 | &cpufreq_freq_attr_scaling_available_freqs, | ||
199 | NULL, | ||
200 | }; | ||
201 | |||
202 | static struct cpufreq_driver powernow_k6_driver = { | ||
203 | .verify = powernow_k6_verify, | ||
204 | .target = powernow_k6_target, | ||
205 | .init = powernow_k6_cpu_init, | ||
206 | .exit = powernow_k6_cpu_exit, | ||
207 | .get = powernow_k6_get, | ||
208 | .name = "powernow-k6", | ||
209 | .owner = THIS_MODULE, | ||
210 | .attr = powernow_k6_attr, | ||
211 | }; | ||
212 | |||
213 | |||
214 | /** | ||
215 | * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver | ||
216 | * | ||
217 | * Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported | ||
218 | * devices, -EINVAL or -ENOMEM on problems during initialization, and zero | ||
219 | * on success. | ||
220 | */ | ||
221 | static int __init powernow_k6_init(void) | ||
222 | { | ||
223 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
224 | |||
225 | if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) || | ||
226 | ((c->x86_model != 12) && (c->x86_model != 13))) | ||
227 | return -ENODEV; | ||
228 | |||
229 | if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) { | ||
230 | printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n"); | ||
231 | return -EIO; | ||
232 | } | ||
233 | |||
234 | if (cpufreq_register_driver(&powernow_k6_driver)) { | ||
235 | release_region(POWERNOW_IOPORT, 16); | ||
236 | return -EINVAL; | ||
237 | } | ||
238 | |||
239 | return 0; | ||
240 | } | ||
241 | |||
242 | |||
243 | /** | ||
244 | * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support | ||
245 | * | ||
246 | * Unregisters AMD K6-2+ / K6-3+ PowerNow! support. | ||
247 | */ | ||
248 | static void __exit powernow_k6_exit(void) | ||
249 | { | ||
250 | cpufreq_unregister_driver(&powernow_k6_driver); | ||
251 | release_region(POWERNOW_IOPORT, 16); | ||
252 | } | ||
253 | |||
254 | |||
255 | MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, " | ||
256 | "Dominik Brodowski <linux@brodo.de>"); | ||
257 | MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); | ||
258 | MODULE_LICENSE("GPL"); | ||
259 | |||
260 | module_init(powernow_k6_init); | ||
261 | module_exit(powernow_k6_exit); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c deleted file mode 100644 index 4a45fd6e41ba..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ /dev/null | |||
@@ -1,752 +0,0 @@ | |||
1 | /* | ||
2 | * AMD K7 Powernow driver. | ||
3 | * (C) 2003 Dave Jones on behalf of SuSE Labs. | ||
4 | * (C) 2003-2004 Dave Jones <davej@redhat.com> | ||
5 | * | ||
6 | * Licensed under the terms of the GNU GPL License version 2. | ||
7 | * Based upon datasheets & sample CPUs kindly provided by AMD. | ||
8 | * | ||
9 | * Errata 5: | ||
10 | * CPU may fail to execute a FID/VID change in presence of interrupt. | ||
11 | * - We cli/sti on stepping A0 CPUs around the FID/VID transition. | ||
12 | * Errata 15: | ||
13 | * CPU with half frequency multipliers may hang upon wakeup from disconnect. | ||
14 | * - We disable half multipliers if ACPI is used on A0 stepping CPUs. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/moduleparam.h> | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/cpufreq.h> | ||
22 | #include <linux/slab.h> | ||
23 | #include <linux/string.h> | ||
24 | #include <linux/dmi.h> | ||
25 | #include <linux/timex.h> | ||
26 | #include <linux/io.h> | ||
27 | |||
28 | #include <asm/timer.h> /* Needed for recalibrate_cpu_khz() */ | ||
29 | #include <asm/msr.h> | ||
30 | #include <asm/system.h> | ||
31 | |||
32 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
33 | #include <linux/acpi.h> | ||
34 | #include <acpi/processor.h> | ||
35 | #endif | ||
36 | |||
37 | #include "powernow-k7.h" | ||
38 | |||
39 | #define PFX "powernow: " | ||
40 | |||
41 | |||
42 | struct psb_s { | ||
43 | u8 signature[10]; | ||
44 | u8 tableversion; | ||
45 | u8 flags; | ||
46 | u16 settlingtime; | ||
47 | u8 reserved1; | ||
48 | u8 numpst; | ||
49 | }; | ||
50 | |||
51 | struct pst_s { | ||
52 | u32 cpuid; | ||
53 | u8 fsbspeed; | ||
54 | u8 maxfid; | ||
55 | u8 startvid; | ||
56 | u8 numpstates; | ||
57 | }; | ||
58 | |||
59 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
60 | union powernow_acpi_control_t { | ||
61 | struct { | ||
62 | unsigned long fid:5, | ||
63 | vid:5, | ||
64 | sgtc:20, | ||
65 | res1:2; | ||
66 | } bits; | ||
67 | unsigned long val; | ||
68 | }; | ||
69 | #endif | ||
70 | |||
71 | #ifdef CONFIG_CPU_FREQ_DEBUG | ||
72 | /* divide by 1000 to get VCore voltage in V. */ | ||
73 | static const int mobile_vid_table[32] = { | ||
74 | 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, | ||
75 | 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0, | ||
76 | 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, | ||
77 | 1075, 1050, 1025, 1000, 975, 950, 925, 0, | ||
78 | }; | ||
79 | #endif | ||
80 | |||
81 | /* divide by 10 to get FID. */ | ||
82 | static const int fid_codes[32] = { | ||
83 | 110, 115, 120, 125, 50, 55, 60, 65, | ||
84 | 70, 75, 80, 85, 90, 95, 100, 105, | ||
85 | 30, 190, 40, 200, 130, 135, 140, 210, | ||
86 | 150, 225, 160, 165, 170, 180, -1, -1, | ||
87 | }; | ||
88 | |||
89 | /* This parameter is used in order to force ACPI instead of legacy method for | ||
90 | * configuration purpose. | ||
91 | */ | ||
92 | |||
93 | static int acpi_force; | ||
94 | |||
95 | static struct cpufreq_frequency_table *powernow_table; | ||
96 | |||
97 | static unsigned int can_scale_bus; | ||
98 | static unsigned int can_scale_vid; | ||
99 | static unsigned int minimum_speed = -1; | ||
100 | static unsigned int maximum_speed; | ||
101 | static unsigned int number_scales; | ||
102 | static unsigned int fsb; | ||
103 | static unsigned int latency; | ||
104 | static char have_a0; | ||
105 | |||
106 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
107 | "powernow-k7", msg) | ||
108 | |||
109 | static int check_fsb(unsigned int fsbspeed) | ||
110 | { | ||
111 | int delta; | ||
112 | unsigned int f = fsb / 1000; | ||
113 | |||
114 | delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed; | ||
115 | return delta < 5; | ||
116 | } | ||
117 | |||
118 | static int check_powernow(void) | ||
119 | { | ||
120 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
121 | unsigned int maxei, eax, ebx, ecx, edx; | ||
122 | |||
123 | if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 6)) { | ||
124 | #ifdef MODULE | ||
125 | printk(KERN_INFO PFX "This module only works with " | ||
126 | "AMD K7 CPUs\n"); | ||
127 | #endif | ||
128 | return 0; | ||
129 | } | ||
130 | |||
131 | /* Get maximum capabilities */ | ||
132 | maxei = cpuid_eax(0x80000000); | ||
133 | if (maxei < 0x80000007) { /* Any powernow info ? */ | ||
134 | #ifdef MODULE | ||
135 | printk(KERN_INFO PFX "No powernow capabilities detected\n"); | ||
136 | #endif | ||
137 | return 0; | ||
138 | } | ||
139 | |||
140 | if ((c->x86_model == 6) && (c->x86_mask == 0)) { | ||
141 | printk(KERN_INFO PFX "K7 660[A0] core detected, " | ||
142 | "enabling errata workarounds\n"); | ||
143 | have_a0 = 1; | ||
144 | } | ||
145 | |||
146 | cpuid(0x80000007, &eax, &ebx, &ecx, &edx); | ||
147 | |||
148 | /* Check we can actually do something before we say anything.*/ | ||
149 | if (!(edx & (1 << 1 | 1 << 2))) | ||
150 | return 0; | ||
151 | |||
152 | printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: "); | ||
153 | |||
154 | if (edx & 1 << 1) { | ||
155 | printk("frequency"); | ||
156 | can_scale_bus = 1; | ||
157 | } | ||
158 | |||
159 | if ((edx & (1 << 1 | 1 << 2)) == 0x6) | ||
160 | printk(" and "); | ||
161 | |||
162 | if (edx & 1 << 2) { | ||
163 | printk("voltage"); | ||
164 | can_scale_vid = 1; | ||
165 | } | ||
166 | |||
167 | printk(".\n"); | ||
168 | return 1; | ||
169 | } | ||
170 | |||
171 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
172 | static void invalidate_entry(unsigned int entry) | ||
173 | { | ||
174 | powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; | ||
175 | } | ||
176 | #endif | ||
177 | |||
178 | static int get_ranges(unsigned char *pst) | ||
179 | { | ||
180 | unsigned int j; | ||
181 | unsigned int speed; | ||
182 | u8 fid, vid; | ||
183 | |||
184 | powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * | ||
185 | (number_scales + 1)), GFP_KERNEL); | ||
186 | if (!powernow_table) | ||
187 | return -ENOMEM; | ||
188 | |||
189 | for (j = 0 ; j < number_scales; j++) { | ||
190 | fid = *pst++; | ||
191 | |||
192 | powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10; | ||
193 | powernow_table[j].index = fid; /* lower 8 bits */ | ||
194 | |||
195 | speed = powernow_table[j].frequency; | ||
196 | |||
197 | if ((fid_codes[fid] % 10) == 5) { | ||
198 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
199 | if (have_a0 == 1) | ||
200 | invalidate_entry(j); | ||
201 | #endif | ||
202 | } | ||
203 | |||
204 | if (speed < minimum_speed) | ||
205 | minimum_speed = speed; | ||
206 | if (speed > maximum_speed) | ||
207 | maximum_speed = speed; | ||
208 | |||
209 | vid = *pst++; | ||
210 | powernow_table[j].index |= (vid << 8); /* upper 8 bits */ | ||
211 | |||
212 | dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " | ||
213 | "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, | ||
214 | fid_codes[fid] % 10, speed/1000, vid, | ||
215 | mobile_vid_table[vid]/1000, | ||
216 | mobile_vid_table[vid]%1000); | ||
217 | } | ||
218 | powernow_table[number_scales].frequency = CPUFREQ_TABLE_END; | ||
219 | powernow_table[number_scales].index = 0; | ||
220 | |||
221 | return 0; | ||
222 | } | ||
223 | |||
224 | |||
225 | static void change_FID(int fid) | ||
226 | { | ||
227 | union msr_fidvidctl fidvidctl; | ||
228 | |||
229 | rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); | ||
230 | if (fidvidctl.bits.FID != fid) { | ||
231 | fidvidctl.bits.SGTC = latency; | ||
232 | fidvidctl.bits.FID = fid; | ||
233 | fidvidctl.bits.VIDC = 0; | ||
234 | fidvidctl.bits.FIDC = 1; | ||
235 | wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); | ||
236 | } | ||
237 | } | ||
238 | |||
239 | |||
240 | static void change_VID(int vid) | ||
241 | { | ||
242 | union msr_fidvidctl fidvidctl; | ||
243 | |||
244 | rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); | ||
245 | if (fidvidctl.bits.VID != vid) { | ||
246 | fidvidctl.bits.SGTC = latency; | ||
247 | fidvidctl.bits.VID = vid; | ||
248 | fidvidctl.bits.FIDC = 0; | ||
249 | fidvidctl.bits.VIDC = 1; | ||
250 | wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); | ||
251 | } | ||
252 | } | ||
253 | |||
254 | |||
255 | static void change_speed(unsigned int index) | ||
256 | { | ||
257 | u8 fid, vid; | ||
258 | struct cpufreq_freqs freqs; | ||
259 | union msr_fidvidstatus fidvidstatus; | ||
260 | int cfid; | ||
261 | |||
262 | /* fid are the lower 8 bits of the index we stored into | ||
263 | * the cpufreq frequency table in powernow_decode_bios, | ||
264 | * vid are the upper 8 bits. | ||
265 | */ | ||
266 | |||
267 | fid = powernow_table[index].index & 0xFF; | ||
268 | vid = (powernow_table[index].index & 0xFF00) >> 8; | ||
269 | |||
270 | freqs.cpu = 0; | ||
271 | |||
272 | rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); | ||
273 | cfid = fidvidstatus.bits.CFID; | ||
274 | freqs.old = fsb * fid_codes[cfid] / 10; | ||
275 | |||
276 | freqs.new = powernow_table[index].frequency; | ||
277 | |||
278 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
279 | |||
280 | /* Now do the magic poking into the MSRs. */ | ||
281 | |||
282 | if (have_a0 == 1) /* A0 errata 5 */ | ||
283 | local_irq_disable(); | ||
284 | |||
285 | if (freqs.old > freqs.new) { | ||
286 | /* Going down, so change FID first */ | ||
287 | change_FID(fid); | ||
288 | change_VID(vid); | ||
289 | } else { | ||
290 | /* Going up, so change VID first */ | ||
291 | change_VID(vid); | ||
292 | change_FID(fid); | ||
293 | } | ||
294 | |||
295 | |||
296 | if (have_a0 == 1) | ||
297 | local_irq_enable(); | ||
298 | |||
299 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
300 | } | ||
301 | |||
302 | |||
303 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
304 | |||
305 | static struct acpi_processor_performance *acpi_processor_perf; | ||
306 | |||
307 | static int powernow_acpi_init(void) | ||
308 | { | ||
309 | int i; | ||
310 | int retval = 0; | ||
311 | union powernow_acpi_control_t pc; | ||
312 | |||
313 | if (acpi_processor_perf != NULL && powernow_table != NULL) { | ||
314 | retval = -EINVAL; | ||
315 | goto err0; | ||
316 | } | ||
317 | |||
318 | acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance), | ||
319 | GFP_KERNEL); | ||
320 | if (!acpi_processor_perf) { | ||
321 | retval = -ENOMEM; | ||
322 | goto err0; | ||
323 | } | ||
324 | |||
325 | if (!zalloc_cpumask_var(&acpi_processor_perf->shared_cpu_map, | ||
326 | GFP_KERNEL)) { | ||
327 | retval = -ENOMEM; | ||
328 | goto err05; | ||
329 | } | ||
330 | |||
331 | if (acpi_processor_register_performance(acpi_processor_perf, 0)) { | ||
332 | retval = -EIO; | ||
333 | goto err1; | ||
334 | } | ||
335 | |||
336 | if (acpi_processor_perf->control_register.space_id != | ||
337 | ACPI_ADR_SPACE_FIXED_HARDWARE) { | ||
338 | retval = -ENODEV; | ||
339 | goto err2; | ||
340 | } | ||
341 | |||
342 | if (acpi_processor_perf->status_register.space_id != | ||
343 | ACPI_ADR_SPACE_FIXED_HARDWARE) { | ||
344 | retval = -ENODEV; | ||
345 | goto err2; | ||
346 | } | ||
347 | |||
348 | number_scales = acpi_processor_perf->state_count; | ||
349 | |||
350 | if (number_scales < 2) { | ||
351 | retval = -ENODEV; | ||
352 | goto err2; | ||
353 | } | ||
354 | |||
355 | powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * | ||
356 | (number_scales + 1)), GFP_KERNEL); | ||
357 | if (!powernow_table) { | ||
358 | retval = -ENOMEM; | ||
359 | goto err2; | ||
360 | } | ||
361 | |||
362 | pc.val = (unsigned long) acpi_processor_perf->states[0].control; | ||
363 | for (i = 0; i < number_scales; i++) { | ||
364 | u8 fid, vid; | ||
365 | struct acpi_processor_px *state = | ||
366 | &acpi_processor_perf->states[i]; | ||
367 | unsigned int speed, speed_mhz; | ||
368 | |||
369 | pc.val = (unsigned long) state->control; | ||
370 | dprintk("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", | ||
371 | i, | ||
372 | (u32) state->core_frequency, | ||
373 | (u32) state->power, | ||
374 | (u32) state->transition_latency, | ||
375 | (u32) state->control, | ||
376 | pc.bits.sgtc); | ||
377 | |||
378 | vid = pc.bits.vid; | ||
379 | fid = pc.bits.fid; | ||
380 | |||
381 | powernow_table[i].frequency = fsb * fid_codes[fid] / 10; | ||
382 | powernow_table[i].index = fid; /* lower 8 bits */ | ||
383 | powernow_table[i].index |= (vid << 8); /* upper 8 bits */ | ||
384 | |||
385 | speed = powernow_table[i].frequency; | ||
386 | speed_mhz = speed / 1000; | ||
387 | |||
388 | /* processor_perflib will multiply the MHz value by 1000 to | ||
389 | * get a KHz value (e.g. 1266000). However, powernow-k7 works | ||
390 | * with true KHz values (e.g. 1266768). To ensure that all | ||
391 | * powernow frequencies are available, we must ensure that | ||
392 | * ACPI doesn't restrict them, so we round up the MHz value | ||
393 | * to ensure that perflib's computed KHz value is greater than | ||
394 | * or equal to powernow's KHz value. | ||
395 | */ | ||
396 | if (speed % 1000 > 0) | ||
397 | speed_mhz++; | ||
398 | |||
399 | if ((fid_codes[fid] % 10) == 5) { | ||
400 | if (have_a0 == 1) | ||
401 | invalidate_entry(i); | ||
402 | } | ||
403 | |||
404 | dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " | ||
405 | "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, | ||
406 | fid_codes[fid] % 10, speed_mhz, vid, | ||
407 | mobile_vid_table[vid]/1000, | ||
408 | mobile_vid_table[vid]%1000); | ||
409 | |||
410 | if (state->core_frequency != speed_mhz) { | ||
411 | state->core_frequency = speed_mhz; | ||
412 | dprintk(" Corrected ACPI frequency to %d\n", | ||
413 | speed_mhz); | ||
414 | } | ||
415 | |||
416 | if (latency < pc.bits.sgtc) | ||
417 | latency = pc.bits.sgtc; | ||
418 | |||
419 | if (speed < minimum_speed) | ||
420 | minimum_speed = speed; | ||
421 | if (speed > maximum_speed) | ||
422 | maximum_speed = speed; | ||
423 | } | ||
424 | |||
425 | powernow_table[i].frequency = CPUFREQ_TABLE_END; | ||
426 | powernow_table[i].index = 0; | ||
427 | |||
428 | /* notify BIOS that we exist */ | ||
429 | acpi_processor_notify_smm(THIS_MODULE); | ||
430 | |||
431 | return 0; | ||
432 | |||
433 | err2: | ||
434 | acpi_processor_unregister_performance(acpi_processor_perf, 0); | ||
435 | err1: | ||
436 | free_cpumask_var(acpi_processor_perf->shared_cpu_map); | ||
437 | err05: | ||
438 | kfree(acpi_processor_perf); | ||
439 | err0: | ||
440 | printk(KERN_WARNING PFX "ACPI perflib can not be used on " | ||
441 | "this platform\n"); | ||
442 | acpi_processor_perf = NULL; | ||
443 | return retval; | ||
444 | } | ||
445 | #else | ||
446 | static int powernow_acpi_init(void) | ||
447 | { | ||
448 | printk(KERN_INFO PFX "no ACPI processor support found." | ||
449 | " Please recompile your kernel with ACPI processor support\n"); | ||
450 | return -EINVAL; | ||
451 | } | ||
452 | #endif | ||
453 | |||
454 | static void print_pst_entry(struct pst_s *pst, unsigned int j) | ||
455 | { | ||
456 | dprintk("PST:%d (@%p)\n", j, pst); | ||
457 | dprintk(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", | ||
458 | pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); | ||
459 | } | ||
460 | |||
461 | static int powernow_decode_bios(int maxfid, int startvid) | ||
462 | { | ||
463 | struct psb_s *psb; | ||
464 | struct pst_s *pst; | ||
465 | unsigned int i, j; | ||
466 | unsigned char *p; | ||
467 | unsigned int etuple; | ||
468 | unsigned int ret; | ||
469 | |||
470 | etuple = cpuid_eax(0x80000001); | ||
471 | |||
472 | for (i = 0xC0000; i < 0xffff0 ; i += 16) { | ||
473 | |||
474 | p = phys_to_virt(i); | ||
475 | |||
476 | if (memcmp(p, "AMDK7PNOW!", 10) == 0) { | ||
477 | dprintk("Found PSB header at %p\n", p); | ||
478 | psb = (struct psb_s *) p; | ||
479 | dprintk("Table version: 0x%x\n", psb->tableversion); | ||
480 | if (psb->tableversion != 0x12) { | ||
481 | printk(KERN_INFO PFX "Sorry, only v1.2 tables" | ||
482 | " supported right now\n"); | ||
483 | return -ENODEV; | ||
484 | } | ||
485 | |||
486 | dprintk("Flags: 0x%x\n", psb->flags); | ||
487 | if ((psb->flags & 1) == 0) | ||
488 | dprintk("Mobile voltage regulator\n"); | ||
489 | else | ||
490 | dprintk("Desktop voltage regulator\n"); | ||
491 | |||
492 | latency = psb->settlingtime; | ||
493 | if (latency < 100) { | ||
494 | printk(KERN_INFO PFX "BIOS set settling time " | ||
495 | "to %d microseconds. " | ||
496 | "Should be at least 100. " | ||
497 | "Correcting.\n", latency); | ||
498 | latency = 100; | ||
499 | } | ||
500 | dprintk("Settling Time: %d microseconds.\n", | ||
501 | psb->settlingtime); | ||
502 | dprintk("Has %d PST tables. (Only dumping ones " | ||
503 | "relevant to this CPU).\n", | ||
504 | psb->numpst); | ||
505 | |||
506 | p += sizeof(struct psb_s); | ||
507 | |||
508 | pst = (struct pst_s *) p; | ||
509 | |||
510 | for (j = 0; j < psb->numpst; j++) { | ||
511 | pst = (struct pst_s *) p; | ||
512 | number_scales = pst->numpstates; | ||
513 | |||
514 | if ((etuple == pst->cpuid) && | ||
515 | check_fsb(pst->fsbspeed) && | ||
516 | (maxfid == pst->maxfid) && | ||
517 | (startvid == pst->startvid)) { | ||
518 | print_pst_entry(pst, j); | ||
519 | p = (char *)pst + sizeof(struct pst_s); | ||
520 | ret = get_ranges(p); | ||
521 | return ret; | ||
522 | } else { | ||
523 | unsigned int k; | ||
524 | p = (char *)pst + sizeof(struct pst_s); | ||
525 | for (k = 0; k < number_scales; k++) | ||
526 | p += 2; | ||
527 | } | ||
528 | } | ||
529 | printk(KERN_INFO PFX "No PST tables match this cpuid " | ||
530 | "(0x%x)\n", etuple); | ||
531 | printk(KERN_INFO PFX "This is indicative of a broken " | ||
532 | "BIOS.\n"); | ||
533 | |||
534 | return -EINVAL; | ||
535 | } | ||
536 | p++; | ||
537 | } | ||
538 | |||
539 | return -ENODEV; | ||
540 | } | ||
541 | |||
542 | |||
543 | static int powernow_target(struct cpufreq_policy *policy, | ||
544 | unsigned int target_freq, | ||
545 | unsigned int relation) | ||
546 | { | ||
547 | unsigned int newstate; | ||
548 | |||
549 | if (cpufreq_frequency_table_target(policy, powernow_table, target_freq, | ||
550 | relation, &newstate)) | ||
551 | return -EINVAL; | ||
552 | |||
553 | change_speed(newstate); | ||
554 | |||
555 | return 0; | ||
556 | } | ||
557 | |||
558 | |||
559 | static int powernow_verify(struct cpufreq_policy *policy) | ||
560 | { | ||
561 | return cpufreq_frequency_table_verify(policy, powernow_table); | ||
562 | } | ||
563 | |||
564 | /* | ||
565 | * We rely on the bus frequency being (approximately) a multiple | ||
566 | * of 100000/3 kHz, and compute the SGTC according to this | ||
567 | * multiple. | ||
568 | * That way we better match how AMD intended this to work, and | ||
569 | * get the same kind of behaviour already tested under the | ||
570 | * "well-known" other OS. | ||
571 | */ | ||
572 | static int __cpuinit fixup_sgtc(void) | ||
573 | { | ||
574 | unsigned int sgtc; | ||
575 | unsigned int m; | ||
576 | |||
577 | m = fsb / 3333; | ||
578 | if ((m % 10) >= 5) | ||
579 | m += 5; | ||
580 | |||
581 | m /= 10; | ||
582 | |||
583 | sgtc = 100 * m * latency; | ||
584 | sgtc = sgtc / 3; | ||
585 | if (sgtc > 0xfffff) { | ||
586 | printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc); | ||
587 | sgtc = 0xfffff; | ||
588 | } | ||
589 | return sgtc; | ||
590 | } | ||
591 | |||
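Worked example, assuming a 100 MHz front-side bus (fsb = 100000 kHz): m = 100000 / 3333 = 30, the rounding adjustment does not apply (30 % 10 < 5), m / 10 = 3, and sgtc = 100 * 3 * latency / 3 = 100 * latency, i.e. the BIOS settling time scaled into bus-clock-derived SGTC units, clamped to 0xfffff if it would overflow.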
592 | static unsigned int powernow_get(unsigned int cpu) | ||
593 | { | ||
594 | union msr_fidvidstatus fidvidstatus; | ||
595 | unsigned int cfid; | ||
596 | |||
597 | if (cpu) | ||
598 | return 0; | ||
599 | rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); | ||
600 | cfid = fidvidstatus.bits.CFID; | ||
601 | |||
602 | return fsb * fid_codes[cfid] / 10; | ||
603 | } | ||
604 | |||
605 | |||
606 | static int __cpuinit acer_cpufreq_pst(const struct dmi_system_id *d) | ||
607 | { | ||
608 | printk(KERN_WARNING PFX | ||
609 | "%s laptop with broken PST tables in BIOS detected.\n", | ||
610 | d->ident); | ||
611 | printk(KERN_WARNING PFX | ||
612 | "You need to downgrade to 3A21 (09/09/2002), or try a newer " | ||
613 | "BIOS than 3A71 (01/20/2003)\n"); | ||
614 | printk(KERN_WARNING PFX | ||
615 | "cpufreq scaling has been disabled as a result of this.\n"); | ||
616 | return 0; | ||
617 | } | ||
618 | |||
619 | /* | ||
620 | * Some Athlon laptops have really fucked PST tables. | ||
621 | * A BIOS update is all that can save them. | ||
622 | * Mention this, and disable cpufreq. | ||
623 | */ | ||
624 | static struct dmi_system_id __cpuinitdata powernow_dmi_table[] = { | ||
625 | { | ||
626 | .callback = acer_cpufreq_pst, | ||
627 | .ident = "Acer Aspire", | ||
628 | .matches = { | ||
629 | DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"), | ||
630 | DMI_MATCH(DMI_BIOS_VERSION, "3A71"), | ||
631 | }, | ||
632 | }, | ||
633 | { } | ||
634 | }; | ||
635 | |||
636 | static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy) | ||
637 | { | ||
638 | union msr_fidvidstatus fidvidstatus; | ||
639 | int result; | ||
640 | |||
641 | if (policy->cpu != 0) | ||
642 | return -ENODEV; | ||
643 | |||
644 | rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); | ||
645 | |||
646 | recalibrate_cpu_khz(); | ||
647 | |||
648 | fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID]; | ||
649 | if (!fsb) { | ||
650 | printk(KERN_WARNING PFX "can not determine bus frequency\n"); | ||
651 | return -EINVAL; | ||
652 | } | ||
653 | dprintk("FSB: %3dMHz\n", fsb/1000); | ||
654 | |||
655 | if (dmi_check_system(powernow_dmi_table) || acpi_force) { | ||
656 | printk(KERN_INFO PFX "PSB/PST known to be broken. " | ||
657 | "Trying ACPI instead\n"); | ||
658 | result = powernow_acpi_init(); | ||
659 | } else { | ||
660 | result = powernow_decode_bios(fidvidstatus.bits.MFID, | ||
661 | fidvidstatus.bits.SVID); | ||
662 | if (result) { | ||
663 | printk(KERN_INFO PFX "Trying ACPI perflib\n"); | ||
664 | maximum_speed = 0; | ||
665 | minimum_speed = -1; | ||
666 | latency = 0; | ||
667 | result = powernow_acpi_init(); | ||
668 | if (result) { | ||
669 | printk(KERN_INFO PFX | ||
670 | "ACPI and legacy methods failed\n"); | ||
671 | } | ||
672 | } else { | ||
673 | /* SGTC use the bus clock as timer */ | ||
674 | latency = fixup_sgtc(); | ||
675 | printk(KERN_INFO PFX "SGTC: %d\n", latency); | ||
676 | } | ||
677 | } | ||
678 | |||
679 | if (result) | ||
680 | return result; | ||
681 | |||
682 | printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", | ||
683 | minimum_speed/1000, maximum_speed/1000); | ||
684 | |||
685 | policy->cpuinfo.transition_latency = | ||
686 | cpufreq_scale(2000000UL, fsb, latency); | ||
687 | |||
688 | policy->cur = powernow_get(0); | ||
689 | |||
690 | cpufreq_frequency_table_get_attr(powernow_table, policy->cpu); | ||
691 | |||
692 | return cpufreq_frequency_table_cpuinfo(policy, powernow_table); | ||
693 | } | ||
694 | |||
695 | static int powernow_cpu_exit(struct cpufreq_policy *policy) | ||
696 | { | ||
697 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
698 | |||
699 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
700 | if (acpi_processor_perf) { | ||
701 | acpi_processor_unregister_performance(acpi_processor_perf, 0); | ||
702 | free_cpumask_var(acpi_processor_perf->shared_cpu_map); | ||
703 | kfree(acpi_processor_perf); | ||
704 | } | ||
705 | #endif | ||
706 | |||
707 | kfree(powernow_table); | ||
708 | return 0; | ||
709 | } | ||
710 | |||
711 | static struct freq_attr *powernow_table_attr[] = { | ||
712 | &cpufreq_freq_attr_scaling_available_freqs, | ||
713 | NULL, | ||
714 | }; | ||
715 | |||
716 | static struct cpufreq_driver powernow_driver = { | ||
717 | .verify = powernow_verify, | ||
718 | .target = powernow_target, | ||
719 | .get = powernow_get, | ||
720 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
721 | .bios_limit = acpi_processor_get_bios_limit, | ||
722 | #endif | ||
723 | .init = powernow_cpu_init, | ||
724 | .exit = powernow_cpu_exit, | ||
725 | .name = "powernow-k7", | ||
726 | .owner = THIS_MODULE, | ||
727 | .attr = powernow_table_attr, | ||
728 | }; | ||
729 | |||
730 | static int __init powernow_init(void) | ||
731 | { | ||
732 | if (check_powernow() == 0) | ||
733 | return -ENODEV; | ||
734 | return cpufreq_register_driver(&powernow_driver); | ||
735 | } | ||
736 | |||
737 | |||
738 | static void __exit powernow_exit(void) | ||
739 | { | ||
740 | cpufreq_unregister_driver(&powernow_driver); | ||
741 | } | ||
742 | |||
743 | module_param(acpi_force, int, 0444); | ||
744 | MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); | ||
745 | |||
746 | MODULE_AUTHOR("Dave Jones <davej@redhat.com>"); | ||
747 | MODULE_DESCRIPTION("Powernow driver for AMD K7 processors."); | ||
748 | MODULE_LICENSE("GPL"); | ||
749 | |||
750 | late_initcall(powernow_init); | ||
751 | module_exit(powernow_exit); | ||
752 | |||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h deleted file mode 100644 index 35fb4eaf6e1c..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h +++ /dev/null | |||
@@ -1,43 +0,0 @@ | |||
1 | /* | ||
2 | * (C) 2003 Dave Jones. | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * | ||
6 | * AMD-specific information | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | union msr_fidvidctl { | ||
11 | struct { | ||
12 | unsigned FID:5, // 4:0 | ||
13 | reserved1:3, // 7:5 | ||
14 | VID:5, // 12:8 | ||
15 | reserved2:3, // 15:13 | ||
16 | FIDC:1, // 16 | ||
17 | VIDC:1, // 17 | ||
18 | reserved3:2, // 19:18 | ||
19 | FIDCHGRATIO:1, // 20 | ||
20 | reserved4:11, // 31-21 | ||
21 | SGTC:20, // 32:51 | ||
22 | reserved5:12; // 63:52 | ||
23 | } bits; | ||
24 | unsigned long long val; | ||
25 | }; | ||
26 | |||
27 | union msr_fidvidstatus { | ||
28 | struct { | ||
29 | unsigned CFID:5, // 4:0 | ||
30 | reserved1:3, // 7:5 | ||
31 | SFID:5, // 12:8 | ||
32 | reserved2:3, // 15:13 | ||
33 | MFID:5, // 20:16 | ||
34 | reserved3:11, // 31:21 | ||
35 | CVID:5, // 36:32 | ||
36 | reserved4:3, // 39:37 | ||
37 | SVID:5, // 44:40 | ||
38 | reserved5:3, // 47:45 | ||
39 | MVID:5, // 52:48 | ||
40 | reserved6:11; // 63:53 | ||
41 | } bits; | ||
42 | unsigned long long val; | ||
43 | }; | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c deleted file mode 100644 index 2368e38327b3..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ /dev/null | |||
@@ -1,1607 +0,0 @@ | |||
1 | /* | ||
2 | * (c) 2003-2010 Advanced Micro Devices, Inc. | ||
3 | * Your use of this code is subject to the terms and conditions of the | ||
4 | * GNU general public license version 2. See "COPYING" or | ||
5 | * http://www.gnu.org/licenses/gpl.html | ||
6 | * | ||
7 | * Support : mark.langsdorf@amd.com | ||
8 | * | ||
9 | * Based on the powernow-k7.c module written by Dave Jones. | ||
10 | * (C) 2003 Dave Jones on behalf of SuSE Labs | ||
11 | * (C) 2004 Dominik Brodowski <linux@brodo.de> | ||
12 | * (C) 2004 Pavel Machek <pavel@ucw.cz> | ||
13 | * Licensed under the terms of the GNU GPL License version 2. | ||
14 | * Based upon datasheets & sample CPUs kindly provided by AMD. | ||
15 | * | ||
16 | * Valuable input gratefully received from Dave Jones, Pavel Machek, | ||
17 | * Dominik Brodowski, Jacob Shin, and others. | ||
18 | * Originally developed by Paul Devriendt. | ||
19 | * Processor information obtained from Chapter 9 (Power and Thermal Management) | ||
20 | * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD | ||
21 | * Opteron Processors" available for download from www.amd.com | ||
22 | * | ||
23 | * Tables for specific CPUs can be inferred from | ||
24 | * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf | ||
25 | */ | ||
26 | |||
27 | #include <linux/kernel.h> | ||
28 | #include <linux/smp.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/init.h> | ||
31 | #include <linux/cpufreq.h> | ||
32 | #include <linux/slab.h> | ||
33 | #include <linux/string.h> | ||
34 | #include <linux/cpumask.h> | ||
35 | #include <linux/sched.h> /* for current / set_cpus_allowed() */ | ||
36 | #include <linux/io.h> | ||
37 | #include <linux/delay.h> | ||
38 | |||
39 | #include <asm/msr.h> | ||
40 | |||
41 | #include <linux/acpi.h> | ||
42 | #include <linux/mutex.h> | ||
43 | #include <acpi/processor.h> | ||
44 | |||
45 | #define PFX "powernow-k8: " | ||
46 | #define VERSION "version 2.20.00" | ||
47 | #include "powernow-k8.h" | ||
48 | #include "mperf.h" | ||
49 | |||
50 | /* serialize freq changes */ | ||
51 | static DEFINE_MUTEX(fidvid_mutex); | ||
52 | |||
53 | static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); | ||
54 | |||
55 | static int cpu_family = CPU_OPTERON; | ||
56 | |||
57 | /* core performance boost */ | ||
58 | static bool cpb_capable, cpb_enabled; | ||
59 | static struct msr __percpu *msrs; | ||
60 | |||
61 | static struct cpufreq_driver cpufreq_amd64_driver; | ||
62 | |||
63 | #ifndef CONFIG_SMP | ||
64 | static inline const struct cpumask *cpu_core_mask(int cpu) | ||
65 | { | ||
66 | return cpumask_of(0); | ||
67 | } | ||
68 | #endif | ||
69 | |||
70 | /* Return a frequency in MHz, given an input fid */ | ||
71 | static u32 find_freq_from_fid(u32 fid) | ||
72 | { | ||
73 | return 800 + (fid * 100); | ||
74 | } | ||
75 | |||
76 | /* Return a frequency in KHz, given an input fid */ | ||
77 | static u32 find_khz_freq_from_fid(u32 fid) | ||
78 | { | ||
79 | return 1000 * find_freq_from_fid(fid); | ||
80 | } | ||
81 | |||
82 | static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, | ||
83 | u32 pstate) | ||
84 | { | ||
85 | return data[pstate].frequency; | ||
86 | } | ||
87 | |||
88 | /* Return the vco fid for an input fid | ||
89 | * | ||
90 | * Each "low" fid has a corresponding "high" fid, and you can get to "low" fids | ||
91 | * only from the corresponding high fid. This returns the "high" fid | ||
92 | * corresponding to a "low" one. | ||
93 | */ | ||
94 | static u32 convert_fid_to_vco_fid(u32 fid) | ||
95 | { | ||
96 | if (fid < HI_FID_TABLE_BOTTOM) | ||
97 | return 8 + (2 * fid); | ||
98 | else | ||
99 | return fid; | ||
100 | } | ||
101 | |||
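For example, for a fid below HI_FID_TABLE_BOTTOM the corresponding VCO fid is 8 + 2 * fid, so a (hypothetical) low fid of 2 would map to VCO fid 12; fids at or above that threshold are already VCO fids and are returned unchanged.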
102 | /* | ||
103 | * Return 1 if the pending bit is set. Unless we just instructed the processor | ||
104 | * to transition to a new state, seeing this bit set is really bad news. | ||
105 | */ | ||
106 | static int pending_bit_stuck(void) | ||
107 | { | ||
108 | u32 lo, hi; | ||
109 | |||
110 | if (cpu_family == CPU_HW_PSTATE) | ||
111 | return 0; | ||
112 | |||
113 | rdmsr(MSR_FIDVID_STATUS, lo, hi); | ||
114 | return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; | ||
115 | } | ||
116 | |||
117 | /* | ||
118 | * Update the global current fid / vid values from the status msr. | ||
119 | * Returns 1 on error. | ||
120 | */ | ||
121 | static int query_current_values_with_pending_wait(struct powernow_k8_data *data) | ||
122 | { | ||
123 | u32 lo, hi; | ||
124 | u32 i = 0; | ||
125 | |||
126 | if (cpu_family == CPU_HW_PSTATE) { | ||
127 | rdmsr(MSR_PSTATE_STATUS, lo, hi); | ||
128 | i = lo & HW_PSTATE_MASK; | ||
129 | data->currpstate = i; | ||
130 | |||
131 | /* | ||
132 | * a workaround for family 11h erratum 311 might cause | ||
133 | * an "out-of-range Pstate if the core is in Pstate-0 | ||
134 | */ | ||
135 | if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps)) | ||
136 | data->currpstate = HW_PSTATE_0; | ||
137 | |||
138 | return 0; | ||
139 | } | ||
140 | do { | ||
141 | if (i++ > 10000) { | ||
142 | dprintk("detected change pending stuck\n"); | ||
143 | return 1; | ||
144 | } | ||
145 | rdmsr(MSR_FIDVID_STATUS, lo, hi); | ||
146 | } while (lo & MSR_S_LO_CHANGE_PENDING); | ||
147 | |||
148 | data->currvid = hi & MSR_S_HI_CURRENT_VID; | ||
149 | data->currfid = lo & MSR_S_LO_CURRENT_FID; | ||
150 | |||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | /* the isochronous relief time */ | ||
155 | static void count_off_irt(struct powernow_k8_data *data) | ||
156 | { | ||
157 | udelay((1 << data->irt) * 10); | ||
158 | return; | ||
159 | } | ||
160 | |||
161 | /* the voltage stabilization time */ | ||
162 | static void count_off_vst(struct powernow_k8_data *data) | ||
163 | { | ||
164 | udelay(data->vstable * VST_UNITS_20US); | ||
165 | return; | ||
166 | } | ||
167 | |||
168 | /* need to init the control msr to a safe value (for each cpu) */ | ||
169 | static void fidvid_msr_init(void) | ||
170 | { | ||
171 | u32 lo, hi; | ||
172 | u8 fid, vid; | ||
173 | |||
174 | rdmsr(MSR_FIDVID_STATUS, lo, hi); | ||
175 | vid = hi & MSR_S_HI_CURRENT_VID; | ||
176 | fid = lo & MSR_S_LO_CURRENT_FID; | ||
177 | lo = fid | (vid << MSR_C_LO_VID_SHIFT); | ||
178 | hi = MSR_C_HI_STP_GNT_BENIGN; | ||
179 | dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi); | ||
180 | wrmsr(MSR_FIDVID_CTL, lo, hi); | ||
181 | } | ||
182 | |||
183 | /* write the new fid value along with the other control fields to the msr */ | ||
184 | static int write_new_fid(struct powernow_k8_data *data, u32 fid) | ||
185 | { | ||
186 | u32 lo; | ||
187 | u32 savevid = data->currvid; | ||
188 | u32 i = 0; | ||
189 | |||
190 | if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) { | ||
191 | printk(KERN_ERR PFX "internal error - overflow on fid write\n"); | ||
192 | return 1; | ||
193 | } | ||
194 | |||
195 | lo = fid; | ||
196 | lo |= (data->currvid << MSR_C_LO_VID_SHIFT); | ||
197 | lo |= MSR_C_LO_INIT_FID_VID; | ||
198 | |||
199 | dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", | ||
200 | fid, lo, data->plllock * PLL_LOCK_CONVERSION); | ||
201 | |||
202 | do { | ||
203 | wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); | ||
204 | if (i++ > 100) { | ||
205 | printk(KERN_ERR PFX | ||
206 | "Hardware error - pending bit very stuck - " | ||
207 | "no further pstate changes possible\n"); | ||
208 | return 1; | ||
209 | } | ||
210 | } while (query_current_values_with_pending_wait(data)); | ||
211 | |||
212 | count_off_irt(data); | ||
213 | |||
214 | if (savevid != data->currvid) { | ||
215 | printk(KERN_ERR PFX | ||
216 | "vid change on fid trans, old 0x%x, new 0x%x\n", | ||
217 | savevid, data->currvid); | ||
218 | return 1; | ||
219 | } | ||
220 | |||
221 | if (fid != data->currfid) { | ||
222 | printk(KERN_ERR PFX | ||
223 | "fid trans failed, fid 0x%x, curr 0x%x\n", fid, | ||
224 | data->currfid); | ||
225 | return 1; | ||
226 | } | ||
227 | |||
228 | return 0; | ||
229 | } | ||
230 | |||
231 | /* Write a new vid to the hardware */ | ||
232 | static int write_new_vid(struct powernow_k8_data *data, u32 vid) | ||
233 | { | ||
234 | u32 lo; | ||
235 | u32 savefid = data->currfid; | ||
236 | int i = 0; | ||
237 | |||
238 | if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) { | ||
239 | printk(KERN_ERR PFX "internal error - overflow on vid write\n"); | ||
240 | return 1; | ||
241 | } | ||
242 | |||
243 | lo = data->currfid; | ||
244 | lo |= (vid << MSR_C_LO_VID_SHIFT); | ||
245 | lo |= MSR_C_LO_INIT_FID_VID; | ||
246 | |||
247 | dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", | ||
248 | vid, lo, STOP_GRANT_5NS); | ||
249 | |||
250 | do { | ||
251 | wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); | ||
252 | if (i++ > 100) { | ||
253 | printk(KERN_ERR PFX "internal error - pending bit " | ||
254 | "very stuck - no further pstate " | ||
255 | "changes possible\n"); | ||
256 | return 1; | ||
257 | } | ||
258 | } while (query_current_values_with_pending_wait(data)); | ||
259 | |||
260 | if (savefid != data->currfid) { | ||
261 | printk(KERN_ERR PFX "fid changed on vid trans, old " | ||
262 | "0x%x new 0x%x\n", | ||
263 | savefid, data->currfid); | ||
264 | return 1; | ||
265 | } | ||
266 | |||
267 | if (vid != data->currvid) { | ||
268 | printk(KERN_ERR PFX "vid trans failed, vid 0x%x, " | ||
269 | "curr 0x%x\n", | ||
270 | vid, data->currvid); | ||
271 | return 1; | ||
272 | } | ||
273 | |||
274 | return 0; | ||
275 | } | ||
276 | |||
277 | /* | ||
278 | * Reduce the vid toward reqvid, stepping down by at most 'step' codes per call. | ||
279 | * Decreasing vid codes represent increasing voltages: | ||
280 | * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off. | ||
281 | */ | ||
282 | static int decrease_vid_code_by_step(struct powernow_k8_data *data, | ||
283 | u32 reqvid, u32 step) | ||
284 | { | ||
285 | if ((data->currvid - reqvid) > step) | ||
286 | reqvid = data->currvid - step; | ||
287 | |||
288 | if (write_new_vid(data, reqvid)) | ||
289 | return 1; | ||
290 | |||
291 | count_off_vst(data); | ||
292 | |||
293 | return 0; | ||
294 | } | ||
295 | |||
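The two endpoints given in the comment above (vid 0 = 1.550 V, vid 0x1e = 0.800 V) imply a linear 25 mV-per-step encoding. A small sketch of that conversion (hypothetical helper, millivolt result), useful when reading the vid values in the debug output:

/* Hypothetical helper: core voltage in mV for a given VID code,
 * assuming the linear 25 mV steps implied by the endpoints above
 * (vid 0 -> 1550 mV, vid 0x1e -> 1550 - 30 * 25 = 800 mV). */
static unsigned int k8_vid_to_mv(u32 vid)
{
	return 1550 - 25 * vid;
}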
296 | /* Change hardware pstate by single MSR write */ | ||
297 | static int transition_pstate(struct powernow_k8_data *data, u32 pstate) | ||
298 | { | ||
299 | wrmsr(MSR_PSTATE_CTRL, pstate, 0); | ||
300 | data->currpstate = pstate; | ||
301 | return 0; | ||
302 | } | ||
303 | |||
304 | /* Change Opteron/Athlon64 fid and vid, by the 3 phases. */ | ||
305 | static int transition_fid_vid(struct powernow_k8_data *data, | ||
306 | u32 reqfid, u32 reqvid) | ||
307 | { | ||
308 | if (core_voltage_pre_transition(data, reqvid, reqfid)) | ||
309 | return 1; | ||
310 | |||
311 | if (core_frequency_transition(data, reqfid)) | ||
312 | return 1; | ||
313 | |||
314 | if (core_voltage_post_transition(data, reqvid)) | ||
315 | return 1; | ||
316 | |||
317 | if (query_current_values_with_pending_wait(data)) | ||
318 | return 1; | ||
319 | |||
320 | if ((reqfid != data->currfid) || (reqvid != data->currvid)) { | ||
321 | printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, " | ||
322 | "curr 0x%x 0x%x\n", | ||
323 | smp_processor_id(), | ||
324 | reqfid, reqvid, data->currfid, data->currvid); | ||
325 | return 1; | ||
326 | } | ||
327 | |||
328 | dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n", | ||
329 | smp_processor_id(), data->currfid, data->currvid); | ||
330 | |||
331 | return 0; | ||
332 | } | ||
333 | |||
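In short, the three phases keep the core voltage high enough for the target frequency at every point: phase 1 raises the voltage (including the ramp-voltage offset) before any frequency change, phase 2 walks the fid toward the target within the VCO constraints, and phase 3 settles the voltage to its final value; the check above then verifies the hardware actually landed on the requested fid/vid pair.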
334 | /* Phase 1 - core voltage transition ... setup voltage */ | ||
335 | static int core_voltage_pre_transition(struct powernow_k8_data *data, | ||
336 | u32 reqvid, u32 reqfid) | ||
337 | { | ||
338 | u32 rvosteps = data->rvo; | ||
339 | u32 savefid = data->currfid; | ||
340 | u32 maxvid, lo, rvomult = 1; | ||
341 | |||
342 | dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " | ||
343 | "reqvid 0x%x, rvo 0x%x\n", | ||
344 | smp_processor_id(), | ||
345 | data->currfid, data->currvid, reqvid, data->rvo); | ||
346 | |||
347 | if ((savefid < LO_FID_TABLE_TOP) && (reqfid < LO_FID_TABLE_TOP)) | ||
348 | rvomult = 2; | ||
349 | rvosteps *= rvomult; | ||
350 | rdmsr(MSR_FIDVID_STATUS, lo, maxvid); | ||
351 | maxvid = 0x1f & (maxvid >> 16); | ||
352 | dprintk("ph1 maxvid=0x%x\n", maxvid); | ||
353 | if (reqvid < maxvid) /* lower numbers are higher voltages */ | ||
354 | reqvid = maxvid; | ||
355 | |||
356 | while (data->currvid > reqvid) { | ||
357 | dprintk("ph1: curr 0x%x, req vid 0x%x\n", | ||
358 | data->currvid, reqvid); | ||
359 | if (decrease_vid_code_by_step(data, reqvid, data->vidmvs)) | ||
360 | return 1; | ||
361 | } | ||
362 | |||
363 | while ((rvosteps > 0) && | ||
364 | ((rvomult * data->rvo + data->currvid) > reqvid)) { | ||
365 | if (data->currvid == maxvid) { | ||
366 | rvosteps = 0; | ||
367 | } else { | ||
368 | dprintk("ph1: changing vid for rvo, req 0x%x\n", | ||
369 | data->currvid - 1); | ||
370 | if (decrease_vid_code_by_step(data, data->currvid-1, 1)) | ||
371 | return 1; | ||
372 | rvosteps--; | ||
373 | } | ||
374 | } | ||
375 | |||
376 | if (query_current_values_with_pending_wait(data)) | ||
377 | return 1; | ||
378 | |||
379 | if (savefid != data->currfid) { | ||
380 | printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", | ||
381 | data->currfid); | ||
382 | return 1; | ||
383 | } | ||
384 | |||
385 | dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n", | ||
386 | data->currfid, data->currvid); | ||
387 | |||
388 | return 0; | ||
389 | } | ||
390 | |||
391 | /* Phase 2 - core frequency transition */ | ||
392 | static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) | ||
393 | { | ||
394 | u32 vcoreqfid, vcocurrfid, vcofiddiff; | ||
395 | u32 fid_interval, savevid = data->currvid; | ||
396 | |||
397 | if (data->currfid == reqfid) { | ||
398 | printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", | ||
399 | data->currfid); | ||
400 | return 0; | ||
401 | } | ||
402 | |||
403 | dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, " | ||
404 | "reqfid 0x%x\n", | ||
405 | smp_processor_id(), | ||
406 | data->currfid, data->currvid, reqfid); | ||
407 | |||
408 | vcoreqfid = convert_fid_to_vco_fid(reqfid); | ||
409 | vcocurrfid = convert_fid_to_vco_fid(data->currfid); | ||
410 | vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid | ||
411 | : vcoreqfid - vcocurrfid; | ||
412 | |||
413 | if ((reqfid <= LO_FID_TABLE_TOP) && (data->currfid <= LO_FID_TABLE_TOP)) | ||
414 | vcofiddiff = 0; | ||
415 | |||
416 | while (vcofiddiff > 2) { | ||
417 | fid_interval = (data->currfid & 1) ? 1 : 2; | ||
418 | |||
419 | if (reqfid > data->currfid) { | ||
420 | if (data->currfid > LO_FID_TABLE_TOP) { | ||
421 | if (write_new_fid(data, | ||
422 | data->currfid + fid_interval)) | ||
423 | return 1; | ||
424 | } else { | ||
425 | if (write_new_fid | ||
426 | (data, | ||
427 | 2 + convert_fid_to_vco_fid(data->currfid))) | ||
428 | return 1; | ||
429 | } | ||
430 | } else { | ||
431 | if (write_new_fid(data, data->currfid - fid_interval)) | ||
432 | return 1; | ||
433 | } | ||
434 | |||
435 | vcocurrfid = convert_fid_to_vco_fid(data->currfid); | ||
436 | vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid | ||
437 | : vcoreqfid - vcocurrfid; | ||
438 | } | ||
439 | |||
440 | if (write_new_fid(data, reqfid)) | ||
441 | return 1; | ||
442 | |||
443 | if (query_current_values_with_pending_wait(data)) | ||
444 | return 1; | ||
445 | |||
446 | if (data->currfid != reqfid) { | ||
447 | printk(KERN_ERR PFX | ||
448 | "ph2: mismatch, failed fid transition, " | ||
449 | "curr 0x%x, req 0x%x\n", | ||
450 | data->currfid, reqfid); | ||
451 | return 1; | ||
452 | } | ||
453 | |||
454 | if (savevid != data->currvid) { | ||
455 | printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n", | ||
456 | savevid, data->currvid); | ||
457 | return 1; | ||
458 | } | ||
459 | |||
460 | dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n", | ||
461 | data->currfid, data->currvid); | ||
462 | |||
463 | return 0; | ||
464 | } | ||
465 | |||
466 | /* Phase 3 - core voltage transition flow ... jump to the final vid. */ | ||
467 | static int core_voltage_post_transition(struct powernow_k8_data *data, | ||
468 | u32 reqvid) | ||
469 | { | ||
470 | u32 savefid = data->currfid; | ||
471 | u32 savereqvid = reqvid; | ||
472 | |||
473 | dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n", | ||
474 | smp_processor_id(), | ||
475 | data->currfid, data->currvid); | ||
476 | |||
477 | if (reqvid != data->currvid) { | ||
478 | if (write_new_vid(data, reqvid)) | ||
479 | return 1; | ||
480 | |||
481 | if (savefid != data->currfid) { | ||
482 | printk(KERN_ERR PFX | ||
483 | "ph3: bad fid change, save 0x%x, curr 0x%x\n", | ||
484 | savefid, data->currfid); | ||
485 | return 1; | ||
486 | } | ||
487 | |||
488 | if (data->currvid != reqvid) { | ||
489 | printk(KERN_ERR PFX | ||
490 | "ph3: failed vid transition\n, " | ||
491 | "req 0x%x, curr 0x%x", | ||
492 | reqvid, data->currvid); | ||
493 | return 1; | ||
494 | } | ||
495 | } | ||
496 | |||
497 | if (query_current_values_with_pending_wait(data)) | ||
498 | return 1; | ||
499 | |||
500 | if (savereqvid != data->currvid) { | ||
501 | dprintk("ph3 failed, currvid 0x%x\n", data->currvid); | ||
502 | return 1; | ||
503 | } | ||
504 | |||
505 | if (savefid != data->currfid) { | ||
506 | dprintk("ph3 failed, currfid changed 0x%x\n", | ||
507 | data->currfid); | ||
508 | return 1; | ||
509 | } | ||
510 | |||
511 | dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n", | ||
512 | data->currfid, data->currvid); | ||
513 | |||
514 | return 0; | ||
515 | } | ||
516 | |||
517 | static void check_supported_cpu(void *_rc) | ||
518 | { | ||
519 | u32 eax, ebx, ecx, edx; | ||
520 | int *rc = _rc; | ||
521 | |||
522 | *rc = -ENODEV; | ||
523 | |||
524 | if (__this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_AMD) | ||
525 | return; | ||
526 | |||
527 | eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); | ||
528 | if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && | ||
529 | ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) | ||
530 | return; | ||
531 | |||
532 | if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { | ||
533 | if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || | ||
534 | ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { | ||
535 | printk(KERN_INFO PFX | ||
536 | "Processor cpuid %x not supported\n", eax); | ||
537 | return; | ||
538 | } | ||
539 | |||
540 | eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); | ||
541 | if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { | ||
542 | printk(KERN_INFO PFX | ||
543 | "No frequency change capabilities detected\n"); | ||
544 | return; | ||
545 | } | ||
546 | |||
547 | cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); | ||
548 | if ((edx & P_STATE_TRANSITION_CAPABLE) | ||
549 | != P_STATE_TRANSITION_CAPABLE) { | ||
550 | printk(KERN_INFO PFX | ||
551 | "Power state transitions not supported\n"); | ||
552 | return; | ||
553 | } | ||
554 | } else { /* must be a HW Pstate capable processor */ | ||
555 | cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); | ||
556 | if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) | ||
557 | cpu_family = CPU_HW_PSTATE; | ||
558 | else | ||
559 | return; | ||
560 | } | ||
561 | |||
562 | *rc = 0; | ||
563 | } | ||
564 | |||
565 | static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, | ||
566 | u8 maxvid) | ||
567 | { | ||
568 | unsigned int j; | ||
569 | u8 lastfid = 0xff; | ||
570 | |||
571 | for (j = 0; j < data->numps; j++) { | ||
572 | if (pst[j].vid > LEAST_VID) { | ||
573 | printk(KERN_ERR FW_BUG PFX "vid %d invalid : 0x%x\n", | ||
574 | j, pst[j].vid); | ||
575 | return -EINVAL; | ||
576 | } | ||
577 | if (pst[j].vid < data->rvo) { | ||
578 | /* vid + rvo >= 0 */ | ||
579 | printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate" | ||
580 | " %d\n", j); | ||
581 | return -ENODEV; | ||
582 | } | ||
583 | if (pst[j].vid < maxvid + data->rvo) { | ||
584 | /* vid + rvo >= maxvid */ | ||
585 | printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate" | ||
586 | " %d\n", j); | ||
587 | return -ENODEV; | ||
588 | } | ||
589 | if (pst[j].fid > MAX_FID) { | ||
590 | printk(KERN_ERR FW_BUG PFX "maxfid exceeded with pstate" | ||
591 | " %d\n", j); | ||
592 | return -ENODEV; | ||
593 | } | ||
594 | if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { | ||
595 | /* Only first fid is allowed to be in "low" range */ | ||
596 | printk(KERN_ERR FW_BUG PFX "two low fids - %d : " | ||
597 | "0x%x\n", j, pst[j].fid); | ||
598 | return -EINVAL; | ||
599 | } | ||
600 | if (pst[j].fid < lastfid) | ||
601 | lastfid = pst[j].fid; | ||
602 | } | ||
603 | if (lastfid & 1) { | ||
604 | printk(KERN_ERR FW_BUG PFX "lastfid invalid\n"); | ||
605 | return -EINVAL; | ||
606 | } | ||
607 | if (lastfid > LO_FID_TABLE_TOP) | ||
608 | printk(KERN_INFO FW_BUG PFX | ||
609 | "first fid not from lo freq table\n"); | ||
610 | |||
611 | return 0; | ||
612 | } | ||
613 | |||
614 | static void invalidate_entry(struct cpufreq_frequency_table *powernow_table, | ||
615 | unsigned int entry) | ||
616 | { | ||
617 | powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; | ||
618 | } | ||
619 | |||
620 | static void print_basics(struct powernow_k8_data *data) | ||
621 | { | ||
622 | int j; | ||
623 | for (j = 0; j < data->numps; j++) { | ||
624 | if (data->powernow_table[j].frequency != | ||
625 | CPUFREQ_ENTRY_INVALID) { | ||
626 | if (cpu_family == CPU_HW_PSTATE) { | ||
627 | printk(KERN_INFO PFX | ||
628 | " %d : pstate %d (%d MHz)\n", j, | ||
629 | data->powernow_table[j].index, | ||
630 | data->powernow_table[j].frequency/1000); | ||
631 | } else { | ||
632 | printk(KERN_INFO PFX | ||
633 | "fid 0x%x (%d MHz), vid 0x%x\n", | ||
634 | data->powernow_table[j].index & 0xff, | ||
635 | data->powernow_table[j].frequency/1000, | ||
636 | data->powernow_table[j].index >> 8); | ||
637 | } | ||
638 | } | ||
639 | } | ||
640 | if (data->batps) | ||
641 | printk(KERN_INFO PFX "Only %d pstates on battery\n", | ||
642 | data->batps); | ||
643 | } | ||
644 | |||
645 | static u32 freq_from_fid_did(u32 fid, u32 did) | ||
646 | { | ||
647 | u32 mhz = 0; | ||
648 | |||
649 | if (boot_cpu_data.x86 == 0x10) | ||
650 | mhz = (100 * (fid + 0x10)) >> did; | ||
651 | else if (boot_cpu_data.x86 == 0x11) | ||
652 | mhz = (100 * (fid + 8)) >> did; | ||
653 | else | ||
654 | BUG(); | ||
655 | |||
656 | return mhz * 1000; | ||
657 | } | ||
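
As a worked example of the formula above (the fid/did values here are illustrative, not taken from a specific part): on family 0x10, fid 0x0c with did 0 gives 100 * (0x0c + 0x10) = 2800 MHz, and did 1 halves that to 1400 MHz. A minimal, self-contained sketch of the same arithmetic:

        #include <stdint.h>
        #include <stdio.h>

        /* Same arithmetic as freq_from_fid_did() for family 0x10, result in kHz. */
        static uint32_t fam10h_khz(uint32_t fid, uint32_t did)
        {
                return ((100 * (fid + 0x10)) >> did) * 1000;
        }

        int main(void)
        {
                printf("%u kHz\n", fam10h_khz(0x0c, 0)); /* 2800000 */
                printf("%u kHz\n", fam10h_khz(0x0c, 1)); /* 1400000 */
                return 0;
        }
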
658 | |||
659 | static int fill_powernow_table(struct powernow_k8_data *data, | ||
660 | struct pst_s *pst, u8 maxvid) | ||
661 | { | ||
662 | struct cpufreq_frequency_table *powernow_table; | ||
663 | unsigned int j; | ||
664 | |||
665 | if (data->batps) { | ||
666 | /* use ACPI support to get full speed on mains power */ | ||
667 | printk(KERN_WARNING PFX | ||
668 | "Only %d pstates usable (use ACPI driver for full " | ||
669 | "range\n", data->batps); | ||
670 | data->numps = data->batps; | ||
671 | } | ||
672 | |||
673 | for (j = 1; j < data->numps; j++) { | ||
674 | if (pst[j-1].fid >= pst[j].fid) { | ||
675 | printk(KERN_ERR PFX "PST out of sequence\n"); | ||
676 | return -EINVAL; | ||
677 | } | ||
678 | } | ||
679 | |||
680 | if (data->numps < 2) { | ||
681 | printk(KERN_ERR PFX "no p states to transition\n"); | ||
682 | return -ENODEV; | ||
683 | } | ||
684 | |||
685 | if (check_pst_table(data, pst, maxvid)) | ||
686 | return -EINVAL; | ||
687 | |||
688 | powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) | ||
689 | * (data->numps + 1)), GFP_KERNEL); | ||
690 | if (!powernow_table) { | ||
691 | printk(KERN_ERR PFX "powernow_table memory alloc failure\n"); | ||
692 | return -ENOMEM; | ||
693 | } | ||
694 | |||
695 | for (j = 0; j < data->numps; j++) { | ||
696 | int freq; | ||
697 | powernow_table[j].index = pst[j].fid; /* lower 8 bits */ | ||
698 | powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */ | ||
699 | freq = find_khz_freq_from_fid(pst[j].fid); | ||
700 | powernow_table[j].frequency = freq; | ||
701 | } | ||
702 | powernow_table[data->numps].frequency = CPUFREQ_TABLE_END; | ||
703 | powernow_table[data->numps].index = 0; | ||
704 | |||
705 | if (query_current_values_with_pending_wait(data)) { | ||
706 | kfree(powernow_table); | ||
707 | return -EIO; | ||
708 | } | ||
709 | |||
710 | dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); | ||
711 | data->powernow_table = powernow_table; | ||
712 | if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) | ||
713 | print_basics(data); | ||
714 | |||
715 | for (j = 0; j < data->numps; j++) | ||
716 | if ((pst[j].fid == data->currfid) && | ||
717 | (pst[j].vid == data->currvid)) | ||
718 | return 0; | ||
719 | |||
720 | dprintk("currfid/vid do not match PST, ignoring\n"); | ||
721 | return 0; | ||
722 | } | ||
723 | |||
724 | /* Find and validate the PSB/PST table in BIOS. */ | ||
725 | static int find_psb_table(struct powernow_k8_data *data) | ||
726 | { | ||
727 | struct psb_s *psb; | ||
728 | unsigned int i; | ||
729 | u32 mvs; | ||
730 | u8 maxvid; | ||
731 | u32 cpst = 0; | ||
732 | u32 thiscpuid; | ||
733 | |||
734 | for (i = 0xc0000; i < 0xffff0; i += 0x10) { | ||
735 | /* Scan BIOS looking for the signature. */ | ||
736 | /* It can not be at ffff0 - it is too big. */ | ||
737 | |||
738 | psb = phys_to_virt(i); | ||
739 | if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0) | ||
740 | continue; | ||
741 | |||
742 | dprintk("found PSB header at 0x%p\n", psb); | ||
743 | |||
744 | dprintk("table vers: 0x%x\n", psb->tableversion); | ||
745 | if (psb->tableversion != PSB_VERSION_1_4) { | ||
746 | printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n"); | ||
747 | return -ENODEV; | ||
748 | } | ||
749 | |||
750 | dprintk("flags: 0x%x\n", psb->flags1); | ||
751 | if (psb->flags1) { | ||
752 | printk(KERN_ERR FW_BUG PFX "unknown flags\n"); | ||
753 | return -ENODEV; | ||
754 | } | ||
755 | |||
756 | data->vstable = psb->vstable; | ||
757 | dprintk("voltage stabilization time: %d(*20us)\n", | ||
758 | data->vstable); | ||
759 | |||
760 | dprintk("flags2: 0x%x\n", psb->flags2); | ||
761 | data->rvo = psb->flags2 & 3; | ||
762 | data->irt = ((psb->flags2) >> 2) & 3; | ||
763 | mvs = ((psb->flags2) >> 4) & 3; | ||
764 | data->vidmvs = 1 << mvs; | ||
765 | data->batps = ((psb->flags2) >> 6) & 3; | ||
766 | |||
767 | dprintk("ramp voltage offset: %d\n", data->rvo); | ||
768 | dprintk("isochronous relief time: %d\n", data->irt); | ||
769 | dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs); | ||
770 | |||
771 | dprintk("numpst: 0x%x\n", psb->num_tables); | ||
772 | cpst = psb->num_tables; | ||
773 | if ((psb->cpuid == 0x00000fc0) || | ||
774 | (psb->cpuid == 0x00000fe0)) { | ||
775 | thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); | ||
776 | if ((thiscpuid == 0x00000fc0) || | ||
777 | (thiscpuid == 0x00000fe0)) | ||
778 | cpst = 1; | ||
779 | } | ||
780 | if (cpst != 1) { | ||
781 | printk(KERN_ERR FW_BUG PFX "numpst must be 1\n"); | ||
782 | return -ENODEV; | ||
783 | } | ||
784 | |||
785 | data->plllock = psb->plllocktime; | ||
786 | dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime); | ||
787 | dprintk("maxfid: 0x%x\n", psb->maxfid); | ||
788 | dprintk("maxvid: 0x%x\n", psb->maxvid); | ||
789 | maxvid = psb->maxvid; | ||
790 | |||
791 | data->numps = psb->numps; | ||
792 | dprintk("numpstates: 0x%x\n", data->numps); | ||
793 | return fill_powernow_table(data, | ||
794 | (struct pst_s *)(psb+1), maxvid); | ||
795 | } | ||
796 | /* | ||
797 | * If you see this message, complain to the BIOS manufacturer. If | ||
798 | * they tell you "we do not support Linux" or some similar | ||
799 | * nonsense, remember that Windows 2000 uses the same legacy | ||
800 | * mechanism that the old Linux PSB driver uses. Tell them it | ||
801 | * is broken with Windows 2000. | ||
802 | * | ||
803 | * The reference to the AMD documentation is chapter 9 in the | ||
804 | * BIOS and Kernel Developer's Guide, which is available on | ||
805 | * www.amd.com | ||
806 | */ | ||
807 | printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); | ||
808 | printk(KERN_ERR PFX "Make sure that your BIOS is up to date" | ||
809 | " and Cool'N'Quiet support is enabled in BIOS setup\n"); | ||
810 | return -ENODEV; | ||
811 | } | ||
812 | |||
813 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, | ||
814 | unsigned int index) | ||
815 | { | ||
816 | u64 control; | ||
817 | |||
818 | if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) | ||
819 | return; | ||
820 | |||
821 | control = data->acpi_data.states[index].control; | ||
822 | data->irt = (control >> IRT_SHIFT) & IRT_MASK; | ||
823 | data->rvo = (control >> RVO_SHIFT) & RVO_MASK; | ||
824 | data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; | ||
825 | data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; | ||
826 | data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK); | ||
827 | data->vstable = (control >> VST_SHIFT) & VST_MASK; | ||
828 | } | ||
829 | |||
830 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | ||
831 | { | ||
832 | struct cpufreq_frequency_table *powernow_table; | ||
833 | int ret_val = -ENODEV; | ||
834 | u64 control, status; | ||
835 | |||
836 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { | ||
837 | dprintk("register performance failed: bad ACPI data\n"); | ||
838 | return -EIO; | ||
839 | } | ||
840 | |||
841 | /* verify the data contained in the ACPI structures */ | ||
842 | if (data->acpi_data.state_count <= 1) { | ||
843 | dprintk("No ACPI P-States\n"); | ||
844 | goto err_out; | ||
845 | } | ||
846 | |||
847 | control = data->acpi_data.control_register.space_id; | ||
848 | status = data->acpi_data.status_register.space_id; | ||
849 | |||
850 | if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) || | ||
851 | (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) { | ||
852 | dprintk("Invalid control/status registers (%x - %x)\n", | ||
853 | control, status); | ||
854 | goto err_out; | ||
855 | } | ||
856 | |||
857 | /* fill in data->powernow_table */ | ||
858 | powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) | ||
859 | * (data->acpi_data.state_count + 1)), GFP_KERNEL); | ||
860 | if (!powernow_table) { | ||
861 | dprintk("powernow_table memory alloc failure\n"); | ||
862 | goto err_out; | ||
863 | } | ||
864 | |||
865 | /* fill in data */ | ||
866 | data->numps = data->acpi_data.state_count; | ||
867 | powernow_k8_acpi_pst_values(data, 0); | ||
868 | |||
869 | if (cpu_family == CPU_HW_PSTATE) | ||
870 | ret_val = fill_powernow_table_pstate(data, powernow_table); | ||
871 | else | ||
872 | ret_val = fill_powernow_table_fidvid(data, powernow_table); | ||
873 | if (ret_val) | ||
874 | goto err_out_mem; | ||
875 | |||
876 | powernow_table[data->acpi_data.state_count].frequency = | ||
877 | CPUFREQ_TABLE_END; | ||
878 | powernow_table[data->acpi_data.state_count].index = 0; | ||
879 | data->powernow_table = powernow_table; | ||
880 | |||
881 | if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) | ||
882 | print_basics(data); | ||
883 | |||
884 | /* notify BIOS that we exist */ | ||
885 | acpi_processor_notify_smm(THIS_MODULE); | ||
886 | |||
887 | if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) { | ||
888 | printk(KERN_ERR PFX | ||
889 | "unable to alloc powernow_k8_data cpumask\n"); | ||
890 | ret_val = -ENOMEM; | ||
891 | goto err_out_mem; | ||
892 | } | ||
893 | |||
894 | return 0; | ||
895 | |||
896 | err_out_mem: | ||
897 | kfree(powernow_table); | ||
898 | |||
899 | err_out: | ||
900 | acpi_processor_unregister_performance(&data->acpi_data, data->cpu); | ||
901 | |||
902 | /* data->acpi_data.state_count informs us at ->exit() | ||
903 | * whether ACPI was used */ | ||
904 | data->acpi_data.state_count = 0; | ||
905 | |||
906 | return ret_val; | ||
907 | } | ||
908 | |||
909 | static int fill_powernow_table_pstate(struct powernow_k8_data *data, | ||
910 | struct cpufreq_frequency_table *powernow_table) | ||
911 | { | ||
912 | int i; | ||
913 | u32 hi = 0, lo = 0; | ||
914 | rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi); | ||
915 | data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; | ||
916 | |||
917 | for (i = 0; i < data->acpi_data.state_count; i++) { | ||
918 | u32 index; | ||
919 | |||
920 | index = data->acpi_data.states[i].control & HW_PSTATE_MASK; | ||
921 | if (index > data->max_hw_pstate) { | ||
922 | printk(KERN_ERR PFX "invalid pstate %d - " | ||
923 | "bad value %d.\n", i, index); | ||
924 | printk(KERN_ERR PFX "Please report to BIOS " | ||
925 | "manufacturer\n"); | ||
926 | invalidate_entry(powernow_table, i); | ||
927 | continue; | ||
928 | } | ||
929 | rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); | ||
930 | if (!(hi & HW_PSTATE_VALID_MASK)) { | ||
931 | dprintk("invalid pstate %d, ignoring\n", index); | ||
932 | invalidate_entry(powernow_table, i); | ||
933 | continue; | ||
934 | } | ||
935 | |||
936 | powernow_table[i].index = index; | ||
937 | |||
938 | /* Frequency may be rounded for these */ | ||
939 | if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10) | ||
940 | || boot_cpu_data.x86 == 0x11) { | ||
941 | powernow_table[i].frequency = | ||
942 | freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); | ||
943 | } else | ||
944 | powernow_table[i].frequency = | ||
945 | data->acpi_data.states[i].core_frequency * 1000; | ||
946 | } | ||
947 | return 0; | ||
948 | } | ||
949 | |||
950 | static int fill_powernow_table_fidvid(struct powernow_k8_data *data, | ||
951 | struct cpufreq_frequency_table *powernow_table) | ||
952 | { | ||
953 | int i; | ||
954 | |||
955 | for (i = 0; i < data->acpi_data.state_count; i++) { | ||
956 | u32 fid; | ||
957 | u32 vid; | ||
958 | u32 freq, index; | ||
959 | u64 status, control; | ||
960 | |||
961 | if (data->exttype) { | ||
962 | status = data->acpi_data.states[i].status; | ||
963 | fid = status & EXT_FID_MASK; | ||
964 | vid = (status >> VID_SHIFT) & EXT_VID_MASK; | ||
965 | } else { | ||
966 | control = data->acpi_data.states[i].control; | ||
967 | fid = control & FID_MASK; | ||
968 | vid = (control >> VID_SHIFT) & VID_MASK; | ||
969 | } | ||
970 | |||
971 | dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); | ||
972 | |||
973 | index = fid | (vid<<8); | ||
974 | powernow_table[i].index = index; | ||
975 | |||
976 | freq = find_khz_freq_from_fid(fid); | ||
977 | powernow_table[i].frequency = freq; | ||
978 | |||
979 | /* verify frequency is OK */ | ||
980 | if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) { | ||
981 | dprintk("invalid freq %u kHz, ignoring\n", freq); | ||
982 | invalidate_entry(powernow_table, i); | ||
983 | continue; | ||
984 | } | ||
985 | |||
986 | /* verify voltage is OK - | ||
987 | * BIOSs are using "off" to indicate invalid */ | ||
988 | if (vid == VID_OFF) { | ||
989 | dprintk("invalid vid %u, ignoring\n", vid); | ||
990 | invalidate_entry(powernow_table, i); | ||
991 | continue; | ||
992 | } | ||
993 | |||
994 | if (freq != (data->acpi_data.states[i].core_frequency * 1000)) { | ||
995 | printk(KERN_INFO PFX "invalid freq entries " | ||
996 | "%u kHz vs. %u kHz\n", freq, | ||
997 | (unsigned int) | ||
998 | (data->acpi_data.states[i].core_frequency | ||
999 | * 1000)); | ||
1000 | invalidate_entry(powernow_table, i); | ||
1001 | continue; | ||
1002 | } | ||
1003 | } | ||
1004 | return 0; | ||
1005 | } | ||
1006 | |||
1007 | static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) | ||
1008 | { | ||
1009 | if (data->acpi_data.state_count) | ||
1010 | acpi_processor_unregister_performance(&data->acpi_data, | ||
1011 | data->cpu); | ||
1012 | free_cpumask_var(data->acpi_data.shared_cpu_map); | ||
1013 | } | ||
1014 | |||
1015 | static int get_transition_latency(struct powernow_k8_data *data) | ||
1016 | { | ||
1017 | int max_latency = 0; | ||
1018 | int i; | ||
1019 | for (i = 0; i < data->acpi_data.state_count; i++) { | ||
1020 | int cur_latency = data->acpi_data.states[i].transition_latency | ||
1021 | + data->acpi_data.states[i].bus_master_latency; | ||
1022 | if (cur_latency > max_latency) | ||
1023 | max_latency = cur_latency; | ||
1024 | } | ||
1025 | if (max_latency == 0) { | ||
1026 | /* | ||
1027 | * Fam 11h and later may return 0 as transition latency. This | ||
1028 | * is intended and means "very fast". While the cpufreq core and | ||
1029 | * governors can currently handle that gracefully, it is better to | ||
1030 | * set it to 1 to avoid problems in the future. | ||
1031 | */ | ||
1032 | if (boot_cpu_data.x86 < 0x11) | ||
1033 | printk(KERN_ERR FW_WARN PFX "Invalid zero transition " | ||
1034 | "latency\n"); | ||
1035 | max_latency = 1; | ||
1036 | } | ||
1037 | /* value in usecs, needs to be in nanoseconds */ | ||
1038 | return 1000 * max_latency; | ||
1039 | } | ||
1040 | |||
1041 | /* Take a frequency, and issue the fid/vid transition command */ | ||
1042 | static int transition_frequency_fidvid(struct powernow_k8_data *data, | ||
1043 | unsigned int index) | ||
1044 | { | ||
1045 | u32 fid = 0; | ||
1046 | u32 vid = 0; | ||
1047 | int res, i; | ||
1048 | struct cpufreq_freqs freqs; | ||
1049 | |||
1050 | dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); | ||
1051 | |||
1052 | /* fid/vid correctness check for k8 */ | ||
1053 | /* fid are the lower 8 bits of the index we stored into | ||
1054 | * the cpufreq frequency table in find_psb_table, vid | ||
1055 | * are the upper 8 bits. | ||
1056 | */ | ||
1057 | fid = data->powernow_table[index].index & 0xFF; | ||
1058 | vid = (data->powernow_table[index].index & 0xFF00) >> 8; | ||
1059 | |||
1060 | dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid); | ||
1061 | |||
1062 | if (query_current_values_with_pending_wait(data)) | ||
1063 | return 1; | ||
1064 | |||
1065 | if ((data->currvid == vid) && (data->currfid == fid)) { | ||
1066 | dprintk("target matches current values (fid 0x%x, vid 0x%x)\n", | ||
1067 | fid, vid); | ||
1068 | return 0; | ||
1069 | } | ||
1070 | |||
1071 | dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", | ||
1072 | smp_processor_id(), fid, vid); | ||
1073 | freqs.old = find_khz_freq_from_fid(data->currfid); | ||
1074 | freqs.new = find_khz_freq_from_fid(fid); | ||
1075 | |||
1076 | for_each_cpu(i, data->available_cores) { | ||
1077 | freqs.cpu = i; | ||
1078 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
1079 | } | ||
1080 | |||
1081 | res = transition_fid_vid(data, fid, vid); | ||
1082 | freqs.new = find_khz_freq_from_fid(data->currfid); | ||
1083 | |||
1084 | for_each_cpu(i, data->available_cores) { | ||
1085 | freqs.cpu = i; | ||
1086 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
1087 | } | ||
1088 | return res; | ||
1089 | } | ||
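
The cpufreq table index used here packs the fid into the low byte and the vid into the high byte (see the comment at the top of this function and fill_powernow_table_fidvid() above). A minimal, stand-alone sketch of that packing and the reverse decode, with illustrative values:

        #include <stdint.h>
        #include <stdio.h>

        /* Pack/unpack a fid/vid pair the way the driver stores them in .index. */
        static uint32_t pack_index(uint32_t fid, uint32_t vid)
        {
                return (fid & 0xff) | ((vid & 0xff) << 8);
        }

        int main(void)
        {
                uint32_t index = pack_index(0x0a, 0x12); /* illustrative values */

                printf("fid 0x%x, vid 0x%x\n", index & 0xff, (index >> 8) & 0xff);
                return 0;
        }
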
1090 | |||
1091 | /* Take a frequency, and issue the hardware pstate transition command */ | ||
1092 | static int transition_frequency_pstate(struct powernow_k8_data *data, | ||
1093 | unsigned int index) | ||
1094 | { | ||
1095 | u32 pstate = 0; | ||
1096 | int res, i; | ||
1097 | struct cpufreq_freqs freqs; | ||
1098 | |||
1099 | dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); | ||
1100 | |||
1101 | /* get MSR index for hardware pstate transition */ | ||
1102 | pstate = index & HW_PSTATE_MASK; | ||
1103 | if (pstate > data->max_hw_pstate) | ||
1104 | return 0; | ||
1105 | freqs.old = find_khz_freq_from_pstate(data->powernow_table, | ||
1106 | data->currpstate); | ||
1107 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); | ||
1108 | |||
1109 | for_each_cpu(i, data->available_cores) { | ||
1110 | freqs.cpu = i; | ||
1111 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
1112 | } | ||
1113 | |||
1114 | res = transition_pstate(data, pstate); | ||
1115 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); | ||
1116 | |||
1117 | for_each_cpu(i, data->available_cores) { | ||
1118 | freqs.cpu = i; | ||
1119 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
1120 | } | ||
1121 | return res; | ||
1122 | } | ||
1123 | |||
1124 | /* Driver entry point to switch to the target frequency */ | ||
1125 | static int powernowk8_target(struct cpufreq_policy *pol, | ||
1126 | unsigned targfreq, unsigned relation) | ||
1127 | { | ||
1128 | cpumask_var_t oldmask; | ||
1129 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); | ||
1130 | u32 checkfid; | ||
1131 | u32 checkvid; | ||
1132 | unsigned int newstate; | ||
1133 | int ret = -EIO; | ||
1134 | |||
1135 | if (!data) | ||
1136 | return -EINVAL; | ||
1137 | |||
1138 | checkfid = data->currfid; | ||
1139 | checkvid = data->currvid; | ||
1140 | |||
1141 | /* only run on specific CPU from here on. */ | ||
1142 | /* This is poor form: use a workqueue or smp_call_function_single */ | ||
1143 | if (!alloc_cpumask_var(&oldmask, GFP_KERNEL)) | ||
1144 | return -ENOMEM; | ||
1145 | |||
1146 | cpumask_copy(oldmask, tsk_cpus_allowed(current)); | ||
1147 | set_cpus_allowed_ptr(current, cpumask_of(pol->cpu)); | ||
1148 | |||
1149 | if (smp_processor_id() != pol->cpu) { | ||
1150 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); | ||
1151 | goto err_out; | ||
1152 | } | ||
1153 | |||
1154 | if (pending_bit_stuck()) { | ||
1155 | printk(KERN_ERR PFX "failing targ, change pending bit set\n"); | ||
1156 | goto err_out; | ||
1157 | } | ||
1158 | |||
1159 | dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", | ||
1160 | pol->cpu, targfreq, pol->min, pol->max, relation); | ||
1161 | |||
1162 | if (query_current_values_with_pending_wait(data)) | ||
1163 | goto err_out; | ||
1164 | |||
1165 | if (cpu_family != CPU_HW_PSTATE) { | ||
1166 | dprintk("targ: curr fid 0x%x, vid 0x%x\n", | ||
1167 | data->currfid, data->currvid); | ||
1168 | |||
1169 | if ((checkvid != data->currvid) || | ||
1170 | (checkfid != data->currfid)) { | ||
1171 | printk(KERN_INFO PFX | ||
1172 | "error - out of sync, fix 0x%x 0x%x, " | ||
1173 | "vid 0x%x 0x%x\n", | ||
1174 | checkfid, data->currfid, | ||
1175 | checkvid, data->currvid); | ||
1176 | } | ||
1177 | } | ||
1178 | |||
1179 | if (cpufreq_frequency_table_target(pol, data->powernow_table, | ||
1180 | targfreq, relation, &newstate)) | ||
1181 | goto err_out; | ||
1182 | |||
1183 | mutex_lock(&fidvid_mutex); | ||
1184 | |||
1185 | powernow_k8_acpi_pst_values(data, newstate); | ||
1186 | |||
1187 | if (cpu_family == CPU_HW_PSTATE) | ||
1188 | ret = transition_frequency_pstate(data, newstate); | ||
1189 | else | ||
1190 | ret = transition_frequency_fidvid(data, newstate); | ||
1191 | if (ret) { | ||
1192 | printk(KERN_ERR PFX "transition frequency failed\n"); | ||
1193 | ret = 1; | ||
1194 | mutex_unlock(&fidvid_mutex); | ||
1195 | goto err_out; | ||
1196 | } | ||
1197 | mutex_unlock(&fidvid_mutex); | ||
1198 | |||
1199 | if (cpu_family == CPU_HW_PSTATE) | ||
1200 | pol->cur = find_khz_freq_from_pstate(data->powernow_table, | ||
1201 | newstate); | ||
1202 | else | ||
1203 | pol->cur = find_khz_freq_from_fid(data->currfid); | ||
1204 | ret = 0; | ||
1205 | |||
1206 | err_out: | ||
1207 | set_cpus_allowed_ptr(current, oldmask); | ||
1208 | free_cpumask_var(oldmask); | ||
1209 | return ret; | ||
1210 | } | ||
1211 | |||
1212 | /* Driver entry point to verify the policy and range of frequencies */ | ||
1213 | static int powernowk8_verify(struct cpufreq_policy *pol) | ||
1214 | { | ||
1215 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); | ||
1216 | |||
1217 | if (!data) | ||
1218 | return -EINVAL; | ||
1219 | |||
1220 | return cpufreq_frequency_table_verify(pol, data->powernow_table); | ||
1221 | } | ||
1222 | |||
1223 | struct init_on_cpu { | ||
1224 | struct powernow_k8_data *data; | ||
1225 | int rc; | ||
1226 | }; | ||
1227 | |||
1228 | static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu) | ||
1229 | { | ||
1230 | struct init_on_cpu *init_on_cpu = _init_on_cpu; | ||
1231 | |||
1232 | if (pending_bit_stuck()) { | ||
1233 | printk(KERN_ERR PFX "failing init, change pending bit set\n"); | ||
1234 | init_on_cpu->rc = -ENODEV; | ||
1235 | return; | ||
1236 | } | ||
1237 | |||
1238 | if (query_current_values_with_pending_wait(init_on_cpu->data)) { | ||
1239 | init_on_cpu->rc = -ENODEV; | ||
1240 | return; | ||
1241 | } | ||
1242 | |||
1243 | if (cpu_family == CPU_OPTERON) | ||
1244 | fidvid_msr_init(); | ||
1245 | |||
1246 | init_on_cpu->rc = 0; | ||
1247 | } | ||
1248 | |||
1249 | /* per CPU init entry point to the driver */ | ||
1250 | static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | ||
1251 | { | ||
1252 | static const char ACPI_PSS_BIOS_BUG_MSG[] = | ||
1253 | KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" | ||
1254 | FW_BUG PFX "Try again with latest BIOS.\n"; | ||
1255 | struct powernow_k8_data *data; | ||
1256 | struct init_on_cpu init_on_cpu; | ||
1257 | int rc; | ||
1258 | struct cpuinfo_x86 *c = &cpu_data(pol->cpu); | ||
1259 | |||
1260 | if (!cpu_online(pol->cpu)) | ||
1261 | return -ENODEV; | ||
1262 | |||
1263 | smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1); | ||
1264 | if (rc) | ||
1265 | return -ENODEV; | ||
1266 | |||
1267 | data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); | ||
1268 | if (!data) { | ||
1269 | printk(KERN_ERR PFX "unable to alloc powernow_k8_data"); | ||
1270 | return -ENOMEM; | ||
1271 | } | ||
1272 | |||
1273 | data->cpu = pol->cpu; | ||
1274 | data->currpstate = HW_PSTATE_INVALID; | ||
1275 | |||
1276 | if (powernow_k8_cpu_init_acpi(data)) { | ||
1277 | /* | ||
1278 | * Use the PSB BIOS structure. This is only available on | ||
1279 | * an UP version, and is deprecated by AMD. | ||
1280 | */ | ||
1281 | if (num_online_cpus() != 1) { | ||
1282 | printk_once(ACPI_PSS_BIOS_BUG_MSG); | ||
1283 | goto err_out; | ||
1284 | } | ||
1285 | if (pol->cpu != 0) { | ||
1286 | printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " | ||
1287 | "CPU other than CPU0. Complain to your BIOS " | ||
1288 | "vendor.\n"); | ||
1289 | goto err_out; | ||
1290 | } | ||
1291 | rc = find_psb_table(data); | ||
1292 | if (rc) | ||
1293 | goto err_out; | ||
1294 | |||
1295 | /* Take a crude guess here. | ||
1296 | * That guess was in microseconds, so multiply by 1000 */ | ||
1297 | pol->cpuinfo.transition_latency = ( | ||
1298 | ((data->rvo + 8) * data->vstable * VST_UNITS_20US) + | ||
1299 | ((1 << data->irt) * 30)) * 1000; | ||
1300 | } else /* ACPI _PSS objects available */ | ||
1301 | pol->cpuinfo.transition_latency = get_transition_latency(data); | ||
1302 | |||
1303 | /* only run on specific CPU from here on */ | ||
1304 | init_on_cpu.data = data; | ||
1305 | smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu, | ||
1306 | &init_on_cpu, 1); | ||
1307 | rc = init_on_cpu.rc; | ||
1308 | if (rc != 0) | ||
1309 | goto err_out_exit_acpi; | ||
1310 | |||
1311 | if (cpu_family == CPU_HW_PSTATE) | ||
1312 | cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); | ||
1313 | else | ||
1314 | cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu)); | ||
1315 | data->available_cores = pol->cpus; | ||
1316 | |||
1317 | if (cpu_family == CPU_HW_PSTATE) | ||
1318 | pol->cur = find_khz_freq_from_pstate(data->powernow_table, | ||
1319 | data->currpstate); | ||
1320 | else | ||
1321 | pol->cur = find_khz_freq_from_fid(data->currfid); | ||
1322 | dprintk("policy current frequency %d kHz\n", pol->cur); | ||
1323 | |||
1324 | /* min/max the cpu is capable of */ | ||
1325 | if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { | ||
1326 | printk(KERN_ERR FW_BUG PFX "invalid powernow_table\n"); | ||
1327 | powernow_k8_cpu_exit_acpi(data); | ||
1328 | kfree(data->powernow_table); | ||
1329 | kfree(data); | ||
1330 | return -EINVAL; | ||
1331 | } | ||
1332 | |||
1333 | /* Check for APERF/MPERF support in hardware */ | ||
1334 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) | ||
1335 | cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf; | ||
1336 | |||
1337 | cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); | ||
1338 | |||
1339 | if (cpu_family == CPU_HW_PSTATE) | ||
1340 | dprintk("cpu_init done, current pstate 0x%x\n", | ||
1341 | data->currpstate); | ||
1342 | else | ||
1343 | dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", | ||
1344 | data->currfid, data->currvid); | ||
1345 | |||
1346 | per_cpu(powernow_data, pol->cpu) = data; | ||
1347 | |||
1348 | return 0; | ||
1349 | |||
1350 | err_out_exit_acpi: | ||
1351 | powernow_k8_cpu_exit_acpi(data); | ||
1352 | |||
1353 | err_out: | ||
1354 | kfree(data); | ||
1355 | return -ENODEV; | ||
1356 | } | ||
1357 | |||
1358 | static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) | ||
1359 | { | ||
1360 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); | ||
1361 | |||
1362 | if (!data) | ||
1363 | return -EINVAL; | ||
1364 | |||
1365 | powernow_k8_cpu_exit_acpi(data); | ||
1366 | |||
1367 | cpufreq_frequency_table_put_attr(pol->cpu); | ||
1368 | |||
1369 | kfree(data->powernow_table); | ||
1370 | kfree(data); | ||
1371 | per_cpu(powernow_data, pol->cpu) = NULL; | ||
1372 | |||
1373 | return 0; | ||
1374 | } | ||
1375 | |||
1376 | static void query_values_on_cpu(void *_err) | ||
1377 | { | ||
1378 | int *err = _err; | ||
1379 | struct powernow_k8_data *data = __this_cpu_read(powernow_data); | ||
1380 | |||
1381 | *err = query_current_values_with_pending_wait(data); | ||
1382 | } | ||
1383 | |||
1384 | static unsigned int powernowk8_get(unsigned int cpu) | ||
1385 | { | ||
1386 | struct powernow_k8_data *data = per_cpu(powernow_data, cpu); | ||
1387 | unsigned int khz = 0; | ||
1388 | int err; | ||
1389 | |||
1390 | if (!data) | ||
1391 | return 0; | ||
1392 | |||
1393 | smp_call_function_single(cpu, query_values_on_cpu, &err, true); | ||
1394 | if (err) | ||
1395 | goto out; | ||
1396 | |||
1397 | if (cpu_family == CPU_HW_PSTATE) | ||
1398 | khz = find_khz_freq_from_pstate(data->powernow_table, | ||
1399 | data->currpstate); | ||
1400 | else | ||
1401 | khz = find_khz_freq_from_fid(data->currfid); | ||
1402 | |||
1403 | |||
1404 | out: | ||
1405 | return khz; | ||
1406 | } | ||
1407 | |||
1408 | static void _cpb_toggle_msrs(bool t) | ||
1409 | { | ||
1410 | int cpu; | ||
1411 | |||
1412 | get_online_cpus(); | ||
1413 | |||
1414 | rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1415 | |||
1416 | for_each_cpu(cpu, cpu_online_mask) { | ||
1417 | struct msr *reg = per_cpu_ptr(msrs, cpu); | ||
1418 | if (t) | ||
1419 | reg->l &= ~BIT(25); | ||
1420 | else | ||
1421 | reg->l |= BIT(25); | ||
1422 | } | ||
1423 | wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1424 | |||
1425 | put_online_cpus(); | ||
1426 | } | ||
1427 | |||
1428 | /* | ||
1429 | * Switch on/off core performance boosting. | ||
1430 | * | ||
1431 | * 0=disable | ||
1432 | * 1=enable. | ||
1433 | */ | ||
1434 | static void cpb_toggle(bool t) | ||
1435 | { | ||
1436 | if (!cpb_capable) | ||
1437 | return; | ||
1438 | |||
1439 | if (t && !cpb_enabled) { | ||
1440 | cpb_enabled = true; | ||
1441 | _cpb_toggle_msrs(t); | ||
1442 | printk(KERN_INFO PFX "Core Boosting enabled.\n"); | ||
1443 | } else if (!t && cpb_enabled) { | ||
1444 | cpb_enabled = false; | ||
1445 | _cpb_toggle_msrs(t); | ||
1446 | printk(KERN_INFO PFX "Core Boosting disabled.\n"); | ||
1447 | } | ||
1448 | } | ||
1449 | |||
1450 | static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf, | ||
1451 | size_t count) | ||
1452 | { | ||
1453 | int ret = -EINVAL; | ||
1454 | unsigned long val = 0; | ||
1455 | |||
1456 | ret = strict_strtoul(buf, 10, &val); | ||
1457 | if (!ret && (val == 0 || val == 1) && cpb_capable) | ||
1458 | cpb_toggle(val); | ||
1459 | else | ||
1460 | return -EINVAL; | ||
1461 | |||
1462 | return count; | ||
1463 | } | ||
1464 | |||
1465 | static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf) | ||
1466 | { | ||
1467 | return sprintf(buf, "%u\n", cpb_enabled); | ||
1468 | } | ||
1469 | |||
1470 | #define define_one_rw(_name) \ | ||
1471 | static struct freq_attr _name = \ | ||
1472 | __ATTR(_name, 0644, show_##_name, store_##_name) | ||
1473 | |||
1474 | define_one_rw(cpb); | ||
1475 | |||
1476 | static struct freq_attr *powernow_k8_attr[] = { | ||
1477 | &cpufreq_freq_attr_scaling_available_freqs, | ||
1478 | &cpb, | ||
1479 | NULL, | ||
1480 | }; | ||
1481 | |||
1482 | static struct cpufreq_driver cpufreq_amd64_driver = { | ||
1483 | .verify = powernowk8_verify, | ||
1484 | .target = powernowk8_target, | ||
1485 | .bios_limit = acpi_processor_get_bios_limit, | ||
1486 | .init = powernowk8_cpu_init, | ||
1487 | .exit = __devexit_p(powernowk8_cpu_exit), | ||
1488 | .get = powernowk8_get, | ||
1489 | .name = "powernow-k8", | ||
1490 | .owner = THIS_MODULE, | ||
1491 | .attr = powernow_k8_attr, | ||
1492 | }; | ||
1493 | |||
1494 | /* | ||
1495 | * Clear the boost-disable flag on the CPU_DOWN path so that this cpu | ||
1496 | * cannot block the remaining ones from boosting. On the CPU_UP path we | ||
1497 | * simply keep the boost-disable flag in sync with the current global | ||
1498 | * state. | ||
1499 | */ | ||
1500 | static int cpb_notify(struct notifier_block *nb, unsigned long action, | ||
1501 | void *hcpu) | ||
1502 | { | ||
1503 | unsigned cpu = (long)hcpu; | ||
1504 | u32 lo, hi; | ||
1505 | |||
1506 | switch (action) { | ||
1507 | case CPU_UP_PREPARE: | ||
1508 | case CPU_UP_PREPARE_FROZEN: | ||
1509 | |||
1510 | if (!cpb_enabled) { | ||
1511 | rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); | ||
1512 | lo |= BIT(25); | ||
1513 | wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); | ||
1514 | } | ||
1515 | break; | ||
1516 | |||
1517 | case CPU_DOWN_PREPARE: | ||
1518 | case CPU_DOWN_PREPARE_FROZEN: | ||
1519 | rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); | ||
1520 | lo &= ~BIT(25); | ||
1521 | wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); | ||
1522 | break; | ||
1523 | |||
1524 | default: | ||
1525 | break; | ||
1526 | } | ||
1527 | |||
1528 | return NOTIFY_OK; | ||
1529 | } | ||
1530 | |||
1531 | static struct notifier_block cpb_nb = { | ||
1532 | .notifier_call = cpb_notify, | ||
1533 | }; | ||
1534 | |||
1535 | /* driver entry point for init */ | ||
1536 | static int __cpuinit powernowk8_init(void) | ||
1537 | { | ||
1538 | unsigned int i, supported_cpus = 0, cpu; | ||
1539 | int rv; | ||
1540 | |||
1541 | for_each_online_cpu(i) { | ||
1542 | int rc; | ||
1543 | smp_call_function_single(i, check_supported_cpu, &rc, 1); | ||
1544 | if (rc == 0) | ||
1545 | supported_cpus++; | ||
1546 | } | ||
1547 | |||
1548 | if (supported_cpus != num_online_cpus()) | ||
1549 | return -ENODEV; | ||
1550 | |||
1551 | printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n", | ||
1552 | num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus); | ||
1553 | |||
1554 | if (boot_cpu_has(X86_FEATURE_CPB)) { | ||
1555 | |||
1556 | cpb_capable = true; | ||
1557 | |||
1558 | msrs = msrs_alloc(); | ||
1559 | if (!msrs) { | ||
1560 | printk(KERN_ERR "%s: Error allocating msrs!\n", __func__); | ||
1561 | return -ENOMEM; | ||
1562 | } | ||
1563 | |||
1564 | register_cpu_notifier(&cpb_nb); | ||
1565 | |||
1566 | rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1567 | |||
1568 | for_each_cpu(cpu, cpu_online_mask) { | ||
1569 | struct msr *reg = per_cpu_ptr(msrs, cpu); | ||
1570 | cpb_enabled |= !(reg->l & BIT(25)); | ||
1571 | } | ||
1572 | |||
1573 | printk(KERN_INFO PFX "Core Performance Boosting: %s.\n", | ||
1574 | (cpb_enabled ? "on" : "off")); | ||
1575 | } | ||
1576 | |||
1577 | rv = cpufreq_register_driver(&cpufreq_amd64_driver); | ||
1578 | if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) { | ||
1579 | unregister_cpu_notifier(&cpb_nb); | ||
1580 | msrs_free(msrs); | ||
1581 | msrs = NULL; | ||
1582 | } | ||
1583 | return rv; | ||
1584 | } | ||
1585 | |||
1586 | /* driver entry point for term */ | ||
1587 | static void __exit powernowk8_exit(void) | ||
1588 | { | ||
1589 | dprintk("exit\n"); | ||
1590 | |||
1591 | if (boot_cpu_has(X86_FEATURE_CPB)) { | ||
1592 | msrs_free(msrs); | ||
1593 | msrs = NULL; | ||
1594 | |||
1595 | unregister_cpu_notifier(&cpb_nb); | ||
1596 | } | ||
1597 | |||
1598 | cpufreq_unregister_driver(&cpufreq_amd64_driver); | ||
1599 | } | ||
1600 | |||
1601 | MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and " | ||
1602 | "Mark Langsdorf <mark.langsdorf@amd.com>"); | ||
1603 | MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); | ||
1604 | MODULE_LICENSE("GPL"); | ||
1605 | |||
1606 | late_initcall(powernowk8_init); | ||
1607 | module_exit(powernowk8_exit); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h deleted file mode 100644 index df3529b1c02d..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ /dev/null | |||
@@ -1,224 +0,0 @@ | |||
1 | /* | ||
2 | * (c) 2003-2006 Advanced Micro Devices, Inc. | ||
3 | * Your use of this code is subject to the terms and conditions of the | ||
4 | * GNU general public license version 2. See "COPYING" or | ||
5 | * http://www.gnu.org/licenses/gpl.html | ||
6 | */ | ||
7 | |||
8 | enum pstate { | ||
9 | HW_PSTATE_INVALID = 0xff, | ||
10 | HW_PSTATE_0 = 0, | ||
11 | HW_PSTATE_1 = 1, | ||
12 | HW_PSTATE_2 = 2, | ||
13 | HW_PSTATE_3 = 3, | ||
14 | HW_PSTATE_4 = 4, | ||
15 | HW_PSTATE_5 = 5, | ||
16 | HW_PSTATE_6 = 6, | ||
17 | HW_PSTATE_7 = 7, | ||
18 | }; | ||
19 | |||
20 | struct powernow_k8_data { | ||
21 | unsigned int cpu; | ||
22 | |||
23 | u32 numps; /* number of p-states */ | ||
24 | u32 batps; /* number of p-states supported on battery */ | ||
25 | u32 max_hw_pstate; /* maximum legal hardware pstate */ | ||
26 | |||
27 | /* these values are constant when the PSB is used to determine | ||
28 | * vid/fid pairings, but are modified during the ->target() call | ||
29 | * when ACPI is used */ | ||
30 | u32 rvo; /* ramp voltage offset */ | ||
31 | u32 irt; /* isochronous relief time */ | ||
32 | u32 vidmvs; /* usable value calculated from mvs */ | ||
33 | u32 vstable; /* voltage stabilization time, units 20 us */ | ||
34 | u32 plllock; /* pll lock time, units 1 us */ | ||
35 | u32 exttype; /* extended interface = 1 */ | ||
36 | |||
37 | /* keep track of the current fid / vid or pstate */ | ||
38 | u32 currvid; | ||
39 | u32 currfid; | ||
40 | enum pstate currpstate; | ||
41 | |||
42 | /* the powernow_table includes all frequency and vid/fid pairings: | ||
43 | * fid are the lower 8 bits of the index, vid are the upper 8 bits. | ||
44 | * frequency is in kHz */ | ||
45 | struct cpufreq_frequency_table *powernow_table; | ||
46 | |||
47 | /* the acpi table needs to be kept. it's only available if ACPI was | ||
48 | * used to determine valid frequency/vid/fid states */ | ||
49 | struct acpi_processor_performance acpi_data; | ||
50 | |||
51 | /* we need to keep track of associated cores, but let cpufreq | ||
52 | * handle hotplug events - so just point at cpufreq pol->cpus | ||
53 | * structure */ | ||
54 | struct cpumask *available_cores; | ||
55 | }; | ||
56 | |||
57 | /* processor's cpuid instruction support */ | ||
58 | #define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ | ||
59 | #define CPUID_XFAM 0x0ff00000 /* extended family */ | ||
60 | #define CPUID_XFAM_K8 0 | ||
61 | #define CPUID_XMOD 0x000f0000 /* extended model */ | ||
62 | #define CPUID_XMOD_REV_MASK 0x000c0000 | ||
63 | #define CPUID_XFAM_10H 0x00100000 /* family 0x10 */ | ||
64 | #define CPUID_USE_XFAM_XMOD 0x00000f00 | ||
65 | #define CPUID_GET_MAX_CAPABILITIES 0x80000000 | ||
66 | #define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 | ||
67 | #define P_STATE_TRANSITION_CAPABLE 6 | ||
68 | |||
69 | /* Model Specific Registers for p-state transitions. MSRs are 64-bit. For */ | ||
70 | /* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and */ | ||
71 | /* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */ | ||
72 | /* the register number is placed in ecx, and the data is returned in edx:eax. */ | ||
73 | |||
74 | #define MSR_FIDVID_CTL 0xc0010041 | ||
75 | #define MSR_FIDVID_STATUS 0xc0010042 | ||
76 | |||
77 | /* Field definitions within the FID VID Low Control MSR : */ | ||
78 | #define MSR_C_LO_INIT_FID_VID 0x00010000 | ||
79 | #define MSR_C_LO_NEW_VID 0x00003f00 | ||
80 | #define MSR_C_LO_NEW_FID 0x0000003f | ||
81 | #define MSR_C_LO_VID_SHIFT 8 | ||
82 | |||
83 | /* Field definitions within the FID VID High Control MSR : */ | ||
84 | #define MSR_C_HI_STP_GNT_TO 0x000fffff | ||
85 | |||
86 | /* Field definitions within the FID VID Low Status MSR : */ | ||
87 | #define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */ | ||
88 | #define MSR_S_LO_MAX_RAMP_VID 0x3f000000 | ||
89 | #define MSR_S_LO_MAX_FID 0x003f0000 | ||
90 | #define MSR_S_LO_START_FID 0x00003f00 | ||
91 | #define MSR_S_LO_CURRENT_FID 0x0000003f | ||
92 | |||
93 | /* Field definitions within the FID VID High Status MSR : */ | ||
94 | #define MSR_S_HI_MIN_WORKING_VID 0x3f000000 | ||
95 | #define MSR_S_HI_MAX_WORKING_VID 0x003f0000 | ||
96 | #define MSR_S_HI_START_VID 0x00003f00 | ||
97 | #define MSR_S_HI_CURRENT_VID 0x0000003f | ||
98 | #define MSR_C_HI_STP_GNT_BENIGN 0x00000001 | ||
99 | |||
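
The driver reads the current operating point back from MSR_FIDVID_STATUS using the field masks above. A hedged sketch of that extraction (it assumes the kernel's u32 type and the masks defined above; lo/hi are the eax/edx halves returned by rdmsr(), as described in the MSR comment earlier):

        /* Illustrative only: pull the current fid/vid out of the two halves of
         * MSR_FIDVID_STATUS (lo = eax, hi = edx), using the masks defined above. */
        static inline void decode_fidvid_status(u32 lo, u32 hi, u32 *fid, u32 *vid)
        {
                *fid = lo & MSR_S_LO_CURRENT_FID;   /* bits 5:0 of the low half */
                *vid = hi & MSR_S_HI_CURRENT_VID;   /* bits 5:0 of the high half */
        }
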
100 | |||
101 | /* Hardware Pstate _PSS and MSR definitions */ | ||
102 | #define USE_HW_PSTATE 0x00000080 | ||
103 | #define HW_PSTATE_MASK 0x00000007 | ||
104 | #define HW_PSTATE_VALID_MASK 0x80000000 | ||
105 | #define HW_PSTATE_MAX_MASK 0x000000f0 | ||
106 | #define HW_PSTATE_MAX_SHIFT 4 | ||
107 | #define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */ | ||
108 | #define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */ | ||
109 | #define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */ | ||
110 | #define MSR_PSTATE_CUR_LIMIT 0xc0010061 /* pstate current limit MSR */ | ||
111 | |||
112 | /* define the two driver architectures */ | ||
113 | #define CPU_OPTERON 0 | ||
114 | #define CPU_HW_PSTATE 1 | ||
115 | |||
116 | |||
117 | /* | ||
118 | * There are restrictions frequencies have to follow: | ||
119 | * - only 1 entry in the low fid table ( <=1.4GHz ) | ||
120 | * - lowest entry in the high fid table must be >= 2 * the entry in the | ||
121 | * low fid table | ||
122 | * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry | ||
123 | * in the low fid table | ||
124 | * - the parts can only step at <= 200 MHz intervals, odd fid values are | ||
125 | * supported in revision G and later revisions. | ||
126 | * - lowest frequency must be >= interprocessor hypertransport link speed | ||
127 | * (only applies to MP systems obviously) | ||
128 | */ | ||
129 | |||
130 | /* fids (frequency identifiers) are arranged in 2 tables - lo and hi */ | ||
131 | #define LO_FID_TABLE_TOP 7 /* fid values marking the boundary */ | ||
132 | #define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */ | ||
133 | |||
134 | #define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */ | ||
135 | #define HI_VCOFREQ_TABLE_BOTTOM 1600 | ||
136 | |||
137 | #define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */ | ||
138 | |||
139 | #define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */ | ||
140 | #define LEAST_VID 0x3e /* Lowest (numerically highest) useful vid value */ | ||
141 | |||
142 | #define MIN_FREQ 800 /* Min and max freqs, per spec */ | ||
143 | #define MAX_FREQ 5000 | ||
144 | |||
145 | #define INVALID_FID_MASK 0xffffffc0 /* not a valid fid if these bits are set */ | ||
146 | #define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */ | ||
147 | |||
148 | #define VID_OFF 0x3f | ||
149 | |||
150 | #define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */ | ||
151 | |||
152 | #define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */ | ||
153 | |||
154 | #define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */ | ||
155 | #define VST_UNITS_20US 20 /* Voltage Stabilization Time is in units of 20us */ | ||
156 | |||
157 | /* | ||
158 | * Most values of interest are encoded in a single field of the _PSS | ||
159 | * entries: the "control" value. | ||
160 | */ | ||
161 | |||
162 | #define IRT_SHIFT 30 | ||
163 | #define RVO_SHIFT 28 | ||
164 | #define EXT_TYPE_SHIFT 27 | ||
165 | #define PLL_L_SHIFT 20 | ||
166 | #define MVS_SHIFT 18 | ||
167 | #define VST_SHIFT 11 | ||
168 | #define VID_SHIFT 6 | ||
169 | #define IRT_MASK 3 | ||
170 | #define RVO_MASK 3 | ||
171 | #define EXT_TYPE_MASK 1 | ||
172 | #define PLL_L_MASK 0x7f | ||
173 | #define MVS_MASK 3 | ||
174 | #define VST_MASK 0x7f | ||
175 | #define VID_MASK 0x1f | ||
176 | #define FID_MASK 0x1f | ||
177 | #define EXT_VID_MASK 0x3f | ||
178 | #define EXT_FID_MASK 0x3f | ||
179 | |||
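
Taken together, these shifts and masks carve a single _PSS "control" value into the parameters the driver needs. A sketch of the non-extended fid/vid decode used by fill_powernow_table_fidvid() above (assumptions: the control value has already been read from the ACPI _PSS entry, and the kernel's u32/u64 types are available):

        /* Illustrative only: non-extended decode of fid/vid from a _PSS control
         * value, using the shift/mask definitions above. */
        static inline void decode_pss_fidvid(u64 control, u32 *fid, u32 *vid)
        {
                *fid = control & FID_MASK;
                *vid = (control >> VID_SHIFT) & VID_MASK;
        }
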
180 | |||
181 | /* | ||
182 | * Version 1.4 of the PSB table. This table is constructed by BIOS and is | ||
183 | * to tell the OS's power management driver which VIDs and FIDs are | ||
184 | * supported by this particular processor. | ||
185 | * If the data in the PSB / PST is wrong, then this driver will program the | ||
186 | * wrong values into hardware, which is very likely to lead to a crash. | ||
187 | */ | ||
188 | |||
189 | #define PSB_ID_STRING "AMDK7PNOW!" | ||
190 | #define PSB_ID_STRING_LEN 10 | ||
191 | |||
192 | #define PSB_VERSION_1_4 0x14 | ||
193 | |||
194 | struct psb_s { | ||
195 | u8 signature[10]; | ||
196 | u8 tableversion; | ||
197 | u8 flags1; | ||
198 | u16 vstable; | ||
199 | u8 flags2; | ||
200 | u8 num_tables; | ||
201 | u32 cpuid; | ||
202 | u8 plllocktime; | ||
203 | u8 maxfid; | ||
204 | u8 maxvid; | ||
205 | u8 numps; | ||
206 | }; | ||
207 | |||
208 | /* Pairs of fid/vid values are appended to the version 1.4 PSB table. */ | ||
209 | struct pst_s { | ||
210 | u8 fid; | ||
211 | u8 vid; | ||
212 | }; | ||
213 | |||
214 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg) | ||
215 | |||
216 | static int core_voltage_pre_transition(struct powernow_k8_data *data, | ||
217 | u32 reqvid, u32 reqfid); | ||
218 | static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid); | ||
219 | static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); | ||
220 | |||
221 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); | ||
222 | |||
223 | static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); | ||
224 | static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c deleted file mode 100644 index 435a996a613a..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c +++ /dev/null | |||
@@ -1,194 +0,0 @@ | |||
1 | /* | ||
2 | * sc520_freq.c: cpufreq driver for the AMD Elan sc520 | ||
3 | * | ||
4 | * Copyright (C) 2005 Sean Young <sean@mess.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * Based on elanfreq.c | ||
12 | * | ||
13 | * 2005-03-30: - initial revision | ||
14 | */ | ||
15 | |||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/init.h> | ||
19 | |||
20 | #include <linux/delay.h> | ||
21 | #include <linux/cpufreq.h> | ||
22 | #include <linux/timex.h> | ||
23 | #include <linux/io.h> | ||
24 | |||
25 | #include <asm/msr.h> | ||
26 | |||
27 | #define MMCR_BASE 0xfffef000 /* The default base address */ | ||
28 | #define OFFS_CPUCTL 0x2 /* CPU Control Register */ | ||
29 | |||
30 | static __u8 __iomem *cpuctl; | ||
31 | |||
32 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
33 | "sc520_freq", msg) | ||
34 | #define PFX "sc520_freq: " | ||
35 | |||
36 | static struct cpufreq_frequency_table sc520_freq_table[] = { | ||
37 | {0x01, 100000}, | ||
38 | {0x02, 133000}, | ||
39 | {0, CPUFREQ_TABLE_END}, | ||
40 | }; | ||
41 | |||
42 | static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) | ||
43 | { | ||
44 | u8 clockspeed_reg = *cpuctl; | ||
45 | |||
46 | switch (clockspeed_reg & 0x03) { | ||
47 | default: | ||
48 | printk(KERN_ERR PFX "error: cpuctl register has unexpected " | ||
49 | "value %02x\n", clockspeed_reg); | ||
50 | case 0x01: | ||
51 | return 100000; | ||
52 | case 0x02: | ||
53 | return 133000; | ||
54 | } | ||
55 | } | ||
56 | |||
57 | static void sc520_freq_set_cpu_state(unsigned int state) | ||
58 | { | ||
59 | |||
60 | struct cpufreq_freqs freqs; | ||
61 | u8 clockspeed_reg; | ||
62 | |||
63 | freqs.old = sc520_freq_get_cpu_frequency(0); | ||
64 | freqs.new = sc520_freq_table[state].frequency; | ||
65 | freqs.cpu = 0; /* AMD Elan is UP */ | ||
66 | |||
67 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
68 | |||
69 | dprintk("attempting to set frequency to %i kHz\n", | ||
70 | sc520_freq_table[state].frequency); | ||
71 | |||
72 | local_irq_disable(); | ||
73 | |||
74 | clockspeed_reg = *cpuctl & ~0x03; | ||
75 | *cpuctl = clockspeed_reg | sc520_freq_table[state].index; | ||
76 | |||
77 | local_irq_enable(); | ||
78 | |||
79 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
80 | } | ||
81 | |||
82 | static int sc520_freq_verify(struct cpufreq_policy *policy) | ||
83 | { | ||
84 | return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]); | ||
85 | } | ||
86 | |||
87 | static int sc520_freq_target(struct cpufreq_policy *policy, | ||
88 | unsigned int target_freq, | ||
89 | unsigned int relation) | ||
90 | { | ||
91 | unsigned int newstate = 0; | ||
92 | |||
93 | if (cpufreq_frequency_table_target(policy, sc520_freq_table, | ||
94 | target_freq, relation, &newstate)) | ||
95 | return -EINVAL; | ||
96 | |||
97 | sc520_freq_set_cpu_state(newstate); | ||
98 | |||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | |||
103 | /* | ||
104 | * Module init and exit code | ||
105 | */ | ||
106 | |||
107 | static int sc520_freq_cpu_init(struct cpufreq_policy *policy) | ||
108 | { | ||
109 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
110 | int result; | ||
111 | |||
112 | /* capability check */ | ||
113 | if (c->x86_vendor != X86_VENDOR_AMD || | ||
114 | c->x86 != 4 || c->x86_model != 9) | ||
115 | return -ENODEV; | ||
116 | |||
117 | /* cpuinfo and default policy values */ | ||
118 | policy->cpuinfo.transition_latency = 1000000; /* 1ms */ | ||
119 | policy->cur = sc520_freq_get_cpu_frequency(0); | ||
120 | |||
121 | result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table); | ||
122 | if (result) | ||
123 | return result; | ||
124 | |||
125 | cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu); | ||
126 | |||
127 | return 0; | ||
128 | } | ||
129 | |||
130 | |||
131 | static int sc520_freq_cpu_exit(struct cpufreq_policy *policy) | ||
132 | { | ||
133 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
134 | return 0; | ||
135 | } | ||
136 | |||
137 | |||
138 | static struct freq_attr *sc520_freq_attr[] = { | ||
139 | &cpufreq_freq_attr_scaling_available_freqs, | ||
140 | NULL, | ||
141 | }; | ||
142 | |||
143 | |||
144 | static struct cpufreq_driver sc520_freq_driver = { | ||
145 | .get = sc520_freq_get_cpu_frequency, | ||
146 | .verify = sc520_freq_verify, | ||
147 | .target = sc520_freq_target, | ||
148 | .init = sc520_freq_cpu_init, | ||
149 | .exit = sc520_freq_cpu_exit, | ||
150 | .name = "sc520_freq", | ||
151 | .owner = THIS_MODULE, | ||
152 | .attr = sc520_freq_attr, | ||
153 | }; | ||
154 | |||
155 | |||
156 | static int __init sc520_freq_init(void) | ||
157 | { | ||
158 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
159 | int err; | ||
160 | |||
161 | /* Test if we have the right hardware */ | ||
162 | if (c->x86_vendor != X86_VENDOR_AMD || | ||
163 | c->x86 != 4 || c->x86_model != 9) { | ||
164 | dprintk("no Elan SC520 processor found!\n"); | ||
165 | return -ENODEV; | ||
166 | } | ||
167 | cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); | ||
168 | if (!cpuctl) { | ||
169 | printk(KERN_ERR "sc520_freq: error: failed to remap memory\n"); | ||
170 | return -ENOMEM; | ||
171 | } | ||
172 | |||
173 | err = cpufreq_register_driver(&sc520_freq_driver); | ||
174 | if (err) | ||
175 | iounmap(cpuctl); | ||
176 | |||
177 | return err; | ||
178 | } | ||
179 | |||
180 | |||
181 | static void __exit sc520_freq_exit(void) | ||
182 | { | ||
183 | cpufreq_unregister_driver(&sc520_freq_driver); | ||
184 | iounmap(cpuctl); | ||
185 | } | ||
186 | |||
187 | |||
188 | MODULE_LICENSE("GPL"); | ||
189 | MODULE_AUTHOR("Sean Young <sean@mess.org>"); | ||
190 | MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU"); | ||
191 | |||
192 | module_init(sc520_freq_init); | ||
193 | module_exit(sc520_freq_exit); | ||
194 | |||
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c deleted file mode 100644 index 9b1ff37de46a..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ /dev/null | |||
@@ -1,636 +0,0 @@ | |||
1 | /* | ||
2 | * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium | ||
3 | * M (part of the Centrino chipset). | ||
4 | * | ||
5 | * Since the original Pentium M, most new Intel CPUs support Enhanced | ||
6 | * SpeedStep. | ||
7 | * | ||
8 | * Despite the "SpeedStep" in the name, this is almost entirely unlike | ||
9 | * traditional SpeedStep. | ||
10 | * | ||
11 | * Modelled on speedstep.c | ||
12 | * | ||
13 | * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org> | ||
14 | */ | ||
15 | |||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/cpufreq.h> | ||
20 | #include <linux/sched.h> /* current */ | ||
21 | #include <linux/delay.h> | ||
22 | #include <linux/compiler.h> | ||
23 | #include <linux/gfp.h> | ||
24 | |||
25 | #include <asm/msr.h> | ||
26 | #include <asm/processor.h> | ||
27 | #include <asm/cpufeature.h> | ||
28 | |||
29 | #define PFX "speedstep-centrino: " | ||
30 | #define MAINTAINER "cpufreq@vger.kernel.org" | ||
31 | |||
32 | #define dprintk(msg...) \ | ||
33 | cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) | ||
34 | |||
35 | #define INTEL_MSR_RANGE (0xffff) | ||
36 | |||
37 | struct cpu_id | ||
38 | { | ||
39 | __u8 x86; /* CPU family */ | ||
40 | __u8 x86_model; /* model */ | ||
41 | __u8 x86_mask; /* stepping */ | ||
42 | }; | ||
43 | |||
44 | enum { | ||
45 | CPU_BANIAS, | ||
46 | CPU_DOTHAN_A1, | ||
47 | CPU_DOTHAN_A2, | ||
48 | CPU_DOTHAN_B0, | ||
49 | CPU_MP4HT_D0, | ||
50 | CPU_MP4HT_E0, | ||
51 | }; | ||
52 | |||
53 | static const struct cpu_id cpu_ids[] = { | ||
54 | [CPU_BANIAS] = { 6, 9, 5 }, | ||
55 | [CPU_DOTHAN_A1] = { 6, 13, 1 }, | ||
56 | [CPU_DOTHAN_A2] = { 6, 13, 2 }, | ||
57 | [CPU_DOTHAN_B0] = { 6, 13, 6 }, | ||
58 | [CPU_MP4HT_D0] = {15, 3, 4 }, | ||
59 | [CPU_MP4HT_E0] = {15, 4, 1 }, | ||
60 | }; | ||
61 | #define N_IDS ARRAY_SIZE(cpu_ids) | ||
62 | |||
63 | struct cpu_model | ||
64 | { | ||
65 | const struct cpu_id *cpu_id; | ||
66 | const char *model_name; | ||
67 | unsigned max_freq; /* max clock in kHz */ | ||
68 | |||
69 | struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ | ||
70 | }; | ||
71 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, | ||
72 | const struct cpu_id *x); | ||
73 | |||
74 | /* Operating points for current CPU */ | ||
75 | static DEFINE_PER_CPU(struct cpu_model *, centrino_model); | ||
76 | static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu); | ||
77 | |||
78 | static struct cpufreq_driver centrino_driver; | ||
79 | |||
80 | #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE | ||
81 | |||
82 | /* Computes the correct form for IA32_PERF_CTL MSR for a particular | ||
83 | frequency/voltage operating point; frequency in MHz, volts in mV. | ||
84 | This is stored as "index" in the structure. */ | ||
85 | #define OP(mhz, mv) \ | ||
86 | { \ | ||
87 | .frequency = (mhz) * 1000, \ | ||
88 | .index = (((mhz)/100) << 8) | ((mv - 700) / 16) \ | ||
89 | } | ||
90 | |||
91 | /* | ||
92 | * These voltage tables were derived from the Intel Pentium M | ||
93 | * datasheet, document 25261202.pdf, Table 5. I have verified they | ||
94 | * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium | ||
95 | * M. | ||
96 | */ | ||
97 | |||
98 | /* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */ | ||
99 | static struct cpufreq_frequency_table banias_900[] = | ||
100 | { | ||
101 | OP(600, 844), | ||
102 | OP(800, 988), | ||
103 | OP(900, 1004), | ||
104 | { .frequency = CPUFREQ_TABLE_END } | ||
105 | }; | ||
106 | |||
107 | /* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */ | ||
108 | static struct cpufreq_frequency_table banias_1000[] = | ||
109 | { | ||
110 | OP(600, 844), | ||
111 | OP(800, 972), | ||
112 | OP(900, 988), | ||
113 | OP(1000, 1004), | ||
114 | { .frequency = CPUFREQ_TABLE_END } | ||
115 | }; | ||
116 | |||
117 | /* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */ | ||
118 | static struct cpufreq_frequency_table banias_1100[] = | ||
119 | { | ||
120 | OP( 600, 956), | ||
121 | OP( 800, 1020), | ||
122 | OP( 900, 1100), | ||
123 | OP(1000, 1164), | ||
124 | OP(1100, 1180), | ||
125 | { .frequency = CPUFREQ_TABLE_END } | ||
126 | }; | ||
127 | |||
128 | |||
129 | /* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */ | ||
130 | static struct cpufreq_frequency_table banias_1200[] = | ||
131 | { | ||
132 | OP( 600, 956), | ||
133 | OP( 800, 1004), | ||
134 | OP( 900, 1020), | ||
135 | OP(1000, 1100), | ||
136 | OP(1100, 1164), | ||
137 | OP(1200, 1180), | ||
138 | { .frequency = CPUFREQ_TABLE_END } | ||
139 | }; | ||
140 | |||
141 | /* Intel Pentium M processor 1.30GHz (Banias) */ | ||
142 | static struct cpufreq_frequency_table banias_1300[] = | ||
143 | { | ||
144 | OP( 600, 956), | ||
145 | OP( 800, 1260), | ||
146 | OP(1000, 1292), | ||
147 | OP(1200, 1356), | ||
148 | OP(1300, 1388), | ||
149 | { .frequency = CPUFREQ_TABLE_END } | ||
150 | }; | ||
151 | |||
152 | /* Intel Pentium M processor 1.40GHz (Banias) */ | ||
153 | static struct cpufreq_frequency_table banias_1400[] = | ||
154 | { | ||
155 | OP( 600, 956), | ||
156 | OP( 800, 1180), | ||
157 | OP(1000, 1308), | ||
158 | OP(1200, 1436), | ||
159 | OP(1400, 1484), | ||
160 | { .frequency = CPUFREQ_TABLE_END } | ||
161 | }; | ||
162 | |||
163 | /* Intel Pentium M processor 1.50GHz (Banias) */ | ||
164 | static struct cpufreq_frequency_table banias_1500[] = | ||
165 | { | ||
166 | OP( 600, 956), | ||
167 | OP( 800, 1116), | ||
168 | OP(1000, 1228), | ||
169 | OP(1200, 1356), | ||
170 | OP(1400, 1452), | ||
171 | OP(1500, 1484), | ||
172 | { .frequency = CPUFREQ_TABLE_END } | ||
173 | }; | ||
174 | |||
175 | /* Intel Pentium M processor 1.60GHz (Banias) */ | ||
176 | static struct cpufreq_frequency_table banias_1600[] = | ||
177 | { | ||
178 | OP( 600, 956), | ||
179 | OP( 800, 1036), | ||
180 | OP(1000, 1164), | ||
181 | OP(1200, 1276), | ||
182 | OP(1400, 1420), | ||
183 | OP(1600, 1484), | ||
184 | { .frequency = CPUFREQ_TABLE_END } | ||
185 | }; | ||
186 | |||
187 | /* Intel Pentium M processor 1.70GHz (Banias) */ | ||
188 | static struct cpufreq_frequency_table banias_1700[] = | ||
189 | { | ||
190 | OP( 600, 956), | ||
191 | OP( 800, 1004), | ||
192 | OP(1000, 1116), | ||
193 | OP(1200, 1228), | ||
194 | OP(1400, 1308), | ||
195 | OP(1700, 1484), | ||
196 | { .frequency = CPUFREQ_TABLE_END } | ||
197 | }; | ||
198 | #undef OP | ||
199 | |||
200 | #define _BANIAS(cpuid, max, name) \ | ||
201 | { .cpu_id = cpuid, \ | ||
202 | .model_name = "Intel(R) Pentium(R) M processor " name "MHz", \ | ||
203 | .max_freq = (max)*1000, \ | ||
204 | .op_points = banias_##max, \ | ||
205 | } | ||
206 | #define BANIAS(max) _BANIAS(&cpu_ids[CPU_BANIAS], max, #max) | ||
207 | |||
208 | /* CPU models, their operating frequency range, and freq/voltage | ||
209 | operating points */ | ||
210 | static struct cpu_model models[] = | ||
211 | { | ||
212 | _BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"), | ||
213 | BANIAS(1000), | ||
214 | BANIAS(1100), | ||
215 | BANIAS(1200), | ||
216 | BANIAS(1300), | ||
217 | BANIAS(1400), | ||
218 | BANIAS(1500), | ||
219 | BANIAS(1600), | ||
220 | BANIAS(1700), | ||
221 | |||
222 | /* NULL model_name is a wildcard */ | ||
223 | { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL }, | ||
224 | { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL }, | ||
225 | { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL }, | ||
226 | { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL }, | ||
227 | { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL }, | ||
228 | |||
229 | { NULL, } | ||
230 | }; | ||
231 | #undef _BANIAS | ||
232 | #undef BANIAS | ||
233 | |||
234 | static int centrino_cpu_init_table(struct cpufreq_policy *policy) | ||
235 | { | ||
236 | struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); | ||
237 | struct cpu_model *model; | ||
238 | |||
239 | for(model = models; model->cpu_id != NULL; model++) | ||
240 | if (centrino_verify_cpu_id(cpu, model->cpu_id) && | ||
241 | (model->model_name == NULL || | ||
242 | strcmp(cpu->x86_model_id, model->model_name) == 0)) | ||
243 | break; | ||
244 | |||
245 | if (model->cpu_id == NULL) { | ||
246 | /* No match at all */ | ||
247 | dprintk("no support for CPU model \"%s\": " | ||
248 | "send /proc/cpuinfo to " MAINTAINER "\n", | ||
249 | cpu->x86_model_id); | ||
250 | return -ENOENT; | ||
251 | } | ||
252 | |||
253 | if (model->op_points == NULL) { | ||
254 | /* Matched a non-match */ | ||
255 | dprintk("no table support for CPU model \"%s\"\n", | ||
256 | cpu->x86_model_id); | ||
257 | dprintk("try using the acpi-cpufreq driver\n"); | ||
258 | return -ENOENT; | ||
259 | } | ||
260 | |||
261 | per_cpu(centrino_model, policy->cpu) = model; | ||
262 | |||
263 | dprintk("found \"%s\": max frequency: %dkHz\n", | ||
264 | model->model_name, model->max_freq); | ||
265 | |||
266 | return 0; | ||
267 | } | ||
268 | |||
269 | #else | ||
270 | static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) | ||
271 | { | ||
272 | return -ENODEV; | ||
273 | } | ||
274 | #endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ | ||
275 | |||
276 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, | ||
277 | const struct cpu_id *x) | ||
278 | { | ||
279 | if ((c->x86 == x->x86) && | ||
280 | (c->x86_model == x->x86_model) && | ||
281 | (c->x86_mask == x->x86_mask)) | ||
282 | return 1; | ||
283 | return 0; | ||
284 | } | ||
285 | |||
286 | /* To be called only after centrino_model is initialized */ | ||
287 | static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe) | ||
288 | { | ||
289 | int i; | ||
290 | |||
291 | /* | ||
292 | * Extract clock in kHz from PERF_CTL value | ||
293 | * for centrino, as some DSDTs are buggy. | ||
294 | * Ideally, this can be done using the acpi_data structure. | ||
295 | */ | ||
296 | if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) || | ||
297 | (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) || | ||
298 | (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) { | ||
299 | msr = (msr >> 8) & 0xff; | ||
300 | return msr * 100000; | ||
301 | } | ||
302 | |||
303 | if ((!per_cpu(centrino_model, cpu)) || | ||
304 | (!per_cpu(centrino_model, cpu)->op_points)) | ||
305 | return 0; | ||
306 | |||
307 | msr &= 0xffff; | ||
308 | for (i = 0; | ||
309 | per_cpu(centrino_model, cpu)->op_points[i].frequency | ||
310 | != CPUFREQ_TABLE_END; | ||
311 | i++) { | ||
312 | if (msr == per_cpu(centrino_model, cpu)->op_points[i].index) | ||
313 | return per_cpu(centrino_model, cpu)-> | ||
314 | op_points[i].frequency; | ||
315 | } | ||
316 | if (failsafe) | ||
317 | return per_cpu(centrino_model, cpu)->op_points[i-1].frequency; | ||
318 | else | ||
319 | return 0; | ||
320 | } | ||
321 | |||
322 | /* Return the current CPU frequency in kHz */ | ||
323 | static unsigned int get_cur_freq(unsigned int cpu) | ||
324 | { | ||
325 | unsigned l, h; | ||
326 | unsigned clock_freq; | ||
327 | |||
328 | rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h); | ||
329 | clock_freq = extract_clock(l, cpu, 0); | ||
330 | |||
331 | if (unlikely(clock_freq == 0)) { | ||
332 | /* | ||
333 | * On some CPUs, we can see transient MSR values (which are | ||
334 | * not present in _PSS), while CPU is doing some automatic | ||
335 | * P-state transition (like TM2). Get the last freq set | ||
336 | * in PERF_CTL. | ||
337 | */ | ||
338 | rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h); | ||
339 | clock_freq = extract_clock(l, cpu, 1); | ||
340 | } | ||
341 | return clock_freq; | ||
342 | } | ||
343 | |||
344 | |||
345 | static int centrino_cpu_init(struct cpufreq_policy *policy) | ||
346 | { | ||
347 | struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); | ||
348 | unsigned freq; | ||
349 | unsigned l, h; | ||
350 | int ret; | ||
351 | int i; | ||
352 | |||
353 | /* Only Intel makes Enhanced Speedstep-capable CPUs */ | ||
354 | if (cpu->x86_vendor != X86_VENDOR_INTEL || | ||
355 | !cpu_has(cpu, X86_FEATURE_EST)) | ||
356 | return -ENODEV; | ||
357 | |||
358 | if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) | ||
359 | centrino_driver.flags |= CPUFREQ_CONST_LOOPS; | ||
360 | |||
361 | if (policy->cpu != 0) | ||
362 | return -ENODEV; | ||
363 | |||
364 | for (i = 0; i < N_IDS; i++) | ||
365 | if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) | ||
366 | break; | ||
367 | |||
368 | if (i != N_IDS) | ||
369 | per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i]; | ||
370 | |||
371 | if (!per_cpu(centrino_cpu, policy->cpu)) { | ||
372 | dprintk("found unsupported CPU with " | ||
373 | "Enhanced SpeedStep: send /proc/cpuinfo to " | ||
374 | MAINTAINER "\n"); | ||
375 | return -ENODEV; | ||
376 | } | ||
377 | |||
378 | if (centrino_cpu_init_table(policy)) { | ||
379 | return -ENODEV; | ||
380 | } | ||
381 | |||
382 | /* Check to see if Enhanced SpeedStep is enabled, and try to | ||
383 | enable it if not. */ | ||
384 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
385 | |||
386 | if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { | ||
387 | l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; | ||
388 | dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); | ||
389 | wrmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
390 | |||
391 | /* check to see if it stuck */ | ||
392 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
393 | if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { | ||
394 | printk(KERN_INFO PFX | ||
395 | "couldn't enable Enhanced SpeedStep\n"); | ||
396 | return -ENODEV; | ||
397 | } | ||
398 | } | ||
399 | |||
400 | freq = get_cur_freq(policy->cpu); | ||
401 | policy->cpuinfo.transition_latency = 10000; | ||
402 | /* 10uS transition latency */ | ||
403 | policy->cur = freq; | ||
404 | |||
405 | dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); | ||
406 | |||
407 | ret = cpufreq_frequency_table_cpuinfo(policy, | ||
408 | per_cpu(centrino_model, policy->cpu)->op_points); | ||
409 | if (ret) | ||
410 | return (ret); | ||
411 | |||
412 | cpufreq_frequency_table_get_attr( | ||
413 | per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu); | ||
414 | |||
415 | return 0; | ||
416 | } | ||
417 | |||
418 | static int centrino_cpu_exit(struct cpufreq_policy *policy) | ||
419 | { | ||
420 | unsigned int cpu = policy->cpu; | ||
421 | |||
422 | if (!per_cpu(centrino_model, cpu)) | ||
423 | return -ENODEV; | ||
424 | |||
425 | cpufreq_frequency_table_put_attr(cpu); | ||
426 | |||
427 | per_cpu(centrino_model, cpu) = NULL; | ||
428 | |||
429 | return 0; | ||
430 | } | ||
431 | |||
432 | /** | ||
433 | * centrino_verify - verifies a new CPUFreq policy | ||
434 | * @policy: new policy | ||
435 | * | ||
436 | * Limit must be within this model's frequency range, with at | ||
437 | * least one border included. | ||
438 | */ | ||
439 | static int centrino_verify (struct cpufreq_policy *policy) | ||
440 | { | ||
441 | return cpufreq_frequency_table_verify(policy, | ||
442 | per_cpu(centrino_model, policy->cpu)->op_points); | ||
443 | } | ||
444 | |||
445 | /** | ||
446 | * centrino_target - set a new CPUFreq policy | ||
447 | * @policy: new policy | ||
448 | * @target_freq: the target frequency | ||
449 | * @relation: how that frequency relates to achieved frequency | ||
450 | * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | ||
451 | * | ||
452 | * Sets a new CPUFreq policy. | ||
453 | */ | ||
454 | static int centrino_target (struct cpufreq_policy *policy, | ||
455 | unsigned int target_freq, | ||
456 | unsigned int relation) | ||
457 | { | ||
458 | unsigned int newstate = 0; | ||
459 | unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; | ||
460 | struct cpufreq_freqs freqs; | ||
461 | int retval = 0; | ||
462 | unsigned int j, k, first_cpu, tmp; | ||
463 | cpumask_var_t covered_cpus; | ||
464 | |||
465 | if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) | ||
466 | return -ENOMEM; | ||
467 | |||
468 | if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { | ||
469 | retval = -ENODEV; | ||
470 | goto out; | ||
471 | } | ||
472 | |||
473 | if (unlikely(cpufreq_frequency_table_target(policy, | ||
474 | per_cpu(centrino_model, cpu)->op_points, | ||
475 | target_freq, | ||
476 | relation, | ||
477 | &newstate))) { | ||
478 | retval = -EINVAL; | ||
479 | goto out; | ||
480 | } | ||
481 | |||
482 | first_cpu = 1; | ||
483 | for_each_cpu(j, policy->cpus) { | ||
484 | int good_cpu; | ||
485 | |||
486 | /* cpufreq holds the hotplug lock, so we are safe here */ | ||
487 | if (!cpu_online(j)) | ||
488 | continue; | ||
489 | |||
490 | /* | ||
491 | * Support for SMP systems. | ||
492 | * Make sure we are running on the CPU that wants to change freq | ||
493 | */ | ||
494 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) | ||
495 | good_cpu = cpumask_any_and(policy->cpus, | ||
496 | cpu_online_mask); | ||
497 | else | ||
498 | good_cpu = j; | ||
499 | |||
500 | if (good_cpu >= nr_cpu_ids) { | ||
501 | dprintk("couldn't limit to CPUs in this domain\n"); | ||
502 | retval = -EAGAIN; | ||
503 | if (first_cpu) { | ||
504 | /* We haven't started the transition yet. */ | ||
505 | goto out; | ||
506 | } | ||
507 | break; | ||
508 | } | ||
509 | |||
510 | msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; | ||
511 | |||
512 | if (first_cpu) { | ||
513 | rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h); | ||
514 | if (msr == (oldmsr & 0xffff)) { | ||
515 | dprintk("no change needed - msr was and needs " | ||
516 | "to be %x\n", oldmsr); | ||
517 | retval = 0; | ||
518 | goto out; | ||
519 | } | ||
520 | |||
521 | freqs.old = extract_clock(oldmsr, cpu, 0); | ||
522 | freqs.new = extract_clock(msr, cpu, 0); | ||
523 | |||
524 | dprintk("target=%dkHz old=%d new=%d msr=%04x\n", | ||
525 | target_freq, freqs.old, freqs.new, msr); | ||
526 | |||
527 | for_each_cpu(k, policy->cpus) { | ||
528 | if (!cpu_online(k)) | ||
529 | continue; | ||
530 | freqs.cpu = k; | ||
531 | cpufreq_notify_transition(&freqs, | ||
532 | CPUFREQ_PRECHANGE); | ||
533 | } | ||
534 | |||
535 | first_cpu = 0; | ||
536 | /* all but 16 LSB are reserved, treat them with care */ | ||
537 | oldmsr &= ~0xffff; | ||
538 | msr &= 0xffff; | ||
539 | oldmsr |= msr; | ||
540 | } | ||
541 | |||
542 | wrmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, oldmsr, h); | ||
543 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) | ||
544 | break; | ||
545 | |||
546 | cpumask_set_cpu(j, covered_cpus); | ||
547 | } | ||
548 | |||
549 | for_each_cpu(k, policy->cpus) { | ||
550 | if (!cpu_online(k)) | ||
551 | continue; | ||
552 | freqs.cpu = k; | ||
553 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
554 | } | ||
555 | |||
556 | if (unlikely(retval)) { | ||
557 | /* | ||
558 | * We have failed halfway through the frequency change. | ||
559 | * We have sent callbacks to policy->cpus and | ||
560 | * MSRs have already been written on covered_cpus. | ||
561 | * Best effort undo.. | ||
562 | */ | ||
563 | |||
564 | for_each_cpu(j, covered_cpus) | ||
565 | wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h); | ||
566 | |||
567 | tmp = freqs.new; | ||
568 | freqs.new = freqs.old; | ||
569 | freqs.old = tmp; | ||
570 | for_each_cpu(j, policy->cpus) { | ||
571 | if (!cpu_online(j)) | ||
572 | continue; | ||
573 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
574 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
575 | } | ||
576 | } | ||
577 | retval = 0; | ||
578 | |||
579 | out: | ||
580 | free_cpumask_var(covered_cpus); | ||
581 | return retval; | ||
582 | } | ||
583 | |||
584 | static struct freq_attr* centrino_attr[] = { | ||
585 | &cpufreq_freq_attr_scaling_available_freqs, | ||
586 | NULL, | ||
587 | }; | ||
588 | |||
589 | static struct cpufreq_driver centrino_driver = { | ||
590 | .name = "centrino", /* should be speedstep-centrino, | ||
591 | but there's a 16 char limit */ | ||
592 | .init = centrino_cpu_init, | ||
593 | .exit = centrino_cpu_exit, | ||
594 | .verify = centrino_verify, | ||
595 | .target = centrino_target, | ||
596 | .get = get_cur_freq, | ||
597 | .attr = centrino_attr, | ||
598 | .owner = THIS_MODULE, | ||
599 | }; | ||
600 | |||
601 | |||
602 | /** | ||
603 | * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver | ||
604 | * | ||
605 | * Initializes the Enhanced SpeedStep support. Returns -ENODEV on | ||
606 | * unsupported devices, -ENOENT if there's no voltage table for this | ||
607 | * particular CPU model, -EINVAL on problems during initialization, | ||
608 | * and zero on success. | ||
609 | * | ||
610 | * This is quite picky. Not only does the CPU have to advertise the | ||
611 | * "est" flag in the cpuid capability flags, but we also look for a | ||
612 | * specific CPU model and stepping, and we need to have the exact | ||
613 | * model name in our voltage tables. That is, be paranoid about not | ||
614 | * releasing someone's valuable magic smoke. | ||
615 | */ | ||
616 | static int __init centrino_init(void) | ||
617 | { | ||
618 | struct cpuinfo_x86 *cpu = &cpu_data(0); | ||
619 | |||
620 | if (!cpu_has(cpu, X86_FEATURE_EST)) | ||
621 | return -ENODEV; | ||
622 | |||
623 | return cpufreq_register_driver(¢rino_driver); | ||
624 | } | ||
625 | |||
626 | static void __exit centrino_exit(void) | ||
627 | { | ||
628 | cpufreq_unregister_driver(¢rino_driver); | ||
629 | } | ||
630 | |||
631 | MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>"); | ||
632 | MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors."); | ||
633 | MODULE_LICENSE ("GPL"); | ||
634 | |||
635 | late_initcall(centrino_init); | ||
636 | module_exit(centrino_exit); | ||
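Note: the OP(mhz, mv) macro in the table section above packs each operating point into the 16-bit IA32_PERF_CTL format that extract_clock() later reverses: bits 15:8 carry the core clock in 100 MHz units, bits 7:0 carry (mV - 700)/16. A minimal user-space sketch of that encoding, for illustration only (op_encode()/op_decode_khz() are made-up helper names, not driver API):

/*
 * Sketch of the operating-point encoding used by OP(mhz, mv) and the
 * ratio-byte decode used by extract_clock() for Banias/Dothan-A1/B0.
 */
#include <stdio.h>
#include <stdint.h>

static uint16_t op_encode(unsigned int mhz, unsigned int mv)
{
	/* bits 15:8 = core clock in 100 MHz units, bits 7:0 = (mV - 700) / 16 */
	return (uint16_t)(((mhz / 100) << 8) | ((mv - 700) / 16));
}

static unsigned int op_decode_khz(uint16_t index)
{
	/* Banias/Dothan-A1/B0 path: only the ratio byte is trusted */
	return ((index >> 8) & 0xff) * 100000;
}

int main(void)
{
	/* OP(600, 844) from banias_900[] encodes to 0x0609 -> 600000 kHz */
	uint16_t idx = op_encode(600, 844);

	printf("index=0x%04x, decoded frequency=%u kHz\n", idx, op_decode_khz(idx));
	return 0;
}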
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c deleted file mode 100644 index 561758e95180..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ /dev/null | |||
@@ -1,452 +0,0 @@ | |||
1 | /* | ||
2 | * (C) 2001 Dave Jones, Arjan van de ven. | ||
3 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> | ||
4 | * | ||
5 | * Licensed under the terms of the GNU GPL License version 2. | ||
6 | * Based upon reverse engineered information, and on Intel documentation | ||
7 | * for chipsets ICH2-M and ICH3-M. | ||
8 | * | ||
9 | * Many thanks to Ducrot Bruno for finding and fixing the last | ||
10 | * "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler | ||
11 | * for extensive testing. | ||
12 | * | ||
13 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
14 | */ | ||
15 | |||
16 | |||
17 | /********************************************************************* | ||
18 | * SPEEDSTEP - DEFINITIONS * | ||
19 | *********************************************************************/ | ||
20 | |||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/init.h> | ||
24 | #include <linux/cpufreq.h> | ||
25 | #include <linux/pci.h> | ||
26 | #include <linux/sched.h> | ||
27 | |||
28 | #include "speedstep-lib.h" | ||
29 | |||
30 | |||
31 | /* speedstep_chipset: | ||
32 | * It is necessary to know which chipset is used. As accesses to | ||
33 | * this device occur at various places in this module, we need a | ||
34 | * static struct pci_dev * pointing to that device. | ||
35 | */ | ||
36 | static struct pci_dev *speedstep_chipset_dev; | ||
37 | |||
38 | |||
39 | /* speedstep_processor | ||
40 | */ | ||
41 | static enum speedstep_processor speedstep_processor; | ||
42 | |||
43 | static u32 pmbase; | ||
44 | |||
45 | /* | ||
46 | * There are only two frequency states for each processor. Values | ||
47 | * are in kHz for the time being. | ||
48 | */ | ||
49 | static struct cpufreq_frequency_table speedstep_freqs[] = { | ||
50 | {SPEEDSTEP_HIGH, 0}, | ||
51 | {SPEEDSTEP_LOW, 0}, | ||
52 | {0, CPUFREQ_TABLE_END}, | ||
53 | }; | ||
54 | |||
55 | |||
56 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
57 | "speedstep-ich", msg) | ||
58 | |||
59 | |||
60 | /** | ||
61 | * speedstep_find_register - read the PMBASE address | ||
62 | * | ||
63 | * Returns: -ENODEV if no register could be found | ||
64 | */ | ||
65 | static int speedstep_find_register(void) | ||
66 | { | ||
67 | if (!speedstep_chipset_dev) | ||
68 | return -ENODEV; | ||
69 | |||
70 | /* get PMBASE */ | ||
71 | pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase); | ||
72 | if (!(pmbase & 0x01)) { | ||
73 | printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); | ||
74 | return -ENODEV; | ||
75 | } | ||
76 | |||
77 | pmbase &= 0xFFFFFFFE; | ||
78 | if (!pmbase) { | ||
79 | printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); | ||
80 | return -ENODEV; | ||
81 | } | ||
82 | |||
83 | dprintk("pmbase is 0x%x\n", pmbase); | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | /** | ||
88 | * speedstep_set_state - set the SpeedStep state | ||
89 | * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) | ||
90 | * | ||
91 | * Tries to change the SpeedStep state. Can be called from | ||
92 | * smp_call_function_single. | ||
93 | */ | ||
94 | static void speedstep_set_state(unsigned int state) | ||
95 | { | ||
96 | u8 pm2_blk; | ||
97 | u8 value; | ||
98 | unsigned long flags; | ||
99 | |||
100 | if (state > 0x1) | ||
101 | return; | ||
102 | |||
103 | /* Disable IRQs */ | ||
104 | local_irq_save(flags); | ||
105 | |||
106 | /* read state */ | ||
107 | value = inb(pmbase + 0x50); | ||
108 | |||
109 | dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); | ||
110 | |||
111 | /* write new state */ | ||
112 | value &= 0xFE; | ||
113 | value |= state; | ||
114 | |||
115 | dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase); | ||
116 | |||
117 | /* Disable bus master arbitration */ | ||
118 | pm2_blk = inb(pmbase + 0x20); | ||
119 | pm2_blk |= 0x01; | ||
120 | outb(pm2_blk, (pmbase + 0x20)); | ||
121 | |||
122 | /* Actual transition */ | ||
123 | outb(value, (pmbase + 0x50)); | ||
124 | |||
125 | /* Restore bus master arbitration */ | ||
126 | pm2_blk &= 0xfe; | ||
127 | outb(pm2_blk, (pmbase + 0x20)); | ||
128 | |||
129 | /* check if transition was successful */ | ||
130 | value = inb(pmbase + 0x50); | ||
131 | |||
132 | /* Enable IRQs */ | ||
133 | local_irq_restore(flags); | ||
134 | |||
135 | dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); | ||
136 | |||
137 | if (state == (value & 0x1)) | ||
138 | dprintk("change to %u MHz succeeded\n", | ||
139 | speedstep_get_frequency(speedstep_processor) / 1000); | ||
140 | else | ||
141 | printk(KERN_ERR "cpufreq: change failed - I/O error\n"); | ||
142 | |||
143 | return; | ||
144 | } | ||
145 | |||
146 | /* Wrapper for smp_call_function_single. */ | ||
147 | static void _speedstep_set_state(void *_state) | ||
148 | { | ||
149 | speedstep_set_state(*(unsigned int *)_state); | ||
150 | } | ||
151 | |||
152 | /** | ||
153 | * speedstep_activate - activate SpeedStep control in the chipset | ||
154 | * | ||
155 | * Tries to activate the SpeedStep status and control registers. | ||
156 | * Returns -EINVAL on an unsupported chipset, and zero on success. | ||
157 | */ | ||
158 | static int speedstep_activate(void) | ||
159 | { | ||
160 | u16 value = 0; | ||
161 | |||
162 | if (!speedstep_chipset_dev) | ||
163 | return -EINVAL; | ||
164 | |||
165 | pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value); | ||
166 | if (!(value & 0x08)) { | ||
167 | value |= 0x08; | ||
168 | dprintk("activating SpeedStep (TM) registers\n"); | ||
169 | pci_write_config_word(speedstep_chipset_dev, 0x00A0, value); | ||
170 | } | ||
171 | |||
172 | return 0; | ||
173 | } | ||
174 | |||
175 | |||
176 | /** | ||
177 | * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic | ||
178 | * | ||
179 | * Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to | ||
180 | * the LPC bridge / PM module which contains all power-management | ||
181 | * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected | ||
182 | * chipset, or zero on failure. | ||
183 | */ | ||
184 | static unsigned int speedstep_detect_chipset(void) | ||
185 | { | ||
186 | speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, | ||
187 | PCI_DEVICE_ID_INTEL_82801DB_12, | ||
188 | PCI_ANY_ID, PCI_ANY_ID, | ||
189 | NULL); | ||
190 | if (speedstep_chipset_dev) | ||
191 | return 4; /* 4-M */ | ||
192 | |||
193 | speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, | ||
194 | PCI_DEVICE_ID_INTEL_82801CA_12, | ||
195 | PCI_ANY_ID, PCI_ANY_ID, | ||
196 | NULL); | ||
197 | if (speedstep_chipset_dev) | ||
198 | return 3; /* 3-M */ | ||
199 | |||
200 | |||
201 | speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, | ||
202 | PCI_DEVICE_ID_INTEL_82801BA_10, | ||
203 | PCI_ANY_ID, PCI_ANY_ID, | ||
204 | NULL); | ||
205 | if (speedstep_chipset_dev) { | ||
206 | /* speedstep.c causes lockups on Dell Inspirons 8000 and | ||
207 | * 8100 which use a pretty old revision of the 82815 | ||
208 | * host bridge. Abort on these systems. | ||
209 | */ | ||
210 | static struct pci_dev *hostbridge; | ||
211 | |||
212 | hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL, | ||
213 | PCI_DEVICE_ID_INTEL_82815_MC, | ||
214 | PCI_ANY_ID, PCI_ANY_ID, | ||
215 | NULL); | ||
216 | |||
217 | if (!hostbridge) | ||
218 | return 2; /* 2-M */ | ||
219 | |||
220 | if (hostbridge->revision < 5) { | ||
221 | dprintk("hostbridge does not support speedstep\n"); | ||
222 | speedstep_chipset_dev = NULL; | ||
223 | pci_dev_put(hostbridge); | ||
224 | return 0; | ||
225 | } | ||
226 | |||
227 | pci_dev_put(hostbridge); | ||
228 | return 2; /* 2-M */ | ||
229 | } | ||
230 | |||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | static void get_freq_data(void *_speed) | ||
235 | { | ||
236 | unsigned int *speed = _speed; | ||
237 | |||
238 | *speed = speedstep_get_frequency(speedstep_processor); | ||
239 | } | ||
240 | |||
241 | static unsigned int speedstep_get(unsigned int cpu) | ||
242 | { | ||
243 | unsigned int speed; | ||
244 | |||
245 | /* You're supposed to ensure CPU is online. */ | ||
246 | if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0) | ||
247 | BUG(); | ||
248 | |||
249 | dprintk("detected %u kHz as current frequency\n", speed); | ||
250 | return speed; | ||
251 | } | ||
252 | |||
253 | /** | ||
254 | * speedstep_target - set a new CPUFreq policy | ||
255 | * @policy: new policy | ||
256 | * @target_freq: the target frequency | ||
257 | * @relation: how that frequency relates to achieved frequency | ||
258 | * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | ||
259 | * | ||
260 | * Sets a new CPUFreq policy. | ||
261 | */ | ||
262 | static int speedstep_target(struct cpufreq_policy *policy, | ||
263 | unsigned int target_freq, | ||
264 | unsigned int relation) | ||
265 | { | ||
266 | unsigned int newstate = 0, policy_cpu; | ||
267 | struct cpufreq_freqs freqs; | ||
268 | int i; | ||
269 | |||
270 | if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], | ||
271 | target_freq, relation, &newstate)) | ||
272 | return -EINVAL; | ||
273 | |||
274 | policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); | ||
275 | freqs.old = speedstep_get(policy_cpu); | ||
276 | freqs.new = speedstep_freqs[newstate].frequency; | ||
277 | freqs.cpu = policy->cpu; | ||
278 | |||
279 | dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new); | ||
280 | |||
281 | /* no transition necessary */ | ||
282 | if (freqs.old == freqs.new) | ||
283 | return 0; | ||
284 | |||
285 | for_each_cpu(i, policy->cpus) { | ||
286 | freqs.cpu = i; | ||
287 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
288 | } | ||
289 | |||
290 | smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate, | ||
291 | true); | ||
292 | |||
293 | for_each_cpu(i, policy->cpus) { | ||
294 | freqs.cpu = i; | ||
295 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
296 | } | ||
297 | |||
298 | return 0; | ||
299 | } | ||
300 | |||
301 | |||
302 | /** | ||
303 | * speedstep_verify - verifies a new CPUFreq policy | ||
304 | * @policy: new policy | ||
305 | * | ||
306 | * Limit must be within speedstep_low_freq and speedstep_high_freq, with | ||
307 | * at least one border included. | ||
308 | */ | ||
309 | static int speedstep_verify(struct cpufreq_policy *policy) | ||
310 | { | ||
311 | return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); | ||
312 | } | ||
313 | |||
314 | struct get_freqs { | ||
315 | struct cpufreq_policy *policy; | ||
316 | int ret; | ||
317 | }; | ||
318 | |||
319 | static void get_freqs_on_cpu(void *_get_freqs) | ||
320 | { | ||
321 | struct get_freqs *get_freqs = _get_freqs; | ||
322 | |||
323 | get_freqs->ret = | ||
324 | speedstep_get_freqs(speedstep_processor, | ||
325 | &speedstep_freqs[SPEEDSTEP_LOW].frequency, | ||
326 | &speedstep_freqs[SPEEDSTEP_HIGH].frequency, | ||
327 | &get_freqs->policy->cpuinfo.transition_latency, | ||
328 | &speedstep_set_state); | ||
329 | } | ||
330 | |||
331 | static int speedstep_cpu_init(struct cpufreq_policy *policy) | ||
332 | { | ||
333 | int result; | ||
334 | unsigned int policy_cpu, speed; | ||
335 | struct get_freqs gf; | ||
336 | |||
337 | /* only run on CPU to be set, or on its sibling */ | ||
338 | #ifdef CONFIG_SMP | ||
339 | cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); | ||
340 | #endif | ||
341 | policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); | ||
342 | |||
343 | /* detect low and high frequency and transition latency */ | ||
344 | gf.policy = policy; | ||
345 | smp_call_function_single(policy_cpu, get_freqs_on_cpu, &gf, 1); | ||
346 | if (gf.ret) | ||
347 | return gf.ret; | ||
348 | |||
349 | /* get current speed setting */ | ||
350 | speed = speedstep_get(policy_cpu); | ||
351 | if (!speed) | ||
352 | return -EIO; | ||
353 | |||
354 | dprintk("currently at %s speed setting - %i MHz\n", | ||
355 | (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) | ||
356 | ? "low" : "high", | ||
357 | (speed / 1000)); | ||
358 | |||
359 | /* cpuinfo and default policy values */ | ||
360 | policy->cur = speed; | ||
361 | |||
362 | result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); | ||
363 | if (result) | ||
364 | return result; | ||
365 | |||
366 | cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); | ||
367 | |||
368 | return 0; | ||
369 | } | ||
370 | |||
371 | |||
372 | static int speedstep_cpu_exit(struct cpufreq_policy *policy) | ||
373 | { | ||
374 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
375 | return 0; | ||
376 | } | ||
377 | |||
378 | static struct freq_attr *speedstep_attr[] = { | ||
379 | &cpufreq_freq_attr_scaling_available_freqs, | ||
380 | NULL, | ||
381 | }; | ||
382 | |||
383 | |||
384 | static struct cpufreq_driver speedstep_driver = { | ||
385 | .name = "speedstep-ich", | ||
386 | .verify = speedstep_verify, | ||
387 | .target = speedstep_target, | ||
388 | .init = speedstep_cpu_init, | ||
389 | .exit = speedstep_cpu_exit, | ||
390 | .get = speedstep_get, | ||
391 | .owner = THIS_MODULE, | ||
392 | .attr = speedstep_attr, | ||
393 | }; | ||
394 | |||
395 | |||
396 | /** | ||
397 | * speedstep_init - initializes the SpeedStep CPUFreq driver | ||
398 | * | ||
399 | * Initializes the SpeedStep support. Returns -ENODEV on unsupported | ||
401 | * devices, -EINVAL on problems during initialization, and zero on | ||
401 | * success. | ||
402 | */ | ||
403 | static int __init speedstep_init(void) | ||
404 | { | ||
405 | /* detect processor */ | ||
406 | speedstep_processor = speedstep_detect_processor(); | ||
407 | if (!speedstep_processor) { | ||
408 | dprintk("Intel(R) SpeedStep(TM) capable processor " | ||
409 | "not found\n"); | ||
410 | return -ENODEV; | ||
411 | } | ||
412 | |||
413 | /* detect chipset */ | ||
414 | if (!speedstep_detect_chipset()) { | ||
415 | dprintk("Intel(R) SpeedStep(TM) for this chipset not " | ||
416 | "(yet) available.\n"); | ||
417 | return -ENODEV; | ||
418 | } | ||
419 | |||
420 | /* activate speedstep support */ | ||
421 | if (speedstep_activate()) { | ||
422 | pci_dev_put(speedstep_chipset_dev); | ||
423 | return -EINVAL; | ||
424 | } | ||
425 | |||
426 | if (speedstep_find_register()) | ||
427 | return -ENODEV; | ||
428 | |||
429 | return cpufreq_register_driver(&speedstep_driver); | ||
430 | } | ||
431 | |||
432 | |||
433 | /** | ||
434 | * speedstep_exit - unregisters SpeedStep support | ||
435 | * | ||
436 | * Unregisters SpeedStep support. | ||
437 | */ | ||
438 | static void __exit speedstep_exit(void) | ||
439 | { | ||
440 | pci_dev_put(speedstep_chipset_dev); | ||
441 | cpufreq_unregister_driver(&speedstep_driver); | ||
442 | } | ||
443 | |||
444 | |||
445 | MODULE_AUTHOR("Dave Jones <davej@redhat.com>, " | ||
446 | "Dominik Brodowski <linux@brodo.de>"); | ||
447 | MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets " | ||
448 | "with ICH-M southbridges."); | ||
449 | MODULE_LICENSE("GPL"); | ||
450 | |||
451 | module_init(speedstep_init); | ||
452 | module_exit(speedstep_exit); | ||
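Note: speedstep_find_register() above derives the PM base from PCI config offset 0x40; bit 0 must flag an I/O-space resource and is masked off before offsets 0x50 (SpeedStep state) and 0x20 (bus-master arbitration block) are used. A stand-alone sketch of that decoding, with a hypothetical raw config value instead of a real pci_read_config_dword() result:

/*
 * Sketch of the PMBASE decoding performed by speedstep_find_register().
 * The raw value below is a made-up example, not read from hardware.
 */
#include <stdio.h>

int main(void)
{
	unsigned int raw = 0x00001001;	/* hypothetical dword at PCI config offset 0x40 */
	unsigned int pmbase;

	if (!(raw & 0x01)) {		/* bit 0 must indicate an I/O-space resource */
		fprintf(stderr, "could not find speedstep register\n");
		return 1;
	}

	pmbase = raw & 0xFFFFFFFE;	/* strip the indicator bit */
	if (!pmbase) {
		fprintf(stderr, "could not find speedstep register\n");
		return 1;
	}

	printf("pmbase=0x%x, state port=0x%x, PM2 block=0x%x\n",
	       pmbase, pmbase + 0x50, pmbase + 0x20);
	return 0;
}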
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c deleted file mode 100644 index a94ec6be69fa..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ /dev/null | |||
@@ -1,481 +0,0 @@ | |||
1 | /* | ||
2 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * | ||
6 | * Library for common functions for Intel SpeedStep v.1 and v.2 support | ||
7 | * | ||
8 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
9 | */ | ||
10 | |||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/moduleparam.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/cpufreq.h> | ||
16 | |||
17 | #include <asm/msr.h> | ||
18 | #include <asm/tsc.h> | ||
19 | #include "speedstep-lib.h" | ||
20 | |||
21 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
22 | "speedstep-lib", msg) | ||
23 | |||
24 | #define PFX "speedstep-lib: " | ||
25 | |||
26 | #ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK | ||
27 | static int relaxed_check; | ||
28 | #else | ||
29 | #define relaxed_check 0 | ||
30 | #endif | ||
31 | |||
32 | /********************************************************************* | ||
33 | * GET PROCESSOR CORE SPEED IN KHZ * | ||
34 | *********************************************************************/ | ||
35 | |||
36 | static unsigned int pentium3_get_frequency(enum speedstep_processor processor) | ||
37 | { | ||
38 | /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ | ||
39 | struct { | ||
40 | unsigned int ratio; /* Frequency Multiplier (x10) */ | ||
41 | u8 bitmap; /* power on configuration bits | ||
42 | [27, 25:22] (in MSR 0x2a) */ | ||
43 | } msr_decode_mult[] = { | ||
44 | { 30, 0x01 }, | ||
45 | { 35, 0x05 }, | ||
46 | { 40, 0x02 }, | ||
47 | { 45, 0x06 }, | ||
48 | { 50, 0x00 }, | ||
49 | { 55, 0x04 }, | ||
50 | { 60, 0x0b }, | ||
51 | { 65, 0x0f }, | ||
52 | { 70, 0x09 }, | ||
53 | { 75, 0x0d }, | ||
54 | { 80, 0x0a }, | ||
55 | { 85, 0x26 }, | ||
56 | { 90, 0x20 }, | ||
57 | { 100, 0x2b }, | ||
58 | { 0, 0xff } /* error or unknown value */ | ||
59 | }; | ||
60 | |||
61 | /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */ | ||
62 | struct { | ||
63 | unsigned int value; /* Front Side Bus speed in MHz */ | ||
64 | u8 bitmap; /* power on configuration bits [18: 19] | ||
65 | (in MSR 0x2a) */ | ||
66 | } msr_decode_fsb[] = { | ||
67 | { 66, 0x0 }, | ||
68 | { 100, 0x2 }, | ||
69 | { 133, 0x1 }, | ||
70 | { 0, 0xff} | ||
71 | }; | ||
72 | |||
73 | u32 msr_lo, msr_tmp; | ||
74 | int i = 0, j = 0; | ||
75 | |||
76 | /* read MSR 0x2a - we only need the low 32 bits */ | ||
77 | rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); | ||
78 | dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); | ||
79 | msr_tmp = msr_lo; | ||
80 | |||
81 | /* decode the FSB */ | ||
82 | msr_tmp &= 0x00c0000; | ||
83 | msr_tmp >>= 18; | ||
84 | while (msr_tmp != msr_decode_fsb[i].bitmap) { | ||
85 | if (msr_decode_fsb[i].bitmap == 0xff) | ||
86 | return 0; | ||
87 | i++; | ||
88 | } | ||
89 | |||
90 | /* decode the multiplier */ | ||
91 | if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) { | ||
92 | dprintk("workaround for early PIIIs\n"); | ||
93 | msr_lo &= 0x03c00000; | ||
94 | } else | ||
95 | msr_lo &= 0x0bc00000; | ||
96 | msr_lo >>= 22; | ||
97 | while (msr_lo != msr_decode_mult[j].bitmap) { | ||
98 | if (msr_decode_mult[j].bitmap == 0xff) | ||
99 | return 0; | ||
100 | j++; | ||
101 | } | ||
102 | |||
103 | dprintk("speed is %u\n", | ||
104 | (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100)); | ||
105 | |||
106 | return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100; | ||
107 | } | ||
108 | |||
109 | |||
110 | static unsigned int pentiumM_get_frequency(void) | ||
111 | { | ||
112 | u32 msr_lo, msr_tmp; | ||
113 | |||
114 | rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); | ||
115 | dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); | ||
116 | |||
117 | /* see table B-2 of 24547212.pdf */ | ||
118 | if (msr_lo & 0x00040000) { | ||
119 | printk(KERN_DEBUG PFX "PM - invalid FSB: 0x%x 0x%x\n", | ||
120 | msr_lo, msr_tmp); | ||
121 | return 0; | ||
122 | } | ||
123 | |||
124 | msr_tmp = (msr_lo >> 22) & 0x1f; | ||
125 | dprintk("bits 22-26 are 0x%x, speed is %u\n", | ||
126 | msr_tmp, (msr_tmp * 100 * 1000)); | ||
127 | |||
128 | return msr_tmp * 100 * 1000; | ||
129 | } | ||
130 | |||
131 | static unsigned int pentium_core_get_frequency(void) | ||
132 | { | ||
133 | u32 fsb = 0; | ||
134 | u32 msr_lo, msr_tmp; | ||
135 | int ret; | ||
136 | |||
137 | rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp); | ||
138 | /* see table B-2 of 25366920.pdf */ | ||
139 | switch (msr_lo & 0x07) { | ||
140 | case 5: | ||
141 | fsb = 100000; | ||
142 | break; | ||
143 | case 1: | ||
144 | fsb = 133333; | ||
145 | break; | ||
146 | case 3: | ||
147 | fsb = 166667; | ||
148 | break; | ||
149 | case 2: | ||
150 | fsb = 200000; | ||
151 | break; | ||
152 | case 0: | ||
153 | fsb = 266667; | ||
154 | break; | ||
155 | case 4: | ||
156 | fsb = 333333; | ||
157 | break; | ||
158 | default: | ||
159 | printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value\n"); | ||
160 | } | ||
161 | |||
162 | rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); | ||
163 | dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", | ||
164 | msr_lo, msr_tmp); | ||
165 | |||
166 | msr_tmp = (msr_lo >> 22) & 0x1f; | ||
167 | dprintk("bits 22-26 are 0x%x, speed is %u\n", | ||
168 | msr_tmp, (msr_tmp * fsb)); | ||
169 | |||
170 | ret = (msr_tmp * fsb); | ||
171 | return ret; | ||
172 | } | ||
173 | |||
174 | |||
175 | static unsigned int pentium4_get_frequency(void) | ||
176 | { | ||
177 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
178 | u32 msr_lo, msr_hi, mult; | ||
179 | unsigned int fsb = 0; | ||
180 | unsigned int ret; | ||
181 | u8 fsb_code; | ||
182 | |||
183 | /* Pentium 4 Model 0 and 1 do not have the Core Clock Frequency | ||
184 | * to System Bus Frequency Ratio Field in the Processor Frequency | ||
185 | * Configuration Register of the MSR. Therefore the current | ||
186 | * frequency cannot be calculated and has to be measured. | ||
187 | */ | ||
188 | if (c->x86_model < 2) | ||
189 | return cpu_khz; | ||
190 | |||
191 | rdmsr(0x2c, msr_lo, msr_hi); | ||
192 | |||
193 | dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi); | ||
194 | |||
195 | /* decode the FSB: see IA-32 Intel (C) Architecture Software | ||
196 | * Developer's Manual, Volume 3: System Programming Guide, | ||
197 | * revision #12 in Table B-1: MSRs in the Pentium 4 and | ||
198 | * Intel Xeon Processors, on page B-4 and B-5. | ||
199 | */ | ||
200 | fsb_code = (msr_lo >> 16) & 0x7; | ||
201 | switch (fsb_code) { | ||
202 | case 0: | ||
203 | fsb = 100 * 1000; | ||
204 | break; | ||
205 | case 1: | ||
206 | fsb = 13333 * 10; | ||
207 | break; | ||
208 | case 2: | ||
209 | fsb = 200 * 1000; | ||
210 | break; | ||
211 | } | ||
212 | |||
213 | if (!fsb) | ||
214 | printk(KERN_DEBUG PFX "couldn't detect FSB speed. " | ||
215 | "Please send an e-mail to <linux@brodo.de>\n"); | ||
216 | |||
217 | /* Multiplier. */ | ||
218 | mult = msr_lo >> 24; | ||
219 | |||
220 | dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", | ||
221 | fsb, mult, (fsb * mult)); | ||
222 | |||
223 | ret = (fsb * mult); | ||
224 | return ret; | ||
225 | } | ||
226 | |||
227 | |||
228 | /* Warning: may get called from smp_call_function_single. */ | ||
229 | unsigned int speedstep_get_frequency(enum speedstep_processor processor) | ||
230 | { | ||
231 | switch (processor) { | ||
232 | case SPEEDSTEP_CPU_PCORE: | ||
233 | return pentium_core_get_frequency(); | ||
234 | case SPEEDSTEP_CPU_PM: | ||
235 | return pentiumM_get_frequency(); | ||
236 | case SPEEDSTEP_CPU_P4D: | ||
237 | case SPEEDSTEP_CPU_P4M: | ||
238 | return pentium4_get_frequency(); | ||
239 | case SPEEDSTEP_CPU_PIII_T: | ||
240 | case SPEEDSTEP_CPU_PIII_C: | ||
241 | case SPEEDSTEP_CPU_PIII_C_EARLY: | ||
242 | return pentium3_get_frequency(processor); | ||
243 | default: | ||
244 | return 0; | ||
245 | } | ||
246 | return 0; | ||
247 | } | ||
248 | EXPORT_SYMBOL_GPL(speedstep_get_frequency); | ||
249 | |||
250 | |||
251 | /********************************************************************* | ||
252 | * DETECT SPEEDSTEP-CAPABLE PROCESSOR * | ||
253 | *********************************************************************/ | ||
254 | |||
255 | unsigned int speedstep_detect_processor(void) | ||
256 | { | ||
257 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
258 | u32 ebx, msr_lo, msr_hi; | ||
259 | |||
260 | dprintk("x86: %x, model: %x\n", c->x86, c->x86_model); | ||
261 | |||
262 | if ((c->x86_vendor != X86_VENDOR_INTEL) || | ||
263 | ((c->x86 != 6) && (c->x86 != 0xF))) | ||
264 | return 0; | ||
265 | |||
266 | if (c->x86 == 0xF) { | ||
267 | /* Intel Mobile Pentium 4-M | ||
268 | * or Intel Mobile Pentium 4 with 533 MHz FSB */ | ||
269 | if (c->x86_model != 2) | ||
270 | return 0; | ||
271 | |||
272 | ebx = cpuid_ebx(0x00000001); | ||
273 | ebx &= 0x000000FF; | ||
274 | |||
275 | dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); | ||
276 | |||
277 | switch (c->x86_mask) { | ||
278 | case 4: | ||
279 | /* | ||
280 | * B-stepping [M-P4-M] | ||
281 | * sample has ebx = 0x0f, production has 0x0e. | ||
282 | */ | ||
283 | if ((ebx == 0x0e) || (ebx == 0x0f)) | ||
284 | return SPEEDSTEP_CPU_P4M; | ||
285 | break; | ||
286 | case 7: | ||
287 | /* | ||
288 | * C-stepping [M-P4-M] | ||
289 | * needs to have ebx=0x0e, else it's a celeron: | ||
290 | * cf. 25130917.pdf / page 7, footnote 5 even | ||
291 | * though 25072120.pdf / page 7 doesn't say | ||
292 | * samples are only of B-stepping... | ||
293 | */ | ||
294 | if (ebx == 0x0e) | ||
295 | return SPEEDSTEP_CPU_P4M; | ||
296 | break; | ||
297 | case 9: | ||
298 | /* | ||
299 | * D-stepping [M-P4-M or M-P4/533] | ||
300 | * | ||
301 | * this is totally strange: CPUID 0x0F29 is | ||
302 | * used by M-P4-M, M-P4/533 and(!) Celeron CPUs. | ||
303 | * The latter need to be sorted out as they don't | ||
304 | * support speedstep. | ||
305 | * Celerons with CPUID 0x0F29 may have either | ||
306 | * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything | ||
307 | * specific. | ||
308 | * M-P4-Ms may have either ebx=0xe or 0xf [see above] | ||
309 | * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf] | ||
310 | * also, M-P4M HTs have ebx=0x8, too | ||
311 | * For now, they are distinguished by the model_id | ||
312 | * string | ||
313 | */ | ||
314 | if ((ebx == 0x0e) || | ||
315 | (strstr(c->x86_model_id, | ||
316 | "Mobile Intel(R) Pentium(R) 4") != NULL)) | ||
317 | return SPEEDSTEP_CPU_P4M; | ||
318 | break; | ||
319 | default: | ||
320 | break; | ||
321 | } | ||
322 | return 0; | ||
323 | } | ||
324 | |||
325 | switch (c->x86_model) { | ||
326 | case 0x0B: /* Intel PIII [Tualatin] */ | ||
327 | /* cpuid_ebx(1) is 0x04 for desktop PIII, | ||
328 | * 0x06 for mobile PIII-M */ | ||
329 | ebx = cpuid_ebx(0x00000001); | ||
330 | dprintk("ebx is %x\n", ebx); | ||
331 | |||
332 | ebx &= 0x000000FF; | ||
333 | |||
334 | if (ebx != 0x06) | ||
335 | return 0; | ||
336 | |||
337 | /* So far all PIII-M processors support SpeedStep. See | ||
338 | * Intel's 24540640.pdf of June 2003 | ||
339 | */ | ||
340 | return SPEEDSTEP_CPU_PIII_T; | ||
341 | |||
342 | case 0x08: /* Intel PIII [Coppermine] */ | ||
343 | |||
344 | /* all mobile PIII Coppermines have FSB 100 MHz | ||
345 | * ==> sort out a few desktop PIIIs. */ | ||
346 | rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi); | ||
347 | dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", | ||
348 | msr_lo, msr_hi); | ||
349 | msr_lo &= 0x00c0000; | ||
350 | if (msr_lo != 0x0080000) | ||
351 | return 0; | ||
352 | |||
353 | /* | ||
354 | * If the processor is a mobile version, | ||
355 | * platform ID has bit 50 set | ||
356 | * it has SpeedStep technology if either | ||
357 | * bit 56 or 57 is set | ||
358 | */ | ||
359 | rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi); | ||
361 | dprintk("Coppermine: MSR_IA32_PLATFORM_ID is 0x%x, 0x%x\n", | ||
361 | msr_lo, msr_hi); | ||
362 | if ((msr_hi & (1<<18)) && | ||
363 | (relaxed_check ? 1 : (msr_hi & (3<<24)))) { | ||
364 | if (c->x86_mask == 0x01) { | ||
365 | dprintk("early PIII version\n"); | ||
366 | return SPEEDSTEP_CPU_PIII_C_EARLY; | ||
367 | } else | ||
368 | return SPEEDSTEP_CPU_PIII_C; | ||
369 | } | ||
370 | |||
371 | default: | ||
372 | return 0; | ||
373 | } | ||
374 | } | ||
375 | EXPORT_SYMBOL_GPL(speedstep_detect_processor); | ||
376 | |||
377 | |||
378 | /********************************************************************* | ||
379 | * DETECT SPEEDSTEP SPEEDS * | ||
380 | *********************************************************************/ | ||
381 | |||
382 | unsigned int speedstep_get_freqs(enum speedstep_processor processor, | ||
383 | unsigned int *low_speed, | ||
384 | unsigned int *high_speed, | ||
385 | unsigned int *transition_latency, | ||
386 | void (*set_state) (unsigned int state)) | ||
387 | { | ||
388 | unsigned int prev_speed; | ||
389 | unsigned int ret = 0; | ||
390 | unsigned long flags; | ||
391 | struct timeval tv1, tv2; | ||
392 | |||
393 | if ((!processor) || (!low_speed) || (!high_speed) || (!set_state)) | ||
394 | return -EINVAL; | ||
395 | |||
396 | dprintk("trying to determine both speeds\n"); | ||
397 | |||
398 | /* get current speed */ | ||
399 | prev_speed = speedstep_get_frequency(processor); | ||
400 | if (!prev_speed) | ||
401 | return -EIO; | ||
402 | |||
403 | dprintk("previous speed is %u\n", prev_speed); | ||
404 | |||
405 | local_irq_save(flags); | ||
406 | |||
407 | /* switch to low state */ | ||
408 | set_state(SPEEDSTEP_LOW); | ||
409 | *low_speed = speedstep_get_frequency(processor); | ||
410 | if (!*low_speed) { | ||
411 | ret = -EIO; | ||
412 | goto out; | ||
413 | } | ||
414 | |||
415 | dprintk("low speed is %u\n", *low_speed); | ||
416 | |||
417 | /* start latency measurement */ | ||
418 | if (transition_latency) | ||
419 | do_gettimeofday(&tv1); | ||
420 | |||
421 | /* switch to high state */ | ||
422 | set_state(SPEEDSTEP_HIGH); | ||
423 | |||
424 | /* end latency measurement */ | ||
425 | if (transition_latency) | ||
426 | do_gettimeofday(&tv2); | ||
427 | |||
428 | *high_speed = speedstep_get_frequency(processor); | ||
429 | if (!*high_speed) { | ||
430 | ret = -EIO; | ||
431 | goto out; | ||
432 | } | ||
433 | |||
434 | dprintk("high speed is %u\n", *high_speed); | ||
435 | |||
436 | if (*low_speed == *high_speed) { | ||
437 | ret = -ENODEV; | ||
438 | goto out; | ||
439 | } | ||
440 | |||
441 | /* switch to previous state, if necessary */ | ||
442 | if (*high_speed != prev_speed) | ||
443 | set_state(SPEEDSTEP_LOW); | ||
444 | |||
445 | if (transition_latency) { | ||
446 | *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC + | ||
447 | tv2.tv_usec - tv1.tv_usec; | ||
448 | dprintk("transition latency is %u uSec\n", *transition_latency); | ||
449 | |||
450 | /* convert uSec to nSec and add 20% for safety reasons */ | ||
451 | *transition_latency *= 1200; | ||
452 | |||
453 | /* check if the latency measurement is too high or too low | ||
454 | * and set it to a safe value (500uSec) in that case | ||
455 | */ | ||
456 | if (*transition_latency > 10000000 || | ||
457 | *transition_latency < 50000) { | ||
458 | printk(KERN_WARNING PFX "frequency transition " | ||
459 | "measured seems out of range (%u " | ||
460 | "nSec), falling back to a safe one of" | ||
461 | "%u nSec.\n", | ||
462 | *transition_latency, 500000); | ||
463 | *transition_latency = 500000; | ||
464 | } | ||
465 | } | ||
466 | |||
467 | out: | ||
468 | local_irq_restore(flags); | ||
469 | return ret; | ||
470 | } | ||
471 | EXPORT_SYMBOL_GPL(speedstep_get_freqs); | ||
472 | |||
473 | #ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK | ||
474 | module_param(relaxed_check, int, 0444); | ||
475 | MODULE_PARM_DESC(relaxed_check, | ||
476 | "Don't do all checks for speedstep capability."); | ||
477 | #endif | ||
478 | |||
479 | MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); | ||
480 | MODULE_DESCRIPTION("Library for Intel SpeedStep 1 or 2 cpufreq drivers."); | ||
481 | MODULE_LICENSE("GPL"); | ||
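Note: two calculations in speedstep-lib.c above are easy to check by hand. pentium3_get_frequency() multiplies the decoded ratio (stored times ten) by the FSB in MHz and by 100 to get kHz, and speedstep_get_freqs() converts the measured switch time from microseconds to nanoseconds with a 20% margin (*1200), clamping implausible results to 500000 ns. A small sketch with example inputs (the 5.0x/100 MHz pair and the 250 us measurement are illustrative values, not measurements):

/*
 * Sketch of the PIII clock computation and the transition-latency
 * conversion/clamping from speedstep-lib.c, with example numbers.
 */
#include <stdio.h>

int main(void)
{
	/* pentium3_get_frequency(): ratio is stored times ten (50 = 5.0x) */
	unsigned int ratio_x10 = 50, fsb_mhz = 100;
	unsigned int core_khz = ratio_x10 * fsb_mhz * 100;	/* 500000 kHz */

	/* speedstep_get_freqs(): usec -> nsec plus 20% margin, then clamp */
	unsigned int latency_us = 250;
	unsigned int latency_ns = latency_us * 1200;

	if (latency_ns > 10000000 || latency_ns < 50000)
		latency_ns = 500000;	/* fall back to a safe 500 us */

	printf("core clock %u kHz, transition latency %u ns\n",
	       core_khz, latency_ns);
	return 0;
}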
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h deleted file mode 100644 index 70d9cea1219d..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h +++ /dev/null | |||
@@ -1,49 +0,0 @@ | |||
1 | /* | ||
2 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * | ||
6 | * Library for common functions for Intel SpeedStep v.1 and v.2 support | ||
7 | * | ||
8 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
9 | */ | ||
10 | |||
11 | |||
12 | |||
13 | /* processors */ | ||
14 | enum speedstep_processor { | ||
15 | SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001, /* Coppermine core */ | ||
16 | SPEEDSTEP_CPU_PIII_C = 0x00000002, /* Coppermine core */ | ||
17 | SPEEDSTEP_CPU_PIII_T = 0x00000003, /* Tualatin core */ | ||
18 | SPEEDSTEP_CPU_P4M = 0x00000004, /* P4-M */ | ||
19 | /* the following processors are not speedstep-capable and are not auto-detected | ||
20 | * in speedstep_detect_processor(). However, their speed can be detected using | ||
21 | * the speedstep_get_frequency() call. */ | ||
22 | SPEEDSTEP_CPU_PM = 0xFFFFFF03, /* Pentium M */ | ||
23 | SPEEDSTEP_CPU_P4D = 0xFFFFFF04, /* desktop P4 */ | ||
24 | SPEEDSTEP_CPU_PCORE = 0xFFFFFF05, /* Core */ | ||
25 | }; | ||
26 | |||
27 | /* speedstep states -- only two of them */ | ||
28 | |||
29 | #define SPEEDSTEP_HIGH 0x00000000 | ||
30 | #define SPEEDSTEP_LOW 0x00000001 | ||
31 | |||
32 | |||
33 | /* detect a speedstep-capable processor */ | ||
34 | extern enum speedstep_processor speedstep_detect_processor(void); | ||
35 | |||
36 | /* detect the current speed (in khz) of the processor */ | ||
37 | extern unsigned int speedstep_get_frequency(enum speedstep_processor processor); | ||
38 | |||
39 | |||
40 | /* detect the low and high speeds of the processor. The callback | ||
41 |  * set_state's argument is either SPEEDSTEP_HIGH or SPEEDSTEP_LOW; | ||
42 |  * the callback must switch states without initiating | ||
43 |  * cpufreq_notify_transition calls. | ||
44 |  */ | ||
45 | extern unsigned int speedstep_get_freqs(enum speedstep_processor processor, | ||
46 | unsigned int *low_speed, | ||
47 | unsigned int *high_speed, | ||
48 | unsigned int *transition_latency, | ||
49 | void (*set_state) (unsigned int state)); | ||
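As a usage sketch (hypothetical caller, not part of this patch): a driver built on this library supplies its own set_state() switching routine and gets back both speeds in kHz, plus an optional transition-latency estimate.

static void my_set_state(unsigned int state)
{
	/* driver-specific switch to SPEEDSTEP_HIGH or SPEEDSTEP_LOW */
}

static int my_detect_speeds(void)
{
	unsigned int low, high, latency;
	enum speedstep_processor cpu = speedstep_detect_processor();

	if (!cpu)
		return -ENODEV;
	/* fills low/high (kHz); the latency pointer may also be NULL */
	return speedstep_get_freqs(cpu, &low, &high, &latency, &my_set_state);
}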
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c deleted file mode 100644 index 91bc25b67bc1..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ /dev/null | |||
@@ -1,467 +0,0 @@ | |||
1 | /* | ||
2 | * Intel SpeedStep SMI driver. | ||
3 | * | ||
4 | * (C) 2003 Hiroshi Miura <miura@da-cha.org> | ||
5 | * | ||
6 | * Licensed under the terms of the GNU GPL License version 2. | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | |||
11 | /********************************************************************* | ||
12 | * SPEEDSTEP - DEFINITIONS * | ||
13 | *********************************************************************/ | ||
14 | |||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/moduleparam.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/cpufreq.h> | ||
20 | #include <linux/delay.h> | ||
21 | #include <linux/io.h> | ||
22 | #include <asm/ist.h> | ||
23 | |||
24 | #include "speedstep-lib.h" | ||
25 | |||
26 | /* SpeedStep system management interface port/command. | ||
27 |  * | ||
28 |  * These parameters are obtained from the IST-SMI BIOS call. | ||
29 |  * If the user supplies them as module parameters, those values are used instead. | ||
30 |  * | ||
31 |  */ | ||
32 | static int smi_port; | ||
33 | static int smi_cmd; | ||
34 | static unsigned int smi_sig; | ||
35 | |||
36 | /* info about the processor */ | ||
37 | static enum speedstep_processor speedstep_processor; | ||
38 | |||
39 | /* | ||
40 | * There are only two frequency states for each processor. Values | ||
41 | * are in kHz for the time being. | ||
42 | */ | ||
43 | static struct cpufreq_frequency_table speedstep_freqs[] = { | ||
44 | {SPEEDSTEP_HIGH, 0}, | ||
45 | {SPEEDSTEP_LOW, 0}, | ||
46 | {0, CPUFREQ_TABLE_END}, | ||
47 | }; | ||
48 | |||
49 | #define GET_SPEEDSTEP_OWNER 0 | ||
50 | #define GET_SPEEDSTEP_STATE 1 | ||
51 | #define SET_SPEEDSTEP_STATE 2 | ||
52 | #define GET_SPEEDSTEP_FREQS 4 | ||
53 | |||
54 | /* how many times the SMI call should be retried if it fails, e.g. | ||
55 |  * because of ongoing DMA activity */ | ||
56 | #define SMI_TRIES 5 | ||
57 | |||
58 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
59 | "speedstep-smi", msg) | ||
60 | |||
61 | /** | ||
62 |  * speedstep_smi_ownership - try to obtain ownership of the SMI interface | ||
63 |  */ | ||
64 | static int speedstep_smi_ownership(void) | ||
65 | { | ||
66 | u32 command, result, magic, dummy; | ||
67 | u32 function = GET_SPEEDSTEP_OWNER; | ||
68 | unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation"; | ||
69 | |||
70 | command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); | ||
71 | magic = virt_to_phys(magic_data); | ||
72 | |||
73 | dprintk("trying to obtain ownership with command %x at port %x\n", | ||
74 | command, smi_port); | ||
75 | |||
76 | __asm__ __volatile__( | ||
77 | "push %%ebp\n" | ||
78 | "out %%al, (%%dx)\n" | ||
79 | "pop %%ebp\n" | ||
80 | : "=D" (result), | ||
81 | "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy), | ||
82 | "=S" (dummy) | ||
83 | : "a" (command), "b" (function), "c" (0), "d" (smi_port), | ||
84 | "D" (0), "S" (magic) | ||
85 | : "memory" | ||
86 | ); | ||
87 | |||
88 | dprintk("result is %x\n", result); | ||
89 | |||
90 | return result; | ||
91 | } | ||
92 | |||
93 | /** | ||
94 | * speedstep_smi_get_freqs - get SpeedStep preferred & current freq. | ||
95 | * @low: the low frequency value is placed here | ||
96 | * @high: the high frequency value is placed here | ||
97 | * | ||
98 | * Only available on later SpeedStep-enabled systems, returns false results or | ||
99 | * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing | ||
100 | * shows that the latter occurs if !(ist_info.event & 0xFFFF). | ||
101 | */ | ||
102 | static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high) | ||
103 | { | ||
104 | u32 command, result = 0, edi, high_mhz, low_mhz, dummy; | ||
105 | u32 state = 0; | ||
106 | u32 function = GET_SPEEDSTEP_FREQS; | ||
107 | |||
108 | if (!(ist_info.event & 0xFFFF)) { | ||
109 | dprintk("bug #1422 -- can't read freqs from BIOS\n"); | ||
110 | return -ENODEV; | ||
111 | } | ||
112 | |||
113 | command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); | ||
114 | |||
115 | dprintk("trying to determine frequencies with command %x at port %x\n", | ||
116 | command, smi_port); | ||
117 | |||
118 | __asm__ __volatile__( | ||
119 | "push %%ebp\n" | ||
120 | "out %%al, (%%dx)\n" | ||
121 | "pop %%ebp" | ||
122 | : "=a" (result), | ||
123 | "=b" (high_mhz), | ||
124 | "=c" (low_mhz), | ||
125 | "=d" (state), "=D" (edi), "=S" (dummy) | ||
126 | : "a" (command), | ||
127 | "b" (function), | ||
128 | "c" (state), | ||
129 | "d" (smi_port), "S" (0), "D" (0) | ||
130 | ); | ||
131 | |||
132 | dprintk("result %x, low_freq %u, high_freq %u\n", | ||
133 | result, low_mhz, high_mhz); | ||
134 | |||
135 | /* abort if results are obviously incorrect... */ | ||
136 | if ((high_mhz + low_mhz) < 600) | ||
137 | return -EINVAL; | ||
138 | |||
139 | *high = high_mhz * 1000; | ||
140 | *low = low_mhz * 1000; | ||
141 | |||
142 | return result; | ||
143 | } | ||
144 | |||
145 | /** | ||
146 |  * speedstep_get_state - read the current SpeedStep state | ||
147 |  * | ||
148 |  * Returns SPEEDSTEP_LOW or SPEEDSTEP_HIGH. | ||
149 |  */ | ||
150 | static int speedstep_get_state(void) | ||
151 | { | ||
152 | u32 function = GET_SPEEDSTEP_STATE; | ||
153 | u32 result, state, edi, command, dummy; | ||
154 | |||
155 | command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); | ||
156 | |||
157 | dprintk("trying to determine current setting with command %x " | ||
158 | "at port %x\n", command, smi_port); | ||
159 | |||
160 | __asm__ __volatile__( | ||
161 | "push %%ebp\n" | ||
162 | "out %%al, (%%dx)\n" | ||
163 | "pop %%ebp\n" | ||
164 | : "=a" (result), | ||
165 | "=b" (state), "=D" (edi), | ||
166 | "=c" (dummy), "=d" (dummy), "=S" (dummy) | ||
167 | : "a" (command), "b" (function), "c" (0), | ||
168 | "d" (smi_port), "S" (0), "D" (0) | ||
169 | ); | ||
170 | |||
171 | dprintk("state is %x, result is %x\n", state, result); | ||
172 | |||
173 | return state & 1; | ||
174 | } | ||
175 | |||
176 | |||
177 | /** | ||
178 | * speedstep_set_state - set the SpeedStep state | ||
179 | * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) | ||
180 | * | ||
181 | */ | ||
182 | static void speedstep_set_state(unsigned int state) | ||
183 | { | ||
184 | unsigned int result = 0, command, new_state, dummy; | ||
185 | unsigned long flags; | ||
186 | unsigned int function = SET_SPEEDSTEP_STATE; | ||
187 | unsigned int retry = 0; | ||
188 | |||
189 | if (state > 0x1) | ||
190 | return; | ||
191 | |||
192 | /* Disable IRQs */ | ||
193 | local_irq_save(flags); | ||
194 | |||
195 | command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); | ||
196 | |||
197 | dprintk("trying to set frequency to state %u " | ||
198 | "with command %x at port %x\n", | ||
199 | state, command, smi_port); | ||
200 | |||
201 | do { | ||
202 | if (retry) { | ||
203 | dprintk("retry %u, previous result %u, waiting...\n", | ||
204 | retry, result); | ||
205 | mdelay(retry * 50); | ||
206 | } | ||
207 | retry++; | ||
208 | __asm__ __volatile__( | ||
209 | "push %%ebp\n" | ||
210 | "out %%al, (%%dx)\n" | ||
211 | "pop %%ebp" | ||
212 | : "=b" (new_state), "=D" (result), | ||
213 | "=c" (dummy), "=a" (dummy), | ||
214 | "=d" (dummy), "=S" (dummy) | ||
215 | : "a" (command), "b" (function), "c" (state), | ||
216 | "d" (smi_port), "S" (0), "D" (0) | ||
217 | ); | ||
218 | } while ((new_state != state) && (retry <= SMI_TRIES)); | ||
219 | |||
220 | /* enable IRQs */ | ||
221 | local_irq_restore(flags); | ||
222 | |||
223 | if (new_state == state) | ||
224 | dprintk("change to %u MHz succeeded after %u tries " | ||
225 | "with result %u\n", | ||
226 | (speedstep_freqs[new_state].frequency / 1000), | ||
227 | retry, result); | ||
228 | else | ||
229 | printk(KERN_ERR "cpufreq: change to state %u " | ||
230 | "failed with new_state %u and result %u\n", | ||
231 | state, new_state, result); | ||
232 | |||
233 | return; | ||
234 | } | ||
235 | |||
236 | |||
237 | /** | ||
238 | * speedstep_target - set a new CPUFreq policy | ||
239 | * @policy: new policy | ||
240 |  * @target_freq: the target frequency | ||
241 |  * @relation: how to choose the frequency relative to @target_freq | ||
242 | * | ||
243 | * Sets a new CPUFreq policy/freq. | ||
244 | */ | ||
245 | static int speedstep_target(struct cpufreq_policy *policy, | ||
246 | unsigned int target_freq, unsigned int relation) | ||
247 | { | ||
248 | unsigned int newstate = 0; | ||
249 | struct cpufreq_freqs freqs; | ||
250 | |||
251 | if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], | ||
252 | target_freq, relation, &newstate)) | ||
253 | return -EINVAL; | ||
254 | |||
255 | freqs.old = speedstep_freqs[speedstep_get_state()].frequency; | ||
256 | freqs.new = speedstep_freqs[newstate].frequency; | ||
257 | 	freqs.cpu = 0; /* speedstep-smi is a UP-only driver */ | ||
258 | |||
259 | if (freqs.old == freqs.new) | ||
260 | return 0; | ||
261 | |||
262 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
263 | speedstep_set_state(newstate); | ||
264 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
265 | |||
266 | return 0; | ||
267 | } | ||
268 | |||
269 | |||
270 | /** | ||
271 | * speedstep_verify - verifies a new CPUFreq policy | ||
272 | * @policy: new policy | ||
273 | * | ||
274 | * Limit must be within speedstep_low_freq and speedstep_high_freq, with | ||
275 | * at least one border included. | ||
276 | */ | ||
277 | static int speedstep_verify(struct cpufreq_policy *policy) | ||
278 | { | ||
279 | return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); | ||
280 | } | ||
281 | |||
282 | |||
283 | static int speedstep_cpu_init(struct cpufreq_policy *policy) | ||
284 | { | ||
285 | int result; | ||
286 | unsigned int speed, state; | ||
287 | unsigned int *low, *high; | ||
288 | |||
289 | /* capability check */ | ||
290 | if (policy->cpu != 0) | ||
291 | return -ENODEV; | ||
292 | |||
293 | result = speedstep_smi_ownership(); | ||
294 | if (result) { | ||
295 | 		dprintk("failed to acquire ownership of the SMI interface.\n"); | ||
296 | return -EINVAL; | ||
297 | } | ||
298 | |||
299 | /* detect low and high frequency */ | ||
300 | low = &speedstep_freqs[SPEEDSTEP_LOW].frequency; | ||
301 | high = &speedstep_freqs[SPEEDSTEP_HIGH].frequency; | ||
302 | |||
303 | result = speedstep_smi_get_freqs(low, high); | ||
304 | if (result) { | ||
305 | 		/* fall back to the speedstep-lib.c detection mechanism: | ||
306 | 		 * try both states out */ | ||
307 | dprintk("could not detect low and high frequencies " | ||
308 | "by SMI call.\n"); | ||
309 | result = speedstep_get_freqs(speedstep_processor, | ||
310 | low, high, | ||
311 | NULL, | ||
312 | &speedstep_set_state); | ||
313 | |||
314 | if (result) { | ||
315 | dprintk("could not detect two different speeds" | ||
316 | " -- aborting.\n"); | ||
317 | return result; | ||
318 | } else | ||
319 | dprintk("workaround worked.\n"); | ||
320 | } | ||
321 | |||
322 | /* get current speed setting */ | ||
323 | state = speedstep_get_state(); | ||
324 | speed = speedstep_freqs[state].frequency; | ||
325 | |||
326 | dprintk("currently at %s speed setting - %i MHz\n", | ||
327 | (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) | ||
328 | ? "low" : "high", | ||
329 | (speed / 1000)); | ||
330 | |||
331 | /* cpuinfo and default policy values */ | ||
332 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
333 | policy->cur = speed; | ||
334 | |||
335 | result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); | ||
336 | if (result) | ||
337 | return result; | ||
338 | |||
339 | cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); | ||
340 | |||
341 | return 0; | ||
342 | } | ||
343 | |||
344 | static int speedstep_cpu_exit(struct cpufreq_policy *policy) | ||
345 | { | ||
346 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
347 | return 0; | ||
348 | } | ||
349 | |||
350 | static unsigned int speedstep_get(unsigned int cpu) | ||
351 | { | ||
352 | if (cpu) | ||
353 | return -ENODEV; | ||
354 | return speedstep_get_frequency(speedstep_processor); | ||
355 | } | ||
356 | |||
357 | |||
358 | static int speedstep_resume(struct cpufreq_policy *policy) | ||
359 | { | ||
360 | int result = speedstep_smi_ownership(); | ||
361 | |||
362 | if (result) | ||
363 | 		dprintk("failed to re-acquire ownership of the SMI interface.\n"); | ||
364 | |||
365 | return result; | ||
366 | } | ||
367 | |||
368 | static struct freq_attr *speedstep_attr[] = { | ||
369 | &cpufreq_freq_attr_scaling_available_freqs, | ||
370 | NULL, | ||
371 | }; | ||
372 | |||
373 | static struct cpufreq_driver speedstep_driver = { | ||
374 | .name = "speedstep-smi", | ||
375 | .verify = speedstep_verify, | ||
376 | .target = speedstep_target, | ||
377 | .init = speedstep_cpu_init, | ||
378 | .exit = speedstep_cpu_exit, | ||
379 | .get = speedstep_get, | ||
380 | .resume = speedstep_resume, | ||
381 | .owner = THIS_MODULE, | ||
382 | .attr = speedstep_attr, | ||
383 | }; | ||
384 | |||
385 | /** | ||
386 | * speedstep_init - initializes the SpeedStep CPUFreq driver | ||
387 | * | ||
388 | * Initializes the SpeedStep support. Returns -ENODEV on unsupported | ||
389 |  * BIOS, -EINVAL on problems during initialization, and zero on | ||
390 | * success. | ||
391 | */ | ||
392 | static int __init speedstep_init(void) | ||
393 | { | ||
394 | speedstep_processor = speedstep_detect_processor(); | ||
395 | |||
396 | switch (speedstep_processor) { | ||
397 | case SPEEDSTEP_CPU_PIII_T: | ||
398 | case SPEEDSTEP_CPU_PIII_C: | ||
399 | case SPEEDSTEP_CPU_PIII_C_EARLY: | ||
400 | break; | ||
401 | default: | ||
402 | speedstep_processor = 0; | ||
403 | } | ||
404 | |||
405 | if (!speedstep_processor) { | ||
406 | dprintk("No supported Intel CPU detected.\n"); | ||
407 | return -ENODEV; | ||
408 | } | ||
409 | |||
410 | dprintk("signature:0x%.8lx, command:0x%.8lx, " | ||
411 | "event:0x%.8lx, perf_level:0x%.8lx.\n", | ||
412 | ist_info.signature, ist_info.command, | ||
413 | ist_info.event, ist_info.perf_level); | ||
414 | |||
415 | 	/* Error out if there is no IST-SMI BIOS and no module parameters | ||
416 | 	 * were given (sig = 'ISGE', aka 'Intel Speedstep Gate E') */ | ||
417 | if ((ist_info.signature != 0x47534943) && ( | ||
418 | (smi_port == 0) || (smi_cmd == 0))) | ||
419 | return -ENODEV; | ||
420 | |||
421 | if (smi_sig == 1) | ||
422 | smi_sig = 0x47534943; | ||
423 | else | ||
424 | smi_sig = ist_info.signature; | ||
425 | |||
426 | 	/* set up smi_port from the module parameter or the BIOS */ | ||
427 | if ((smi_port > 0xff) || (smi_port < 0)) | ||
428 | return -EINVAL; | ||
429 | else if (smi_port == 0) | ||
430 | smi_port = ist_info.command & 0xff; | ||
431 | |||
432 | if ((smi_cmd > 0xff) || (smi_cmd < 0)) | ||
433 | return -EINVAL; | ||
434 | else if (smi_cmd == 0) | ||
435 | smi_cmd = (ist_info.command >> 16) & 0xff; | ||
436 | |||
437 | return cpufreq_register_driver(&speedstep_driver); | ||
438 | } | ||
439 | |||
440 | |||
441 | /** | ||
442 | * speedstep_exit - unregisters SpeedStep support | ||
443 | * | ||
444 | * Unregisters SpeedStep support. | ||
445 | */ | ||
446 | static void __exit speedstep_exit(void) | ||
447 | { | ||
448 | cpufreq_unregister_driver(&speedstep_driver); | ||
449 | } | ||
450 | |||
451 | module_param(smi_port, int, 0444); | ||
452 | module_param(smi_cmd, int, 0444); | ||
453 | module_param(smi_sig, uint, 0444); | ||
454 | |||
455 | MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value " | ||
456 | "-- Intel's default setting is 0xb2"); | ||
457 | MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value " | ||
458 | "-- Intel's default setting is 0x82"); | ||
459 | MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the " | ||
460 | "SMI interface."); | ||
461 | |||
462 | MODULE_AUTHOR("Hiroshi Miura"); | ||
463 | MODULE_DESCRIPTION("Speedstep driver for IST applet SMI interface."); | ||
464 | MODULE_LICENSE("GPL"); | ||
465 | |||
466 | module_init(speedstep_init); | ||
467 | module_exit(speedstep_exit); | ||
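Usage note: as the parameter descriptions above indicate, the BIOS-reported port and command can be overridden at load time, e.g. (illustrative values matching Intel's documented defaults): modprobe speedstep-smi smi_port=0xb2 smi_cmd=0x82.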
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index df86bc8c859d..1edf5ba4fb2b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -29,10 +29,10 @@ | |||
29 | 29 | ||
30 | static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | 30 | static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) |
31 | { | 31 | { |
32 | u64 misc_enable; | ||
33 | |||
32 | /* Unmask CPUID levels if masked: */ | 34 | /* Unmask CPUID levels if masked: */ |
33 | if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { | 35 | if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { |
34 | u64 misc_enable; | ||
35 | |||
36 | rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | 36 | rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); |
37 | 37 | ||
38 | if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) { | 38 | if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) { |
@@ -118,8 +118,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
118 | * (model 2) with the same problem. | 118 | * (model 2) with the same problem. |
119 | */ | 119 | */ |
120 | if (c->x86 == 15) { | 120 | if (c->x86 == 15) { |
121 | u64 misc_enable; | ||
122 | |||
123 | rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | 121 | rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); |
124 | 122 | ||
125 | if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { | 123 | if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { |
@@ -130,6 +128,19 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
130 | } | 128 | } |
131 | } | 129 | } |
132 | #endif | 130 | #endif |
131 | |||
132 | /* | ||
133 | * If fast string is not enabled in IA32_MISC_ENABLE for any reason, | ||
134 | * clear the fast string and enhanced fast string CPU capabilities. | ||
135 | */ | ||
136 | if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { | ||
137 | rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | ||
138 | if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) { | ||
139 | printk(KERN_INFO "Disabled fast string operations\n"); | ||
140 | setup_clear_cpu_cap(X86_FEATURE_REP_GOOD); | ||
141 | setup_clear_cpu_cap(X86_FEATURE_ERMS); | ||
142 | } | ||
143 | } | ||
133 | } | 144 | } |
134 | 145 | ||
135 | #ifdef CONFIG_X86_32 | 146 | #ifdef CONFIG_X86_32 |
@@ -400,12 +411,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
400 | 411 | ||
401 | switch (c->x86_model) { | 412 | switch (c->x86_model) { |
402 | case 5: | 413 | case 5: |
403 | if (c->x86_mask == 0) { | 414 | if (l2 == 0) |
404 | if (l2 == 0) | 415 | p = "Celeron (Covington)"; |
405 | p = "Celeron (Covington)"; | 416 | else if (l2 == 256) |
406 | else if (l2 == 256) | 417 | p = "Mobile Pentium II (Dixon)"; |
407 | p = "Mobile Pentium II (Dixon)"; | ||
408 | } | ||
409 | break; | 418 | break; |
410 | 419 | ||
411 | case 6: | 420 | case 6: |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 1ce1af2899df..c105c533ed94 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -327,7 +327,6 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) | |||
327 | l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); | 327 | l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); |
328 | l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); | 328 | l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); |
329 | 329 | ||
330 | l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; | ||
331 | l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; | 330 | l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; |
332 | } | 331 | } |
333 | 332 | ||
@@ -454,27 +453,16 @@ int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot, | |||
454 | { | 453 | { |
455 | int ret = 0; | 454 | int ret = 0; |
456 | 455 | ||
457 | #define SUBCACHE_MASK (3UL << 20) | 456 | /* check if @slot is already used or the index is already disabled */ |
458 | #define SUBCACHE_INDEX 0xfff | ||
459 | |||
460 | /* | ||
461 | * check whether this slot is already used or | ||
462 | * the index is already disabled | ||
463 | */ | ||
464 | ret = amd_get_l3_disable_slot(l3, slot); | 457 | ret = amd_get_l3_disable_slot(l3, slot); |
465 | if (ret >= 0) | 458 | if (ret >= 0) |
466 | return -EINVAL; | 459 | return -EINVAL; |
467 | 460 | ||
468 | /* | 461 | if (index > l3->indices) |
469 | * check whether the other slot has disabled the | ||
470 | * same index already | ||
471 | */ | ||
472 | if (index == amd_get_l3_disable_slot(l3, !slot)) | ||
473 | return -EINVAL; | 462 | return -EINVAL; |
474 | 463 | ||
475 | /* do not allow writes outside of allowed bits */ | 464 | /* check whether the other slot has disabled the same index already */ |
476 | if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || | 465 | if (index == amd_get_l3_disable_slot(l3, !slot)) |
477 | ((index & SUBCACHE_INDEX) > l3->indices)) | ||
478 | return -EINVAL; | 466 | return -EINVAL; |
479 | 467 | ||
480 | amd_l3_disable_index(l3, cpu, slot, index); | 468 | amd_l3_disable_index(l3, cpu, slot, index); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3385ea26f684..ff1ae9b6464d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -105,20 +105,6 @@ static int cpu_missing; | |||
105 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | 105 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); |
106 | EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); | 106 | EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); |
107 | 107 | ||
108 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, | ||
109 | void *data) | ||
110 | { | ||
111 | pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n"); | ||
112 | pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n"); | ||
113 | |||
114 | return NOTIFY_STOP; | ||
115 | } | ||
116 | |||
117 | static struct notifier_block mce_dec_nb = { | ||
118 | .notifier_call = default_decode_mce, | ||
119 | .priority = -1, | ||
120 | }; | ||
121 | |||
122 | /* MCA banks polled by the period polling timer for corrected events */ | 108 | /* MCA banks polled by the period polling timer for corrected events */ |
123 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | 109 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { |
124 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | 110 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL |
@@ -212,6 +198,8 @@ void mce_log(struct mce *mce) | |||
212 | 198 | ||
213 | static void print_mce(struct mce *m) | 199 | static void print_mce(struct mce *m) |
214 | { | 200 | { |
201 | int ret = 0; | ||
202 | |||
215 | pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", | 203 | pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", |
216 | m->extcpu, m->mcgstatus, m->bank, m->status); | 204 | m->extcpu, m->mcgstatus, m->bank, m->status); |
217 | 205 | ||
@@ -239,7 +227,11 @@ static void print_mce(struct mce *m) | |||
239 | * Print out human-readable details about the MCE error, | 227 | * Print out human-readable details about the MCE error, |
240 | * (if the CPU has an implementation for that) | 228 | * (if the CPU has an implementation for that) |
241 | */ | 229 | */ |
242 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); | 230 | ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); |
231 | if (ret == NOTIFY_STOP) | ||
232 | return; | ||
233 | |||
234 | pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n"); | ||
243 | } | 235 | } |
244 | 236 | ||
245 | #define PANIC_TIMEOUT 5 /* 5 seconds */ | 237 | #define PANIC_TIMEOUT 5 /* 5 seconds */ |
@@ -590,7 +582,6 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
590 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { | 582 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { |
591 | mce_log(&m); | 583 | mce_log(&m); |
592 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); | 584 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); |
593 | add_taint(TAINT_MACHINE_CHECK); | ||
594 | } | 585 | } |
595 | 586 | ||
596 | /* | 587 | /* |
@@ -1722,8 +1713,6 @@ __setup("mce", mcheck_enable); | |||
1722 | 1713 | ||
1723 | int __init mcheck_init(void) | 1714 | int __init mcheck_init(void) |
1724 | { | 1715 | { |
1725 | atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); | ||
1726 | |||
1727 | mcheck_intel_therm_init(); | 1716 | mcheck_intel_therm_init(); |
1728 | 1717 | ||
1729 | return 0; | 1718 | return 0; |
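For context on the chain that print_mce() now consults: with default_decode_mce removed, the ratelimited 'mcelog --ascii' hint is printed unless a registered decoder returns NOTIFY_STOP. A minimal sketch of such a decoder (hypothetical names, not part of this patch):

static int my_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
{
	struct mce *m = data;

	pr_emerg("CPU %d bank %d: decoded machine check\n", m->extcpu, m->bank);
	return NOTIFY_STOP;	/* suppress the generic mcelog hint */
}

static struct notifier_block my_mce_dec_nb = {
	.notifier_call = my_decode_mce,
};

/* registered e.g. from a module init path: */
/* atomic_notifier_chain_register(&x86_mce_decoder_chain, &my_mce_dec_nb); */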
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 167f97b5596e..bb0adad35143 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -509,6 +509,7 @@ recurse: | |||
509 | out_free: | 509 | out_free: |
510 | if (b) { | 510 | if (b) { |
511 | kobject_put(&b->kobj); | 511 | kobject_put(&b->kobj); |
512 | list_del(&b->miscj); | ||
512 | kfree(b); | 513 | kfree(b); |
513 | } | 514 | } |
514 | return err; | 515 | return err; |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 6f8c5e9da97f..27c625178bf1 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -187,8 +187,6 @@ static int therm_throt_process(bool new_event, int event, int level) | |||
187 | this_cpu, | 187 | this_cpu, |
188 | level == CORE_LEVEL ? "Core" : "Package", | 188 | level == CORE_LEVEL ? "Core" : "Package", |
189 | state->count); | 189 | state->count); |
190 | |||
191 | add_taint(TAINT_MACHINE_CHECK); | ||
192 | return 1; | 190 | return 1; |
193 | } | 191 | } |
194 | if (old_event) { | 192 | if (old_event) { |
@@ -355,7 +353,6 @@ static void notify_thresholds(__u64 msr_val) | |||
355 | static void intel_thermal_interrupt(void) | 353 | static void intel_thermal_interrupt(void) |
356 | { | 354 | { |
357 | __u64 msr_val; | 355 | __u64 msr_val; |
358 | struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); | ||
359 | 356 | ||
360 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | 357 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); |
361 | 358 | ||
@@ -367,19 +364,19 @@ static void intel_thermal_interrupt(void) | |||
367 | CORE_LEVEL) != 0) | 364 | CORE_LEVEL) != 0) |
368 | mce_log_therm_throt_event(CORE_THROTTLED | msr_val); | 365 | mce_log_therm_throt_event(CORE_THROTTLED | msr_val); |
369 | 366 | ||
370 | if (cpu_has(c, X86_FEATURE_PLN)) | 367 | if (this_cpu_has(X86_FEATURE_PLN)) |
371 | if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, | 368 | if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, |
372 | POWER_LIMIT_EVENT, | 369 | POWER_LIMIT_EVENT, |
373 | CORE_LEVEL) != 0) | 370 | CORE_LEVEL) != 0) |
374 | mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); | 371 | mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); |
375 | 372 | ||
376 | if (cpu_has(c, X86_FEATURE_PTS)) { | 373 | if (this_cpu_has(X86_FEATURE_PTS)) { |
377 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); | 374 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); |
378 | if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, | 375 | if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, |
379 | THERMAL_THROTTLING_EVENT, | 376 | THERMAL_THROTTLING_EVENT, |
380 | PACKAGE_LEVEL) != 0) | 377 | PACKAGE_LEVEL) != 0) |
381 | mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); | 378 | mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); |
382 | if (cpu_has(c, X86_FEATURE_PLN)) | 379 | if (this_cpu_has(X86_FEATURE_PLN)) |
383 | if (therm_throt_process(msr_val & | 380 | if (therm_throt_process(msr_val & |
384 | PACKAGE_THERM_STATUS_POWER_LIMIT, | 381 | PACKAGE_THERM_STATUS_POWER_LIMIT, |
385 | POWER_LIMIT_EVENT, | 382 | POWER_LIMIT_EVENT, |
@@ -393,7 +390,6 @@ static void unexpected_thermal_interrupt(void) | |||
393 | { | 390 | { |
394 | printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n", | 391 | printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n", |
395 | smp_processor_id()); | 392 | smp_processor_id()); |
396 | add_taint(TAINT_MACHINE_CHECK); | ||
397 | } | 393 | } |
398 | 394 | ||
399 | static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; | 395 | static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; |
@@ -446,18 +442,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
446 | */ | 442 | */ |
447 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 443 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
448 | 444 | ||
445 | h = lvtthmr_init; | ||
449 | /* | 446 | /* |
450 | * The initial value of thermal LVT entries on all APs always reads | 447 | * The initial value of thermal LVT entries on all APs always reads |
451 | * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI | 448 | * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI |
452 | * sequence to them and LVT registers are reset to 0s except for | 449 | * sequence to them and LVT registers are reset to 0s except for |
453 | * the mask bits which are set to 1s when APs receive INIT IPI. | 450 | * the mask bits which are set to 1s when APs receive INIT IPI. |
454 | * Always restore the value that BIOS has programmed on AP based on | 451 | * If BIOS takes over the thermal interrupt and sets its interrupt |
455 | * BSP's info we saved since BIOS is always setting the same value | 452 | * delivery mode to SMI (not fixed), it restores the value that the |
456 | * for all threads/cores | 453 | * BIOS has programmed on AP based on BSP's info we saved since BIOS |
454 | * is always setting the same value for all threads/cores. | ||
457 | */ | 455 | */ |
458 | apic_write(APIC_LVTTHMR, lvtthmr_init); | 456 | if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED) |
457 | apic_write(APIC_LVTTHMR, lvtthmr_init); | ||
459 | 458 | ||
460 | h = lvtthmr_init; | ||
461 | 459 | ||
462 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | 460 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { |
463 | printk(KERN_DEBUG | 461 | printk(KERN_DEBUG |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index eed3673a8656..3a0338b4b179 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
32 | #include <asm/compat.h> | 32 | #include <asm/compat.h> |
33 | #include <asm/smp.h> | 33 | #include <asm/smp.h> |
34 | #include <asm/alternative.h> | ||
34 | 35 | ||
35 | #if 0 | 36 | #if 0 |
36 | #undef wrmsrl | 37 | #undef wrmsrl |
@@ -363,12 +364,18 @@ again: | |||
363 | return new_raw_count; | 364 | return new_raw_count; |
364 | } | 365 | } |
365 | 366 | ||
366 | /* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */ | ||
367 | static inline int x86_pmu_addr_offset(int index) | 367 | static inline int x86_pmu_addr_offset(int index) |
368 | { | 368 | { |
369 | if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) | 369 | int offset; |
370 | return index << 1; | 370 | |
371 | return index; | 371 | /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */ |
372 | alternative_io(ASM_NOP2, | ||
373 | "shll $1, %%eax", | ||
374 | X86_FEATURE_PERFCTR_CORE, | ||
375 | "=a" (offset), | ||
376 | "a" (index)); | ||
377 | |||
378 | return offset; | ||
372 | } | 379 | } |
373 | 380 | ||
374 | static inline unsigned int x86_pmu_config_addr(int index) | 381 | static inline unsigned int x86_pmu_config_addr(int index) |
@@ -586,8 +593,12 @@ static int x86_setup_perfctr(struct perf_event *event) | |||
586 | return -EOPNOTSUPP; | 593 | return -EOPNOTSUPP; |
587 | } | 594 | } |
588 | 595 | ||
596 | /* | ||
597 | * Do not allow config1 (extended registers) to propagate, | ||
598 | * there's no sane user-space generalization yet: | ||
599 | */ | ||
589 | if (attr->type == PERF_TYPE_RAW) | 600 | if (attr->type == PERF_TYPE_RAW) |
590 | return x86_pmu_extra_regs(event->attr.config, event); | 601 | return 0; |
591 | 602 | ||
592 | if (attr->type == PERF_TYPE_HW_CACHE) | 603 | if (attr->type == PERF_TYPE_HW_CACHE) |
593 | return set_ext_hw_attr(hwc, event); | 604 | return set_ext_hw_attr(hwc, event); |
@@ -609,8 +620,8 @@ static int x86_setup_perfctr(struct perf_event *event) | |||
609 | /* | 620 | /* |
610 | * Branch tracing: | 621 | * Branch tracing: |
611 | */ | 622 | */ |
612 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | 623 | if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && |
613 | (hwc->sample_period == 1)) { | 624 | !attr->freq && hwc->sample_period == 1) { |
614 | /* BTS is not supported by this architecture. */ | 625 | /* BTS is not supported by this architecture. */ |
615 | if (!x86_pmu.bts_active) | 626 | if (!x86_pmu.bts_active) |
616 | return -EOPNOTSUPP; | 627 | return -EOPNOTSUPP; |
@@ -1284,6 +1295,16 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1284 | 1295 | ||
1285 | cpuc = &__get_cpu_var(cpu_hw_events); | 1296 | cpuc = &__get_cpu_var(cpu_hw_events); |
1286 | 1297 | ||
1298 | /* | ||
1299 | * Some chipsets need to unmask the LVTPC in a particular spot | ||
1300 | * inside the nmi handler. As a result, the unmasking was pushed | ||
1301 | * into all the nmi handlers. | ||
1302 | * | ||
1303 | * This generic handler doesn't seem to have any issues where the | ||
1304 | * unmasking occurs so it was left at the top. | ||
1305 | */ | ||
1306 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
1307 | |||
1287 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1308 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1288 | if (!test_bit(idx, cpuc->active_mask)) { | 1309 | if (!test_bit(idx, cpuc->active_mask)) { |
1289 | /* | 1310 | /* |
@@ -1370,8 +1391,6 @@ perf_event_nmi_handler(struct notifier_block *self, | |||
1370 | return NOTIFY_DONE; | 1391 | return NOTIFY_DONE; |
1371 | } | 1392 | } |
1372 | 1393 | ||
1373 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
1374 | |||
1375 | handled = x86_pmu.handle_irq(args->regs); | 1394 | handled = x86_pmu.handle_irq(args->regs); |
1376 | if (!handled) | 1395 | if (!handled) |
1377 | return NOTIFY_DONE; | 1396 | return NOTIFY_DONE; |
@@ -1754,17 +1773,6 @@ static struct pmu pmu = { | |||
1754 | * callchain support | 1773 | * callchain support |
1755 | */ | 1774 | */ |
1756 | 1775 | ||
1757 | static void | ||
1758 | backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
1759 | { | ||
1760 | /* Ignore warnings */ | ||
1761 | } | ||
1762 | |||
1763 | static void backtrace_warning(void *data, char *msg) | ||
1764 | { | ||
1765 | /* Ignore warnings */ | ||
1766 | } | ||
1767 | |||
1768 | static int backtrace_stack(void *data, char *name) | 1776 | static int backtrace_stack(void *data, char *name) |
1769 | { | 1777 | { |
1770 | return 0; | 1778 | return 0; |
@@ -1778,8 +1786,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
1778 | } | 1786 | } |
1779 | 1787 | ||
1780 | static const struct stacktrace_ops backtrace_ops = { | 1788 | static const struct stacktrace_ops backtrace_ops = { |
1781 | .warning = backtrace_warning, | ||
1782 | .warning_symbol = backtrace_warning_symbol, | ||
1783 | .stack = backtrace_stack, | 1789 | .stack = backtrace_stack, |
1784 | .address = backtrace_address, | 1790 | .address = backtrace_address, |
1785 | .walk_stack = print_context_stack_bp, | 1791 | .walk_stack = print_context_stack_bp, |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 461f62bbd774..fe29c1d2219e 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -8,7 +8,7 @@ static __initconst const u64 amd_hw_cache_event_ids | |||
8 | [ C(L1D) ] = { | 8 | [ C(L1D) ] = { |
9 | [ C(OP_READ) ] = { | 9 | [ C(OP_READ) ] = { |
10 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | 10 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ |
11 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ | 11 | [ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */ |
12 | }, | 12 | }, |
13 | [ C(OP_WRITE) ] = { | 13 | [ C(OP_WRITE) ] = { |
14 | [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ | 14 | [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ |
@@ -96,12 +96,14 @@ static __initconst const u64 amd_hw_cache_event_ids | |||
96 | */ | 96 | */ |
97 | static const u64 amd_perfmon_event_map[] = | 97 | static const u64 amd_perfmon_event_map[] = |
98 | { | 98 | { |
99 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, | 99 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, |
100 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | 100 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, |
101 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, | 101 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, |
102 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, | 102 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, |
103 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, | 103 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, |
104 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, | 104 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, |
105 | [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ | ||
106 | [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ | ||
105 | }; | 107 | }; |
106 | 108 | ||
107 | static u64 amd_pmu_event_map(int hw_event) | 109 | static u64 amd_pmu_event_map(int hw_event) |
@@ -427,7 +429,9 @@ static __initconst const struct x86_pmu amd_pmu = { | |||
427 | * | 429 | * |
428 | * Exceptions: | 430 | * Exceptions: |
429 | * | 431 | * |
432 | * 0x000 FP PERF_CTL[3], PERF_CTL[5:3] (*) | ||
430 | * 0x003 FP PERF_CTL[3] | 433 | * 0x003 FP PERF_CTL[3] |
434 | * 0x004 FP PERF_CTL[3], PERF_CTL[5:3] (*) | ||
431 | * 0x00B FP PERF_CTL[3] | 435 | * 0x00B FP PERF_CTL[3] |
432 | * 0x00D FP PERF_CTL[3] | 436 | * 0x00D FP PERF_CTL[3] |
433 | * 0x023 DE PERF_CTL[2:0] | 437 | * 0x023 DE PERF_CTL[2:0] |
@@ -448,6 +452,8 @@ static __initconst const struct x86_pmu amd_pmu = { | |||
448 | * 0x0DF LS PERF_CTL[5:0] | 452 | * 0x0DF LS PERF_CTL[5:0] |
449 | * 0x1D6 EX PERF_CTL[5:0] | 453 | * 0x1D6 EX PERF_CTL[5:0] |
450 | * 0x1D8 EX PERF_CTL[5:0] | 454 | * 0x1D8 EX PERF_CTL[5:0] |
455 | * | ||
456 | * (*) depending on the umask all FPU counters may be used | ||
451 | */ | 457 | */ |
452 | 458 | ||
453 | static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); | 459 | static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); |
@@ -460,18 +466,28 @@ static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); | |||
460 | static struct event_constraint * | 466 | static struct event_constraint * |
461 | amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) | 467 | amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) |
462 | { | 468 | { |
463 | unsigned int event_code = amd_get_event_code(&event->hw); | 469 | struct hw_perf_event *hwc = &event->hw; |
470 | unsigned int event_code = amd_get_event_code(hwc); | ||
464 | 471 | ||
465 | switch (event_code & AMD_EVENT_TYPE_MASK) { | 472 | switch (event_code & AMD_EVENT_TYPE_MASK) { |
466 | case AMD_EVENT_FP: | 473 | case AMD_EVENT_FP: |
467 | switch (event_code) { | 474 | switch (event_code) { |
475 | case 0x000: | ||
476 | if (!(hwc->config & 0x0000F000ULL)) | ||
477 | break; | ||
478 | if (!(hwc->config & 0x00000F00ULL)) | ||
479 | break; | ||
480 | return &amd_f15_PMC3; | ||
481 | case 0x004: | ||
482 | if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1) | ||
483 | break; | ||
484 | return &amd_f15_PMC3; | ||
468 | case 0x003: | 485 | case 0x003: |
469 | case 0x00B: | 486 | case 0x00B: |
470 | case 0x00D: | 487 | case 0x00D: |
471 | return &amd_f15_PMC3; | 488 | return &amd_f15_PMC3; |
472 | default: | ||
473 | return &amd_f15_PMC53; | ||
474 | } | 489 | } |
490 | return &amd_f15_PMC53; | ||
475 | case AMD_EVENT_LS: | 491 | case AMD_EVENT_LS: |
476 | case AMD_EVENT_DC: | 492 | case AMD_EVENT_DC: |
477 | case AMD_EVENT_EX_LS: | 493 | case AMD_EVENT_EX_LS: |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 8fc2b2cee1da..41178c826c48 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -25,7 +25,7 @@ struct intel_percore { | |||
25 | /* | 25 | /* |
26 | * Intel PerfMon, used on Core and later. | 26 | * Intel PerfMon, used on Core and later. |
27 | */ | 27 | */ |
28 | static const u64 intel_perfmon_event_map[] = | 28 | static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = |
29 | { | 29 | { |
30 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, | 30 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, |
31 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | 31 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, |
@@ -36,7 +36,7 @@ static const u64 intel_perfmon_event_map[] = | |||
36 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | 36 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, |
37 | }; | 37 | }; |
38 | 38 | ||
39 | static struct event_constraint intel_core_event_constraints[] = | 39 | static struct event_constraint intel_core_event_constraints[] __read_mostly = |
40 | { | 40 | { |
41 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | 41 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ |
42 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | 42 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ |
@@ -47,7 +47,7 @@ static struct event_constraint intel_core_event_constraints[] = | |||
47 | EVENT_CONSTRAINT_END | 47 | EVENT_CONSTRAINT_END |
48 | }; | 48 | }; |
49 | 49 | ||
50 | static struct event_constraint intel_core2_event_constraints[] = | 50 | static struct event_constraint intel_core2_event_constraints[] __read_mostly = |
51 | { | 51 | { |
52 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 52 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
53 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 53 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
@@ -70,7 +70,7 @@ static struct event_constraint intel_core2_event_constraints[] = | |||
70 | EVENT_CONSTRAINT_END | 70 | EVENT_CONSTRAINT_END |
71 | }; | 71 | }; |
72 | 72 | ||
73 | static struct event_constraint intel_nehalem_event_constraints[] = | 73 | static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = |
74 | { | 74 | { |
75 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 75 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
76 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 76 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
@@ -86,19 +86,19 @@ static struct event_constraint intel_nehalem_event_constraints[] = | |||
86 | EVENT_CONSTRAINT_END | 86 | EVENT_CONSTRAINT_END |
87 | }; | 87 | }; |
88 | 88 | ||
89 | static struct extra_reg intel_nehalem_extra_regs[] = | 89 | static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = |
90 | { | 90 | { |
91 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), | 91 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), |
92 | EVENT_EXTRA_END | 92 | EVENT_EXTRA_END |
93 | }; | 93 | }; |
94 | 94 | ||
95 | static struct event_constraint intel_nehalem_percore_constraints[] = | 95 | static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly = |
96 | { | 96 | { |
97 | INTEL_EVENT_CONSTRAINT(0xb7, 0), | 97 | INTEL_EVENT_CONSTRAINT(0xb7, 0), |
98 | EVENT_CONSTRAINT_END | 98 | EVENT_CONSTRAINT_END |
99 | }; | 99 | }; |
100 | 100 | ||
101 | static struct event_constraint intel_westmere_event_constraints[] = | 101 | static struct event_constraint intel_westmere_event_constraints[] __read_mostly = |
102 | { | 102 | { |
103 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 103 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
104 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 104 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
@@ -110,7 +110,7 @@ static struct event_constraint intel_westmere_event_constraints[] = | |||
110 | EVENT_CONSTRAINT_END | 110 | EVENT_CONSTRAINT_END |
111 | }; | 111 | }; |
112 | 112 | ||
113 | static struct event_constraint intel_snb_event_constraints[] = | 113 | static struct event_constraint intel_snb_event_constraints[] __read_mostly = |
114 | { | 114 | { |
115 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 115 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
116 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 116 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
@@ -123,21 +123,21 @@ static struct event_constraint intel_snb_event_constraints[] = | |||
123 | EVENT_CONSTRAINT_END | 123 | EVENT_CONSTRAINT_END |
124 | }; | 124 | }; |
125 | 125 | ||
126 | static struct extra_reg intel_westmere_extra_regs[] = | 126 | static struct extra_reg intel_westmere_extra_regs[] __read_mostly = |
127 | { | 127 | { |
128 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), | 128 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), |
129 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), | 129 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), |
130 | EVENT_EXTRA_END | 130 | EVENT_EXTRA_END |
131 | }; | 131 | }; |
132 | 132 | ||
133 | static struct event_constraint intel_westmere_percore_constraints[] = | 133 | static struct event_constraint intel_westmere_percore_constraints[] __read_mostly = |
134 | { | 134 | { |
135 | INTEL_EVENT_CONSTRAINT(0xb7, 0), | 135 | INTEL_EVENT_CONSTRAINT(0xb7, 0), |
136 | INTEL_EVENT_CONSTRAINT(0xbb, 0), | 136 | INTEL_EVENT_CONSTRAINT(0xbb, 0), |
137 | EVENT_CONSTRAINT_END | 137 | EVENT_CONSTRAINT_END |
138 | }; | 138 | }; |
139 | 139 | ||
140 | static struct event_constraint intel_gen_event_constraints[] = | 140 | static struct event_constraint intel_gen_event_constraints[] __read_mostly = |
141 | { | 141 | { |
142 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 142 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
143 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 143 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
@@ -184,26 +184,23 @@ static __initconst const u64 snb_hw_cache_event_ids | |||
184 | }, | 184 | }, |
185 | }, | 185 | }, |
186 | [ C(LL ) ] = { | 186 | [ C(LL ) ] = { |
187 | /* | ||
188 | * TBD: Need Off-core Response Performance Monitoring support | ||
189 | */ | ||
190 | [ C(OP_READ) ] = { | 187 | [ C(OP_READ) ] = { |
191 | /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ | 188 | /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ |
192 | [ C(RESULT_ACCESS) ] = 0x01b7, | 189 | [ C(RESULT_ACCESS) ] = 0x01b7, |
193 | /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ | 190 | /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ |
194 | [ C(RESULT_MISS) ] = 0x01bb, | 191 | [ C(RESULT_MISS) ] = 0x01b7, |
195 | }, | 192 | }, |
196 | [ C(OP_WRITE) ] = { | 193 | [ C(OP_WRITE) ] = { |
197 | /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */ | 194 | /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ |
198 | [ C(RESULT_ACCESS) ] = 0x01b7, | 195 | [ C(RESULT_ACCESS) ] = 0x01b7, |
199 | /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */ | 196 | /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ |
200 | [ C(RESULT_MISS) ] = 0x01bb, | 197 | [ C(RESULT_MISS) ] = 0x01b7, |
201 | }, | 198 | }, |
202 | [ C(OP_PREFETCH) ] = { | 199 | [ C(OP_PREFETCH) ] = { |
203 | /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ | 200 | /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ |
204 | [ C(RESULT_ACCESS) ] = 0x01b7, | 201 | [ C(RESULT_ACCESS) ] = 0x01b7, |
205 | /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ | 202 | /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ |
206 | [ C(RESULT_MISS) ] = 0x01bb, | 203 | [ C(RESULT_MISS) ] = 0x01b7, |
207 | }, | 204 | }, |
208 | }, | 205 | }, |
209 | [ C(DTLB) ] = { | 206 | [ C(DTLB) ] = { |
@@ -285,26 +282,26 @@ static __initconst const u64 westmere_hw_cache_event_ids | |||
285 | }, | 282 | }, |
286 | [ C(LL ) ] = { | 283 | [ C(LL ) ] = { |
287 | [ C(OP_READ) ] = { | 284 | [ C(OP_READ) ] = { |
288 | /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ | 285 | /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ |
289 | [ C(RESULT_ACCESS) ] = 0x01b7, | 286 | [ C(RESULT_ACCESS) ] = 0x01b7, |
290 | /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ | 287 | /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ |
291 | [ C(RESULT_MISS) ] = 0x01bb, | 288 | [ C(RESULT_MISS) ] = 0x01b7, |
292 | }, | 289 | }, |
293 | /* | 290 | /* |
294 | * Use RFO, not WRITEBACK, because a write miss would typically occur | 291 | * Use RFO, not WRITEBACK, because a write miss would typically occur |
295 | * on RFO. | 292 | * on RFO. |
296 | */ | 293 | */ |
297 | [ C(OP_WRITE) ] = { | 294 | [ C(OP_WRITE) ] = { |
298 | /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */ | 295 | /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ |
299 | [ C(RESULT_ACCESS) ] = 0x01bb, | 296 | [ C(RESULT_ACCESS) ] = 0x01b7, |
300 | /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */ | 297 | /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ |
301 | [ C(RESULT_MISS) ] = 0x01b7, | 298 | [ C(RESULT_MISS) ] = 0x01b7, |
302 | }, | 299 | }, |
303 | [ C(OP_PREFETCH) ] = { | 300 | [ C(OP_PREFETCH) ] = { |
304 | /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ | 301 | /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ |
305 | [ C(RESULT_ACCESS) ] = 0x01b7, | 302 | [ C(RESULT_ACCESS) ] = 0x01b7, |
306 | /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ | 303 | /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ |
307 | [ C(RESULT_MISS) ] = 0x01bb, | 304 | [ C(RESULT_MISS) ] = 0x01b7, |
308 | }, | 305 | }, |
309 | }, | 306 | }, |
310 | [ C(DTLB) ] = { | 307 | [ C(DTLB) ] = { |
@@ -352,16 +349,36 @@ static __initconst const u64 westmere_hw_cache_event_ids | |||
352 | }; | 349 | }; |
353 | 350 | ||
354 | /* | 351 | /* |
355 | * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3 | 352 | * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits; |
353 | * See IA32 SDM Vol 3B 30.6.1.3 | ||
356 | */ | 354 | */ |
357 | 355 | ||
358 | #define DMND_DATA_RD (1 << 0) | 356 | #define NHM_DMND_DATA_RD (1 << 0) |
359 | #define DMND_RFO (1 << 1) | 357 | #define NHM_DMND_RFO (1 << 1) |
360 | #define DMND_WB (1 << 3) | 358 | #define NHM_DMND_IFETCH (1 << 2) |
361 | #define PF_DATA_RD (1 << 4) | 359 | #define NHM_DMND_WB (1 << 3) |
362 | #define PF_DATA_RFO (1 << 5) | 360 | #define NHM_PF_DATA_RD (1 << 4) |
363 | #define RESP_UNCORE_HIT (1 << 8) | 361 | #define NHM_PF_DATA_RFO (1 << 5) |
364 | #define RESP_MISS (0xf600) /* non uncore hit */ | 362 | #define NHM_PF_IFETCH (1 << 6) |
363 | #define NHM_OFFCORE_OTHER (1 << 7) | ||
364 | #define NHM_UNCORE_HIT (1 << 8) | ||
365 | #define NHM_OTHER_CORE_HIT_SNP (1 << 9) | ||
366 | #define NHM_OTHER_CORE_HITM (1 << 10) | ||
367 | /* reserved */ | ||
368 | #define NHM_REMOTE_CACHE_FWD (1 << 12) | ||
369 | #define NHM_REMOTE_DRAM (1 << 13) | ||
370 | #define NHM_LOCAL_DRAM (1 << 14) | ||
371 | #define NHM_NON_DRAM (1 << 15) | ||
372 | |||
373 | #define NHM_ALL_DRAM (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM) | ||
374 | |||
375 | #define NHM_DMND_READ (NHM_DMND_DATA_RD) | ||
376 | #define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB) | ||
377 | #define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO) | ||
378 | |||
379 | #define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM) | ||
380 | #define NHM_L3_MISS (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD) | ||
381 | #define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS) | ||
365 | 382 | ||
366 | static __initconst const u64 nehalem_hw_cache_extra_regs | 383 | static __initconst const u64 nehalem_hw_cache_extra_regs |
367 | [PERF_COUNT_HW_CACHE_MAX] | 384 | [PERF_COUNT_HW_CACHE_MAX] |
@@ -370,16 +387,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs | |||
370 | { | 387 | { |
371 | [ C(LL ) ] = { | 388 | [ C(LL ) ] = { |
372 | [ C(OP_READ) ] = { | 389 | [ C(OP_READ) ] = { |
373 | [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT, | 390 | [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS, |
374 | [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS, | 391 | [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS, |
375 | }, | 392 | }, |
376 | [ C(OP_WRITE) ] = { | 393 | [ C(OP_WRITE) ] = { |
377 | [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT, | 394 | [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS, |
378 | [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS, | 395 | [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS, |
379 | }, | 396 | }, |
380 | [ C(OP_PREFETCH) ] = { | 397 | [ C(OP_PREFETCH) ] = { |
381 | [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT, | 398 | [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, |
382 | [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS, | 399 | [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, |
383 | }, | 400 | }, |
384 | } | 401 | } |
385 | }; | 402 | }; |
@@ -391,12 +408,12 @@ static __initconst const u64 nehalem_hw_cache_event_ids | |||
391 | { | 408 | { |
392 | [ C(L1D) ] = { | 409 | [ C(L1D) ] = { |
393 | [ C(OP_READ) ] = { | 410 | [ C(OP_READ) ] = { |
394 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | 411 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ |
395 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | 412 | [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ |
396 | }, | 413 | }, |
397 | [ C(OP_WRITE) ] = { | 414 | [ C(OP_WRITE) ] = { |
398 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | 415 | [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
399 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | 416 | [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ |
400 | }, | 417 | }, |
401 | [ C(OP_PREFETCH) ] = { | 418 | [ C(OP_PREFETCH) ] = { |
402 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | 419 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ |
@@ -933,6 +950,16 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
933 | 950 | ||
934 | cpuc = &__get_cpu_var(cpu_hw_events); | 951 | cpuc = &__get_cpu_var(cpu_hw_events); |
935 | 952 | ||
953 | /* | ||
954 | * Some chipsets need to unmask the LVTPC in a particular spot | ||
955 | * inside the nmi handler. As a result, the unmasking was pushed | ||
956 | * into all the nmi handlers. | ||
957 | * | ||
958 | * This handler doesn't seem to have any issues with the unmasking | ||
959 | * so it was left at the top. | ||
960 | */ | ||
961 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
962 | |||
936 | intel_pmu_disable_all(); | 963 | intel_pmu_disable_all(); |
937 | handled = intel_pmu_drain_bts_buffer(); | 964 | handled = intel_pmu_drain_bts_buffer(); |
938 | status = intel_pmu_get_status(); | 965 | status = intel_pmu_get_status(); |
@@ -998,6 +1025,9 @@ intel_bts_constraints(struct perf_event *event) | |||
998 | struct hw_perf_event *hwc = &event->hw; | 1025 | struct hw_perf_event *hwc = &event->hw; |
999 | unsigned int hw_event, bts_event; | 1026 | unsigned int hw_event, bts_event; |
1000 | 1027 | ||
1028 | if (event->attr.freq) | ||
1029 | return NULL; | ||
1030 | |||
1001 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; | 1031 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; |
1002 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); | 1032 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); |
1003 | 1033 | ||
@@ -1305,7 +1335,7 @@ static void intel_clovertown_quirks(void) | |||
1305 | * AJ106 could possibly be worked around by not allowing LBR | 1335 | * AJ106 could possibly be worked around by not allowing LBR |
1306 | * usage from PEBS, including the fixup. | 1336 | * usage from PEBS, including the fixup. |
1307 | * AJ68 could possibly be worked around by always programming | 1337 | * AJ68 could possibly be worked around by always programming |
1308 | * a pebs_event_reset[0] value and coping with the lost events. | 1338 | * a pebs_event_reset[0] value and coping with the lost events. |
1309 | * | 1339 | * |
1310 | * But taken together it might just make sense to not enable PEBS on | 1340 | * But taken together it might just make sense to not enable PEBS on |
1311 | * these chips. | 1341 | * these chips. |
@@ -1409,6 +1439,23 @@ static __init int intel_pmu_init(void) | |||
1409 | x86_pmu.percore_constraints = intel_nehalem_percore_constraints; | 1439 | x86_pmu.percore_constraints = intel_nehalem_percore_constraints; |
1410 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | 1440 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1411 | x86_pmu.extra_regs = intel_nehalem_extra_regs; | 1441 | x86_pmu.extra_regs = intel_nehalem_extra_regs; |
1442 | |||
1443 | /* UOPS_ISSUED.STALLED_CYCLES */ | ||
1444 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | ||
1445 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ | ||
1446 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; | ||
1447 | |||
1448 | if (ebx & 0x40) { | ||
1449 | /* | ||
1450 | * Erratum AAJ80 detected, we work it around by using | ||
1451 | * the BR_MISP_EXEC.ANY event. This will over-count | ||
1452 | * branch-misses, but it's still much better than the | ||
1453 | * architectural event which is often completely bogus: | ||
1454 | */ | ||
1455 | intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; | ||
1456 | |||
1457 | pr_cont("erratum AAJ80 worked around, "); | ||
1458 | } | ||
1412 | pr_cont("Nehalem events, "); | 1459 | pr_cont("Nehalem events, "); |
1413 | break; | 1460 | break; |
1414 | 1461 | ||
@@ -1425,6 +1472,7 @@ static __init int intel_pmu_init(void) | |||
1425 | 1472 | ||
1426 | case 37: /* 32 nm nehalem, "Clarkdale" */ | 1473 | case 37: /* 32 nm nehalem, "Clarkdale" */ |
1427 | case 44: /* 32 nm nehalem, "Gulftown" */ | 1474 | case 44: /* 32 nm nehalem, "Gulftown" */ |
1475 | case 47: /* 32 nm Xeon E7 */ | ||
1428 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, | 1476 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, |
1429 | sizeof(hw_cache_event_ids)); | 1477 | sizeof(hw_cache_event_ids)); |
1430 | memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, | 1478 | memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, |
@@ -1437,6 +1485,12 @@ static __init int intel_pmu_init(void) | |||
1437 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | 1485 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1438 | x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; | 1486 | x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; |
1439 | x86_pmu.extra_regs = intel_westmere_extra_regs; | 1487 | x86_pmu.extra_regs = intel_westmere_extra_regs; |
1488 | |||
1489 | /* UOPS_ISSUED.STALLED_CYCLES */ | ||
1490 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | ||
1491 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ | ||
1492 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; | ||
1493 | |||
1440 | pr_cont("Westmere events, "); | 1494 | pr_cont("Westmere events, "); |
1441 | break; | 1495 | break; |
1442 | 1496 | ||
@@ -1448,6 +1502,12 @@ static __init int intel_pmu_init(void) | |||
1448 | 1502 | ||
1449 | x86_pmu.event_constraints = intel_snb_event_constraints; | 1503 | x86_pmu.event_constraints = intel_snb_event_constraints; |
1450 | x86_pmu.pebs_constraints = intel_snb_pebs_events; | 1504 | x86_pmu.pebs_constraints = intel_snb_pebs_events; |
1505 | |||
1506 | /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ | ||
1507 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | ||
1508 | /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles */ | ||
1509 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1; | ||
1510 | |||
1451 | pr_cont("SandyBridge events, "); | 1511 | pr_cont("SandyBridge events, "); |
1452 | break; | 1512 | break; |
1453 | 1513 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index c2520e178d32..ead584fb6a7d 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -468,7 +468,7 @@ static struct p4_event_bind p4_event_bind_map[] = { | |||
468 | .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), | 468 | .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), |
469 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | 469 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, |
470 | .escr_emask = | 470 | .escr_emask = |
471 | P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), | 471 | P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), |
472 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 472 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
473 | }, | 473 | }, |
474 | [P4_EVENT_X87_ASSIST] = { | 474 | [P4_EVENT_X87_ASSIST] = { |
@@ -912,8 +912,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) | |||
912 | int idx, handled = 0; | 912 | int idx, handled = 0; |
913 | u64 val; | 913 | u64 val; |
914 | 914 | ||
915 | data.addr = 0; | 915 | perf_sample_data_init(&data, 0); |
916 | data.raw = NULL; | ||
917 | 916 | ||
918 | cpuc = &__get_cpu_var(cpu_hw_events); | 917 | cpuc = &__get_cpu_var(cpu_hw_events); |
919 | 918 | ||
@@ -947,14 +946,23 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) | |||
947 | if (!x86_perf_event_set_period(event)) | 946 | if (!x86_perf_event_set_period(event)) |
948 | continue; | 947 | continue; |
949 | if (perf_event_overflow(event, 1, &data, regs)) | 948 | if (perf_event_overflow(event, 1, &data, regs)) |
950 | p4_pmu_disable_event(event); | 949 | x86_pmu_stop(event, 0); |
951 | } | 950 | } |
952 | 951 | ||
953 | if (handled) { | 952 | if (handled) |
954 | /* p4 quirk: unmask it again */ | ||
955 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | ||
956 | inc_irq_stat(apic_perf_irqs); | 953 | inc_irq_stat(apic_perf_irqs); |
957 | } | 954 | |
955 | /* | ||
956 | * When dealing with the unmasking of the LVTPC on P4 perf hw, it has | ||
957 | * been observed that the OVF bit flag has to be cleared first _before_ | ||
958 | * the LVTPC can be unmasked. | ||
959 | * | ||
960 | * The reason is the NMI line will continue to be asserted while the OVF | ||
961 | * bit is set. This causes a second NMI to be generated if the LVTPC is | ||
962 | * unmasked before the OVF bit is cleared, leading to unknown NMI | ||
963 | * messages. | ||
964 | */ | ||
965 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
958 | 966 | ||
959 | return handled; | 967 | return handled; |
960 | } | 968 | } |
@@ -1188,7 +1196,7 @@ static __init int p4_pmu_init(void) | |||
1188 | { | 1196 | { |
1189 | unsigned int low, high; | 1197 | unsigned int low, high; |
1190 | 1198 | ||
1191 | /* If we get stripped -- indexig fails */ | 1199 | /* If we get stripped -- indexing fails */ |
1192 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); | 1200 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); |
1193 | 1201 | ||
1194 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); | 1202 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); |
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 706a9fb46a58..690bc8461835 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c | |||
@@ -369,6 +369,7 @@ static struct of_ioapic_type of_ioapic_type[] = | |||
369 | static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize, | 369 | static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize, |
370 | u32 *out_hwirq, u32 *out_type) | 370 | u32 *out_hwirq, u32 *out_type) |
371 | { | 371 | { |
372 | struct mp_ioapic_gsi *gsi_cfg; | ||
372 | struct io_apic_irq_attr attr; | 373 | struct io_apic_irq_attr attr; |
373 | struct of_ioapic_type *it; | 374 | struct of_ioapic_type *it; |
374 | u32 line, idx, type; | 375 | u32 line, idx, type; |
@@ -378,7 +379,8 @@ static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize, | |||
378 | 379 | ||
379 | line = *intspec; | 380 | line = *intspec; |
380 | idx = (u32) id->priv; | 381 | idx = (u32) id->priv; |
381 | *out_hwirq = line + mp_gsi_routing[idx].gsi_base; | 382 | gsi_cfg = mp_ioapic_gsi_routing(idx); |
383 | *out_hwirq = line + gsi_cfg->gsi_base; | ||
382 | 384 | ||
383 | intspec++; | 385 | intspec++; |
384 | type = *intspec; | 386 | type = *intspec; |
@@ -391,7 +393,7 @@ static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize, | |||
391 | 393 | ||
392 | set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity); | 394 | set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity); |
393 | 395 | ||
394 | return io_apic_setup_irq_pin(*out_hwirq, cpu_to_node(0), &attr); | 396 | return io_apic_setup_irq_pin_once(*out_hwirq, cpu_to_node(0), &attr); |
395 | } | 397 | } |
396 | 398 | ||
397 | static void __init ioapic_add_ofnode(struct device_node *np) | 399 | static void __init ioapic_add_ofnode(struct device_node *np) |
@@ -407,7 +409,7 @@ static void __init ioapic_add_ofnode(struct device_node *np) | |||
407 | } | 409 | } |
408 | 410 | ||
409 | for (i = 0; i < nr_ioapics; i++) { | 411 | for (i = 0; i < nr_ioapics; i++) { |
410 | if (r.start == mp_ioapics[i].apicaddr) { | 412 | if (r.start == mpc_ioapic_addr(i)) { |
411 | struct irq_domain *id; | 413 | struct irq_domain *id; |
412 | 414 | ||
413 | id = kzalloc(sizeof(*id), GFP_KERNEL); | 415 | id = kzalloc(sizeof(*id), GFP_KERNEL); |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index e2a3f0606da4..1aae78f775fc 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -135,20 +135,6 @@ print_context_stack_bp(struct thread_info *tinfo, | |||
135 | } | 135 | } |
136 | EXPORT_SYMBOL_GPL(print_context_stack_bp); | 136 | EXPORT_SYMBOL_GPL(print_context_stack_bp); |
137 | 137 | ||
138 | |||
139 | static void | ||
140 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
141 | { | ||
142 | printk(data); | ||
143 | print_symbol(msg, symbol); | ||
144 | printk("\n"); | ||
145 | } | ||
146 | |||
147 | static void print_trace_warning(void *data, char *msg) | ||
148 | { | ||
149 | printk("%s%s\n", (char *)data, msg); | ||
150 | } | ||
151 | |||
152 | static int print_trace_stack(void *data, char *name) | 138 | static int print_trace_stack(void *data, char *name) |
153 | { | 139 | { |
154 | printk("%s <%s> ", (char *)data, name); | 140 | printk("%s <%s> ", (char *)data, name); |
@@ -166,8 +152,6 @@ static void print_trace_address(void *data, unsigned long addr, int reliable) | |||
166 | } | 152 | } |
167 | 153 | ||
168 | static const struct stacktrace_ops print_trace_ops = { | 154 | static const struct stacktrace_ops print_trace_ops = { |
169 | .warning = print_trace_warning, | ||
170 | .warning_symbol = print_trace_warning_symbol, | ||
171 | .stack = print_trace_stack, | 155 | .stack = print_trace_stack, |
172 | .address = print_trace_address, | 156 | .address = print_trace_address, |
173 | .walk_stack = print_context_stack, | 157 | .walk_stack = print_context_stack, |
@@ -279,7 +263,6 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) | |||
279 | printk("DEBUG_PAGEALLOC"); | 263 | printk("DEBUG_PAGEALLOC"); |
280 | #endif | 264 | #endif |
281 | printk("\n"); | 265 | printk("\n"); |
282 | sysfs_printk_last_file(); | ||
283 | if (notify_die(DIE_OOPS, str, regs, err, | 266 | if (notify_die(DIE_OOPS, str, regs, err, |
284 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | 267 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) |
285 | return 1; | 268 | return 1; |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index a93742a57468..0ba15a6cc57e 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -260,9 +260,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code) | |||
260 | return mod_code_status; | 260 | return mod_code_status; |
261 | } | 261 | } |
262 | 262 | ||
263 | static unsigned char *ftrace_nop_replace(void) | 263 | static const unsigned char *ftrace_nop_replace(void) |
264 | { | 264 | { |
265 | return ideal_nop5; | 265 | return ideal_nops[NOP_ATOMIC5]; |
266 | } | 266 | } |
267 | 267 | ||
268 | static int | 268 | static int |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index d6d6bb361931..3bb08509a7a1 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -23,7 +23,6 @@ | |||
23 | static void __init i386_default_early_setup(void) | 23 | static void __init i386_default_early_setup(void) |
24 | { | 24 | { |
25 | /* Initialize 32bit specific setup functions */ | 25 | /* Initialize 32bit specific setup functions */ |
26 | x86_init.resources.probe_roms = probe_roms; | ||
27 | x86_init.resources.reserve_resources = i386_reserve_resources; | 26 | x86_init.resources.reserve_resources = i386_reserve_resources; |
28 | x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; | 27 | x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; |
29 | 28 | ||
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index bfe8f729e086..6781765b3a0d 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -217,7 +217,7 @@ static void hpet_reserve_platform_timers(unsigned int id) { } | |||
217 | /* | 217 | /* |
218 | * Common hpet info | 218 | * Common hpet info |
219 | */ | 219 | */ |
220 | static unsigned long hpet_period; | 220 | static unsigned long hpet_freq; |
221 | 221 | ||
222 | static void hpet_legacy_set_mode(enum clock_event_mode mode, | 222 | static void hpet_legacy_set_mode(enum clock_event_mode mode, |
223 | struct clock_event_device *evt); | 223 | struct clock_event_device *evt); |
@@ -232,7 +232,6 @@ static struct clock_event_device hpet_clockevent = { | |||
232 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | 232 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, |
233 | .set_mode = hpet_legacy_set_mode, | 233 | .set_mode = hpet_legacy_set_mode, |
234 | .set_next_event = hpet_legacy_next_event, | 234 | .set_next_event = hpet_legacy_next_event, |
235 | .shift = 32, | ||
236 | .irq = 0, | 235 | .irq = 0, |
237 | .rating = 50, | 236 | .rating = 50, |
238 | }; | 237 | }; |
@@ -290,28 +289,12 @@ static void hpet_legacy_clockevent_register(void) | |||
290 | hpet_enable_legacy_int(); | 289 | hpet_enable_legacy_int(); |
291 | 290 | ||
292 | /* | 291 | /* |
293 | * The mult factor is defined as (include/linux/clockchips.h) | ||
294 | * mult/2^shift = cyc/ns (in contrast to ns/cyc in clocksource.h) | ||
295 | * hpet_period is in units of femtoseconds (per cycle), so | ||
296 | * mult/2^shift = cyc/ns = 10^6/hpet_period | ||
297 | * mult = (10^6 * 2^shift)/hpet_period | ||
298 | * mult = (FSEC_PER_NSEC << hpet_clockevent.shift)/hpet_period | ||
299 | */ | ||
300 | hpet_clockevent.mult = div_sc((unsigned long) FSEC_PER_NSEC, | ||
301 | hpet_period, hpet_clockevent.shift); | ||
302 | /* Calculate the min / max delta */ | ||
303 | hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, | ||
304 | &hpet_clockevent); | ||
305 | /* Setup minimum reprogramming delta. */ | ||
306 | hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA, | ||
307 | &hpet_clockevent); | ||
308 | |||
309 | /* | ||
310 | * Start hpet with the boot cpu mask and make it | 292 | * Start hpet with the boot cpu mask and make it |
311 | * global after the IO_APIC has been initialized. | 293 | * global after the IO_APIC has been initialized. |
312 | */ | 294 | */ |
313 | hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); | 295 | hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); |
314 | clockevents_register_device(&hpet_clockevent); | 296 | clockevents_config_and_register(&hpet_clockevent, hpet_freq, |
297 | HPET_MIN_PROG_DELTA, 0x7FFFFFFF); | ||
315 | global_clock_event = &hpet_clockevent; | 298 | global_clock_event = &hpet_clockevent; |
316 | printk(KERN_DEBUG "hpet clockevent registered\n"); | 299 | printk(KERN_DEBUG "hpet clockevent registered\n"); |
317 | } | 300 | } |
@@ -549,7 +532,6 @@ static int hpet_setup_irq(struct hpet_dev *dev) | |||
549 | static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) | 532 | static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) |
550 | { | 533 | { |
551 | struct clock_event_device *evt = &hdev->evt; | 534 | struct clock_event_device *evt = &hdev->evt; |
552 | uint64_t hpet_freq; | ||
553 | 535 | ||
554 | WARN_ON(cpu != smp_processor_id()); | 536 | WARN_ON(cpu != smp_processor_id()); |
555 | if (!(hdev->flags & HPET_DEV_VALID)) | 537 | if (!(hdev->flags & HPET_DEV_VALID)) |
@@ -571,24 +553,10 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) | |||
571 | 553 | ||
572 | evt->set_mode = hpet_msi_set_mode; | 554 | evt->set_mode = hpet_msi_set_mode; |
573 | evt->set_next_event = hpet_msi_next_event; | 555 | evt->set_next_event = hpet_msi_next_event; |
574 | evt->shift = 32; | ||
575 | |||
576 | /* | ||
577 | * The period is a femto seconds value. We need to calculate the | ||
578 | * scaled math multiplication factor for nanosecond to hpet tick | ||
579 | * conversion. | ||
580 | */ | ||
581 | hpet_freq = FSEC_PER_SEC; | ||
582 | do_div(hpet_freq, hpet_period); | ||
583 | evt->mult = div_sc((unsigned long) hpet_freq, | ||
584 | NSEC_PER_SEC, evt->shift); | ||
585 | /* Calculate the max delta */ | ||
586 | evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt); | ||
587 | /* 5 usec minimum reprogramming delta. */ | ||
588 | evt->min_delta_ns = 5000; | ||
589 | |||
590 | evt->cpumask = cpumask_of(hdev->cpu); | 556 | evt->cpumask = cpumask_of(hdev->cpu); |
591 | clockevents_register_device(evt); | 557 | |
558 | clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA, | ||
559 | 0x7FFFFFFF); | ||
592 | } | 560 | } |
593 | 561 | ||
594 | #ifdef CONFIG_HPET | 562 | #ifdef CONFIG_HPET |
@@ -792,7 +760,6 @@ static struct clocksource clocksource_hpet = { | |||
792 | static int hpet_clocksource_register(void) | 760 | static int hpet_clocksource_register(void) |
793 | { | 761 | { |
794 | u64 start, now; | 762 | u64 start, now; |
795 | u64 hpet_freq; | ||
796 | cycle_t t1; | 763 | cycle_t t1; |
797 | 764 | ||
798 | /* Start the counter */ | 765 | /* Start the counter */ |
@@ -819,24 +786,7 @@ static int hpet_clocksource_register(void) | |||
819 | return -ENODEV; | 786 | return -ENODEV; |
820 | } | 787 | } |
821 | 788 | ||
822 | /* | ||
823 | * The definition of mult is (include/linux/clocksource.h) | ||
824 | * mult/2^shift = ns/cyc and hpet_period is in units of fsec/cyc | ||
825 | * so we first need to convert hpet_period to ns/cyc units: | ||
826 | * mult/2^shift = ns/cyc = hpet_period/10^6 | ||
827 | * mult = (hpet_period * 2^shift)/10^6 | ||
828 | * mult = (hpet_period << shift)/FSEC_PER_NSEC | ||
829 | */ | ||
830 | |||
831 | /* Need to convert hpet_period (fsec/cyc) to cyc/sec: | ||
832 | * | ||
833 | * cyc/sec = FSEC_PER_SEC/hpet_period(fsec/cyc) | ||
834 | * cyc/sec = (FSEC_PER_NSEC * NSEC_PER_SEC)/hpet_period | ||
835 | */ | ||
836 | hpet_freq = FSEC_PER_SEC; | ||
837 | do_div(hpet_freq, hpet_period); | ||
838 | clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq); | 789 | clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq); |
839 | |||
840 | return 0; | 790 | return 0; |
841 | } | 791 | } |
842 | 792 | ||
@@ -845,7 +795,9 @@ static int hpet_clocksource_register(void) | |||
845 | */ | 795 | */ |
846 | int __init hpet_enable(void) | 796 | int __init hpet_enable(void) |
847 | { | 797 | { |
798 | unsigned long hpet_period; | ||
848 | unsigned int id; | 799 | unsigned int id; |
800 | u64 freq; | ||
849 | int i; | 801 | int i; |
850 | 802 | ||
851 | if (!is_hpet_capable()) | 803 | if (!is_hpet_capable()) |
@@ -884,6 +836,14 @@ int __init hpet_enable(void) | |||
884 | goto out_nohpet; | 836 | goto out_nohpet; |
885 | 837 | ||
886 | /* | 838 | /* |
839 | * The period is a femto seconds value. Convert it to a | ||
840 | * frequency. | ||
841 | */ | ||
842 | freq = FSEC_PER_SEC; | ||
843 | do_div(freq, hpet_period); | ||
844 | hpet_freq = freq; | ||
845 | |||
846 | /* | ||
887 | * Read the HPET ID register to retrieve the IRQ routing | 847 | * Read the HPET ID register to retrieve the IRQ routing |
888 | * information and the number of channels | 848 | * information and the number of channels |
889 | */ | 849 | */ |
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 2dfd31597443..fb66dc9e36cb 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c | |||
@@ -93,7 +93,6 @@ static struct clock_event_device pit_ce = { | |||
93 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | 93 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, |
94 | .set_mode = init_pit_timer, | 94 | .set_mode = init_pit_timer, |
95 | .set_next_event = pit_next_event, | 95 | .set_next_event = pit_next_event, |
96 | .shift = 32, | ||
97 | .irq = 0, | 96 | .irq = 0, |
98 | }; | 97 | }; |
99 | 98 | ||
@@ -108,90 +107,12 @@ void __init setup_pit_timer(void) | |||
108 | * IO_APIC has been initialized. | 107 | * IO_APIC has been initialized. |
109 | */ | 108 | */ |
110 | pit_ce.cpumask = cpumask_of(smp_processor_id()); | 109 | pit_ce.cpumask = cpumask_of(smp_processor_id()); |
111 | pit_ce.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, pit_ce.shift); | ||
112 | pit_ce.max_delta_ns = clockevent_delta2ns(0x7FFF, &pit_ce); | ||
113 | pit_ce.min_delta_ns = clockevent_delta2ns(0xF, &pit_ce); | ||
114 | 110 | ||
115 | clockevents_register_device(&pit_ce); | 111 | clockevents_config_and_register(&pit_ce, CLOCK_TICK_RATE, 0xF, 0x7FFF); |
116 | global_clock_event = &pit_ce; | 112 | global_clock_event = &pit_ce; |
117 | } | 113 | } |
118 | 114 | ||
119 | #ifndef CONFIG_X86_64 | 115 | #ifndef CONFIG_X86_64 |
120 | /* | ||
121 | * Since the PIT overflows every tick, its not very useful | ||
122 | * to just read by itself. So use jiffies to emulate a free | ||
123 | * running counter: | ||
124 | */ | ||
125 | static cycle_t pit_read(struct clocksource *cs) | ||
126 | { | ||
127 | static int old_count; | ||
128 | static u32 old_jifs; | ||
129 | unsigned long flags; | ||
130 | int count; | ||
131 | u32 jifs; | ||
132 | |||
133 | raw_spin_lock_irqsave(&i8253_lock, flags); | ||
134 | /* | ||
135 | * Although our caller may have the read side of xtime_lock, | ||
136 | * this is now a seqlock, and we are cheating in this routine | ||
137 | * by having side effects on state that we cannot undo if | ||
138 | * there is a collision on the seqlock and our caller has to | ||
139 | * retry. (Namely, old_jifs and old_count.) So we must treat | ||
140 | * jiffies as volatile despite the lock. We read jiffies | ||
141 | * before latching the timer count to guarantee that although | ||
142 | * the jiffies value might be older than the count (that is, | ||
143 | * the counter may underflow between the last point where | ||
144 | * jiffies was incremented and the point where we latch the | ||
145 | * count), it cannot be newer. | ||
146 | */ | ||
147 | jifs = jiffies; | ||
148 | outb_pit(0x00, PIT_MODE); /* latch the count ASAP */ | ||
149 | count = inb_pit(PIT_CH0); /* read the latched count */ | ||
150 | count |= inb_pit(PIT_CH0) << 8; | ||
151 | |||
152 | /* VIA686a test code... reset the latch if count > max + 1 */ | ||
153 | if (count > LATCH) { | ||
154 | outb_pit(0x34, PIT_MODE); | ||
155 | outb_pit(LATCH & 0xff, PIT_CH0); | ||
156 | outb_pit(LATCH >> 8, PIT_CH0); | ||
157 | count = LATCH - 1; | ||
158 | } | ||
159 | |||
160 | /* | ||
161 | * It's possible for count to appear to go the wrong way for a | ||
162 | * couple of reasons: | ||
163 | * | ||
164 | * 1. The timer counter underflows, but we haven't handled the | ||
165 | * resulting interrupt and incremented jiffies yet. | ||
166 | * 2. Hardware problem with the timer, not giving us continuous time, | ||
167 | * the counter does small "jumps" upwards on some Pentium systems, | ||
168 | * (see c't 95/10 page 335 for Neptun bug.) | ||
169 | * | ||
170 | * Previous attempts to handle these cases intelligently were | ||
171 | * buggy, so we just do the simple thing now. | ||
172 | */ | ||
173 | if (count > old_count && jifs == old_jifs) | ||
174 | count = old_count; | ||
175 | |||
176 | old_count = count; | ||
177 | old_jifs = jifs; | ||
178 | |||
179 | raw_spin_unlock_irqrestore(&i8253_lock, flags); | ||
180 | |||
181 | count = (LATCH - 1) - count; | ||
182 | |||
183 | return (cycle_t)(jifs * LATCH) + count; | ||
184 | } | ||
185 | |||
186 | static struct clocksource pit_cs = { | ||
187 | .name = "pit", | ||
188 | .rating = 110, | ||
189 | .read = pit_read, | ||
190 | .mask = CLOCKSOURCE_MASK(32), | ||
191 | .mult = 0, | ||
192 | .shift = 20, | ||
193 | }; | ||
194 | |||
195 | static int __init init_pit_clocksource(void) | 116 | static int __init init_pit_clocksource(void) |
196 | { | 117 | { |
197 | /* | 118 | /* |
@@ -205,10 +126,7 @@ static int __init init_pit_clocksource(void) | |||
205 | pit_ce.mode != CLOCK_EVT_MODE_PERIODIC) | 126 | pit_ce.mode != CLOCK_EVT_MODE_PERIODIC) |
206 | return 0; | 127 | return 0; |
207 | 128 | ||
208 | pit_cs.mult = clocksource_hz2mult(CLOCK_TICK_RATE, pit_cs.shift); | 129 | return clocksource_i8253_init(); |
209 | |||
210 | return clocksource_register(&pit_cs); | ||
211 | } | 130 | } |
212 | arch_initcall(init_pit_clocksource); | 131 | arch_initcall(init_pit_clocksource); |
213 | |||
214 | #endif /* !CONFIG_X86_64 */ | 132 | #endif /* !CONFIG_X86_64 */ |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 1cb0b9fc78dc..6c0802eb2f7f 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -249,7 +249,7 @@ void fixup_irqs(void) | |||
249 | 249 | ||
250 | data = irq_desc_get_irq_data(desc); | 250 | data = irq_desc_get_irq_data(desc); |
251 | affinity = data->affinity; | 251 | affinity = data->affinity; |
252 | if (!irq_has_action(irq) || | 252 | if (!irq_has_action(irq) || irqd_is_per_cpu(data) || |
253 | cpumask_subset(affinity, cpu_online_mask)) { | 253 | cpumask_subset(affinity, cpu_online_mask)) { |
254 | raw_spin_unlock(&desc->lock); | 254 | raw_spin_unlock(&desc->lock); |
255 | continue; | 255 | continue; |
@@ -276,7 +276,8 @@ void fixup_irqs(void) | |||
276 | else if (!(warned++)) | 276 | else if (!(warned++)) |
277 | set_affinity = 0; | 277 | set_affinity = 0; |
278 | 278 | ||
279 | if (!irqd_can_move_in_process_context(data) && chip->irq_unmask) | 279 | if (!irqd_can_move_in_process_context(data) && |
280 | !irqd_irq_disabled(data) && chip->irq_unmask) | ||
280 | chip->irq_unmask(data); | 281 | chip->irq_unmask(data); |
281 | 282 | ||
282 | raw_spin_unlock(&desc->lock); | 283 | raw_spin_unlock(&desc->lock); |
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 961b6b30ba90..3fee346ef545 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c | |||
@@ -34,7 +34,7 @@ void arch_jump_label_transform(struct jump_entry *entry, | |||
34 | code.offset = entry->target - | 34 | code.offset = entry->target - |
35 | (entry->code + JUMP_LABEL_NOP_SIZE); | 35 | (entry->code + JUMP_LABEL_NOP_SIZE); |
36 | } else | 36 | } else |
37 | memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE); | 37 | memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE); |
38 | get_online_cpus(); | 38 | get_online_cpus(); |
39 | mutex_lock(&text_mutex); | 39 | mutex_lock(&text_mutex); |
40 | text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); | 40 | text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); |
@@ -44,7 +44,8 @@ void arch_jump_label_transform(struct jump_entry *entry, | |||
44 | 44 | ||
45 | void arch_jump_label_text_poke_early(jump_label_t addr) | 45 | void arch_jump_label_text_poke_early(jump_label_t addr) |
46 | { | 46 | { |
47 | text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE); | 47 | text_poke_early((void *)addr, ideal_nops[NOP_ATOMIC5], |
48 | JUMP_LABEL_NOP_SIZE); | ||
48 | } | 49 | } |
49 | 50 | ||
50 | #endif | 51 | #endif |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index c969fd9d1566..f1a6244d7d93 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -1183,12 +1183,13 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, | |||
1183 | struct pt_regs *regs) | 1183 | struct pt_regs *regs) |
1184 | { | 1184 | { |
1185 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | 1185 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); |
1186 | unsigned long flags; | ||
1186 | 1187 | ||
1187 | /* This is possible if op is under delayed unoptimizing */ | 1188 | /* This is possible if op is under delayed unoptimizing */ |
1188 | if (kprobe_disabled(&op->kp)) | 1189 | if (kprobe_disabled(&op->kp)) |
1189 | return; | 1190 | return; |
1190 | 1191 | ||
1191 | preempt_disable(); | 1192 | local_irq_save(flags); |
1192 | if (kprobe_running()) { | 1193 | if (kprobe_running()) { |
1193 | kprobes_inc_nmissed_count(&op->kp); | 1194 | kprobes_inc_nmissed_count(&op->kp); |
1194 | } else { | 1195 | } else { |
@@ -1207,7 +1208,7 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, | |||
1207 | opt_pre_handler(&op->kp, regs); | 1208 | opt_pre_handler(&op->kp, regs); |
1208 | __this_cpu_write(current_kprobe, NULL); | 1209 | __this_cpu_write(current_kprobe, NULL); |
1209 | } | 1210 | } |
1210 | preempt_enable_no_resched(); | 1211 | local_irq_restore(flags); |
1211 | } | 1212 | } |
1212 | 1213 | ||
1213 | static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) | 1214 | static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index f98d3eafe07a..6389a6bca11b 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -26,8 +26,6 @@ | |||
26 | #include <asm/x86_init.h> | 26 | #include <asm/x86_init.h> |
27 | #include <asm/reboot.h> | 27 | #include <asm/reboot.h> |
28 | 28 | ||
29 | #define KVM_SCALE 22 | ||
30 | |||
31 | static int kvmclock = 1; | 29 | static int kvmclock = 1; |
32 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; | 30 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; |
33 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; | 31 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; |
@@ -120,8 +118,6 @@ static struct clocksource kvm_clock = { | |||
120 | .read = kvm_clock_get_cycles, | 118 | .read = kvm_clock_get_cycles, |
121 | .rating = 400, | 119 | .rating = 400, |
122 | .mask = CLOCKSOURCE_MASK(64), | 120 | .mask = CLOCKSOURCE_MASK(64), |
123 | .mult = 1 << KVM_SCALE, | ||
124 | .shift = KVM_SCALE, | ||
125 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 121 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
126 | }; | 122 | }; |
127 | 123 | ||
@@ -203,7 +199,7 @@ void __init kvmclock_init(void) | |||
203 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 199 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
204 | #endif | 200 | #endif |
205 | kvm_get_preset_lpj(); | 201 | kvm_get_preset_lpj(); |
206 | clocksource_register(&kvm_clock); | 202 | clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); |
207 | pv_info.paravirt_enabled = 1; | 203 | pv_info.paravirt_enabled = 1; |
208 | pv_info.name = "KVM"; | 204 | pv_info.name = "KVM"; |
209 | 205 | ||
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index ab23f1ad4bf1..52f256f2cc81 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/bug.h> | 24 | #include <linux/bug.h> |
25 | #include <linux/mm.h> | 25 | #include <linux/mm.h> |
26 | #include <linux/gfp.h> | 26 | #include <linux/gfp.h> |
27 | #include <linux/jump_label.h> | ||
27 | 28 | ||
28 | #include <asm/system.h> | 29 | #include <asm/system.h> |
29 | #include <asm/page.h> | 30 | #include <asm/page.h> |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 5a532ce646bf..9103b89c145a 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -285,7 +285,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) | |||
285 | intsrc.type = MP_INTSRC; | 285 | intsrc.type = MP_INTSRC; |
286 | intsrc.irqflag = 0; /* conforming */ | 286 | intsrc.irqflag = 0; /* conforming */ |
287 | intsrc.srcbus = 0; | 287 | intsrc.srcbus = 0; |
288 | intsrc.dstapic = mp_ioapics[0].apicid; | 288 | intsrc.dstapic = mpc_ioapic_id(0); |
289 | 289 | ||
290 | intsrc.irqtype = mp_INT; | 290 | intsrc.irqtype = mp_INT; |
291 | 291 | ||
@@ -715,17 +715,15 @@ static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) | |||
715 | } | 715 | } |
716 | } | 716 | } |
717 | 717 | ||
718 | static int | 718 | static int __init |
719 | check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count) | 719 | check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count) |
720 | { | 720 | { |
721 | int ret = 0; | ||
722 | |||
723 | if (!mpc_new_phys || count <= mpc_new_length) { | 721 | if (!mpc_new_phys || count <= mpc_new_length) { |
724 | WARN(1, "update_mptable: No spare slots (length: %x)\n", count); | 722 | WARN(1, "update_mptable: No spare slots (length: %x)\n", count); |
725 | return -1; | 723 | return -1; |
726 | } | 724 | } |
727 | 725 | ||
728 | return ret; | 726 | return 0; |
729 | } | 727 | } |
730 | #else /* CONFIG_X86_IO_APIC */ | 728 | #else /* CONFIG_X86_IO_APIC */ |
731 | static | 729 | static |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 9ea999a4dcc1..b49d00da2aed 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -68,74 +68,10 @@ int dma_set_mask(struct device *dev, u64 mask) | |||
68 | } | 68 | } |
69 | EXPORT_SYMBOL(dma_set_mask); | 69 | EXPORT_SYMBOL(dma_set_mask); |
70 | 70 | ||
71 | #if defined(CONFIG_X86_64) && !defined(CONFIG_NUMA) | ||
72 | static __initdata void *dma32_bootmem_ptr; | ||
73 | static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); | ||
74 | |||
75 | static int __init parse_dma32_size_opt(char *p) | ||
76 | { | ||
77 | if (!p) | ||
78 | return -EINVAL; | ||
79 | dma32_bootmem_size = memparse(p, &p); | ||
80 | return 0; | ||
81 | } | ||
82 | early_param("dma32_size", parse_dma32_size_opt); | ||
83 | |||
84 | void __init dma32_reserve_bootmem(void) | ||
85 | { | ||
86 | unsigned long size, align; | ||
87 | if (max_pfn <= MAX_DMA32_PFN) | ||
88 | return; | ||
89 | |||
90 | /* | ||
91 | * check aperture_64.c allocate_aperture() for reason about | ||
92 | * using 512M as goal | ||
93 | */ | ||
94 | align = 64ULL<<20; | ||
95 | size = roundup(dma32_bootmem_size, align); | ||
96 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, | ||
97 | 512ULL<<20); | ||
98 | /* | ||
99 | * Kmemleak should not scan this block as it may not be mapped via the | ||
100 | * kernel direct mapping. | ||
101 | */ | ||
102 | kmemleak_ignore(dma32_bootmem_ptr); | ||
103 | if (dma32_bootmem_ptr) | ||
104 | dma32_bootmem_size = size; | ||
105 | else | ||
106 | dma32_bootmem_size = 0; | ||
107 | } | ||
108 | static void __init dma32_free_bootmem(void) | ||
109 | { | ||
110 | |||
111 | if (max_pfn <= MAX_DMA32_PFN) | ||
112 | return; | ||
113 | |||
114 | if (!dma32_bootmem_ptr) | ||
115 | return; | ||
116 | |||
117 | free_bootmem(__pa(dma32_bootmem_ptr), dma32_bootmem_size); | ||
118 | |||
119 | dma32_bootmem_ptr = NULL; | ||
120 | dma32_bootmem_size = 0; | ||
121 | } | ||
122 | #else | ||
123 | void __init dma32_reserve_bootmem(void) | ||
124 | { | ||
125 | } | ||
126 | static void __init dma32_free_bootmem(void) | ||
127 | { | ||
128 | } | ||
129 | |||
130 | #endif | ||
131 | |||
132 | void __init pci_iommu_alloc(void) | 71 | void __init pci_iommu_alloc(void) |
133 | { | 72 | { |
134 | struct iommu_table_entry *p; | 73 | struct iommu_table_entry *p; |
135 | 74 | ||
136 | /* free the range so iommu could get some range less than 4G */ | ||
137 | dma32_free_bootmem(); | ||
138 | |||
139 | sort_iommu_table(__iommu_table, __iommu_table_end); | 75 | sort_iommu_table(__iommu_table, __iommu_table_end); |
140 | check_iommu_entries(__iommu_table, __iommu_table_end); | 76 | check_iommu_entries(__iommu_table, __iommu_table_end); |
141 | 77 | ||
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c index 55d745ec1181..35ccf75696eb 100644 --- a/arch/x86/kernel/pci-iommu_table.c +++ b/arch/x86/kernel/pci-iommu_table.c | |||
@@ -50,20 +50,14 @@ void __init check_iommu_entries(struct iommu_table_entry *start, | |||
50 | struct iommu_table_entry *finish) | 50 | struct iommu_table_entry *finish) |
51 | { | 51 | { |
52 | struct iommu_table_entry *p, *q, *x; | 52 | struct iommu_table_entry *p, *q, *x; |
53 | char sym_p[KSYM_SYMBOL_LEN]; | ||
54 | char sym_q[KSYM_SYMBOL_LEN]; | ||
55 | 53 | ||
56 | /* Simple cyclic dependency checker. */ | 54 | /* Simple cyclic dependency checker. */ |
57 | for (p = start; p < finish; p++) { | 55 | for (p = start; p < finish; p++) { |
58 | q = find_dependents_of(start, finish, p); | 56 | q = find_dependents_of(start, finish, p); |
59 | x = find_dependents_of(start, finish, q); | 57 | x = find_dependents_of(start, finish, q); |
60 | if (p == x) { | 58 | if (p == x) { |
61 | sprint_symbol(sym_p, (unsigned long)p->detect); | 59 | printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %pS depends on %pS and vice-versa. BREAKING IT.\n", |
62 | sprint_symbol(sym_q, (unsigned long)q->detect); | 60 | p->detect, q->detect); |
63 | |||
64 | printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %s depends" \ | ||
65 | " on %s and vice-versa. BREAKING IT.\n", | ||
66 | sym_p, sym_q); | ||
67 | /* Heavy handed way..*/ | 61 | /* Heavy handed way..*/ |
68 | x->depend = 0; | 62 | x->depend = 0; |
69 | } | 63 | } |
@@ -72,12 +66,8 @@ void __init check_iommu_entries(struct iommu_table_entry *start, | |||
72 | for (p = start; p < finish; p++) { | 66 | for (p = start; p < finish; p++) { |
73 | q = find_dependents_of(p, finish, p); | 67 | q = find_dependents_of(p, finish, p); |
74 | if (q && q > p) { | 68 | if (q && q > p) { |
75 | sprint_symbol(sym_p, (unsigned long)p->detect); | 69 | printk(KERN_ERR "EXECUTION ORDER INVALID! %pS should be called before %pS!\n", |
76 | sprint_symbol(sym_q, (unsigned long)q->detect); | 70 | p->detect, q->detect); |
77 | |||
78 | printk(KERN_ERR "EXECUTION ORDER INVALID! %s "\ | ||
79 | "should be called before %s!\n", | ||
80 | sym_p, sym_q); | ||
81 | } | 71 | } |
82 | } | 72 | } |
83 | } | 73 | } |
diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms.c index 071e7fea42e5..ba0a4cce53be 100644 --- a/arch/x86/kernel/probe_roms_32.c +++ b/arch/x86/kernel/probe_roms.c | |||
@@ -73,6 +73,107 @@ static struct resource video_rom_resource = { | |||
73 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | 73 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
74 | }; | 74 | }; |
75 | 75 | ||
76 | /* does this oprom support the given pci device, or any of the devices | ||
77 | * that the driver supports? | ||
78 | */ | ||
79 | static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short device) | ||
80 | { | ||
81 | struct pci_driver *drv = pdev->driver; | ||
82 | const struct pci_device_id *id; | ||
83 | |||
84 | if (pdev->vendor == vendor && pdev->device == device) | ||
85 | return true; | ||
86 | |||
87 | for (id = drv ? drv->id_table : NULL; id && id->vendor; id++) | ||
88 | if (id->vendor == vendor && id->device == device) | ||
89 | break; | ||
90 | |||
91 | return id && id->vendor; | ||
92 | } | ||
93 | |||
94 | static bool probe_list(struct pci_dev *pdev, unsigned short vendor, | ||
95 | const unsigned char *rom_list) | ||
96 | { | ||
97 | unsigned short device; | ||
98 | |||
99 | do { | ||
100 | if (probe_kernel_address(rom_list, device) != 0) | ||
101 | device = 0; | ||
102 | |||
103 | if (device && match_id(pdev, vendor, device)) | ||
104 | break; | ||
105 | |||
106 | rom_list += 2; | ||
107 | } while (device); | ||
108 | |||
109 | return !!device; | ||
110 | } | ||
111 | |||
112 | static struct resource *find_oprom(struct pci_dev *pdev) | ||
113 | { | ||
114 | struct resource *oprom = NULL; | ||
115 | int i; | ||
116 | |||
117 | for (i = 0; i < ARRAY_SIZE(adapter_rom_resources); i++) { | ||
118 | struct resource *res = &adapter_rom_resources[i]; | ||
119 | unsigned short offset, vendor, device, list, rev; | ||
120 | const unsigned char *rom; | ||
121 | |||
122 | if (res->end == 0) | ||
123 | break; | ||
124 | |||
125 | rom = isa_bus_to_virt(res->start); | ||
126 | if (probe_kernel_address(rom + 0x18, offset) != 0) | ||
127 | continue; | ||
128 | |||
129 | if (probe_kernel_address(rom + offset + 0x4, vendor) != 0) | ||
130 | continue; | ||
131 | |||
132 | if (probe_kernel_address(rom + offset + 0x6, device) != 0) | ||
133 | continue; | ||
134 | |||
135 | if (match_id(pdev, vendor, device)) { | ||
136 | oprom = res; | ||
137 | break; | ||
138 | } | ||
139 | |||
140 | if (probe_kernel_address(rom + offset + 0x8, list) == 0 && | ||
141 | probe_kernel_address(rom + offset + 0xc, rev) == 0 && | ||
142 | rev >= 3 && list && | ||
143 | probe_list(pdev, vendor, rom + offset + list)) { | ||
144 | oprom = res; | ||
145 | break; | ||
146 | } | ||
147 | } | ||
148 | |||
149 | return oprom; | ||
150 | } | ||
151 | |||
152 | void *pci_map_biosrom(struct pci_dev *pdev) | ||
153 | { | ||
154 | struct resource *oprom = find_oprom(pdev); | ||
155 | |||
156 | if (!oprom) | ||
157 | return NULL; | ||
158 | |||
159 | return ioremap(oprom->start, resource_size(oprom)); | ||
160 | } | ||
161 | EXPORT_SYMBOL(pci_map_biosrom); | ||
162 | |||
163 | void pci_unmap_biosrom(void __iomem *image) | ||
164 | { | ||
165 | iounmap(image); | ||
166 | } | ||
167 | EXPORT_SYMBOL(pci_unmap_biosrom); | ||
168 | |||
169 | size_t pci_biosrom_size(struct pci_dev *pdev) | ||
170 | { | ||
171 | struct resource *oprom = find_oprom(pdev); | ||
172 | |||
173 | return oprom ? resource_size(oprom) : 0; | ||
174 | } | ||
175 | EXPORT_SYMBOL(pci_biosrom_size); | ||
176 | |||
76 | #define ROMSIGNATURE 0xaa55 | 177 | #define ROMSIGNATURE 0xaa55 |
77 | 178 | ||
78 | static int __init romsignature(const unsigned char *rom) | 179 | static int __init romsignature(const unsigned char *rom) |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d46cbe46b7ab..88a90a977f8e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -449,7 +449,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); | |||
449 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | 449 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) |
450 | { | 450 | { |
451 | if (!need_resched()) { | 451 | if (!need_resched()) { |
452 | if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) | 452 | if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) |
453 | clflush((void *)¤t_thread_info()->flags); | 453 | clflush((void *)¤t_thread_info()->flags); |
454 | 454 | ||
455 | __monitor((void *)&current_thread_info()->flags, 0, 0); | 455 | __monitor((void *)&current_thread_info()->flags, 0, 0); |
@@ -465,7 +465,7 @@ static void mwait_idle(void) | |||
465 | if (!need_resched()) { | 465 | if (!need_resched()) { |
466 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); | 466 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); |
467 | trace_cpu_idle(1, smp_processor_id()); | 467 | trace_cpu_idle(1, smp_processor_id()); |
468 | if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) | 468 | if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) |
469 | clflush((void *)¤t_thread_info()->flags); | 469 | clflush((void *)¤t_thread_info()->flags); |
470 | 470 | ||
471 | __monitor((void *)&current_thread_info()->flags, 0, 0); | 471 | __monitor((void *)&current_thread_info()->flags, 0, 0); |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 45892dc4b72a..807c2a2b80f1 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -608,6 +608,9 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) | |||
608 | unsigned len, type; | 608 | unsigned len, type; |
609 | struct perf_event *bp; | 609 | struct perf_event *bp; |
610 | 610 | ||
611 | if (ptrace_get_breakpoints(tsk) < 0) | ||
612 | return -ESRCH; | ||
613 | |||
611 | data &= ~DR_CONTROL_RESERVED; | 614 | data &= ~DR_CONTROL_RESERVED; |
612 | old_dr7 = ptrace_get_dr7(thread->ptrace_bps); | 615 | old_dr7 = ptrace_get_dr7(thread->ptrace_bps); |
613 | restore: | 616 | restore: |
@@ -655,6 +658,9 @@ restore: | |||
655 | } | 658 | } |
656 | goto restore; | 659 | goto restore; |
657 | } | 660 | } |
661 | |||
662 | ptrace_put_breakpoints(tsk); | ||
663 | |||
658 | return ((orig_ret < 0) ? orig_ret : rc); | 664 | return ((orig_ret < 0) ? orig_ret : rc); |
659 | } | 665 | } |
660 | 666 | ||
@@ -668,10 +674,17 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) | |||
668 | 674 | ||
669 | if (n < HBP_NUM) { | 675 | if (n < HBP_NUM) { |
670 | struct perf_event *bp; | 676 | struct perf_event *bp; |
677 | |||
678 | if (ptrace_get_breakpoints(tsk) < 0) | ||
679 | return -ESRCH; | ||
680 | |||
671 | bp = thread->ptrace_bps[n]; | 681 | bp = thread->ptrace_bps[n]; |
672 | if (!bp) | 682 | if (!bp) |
673 | return 0; | 683 | val = 0; |
674 | val = bp->hw.info.address; | 684 | else |
685 | val = bp->hw.info.address; | ||
686 | |||
687 | ptrace_put_breakpoints(tsk); | ||
675 | } else if (n == 6) { | 688 | } else if (n == 6) { |
676 | val = thread->debugreg6; | 689 | val = thread->debugreg6; |
677 | } else if (n == 7) { | 690 | } else if (n == 7) { |
@@ -686,6 +699,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, | |||
686 | struct perf_event *bp; | 699 | struct perf_event *bp; |
687 | struct thread_struct *t = &tsk->thread; | 700 | struct thread_struct *t = &tsk->thread; |
688 | struct perf_event_attr attr; | 701 | struct perf_event_attr attr; |
702 | int err = 0; | ||
703 | |||
704 | if (ptrace_get_breakpoints(tsk) < 0) | ||
705 | return -ESRCH; | ||
689 | 706 | ||
690 | if (!t->ptrace_bps[nr]) { | 707 | if (!t->ptrace_bps[nr]) { |
691 | ptrace_breakpoint_init(&attr); | 708 | ptrace_breakpoint_init(&attr); |
@@ -709,24 +726,23 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, | |||
709 | * writing for the user. And anyway this is the previous | 726 | * writing for the user. And anyway this is the previous |
710 | * behaviour. | 727 | * behaviour. |
711 | */ | 728 | */ |
712 | if (IS_ERR(bp)) | 729 | if (IS_ERR(bp)) { |
713 | return PTR_ERR(bp); | 730 | err = PTR_ERR(bp); |
731 | goto put; | ||
732 | } | ||
714 | 733 | ||
715 | t->ptrace_bps[nr] = bp; | 734 | t->ptrace_bps[nr] = bp; |
716 | } else { | 735 | } else { |
717 | int err; | ||
718 | |||
719 | bp = t->ptrace_bps[nr]; | 736 | bp = t->ptrace_bps[nr]; |
720 | 737 | ||
721 | attr = bp->attr; | 738 | attr = bp->attr; |
722 | attr.bp_addr = addr; | 739 | attr.bp_addr = addr; |
723 | err = modify_user_hw_breakpoint(bp, &attr); | 740 | err = modify_user_hw_breakpoint(bp, &attr); |
724 | if (err) | ||
725 | return err; | ||
726 | } | 741 | } |
727 | 742 | ||
728 | 743 | put: | |
729 | return 0; | 744 | ptrace_put_breakpoints(tsk); |
745 | return err; | ||
730 | } | 746 | } |
731 | 747 | ||
732 | /* | 748 | /* |
@@ -1347,7 +1363,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, | |||
1347 | * We must return the syscall number to actually look up in the table. | 1363 | * We must return the syscall number to actually look up in the table. |
1348 | * This can be -1L to skip running any syscall at all. | 1364 | * This can be -1L to skip running any syscall at all. |
1349 | */ | 1365 | */ |
1350 | asmregparm long syscall_trace_enter(struct pt_regs *regs) | 1366 | long syscall_trace_enter(struct pt_regs *regs) |
1351 | { | 1367 | { |
1352 | long ret = 0; | 1368 | long ret = 0; |
1353 | 1369 | ||
@@ -1392,7 +1408,7 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) | |||
1392 | return ret ?: regs->orig_ax; | 1408 | return ret ?: regs->orig_ax; |
1393 | } | 1409 | } |
1394 | 1410 | ||
1395 | asmregparm void syscall_trace_leave(struct pt_regs *regs) | 1411 | void syscall_trace_leave(struct pt_regs *regs) |
1396 | { | 1412 | { |
1397 | bool step; | 1413 | bool step; |
1398 | 1414 | ||
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 08c44b08bf5b..0c016f727695 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -36,7 +36,7 @@ EXPORT_SYMBOL(pm_power_off); | |||
36 | 36 | ||
37 | static const struct desc_ptr no_idt = {}; | 37 | static const struct desc_ptr no_idt = {}; |
38 | static int reboot_mode; | 38 | static int reboot_mode; |
39 | enum reboot_type reboot_type = BOOT_KBD; | 39 | enum reboot_type reboot_type = BOOT_ACPI; |
40 | int reboot_force; | 40 | int reboot_force; |
41 | 41 | ||
42 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) | 42 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) |
@@ -478,9 +478,24 @@ void __attribute__((weak)) mach_reboot_fixups(void) | |||
478 | { | 478 | { |
479 | } | 479 | } |
480 | 480 | ||
481 | /* | ||
482 | * Windows compatible x86 hardware expects the following on reboot: | ||
483 | * | ||
484 | * 1) If the FADT has the ACPI reboot register flag set, try it | ||
485 | * 2) If still alive, write to the keyboard controller | ||
486 | * 3) If still alive, write to the ACPI reboot register again | ||
487 | * 4) If still alive, write to the keyboard controller again | ||
488 | * | ||
489 | * If the machine is still alive at this stage, it gives up. We default to | ||
490 | * following the same pattern, except that if we're still alive after (4) we'll | ||
491 | * try to force a triple fault and then cycle between hitting the keyboard | ||
492 | * controller and doing that | ||
493 | */ | ||
481 | static void native_machine_emergency_restart(void) | 494 | static void native_machine_emergency_restart(void) |
482 | { | 495 | { |
483 | int i; | 496 | int i; |
497 | int attempt = 0; | ||
498 | int orig_reboot_type = reboot_type; | ||
484 | 499 | ||
485 | if (reboot_emergency) | 500 | if (reboot_emergency) |
486 | emergency_vmx_disable_all(); | 501 | emergency_vmx_disable_all(); |
@@ -502,6 +517,13 @@ static void native_machine_emergency_restart(void) | |||
502 | outb(0xfe, 0x64); /* pulse reset low */ | 517 | outb(0xfe, 0x64); /* pulse reset low */ |
503 | udelay(50); | 518 | udelay(50); |
504 | } | 519 | } |
520 | if (attempt == 0 && orig_reboot_type == BOOT_ACPI) { | ||
521 | attempt = 1; | ||
522 | reboot_type = BOOT_ACPI; | ||
523 | } else { | ||
524 | reboot_type = BOOT_TRIPLE; | ||
525 | } | ||
526 | break; | ||
505 | 527 | ||
506 | case BOOT_TRIPLE: | 528 | case BOOT_TRIPLE: |
507 | load_idt(&no_idt); | 529 | load_idt(&no_idt); |
diff --git a/arch/x86/kernel/reboot_32.S b/arch/x86/kernel/reboot_32.S index 29092b38d816..1d5c46df0d78 100644 --- a/arch/x86/kernel/reboot_32.S +++ b/arch/x86/kernel/reboot_32.S | |||
@@ -21,26 +21,26 @@ r_base = . | |||
21 | /* Get our own relocated address */ | 21 | /* Get our own relocated address */ |
22 | call 1f | 22 | call 1f |
23 | 1: popl %ebx | 23 | 1: popl %ebx |
24 | subl $1b, %ebx | 24 | subl $(1b - r_base), %ebx |
25 | 25 | ||
26 | /* Compute the equivalent real-mode segment */ | 26 | /* Compute the equivalent real-mode segment */ |
27 | movl %ebx, %ecx | 27 | movl %ebx, %ecx |
28 | shrl $4, %ecx | 28 | shrl $4, %ecx |
29 | 29 | ||
30 | /* Patch post-real-mode segment jump */ | 30 | /* Patch post-real-mode segment jump */ |
31 | movw dispatch_table(%ebx,%eax,2),%ax | 31 | movw (dispatch_table - r_base)(%ebx,%eax,2),%ax |
32 | movw %ax, 101f(%ebx) | 32 | movw %ax, (101f - r_base)(%ebx) |
33 | movw %cx, 102f(%ebx) | 33 | movw %cx, (102f - r_base)(%ebx) |
34 | 34 | ||
35 | /* Set up the IDT for real mode. */ | 35 | /* Set up the IDT for real mode. */ |
36 | lidtl machine_real_restart_idt(%ebx) | 36 | lidtl (machine_real_restart_idt - r_base)(%ebx) |
37 | 37 | ||
38 | /* | 38 | /* |
39 | * Set up a GDT from which we can load segment descriptors for real | 39 | * Set up a GDT from which we can load segment descriptors for real |
40 | * mode. The GDT is not used in real mode; it is just needed here to | 40 | * mode. The GDT is not used in real mode; it is just needed here to |
41 | * prepare the descriptors. | 41 | * prepare the descriptors. |
42 | */ | 42 | */ |
43 | lgdtl machine_real_restart_gdt(%ebx) | 43 | lgdtl (machine_real_restart_gdt - r_base)(%ebx) |
44 | 44 | ||
45 | /* | 45 | /* |
46 | * Load the data segment registers with 16-bit compatible values | 46 | * Load the data segment registers with 16-bit compatible values |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4be9b398470e..a3e5948670c2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -691,8 +691,6 @@ early_param("reservelow", parse_reservelow); | |||
691 | 691 | ||
692 | void __init setup_arch(char **cmdline_p) | 692 | void __init setup_arch(char **cmdline_p) |
693 | { | 693 | { |
694 | unsigned long flags; | ||
695 | |||
696 | #ifdef CONFIG_X86_32 | 694 | #ifdef CONFIG_X86_32 |
697 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 695 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
698 | visws_early_detect(); | 696 | visws_early_detect(); |
@@ -948,6 +946,8 @@ void __init setup_arch(char **cmdline_p) | |||
948 | if (init_ohci1394_dma_early) | 946 | if (init_ohci1394_dma_early) |
949 | init_ohci1394_dma_on_all_controllers(); | 947 | init_ohci1394_dma_on_all_controllers(); |
950 | #endif | 948 | #endif |
949 | /* Allocate bigger log buffer */ | ||
950 | setup_log_buf(1); | ||
951 | 951 | ||
952 | reserve_initrd(); | 952 | reserve_initrd(); |
953 | 953 | ||
@@ -966,7 +966,6 @@ void __init setup_arch(char **cmdline_p) | |||
966 | 966 | ||
967 | initmem_init(); | 967 | initmem_init(); |
968 | memblock_find_dma_reserve(); | 968 | memblock_find_dma_reserve(); |
969 | dma32_reserve_bootmem(); | ||
970 | 969 | ||
971 | #ifdef CONFIG_KVM_CLOCK | 970 | #ifdef CONFIG_KVM_CLOCK |
972 | kvmclock_init(); | 971 | kvmclock_init(); |
@@ -1041,9 +1040,7 @@ void __init setup_arch(char **cmdline_p) | |||
1041 | 1040 | ||
1042 | mcheck_init(); | 1041 | mcheck_init(); |
1043 | 1042 | ||
1044 | local_irq_save(flags); | 1043 | arch_init_ideal_nops(); |
1045 | arch_init_ideal_nop5(); | ||
1046 | local_irq_restore(flags); | ||
1047 | } | 1044 | } |
1048 | 1045 | ||
1049 | #ifdef CONFIG_X86_32 | 1046 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 4fd173cd8e57..40a24932a8a1 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -601,10 +601,7 @@ long sys_rt_sigreturn(struct pt_regs *regs) | |||
601 | goto badframe; | 601 | goto badframe; |
602 | 602 | ||
603 | sigdelsetmask(&set, ~_BLOCKABLE); | 603 | sigdelsetmask(&set, ~_BLOCKABLE); |
604 | spin_lock_irq(&current->sighand->siglock); | 604 | set_current_blocked(&set); |
605 | current->blocked = set; | ||
606 | recalc_sigpending(); | ||
607 | spin_unlock_irq(&current->sighand->siglock); | ||
608 | 605 | ||
609 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | 606 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) |
610 | goto badframe; | 607 | goto badframe; |
@@ -682,6 +679,7 @@ static int | |||
682 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | 679 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, |
683 | sigset_t *oldset, struct pt_regs *regs) | 680 | sigset_t *oldset, struct pt_regs *regs) |
684 | { | 681 | { |
682 | sigset_t blocked; | ||
685 | int ret; | 683 | int ret; |
686 | 684 | ||
687 | /* Are we from a system call? */ | 685 | /* Are we from a system call? */ |
@@ -741,12 +739,10 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
741 | */ | 739 | */ |
742 | regs->flags &= ~X86_EFLAGS_TF; | 740 | regs->flags &= ~X86_EFLAGS_TF; |
743 | 741 | ||
744 | spin_lock_irq(&current->sighand->siglock); | 742 | sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask); |
745 | sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); | ||
746 | if (!(ka->sa.sa_flags & SA_NODEFER)) | 743 | if (!(ka->sa.sa_flags & SA_NODEFER)) |
747 | sigaddset(&current->blocked, sig); | 744 | sigaddset(&blocked, sig); |
748 | recalc_sigpending(); | 745 | set_current_blocked(&blocked); |
749 | spin_unlock_irq(&current->sighand->siglock); | ||
750 | 746 | ||
751 | tracehook_signal_handler(sig, info, ka, regs, | 747 | tracehook_signal_handler(sig, info, ka, regs, |
752 | test_thread_flag(TIF_SINGLESTEP)); | 748 | test_thread_flag(TIF_SINGLESTEP)); |
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 513deac7228d..013e7eba83bb 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -194,14 +194,13 @@ static void native_stop_other_cpus(int wait) | |||
194 | } | 194 | } |
195 | 195 | ||
196 | /* | 196 | /* |
197 | * Reschedule call back. Nothing to do, | 197 | * Reschedule call back. |
198 | * all the work is done automatically when | ||
199 | * we return from the interrupt. | ||
200 | */ | 198 | */ |
201 | void smp_reschedule_interrupt(struct pt_regs *regs) | 199 | void smp_reschedule_interrupt(struct pt_regs *regs) |
202 | { | 200 | { |
203 | ack_APIC_irq(); | 201 | ack_APIC_irq(); |
204 | inc_irq_stat(irq_resched_count); | 202 | inc_irq_stat(irq_resched_count); |
203 | scheduler_ipi(); | ||
205 | /* | 204 | /* |
206 | * KVM uses this interrupt to force a cpu out of guest mode | 205 | * KVM uses this interrupt to force a cpu out of guest mode |
207 | */ | 206 | */ |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c2871d3c71b6..a3c430bdfb60 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -1332,9 +1332,9 @@ static inline void mwait_play_dead(void) | |||
1332 | void *mwait_ptr; | 1332 | void *mwait_ptr; |
1333 | struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); | 1333 | struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); |
1334 | 1334 | ||
1335 | if (!(cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c))) | 1335 | if (!this_cpu_has(X86_FEATURE_MWAIT) || !mwait_usable(c)) |
1336 | return; | 1336 | return; |
1337 | if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH)) | 1337 | if (!this_cpu_has(X86_FEATURE_CLFLSH)) |
1338 | return; | 1338 | return; |
1339 | if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF) | 1339 | if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF) |
1340 | return; | 1340 | return; |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 6515733a289d..55d9bc03f696 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -9,15 +9,6 @@ | |||
9 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
10 | #include <asm/stacktrace.h> | 10 | #include <asm/stacktrace.h> |
11 | 11 | ||
12 | static void save_stack_warning(void *data, char *msg) | ||
13 | { | ||
14 | } | ||
15 | |||
16 | static void | ||
17 | save_stack_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
18 | { | ||
19 | } | ||
20 | |||
21 | static int save_stack_stack(void *data, char *name) | 12 | static int save_stack_stack(void *data, char *name) |
22 | { | 13 | { |
23 | return 0; | 14 | return 0; |
@@ -53,16 +44,12 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable) | |||
53 | } | 44 | } |
54 | 45 | ||
55 | static const struct stacktrace_ops save_stack_ops = { | 46 | static const struct stacktrace_ops save_stack_ops = { |
56 | .warning = save_stack_warning, | ||
57 | .warning_symbol = save_stack_warning_symbol, | ||
58 | .stack = save_stack_stack, | 47 | .stack = save_stack_stack, |
59 | .address = save_stack_address, | 48 | .address = save_stack_address, |
60 | .walk_stack = print_context_stack, | 49 | .walk_stack = print_context_stack, |
61 | }; | 50 | }; |
62 | 51 | ||
63 | static const struct stacktrace_ops save_stack_ops_nosched = { | 52 | static const struct stacktrace_ops save_stack_ops_nosched = { |
64 | .warning = save_stack_warning, | ||
65 | .warning_symbol = save_stack_warning_symbol, | ||
66 | .stack = save_stack_stack, | 53 | .stack = save_stack_stack, |
67 | .address = save_stack_address_nosched, | 54 | .address = save_stack_address_nosched, |
68 | .walk_stack = print_context_stack, | 55 | .walk_stack = print_context_stack, |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index abce34d5c79d..32cbffb0c494 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -344,3 +344,4 @@ ENTRY(sys_call_table) | |||
344 | .long sys_open_by_handle_at | 344 | .long sys_open_by_handle_at |
345 | .long sys_clock_adjtime | 345 | .long sys_clock_adjtime |
346 | .long sys_syncfs | 346 | .long sys_syncfs |
347 | .long sys_sendmmsg /* 345 */ | ||
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 998e972f3b1a..30ac65df7d4e 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c | |||
@@ -110,7 +110,6 @@ static struct mm_struct tboot_mm = { | |||
110 | .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), | 110 | .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), |
111 | .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), | 111 | .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), |
112 | .mmlist = LIST_HEAD_INIT(init_mm.mmlist), | 112 | .mmlist = LIST_HEAD_INIT(init_mm.mmlist), |
113 | .cpu_vm_mask = CPU_MASK_ALL, | ||
114 | }; | 113 | }; |
115 | 114 | ||
116 | static inline void switch_to_tboot_pt(void) | 115 | static inline void switch_to_tboot_pt(void) |
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c index 787a5e499dd1..3f92ce07e525 100644 --- a/arch/x86/kernel/test_nx.c +++ b/arch/x86/kernel/test_nx.c | |||
@@ -161,7 +161,7 @@ static int test_NX(void) | |||
161 | } | 161 | } |
162 | 162 | ||
163 | #endif | 163 | #endif |
164 | return 0; | 164 | return ret; |
165 | } | 165 | } |
166 | 166 | ||
167 | static void test_exit(void) | 167 | static void test_exit(void) |
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 25a28a245937..00cbb272627f 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c | |||
@@ -23,7 +23,7 @@ | |||
23 | #include <asm/time.h> | 23 | #include <asm/time.h> |
24 | 24 | ||
25 | #ifdef CONFIG_X86_64 | 25 | #ifdef CONFIG_X86_64 |
26 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; | 26 | DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES; |
27 | #endif | 27 | #endif |
28 | 28 | ||
29 | unsigned long profile_pc(struct pt_regs *regs) | 29 | unsigned long profile_pc(struct pt_regs *regs) |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 9335bf7dd2e7..6cc6922262af 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -763,25 +763,6 @@ static cycle_t read_tsc(struct clocksource *cs) | |||
763 | ret : clocksource_tsc.cycle_last; | 763 | ret : clocksource_tsc.cycle_last; |
764 | } | 764 | } |
765 | 765 | ||
766 | #ifdef CONFIG_X86_64 | ||
767 | static cycle_t __vsyscall_fn vread_tsc(void) | ||
768 | { | ||
769 | cycle_t ret; | ||
770 | |||
771 | /* | ||
772 | * Surround the RDTSC by barriers, to make sure it's not | ||
773 | * speculated to outside the seqlock critical section and | ||
774 | * does not cause time warps: | ||
775 | */ | ||
776 | rdtsc_barrier(); | ||
777 | ret = (cycle_t)vget_cycles(); | ||
778 | rdtsc_barrier(); | ||
779 | |||
780 | return ret >= __vsyscall_gtod_data.clock.cycle_last ? | ||
781 | ret : __vsyscall_gtod_data.clock.cycle_last; | ||
782 | } | ||
783 | #endif | ||
784 | |||
785 | static void resume_tsc(struct clocksource *cs) | 766 | static void resume_tsc(struct clocksource *cs) |
786 | { | 767 | { |
787 | clocksource_tsc.cycle_last = 0; | 768 | clocksource_tsc.cycle_last = 0; |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 624a2016198e..89aed99aafce 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -161,6 +161,12 @@ SECTIONS | |||
161 | 161 | ||
162 | #define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0) | 162 | #define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0) |
163 | #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) | 163 | #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) |
164 | #define EMIT_VVAR(x, offset) .vsyscall_var_ ## x \ | ||
165 | ADDR(.vsyscall_0) + offset \ | ||
166 | : AT(VLOAD(.vsyscall_var_ ## x)) { \ | ||
167 | *(.vsyscall_var_ ## x) \ | ||
168 | } \ | ||
169 | x = VVIRT(.vsyscall_var_ ## x); | ||
164 | 170 | ||
165 | . = ALIGN(4096); | 171 | . = ALIGN(4096); |
166 | __vsyscall_0 = .; | 172 | __vsyscall_0 = .; |
@@ -175,18 +181,6 @@ SECTIONS | |||
175 | *(.vsyscall_fn) | 181 | *(.vsyscall_fn) |
176 | } | 182 | } |
177 | 183 | ||
178 | . = ALIGN(L1_CACHE_BYTES); | ||
179 | .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { | ||
180 | *(.vsyscall_gtod_data) | ||
181 | } | ||
182 | |||
183 | vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); | ||
184 | .vsyscall_clock : AT(VLOAD(.vsyscall_clock)) { | ||
185 | *(.vsyscall_clock) | ||
186 | } | ||
187 | vsyscall_clock = VVIRT(.vsyscall_clock); | ||
188 | |||
189 | |||
190 | .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { | 184 | .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { |
191 | *(.vsyscall_1) | 185 | *(.vsyscall_1) |
192 | } | 186 | } |
@@ -194,21 +188,14 @@ SECTIONS | |||
194 | *(.vsyscall_2) | 188 | *(.vsyscall_2) |
195 | } | 189 | } |
196 | 190 | ||
197 | .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { | ||
198 | *(.vgetcpu_mode) | ||
199 | } | ||
200 | vgetcpu_mode = VVIRT(.vgetcpu_mode); | ||
201 | |||
202 | . = ALIGN(L1_CACHE_BYTES); | ||
203 | .jiffies : AT(VLOAD(.jiffies)) { | ||
204 | *(.jiffies) | ||
205 | } | ||
206 | jiffies = VVIRT(.jiffies); | ||
207 | |||
208 | .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { | 191 | .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { |
209 | *(.vsyscall_3) | 192 | *(.vsyscall_3) |
210 | } | 193 | } |
211 | 194 | ||
195 | #define __VVAR_KERNEL_LDS | ||
196 | #include <asm/vvar.h> | ||
197 | #undef __VVAR_KERNEL_LDS | ||
198 | |||
212 | . = __vsyscall_0 + PAGE_SIZE; | 199 | . = __vsyscall_0 + PAGE_SIZE; |
213 | 200 | ||
214 | #undef VSYSCALL_ADDR | 201 | #undef VSYSCALL_ADDR |
@@ -216,6 +203,7 @@ SECTIONS | |||
216 | #undef VLOAD | 203 | #undef VLOAD |
217 | #undef VVIRT_OFFSET | 204 | #undef VVIRT_OFFSET |
218 | #undef VVIRT | 205 | #undef VVIRT |
206 | #undef EMIT_VVAR | ||
219 | 207 | ||
220 | #endif /* CONFIG_X86_64 */ | 208 | #endif /* CONFIG_X86_64 */ |
221 | 209 | ||
@@ -306,6 +294,13 @@ SECTIONS | |||
306 | } | 294 | } |
307 | 295 | ||
308 | . = ALIGN(8); | 296 | . = ALIGN(8); |
297 | .apicdrivers : AT(ADDR(.apicdrivers) - LOAD_OFFSET) { | ||
298 | __apicdrivers = .; | ||
299 | *(.apicdrivers); | ||
300 | __apicdrivers_end = .; | ||
301 | } | ||
302 | |||
303 | . = ALIGN(8); | ||
309 | /* | 304 | /* |
310 | * .exit.text is discard at runtime, not link time, to deal with | 305 | * .exit.text is discard at runtime, not link time, to deal with |
311 | * references from .altinstructions and .eh_frame | 306 | * references from .altinstructions and .eh_frame |
@@ -319,7 +314,7 @@ SECTIONS | |||
319 | } | 314 | } |
320 | 315 | ||
321 | #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) | 316 | #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) |
322 | PERCPU(INTERNODE_CACHE_BYTES, PAGE_SIZE) | 317 | PERCPU_SECTION(INTERNODE_CACHE_BYTES) |
323 | #endif | 318 | #endif |
324 | 319 | ||
325 | . = ALIGN(PAGE_SIZE); | 320 | . = ALIGN(PAGE_SIZE); |
diff --git a/arch/x86/kernel/vread_tsc_64.c b/arch/x86/kernel/vread_tsc_64.c new file mode 100644 index 000000000000..a81aa9e9894c --- /dev/null +++ b/arch/x86/kernel/vread_tsc_64.c | |||
@@ -0,0 +1,36 @@ | |||
1 | /* This code runs in userspace. */ | ||
2 | |||
3 | #define DISABLE_BRANCH_PROFILING | ||
4 | #include <asm/vgtod.h> | ||
5 | |||
6 | notrace cycle_t __vsyscall_fn vread_tsc(void) | ||
7 | { | ||
8 | cycle_t ret; | ||
9 | u64 last; | ||
10 | |||
11 | /* | ||
12 | * Empirically, a fence (of type that depends on the CPU) | ||
13 | * before rdtsc is enough to ensure that rdtsc is ordered | ||
14 | * with respect to loads. The various CPU manuals are unclear | ||
15 | * as to whether rdtsc can be reordered with later loads, | ||
16 | * but no one has ever seen it happen. | ||
17 | */ | ||
18 | rdtsc_barrier(); | ||
19 | ret = (cycle_t)vget_cycles(); | ||
20 | |||
21 | last = VVAR(vsyscall_gtod_data).clock.cycle_last; | ||
22 | |||
23 | if (likely(ret >= last)) | ||
24 | return ret; | ||
25 | |||
26 | /* | ||
27 | * GCC likes to generate cmov here, but this branch is extremely | ||
28 | * predictable (it's just a function of time and the likely is | ||
29 | * very likely) and there's a data dependence, so force GCC | ||
30 | * to generate a branch instead. I don't barrier() because | ||
31 | * we don't actually need a barrier, and if this function | ||
32 | * ever gets inlined it will generate worse code. | ||
33 | */ | ||
34 | asm volatile (""); | ||
35 | return last; | ||
36 | } | ||
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index dcbb28c4b694..3e682184d76c 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -49,17 +49,10 @@ | |||
49 | __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace | 49 | __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace |
50 | #define __syscall_clobber "r11","cx","memory" | 50 | #define __syscall_clobber "r11","cx","memory" |
51 | 51 | ||
52 | /* | 52 | DEFINE_VVAR(int, vgetcpu_mode); |
53 | * vsyscall_gtod_data contains data that is : | 53 | DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = |
54 | * - readonly from vsyscalls | ||
55 | * - written by timer interrupt or sysctl (/proc/sys/kernel/vsyscall64) | ||
56 | * Try to keep this structure as small as possible to avoid cache line ping pongs | ||
57 | */ | ||
58 | int __vgetcpu_mode __section_vgetcpu_mode; | ||
59 | |||
60 | struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data = | ||
61 | { | 54 | { |
62 | .lock = SEQLOCK_UNLOCKED, | 55 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), |
63 | .sysctl_enabled = 1, | 56 | .sysctl_enabled = 1, |
64 | }; | 57 | }; |
65 | 58 | ||
@@ -97,7 +90,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, | |||
97 | */ | 90 | */ |
98 | static __always_inline void do_get_tz(struct timezone * tz) | 91 | static __always_inline void do_get_tz(struct timezone * tz) |
99 | { | 92 | { |
100 | *tz = __vsyscall_gtod_data.sys_tz; | 93 | *tz = VVAR(vsyscall_gtod_data).sys_tz; |
101 | } | 94 | } |
102 | 95 | ||
103 | static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) | 96 | static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) |
@@ -126,23 +119,24 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) | |||
126 | unsigned long mult, shift, nsec; | 119 | unsigned long mult, shift, nsec; |
127 | cycle_t (*vread)(void); | 120 | cycle_t (*vread)(void); |
128 | do { | 121 | do { |
129 | seq = read_seqbegin(&__vsyscall_gtod_data.lock); | 122 | seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock); |
130 | 123 | ||
131 | vread = __vsyscall_gtod_data.clock.vread; | 124 | vread = VVAR(vsyscall_gtod_data).clock.vread; |
132 | if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) { | 125 | if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled || |
126 | !vread)) { | ||
133 | gettimeofday(tv,NULL); | 127 | gettimeofday(tv,NULL); |
134 | return; | 128 | return; |
135 | } | 129 | } |
136 | 130 | ||
137 | now = vread(); | 131 | now = vread(); |
138 | base = __vsyscall_gtod_data.clock.cycle_last; | 132 | base = VVAR(vsyscall_gtod_data).clock.cycle_last; |
139 | mask = __vsyscall_gtod_data.clock.mask; | 133 | mask = VVAR(vsyscall_gtod_data).clock.mask; |
140 | mult = __vsyscall_gtod_data.clock.mult; | 134 | mult = VVAR(vsyscall_gtod_data).clock.mult; |
141 | shift = __vsyscall_gtod_data.clock.shift; | 135 | shift = VVAR(vsyscall_gtod_data).clock.shift; |
142 | 136 | ||
143 | tv->tv_sec = __vsyscall_gtod_data.wall_time_sec; | 137 | tv->tv_sec = VVAR(vsyscall_gtod_data).wall_time_sec; |
144 | nsec = __vsyscall_gtod_data.wall_time_nsec; | 138 | nsec = VVAR(vsyscall_gtod_data).wall_time_nsec; |
145 | } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); | 139 | } while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq)); |
146 | 140 | ||
147 | /* calculate interval: */ | 141 | /* calculate interval: */ |
148 | cycle_delta = (now - base) & mask; | 142 | cycle_delta = (now - base) & mask; |
@@ -171,15 +165,15 @@ time_t __vsyscall(1) vtime(time_t *t) | |||
171 | { | 165 | { |
172 | unsigned seq; | 166 | unsigned seq; |
173 | time_t result; | 167 | time_t result; |
174 | if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) | 168 | if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled)) |
175 | return time_syscall(t); | 169 | return time_syscall(t); |
176 | 170 | ||
177 | do { | 171 | do { |
178 | seq = read_seqbegin(&__vsyscall_gtod_data.lock); | 172 | seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock); |
179 | 173 | ||
180 | result = __vsyscall_gtod_data.wall_time_sec; | 174 | result = VVAR(vsyscall_gtod_data).wall_time_sec; |
181 | 175 | ||
182 | } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); | 176 | } while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq)); |
183 | 177 | ||
184 | if (t) | 178 | if (t) |
185 | *t = result; | 179 | *t = result; |
@@ -208,9 +202,9 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) | |||
208 | We do this here because otherwise user space would do it on | 202 | We do this here because otherwise user space would do it on |
209 | its own in a likely inferior way (no access to jiffies). | 203 | its own in a likely inferior way (no access to jiffies). |
210 | If you don't like it pass NULL. */ | 204 | If you don't like it pass NULL. */ |
211 | if (tcache && tcache->blob[0] == (j = __jiffies)) { | 205 | if (tcache && tcache->blob[0] == (j = VVAR(jiffies))) { |
212 | p = tcache->blob[1]; | 206 | p = tcache->blob[1]; |
213 | } else if (__vgetcpu_mode == VGETCPU_RDTSCP) { | 207 | } else if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) { |
214 | /* Load per CPU data from RDTSCP */ | 208 | /* Load per CPU data from RDTSCP */ |
215 | native_read_tscp(&p); | 209 | native_read_tscp(&p); |
216 | } else { | 210 | } else { |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index c11514e9128b..6f164bd5e14d 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -35,7 +35,7 @@ void iommu_shutdown_noop(void) { } | |||
35 | struct x86_init_ops x86_init __initdata = { | 35 | struct x86_init_ops x86_init __initdata = { |
36 | 36 | ||
37 | .resources = { | 37 | .resources = { |
38 | .probe_roms = x86_init_noop, | 38 | .probe_roms = probe_roms, |
39 | .reserve_resources = reserve_standard_io_resources, | 39 | .reserve_resources = reserve_standard_io_resources, |
40 | .memory_setup = default_machine_specific_memory_setup, | 40 | .memory_setup = default_machine_specific_memory_setup, |
41 | }, | 41 | }, |
@@ -61,6 +61,10 @@ struct x86_init_ops x86_init __initdata = { | |||
61 | .banner = default_banner, | 61 | .banner = default_banner, |
62 | }, | 62 | }, |
63 | 63 | ||
64 | .mapping = { | ||
65 | .pagetable_reserve = native_pagetable_reserve, | ||
66 | }, | ||
67 | |||
64 | .paging = { | 68 | .paging = { |
65 | .pagetable_setup_start = native_pagetable_setup_start, | 69 | .pagetable_setup_start = native_pagetable_setup_start, |
66 | .pagetable_setup_done = native_pagetable_setup_done, | 70 | .pagetable_setup_done = native_pagetable_setup_done, |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0ad47b819a8b..d6e2477feb18 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -73,9 +73,14 @@ | |||
73 | #define MemAbs (1<<11) /* Memory operand is absolute displacement */ | 73 | #define MemAbs (1<<11) /* Memory operand is absolute displacement */ |
74 | #define String (1<<12) /* String instruction (rep capable) */ | 74 | #define String (1<<12) /* String instruction (rep capable) */ |
75 | #define Stack (1<<13) /* Stack instruction (push/pop) */ | 75 | #define Stack (1<<13) /* Stack instruction (push/pop) */ |
76 | #define GroupMask (7<<14) /* Opcode uses one of the group mechanisms */ | ||
76 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ | 77 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ |
77 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ | 78 | #define GroupDual (2<<14) /* Alternate decoding of mod == 3 */ |
79 | #define Prefix (3<<14) /* Instruction varies with 66/f2/f3 prefix */ | ||
80 | #define RMExt (4<<14) /* Opcode extension in ModRM r/m if mod == 3 */ | ||
81 | #define Sse (1<<17) /* SSE Vector instruction */ | ||
78 | /* Misc flags */ | 82 | /* Misc flags */ |
83 | #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ | ||
79 | #define VendorSpecific (1<<22) /* Vendor specific instruction */ | 84 | #define VendorSpecific (1<<22) /* Vendor specific instruction */ |
80 | #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ | 85 | #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ |
81 | #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ | 86 | #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ |
@@ -102,11 +107,14 @@ | |||
102 | 107 | ||
103 | struct opcode { | 108 | struct opcode { |
104 | u32 flags; | 109 | u32 flags; |
110 | u8 intercept; | ||
105 | union { | 111 | union { |
106 | int (*execute)(struct x86_emulate_ctxt *ctxt); | 112 | int (*execute)(struct x86_emulate_ctxt *ctxt); |
107 | struct opcode *group; | 113 | struct opcode *group; |
108 | struct group_dual *gdual; | 114 | struct group_dual *gdual; |
115 | struct gprefix *gprefix; | ||
109 | } u; | 116 | } u; |
117 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); | ||
110 | }; | 118 | }; |
111 | 119 | ||
112 | struct group_dual { | 120 | struct group_dual { |
@@ -114,6 +122,13 @@ struct group_dual { | |||
114 | struct opcode mod3[8]; | 122 | struct opcode mod3[8]; |
115 | }; | 123 | }; |
116 | 124 | ||
125 | struct gprefix { | ||
126 | struct opcode pfx_no; | ||
127 | struct opcode pfx_66; | ||
128 | struct opcode pfx_f2; | ||
129 | struct opcode pfx_f3; | ||
130 | }; | ||
131 | |||
117 | /* EFLAGS bit definitions. */ | 132 | /* EFLAGS bit definitions. */ |
118 | #define EFLG_ID (1<<21) | 133 | #define EFLG_ID (1<<21) |
119 | #define EFLG_VIP (1<<20) | 134 | #define EFLG_VIP (1<<20) |
@@ -248,42 +263,42 @@ struct group_dual { | |||
248 | "w", "r", _LO32, "r", "", "r") | 263 | "w", "r", _LO32, "r", "", "r") |
249 | 264 | ||
250 | /* Instruction has three operands and one operand is stored in ECX register */ | 265 | /* Instruction has three operands and one operand is stored in ECX register */ |
251 | #define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ | 266 | #define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ |
252 | do { \ | 267 | do { \ |
253 | unsigned long _tmp; \ | 268 | unsigned long _tmp; \ |
254 | _type _clv = (_cl).val; \ | 269 | _type _clv = (_cl).val; \ |
255 | _type _srcv = (_src).val; \ | 270 | _type _srcv = (_src).val; \ |
256 | _type _dstv = (_dst).val; \ | 271 | _type _dstv = (_dst).val; \ |
257 | \ | 272 | \ |
258 | __asm__ __volatile__ ( \ | 273 | __asm__ __volatile__ ( \ |
259 | _PRE_EFLAGS("0", "5", "2") \ | 274 | _PRE_EFLAGS("0", "5", "2") \ |
260 | _op _suffix " %4,%1 \n" \ | 275 | _op _suffix " %4,%1 \n" \ |
261 | _POST_EFLAGS("0", "5", "2") \ | 276 | _POST_EFLAGS("0", "5", "2") \ |
262 | : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ | 277 | : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ |
263 | : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ | 278 | : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ |
264 | ); \ | 279 | ); \ |
265 | \ | 280 | \ |
266 | (_cl).val = (unsigned long) _clv; \ | 281 | (_cl).val = (unsigned long) _clv; \ |
267 | (_src).val = (unsigned long) _srcv; \ | 282 | (_src).val = (unsigned long) _srcv; \ |
268 | (_dst).val = (unsigned long) _dstv; \ | 283 | (_dst).val = (unsigned long) _dstv; \ |
269 | } while (0) | 284 | } while (0) |
270 | 285 | ||
271 | #define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ | 286 | #define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ |
272 | do { \ | 287 | do { \ |
273 | switch ((_dst).bytes) { \ | 288 | switch ((_dst).bytes) { \ |
274 | case 2: \ | 289 | case 2: \ |
275 | __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ | 290 | __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ |
276 | "w", unsigned short); \ | 291 | "w", unsigned short); \ |
277 | break; \ | 292 | break; \ |
278 | case 4: \ | 293 | case 4: \ |
279 | __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ | 294 | __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ |
280 | "l", unsigned int); \ | 295 | "l", unsigned int); \ |
281 | break; \ | 296 | break; \ |
282 | case 8: \ | 297 | case 8: \ |
283 | ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ | 298 | ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ |
284 | "q", unsigned long)); \ | 299 | "q", unsigned long)); \ |
285 | break; \ | 300 | break; \ |
286 | } \ | 301 | } \ |
287 | } while (0) | 302 | } while (0) |
288 | 303 | ||
289 | #define __emulate_1op(_op, _dst, _eflags, _suffix) \ | 304 | #define __emulate_1op(_op, _dst, _eflags, _suffix) \ |
@@ -346,13 +361,25 @@ struct group_dual { | |||
346 | } while (0) | 361 | } while (0) |
347 | 362 | ||
348 | /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ | 363 | /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ |
349 | #define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ | 364 | #define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ |
350 | do { \ | 365 | do { \ |
351 | switch((_src).bytes) { \ | 366 | switch((_src).bytes) { \ |
352 | case 1: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "b"); break; \ | 367 | case 1: \ |
353 | case 2: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "w"); break; \ | 368 | __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ |
354 | case 4: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "l"); break; \ | 369 | _eflags, "b"); \ |
355 | case 8: ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "q")); break; \ | 370 | break; \ |
371 | case 2: \ | ||
372 | __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ | ||
373 | _eflags, "w"); \ | ||
374 | break; \ | ||
375 | case 4: \ | ||
376 | __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ | ||
377 | _eflags, "l"); \ | ||
378 | break; \ | ||
379 | case 8: \ | ||
380 | ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ | ||
381 | _eflags, "q")); \ | ||
382 | break; \ | ||
356 | } \ | 383 | } \ |
357 | } while (0) | 384 | } while (0) |
358 | 385 | ||
@@ -388,13 +415,33 @@ struct group_dual { | |||
388 | (_type)_x; \ | 415 | (_type)_x; \ |
389 | }) | 416 | }) |
390 | 417 | ||
391 | #define insn_fetch_arr(_arr, _size, _eip) \ | 418 | #define insn_fetch_arr(_arr, _size, _eip) \ |
392 | ({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ | 419 | ({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ |
393 | if (rc != X86EMUL_CONTINUE) \ | 420 | if (rc != X86EMUL_CONTINUE) \ |
394 | goto done; \ | 421 | goto done; \ |
395 | (_eip) += (_size); \ | 422 | (_eip) += (_size); \ |
396 | }) | 423 | }) |
397 | 424 | ||
425 | static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, | ||
426 | enum x86_intercept intercept, | ||
427 | enum x86_intercept_stage stage) | ||
428 | { | ||
429 | struct x86_instruction_info info = { | ||
430 | .intercept = intercept, | ||
431 | .rep_prefix = ctxt->decode.rep_prefix, | ||
432 | .modrm_mod = ctxt->decode.modrm_mod, | ||
433 | .modrm_reg = ctxt->decode.modrm_reg, | ||
434 | .modrm_rm = ctxt->decode.modrm_rm, | ||
435 | .src_val = ctxt->decode.src.val64, | ||
436 | .src_bytes = ctxt->decode.src.bytes, | ||
437 | .dst_bytes = ctxt->decode.dst.bytes, | ||
438 | .ad_bytes = ctxt->decode.ad_bytes, | ||
439 | .next_rip = ctxt->eip, | ||
440 | }; | ||
441 | |||
442 | return ctxt->ops->intercept(ctxt, &info, stage); | ||
443 | } | ||
444 | |||
398 | static inline unsigned long ad_mask(struct decode_cache *c) | 445 | static inline unsigned long ad_mask(struct decode_cache *c) |
399 | { | 446 | { |
400 | return (1UL << (c->ad_bytes << 3)) - 1; | 447 | return (1UL << (c->ad_bytes << 3)) - 1; |
@@ -430,6 +477,13 @@ static inline void jmp_rel(struct decode_cache *c, int rel) | |||
430 | register_address_increment(c, &c->eip, rel); | 477 | register_address_increment(c, &c->eip, rel); |
431 | } | 478 | } |
432 | 479 | ||
480 | static u32 desc_limit_scaled(struct desc_struct *desc) | ||
481 | { | ||
482 | u32 limit = get_desc_limit(desc); | ||
483 | |||
484 | return desc->g ? (limit << 12) | 0xfff : limit; | ||
485 | } | ||
486 | |||
433 | static void set_seg_override(struct decode_cache *c, int seg) | 487 | static void set_seg_override(struct decode_cache *c, int seg) |
434 | { | 488 | { |
435 | c->has_seg_override = true; | 489 | c->has_seg_override = true; |
@@ -442,11 +496,10 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, | |||
442 | if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) | 496 | if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) |
443 | return 0; | 497 | return 0; |
444 | 498 | ||
445 | return ops->get_cached_segment_base(seg, ctxt->vcpu); | 499 | return ops->get_cached_segment_base(ctxt, seg); |
446 | } | 500 | } |
447 | 501 | ||
448 | static unsigned seg_override(struct x86_emulate_ctxt *ctxt, | 502 | static unsigned seg_override(struct x86_emulate_ctxt *ctxt, |
449 | struct x86_emulate_ops *ops, | ||
450 | struct decode_cache *c) | 503 | struct decode_cache *c) |
451 | { | 504 | { |
452 | if (!c->has_seg_override) | 505 | if (!c->has_seg_override) |
@@ -455,18 +508,6 @@ static unsigned seg_override(struct x86_emulate_ctxt *ctxt, | |||
455 | return c->seg_override; | 508 | return c->seg_override; |
456 | } | 509 | } |
457 | 510 | ||
458 | static ulong linear(struct x86_emulate_ctxt *ctxt, | ||
459 | struct segmented_address addr) | ||
460 | { | ||
461 | struct decode_cache *c = &ctxt->decode; | ||
462 | ulong la; | ||
463 | |||
464 | la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; | ||
465 | if (c->ad_bytes != 8) | ||
466 | la &= (u32)-1; | ||
467 | return la; | ||
468 | } | ||
469 | |||
470 | static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, | 511 | static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, |
471 | u32 error, bool valid) | 512 | u32 error, bool valid) |
472 | { | 513 | { |
@@ -476,11 +517,21 @@ static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, | |||
476 | return X86EMUL_PROPAGATE_FAULT; | 517 | return X86EMUL_PROPAGATE_FAULT; |
477 | } | 518 | } |
478 | 519 | ||
520 | static int emulate_db(struct x86_emulate_ctxt *ctxt) | ||
521 | { | ||
522 | return emulate_exception(ctxt, DB_VECTOR, 0, false); | ||
523 | } | ||
524 | |||
479 | static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err) | 525 | static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err) |
480 | { | 526 | { |
481 | return emulate_exception(ctxt, GP_VECTOR, err, true); | 527 | return emulate_exception(ctxt, GP_VECTOR, err, true); |
482 | } | 528 | } |
483 | 529 | ||
530 | static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err) | ||
531 | { | ||
532 | return emulate_exception(ctxt, SS_VECTOR, err, true); | ||
533 | } | ||
534 | |||
484 | static int emulate_ud(struct x86_emulate_ctxt *ctxt) | 535 | static int emulate_ud(struct x86_emulate_ctxt *ctxt) |
485 | { | 536 | { |
486 | return emulate_exception(ctxt, UD_VECTOR, 0, false); | 537 | return emulate_exception(ctxt, UD_VECTOR, 0, false); |
@@ -496,6 +547,128 @@ static int emulate_de(struct x86_emulate_ctxt *ctxt) | |||
496 | return emulate_exception(ctxt, DE_VECTOR, 0, false); | 547 | return emulate_exception(ctxt, DE_VECTOR, 0, false); |
497 | } | 548 | } |
498 | 549 | ||
550 | static int emulate_nm(struct x86_emulate_ctxt *ctxt) | ||
551 | { | ||
552 | return emulate_exception(ctxt, NM_VECTOR, 0, false); | ||
553 | } | ||
554 | |||
555 | static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) | ||
556 | { | ||
557 | u16 selector; | ||
558 | struct desc_struct desc; | ||
559 | |||
560 | ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg); | ||
561 | return selector; | ||
562 | } | ||
563 | |||
564 | static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector, | ||
565 | unsigned seg) | ||
566 | { | ||
567 | u16 dummy; | ||
568 | u32 base3; | ||
569 | struct desc_struct desc; | ||
570 | |||
571 | ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg); | ||
572 | ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg); | ||
573 | } | ||
574 | |||
575 | static int __linearize(struct x86_emulate_ctxt *ctxt, | ||
576 | struct segmented_address addr, | ||
577 | unsigned size, bool write, bool fetch, | ||
578 | ulong *linear) | ||
579 | { | ||
580 | struct decode_cache *c = &ctxt->decode; | ||
581 | struct desc_struct desc; | ||
582 | bool usable; | ||
583 | ulong la; | ||
584 | u32 lim; | ||
585 | u16 sel; | ||
586 | unsigned cpl, rpl; | ||
587 | |||
588 | la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; | ||
589 | switch (ctxt->mode) { | ||
590 | case X86EMUL_MODE_REAL: | ||
591 | break; | ||
592 | case X86EMUL_MODE_PROT64: | ||
593 | if (((signed long)la << 16) >> 16 != la) | ||
594 | return emulate_gp(ctxt, 0); | ||
595 | break; | ||
596 | default: | ||
597 | usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL, | ||
598 | addr.seg); | ||
599 | if (!usable) | ||
600 | goto bad; | ||
601 | /* code segment or read-only data segment */ | ||
602 | if (((desc.type & 8) || !(desc.type & 2)) && write) | ||
603 | goto bad; | ||
604 | /* unreadable code segment */ | ||
605 | if (!fetch && (desc.type & 8) && !(desc.type & 2)) | ||
606 | goto bad; | ||
607 | lim = desc_limit_scaled(&desc); | ||
608 | if ((desc.type & 8) || !(desc.type & 4)) { | ||
609 | /* expand-up segment */ | ||
610 | if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) | ||
611 | goto bad; | ||
612 | } else { | ||
613 | /* expand-down segment */ | ||
614 | if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim) | ||
615 | goto bad; | ||
616 | lim = desc.d ? 0xffffffff : 0xffff; | ||
617 | if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) | ||
618 | goto bad; | ||
619 | } | ||
620 | cpl = ctxt->ops->cpl(ctxt); | ||
621 | rpl = sel & 3; | ||
622 | cpl = max(cpl, rpl); | ||
623 | if (!(desc.type & 8)) { | ||
624 | /* data segment */ | ||
625 | if (cpl > desc.dpl) | ||
626 | goto bad; | ||
627 | } else if ((desc.type & 8) && !(desc.type & 4)) { | ||
628 | /* nonconforming code segment */ | ||
629 | if (cpl != desc.dpl) | ||
630 | goto bad; | ||
631 | } else if ((desc.type & 8) && (desc.type & 4)) { | ||
632 | /* conforming code segment */ | ||
633 | if (cpl < desc.dpl) | ||
634 | goto bad; | ||
635 | } | ||
636 | break; | ||
637 | } | ||
638 | if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : c->ad_bytes != 8) | ||
639 | la &= (u32)-1; | ||
640 | *linear = la; | ||
641 | return X86EMUL_CONTINUE; | ||
642 | bad: | ||
643 | if (addr.seg == VCPU_SREG_SS) | ||
644 | return emulate_ss(ctxt, addr.seg); | ||
645 | else | ||
646 | return emulate_gp(ctxt, addr.seg); | ||
647 | } | ||
648 | |||
649 | static int linearize(struct x86_emulate_ctxt *ctxt, | ||
650 | struct segmented_address addr, | ||
651 | unsigned size, bool write, | ||
652 | ulong *linear) | ||
653 | { | ||
654 | return __linearize(ctxt, addr, size, write, false, linear); | ||
655 | } | ||
656 | |||
657 | |||
658 | static int segmented_read_std(struct x86_emulate_ctxt *ctxt, | ||
659 | struct segmented_address addr, | ||
660 | void *data, | ||
661 | unsigned size) | ||
662 | { | ||
663 | int rc; | ||
664 | ulong linear; | ||
665 | |||
666 | rc = linearize(ctxt, addr, size, false, &linear); | ||
667 | if (rc != X86EMUL_CONTINUE) | ||
668 | return rc; | ||
669 | return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); | ||
670 | } | ||
671 | |||
499 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | 672 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, |
500 | struct x86_emulate_ops *ops, | 673 | struct x86_emulate_ops *ops, |
501 | unsigned long eip, u8 *dest) | 674 | unsigned long eip, u8 *dest) |
@@ -505,10 +678,15 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | |||
505 | int size, cur_size; | 678 | int size, cur_size; |
506 | 679 | ||
507 | if (eip == fc->end) { | 680 | if (eip == fc->end) { |
681 | unsigned long linear; | ||
682 | struct segmented_address addr = { .seg=VCPU_SREG_CS, .ea=eip}; | ||
508 | cur_size = fc->end - fc->start; | 683 | cur_size = fc->end - fc->start; |
509 | size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); | 684 | size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); |
510 | rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, | 685 | rc = __linearize(ctxt, addr, size, false, true, &linear); |
511 | size, ctxt->vcpu, &ctxt->exception); | 686 | if (rc != X86EMUL_CONTINUE) |
687 | return rc; | ||
688 | rc = ops->fetch(ctxt, linear, fc->data + cur_size, | ||
689 | size, &ctxt->exception); | ||
512 | if (rc != X86EMUL_CONTINUE) | 690 | if (rc != X86EMUL_CONTINUE) |
513 | return rc; | 691 | return rc; |
514 | fc->end += size; | 692 | fc->end += size; |
@@ -551,7 +729,6 @@ static void *decode_register(u8 modrm_reg, unsigned long *regs, | |||
551 | } | 729 | } |
552 | 730 | ||
553 | static int read_descriptor(struct x86_emulate_ctxt *ctxt, | 731 | static int read_descriptor(struct x86_emulate_ctxt *ctxt, |
554 | struct x86_emulate_ops *ops, | ||
555 | struct segmented_address addr, | 732 | struct segmented_address addr, |
556 | u16 *size, unsigned long *address, int op_bytes) | 733 | u16 *size, unsigned long *address, int op_bytes) |
557 | { | 734 | { |
@@ -560,13 +737,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, | |||
560 | if (op_bytes == 2) | 737 | if (op_bytes == 2) |
561 | op_bytes = 3; | 738 | op_bytes = 3; |
562 | *address = 0; | 739 | *address = 0; |
563 | rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2, | 740 | rc = segmented_read_std(ctxt, addr, size, 2); |
564 | ctxt->vcpu, &ctxt->exception); | ||
565 | if (rc != X86EMUL_CONTINUE) | 741 | if (rc != X86EMUL_CONTINUE) |
566 | return rc; | 742 | return rc; |
567 | addr.ea += 2; | 743 | addr.ea += 2; |
568 | rc = ops->read_std(linear(ctxt, addr), address, op_bytes, | 744 | rc = segmented_read_std(ctxt, addr, address, op_bytes); |
569 | ctxt->vcpu, &ctxt->exception); | ||
570 | return rc; | 745 | return rc; |
571 | } | 746 | } |
572 | 747 | ||
@@ -623,7 +798,63 @@ static void fetch_register_operand(struct operand *op) | |||
623 | } | 798 | } |
624 | } | 799 | } |
625 | 800 | ||
626 | static void decode_register_operand(struct operand *op, | 801 | static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) |
802 | { | ||
803 | ctxt->ops->get_fpu(ctxt); | ||
804 | switch (reg) { | ||
805 | case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break; | ||
806 | case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break; | ||
807 | case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break; | ||
808 | case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break; | ||
809 | case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break; | ||
810 | case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break; | ||
811 | case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break; | ||
812 | case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break; | ||
813 | #ifdef CONFIG_X86_64 | ||
814 | case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break; | ||
815 | case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break; | ||
816 | case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break; | ||
817 | case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break; | ||
818 | case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break; | ||
819 | case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break; | ||
820 | case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break; | ||
821 | case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break; | ||
822 | #endif | ||
823 | default: BUG(); | ||
824 | } | ||
825 | ctxt->ops->put_fpu(ctxt); | ||
826 | } | ||
827 | |||
828 | static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, | ||
829 | int reg) | ||
830 | { | ||
831 | ctxt->ops->get_fpu(ctxt); | ||
832 | switch (reg) { | ||
833 | case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break; | ||
834 | case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break; | ||
835 | case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break; | ||
836 | case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break; | ||
837 | case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break; | ||
838 | case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break; | ||
839 | case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break; | ||
840 | case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break; | ||
841 | #ifdef CONFIG_X86_64 | ||
842 | case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break; | ||
843 | case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break; | ||
844 | case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break; | ||
845 | case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break; | ||
846 | case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break; | ||
847 | case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break; | ||
848 | case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break; | ||
849 | case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break; | ||
850 | #endif | ||
851 | default: BUG(); | ||
852 | } | ||
853 | ctxt->ops->put_fpu(ctxt); | ||
854 | } | ||
855 | |||
856 | static void decode_register_operand(struct x86_emulate_ctxt *ctxt, | ||
857 | struct operand *op, | ||
627 | struct decode_cache *c, | 858 | struct decode_cache *c, |
628 | int inhibit_bytereg) | 859 | int inhibit_bytereg) |
629 | { | 860 | { |
@@ -632,6 +863,15 @@ static void decode_register_operand(struct operand *op, | |||
632 | 863 | ||
633 | if (!(c->d & ModRM)) | 864 | if (!(c->d & ModRM)) |
634 | reg = (c->b & 7) | ((c->rex_prefix & 1) << 3); | 865 | reg = (c->b & 7) | ((c->rex_prefix & 1) << 3); |
866 | |||
867 | if (c->d & Sse) { | ||
868 | op->type = OP_XMM; | ||
869 | op->bytes = 16; | ||
870 | op->addr.xmm = reg; | ||
871 | read_sse_reg(ctxt, &op->vec_val, reg); | ||
872 | return; | ||
873 | } | ||
874 | |||
635 | op->type = OP_REG; | 875 | op->type = OP_REG; |
636 | if ((c->d & ByteOp) && !inhibit_bytereg) { | 876 | if ((c->d & ByteOp) && !inhibit_bytereg) { |
637 | op->addr.reg = decode_register(reg, c->regs, highbyte_regs); | 877 | op->addr.reg = decode_register(reg, c->regs, highbyte_regs); |
@@ -671,6 +911,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
671 | op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 911 | op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
672 | op->addr.reg = decode_register(c->modrm_rm, | 912 | op->addr.reg = decode_register(c->modrm_rm, |
673 | c->regs, c->d & ByteOp); | 913 | c->regs, c->d & ByteOp); |
914 | if (c->d & Sse) { | ||
915 | op->type = OP_XMM; | ||
916 | op->bytes = 16; | ||
917 | op->addr.xmm = c->modrm_rm; | ||
918 | read_sse_reg(ctxt, &op->vec_val, c->modrm_rm); | ||
919 | return rc; | ||
920 | } | ||
674 | fetch_register_operand(op); | 921 | fetch_register_operand(op); |
675 | return rc; | 922 | return rc; |
676 | } | 923 | } |
@@ -819,8 +1066,8 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, | |||
819 | if (mc->pos < mc->end) | 1066 | if (mc->pos < mc->end) |
820 | goto read_cached; | 1067 | goto read_cached; |
821 | 1068 | ||
822 | rc = ops->read_emulated(addr, mc->data + mc->end, n, | 1069 | rc = ops->read_emulated(ctxt, addr, mc->data + mc->end, n, |
823 | &ctxt->exception, ctxt->vcpu); | 1070 | &ctxt->exception); |
824 | if (rc != X86EMUL_CONTINUE) | 1071 | if (rc != X86EMUL_CONTINUE) |
825 | return rc; | 1072 | return rc; |
826 | mc->end += n; | 1073 | mc->end += n; |
@@ -834,6 +1081,50 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, | |||
834 | return X86EMUL_CONTINUE; | 1081 | return X86EMUL_CONTINUE; |
835 | } | 1082 | } |
836 | 1083 | ||
1084 | static int segmented_read(struct x86_emulate_ctxt *ctxt, | ||
1085 | struct segmented_address addr, | ||
1086 | void *data, | ||
1087 | unsigned size) | ||
1088 | { | ||
1089 | int rc; | ||
1090 | ulong linear; | ||
1091 | |||
1092 | rc = linearize(ctxt, addr, size, false, &linear); | ||
1093 | if (rc != X86EMUL_CONTINUE) | ||
1094 | return rc; | ||
1095 | return read_emulated(ctxt, ctxt->ops, linear, data, size); | ||
1096 | } | ||
1097 | |||
1098 | static int segmented_write(struct x86_emulate_ctxt *ctxt, | ||
1099 | struct segmented_address addr, | ||
1100 | const void *data, | ||
1101 | unsigned size) | ||
1102 | { | ||
1103 | int rc; | ||
1104 | ulong linear; | ||
1105 | |||
1106 | rc = linearize(ctxt, addr, size, true, &linear); | ||
1107 | if (rc != X86EMUL_CONTINUE) | ||
1108 | return rc; | ||
1109 | return ctxt->ops->write_emulated(ctxt, linear, data, size, | ||
1110 | &ctxt->exception); | ||
1111 | } | ||
1112 | |||
1113 | static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, | ||
1114 | struct segmented_address addr, | ||
1115 | const void *orig_data, const void *data, | ||
1116 | unsigned size) | ||
1117 | { | ||
1118 | int rc; | ||
1119 | ulong linear; | ||
1120 | |||
1121 | rc = linearize(ctxt, addr, size, true, &linear); | ||
1122 | if (rc != X86EMUL_CONTINUE) | ||
1123 | return rc; | ||
1124 | return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data, | ||
1125 | size, &ctxt->exception); | ||
1126 | } | ||
1127 | |||
837 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | 1128 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, |
838 | struct x86_emulate_ops *ops, | 1129 | struct x86_emulate_ops *ops, |
839 | unsigned int size, unsigned short port, | 1130 | unsigned int size, unsigned short port, |
@@ -854,7 +1145,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | |||
854 | if (n == 0) | 1145 | if (n == 0) |
855 | n = 1; | 1146 | n = 1; |
856 | rc->pos = rc->end = 0; | 1147 | rc->pos = rc->end = 0; |
857 | if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) | 1148 | if (!ops->pio_in_emulated(ctxt, size, port, rc->data, n)) |
858 | return 0; | 1149 | return 0; |
859 | rc->end = n * size; | 1150 | rc->end = n * size; |
860 | } | 1151 | } |
@@ -864,28 +1155,22 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | |||
864 | return 1; | 1155 | return 1; |
865 | } | 1156 | } |
866 | 1157 | ||
867 | static u32 desc_limit_scaled(struct desc_struct *desc) | ||
868 | { | ||
869 | u32 limit = get_desc_limit(desc); | ||
870 | |||
871 | return desc->g ? (limit << 12) | 0xfff : limit; | ||
872 | } | ||
873 | |||
874 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | 1158 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, |
875 | struct x86_emulate_ops *ops, | 1159 | struct x86_emulate_ops *ops, |
876 | u16 selector, struct desc_ptr *dt) | 1160 | u16 selector, struct desc_ptr *dt) |
877 | { | 1161 | { |
878 | if (selector & 1 << 2) { | 1162 | if (selector & 1 << 2) { |
879 | struct desc_struct desc; | 1163 | struct desc_struct desc; |
1164 | u16 sel; | ||
1165 | |||
880 | memset (dt, 0, sizeof *dt); | 1166 | memset (dt, 0, sizeof *dt); |
881 | if (!ops->get_cached_descriptor(&desc, NULL, VCPU_SREG_LDTR, | 1167 | if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR)) |
882 | ctxt->vcpu)) | ||
883 | return; | 1168 | return; |
884 | 1169 | ||
885 | dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ | 1170 | dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ |
886 | dt->address = get_desc_base(&desc); | 1171 | dt->address = get_desc_base(&desc); |
887 | } else | 1172 | } else |
888 | ops->get_gdt(dt, ctxt->vcpu); | 1173 | ops->get_gdt(ctxt, dt); |
889 | } | 1174 | } |
890 | 1175 | ||
891 | /* allowed just for 8 bytes segments */ | 1176 | /* allowed just for 8 bytes segments */ |
@@ -903,8 +1188,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
903 | if (dt.size < index * 8 + 7) | 1188 | if (dt.size < index * 8 + 7) |
904 | return emulate_gp(ctxt, selector & 0xfffc); | 1189 | return emulate_gp(ctxt, selector & 0xfffc); |
905 | addr = dt.address + index * 8; | 1190 | addr = dt.address + index * 8; |
906 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, | 1191 | ret = ops->read_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); |
907 | &ctxt->exception); | ||
908 | 1192 | ||
909 | return ret; | 1193 | return ret; |
910 | } | 1194 | } |
@@ -925,8 +1209,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
925 | return emulate_gp(ctxt, selector & 0xfffc); | 1209 | return emulate_gp(ctxt, selector & 0xfffc); |
926 | 1210 | ||
927 | addr = dt.address + index * 8; | 1211 | addr = dt.address + index * 8; |
928 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, | 1212 | ret = ops->write_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); |
929 | &ctxt->exception); | ||
930 | 1213 | ||
931 | return ret; | 1214 | return ret; |
932 | } | 1215 | } |
@@ -986,7 +1269,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
986 | 1269 | ||
987 | rpl = selector & 3; | 1270 | rpl = selector & 3; |
988 | dpl = seg_desc.dpl; | 1271 | dpl = seg_desc.dpl; |
989 | cpl = ops->cpl(ctxt->vcpu); | 1272 | cpl = ops->cpl(ctxt); |
990 | 1273 | ||
991 | switch (seg) { | 1274 | switch (seg) { |
992 | case VCPU_SREG_SS: | 1275 | case VCPU_SREG_SS: |
@@ -1042,8 +1325,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1042 | return ret; | 1325 | return ret; |
1043 | } | 1326 | } |
1044 | load: | 1327 | load: |
1045 | ops->set_segment_selector(selector, seg, ctxt->vcpu); | 1328 | ops->set_segment(ctxt, selector, &seg_desc, 0, seg); |
1046 | ops->set_cached_descriptor(&seg_desc, 0, seg, ctxt->vcpu); | ||
1047 | return X86EMUL_CONTINUE; | 1329 | return X86EMUL_CONTINUE; |
1048 | exception: | 1330 | exception: |
1049 | emulate_exception(ctxt, err_vec, err_code, true); | 1331 | emulate_exception(ctxt, err_vec, err_code, true); |
@@ -1069,8 +1351,7 @@ static void write_register_operand(struct operand *op) | |||
1069 | } | 1351 | } |
1070 | } | 1352 | } |
1071 | 1353 | ||
1072 | static inline int writeback(struct x86_emulate_ctxt *ctxt, | 1354 | static int writeback(struct x86_emulate_ctxt *ctxt) |
1073 | struct x86_emulate_ops *ops) | ||
1074 | { | 1355 | { |
1075 | int rc; | 1356 | int rc; |
1076 | struct decode_cache *c = &ctxt->decode; | 1357 | struct decode_cache *c = &ctxt->decode; |
@@ -1081,23 +1362,22 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
1081 | break; | 1362 | break; |
1082 | case OP_MEM: | 1363 | case OP_MEM: |
1083 | if (c->lock_prefix) | 1364 | if (c->lock_prefix) |
1084 | rc = ops->cmpxchg_emulated( | 1365 | rc = segmented_cmpxchg(ctxt, |
1085 | linear(ctxt, c->dst.addr.mem), | 1366 | c->dst.addr.mem, |
1086 | &c->dst.orig_val, | 1367 | &c->dst.orig_val, |
1087 | &c->dst.val, | 1368 | &c->dst.val, |
1088 | c->dst.bytes, | 1369 | c->dst.bytes); |
1089 | &ctxt->exception, | ||
1090 | ctxt->vcpu); | ||
1091 | else | 1370 | else |
1092 | rc = ops->write_emulated( | 1371 | rc = segmented_write(ctxt, |
1093 | linear(ctxt, c->dst.addr.mem), | 1372 | c->dst.addr.mem, |
1094 | &c->dst.val, | 1373 | &c->dst.val, |
1095 | c->dst.bytes, | 1374 | c->dst.bytes); |
1096 | &ctxt->exception, | ||
1097 | ctxt->vcpu); | ||
1098 | if (rc != X86EMUL_CONTINUE) | 1375 | if (rc != X86EMUL_CONTINUE) |
1099 | return rc; | 1376 | return rc; |
1100 | break; | 1377 | break; |
1378 | case OP_XMM: | ||
1379 | write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm); | ||
1380 | break; | ||
1101 | case OP_NONE: | 1381 | case OP_NONE: |
1102 | /* no writeback */ | 1382 | /* no writeback */ |
1103 | break; | 1383 | break; |
@@ -1107,21 +1387,21 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
1107 | return X86EMUL_CONTINUE; | 1387 | return X86EMUL_CONTINUE; |
1108 | } | 1388 | } |
1109 | 1389 | ||
1110 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt, | 1390 | static int em_push(struct x86_emulate_ctxt *ctxt) |
1111 | struct x86_emulate_ops *ops) | ||
1112 | { | 1391 | { |
1113 | struct decode_cache *c = &ctxt->decode; | 1392 | struct decode_cache *c = &ctxt->decode; |
1393 | struct segmented_address addr; | ||
1114 | 1394 | ||
1115 | c->dst.type = OP_MEM; | ||
1116 | c->dst.bytes = c->op_bytes; | ||
1117 | c->dst.val = c->src.val; | ||
1118 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); | 1395 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); |
1119 | c->dst.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSP]); | 1396 | addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); |
1120 | c->dst.addr.mem.seg = VCPU_SREG_SS; | 1397 | addr.seg = VCPU_SREG_SS; |
1398 | |||
1399 | /* Disable writeback. */ | ||
1400 | c->dst.type = OP_NONE; | ||
1401 | return segmented_write(ctxt, addr, &c->src.val, c->op_bytes); | ||
1121 | } | 1402 | } |
1122 | 1403 | ||
1123 | static int emulate_pop(struct x86_emulate_ctxt *ctxt, | 1404 | static int emulate_pop(struct x86_emulate_ctxt *ctxt, |
1124 | struct x86_emulate_ops *ops, | ||
1125 | void *dest, int len) | 1405 | void *dest, int len) |
1126 | { | 1406 | { |
1127 | struct decode_cache *c = &ctxt->decode; | 1407 | struct decode_cache *c = &ctxt->decode; |
@@ -1130,7 +1410,7 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, | |||
1130 | 1410 | ||
1131 | addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); | 1411 | addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); |
1132 | addr.seg = VCPU_SREG_SS; | 1412 | addr.seg = VCPU_SREG_SS; |
1133 | rc = read_emulated(ctxt, ops, linear(ctxt, addr), dest, len); | 1413 | rc = segmented_read(ctxt, addr, dest, len); |
1134 | if (rc != X86EMUL_CONTINUE) | 1414 | if (rc != X86EMUL_CONTINUE) |
1135 | return rc; | 1415 | return rc; |
1136 | 1416 | ||
@@ -1138,6 +1418,13 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, | |||
1138 | return rc; | 1418 | return rc; |
1139 | } | 1419 | } |
1140 | 1420 | ||
1421 | static int em_pop(struct x86_emulate_ctxt *ctxt) | ||
1422 | { | ||
1423 | struct decode_cache *c = &ctxt->decode; | ||
1424 | |||
1425 | return emulate_pop(ctxt, &c->dst.val, c->op_bytes); | ||
1426 | } | ||
1427 | |||
1141 | static int emulate_popf(struct x86_emulate_ctxt *ctxt, | 1428 | static int emulate_popf(struct x86_emulate_ctxt *ctxt, |
1142 | struct x86_emulate_ops *ops, | 1429 | struct x86_emulate_ops *ops, |
1143 | void *dest, int len) | 1430 | void *dest, int len) |
@@ -1145,9 +1432,9 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1145 | int rc; | 1432 | int rc; |
1146 | unsigned long val, change_mask; | 1433 | unsigned long val, change_mask; |
1147 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 1434 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
1148 | int cpl = ops->cpl(ctxt->vcpu); | 1435 | int cpl = ops->cpl(ctxt); |
1149 | 1436 | ||
1150 | rc = emulate_pop(ctxt, ops, &val, len); | 1437 | rc = emulate_pop(ctxt, &val, len); |
1151 | if (rc != X86EMUL_CONTINUE) | 1438 | if (rc != X86EMUL_CONTINUE) |
1152 | return rc; | 1439 | return rc; |
1153 | 1440 | ||
@@ -1179,14 +1466,24 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1179 | return rc; | 1466 | return rc; |
1180 | } | 1467 | } |
1181 | 1468 | ||
1182 | static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, | 1469 | static int em_popf(struct x86_emulate_ctxt *ctxt) |
1183 | struct x86_emulate_ops *ops, int seg) | ||
1184 | { | 1470 | { |
1185 | struct decode_cache *c = &ctxt->decode; | 1471 | struct decode_cache *c = &ctxt->decode; |
1186 | 1472 | ||
1187 | c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); | 1473 | c->dst.type = OP_REG; |
1474 | c->dst.addr.reg = &ctxt->eflags; | ||
1475 | c->dst.bytes = c->op_bytes; | ||
1476 | return emulate_popf(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); | ||
1477 | } | ||
1188 | 1478 | ||
1189 | emulate_push(ctxt, ops); | 1479 | static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, |
1480 | struct x86_emulate_ops *ops, int seg) | ||
1481 | { | ||
1482 | struct decode_cache *c = &ctxt->decode; | ||
1483 | |||
1484 | c->src.val = get_segment_selector(ctxt, seg); | ||
1485 | |||
1486 | return em_push(ctxt); | ||
1190 | } | 1487 | } |
1191 | 1488 | ||
1192 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | 1489 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, |
@@ -1196,7 +1493,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | |||
1196 | unsigned long selector; | 1493 | unsigned long selector; |
1197 | int rc; | 1494 | int rc; |
1198 | 1495 | ||
1199 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); | 1496 | rc = emulate_pop(ctxt, &selector, c->op_bytes); |
1200 | if (rc != X86EMUL_CONTINUE) | 1497 | if (rc != X86EMUL_CONTINUE) |
1201 | return rc; | 1498 | return rc; |
1202 | 1499 | ||
@@ -1204,8 +1501,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | |||
1204 | return rc; | 1501 | return rc; |
1205 | } | 1502 | } |
1206 | 1503 | ||
1207 | static int emulate_pusha(struct x86_emulate_ctxt *ctxt, | 1504 | static int em_pusha(struct x86_emulate_ctxt *ctxt) |
1208 | struct x86_emulate_ops *ops) | ||
1209 | { | 1505 | { |
1210 | struct decode_cache *c = &ctxt->decode; | 1506 | struct decode_cache *c = &ctxt->decode; |
1211 | unsigned long old_esp = c->regs[VCPU_REGS_RSP]; | 1507 | unsigned long old_esp = c->regs[VCPU_REGS_RSP]; |
@@ -1216,23 +1512,25 @@ static int emulate_pusha(struct x86_emulate_ctxt *ctxt, | |||
1216 | (reg == VCPU_REGS_RSP) ? | 1512 | (reg == VCPU_REGS_RSP) ? |
1217 | (c->src.val = old_esp) : (c->src.val = c->regs[reg]); | 1513 | (c->src.val = old_esp) : (c->src.val = c->regs[reg]); |
1218 | 1514 | ||
1219 | emulate_push(ctxt, ops); | 1515 | rc = em_push(ctxt); |
1220 | |||
1221 | rc = writeback(ctxt, ops); | ||
1222 | if (rc != X86EMUL_CONTINUE) | 1516 | if (rc != X86EMUL_CONTINUE) |
1223 | return rc; | 1517 | return rc; |
1224 | 1518 | ||
1225 | ++reg; | 1519 | ++reg; |
1226 | } | 1520 | } |
1227 | 1521 | ||
1228 | /* Disable writeback. */ | ||
1229 | c->dst.type = OP_NONE; | ||
1230 | |||
1231 | return rc; | 1522 | return rc; |
1232 | } | 1523 | } |
1233 | 1524 | ||
1234 | static int emulate_popa(struct x86_emulate_ctxt *ctxt, | 1525 | static int em_pushf(struct x86_emulate_ctxt *ctxt) |
1235 | struct x86_emulate_ops *ops) | 1526 | { |
1527 | struct decode_cache *c = &ctxt->decode; | ||
1528 | |||
1529 | c->src.val = (unsigned long)ctxt->eflags; | ||
1530 | return em_push(ctxt); | ||
1531 | } | ||
1532 | |||
1533 | static int em_popa(struct x86_emulate_ctxt *ctxt) | ||
1236 | { | 1534 | { |
1237 | struct decode_cache *c = &ctxt->decode; | 1535 | struct decode_cache *c = &ctxt->decode; |
1238 | int rc = X86EMUL_CONTINUE; | 1536 | int rc = X86EMUL_CONTINUE; |
@@ -1245,7 +1543,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, | |||
1245 | --reg; | 1543 | --reg; |
1246 | } | 1544 | } |
1247 | 1545 | ||
1248 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); | 1546 | rc = emulate_pop(ctxt, &c->regs[reg], c->op_bytes); |
1249 | if (rc != X86EMUL_CONTINUE) | 1547 | if (rc != X86EMUL_CONTINUE) |
1250 | break; | 1548 | break; |
1251 | --reg; | 1549 | --reg; |
@@ -1265,37 +1563,32 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, | |||
1265 | 1563 | ||
1266 | /* TODO: Add limit checks */ | 1564 | /* TODO: Add limit checks */ |
1267 | c->src.val = ctxt->eflags; | 1565 | c->src.val = ctxt->eflags; |
1268 | emulate_push(ctxt, ops); | 1566 | rc = em_push(ctxt); |
1269 | rc = writeback(ctxt, ops); | ||
1270 | if (rc != X86EMUL_CONTINUE) | 1567 | if (rc != X86EMUL_CONTINUE) |
1271 | return rc; | 1568 | return rc; |
1272 | 1569 | ||
1273 | ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); | 1570 | ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); |
1274 | 1571 | ||
1275 | c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | 1572 | c->src.val = get_segment_selector(ctxt, VCPU_SREG_CS); |
1276 | emulate_push(ctxt, ops); | 1573 | rc = em_push(ctxt); |
1277 | rc = writeback(ctxt, ops); | ||
1278 | if (rc != X86EMUL_CONTINUE) | 1574 | if (rc != X86EMUL_CONTINUE) |
1279 | return rc; | 1575 | return rc; |
1280 | 1576 | ||
1281 | c->src.val = c->eip; | 1577 | c->src.val = c->eip; |
1282 | emulate_push(ctxt, ops); | 1578 | rc = em_push(ctxt); |
1283 | rc = writeback(ctxt, ops); | ||
1284 | if (rc != X86EMUL_CONTINUE) | 1579 | if (rc != X86EMUL_CONTINUE) |
1285 | return rc; | 1580 | return rc; |
1286 | 1581 | ||
1287 | c->dst.type = OP_NONE; | 1582 | ops->get_idt(ctxt, &dt); |
1288 | |||
1289 | ops->get_idt(&dt, ctxt->vcpu); | ||
1290 | 1583 | ||
1291 | eip_addr = dt.address + (irq << 2); | 1584 | eip_addr = dt.address + (irq << 2); |
1292 | cs_addr = dt.address + (irq << 2) + 2; | 1585 | cs_addr = dt.address + (irq << 2) + 2; |
1293 | 1586 | ||
1294 | rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &ctxt->exception); | 1587 | rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception); |
1295 | if (rc != X86EMUL_CONTINUE) | 1588 | if (rc != X86EMUL_CONTINUE) |
1296 | return rc; | 1589 | return rc; |
1297 | 1590 | ||
1298 | rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &ctxt->exception); | 1591 | rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception); |
1299 | if (rc != X86EMUL_CONTINUE) | 1592 | if (rc != X86EMUL_CONTINUE) |
1300 | return rc; | 1593 | return rc; |
1301 | 1594 | ||
@@ -1339,7 +1632,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, | |||
1339 | 1632 | ||
1340 | /* TODO: Add stack limit check */ | 1633 | /* TODO: Add stack limit check */ |
1341 | 1634 | ||
1342 | rc = emulate_pop(ctxt, ops, &temp_eip, c->op_bytes); | 1635 | rc = emulate_pop(ctxt, &temp_eip, c->op_bytes); |
1343 | 1636 | ||
1344 | if (rc != X86EMUL_CONTINUE) | 1637 | if (rc != X86EMUL_CONTINUE) |
1345 | return rc; | 1638 | return rc; |
@@ -1347,12 +1640,12 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, | |||
1347 | if (temp_eip & ~0xffff) | 1640 | if (temp_eip & ~0xffff) |
1348 | return emulate_gp(ctxt, 0); | 1641 | return emulate_gp(ctxt, 0); |
1349 | 1642 | ||
1350 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); | 1643 | rc = emulate_pop(ctxt, &cs, c->op_bytes); |
1351 | 1644 | ||
1352 | if (rc != X86EMUL_CONTINUE) | 1645 | if (rc != X86EMUL_CONTINUE) |
1353 | return rc; | 1646 | return rc; |
1354 | 1647 | ||
1355 | rc = emulate_pop(ctxt, ops, &temp_eflags, c->op_bytes); | 1648 | rc = emulate_pop(ctxt, &temp_eflags, c->op_bytes); |
1356 | 1649 | ||
1357 | if (rc != X86EMUL_CONTINUE) | 1650 | if (rc != X86EMUL_CONTINUE) |
1358 | return rc; | 1651 | return rc; |
@@ -1394,15 +1687,31 @@ static inline int emulate_iret(struct x86_emulate_ctxt *ctxt, | |||
1394 | } | 1687 | } |
1395 | } | 1688 | } |
1396 | 1689 | ||
1397 | static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | 1690 | static int em_jmp_far(struct x86_emulate_ctxt *ctxt) |
1398 | struct x86_emulate_ops *ops) | 1691 | { |
1692 | struct decode_cache *c = &ctxt->decode; | ||
1693 | int rc; | ||
1694 | unsigned short sel; | ||
1695 | |||
1696 | memcpy(&sel, c->src.valptr + c->op_bytes, 2); | ||
1697 | |||
1698 | rc = load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS); | ||
1699 | if (rc != X86EMUL_CONTINUE) | ||
1700 | return rc; | ||
1701 | |||
1702 | c->eip = 0; | ||
1703 | memcpy(&c->eip, c->src.valptr, c->op_bytes); | ||
1704 | return X86EMUL_CONTINUE; | ||
1705 | } | ||
1706 | |||
1707 | static int em_grp1a(struct x86_emulate_ctxt *ctxt) | ||
1399 | { | 1708 | { |
1400 | struct decode_cache *c = &ctxt->decode; | 1709 | struct decode_cache *c = &ctxt->decode; |
1401 | 1710 | ||
1402 | return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); | 1711 | return emulate_pop(ctxt, &c->dst.val, c->dst.bytes); |
1403 | } | 1712 | } |
1404 | 1713 | ||
1405 | static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) | 1714 | static int em_grp2(struct x86_emulate_ctxt *ctxt) |
1406 | { | 1715 | { |
1407 | struct decode_cache *c = &ctxt->decode; | 1716 | struct decode_cache *c = &ctxt->decode; |
1408 | switch (c->modrm_reg) { | 1717 | switch (c->modrm_reg) { |
@@ -1429,10 +1738,10 @@ static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) | |||
1429 | emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags); | 1738 | emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags); |
1430 | break; | 1739 | break; |
1431 | } | 1740 | } |
1741 | return X86EMUL_CONTINUE; | ||
1432 | } | 1742 | } |
1433 | 1743 | ||
1434 | static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | 1744 | static int em_grp3(struct x86_emulate_ctxt *ctxt) |
1435 | struct x86_emulate_ops *ops) | ||
1436 | { | 1745 | { |
1437 | struct decode_cache *c = &ctxt->decode; | 1746 | struct decode_cache *c = &ctxt->decode; |
1438 | unsigned long *rax = &c->regs[VCPU_REGS_RAX]; | 1747 | unsigned long *rax = &c->regs[VCPU_REGS_RAX]; |
@@ -1471,10 +1780,10 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | |||
1471 | return X86EMUL_CONTINUE; | 1780 | return X86EMUL_CONTINUE; |
1472 | } | 1781 | } |
1473 | 1782 | ||
1474 | static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | 1783 | static int em_grp45(struct x86_emulate_ctxt *ctxt) |
1475 | struct x86_emulate_ops *ops) | ||
1476 | { | 1784 | { |
1477 | struct decode_cache *c = &ctxt->decode; | 1785 | struct decode_cache *c = &ctxt->decode; |
1786 | int rc = X86EMUL_CONTINUE; | ||
1478 | 1787 | ||
1479 | switch (c->modrm_reg) { | 1788 | switch (c->modrm_reg) { |
1480 | case 0: /* inc */ | 1789 | case 0: /* inc */ |
@@ -1488,21 +1797,23 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | |||
1488 | old_eip = c->eip; | 1797 | old_eip = c->eip; |
1489 | c->eip = c->src.val; | 1798 | c->eip = c->src.val; |
1490 | c->src.val = old_eip; | 1799 | c->src.val = old_eip; |
1491 | emulate_push(ctxt, ops); | 1800 | rc = em_push(ctxt); |
1492 | break; | 1801 | break; |
1493 | } | 1802 | } |
1494 | case 4: /* jmp abs */ | 1803 | case 4: /* jmp abs */ |
1495 | c->eip = c->src.val; | 1804 | c->eip = c->src.val; |
1496 | break; | 1805 | break; |
1806 | case 5: /* jmp far */ | ||
1807 | rc = em_jmp_far(ctxt); | ||
1808 | break; | ||
1497 | case 6: /* push */ | 1809 | case 6: /* push */ |
1498 | emulate_push(ctxt, ops); | 1810 | rc = em_push(ctxt); |
1499 | break; | 1811 | break; |
1500 | } | 1812 | } |
1501 | return X86EMUL_CONTINUE; | 1813 | return rc; |
1502 | } | 1814 | } |
1503 | 1815 | ||
1504 | static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | 1816 | static int em_grp9(struct x86_emulate_ctxt *ctxt) |
1505 | struct x86_emulate_ops *ops) | ||
1506 | { | 1817 | { |
1507 | struct decode_cache *c = &ctxt->decode; | 1818 | struct decode_cache *c = &ctxt->decode; |
1508 | u64 old = c->dst.orig_val64; | 1819 | u64 old = c->dst.orig_val64; |
@@ -1528,12 +1839,12 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | |||
1528 | int rc; | 1839 | int rc; |
1529 | unsigned long cs; | 1840 | unsigned long cs; |
1530 | 1841 | ||
1531 | rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); | 1842 | rc = emulate_pop(ctxt, &c->eip, c->op_bytes); |
1532 | if (rc != X86EMUL_CONTINUE) | 1843 | if (rc != X86EMUL_CONTINUE) |
1533 | return rc; | 1844 | return rc; |
1534 | if (c->op_bytes == 4) | 1845 | if (c->op_bytes == 4) |
1535 | c->eip = (u32)c->eip; | 1846 | c->eip = (u32)c->eip; |
1536 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); | 1847 | rc = emulate_pop(ctxt, &cs, c->op_bytes); |
1537 | if (rc != X86EMUL_CONTINUE) | 1848 | if (rc != X86EMUL_CONTINUE) |
1538 | return rc; | 1849 | return rc; |
1539 | rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); | 1850 | rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); |
@@ -1562,8 +1873,10 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, | |||
1562 | struct x86_emulate_ops *ops, struct desc_struct *cs, | 1873 | struct x86_emulate_ops *ops, struct desc_struct *cs, |
1563 | struct desc_struct *ss) | 1874 | struct desc_struct *ss) |
1564 | { | 1875 | { |
1876 | u16 selector; | ||
1877 | |||
1565 | memset(cs, 0, sizeof(struct desc_struct)); | 1878 | memset(cs, 0, sizeof(struct desc_struct)); |
1566 | ops->get_cached_descriptor(cs, NULL, VCPU_SREG_CS, ctxt->vcpu); | 1879 | ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS); |
1567 | memset(ss, 0, sizeof(struct desc_struct)); | 1880 | memset(ss, 0, sizeof(struct desc_struct)); |
1568 | 1881 | ||
1569 | cs->l = 0; /* will be adjusted later */ | 1882 | cs->l = 0; /* will be adjusted later */ |
@@ -1593,44 +1906,44 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1593 | struct desc_struct cs, ss; | 1906 | struct desc_struct cs, ss; |
1594 | u64 msr_data; | 1907 | u64 msr_data; |
1595 | u16 cs_sel, ss_sel; | 1908 | u16 cs_sel, ss_sel; |
1909 | u64 efer = 0; | ||
1596 | 1910 | ||
1597 | /* syscall is not available in real mode */ | 1911 | /* syscall is not available in real mode */ |
1598 | if (ctxt->mode == X86EMUL_MODE_REAL || | 1912 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1599 | ctxt->mode == X86EMUL_MODE_VM86) | 1913 | ctxt->mode == X86EMUL_MODE_VM86) |
1600 | return emulate_ud(ctxt); | 1914 | return emulate_ud(ctxt); |
1601 | 1915 | ||
1916 | ops->get_msr(ctxt, MSR_EFER, &efer); | ||
1602 | setup_syscalls_segments(ctxt, ops, &cs, &ss); | 1917 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
1603 | 1918 | ||
1604 | ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); | 1919 | ops->get_msr(ctxt, MSR_STAR, &msr_data); |
1605 | msr_data >>= 32; | 1920 | msr_data >>= 32; |
1606 | cs_sel = (u16)(msr_data & 0xfffc); | 1921 | cs_sel = (u16)(msr_data & 0xfffc); |
1607 | ss_sel = (u16)(msr_data + 8); | 1922 | ss_sel = (u16)(msr_data + 8); |
1608 | 1923 | ||
1609 | if (is_long_mode(ctxt->vcpu)) { | 1924 | if (efer & EFER_LMA) { |
1610 | cs.d = 0; | 1925 | cs.d = 0; |
1611 | cs.l = 1; | 1926 | cs.l = 1; |
1612 | } | 1927 | } |
1613 | ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); | 1928 | ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); |
1614 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); | 1929 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); |
1615 | ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); | ||
1616 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | ||
1617 | 1930 | ||
1618 | c->regs[VCPU_REGS_RCX] = c->eip; | 1931 | c->regs[VCPU_REGS_RCX] = c->eip; |
1619 | if (is_long_mode(ctxt->vcpu)) { | 1932 | if (efer & EFER_LMA) { |
1620 | #ifdef CONFIG_X86_64 | 1933 | #ifdef CONFIG_X86_64 |
1621 | c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; | 1934 | c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; |
1622 | 1935 | ||
1623 | ops->get_msr(ctxt->vcpu, | 1936 | ops->get_msr(ctxt, |
1624 | ctxt->mode == X86EMUL_MODE_PROT64 ? | 1937 | ctxt->mode == X86EMUL_MODE_PROT64 ? |
1625 | MSR_LSTAR : MSR_CSTAR, &msr_data); | 1938 | MSR_LSTAR : MSR_CSTAR, &msr_data); |
1626 | c->eip = msr_data; | 1939 | c->eip = msr_data; |
1627 | 1940 | ||
1628 | ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); | 1941 | ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); |
1629 | ctxt->eflags &= ~(msr_data | EFLG_RF); | 1942 | ctxt->eflags &= ~(msr_data | EFLG_RF); |
1630 | #endif | 1943 | #endif |
1631 | } else { | 1944 | } else { |
1632 | /* legacy mode */ | 1945 | /* legacy mode */ |
1633 | ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); | 1946 | ops->get_msr(ctxt, MSR_STAR, &msr_data); |
1634 | c->eip = (u32)msr_data; | 1947 | c->eip = (u32)msr_data; |
1635 | 1948 | ||
1636 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); | 1949 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); |
@@ -1646,7 +1959,9 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1646 | struct desc_struct cs, ss; | 1959 | struct desc_struct cs, ss; |
1647 | u64 msr_data; | 1960 | u64 msr_data; |
1648 | u16 cs_sel, ss_sel; | 1961 | u16 cs_sel, ss_sel; |
1962 | u64 efer = 0; | ||
1649 | 1963 | ||
1964 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | ||
1650 | /* inject #GP if in real mode */ | 1965 | /* inject #GP if in real mode */ |
1651 | if (ctxt->mode == X86EMUL_MODE_REAL) | 1966 | if (ctxt->mode == X86EMUL_MODE_REAL) |
1652 | return emulate_gp(ctxt, 0); | 1967 | return emulate_gp(ctxt, 0); |
@@ -1659,7 +1974,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1659 | 1974 | ||
1660 | setup_syscalls_segments(ctxt, ops, &cs, &ss); | 1975 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
1661 | 1976 | ||
1662 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); | 1977 | ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); |
1663 | switch (ctxt->mode) { | 1978 | switch (ctxt->mode) { |
1664 | case X86EMUL_MODE_PROT32: | 1979 | case X86EMUL_MODE_PROT32: |
1665 | if ((msr_data & 0xfffc) == 0x0) | 1980 | if ((msr_data & 0xfffc) == 0x0) |
@@ -1676,21 +1991,18 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1676 | cs_sel &= ~SELECTOR_RPL_MASK; | 1991 | cs_sel &= ~SELECTOR_RPL_MASK; |
1677 | ss_sel = cs_sel + 8; | 1992 | ss_sel = cs_sel + 8; |
1678 | ss_sel &= ~SELECTOR_RPL_MASK; | 1993 | ss_sel &= ~SELECTOR_RPL_MASK; |
1679 | if (ctxt->mode == X86EMUL_MODE_PROT64 | 1994 | if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) { |
1680 | || is_long_mode(ctxt->vcpu)) { | ||
1681 | cs.d = 0; | 1995 | cs.d = 0; |
1682 | cs.l = 1; | 1996 | cs.l = 1; |
1683 | } | 1997 | } |
1684 | 1998 | ||
1685 | ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); | 1999 | ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); |
1686 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); | 2000 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); |
1687 | ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); | ||
1688 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | ||
1689 | 2001 | ||
1690 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); | 2002 | ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data); |
1691 | c->eip = msr_data; | 2003 | c->eip = msr_data; |
1692 | 2004 | ||
1693 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); | 2005 | ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); |
1694 | c->regs[VCPU_REGS_RSP] = msr_data; | 2006 | c->regs[VCPU_REGS_RSP] = msr_data; |
1695 | 2007 | ||
1696 | return X86EMUL_CONTINUE; | 2008 | return X86EMUL_CONTINUE; |
@@ -1719,7 +2031,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1719 | 2031 | ||
1720 | cs.dpl = 3; | 2032 | cs.dpl = 3; |
1721 | ss.dpl = 3; | 2033 | ss.dpl = 3; |
1722 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); | 2034 | ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); |
1723 | switch (usermode) { | 2035 | switch (usermode) { |
1724 | case X86EMUL_MODE_PROT32: | 2036 | case X86EMUL_MODE_PROT32: |
1725 | cs_sel = (u16)(msr_data + 16); | 2037 | cs_sel = (u16)(msr_data + 16); |
@@ -1739,10 +2051,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1739 | cs_sel |= SELECTOR_RPL_MASK; | 2051 | cs_sel |= SELECTOR_RPL_MASK; |
1740 | ss_sel |= SELECTOR_RPL_MASK; | 2052 | ss_sel |= SELECTOR_RPL_MASK; |
1741 | 2053 | ||
1742 | ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); | 2054 | ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); |
1743 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); | 2055 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); |
1744 | ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); | ||
1745 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | ||
1746 | 2056 | ||
1747 | c->eip = c->regs[VCPU_REGS_RDX]; | 2057 | c->eip = c->regs[VCPU_REGS_RDX]; |
1748 | c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; | 2058 | c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; |
@@ -1759,7 +2069,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, | |||
1759 | if (ctxt->mode == X86EMUL_MODE_VM86) | 2069 | if (ctxt->mode == X86EMUL_MODE_VM86) |
1760 | return true; | 2070 | return true; |
1761 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 2071 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
1762 | return ops->cpl(ctxt->vcpu) > iopl; | 2072 | return ops->cpl(ctxt) > iopl; |
1763 | } | 2073 | } |
1764 | 2074 | ||
1765 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | 2075 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, |
@@ -1769,11 +2079,11 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | |||
1769 | struct desc_struct tr_seg; | 2079 | struct desc_struct tr_seg; |
1770 | u32 base3; | 2080 | u32 base3; |
1771 | int r; | 2081 | int r; |
1772 | u16 io_bitmap_ptr, perm, bit_idx = port & 0x7; | 2082 | u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7; |
1773 | unsigned mask = (1 << len) - 1; | 2083 | unsigned mask = (1 << len) - 1; |
1774 | unsigned long base; | 2084 | unsigned long base; |
1775 | 2085 | ||
1776 | ops->get_cached_descriptor(&tr_seg, &base3, VCPU_SREG_TR, ctxt->vcpu); | 2086 | ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR); |
1777 | if (!tr_seg.p) | 2087 | if (!tr_seg.p) |
1778 | return false; | 2088 | return false; |
1779 | if (desc_limit_scaled(&tr_seg) < 103) | 2089 | if (desc_limit_scaled(&tr_seg) < 103) |
@@ -1782,13 +2092,12 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | |||
1782 | #ifdef CONFIG_X86_64 | 2092 | #ifdef CONFIG_X86_64 |
1783 | base |= ((u64)base3) << 32; | 2093 | base |= ((u64)base3) << 32; |
1784 | #endif | 2094 | #endif |
1785 | r = ops->read_std(base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, NULL); | 2095 | r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL); |
1786 | if (r != X86EMUL_CONTINUE) | 2096 | if (r != X86EMUL_CONTINUE) |
1787 | return false; | 2097 | return false; |
1788 | if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) | 2098 | if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) |
1789 | return false; | 2099 | return false; |
1790 | r = ops->read_std(base + io_bitmap_ptr + port/8, &perm, 2, ctxt->vcpu, | 2100 | r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL); |
1791 | NULL); | ||
1792 | if (r != X86EMUL_CONTINUE) | 2101 | if (r != X86EMUL_CONTINUE) |
1793 | return false; | 2102 | return false; |
1794 | if ((perm >> bit_idx) & mask) | 2103 | if ((perm >> bit_idx) & mask) |
@@ -1829,11 +2138,11 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, | |||
1829 | tss->si = c->regs[VCPU_REGS_RSI]; | 2138 | tss->si = c->regs[VCPU_REGS_RSI]; |
1830 | tss->di = c->regs[VCPU_REGS_RDI]; | 2139 | tss->di = c->regs[VCPU_REGS_RDI]; |
1831 | 2140 | ||
1832 | tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); | 2141 | tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); |
1833 | tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | 2142 | tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); |
1834 | tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); | 2143 | tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS); |
1835 | tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); | 2144 | tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS); |
1836 | tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); | 2145 | tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR); |
1837 | } | 2146 | } |
1838 | 2147 | ||
1839 | static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, | 2148 | static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, |
@@ -1858,11 +2167,11 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, | |||
1858 | * SDM says that segment selectors are loaded before segment | 2167 | * SDM says that segment selectors are loaded before segment |
1859 | * descriptors | 2168 | * descriptors |
1860 | */ | 2169 | */ |
1861 | ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); | 2170 | set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR); |
1862 | ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); | 2171 | set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); |
1863 | ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); | 2172 | set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); |
1864 | ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); | 2173 | set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); |
1865 | ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); | 2174 | set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); |
1866 | 2175 | ||
1867 | /* | 2176 | /* |
1868 | * Now load segment descriptors. If fault happenes at this stage | 2177 | * Now load segment descriptors. If fault happenes at this stage |
@@ -1896,7 +2205,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
1896 | int ret; | 2205 | int ret; |
1897 | u32 new_tss_base = get_desc_base(new_desc); | 2206 | u32 new_tss_base = get_desc_base(new_desc); |
1898 | 2207 | ||
1899 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2208 | ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, |
1900 | &ctxt->exception); | 2209 | &ctxt->exception); |
1901 | if (ret != X86EMUL_CONTINUE) | 2210 | if (ret != X86EMUL_CONTINUE) |
1902 | /* FIXME: need to provide precise fault address */ | 2211 | /* FIXME: need to provide precise fault address */ |
@@ -1904,13 +2213,13 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
1904 | 2213 | ||
1905 | save_state_to_tss16(ctxt, ops, &tss_seg); | 2214 | save_state_to_tss16(ctxt, ops, &tss_seg); |
1906 | 2215 | ||
1907 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2216 | ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, |
1908 | &ctxt->exception); | 2217 | &ctxt->exception); |
1909 | if (ret != X86EMUL_CONTINUE) | 2218 | if (ret != X86EMUL_CONTINUE) |
1910 | /* FIXME: need to provide precise fault address */ | 2219 | /* FIXME: need to provide precise fault address */ |
1911 | return ret; | 2220 | return ret; |
1912 | 2221 | ||
1913 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2222 | ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, |
1914 | &ctxt->exception); | 2223 | &ctxt->exception); |
1915 | if (ret != X86EMUL_CONTINUE) | 2224 | if (ret != X86EMUL_CONTINUE) |
1916 | /* FIXME: need to provide precise fault address */ | 2225 | /* FIXME: need to provide precise fault address */ |
@@ -1919,10 +2228,10 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
1919 | if (old_tss_sel != 0xffff) { | 2228 | if (old_tss_sel != 0xffff) { |
1920 | tss_seg.prev_task_link = old_tss_sel; | 2229 | tss_seg.prev_task_link = old_tss_sel; |
1921 | 2230 | ||
1922 | ret = ops->write_std(new_tss_base, | 2231 | ret = ops->write_std(ctxt, new_tss_base, |
1923 | &tss_seg.prev_task_link, | 2232 | &tss_seg.prev_task_link, |
1924 | sizeof tss_seg.prev_task_link, | 2233 | sizeof tss_seg.prev_task_link, |
1925 | ctxt->vcpu, &ctxt->exception); | 2234 | &ctxt->exception); |
1926 | if (ret != X86EMUL_CONTINUE) | 2235 | if (ret != X86EMUL_CONTINUE) |
1927 | /* FIXME: need to provide precise fault address */ | 2236 | /* FIXME: need to provide precise fault address */ |
1928 | return ret; | 2237 | return ret; |
@@ -1937,7 +2246,7 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, | |||
1937 | { | 2246 | { |
1938 | struct decode_cache *c = &ctxt->decode; | 2247 | struct decode_cache *c = &ctxt->decode; |
1939 | 2248 | ||
1940 | tss->cr3 = ops->get_cr(3, ctxt->vcpu); | 2249 | tss->cr3 = ops->get_cr(ctxt, 3); |
1941 | tss->eip = c->eip; | 2250 | tss->eip = c->eip; |
1942 | tss->eflags = ctxt->eflags; | 2251 | tss->eflags = ctxt->eflags; |
1943 | tss->eax = c->regs[VCPU_REGS_RAX]; | 2252 | tss->eax = c->regs[VCPU_REGS_RAX]; |
@@ -1949,13 +2258,13 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, | |||
1949 | tss->esi = c->regs[VCPU_REGS_RSI]; | 2258 | tss->esi = c->regs[VCPU_REGS_RSI]; |
1950 | tss->edi = c->regs[VCPU_REGS_RDI]; | 2259 | tss->edi = c->regs[VCPU_REGS_RDI]; |
1951 | 2260 | ||
1952 | tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); | 2261 | tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); |
1953 | tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | 2262 | tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); |
1954 | tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); | 2263 | tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS); |
1955 | tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); | 2264 | tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS); |
1956 | tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); | 2265 | tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS); |
1957 | tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); | 2266 | tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS); |
1958 | tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); | 2267 | tss->ldt_selector = get_segment_selector(ctxt, VCPU_SREG_LDTR); |
1959 | } | 2268 | } |
1960 | 2269 | ||
1961 | static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | 2270 | static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, |
@@ -1965,7 +2274,7 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
1965 | struct decode_cache *c = &ctxt->decode; | 2274 | struct decode_cache *c = &ctxt->decode; |
1966 | int ret; | 2275 | int ret; |
1967 | 2276 | ||
1968 | if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) | 2277 | if (ops->set_cr(ctxt, 3, tss->cr3)) |
1969 | return emulate_gp(ctxt, 0); | 2278 | return emulate_gp(ctxt, 0); |
1970 | c->eip = tss->eip; | 2279 | c->eip = tss->eip; |
1971 | ctxt->eflags = tss->eflags | 2; | 2280 | ctxt->eflags = tss->eflags | 2; |
@@ -1982,13 +2291,13 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
1982 | * SDM says that segment selectors are loaded before segment | 2291 | * SDM says that segment selectors are loaded before segment |
1983 | * descriptors | 2292 | * descriptors |
1984 | */ | 2293 | */ |
1985 | ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); | 2294 | set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR); |
1986 | ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); | 2295 | set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); |
1987 | ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); | 2296 | set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); |
1988 | ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); | 2297 | set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); |
1989 | ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); | 2298 | set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); |
1990 | ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); | 2299 | set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS); |
1991 | ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); | 2300 | set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); |
1992 | 2301 | ||
1993 | /* | 2302 | /* |
1994 | * Now load segment descriptors. If a fault happens at this stage | 2303 | * Now load segment descriptors. If a fault happens at this stage |
@@ -2028,7 +2337,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2028 | int ret; | 2337 | int ret; |
2029 | u32 new_tss_base = get_desc_base(new_desc); | 2338 | u32 new_tss_base = get_desc_base(new_desc); |
2030 | 2339 | ||
2031 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2340 | ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, |
2032 | &ctxt->exception); | 2341 | &ctxt->exception); |
2033 | if (ret != X86EMUL_CONTINUE) | 2342 | if (ret != X86EMUL_CONTINUE) |
2034 | /* FIXME: need to provide precise fault address */ | 2343 | /* FIXME: need to provide precise fault address */ |
@@ -2036,13 +2345,13 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2036 | 2345 | ||
2037 | save_state_to_tss32(ctxt, ops, &tss_seg); | 2346 | save_state_to_tss32(ctxt, ops, &tss_seg); |
2038 | 2347 | ||
2039 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2348 | ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, |
2040 | &ctxt->exception); | 2349 | &ctxt->exception); |
2041 | if (ret != X86EMUL_CONTINUE) | 2350 | if (ret != X86EMUL_CONTINUE) |
2042 | /* FIXME: need to provide precise fault address */ | 2351 | /* FIXME: need to provide precise fault address */ |
2043 | return ret; | 2352 | return ret; |
2044 | 2353 | ||
2045 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2354 | ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, |
2046 | &ctxt->exception); | 2355 | &ctxt->exception); |
2047 | if (ret != X86EMUL_CONTINUE) | 2356 | if (ret != X86EMUL_CONTINUE) |
2048 | /* FIXME: need to provide precise fault address */ | 2357 | /* FIXME: need to provide precise fault address */ |
@@ -2051,10 +2360,10 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2051 | if (old_tss_sel != 0xffff) { | 2360 | if (old_tss_sel != 0xffff) { |
2052 | tss_seg.prev_task_link = old_tss_sel; | 2361 | tss_seg.prev_task_link = old_tss_sel; |
2053 | 2362 | ||
2054 | ret = ops->write_std(new_tss_base, | 2363 | ret = ops->write_std(ctxt, new_tss_base, |
2055 | &tss_seg.prev_task_link, | 2364 | &tss_seg.prev_task_link, |
2056 | sizeof tss_seg.prev_task_link, | 2365 | sizeof tss_seg.prev_task_link, |
2057 | ctxt->vcpu, &ctxt->exception); | 2366 | &ctxt->exception); |
2058 | if (ret != X86EMUL_CONTINUE) | 2367 | if (ret != X86EMUL_CONTINUE) |
2059 | /* FIXME: need to provide precise fault address */ | 2368 | /* FIXME: need to provide precise fault address */ |
2060 | return ret; | 2369 | return ret; |
@@ -2070,9 +2379,9 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2070 | { | 2379 | { |
2071 | struct desc_struct curr_tss_desc, next_tss_desc; | 2380 | struct desc_struct curr_tss_desc, next_tss_desc; |
2072 | int ret; | 2381 | int ret; |
2073 | u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); | 2382 | u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR); |
2074 | ulong old_tss_base = | 2383 | ulong old_tss_base = |
2075 | ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu); | 2384 | ops->get_cached_segment_base(ctxt, VCPU_SREG_TR); |
2076 | u32 desc_limit; | 2385 | u32 desc_limit; |
2077 | 2386 | ||
2078 | /* FIXME: old_tss_base == ~0 ? */ | 2387 | /* FIXME: old_tss_base == ~0 ? */ |
@@ -2088,7 +2397,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2088 | 2397 | ||
2089 | if (reason != TASK_SWITCH_IRET) { | 2398 | if (reason != TASK_SWITCH_IRET) { |
2090 | if ((tss_selector & 3) > next_tss_desc.dpl || | 2399 | if ((tss_selector & 3) > next_tss_desc.dpl || |
2091 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) | 2400 | ops->cpl(ctxt) > next_tss_desc.dpl) |
2092 | return emulate_gp(ctxt, 0); | 2401 | return emulate_gp(ctxt, 0); |
2093 | } | 2402 | } |
2094 | 2403 | ||
@@ -2132,9 +2441,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2132 | &next_tss_desc); | 2441 | &next_tss_desc); |
2133 | } | 2442 | } |
2134 | 2443 | ||
2135 | ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); | 2444 | ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS); |
2136 | ops->set_cached_descriptor(&next_tss_desc, 0, VCPU_SREG_TR, ctxt->vcpu); | 2445 | ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR); |
2137 | ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); | ||
2138 | 2446 | ||
2139 | if (has_error_code) { | 2447 | if (has_error_code) { |
2140 | struct decode_cache *c = &ctxt->decode; | 2448 | struct decode_cache *c = &ctxt->decode; |
@@ -2142,7 +2450,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2142 | c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; | 2450 | c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; |
2143 | c->lock_prefix = 0; | 2451 | c->lock_prefix = 0; |
2144 | c->src.val = (unsigned long) error_code; | 2452 | c->src.val = (unsigned long) error_code; |
2145 | emulate_push(ctxt, ops); | 2453 | ret = em_push(ctxt); |
2146 | } | 2454 | } |
2147 | 2455 | ||
2148 | return ret; | 2456 | return ret; |
@@ -2162,13 +2470,10 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2162 | rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, | 2470 | rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, |
2163 | has_error_code, error_code); | 2471 | has_error_code, error_code); |
2164 | 2472 | ||
2165 | if (rc == X86EMUL_CONTINUE) { | 2473 | if (rc == X86EMUL_CONTINUE) |
2166 | rc = writeback(ctxt, ops); | 2474 | ctxt->eip = c->eip; |
2167 | if (rc == X86EMUL_CONTINUE) | ||
2168 | ctxt->eip = c->eip; | ||
2169 | } | ||
2170 | 2475 | ||
2171 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 2476 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; |
2172 | } | 2477 | } |
2173 | 2478 | ||
2174 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, | 2479 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, |
@@ -2182,12 +2487,6 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, | |||
2182 | op->addr.mem.seg = seg; | 2487 | op->addr.mem.seg = seg; |
2183 | } | 2488 | } |
2184 | 2489 | ||
2185 | static int em_push(struct x86_emulate_ctxt *ctxt) | ||
2186 | { | ||
2187 | emulate_push(ctxt, ctxt->ops); | ||
2188 | return X86EMUL_CONTINUE; | ||
2189 | } | ||
2190 | |||
2191 | static int em_das(struct x86_emulate_ctxt *ctxt) | 2490 | static int em_das(struct x86_emulate_ctxt *ctxt) |
2192 | { | 2491 | { |
2193 | struct decode_cache *c = &ctxt->decode; | 2492 | struct decode_cache *c = &ctxt->decode; |
@@ -2234,7 +2533,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) | |||
2234 | ulong old_eip; | 2533 | ulong old_eip; |
2235 | int rc; | 2534 | int rc; |
2236 | 2535 | ||
2237 | old_cs = ctxt->ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | 2536 | old_cs = get_segment_selector(ctxt, VCPU_SREG_CS); |
2238 | old_eip = c->eip; | 2537 | old_eip = c->eip; |
2239 | 2538 | ||
2240 | memcpy(&sel, c->src.valptr + c->op_bytes, 2); | 2539 | memcpy(&sel, c->src.valptr + c->op_bytes, 2); |
@@ -2245,20 +2544,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) | |||
2245 | memcpy(&c->eip, c->src.valptr, c->op_bytes); | 2544 | memcpy(&c->eip, c->src.valptr, c->op_bytes); |
2246 | 2545 | ||
2247 | c->src.val = old_cs; | 2546 | c->src.val = old_cs; |
2248 | emulate_push(ctxt, ctxt->ops); | 2547 | rc = em_push(ctxt); |
2249 | rc = writeback(ctxt, ctxt->ops); | ||
2250 | if (rc != X86EMUL_CONTINUE) | 2548 | if (rc != X86EMUL_CONTINUE) |
2251 | return rc; | 2549 | return rc; |
2252 | 2550 | ||
2253 | c->src.val = old_eip; | 2551 | c->src.val = old_eip; |
2254 | emulate_push(ctxt, ctxt->ops); | 2552 | return em_push(ctxt); |
2255 | rc = writeback(ctxt, ctxt->ops); | ||
2256 | if (rc != X86EMUL_CONTINUE) | ||
2257 | return rc; | ||
2258 | |||
2259 | c->dst.type = OP_NONE; | ||
2260 | |||
2261 | return X86EMUL_CONTINUE; | ||
2262 | } | 2553 | } |
2263 | 2554 | ||
2264 | static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) | 2555 | static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) |
@@ -2269,13 +2560,79 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) | |||
2269 | c->dst.type = OP_REG; | 2560 | c->dst.type = OP_REG; |
2270 | c->dst.addr.reg = &c->eip; | 2561 | c->dst.addr.reg = &c->eip; |
2271 | c->dst.bytes = c->op_bytes; | 2562 | c->dst.bytes = c->op_bytes; |
2272 | rc = emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); | 2563 | rc = emulate_pop(ctxt, &c->dst.val, c->op_bytes); |
2273 | if (rc != X86EMUL_CONTINUE) | 2564 | if (rc != X86EMUL_CONTINUE) |
2274 | return rc; | 2565 | return rc; |
2275 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val); | 2566 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val); |
2276 | return X86EMUL_CONTINUE; | 2567 | return X86EMUL_CONTINUE; |
2277 | } | 2568 | } |
2278 | 2569 | ||
2570 | static int em_add(struct x86_emulate_ctxt *ctxt) | ||
2571 | { | ||
2572 | struct decode_cache *c = &ctxt->decode; | ||
2573 | |||
2574 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); | ||
2575 | return X86EMUL_CONTINUE; | ||
2576 | } | ||
2577 | |||
2578 | static int em_or(struct x86_emulate_ctxt *ctxt) | ||
2579 | { | ||
2580 | struct decode_cache *c = &ctxt->decode; | ||
2581 | |||
2582 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); | ||
2583 | return X86EMUL_CONTINUE; | ||
2584 | } | ||
2585 | |||
2586 | static int em_adc(struct x86_emulate_ctxt *ctxt) | ||
2587 | { | ||
2588 | struct decode_cache *c = &ctxt->decode; | ||
2589 | |||
2590 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); | ||
2591 | return X86EMUL_CONTINUE; | ||
2592 | } | ||
2593 | |||
2594 | static int em_sbb(struct x86_emulate_ctxt *ctxt) | ||
2595 | { | ||
2596 | struct decode_cache *c = &ctxt->decode; | ||
2597 | |||
2598 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); | ||
2599 | return X86EMUL_CONTINUE; | ||
2600 | } | ||
2601 | |||
2602 | static int em_and(struct x86_emulate_ctxt *ctxt) | ||
2603 | { | ||
2604 | struct decode_cache *c = &ctxt->decode; | ||
2605 | |||
2606 | emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); | ||
2607 | return X86EMUL_CONTINUE; | ||
2608 | } | ||
2609 | |||
2610 | static int em_sub(struct x86_emulate_ctxt *ctxt) | ||
2611 | { | ||
2612 | struct decode_cache *c = &ctxt->decode; | ||
2613 | |||
2614 | emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); | ||
2615 | return X86EMUL_CONTINUE; | ||
2616 | } | ||
2617 | |||
2618 | static int em_xor(struct x86_emulate_ctxt *ctxt) | ||
2619 | { | ||
2620 | struct decode_cache *c = &ctxt->decode; | ||
2621 | |||
2622 | emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags); | ||
2623 | return X86EMUL_CONTINUE; | ||
2624 | } | ||
2625 | |||
2626 | static int em_cmp(struct x86_emulate_ctxt *ctxt) | ||
2627 | { | ||
2628 | struct decode_cache *c = &ctxt->decode; | ||
2629 | |||
2630 | emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); | ||
2631 | /* Disable writeback. */ | ||
2632 | c->dst.type = OP_NONE; | ||
2633 | return X86EMUL_CONTINUE; | ||
2634 | } | ||
2635 | |||
2279 | static int em_imul(struct x86_emulate_ctxt *ctxt) | 2636 | static int em_imul(struct x86_emulate_ctxt *ctxt) |
2280 | { | 2637 | { |
2281 | struct decode_cache *c = &ctxt->decode; | 2638 | struct decode_cache *c = &ctxt->decode; |
@@ -2306,13 +2663,10 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt) | |||
2306 | 2663 | ||
2307 | static int em_rdtsc(struct x86_emulate_ctxt *ctxt) | 2664 | static int em_rdtsc(struct x86_emulate_ctxt *ctxt) |
2308 | { | 2665 | { |
2309 | unsigned cpl = ctxt->ops->cpl(ctxt->vcpu); | ||
2310 | struct decode_cache *c = &ctxt->decode; | 2666 | struct decode_cache *c = &ctxt->decode; |
2311 | u64 tsc = 0; | 2667 | u64 tsc = 0; |
2312 | 2668 | ||
2313 | if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD)) | 2669 | ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc); |
2314 | return emulate_gp(ctxt, 0); | ||
2315 | ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc); | ||
2316 | c->regs[VCPU_REGS_RAX] = (u32)tsc; | 2670 | c->regs[VCPU_REGS_RAX] = (u32)tsc; |
2317 | c->regs[VCPU_REGS_RDX] = tsc >> 32; | 2671 | c->regs[VCPU_REGS_RDX] = tsc >> 32; |
2318 | return X86EMUL_CONTINUE; | 2672 | return X86EMUL_CONTINUE; |
@@ -2325,22 +2679,375 @@ static int em_mov(struct x86_emulate_ctxt *ctxt) | |||
2325 | return X86EMUL_CONTINUE; | 2679 | return X86EMUL_CONTINUE; |
2326 | } | 2680 | } |
2327 | 2681 | ||
2682 | static int em_movdqu(struct x86_emulate_ctxt *ctxt) | ||
2683 | { | ||
2684 | struct decode_cache *c = &ctxt->decode; | ||
2685 | memcpy(&c->dst.vec_val, &c->src.vec_val, c->op_bytes); | ||
2686 | return X86EMUL_CONTINUE; | ||
2687 | } | ||
2688 | |||
2689 | static int em_invlpg(struct x86_emulate_ctxt *ctxt) | ||
2690 | { | ||
2691 | struct decode_cache *c = &ctxt->decode; | ||
2692 | int rc; | ||
2693 | ulong linear; | ||
2694 | |||
2695 | rc = linearize(ctxt, c->src.addr.mem, 1, false, &linear); | ||
2696 | if (rc == X86EMUL_CONTINUE) | ||
2697 | ctxt->ops->invlpg(ctxt, linear); | ||
2698 | /* Disable writeback. */ | ||
2699 | c->dst.type = OP_NONE; | ||
2700 | return X86EMUL_CONTINUE; | ||
2701 | } | ||
2702 | |||
2703 | static int em_clts(struct x86_emulate_ctxt *ctxt) | ||
2704 | { | ||
2705 | ulong cr0; | ||
2706 | |||
2707 | cr0 = ctxt->ops->get_cr(ctxt, 0); | ||
2708 | cr0 &= ~X86_CR0_TS; | ||
2709 | ctxt->ops->set_cr(ctxt, 0, cr0); | ||
2710 | return X86EMUL_CONTINUE; | ||
2711 | } | ||
2712 | |||
2713 | static int em_vmcall(struct x86_emulate_ctxt *ctxt) | ||
2714 | { | ||
2715 | struct decode_cache *c = &ctxt->decode; | ||
2716 | int rc; | ||
2717 | |||
2718 | if (c->modrm_mod != 3 || c->modrm_rm != 1) | ||
2719 | return X86EMUL_UNHANDLEABLE; | ||
2720 | |||
2721 | rc = ctxt->ops->fix_hypercall(ctxt); | ||
2722 | if (rc != X86EMUL_CONTINUE) | ||
2723 | return rc; | ||
2724 | |||
2725 | /* Let the processor re-execute the fixed hypercall */ | ||
2726 | c->eip = ctxt->eip; | ||
2727 | /* Disable writeback. */ | ||
2728 | c->dst.type = OP_NONE; | ||
2729 | return X86EMUL_CONTINUE; | ||
2730 | } | ||
2731 | |||
2732 | static int em_lgdt(struct x86_emulate_ctxt *ctxt) | ||
2733 | { | ||
2734 | struct decode_cache *c = &ctxt->decode; | ||
2735 | struct desc_ptr desc_ptr; | ||
2736 | int rc; | ||
2737 | |||
2738 | rc = read_descriptor(ctxt, c->src.addr.mem, | ||
2739 | &desc_ptr.size, &desc_ptr.address, | ||
2740 | c->op_bytes); | ||
2741 | if (rc != X86EMUL_CONTINUE) | ||
2742 | return rc; | ||
2743 | ctxt->ops->set_gdt(ctxt, &desc_ptr); | ||
2744 | /* Disable writeback. */ | ||
2745 | c->dst.type = OP_NONE; | ||
2746 | return X86EMUL_CONTINUE; | ||
2747 | } | ||
2748 | |||
2749 | static int em_vmmcall(struct x86_emulate_ctxt *ctxt) | ||
2750 | { | ||
2751 | struct decode_cache *c = &ctxt->decode; | ||
2752 | int rc; | ||
2753 | |||
2754 | rc = ctxt->ops->fix_hypercall(ctxt); | ||
2755 | |||
2756 | /* Disable writeback. */ | ||
2757 | c->dst.type = OP_NONE; | ||
2758 | return rc; | ||
2759 | } | ||
2760 | |||
2761 | static int em_lidt(struct x86_emulate_ctxt *ctxt) | ||
2762 | { | ||
2763 | struct decode_cache *c = &ctxt->decode; | ||
2764 | struct desc_ptr desc_ptr; | ||
2765 | int rc; | ||
2766 | |||
2767 | rc = read_descriptor(ctxt, c->src.addr.mem, | ||
2768 | &desc_ptr.size, &desc_ptr.address, | ||
2769 | c->op_bytes); | ||
2770 | if (rc != X86EMUL_CONTINUE) | ||
2771 | return rc; | ||
2772 | ctxt->ops->set_idt(ctxt, &desc_ptr); | ||
2773 | /* Disable writeback. */ | ||
2774 | c->dst.type = OP_NONE; | ||
2775 | return X86EMUL_CONTINUE; | ||
2776 | } | ||
2777 | |||
2778 | static int em_smsw(struct x86_emulate_ctxt *ctxt) | ||
2779 | { | ||
2780 | struct decode_cache *c = &ctxt->decode; | ||
2781 | |||
2782 | c->dst.bytes = 2; | ||
2783 | c->dst.val = ctxt->ops->get_cr(ctxt, 0); | ||
2784 | return X86EMUL_CONTINUE; | ||
2785 | } | ||
2786 | |||
2787 | static int em_lmsw(struct x86_emulate_ctxt *ctxt) | ||
2788 | { | ||
2789 | struct decode_cache *c = &ctxt->decode; | ||
2790 | ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul) | ||
2791 | | (c->src.val & 0x0f)); | ||
2792 | c->dst.type = OP_NONE; | ||
2793 | return X86EMUL_CONTINUE; | ||
2794 | } | ||
2795 | |||
2796 | static bool valid_cr(int nr) | ||
2797 | { | ||
2798 | switch (nr) { | ||
2799 | case 0: | ||
2800 | case 2 ... 4: | ||
2801 | case 8: | ||
2802 | return true; | ||
2803 | default: | ||
2804 | return false; | ||
2805 | } | ||
2806 | } | ||
2807 | |||
2808 | static int check_cr_read(struct x86_emulate_ctxt *ctxt) | ||
2809 | { | ||
2810 | struct decode_cache *c = &ctxt->decode; | ||
2811 | |||
2812 | if (!valid_cr(c->modrm_reg)) | ||
2813 | return emulate_ud(ctxt); | ||
2814 | |||
2815 | return X86EMUL_CONTINUE; | ||
2816 | } | ||
2817 | |||
2818 | static int check_cr_write(struct x86_emulate_ctxt *ctxt) | ||
2819 | { | ||
2820 | struct decode_cache *c = &ctxt->decode; | ||
2821 | u64 new_val = c->src.val64; | ||
2822 | int cr = c->modrm_reg; | ||
2823 | u64 efer = 0; | ||
2824 | |||
2825 | static u64 cr_reserved_bits[] = { | ||
2826 | 0xffffffff00000000ULL, | ||
2827 | 0, 0, 0, /* CR3 checked later */ | ||
2828 | CR4_RESERVED_BITS, | ||
2829 | 0, 0, 0, | ||
2830 | CR8_RESERVED_BITS, | ||
2831 | }; | ||
2832 | |||
2833 | if (!valid_cr(cr)) | ||
2834 | return emulate_ud(ctxt); | ||
2835 | |||
2836 | if (new_val & cr_reserved_bits[cr]) | ||
2837 | return emulate_gp(ctxt, 0); | ||
2838 | |||
2839 | switch (cr) { | ||
2840 | case 0: { | ||
2841 | u64 cr4; | ||
2842 | if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) || | ||
2843 | ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD))) | ||
2844 | return emulate_gp(ctxt, 0); | ||
2845 | |||
2846 | cr4 = ctxt->ops->get_cr(ctxt, 4); | ||
2847 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | ||
2848 | |||
2849 | if ((new_val & X86_CR0_PG) && (efer & EFER_LME) && | ||
2850 | !(cr4 & X86_CR4_PAE)) | ||
2851 | return emulate_gp(ctxt, 0); | ||
2852 | |||
2853 | break; | ||
2854 | } | ||
2855 | case 3: { | ||
2856 | u64 rsvd = 0; | ||
2857 | |||
2858 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | ||
2859 | if (efer & EFER_LMA) | ||
2860 | rsvd = CR3_L_MODE_RESERVED_BITS; | ||
2861 | else if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PAE) | ||
2862 | rsvd = CR3_PAE_RESERVED_BITS; | ||
2863 | else if (ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PG) | ||
2864 | rsvd = CR3_NONPAE_RESERVED_BITS; | ||
2865 | |||
2866 | if (new_val & rsvd) | ||
2867 | return emulate_gp(ctxt, 0); | ||
2868 | |||
2869 | break; | ||
2870 | } | ||
2871 | case 4: { | ||
2872 | u64 cr4; | ||
2873 | |||
2874 | cr4 = ctxt->ops->get_cr(ctxt, 4); | ||
2875 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | ||
2876 | |||
2877 | if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE)) | ||
2878 | return emulate_gp(ctxt, 0); | ||
2879 | |||
2880 | break; | ||
2881 | } | ||
2882 | } | ||
2883 | |||
2884 | return X86EMUL_CONTINUE; | ||
2885 | } | ||
2886 | |||
2887 | static int check_dr7_gd(struct x86_emulate_ctxt *ctxt) | ||
2888 | { | ||
2889 | unsigned long dr7; | ||
2890 | |||
2891 | ctxt->ops->get_dr(ctxt, 7, &dr7); | ||
2892 | |||
2893 | /* Check if DR7.Global_Enable is set */ | ||
2894 | return dr7 & (1 << 13); | ||
2895 | } | ||
2896 | |||
2897 | static int check_dr_read(struct x86_emulate_ctxt *ctxt) | ||
2898 | { | ||
2899 | struct decode_cache *c = &ctxt->decode; | ||
2900 | int dr = c->modrm_reg; | ||
2901 | u64 cr4; | ||
2902 | |||
2903 | if (dr > 7) | ||
2904 | return emulate_ud(ctxt); | ||
2905 | |||
2906 | cr4 = ctxt->ops->get_cr(ctxt, 4); | ||
2907 | if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5)) | ||
2908 | return emulate_ud(ctxt); | ||
2909 | |||
2910 | if (check_dr7_gd(ctxt)) | ||
2911 | return emulate_db(ctxt); | ||
2912 | |||
2913 | return X86EMUL_CONTINUE; | ||
2914 | } | ||
2915 | |||
2916 | static int check_dr_write(struct x86_emulate_ctxt *ctxt) | ||
2917 | { | ||
2918 | struct decode_cache *c = &ctxt->decode; | ||
2919 | u64 new_val = c->src.val64; | ||
2920 | int dr = c->modrm_reg; | ||
2921 | |||
2922 | if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL)) | ||
2923 | return emulate_gp(ctxt, 0); | ||
2924 | |||
2925 | return check_dr_read(ctxt); | ||
2926 | } | ||
2927 | |||
2928 | static int check_svme(struct x86_emulate_ctxt *ctxt) | ||
2929 | { | ||
2930 | u64 efer; | ||
2931 | |||
2932 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | ||
2933 | |||
2934 | if (!(efer & EFER_SVME)) | ||
2935 | return emulate_ud(ctxt); | ||
2936 | |||
2937 | return X86EMUL_CONTINUE; | ||
2938 | } | ||
2939 | |||
2940 | static int check_svme_pa(struct x86_emulate_ctxt *ctxt) | ||
2941 | { | ||
2942 | u64 rax = ctxt->decode.regs[VCPU_REGS_RAX]; | ||
2943 | |||
2944 | /* Valid physical address? */ | ||
2945 | if (rax & 0xffff000000000000ULL) | ||
2946 | return emulate_gp(ctxt, 0); | ||
2947 | |||
2948 | return check_svme(ctxt); | ||
2949 | } | ||
2950 | |||
2951 | static int check_rdtsc(struct x86_emulate_ctxt *ctxt) | ||
2952 | { | ||
2953 | u64 cr4 = ctxt->ops->get_cr(ctxt, 4); | ||
2954 | |||
2955 | if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt)) | ||
2956 | return emulate_ud(ctxt); | ||
2957 | |||
2958 | return X86EMUL_CONTINUE; | ||
2959 | } | ||
2960 | |||
2961 | static int check_rdpmc(struct x86_emulate_ctxt *ctxt) | ||
2962 | { | ||
2963 | u64 cr4 = ctxt->ops->get_cr(ctxt, 4); | ||
2964 | u64 rcx = ctxt->decode.regs[VCPU_REGS_RCX]; | ||
2965 | |||
2966 | if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || | ||
2967 | (rcx > 3)) | ||
2968 | return emulate_gp(ctxt, 0); | ||
2969 | |||
2970 | return X86EMUL_CONTINUE; | ||
2971 | } | ||
2972 | |||
2973 | static int check_perm_in(struct x86_emulate_ctxt *ctxt) | ||
2974 | { | ||
2975 | struct decode_cache *c = &ctxt->decode; | ||
2976 | |||
2977 | c->dst.bytes = min(c->dst.bytes, 4u); | ||
2978 | if (!emulator_io_permited(ctxt, ctxt->ops, c->src.val, c->dst.bytes)) | ||
2979 | return emulate_gp(ctxt, 0); | ||
2980 | |||
2981 | return X86EMUL_CONTINUE; | ||
2982 | } | ||
2983 | |||
2984 | static int check_perm_out(struct x86_emulate_ctxt *ctxt) | ||
2985 | { | ||
2986 | struct decode_cache *c = &ctxt->decode; | ||
2987 | |||
2988 | c->src.bytes = min(c->src.bytes, 4u); | ||
2989 | if (!emulator_io_permited(ctxt, ctxt->ops, c->dst.val, c->src.bytes)) | ||
2990 | return emulate_gp(ctxt, 0); | ||
2991 | |||
2992 | return X86EMUL_CONTINUE; | ||
2993 | } | ||
2994 | |||
2328 | #define D(_y) { .flags = (_y) } | 2995 | #define D(_y) { .flags = (_y) } |
2996 | #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } | ||
2997 | #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ | ||
2998 | .check_perm = (_p) } | ||
2329 | #define N D(0) | 2999 | #define N D(0) |
3000 | #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } | ||
2330 | #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } | 3001 | #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } |
2331 | #define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) } | 3002 | #define GD(_f, _g) { .flags = ((_f) | GroupDual), .u.gdual = (_g) } |
2332 | #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } | 3003 | #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } |
3004 | #define II(_f, _e, _i) \ | ||
3005 | { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } | ||
3006 | #define IIP(_f, _e, _i, _p) \ | ||
3007 | { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \ | ||
3008 | .check_perm = (_p) } | ||
3009 | #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) } | ||
2333 | 3010 | ||
2334 | #define D2bv(_f) D((_f) | ByteOp), D(_f) | 3011 | #define D2bv(_f) D((_f) | ByteOp), D(_f) |
3012 | #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) | ||
2335 | #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) | 3013 | #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) |
2336 | 3014 | ||
2337 | #define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM), \ | 3015 | #define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ |
2338 | D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock), \ | 3016 | I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ |
2339 | D2bv(((_f) & ~Lock) | DstAcc | SrcImm) | 3017 | I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) |
2340 | 3018 | ||
3019 | static struct opcode group7_rm1[] = { | ||
3020 | DI(SrcNone | ModRM | Priv, monitor), | ||
3021 | DI(SrcNone | ModRM | Priv, mwait), | ||
3022 | N, N, N, N, N, N, | ||
3023 | }; | ||
3024 | |||
3025 | static struct opcode group7_rm3[] = { | ||
3026 | DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), | ||
3027 | II(SrcNone | ModRM | Prot | VendorSpecific, em_vmmcall, vmmcall), | ||
3028 | DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa), | ||
3029 | DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa), | ||
3030 | DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme), | ||
3031 | DIP(SrcNone | ModRM | Prot | Priv, clgi, check_svme), | ||
3032 | DIP(SrcNone | ModRM | Prot | Priv, skinit, check_svme), | ||
3033 | DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme), | ||
3034 | }; | ||
3035 | |||
3036 | static struct opcode group7_rm7[] = { | ||
3037 | N, | ||
3038 | DIP(SrcNone | ModRM, rdtscp, check_rdtsc), | ||
3039 | N, N, N, N, N, N, | ||
3040 | }; | ||
2341 | 3041 | ||
2342 | static struct opcode group1[] = { | 3042 | static struct opcode group1[] = { |
2343 | X7(D(Lock)), N | 3043 | I(Lock, em_add), |
3044 | I(Lock, em_or), | ||
3045 | I(Lock, em_adc), | ||
3046 | I(Lock, em_sbb), | ||
3047 | I(Lock, em_and), | ||
3048 | I(Lock, em_sub), | ||
3049 | I(Lock, em_xor), | ||
3050 | I(0, em_cmp), | ||
2344 | }; | 3051 | }; |
2345 | 3052 | ||
2346 | static struct opcode group1A[] = { | 3053 | static struct opcode group1A[] = { |
@@ -2366,16 +3073,28 @@ static struct opcode group5[] = { | |||
2366 | D(SrcMem | ModRM | Stack), N, | 3073 | D(SrcMem | ModRM | Stack), N, |
2367 | }; | 3074 | }; |
2368 | 3075 | ||
3076 | static struct opcode group6[] = { | ||
3077 | DI(ModRM | Prot, sldt), | ||
3078 | DI(ModRM | Prot, str), | ||
3079 | DI(ModRM | Prot | Priv, lldt), | ||
3080 | DI(ModRM | Prot | Priv, ltr), | ||
3081 | N, N, N, N, | ||
3082 | }; | ||
3083 | |||
2369 | static struct group_dual group7 = { { | 3084 | static struct group_dual group7 = { { |
2370 | N, N, D(ModRM | SrcMem | Priv), D(ModRM | SrcMem | Priv), | 3085 | DI(ModRM | Mov | DstMem | Priv, sgdt), |
2371 | D(SrcNone | ModRM | DstMem | Mov), N, | 3086 | DI(ModRM | Mov | DstMem | Priv, sidt), |
2372 | D(SrcMem16 | ModRM | Mov | Priv), | 3087 | II(ModRM | SrcMem | Priv, em_lgdt, lgdt), |
2373 | D(SrcMem | ModRM | ByteOp | Priv | NoAccess), | 3088 | II(ModRM | SrcMem | Priv, em_lidt, lidt), |
3089 | II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, | ||
3090 | II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), | ||
3091 | II(SrcMem | ModRM | ByteOp | Priv | NoAccess, em_invlpg, invlpg), | ||
2374 | }, { | 3092 | }, { |
2375 | D(SrcNone | ModRM | Priv | VendorSpecific), N, | 3093 | I(SrcNone | ModRM | Priv | VendorSpecific, em_vmcall), |
2376 | N, D(SrcNone | ModRM | Priv | VendorSpecific), | 3094 | EXT(0, group7_rm1), |
2377 | D(SrcNone | ModRM | DstMem | Mov), N, | 3095 | N, EXT(0, group7_rm3), |
2378 | D(SrcMem16 | ModRM | Mov | Priv), N, | 3096 | II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, |
3097 | II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7), | ||
2379 | } }; | 3098 | } }; |
2380 | 3099 | ||
2381 | static struct opcode group8[] = { | 3100 | static struct opcode group8[] = { |
@@ -2394,35 +3113,40 @@ static struct opcode group11[] = { | |||
2394 | I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)), | 3113 | I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)), |
2395 | }; | 3114 | }; |
2396 | 3115 | ||
3116 | static struct gprefix pfx_0f_6f_0f_7f = { | ||
3117 | N, N, N, I(Sse, em_movdqu), | ||
3118 | }; | ||
3119 | |||
2397 | static struct opcode opcode_table[256] = { | 3120 | static struct opcode opcode_table[256] = { |
2398 | /* 0x00 - 0x07 */ | 3121 | /* 0x00 - 0x07 */ |
2399 | D6ALU(Lock), | 3122 | I6ALU(Lock, em_add), |
2400 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), | 3123 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), |
2401 | /* 0x08 - 0x0F */ | 3124 | /* 0x08 - 0x0F */ |
2402 | D6ALU(Lock), | 3125 | I6ALU(Lock, em_or), |
2403 | D(ImplicitOps | Stack | No64), N, | 3126 | D(ImplicitOps | Stack | No64), N, |
2404 | /* 0x10 - 0x17 */ | 3127 | /* 0x10 - 0x17 */ |
2405 | D6ALU(Lock), | 3128 | I6ALU(Lock, em_adc), |
2406 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), | 3129 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), |
2407 | /* 0x18 - 0x1F */ | 3130 | /* 0x18 - 0x1F */ |
2408 | D6ALU(Lock), | 3131 | I6ALU(Lock, em_sbb), |
2409 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), | 3132 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), |
2410 | /* 0x20 - 0x27 */ | 3133 | /* 0x20 - 0x27 */ |
2411 | D6ALU(Lock), N, N, | 3134 | I6ALU(Lock, em_and), N, N, |
2412 | /* 0x28 - 0x2F */ | 3135 | /* 0x28 - 0x2F */ |
2413 | D6ALU(Lock), N, I(ByteOp | DstAcc | No64, em_das), | 3136 | I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), |
2414 | /* 0x30 - 0x37 */ | 3137 | /* 0x30 - 0x37 */ |
2415 | D6ALU(Lock), N, N, | 3138 | I6ALU(Lock, em_xor), N, N, |
2416 | /* 0x38 - 0x3F */ | 3139 | /* 0x38 - 0x3F */ |
2417 | D6ALU(0), N, N, | 3140 | I6ALU(0, em_cmp), N, N, |
2418 | /* 0x40 - 0x4F */ | 3141 | /* 0x40 - 0x4F */ |
2419 | X16(D(DstReg)), | 3142 | X16(D(DstReg)), |
2420 | /* 0x50 - 0x57 */ | 3143 | /* 0x50 - 0x57 */ |
2421 | X8(I(SrcReg | Stack, em_push)), | 3144 | X8(I(SrcReg | Stack, em_push)), |
2422 | /* 0x58 - 0x5F */ | 3145 | /* 0x58 - 0x5F */ |
2423 | X8(D(DstReg | Stack)), | 3146 | X8(I(DstReg | Stack, em_pop)), |
2424 | /* 0x60 - 0x67 */ | 3147 | /* 0x60 - 0x67 */ |
2425 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), | 3148 | I(ImplicitOps | Stack | No64, em_pusha), |
3149 | I(ImplicitOps | Stack | No64, em_popa), | ||
2426 | N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ , | 3150 | N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ , |
2427 | N, N, N, N, | 3151 | N, N, N, N, |
2428 | /* 0x68 - 0x6F */ | 3152 | /* 0x68 - 0x6F */ |
@@ -2430,8 +3154,8 @@ static struct opcode opcode_table[256] = { | |||
2430 | I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), | 3154 | I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), |
2431 | I(SrcImmByte | Mov | Stack, em_push), | 3155 | I(SrcImmByte | Mov | Stack, em_push), |
2432 | I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), | 3156 | I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), |
2433 | D2bv(DstDI | Mov | String), /* insb, insw/insd */ | 3157 | D2bvIP(DstDI | Mov | String, ins, check_perm_in), /* insb, insw/insd */ |
2434 | D2bv(SrcSI | ImplicitOps | String), /* outsb, outsw/outsd */ | 3158 | D2bvIP(SrcSI | ImplicitOps | String, outs, check_perm_out), /* outsb, outsw/outsd */ |
2435 | /* 0x70 - 0x7F */ | 3159 | /* 0x70 - 0x7F */ |
2436 | X16(D(SrcImmByte)), | 3160 | X16(D(SrcImmByte)), |
2437 | /* 0x80 - 0x87 */ | 3161 | /* 0x80 - 0x87 */ |
@@ -2446,21 +3170,22 @@ static struct opcode opcode_table[256] = { | |||
2446 | D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg), | 3170 | D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg), |
2447 | D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A), | 3171 | D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A), |
2448 | /* 0x90 - 0x97 */ | 3172 | /* 0x90 - 0x97 */ |
2449 | X8(D(SrcAcc | DstReg)), | 3173 | DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)), |
2450 | /* 0x98 - 0x9F */ | 3174 | /* 0x98 - 0x9F */ |
2451 | D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), | 3175 | D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), |
2452 | I(SrcImmFAddr | No64, em_call_far), N, | 3176 | I(SrcImmFAddr | No64, em_call_far), N, |
2453 | D(ImplicitOps | Stack), D(ImplicitOps | Stack), N, N, | 3177 | II(ImplicitOps | Stack, em_pushf, pushf), |
3178 | II(ImplicitOps | Stack, em_popf, popf), N, N, | ||
2454 | /* 0xA0 - 0xA7 */ | 3179 | /* 0xA0 - 0xA7 */ |
2455 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), | 3180 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), |
2456 | I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), | 3181 | I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), |
2457 | I2bv(SrcSI | DstDI | Mov | String, em_mov), | 3182 | I2bv(SrcSI | DstDI | Mov | String, em_mov), |
2458 | D2bv(SrcSI | DstDI | String), | 3183 | I2bv(SrcSI | DstDI | String, em_cmp), |
2459 | /* 0xA8 - 0xAF */ | 3184 | /* 0xA8 - 0xAF */ |
2460 | D2bv(DstAcc | SrcImm), | 3185 | D2bv(DstAcc | SrcImm), |
2461 | I2bv(SrcAcc | DstDI | Mov | String, em_mov), | 3186 | I2bv(SrcAcc | DstDI | Mov | String, em_mov), |
2462 | I2bv(SrcSI | DstAcc | Mov | String, em_mov), | 3187 | I2bv(SrcSI | DstAcc | Mov | String, em_mov), |
2463 | D2bv(SrcAcc | DstDI | String), | 3188 | I2bv(SrcAcc | DstDI | String, em_cmp), |
2464 | /* 0xB0 - 0xB7 */ | 3189 | /* 0xB0 - 0xB7 */ |
2465 | X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), | 3190 | X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), |
2466 | /* 0xB8 - 0xBF */ | 3191 | /* 0xB8 - 0xBF */ |
@@ -2473,7 +3198,8 @@ static struct opcode opcode_table[256] = { | |||
2473 | G(ByteOp, group11), G(0, group11), | 3198 | G(ByteOp, group11), G(0, group11), |
2474 | /* 0xC8 - 0xCF */ | 3199 | /* 0xC8 - 0xCF */ |
2475 | N, N, N, D(ImplicitOps | Stack), | 3200 | N, N, N, D(ImplicitOps | Stack), |
2476 | D(ImplicitOps), D(SrcImmByte), D(ImplicitOps | No64), D(ImplicitOps), | 3201 | D(ImplicitOps), DI(SrcImmByte, intn), |
3202 | D(ImplicitOps | No64), DI(ImplicitOps, iret), | ||
2477 | /* 0xD0 - 0xD7 */ | 3203 | /* 0xD0 - 0xD7 */ |
2478 | D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), | 3204 | D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), |
2479 | N, N, N, N, | 3205 | N, N, N, N, |
@@ -2481,14 +3207,17 @@ static struct opcode opcode_table[256] = { | |||
2481 | N, N, N, N, N, N, N, N, | 3207 | N, N, N, N, N, N, N, N, |
2482 | /* 0xE0 - 0xE7 */ | 3208 | /* 0xE0 - 0xE7 */ |
2483 | X4(D(SrcImmByte)), | 3209 | X4(D(SrcImmByte)), |
2484 | D2bv(SrcImmUByte | DstAcc), D2bv(SrcAcc | DstImmUByte), | 3210 | D2bvIP(SrcImmUByte | DstAcc, in, check_perm_in), |
3211 | D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out), | ||
2485 | /* 0xE8 - 0xEF */ | 3212 | /* 0xE8 - 0xEF */ |
2486 | D(SrcImm | Stack), D(SrcImm | ImplicitOps), | 3213 | D(SrcImm | Stack), D(SrcImm | ImplicitOps), |
2487 | D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps), | 3214 | D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps), |
2488 | D2bv(SrcNone | DstAcc), D2bv(SrcAcc | ImplicitOps), | 3215 | D2bvIP(SrcNone | DstAcc, in, check_perm_in), |
3216 | D2bvIP(SrcAcc | ImplicitOps, out, check_perm_out), | ||
2489 | /* 0xF0 - 0xF7 */ | 3217 | /* 0xF0 - 0xF7 */ |
2490 | N, N, N, N, | 3218 | N, DI(ImplicitOps, icebp), N, N, |
2491 | D(ImplicitOps | Priv), D(ImplicitOps), G(ByteOp, group3), G(0, group3), | 3219 | DI(ImplicitOps | Priv, hlt), D(ImplicitOps), |
3220 | G(ByteOp, group3), G(0, group3), | ||
2492 | /* 0xF8 - 0xFF */ | 3221 | /* 0xF8 - 0xFF */ |
2493 | D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), | 3222 | D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), |
2494 | D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5), | 3223 | D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5), |
@@ -2496,20 +3225,24 @@ static struct opcode opcode_table[256] = { | |||
2496 | 3225 | ||
2497 | static struct opcode twobyte_table[256] = { | 3226 | static struct opcode twobyte_table[256] = { |
2498 | /* 0x00 - 0x0F */ | 3227 | /* 0x00 - 0x0F */ |
2499 | N, GD(0, &group7), N, N, | 3228 | G(0, group6), GD(0, &group7), N, N, |
2500 | N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv), N, | 3229 | N, D(ImplicitOps | VendorSpecific), DI(ImplicitOps | Priv, clts), N, |
2501 | D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N, | 3230 | DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, |
2502 | N, D(ImplicitOps | ModRM), N, N, | 3231 | N, D(ImplicitOps | ModRM), N, N, |
2503 | /* 0x10 - 0x1F */ | 3232 | /* 0x10 - 0x1F */ |
2504 | N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N, | 3233 | N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N, |
2505 | /* 0x20 - 0x2F */ | 3234 | /* 0x20 - 0x2F */ |
2506 | D(ModRM | DstMem | Priv | Op3264), D(ModRM | DstMem | Priv | Op3264), | 3235 | DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), |
2507 | D(ModRM | SrcMem | Priv | Op3264), D(ModRM | SrcMem | Priv | Op3264), | 3236 | DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), |
3237 | DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write), | ||
3238 | DIP(ModRM | SrcMem | Priv | Op3264, dr_write, check_dr_write), | ||
2508 | N, N, N, N, | 3239 | N, N, N, N, |
2509 | N, N, N, N, N, N, N, N, | 3240 | N, N, N, N, N, N, N, N, |
2510 | /* 0x30 - 0x3F */ | 3241 | /* 0x30 - 0x3F */ |
2511 | D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc), | 3242 | DI(ImplicitOps | Priv, wrmsr), |
2512 | D(ImplicitOps | Priv), N, | 3243 | IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), |
3244 | DI(ImplicitOps | Priv, rdmsr), | ||
3245 | DIP(ImplicitOps | Priv, rdpmc, check_rdpmc), | ||
2513 | D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific), | 3246 | D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific), |
2514 | N, N, | 3247 | N, N, |
2515 | N, N, N, N, N, N, N, N, | 3248 | N, N, N, N, N, N, N, N, |
@@ -2518,21 +3251,27 @@ static struct opcode twobyte_table[256] = { | |||
2518 | /* 0x50 - 0x5F */ | 3251 | /* 0x50 - 0x5F */ |
2519 | N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, | 3252 | N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, |
2520 | /* 0x60 - 0x6F */ | 3253 | /* 0x60 - 0x6F */ |
2521 | N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, | 3254 | N, N, N, N, |
3255 | N, N, N, N, | ||
3256 | N, N, N, N, | ||
3257 | N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f), | ||
2522 | /* 0x70 - 0x7F */ | 3258 | /* 0x70 - 0x7F */ |
2523 | N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, | 3259 | N, N, N, N, |
3260 | N, N, N, N, | ||
3261 | N, N, N, N, | ||
3262 | N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f), | ||
2524 | /* 0x80 - 0x8F */ | 3263 | /* 0x80 - 0x8F */ |
2525 | X16(D(SrcImm)), | 3264 | X16(D(SrcImm)), |
2526 | /* 0x90 - 0x9F */ | 3265 | /* 0x90 - 0x9F */ |
2527 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), | 3266 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), |
2528 | /* 0xA0 - 0xA7 */ | 3267 | /* 0xA0 - 0xA7 */ |
2529 | D(ImplicitOps | Stack), D(ImplicitOps | Stack), | 3268 | D(ImplicitOps | Stack), D(ImplicitOps | Stack), |
2530 | N, D(DstMem | SrcReg | ModRM | BitOp), | 3269 | DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp), |
2531 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3270 | D(DstMem | SrcReg | Src2ImmByte | ModRM), |
2532 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, | 3271 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, |
2533 | /* 0xA8 - 0xAF */ | 3272 | /* 0xA8 - 0xAF */ |
2534 | D(ImplicitOps | Stack), D(ImplicitOps | Stack), | 3273 | D(ImplicitOps | Stack), D(ImplicitOps | Stack), |
2535 | N, D(DstMem | SrcReg | ModRM | BitOp | Lock), | 3274 | DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock), |
2536 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3275 | D(DstMem | SrcReg | Src2ImmByte | ModRM), |
2537 | D(DstMem | SrcReg | Src2CL | ModRM), | 3276 | D(DstMem | SrcReg | Src2CL | ModRM), |
2538 | D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), | 3277 | D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), |
@@ -2564,10 +3303,13 @@ static struct opcode twobyte_table[256] = { | |||
2564 | #undef G | 3303 | #undef G |
2565 | #undef GD | 3304 | #undef GD |
2566 | #undef I | 3305 | #undef I |
3306 | #undef GP | ||
3307 | #undef EXT | ||
2567 | 3308 | ||
2568 | #undef D2bv | 3309 | #undef D2bv |
3310 | #undef D2bvIP | ||
2569 | #undef I2bv | 3311 | #undef I2bv |
2570 | #undef D6ALU | 3312 | #undef I6ALU |
2571 | 3313 | ||
2572 | static unsigned imm_size(struct decode_cache *c) | 3314 | static unsigned imm_size(struct decode_cache *c) |
2573 | { | 3315 | { |
@@ -2625,8 +3367,9 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | |||
2625 | struct decode_cache *c = &ctxt->decode; | 3367 | struct decode_cache *c = &ctxt->decode; |
2626 | int rc = X86EMUL_CONTINUE; | 3368 | int rc = X86EMUL_CONTINUE; |
2627 | int mode = ctxt->mode; | 3369 | int mode = ctxt->mode; |
2628 | int def_op_bytes, def_ad_bytes, dual, goffset; | 3370 | int def_op_bytes, def_ad_bytes, goffset, simd_prefix; |
2629 | struct opcode opcode, *g_mod012, *g_mod3; | 3371 | bool op_prefix = false; |
3372 | struct opcode opcode; | ||
2630 | struct operand memop = { .type = OP_NONE }; | 3373 | struct operand memop = { .type = OP_NONE }; |
2631 | 3374 | ||
2632 | c->eip = ctxt->eip; | 3375 | c->eip = ctxt->eip; |
@@ -2634,7 +3377,6 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | |||
2634 | c->fetch.end = c->fetch.start + insn_len; | 3377 | c->fetch.end = c->fetch.start + insn_len; |
2635 | if (insn_len > 0) | 3378 | if (insn_len > 0) |
2636 | memcpy(c->fetch.data, insn, insn_len); | 3379 | memcpy(c->fetch.data, insn, insn_len); |
2637 | ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); | ||
2638 | 3380 | ||
2639 | switch (mode) { | 3381 | switch (mode) { |
2640 | case X86EMUL_MODE_REAL: | 3382 | case X86EMUL_MODE_REAL: |
@@ -2662,6 +3404,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | |||
2662 | for (;;) { | 3404 | for (;;) { |
2663 | switch (c->b = insn_fetch(u8, 1, c->eip)) { | 3405 | switch (c->b = insn_fetch(u8, 1, c->eip)) { |
2664 | case 0x66: /* operand-size override */ | 3406 | case 0x66: /* operand-size override */ |
3407 | op_prefix = true; | ||
2665 | /* switch between 2/4 bytes */ | 3408 | /* switch between 2/4 bytes */ |
2666 | c->op_bytes = def_op_bytes ^ 6; | 3409 | c->op_bytes = def_op_bytes ^ 6; |
2667 | break; | 3410 | break; |
@@ -2692,10 +3435,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | |||
2692 | c->lock_prefix = 1; | 3435 | c->lock_prefix = 1; |
2693 | break; | 3436 | break; |
2694 | case 0xf2: /* REPNE/REPNZ */ | 3437 | case 0xf2: /* REPNE/REPNZ */ |
2695 | c->rep_prefix = REPNE_PREFIX; | ||
2696 | break; | ||
2697 | case 0xf3: /* REP/REPE/REPZ */ | 3438 | case 0xf3: /* REP/REPE/REPZ */ |
2698 | c->rep_prefix = REPE_PREFIX; | 3439 | c->rep_prefix = c->b; |
2699 | break; | 3440 | break; |
2700 | default: | 3441 | default: |
2701 | goto done_prefixes; | 3442 | goto done_prefixes; |
@@ -2722,29 +3463,49 @@ done_prefixes: | |||
2722 | } | 3463 | } |
2723 | c->d = opcode.flags; | 3464 | c->d = opcode.flags; |
2724 | 3465 | ||
2725 | if (c->d & Group) { | 3466 | while (c->d & GroupMask) { |
2726 | dual = c->d & GroupDual; | 3467 | switch (c->d & GroupMask) { |
2727 | c->modrm = insn_fetch(u8, 1, c->eip); | 3468 | case Group: |
2728 | --c->eip; | 3469 | c->modrm = insn_fetch(u8, 1, c->eip); |
2729 | 3470 | --c->eip; | |
2730 | if (c->d & GroupDual) { | 3471 | goffset = (c->modrm >> 3) & 7; |
2731 | g_mod012 = opcode.u.gdual->mod012; | 3472 | opcode = opcode.u.group[goffset]; |
2732 | g_mod3 = opcode.u.gdual->mod3; | 3473 | break; |
2733 | } else | 3474 | case GroupDual: |
2734 | g_mod012 = g_mod3 = opcode.u.group; | 3475 | c->modrm = insn_fetch(u8, 1, c->eip); |
2735 | 3476 | --c->eip; | |
2736 | c->d &= ~(Group | GroupDual); | 3477 | goffset = (c->modrm >> 3) & 7; |
2737 | 3478 | if ((c->modrm >> 6) == 3) | |
2738 | goffset = (c->modrm >> 3) & 7; | 3479 | opcode = opcode.u.gdual->mod3[goffset]; |
3480 | else | ||
3481 | opcode = opcode.u.gdual->mod012[goffset]; | ||
3482 | break; | ||
3483 | case RMExt: | ||
3484 | goffset = c->modrm & 7; | ||
3485 | opcode = opcode.u.group[goffset]; | ||
3486 | break; | ||
3487 | case Prefix: | ||
3488 | if (c->rep_prefix && op_prefix) | ||
3489 | return X86EMUL_UNHANDLEABLE; | ||
3490 | simd_prefix = op_prefix ? 0x66 : c->rep_prefix; | ||
3491 | switch (simd_prefix) { | ||
3492 | case 0x00: opcode = opcode.u.gprefix->pfx_no; break; | ||
3493 | case 0x66: opcode = opcode.u.gprefix->pfx_66; break; | ||
3494 | case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break; | ||
3495 | case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; | ||
3496 | } | ||
3497 | break; | ||
3498 | default: | ||
3499 | return X86EMUL_UNHANDLEABLE; | ||
3500 | } | ||
2739 | 3501 | ||
2740 | if ((c->modrm >> 6) == 3) | 3502 | c->d &= ~GroupMask; |
2741 | opcode = g_mod3[goffset]; | ||
2742 | else | ||
2743 | opcode = g_mod012[goffset]; | ||
2744 | c->d |= opcode.flags; | 3503 | c->d |= opcode.flags; |
2745 | } | 3504 | } |
2746 | 3505 | ||
2747 | c->execute = opcode.u.execute; | 3506 | c->execute = opcode.u.execute; |
3507 | c->check_perm = opcode.check_perm; | ||
3508 | c->intercept = opcode.intercept; | ||
2748 | 3509 | ||
2749 | /* Unrecognised? */ | 3510 | /* Unrecognised? */ |
2750 | if (c->d == 0 || (c->d & Undefined)) | 3511 | if (c->d == 0 || (c->d & Undefined)) |
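The Prefix case in the GroupMask loop above routes two-byte opcodes that carry a mandatory SIMD prefix through a gprefix table such as pfx_0f_6f_0f_7f. A self-contained sketch of that selection step follows; the sketch_* types and names are hypothetical simplifications, not the emulator's real struct opcode/struct gprefix definitions, and f3 0f 6f is the movdqu form the table actually emulates:

#include <stdio.h>

/* Hypothetical, simplified stand-ins for struct opcode / struct gprefix. */
struct sketch_opcode {
	const char *handler;			/* stand-in for u.execute   */
};

struct sketch_gprefix {
	struct sketch_opcode pfx_no;		/* no mandatory prefix      */
	struct sketch_opcode pfx_66;		/* 0x66 operand-size prefix */
	struct sketch_opcode pfx_f2;		/* 0xf2 prefix              */
	struct sketch_opcode pfx_f3;		/* 0xf3 prefix              */
};

/* 0f 6f / 0f 7f: only the f3-prefixed form (movdqu) has a handler. */
static const struct sketch_gprefix sketch_pfx_0f_6f_0f_7f = {
	{ "undefined" }, { "undefined" }, { "undefined" }, { "em_movdqu" },
};

/* Mirrors the Prefix case above: 0x66 wins, otherwise the rep prefix decides. */
static struct sketch_opcode pick_simd_form(const struct sketch_gprefix *g,
					   int op_prefix, int rep_prefix)
{
	int simd_prefix = op_prefix ? 0x66 : rep_prefix;

	switch (simd_prefix) {
	case 0x66: return g->pfx_66;
	case 0xf2: return g->pfx_f2;
	case 0xf3: return g->pfx_f3;
	default:   return g->pfx_no;
	}
}

int main(void)
{
	/* f3 0f 6f /r: rep prefix 0xf3 seen, no operand-size override. */
	struct sketch_opcode op = pick_simd_form(&sketch_pfx_0f_6f_0f_7f, 0, 0xf3);

	printf("%s\n", op.handler);		/* prints "em_movdqu" */
	return 0;
}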
@@ -2763,6 +3524,9 @@ done_prefixes: | |||
2763 | c->op_bytes = 4; | 3524 | c->op_bytes = 4; |
2764 | } | 3525 | } |
2765 | 3526 | ||
3527 | if (c->d & Sse) | ||
3528 | c->op_bytes = 16; | ||
3529 | |||
2766 | /* ModRM and SIB bytes. */ | 3530 | /* ModRM and SIB bytes. */ |
2767 | if (c->d & ModRM) { | 3531 | if (c->d & ModRM) { |
2768 | rc = decode_modrm(ctxt, ops, &memop); | 3532 | rc = decode_modrm(ctxt, ops, &memop); |
@@ -2776,7 +3540,7 @@ done_prefixes: | |||
2776 | if (!c->has_seg_override) | 3540 | if (!c->has_seg_override) |
2777 | set_seg_override(c, VCPU_SREG_DS); | 3541 | set_seg_override(c, VCPU_SREG_DS); |
2778 | 3542 | ||
2779 | memop.addr.mem.seg = seg_override(ctxt, ops, c); | 3543 | memop.addr.mem.seg = seg_override(ctxt, c); |
2780 | 3544 | ||
2781 | if (memop.type == OP_MEM && c->ad_bytes != 8) | 3545 | if (memop.type == OP_MEM && c->ad_bytes != 8) |
2782 | memop.addr.mem.ea = (u32)memop.addr.mem.ea; | 3546 | memop.addr.mem.ea = (u32)memop.addr.mem.ea; |
@@ -2792,7 +3556,7 @@ done_prefixes: | |||
2792 | case SrcNone: | 3556 | case SrcNone: |
2793 | break; | 3557 | break; |
2794 | case SrcReg: | 3558 | case SrcReg: |
2795 | decode_register_operand(&c->src, c, 0); | 3559 | decode_register_operand(ctxt, &c->src, c, 0); |
2796 | break; | 3560 | break; |
2797 | case SrcMem16: | 3561 | case SrcMem16: |
2798 | memop.bytes = 2; | 3562 | memop.bytes = 2; |
@@ -2836,7 +3600,7 @@ done_prefixes: | |||
2836 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 3600 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
2837 | c->src.addr.mem.ea = | 3601 | c->src.addr.mem.ea = |
2838 | register_address(c, c->regs[VCPU_REGS_RSI]); | 3602 | register_address(c, c->regs[VCPU_REGS_RSI]); |
2839 | c->src.addr.mem.seg = seg_override(ctxt, ops, c), | 3603 | c->src.addr.mem.seg = seg_override(ctxt, c); |
2840 | c->src.val = 0; | 3604 | c->src.val = 0; |
2841 | break; | 3605 | break; |
2842 | case SrcImmFAddr: | 3606 | case SrcImmFAddr: |
@@ -2883,7 +3647,7 @@ done_prefixes: | |||
2883 | /* Decode and fetch the destination operand: register or memory. */ | 3647 | /* Decode and fetch the destination operand: register or memory. */ |
2884 | switch (c->d & DstMask) { | 3648 | switch (c->d & DstMask) { |
2885 | case DstReg: | 3649 | case DstReg: |
2886 | decode_register_operand(&c->dst, c, | 3650 | decode_register_operand(ctxt, &c->dst, c, |
2887 | c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); | 3651 | c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); |
2888 | break; | 3652 | break; |
2889 | case DstImmUByte: | 3653 | case DstImmUByte: |
@@ -2926,7 +3690,7 @@ done_prefixes: | |||
2926 | } | 3690 | } |
2927 | 3691 | ||
2928 | done: | 3692 | done: |
2929 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 3693 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; |
2930 | } | 3694 | } |
2931 | 3695 | ||
2932 | static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) | 3696 | static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) |
@@ -2979,12 +3743,51 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
2979 | goto done; | 3743 | goto done; |
2980 | } | 3744 | } |
2981 | 3745 | ||
3746 | if ((c->d & Sse) | ||
3747 | && ((ops->get_cr(ctxt, 0) & X86_CR0_EM) | ||
3748 | || !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { | ||
3749 | rc = emulate_ud(ctxt); | ||
3750 | goto done; | ||
3751 | } | ||
3752 | |||
3753 | if ((c->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { | ||
3754 | rc = emulate_nm(ctxt); | ||
3755 | goto done; | ||
3756 | } | ||
3757 | |||
3758 | if (unlikely(ctxt->guest_mode) && c->intercept) { | ||
3759 | rc = emulator_check_intercept(ctxt, c->intercept, | ||
3760 | X86_ICPT_PRE_EXCEPT); | ||
3761 | if (rc != X86EMUL_CONTINUE) | ||
3762 | goto done; | ||
3763 | } | ||
3764 | |||
2982 | /* Privileged instruction can be executed only in CPL=0 */ | 3765 | /* Privileged instruction can be executed only in CPL=0 */ |
2983 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { | 3766 | if ((c->d & Priv) && ops->cpl(ctxt)) { |
2984 | rc = emulate_gp(ctxt, 0); | 3767 | rc = emulate_gp(ctxt, 0); |
2985 | goto done; | 3768 | goto done; |
2986 | } | 3769 | } |
2987 | 3770 | ||
3771 | /* Instruction can only be executed in protected mode */ | ||
3772 | if ((c->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) { | ||
3773 | rc = emulate_ud(ctxt); | ||
3774 | goto done; | ||
3775 | } | ||
3776 | |||
3777 | /* Do instruction specific permission checks */ | ||
3778 | if (c->check_perm) { | ||
3779 | rc = c->check_perm(ctxt); | ||
3780 | if (rc != X86EMUL_CONTINUE) | ||
3781 | goto done; | ||
3782 | } | ||
3783 | |||
3784 | if (unlikely(ctxt->guest_mode) && c->intercept) { | ||
3785 | rc = emulator_check_intercept(ctxt, c->intercept, | ||
3786 | X86_ICPT_POST_EXCEPT); | ||
3787 | if (rc != X86EMUL_CONTINUE) | ||
3788 | goto done; | ||
3789 | } | ||
3790 | |||
2988 | if (c->rep_prefix && (c->d & String)) { | 3791 | if (c->rep_prefix && (c->d & String)) { |
2989 | /* All REP prefixes have the same first termination condition */ | 3792 | /* All REP prefixes have the same first termination condition */ |
2990 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { | 3793 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { |
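The two Sse checks added in the hunk above follow the usual x86 rules: #UD when CR0.EM is set or CR4.OSFXSR is clear, #NM when CR0.TS is set. A minimal sketch of that decision, assuming hypothetical helper and enum names and the standard CR0/CR4 bit positions:

#include <stdint.h>

#define X86_CR0_EM	(1u << 2)
#define X86_CR0_TS	(1u << 3)
#define X86_CR4_OSFXSR	(1u << 9)

enum sketch_fault { SKETCH_OK, SKETCH_UD, SKETCH_NM };

/* Sketch: which fault an SSE instruction takes for a given CR0/CR4 state. */
static enum sketch_fault sse_fault(uint64_t cr0, uint64_t cr4)
{
	if ((cr0 & X86_CR0_EM) || !(cr4 & X86_CR4_OSFXSR))
		return SKETCH_UD;	/* emulation forced on, or OS never enabled SSE */
	if (cr0 & X86_CR0_TS)
		return SKETCH_NM;	/* lazy FPU switch pending */
	return SKETCH_OK;
}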
@@ -2994,16 +3797,16 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
2994 | } | 3797 | } |
2995 | 3798 | ||
2996 | if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) { | 3799 | if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) { |
2997 | rc = read_emulated(ctxt, ops, linear(ctxt, c->src.addr.mem), | 3800 | rc = segmented_read(ctxt, c->src.addr.mem, |
2998 | c->src.valptr, c->src.bytes); | 3801 | c->src.valptr, c->src.bytes); |
2999 | if (rc != X86EMUL_CONTINUE) | 3802 | if (rc != X86EMUL_CONTINUE) |
3000 | goto done; | 3803 | goto done; |
3001 | c->src.orig_val64 = c->src.val64; | 3804 | c->src.orig_val64 = c->src.val64; |
3002 | } | 3805 | } |
3003 | 3806 | ||
3004 | if (c->src2.type == OP_MEM) { | 3807 | if (c->src2.type == OP_MEM) { |
3005 | rc = read_emulated(ctxt, ops, linear(ctxt, c->src2.addr.mem), | 3808 | rc = segmented_read(ctxt, c->src2.addr.mem, |
3006 | &c->src2.val, c->src2.bytes); | 3809 | &c->src2.val, c->src2.bytes); |
3007 | if (rc != X86EMUL_CONTINUE) | 3810 | if (rc != X86EMUL_CONTINUE) |
3008 | goto done; | 3811 | goto done; |
3009 | } | 3812 | } |
@@ -3014,7 +3817,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
3014 | 3817 | ||
3015 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { | 3818 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { |
3016 | /* optimisation - avoid slow emulated read if Mov */ | 3819 | /* optimisation - avoid slow emulated read if Mov */ |
3017 | rc = read_emulated(ctxt, ops, linear(ctxt, c->dst.addr.mem), | 3820 | rc = segmented_read(ctxt, c->dst.addr.mem, |
3018 | &c->dst.val, c->dst.bytes); | 3821 | &c->dst.val, c->dst.bytes); |
3019 | if (rc != X86EMUL_CONTINUE) | 3822 | if (rc != X86EMUL_CONTINUE) |
3020 | goto done; | 3823 | goto done; |
@@ -3023,6 +3826,13 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
3023 | 3826 | ||
3024 | special_insn: | 3827 | special_insn: |
3025 | 3828 | ||
3829 | if (unlikely(ctxt->guest_mode) && c->intercept) { | ||
3830 | rc = emulator_check_intercept(ctxt, c->intercept, | ||
3831 | X86_ICPT_POST_MEMACCESS); | ||
3832 | if (rc != X86EMUL_CONTINUE) | ||
3833 | goto done; | ||
3834 | } | ||
3835 | |||
3026 | if (c->execute) { | 3836 | if (c->execute) { |
3027 | rc = c->execute(ctxt); | 3837 | rc = c->execute(ctxt); |
3028 | if (rc != X86EMUL_CONTINUE) | 3838 | if (rc != X86EMUL_CONTINUE) |
@@ -3034,75 +3844,33 @@ special_insn: | |||
3034 | goto twobyte_insn; | 3844 | goto twobyte_insn; |
3035 | 3845 | ||
3036 | switch (c->b) { | 3846 | switch (c->b) { |
3037 | case 0x00 ... 0x05: | ||
3038 | add: /* add */ | ||
3039 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); | ||
3040 | break; | ||
3041 | case 0x06: /* push es */ | 3847 | case 0x06: /* push es */ |
3042 | emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); | 3848 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); |
3043 | break; | 3849 | break; |
3044 | case 0x07: /* pop es */ | 3850 | case 0x07: /* pop es */ |
3045 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); | 3851 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); |
3046 | break; | 3852 | break; |
3047 | case 0x08 ... 0x0d: | ||
3048 | or: /* or */ | ||
3049 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); | ||
3050 | break; | ||
3051 | case 0x0e: /* push cs */ | 3853 | case 0x0e: /* push cs */ |
3052 | emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); | 3854 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); |
3053 | break; | ||
3054 | case 0x10 ... 0x15: | ||
3055 | adc: /* adc */ | ||
3056 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); | ||
3057 | break; | 3855 | break; |
3058 | case 0x16: /* push ss */ | 3856 | case 0x16: /* push ss */ |
3059 | emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); | 3857 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); |
3060 | break; | 3858 | break; |
3061 | case 0x17: /* pop ss */ | 3859 | case 0x17: /* pop ss */ |
3062 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); | 3860 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); |
3063 | break; | 3861 | break; |
3064 | case 0x18 ... 0x1d: | ||
3065 | sbb: /* sbb */ | ||
3066 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); | ||
3067 | break; | ||
3068 | case 0x1e: /* push ds */ | 3862 | case 0x1e: /* push ds */ |
3069 | emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); | 3863 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); |
3070 | break; | 3864 | break; |
3071 | case 0x1f: /* pop ds */ | 3865 | case 0x1f: /* pop ds */ |
3072 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); | 3866 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); |
3073 | break; | 3867 | break; |
3074 | case 0x20 ... 0x25: | ||
3075 | and: /* and */ | ||
3076 | emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); | ||
3077 | break; | ||
3078 | case 0x28 ... 0x2d: | ||
3079 | sub: /* sub */ | ||
3080 | emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); | ||
3081 | break; | ||
3082 | case 0x30 ... 0x35: | ||
3083 | xor: /* xor */ | ||
3084 | emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags); | ||
3085 | break; | ||
3086 | case 0x38 ... 0x3d: | ||
3087 | cmp: /* cmp */ | ||
3088 | emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); | ||
3089 | break; | ||
3090 | case 0x40 ... 0x47: /* inc r16/r32 */ | 3868 | case 0x40 ... 0x47: /* inc r16/r32 */ |
3091 | emulate_1op("inc", c->dst, ctxt->eflags); | 3869 | emulate_1op("inc", c->dst, ctxt->eflags); |
3092 | break; | 3870 | break; |
3093 | case 0x48 ... 0x4f: /* dec r16/r32 */ | 3871 | case 0x48 ... 0x4f: /* dec r16/r32 */ |
3094 | emulate_1op("dec", c->dst, ctxt->eflags); | 3872 | emulate_1op("dec", c->dst, ctxt->eflags); |
3095 | break; | 3873 | break; |
3096 | case 0x58 ... 0x5f: /* pop reg */ | ||
3097 | pop_instruction: | ||
3098 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); | ||
3099 | break; | ||
3100 | case 0x60: /* pusha */ | ||
3101 | rc = emulate_pusha(ctxt, ops); | ||
3102 | break; | ||
3103 | case 0x61: /* popa */ | ||
3104 | rc = emulate_popa(ctxt, ops); | ||
3105 | break; | ||
3106 | case 0x63: /* movsxd */ | 3874 | case 0x63: /* movsxd */ |
3107 | if (ctxt->mode != X86EMUL_MODE_PROT64) | 3875 | if (ctxt->mode != X86EMUL_MODE_PROT64) |
3108 | goto cannot_emulate; | 3876 | goto cannot_emulate; |
@@ -3121,26 +3889,6 @@ special_insn: | |||
3121 | if (test_cc(c->b, ctxt->eflags)) | 3889 | if (test_cc(c->b, ctxt->eflags)) |
3122 | jmp_rel(c, c->src.val); | 3890 | jmp_rel(c, c->src.val); |
3123 | break; | 3891 | break; |
3124 | case 0x80 ... 0x83: /* Grp1 */ | ||
3125 | switch (c->modrm_reg) { | ||
3126 | case 0: | ||
3127 | goto add; | ||
3128 | case 1: | ||
3129 | goto or; | ||
3130 | case 2: | ||
3131 | goto adc; | ||
3132 | case 3: | ||
3133 | goto sbb; | ||
3134 | case 4: | ||
3135 | goto and; | ||
3136 | case 5: | ||
3137 | goto sub; | ||
3138 | case 6: | ||
3139 | goto xor; | ||
3140 | case 7: | ||
3141 | goto cmp; | ||
3142 | } | ||
3143 | break; | ||
3144 | case 0x84 ... 0x85: | 3892 | case 0x84 ... 0x85: |
3145 | test: | 3893 | test: |
3146 | emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); | 3894 | emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); |
@@ -3162,7 +3910,7 @@ special_insn: | |||
3162 | rc = emulate_ud(ctxt); | 3910 | rc = emulate_ud(ctxt); |
3163 | goto done; | 3911 | goto done; |
3164 | } | 3912 | } |
3165 | c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu); | 3913 | c->dst.val = get_segment_selector(ctxt, c->modrm_reg); |
3166 | break; | 3914 | break; |
3167 | case 0x8d: /* lea r16/r32, m */ | 3915 | case 0x8d: /* lea r16/r32, m */ |
3168 | c->dst.val = c->src.addr.mem.ea; | 3916 | c->dst.val = c->src.addr.mem.ea; |
@@ -3187,7 +3935,7 @@ special_insn: | |||
3187 | break; | 3935 | break; |
3188 | } | 3936 | } |
3189 | case 0x8f: /* pop (sole member of Grp1a) */ | 3937 | case 0x8f: /* pop (sole member of Grp1a) */ |
3190 | rc = emulate_grp1a(ctxt, ops); | 3938 | rc = em_grp1a(ctxt); |
3191 | break; | 3939 | break; |
3192 | case 0x90 ... 0x97: /* nop / xchg reg, rax */ | 3940 | case 0x90 ... 0x97: /* nop / xchg reg, rax */ |
3193 | if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX]) | 3941 | if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX]) |
@@ -3200,31 +3948,17 @@ special_insn: | |||
3200 | case 8: c->dst.val = (s32)c->dst.val; break; | 3948 | case 8: c->dst.val = (s32)c->dst.val; break; |
3201 | } | 3949 | } |
3202 | break; | 3950 | break; |
3203 | case 0x9c: /* pushf */ | ||
3204 | c->src.val = (unsigned long) ctxt->eflags; | ||
3205 | emulate_push(ctxt, ops); | ||
3206 | break; | ||
3207 | case 0x9d: /* popf */ | ||
3208 | c->dst.type = OP_REG; | ||
3209 | c->dst.addr.reg = &ctxt->eflags; | ||
3210 | c->dst.bytes = c->op_bytes; | ||
3211 | rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes); | ||
3212 | break; | ||
3213 | case 0xa6 ... 0xa7: /* cmps */ | ||
3214 | c->dst.type = OP_NONE; /* Disable writeback. */ | ||
3215 | goto cmp; | ||
3216 | case 0xa8 ... 0xa9: /* test ax, imm */ | 3951 | case 0xa8 ... 0xa9: /* test ax, imm */ |
3217 | goto test; | 3952 | goto test; |
3218 | case 0xae ... 0xaf: /* scas */ | ||
3219 | goto cmp; | ||
3220 | case 0xc0 ... 0xc1: | 3953 | case 0xc0 ... 0xc1: |
3221 | emulate_grp2(ctxt); | 3954 | rc = em_grp2(ctxt); |
3222 | break; | 3955 | break; |
3223 | case 0xc3: /* ret */ | 3956 | case 0xc3: /* ret */ |
3224 | c->dst.type = OP_REG; | 3957 | c->dst.type = OP_REG; |
3225 | c->dst.addr.reg = &c->eip; | 3958 | c->dst.addr.reg = &c->eip; |
3226 | c->dst.bytes = c->op_bytes; | 3959 | c->dst.bytes = c->op_bytes; |
3227 | goto pop_instruction; | 3960 | rc = em_pop(ctxt); |
3961 | break; | ||
3228 | case 0xc4: /* les */ | 3962 | case 0xc4: /* les */ |
3229 | rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES); | 3963 | rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES); |
3230 | break; | 3964 | break; |
@@ -3252,11 +3986,11 @@ special_insn: | |||
3252 | rc = emulate_iret(ctxt, ops); | 3986 | rc = emulate_iret(ctxt, ops); |
3253 | break; | 3987 | break; |
3254 | case 0xd0 ... 0xd1: /* Grp2 */ | 3988 | case 0xd0 ... 0xd1: /* Grp2 */ |
3255 | emulate_grp2(ctxt); | 3989 | rc = em_grp2(ctxt); |
3256 | break; | 3990 | break; |
3257 | case 0xd2 ... 0xd3: /* Grp2 */ | 3991 | case 0xd2 ... 0xd3: /* Grp2 */ |
3258 | c->src.val = c->regs[VCPU_REGS_RCX]; | 3992 | c->src.val = c->regs[VCPU_REGS_RCX]; |
3259 | emulate_grp2(ctxt); | 3993 | rc = em_grp2(ctxt); |
3260 | break; | 3994 | break; |
3261 | case 0xe0 ... 0xe2: /* loop/loopz/loopnz */ | 3995 | case 0xe0 ... 0xe2: /* loop/loopz/loopnz */ |
3262 | register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); | 3996 | register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); |
@@ -3278,23 +4012,14 @@ special_insn: | |||
3278 | long int rel = c->src.val; | 4012 | long int rel = c->src.val; |
3279 | c->src.val = (unsigned long) c->eip; | 4013 | c->src.val = (unsigned long) c->eip; |
3280 | jmp_rel(c, rel); | 4014 | jmp_rel(c, rel); |
3281 | emulate_push(ctxt, ops); | 4015 | rc = em_push(ctxt); |
3282 | break; | 4016 | break; |
3283 | } | 4017 | } |
3284 | case 0xe9: /* jmp rel */ | 4018 | case 0xe9: /* jmp rel */ |
3285 | goto jmp; | 4019 | goto jmp; |
3286 | case 0xea: { /* jmp far */ | 4020 | case 0xea: /* jmp far */ |
3287 | unsigned short sel; | 4021 | rc = em_jmp_far(ctxt); |
3288 | jump_far: | ||
3289 | memcpy(&sel, c->src.valptr + c->op_bytes, 2); | ||
3290 | |||
3291 | if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS)) | ||
3292 | goto done; | ||
3293 | |||
3294 | c->eip = 0; | ||
3295 | memcpy(&c->eip, c->src.valptr, c->op_bytes); | ||
3296 | break; | 4022 | break; |
3297 | } | ||
3298 | case 0xeb: | 4023 | case 0xeb: |
3299 | jmp: /* jmp rel short */ | 4024 | jmp: /* jmp rel short */ |
3300 | jmp_rel(c, c->src.val); | 4025 | jmp_rel(c, c->src.val); |
@@ -3304,11 +4029,6 @@ special_insn: | |||
3304 | case 0xed: /* in (e/r)ax,dx */ | 4029 | case 0xed: /* in (e/r)ax,dx */ |
3305 | c->src.val = c->regs[VCPU_REGS_RDX]; | 4030 | c->src.val = c->regs[VCPU_REGS_RDX]; |
3306 | do_io_in: | 4031 | do_io_in: |
3307 | c->dst.bytes = min(c->dst.bytes, 4u); | ||
3308 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { | ||
3309 | rc = emulate_gp(ctxt, 0); | ||
3310 | goto done; | ||
3311 | } | ||
3312 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, | 4032 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, |
3313 | &c->dst.val)) | 4033 | &c->dst.val)) |
3314 | goto done; /* IO is needed */ | 4034 | goto done; /* IO is needed */ |
@@ -3317,25 +4037,19 @@ special_insn: | |||
3317 | case 0xef: /* out dx,(e/r)ax */ | 4037 | case 0xef: /* out dx,(e/r)ax */ |
3318 | c->dst.val = c->regs[VCPU_REGS_RDX]; | 4038 | c->dst.val = c->regs[VCPU_REGS_RDX]; |
3319 | do_io_out: | 4039 | do_io_out: |
3320 | c->src.bytes = min(c->src.bytes, 4u); | 4040 | ops->pio_out_emulated(ctxt, c->src.bytes, c->dst.val, |
3321 | if (!emulator_io_permited(ctxt, ops, c->dst.val, | 4041 | &c->src.val, 1); |
3322 | c->src.bytes)) { | ||
3323 | rc = emulate_gp(ctxt, 0); | ||
3324 | goto done; | ||
3325 | } | ||
3326 | ops->pio_out_emulated(c->src.bytes, c->dst.val, | ||
3327 | &c->src.val, 1, ctxt->vcpu); | ||
3328 | c->dst.type = OP_NONE; /* Disable writeback. */ | 4042 | c->dst.type = OP_NONE; /* Disable writeback. */ |
3329 | break; | 4043 | break; |
3330 | case 0xf4: /* hlt */ | 4044 | case 0xf4: /* hlt */ |
3331 | ctxt->vcpu->arch.halt_request = 1; | 4045 | ctxt->ops->halt(ctxt); |
3332 | break; | 4046 | break; |
3333 | case 0xf5: /* cmc */ | 4047 | case 0xf5: /* cmc */ |
3334 | /* complement carry flag from eflags reg */ | 4048 | /* complement carry flag from eflags reg */ |
3335 | ctxt->eflags ^= EFLG_CF; | 4049 | ctxt->eflags ^= EFLG_CF; |
3336 | break; | 4050 | break; |
3337 | case 0xf6 ... 0xf7: /* Grp3 */ | 4051 | case 0xf6 ... 0xf7: /* Grp3 */ |
3338 | rc = emulate_grp3(ctxt, ops); | 4052 | rc = em_grp3(ctxt); |
3339 | break; | 4053 | break; |
3340 | case 0xf8: /* clc */ | 4054 | case 0xf8: /* clc */ |
3341 | ctxt->eflags &= ~EFLG_CF; | 4055 | ctxt->eflags &= ~EFLG_CF; |
@@ -3366,13 +4080,11 @@ special_insn: | |||
3366 | ctxt->eflags |= EFLG_DF; | 4080 | ctxt->eflags |= EFLG_DF; |
3367 | break; | 4081 | break; |
3368 | case 0xfe: /* Grp4 */ | 4082 | case 0xfe: /* Grp4 */ |
3369 | grp45: | 4083 | rc = em_grp45(ctxt); |
3370 | rc = emulate_grp45(ctxt, ops); | ||
3371 | break; | 4084 | break; |
3372 | case 0xff: /* Grp5 */ | 4085 | case 0xff: /* Grp5 */ |
3373 | if (c->modrm_reg == 5) | 4086 | rc = em_grp45(ctxt); |
3374 | goto jump_far; | 4087 | break; |
3375 | goto grp45; | ||
3376 | default: | 4088 | default: |
3377 | goto cannot_emulate; | 4089 | goto cannot_emulate; |
3378 | } | 4090 | } |
@@ -3381,7 +4093,7 @@ special_insn: | |||
3381 | goto done; | 4093 | goto done; |
3382 | 4094 | ||
3383 | writeback: | 4095 | writeback: |
3384 | rc = writeback(ctxt, ops); | 4096 | rc = writeback(ctxt); |
3385 | if (rc != X86EMUL_CONTINUE) | 4097 | if (rc != X86EMUL_CONTINUE) |
3386 | goto done; | 4098 | goto done; |
3387 | 4099 | ||
@@ -3392,7 +4104,7 @@ writeback: | |||
3392 | c->dst.type = saved_dst_type; | 4104 | c->dst.type = saved_dst_type; |
3393 | 4105 | ||
3394 | if ((c->d & SrcMask) == SrcSI) | 4106 | if ((c->d & SrcMask) == SrcSI) |
3395 | string_addr_inc(ctxt, seg_override(ctxt, ops, c), | 4107 | string_addr_inc(ctxt, seg_override(ctxt, c), |
3396 | VCPU_REGS_RSI, &c->src); | 4108 | VCPU_REGS_RSI, &c->src); |
3397 | 4109 | ||
3398 | if ((c->d & DstMask) == DstDI) | 4110 | if ((c->d & DstMask) == DstDI) |
@@ -3427,115 +4139,34 @@ writeback: | |||
3427 | done: | 4139 | done: |
3428 | if (rc == X86EMUL_PROPAGATE_FAULT) | 4140 | if (rc == X86EMUL_PROPAGATE_FAULT) |
3429 | ctxt->have_exception = true; | 4141 | ctxt->have_exception = true; |
4142 | if (rc == X86EMUL_INTERCEPTED) | ||
4143 | return EMULATION_INTERCEPTED; | ||
4144 | |||
3430 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; | 4145 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; |
3431 | 4146 | ||
3432 | twobyte_insn: | 4147 | twobyte_insn: |
3433 | switch (c->b) { | 4148 | switch (c->b) { |
3434 | case 0x01: /* lgdt, lidt, lmsw */ | ||
3435 | switch (c->modrm_reg) { | ||
3436 | u16 size; | ||
3437 | unsigned long address; | ||
3438 | |||
3439 | case 0: /* vmcall */ | ||
3440 | if (c->modrm_mod != 3 || c->modrm_rm != 1) | ||
3441 | goto cannot_emulate; | ||
3442 | |||
3443 | rc = kvm_fix_hypercall(ctxt->vcpu); | ||
3444 | if (rc != X86EMUL_CONTINUE) | ||
3445 | goto done; | ||
3446 | |||
3447 | /* Let the processor re-execute the fixed hypercall */ | ||
3448 | c->eip = ctxt->eip; | ||
3449 | /* Disable writeback. */ | ||
3450 | c->dst.type = OP_NONE; | ||
3451 | break; | ||
3452 | case 2: /* lgdt */ | ||
3453 | rc = read_descriptor(ctxt, ops, c->src.addr.mem, | ||
3454 | &size, &address, c->op_bytes); | ||
3455 | if (rc != X86EMUL_CONTINUE) | ||
3456 | goto done; | ||
3457 | realmode_lgdt(ctxt->vcpu, size, address); | ||
3458 | /* Disable writeback. */ | ||
3459 | c->dst.type = OP_NONE; | ||
3460 | break; | ||
3461 | case 3: /* lidt/vmmcall */ | ||
3462 | if (c->modrm_mod == 3) { | ||
3463 | switch (c->modrm_rm) { | ||
3464 | case 1: | ||
3465 | rc = kvm_fix_hypercall(ctxt->vcpu); | ||
3466 | break; | ||
3467 | default: | ||
3468 | goto cannot_emulate; | ||
3469 | } | ||
3470 | } else { | ||
3471 | rc = read_descriptor(ctxt, ops, c->src.addr.mem, | ||
3472 | &size, &address, | ||
3473 | c->op_bytes); | ||
3474 | if (rc != X86EMUL_CONTINUE) | ||
3475 | goto done; | ||
3476 | realmode_lidt(ctxt->vcpu, size, address); | ||
3477 | } | ||
3478 | /* Disable writeback. */ | ||
3479 | c->dst.type = OP_NONE; | ||
3480 | break; | ||
3481 | case 4: /* smsw */ | ||
3482 | c->dst.bytes = 2; | ||
3483 | c->dst.val = ops->get_cr(0, ctxt->vcpu); | ||
3484 | break; | ||
3485 | case 6: /* lmsw */ | ||
3486 | ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0eul) | | ||
3487 | (c->src.val & 0x0f), ctxt->vcpu); | ||
3488 | c->dst.type = OP_NONE; | ||
3489 | break; | ||
3490 | case 5: /* not defined */ | ||
3491 | emulate_ud(ctxt); | ||
3492 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3493 | goto done; | ||
3494 | case 7: /* invlpg*/ | ||
3495 | emulate_invlpg(ctxt->vcpu, | ||
3496 | linear(ctxt, c->src.addr.mem)); | ||
3497 | /* Disable writeback. */ | ||
3498 | c->dst.type = OP_NONE; | ||
3499 | break; | ||
3500 | default: | ||
3501 | goto cannot_emulate; | ||
3502 | } | ||
3503 | break; | ||
3504 | case 0x05: /* syscall */ | 4149 | case 0x05: /* syscall */ |
3505 | rc = emulate_syscall(ctxt, ops); | 4150 | rc = emulate_syscall(ctxt, ops); |
3506 | break; | 4151 | break; |
3507 | case 0x06: | 4152 | case 0x06: |
3508 | emulate_clts(ctxt->vcpu); | 4153 | rc = em_clts(ctxt); |
3509 | break; | 4154 | break; |
3510 | case 0x09: /* wbinvd */ | 4155 | case 0x09: /* wbinvd */ |
3511 | kvm_emulate_wbinvd(ctxt->vcpu); | 4156 | (ctxt->ops->wbinvd)(ctxt); |
3512 | break; | 4157 | break; |
3513 | case 0x08: /* invd */ | 4158 | case 0x08: /* invd */ |
3514 | case 0x0d: /* GrpP (prefetch) */ | 4159 | case 0x0d: /* GrpP (prefetch) */ |
3515 | case 0x18: /* Grp16 (prefetch/nop) */ | 4160 | case 0x18: /* Grp16 (prefetch/nop) */ |
3516 | break; | 4161 | break; |
3517 | case 0x20: /* mov cr, reg */ | 4162 | case 0x20: /* mov cr, reg */ |
3518 | switch (c->modrm_reg) { | 4163 | c->dst.val = ops->get_cr(ctxt, c->modrm_reg); |
3519 | case 1: | ||
3520 | case 5 ... 7: | ||
3521 | case 9 ... 15: | ||
3522 | emulate_ud(ctxt); | ||
3523 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3524 | goto done; | ||
3525 | } | ||
3526 | c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu); | ||
3527 | break; | 4164 | break; |
3528 | case 0x21: /* mov from dr to reg */ | 4165 | case 0x21: /* mov from dr to reg */ |
3529 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && | 4166 | ops->get_dr(ctxt, c->modrm_reg, &c->dst.val); |
3530 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { | ||
3531 | emulate_ud(ctxt); | ||
3532 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3533 | goto done; | ||
3534 | } | ||
3535 | ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu); | ||
3536 | break; | 4167 | break; |
3537 | case 0x22: /* mov reg, cr */ | 4168 | case 0x22: /* mov reg, cr */ |
3538 | if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) { | 4169 | if (ops->set_cr(ctxt, c->modrm_reg, c->src.val)) { |
3539 | emulate_gp(ctxt, 0); | 4170 | emulate_gp(ctxt, 0); |
3540 | rc = X86EMUL_PROPAGATE_FAULT; | 4171 | rc = X86EMUL_PROPAGATE_FAULT; |
3541 | goto done; | 4172 | goto done; |
@@ -3543,16 +4174,9 @@ twobyte_insn: | |||
3543 | c->dst.type = OP_NONE; | 4174 | c->dst.type = OP_NONE; |
3544 | break; | 4175 | break; |
3545 | case 0x23: /* mov from reg to dr */ | 4176 | case 0x23: /* mov from reg to dr */ |
3546 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && | 4177 | if (ops->set_dr(ctxt, c->modrm_reg, c->src.val & |
3547 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { | ||
3548 | emulate_ud(ctxt); | ||
3549 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3550 | goto done; | ||
3551 | } | ||
3552 | |||
3553 | if (ops->set_dr(c->modrm_reg, c->src.val & | ||
3554 | ((ctxt->mode == X86EMUL_MODE_PROT64) ? | 4178 | ((ctxt->mode == X86EMUL_MODE_PROT64) ? |
3555 | ~0ULL : ~0U), ctxt->vcpu) < 0) { | 4179 | ~0ULL : ~0U)) < 0) { |
3556 | /* #UD condition is already handled by the code above */ | 4180 | /* #UD condition is already handled by the code above */ |
3557 | emulate_gp(ctxt, 0); | 4181 | emulate_gp(ctxt, 0); |
3558 | rc = X86EMUL_PROPAGATE_FAULT; | 4182 | rc = X86EMUL_PROPAGATE_FAULT; |
@@ -3565,7 +4189,7 @@ twobyte_insn: | |||
3565 | /* wrmsr */ | 4189 | /* wrmsr */ |
3566 | msr_data = (u32)c->regs[VCPU_REGS_RAX] | 4190 | msr_data = (u32)c->regs[VCPU_REGS_RAX] |
3567 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); | 4191 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); |
3568 | if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { | 4192 | if (ops->set_msr(ctxt, c->regs[VCPU_REGS_RCX], msr_data)) { |
3569 | emulate_gp(ctxt, 0); | 4193 | emulate_gp(ctxt, 0); |
3570 | rc = X86EMUL_PROPAGATE_FAULT; | 4194 | rc = X86EMUL_PROPAGATE_FAULT; |
3571 | goto done; | 4195 | goto done; |
@@ -3574,7 +4198,7 @@ twobyte_insn: | |||
3574 | break; | 4198 | break; |
3575 | case 0x32: | 4199 | case 0x32: |
3576 | /* rdmsr */ | 4200 | /* rdmsr */ |
3577 | if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { | 4201 | if (ops->get_msr(ctxt, c->regs[VCPU_REGS_RCX], &msr_data)) { |
3578 | emulate_gp(ctxt, 0); | 4202 | emulate_gp(ctxt, 0); |
3579 | rc = X86EMUL_PROPAGATE_FAULT; | 4203 | rc = X86EMUL_PROPAGATE_FAULT; |
3580 | goto done; | 4204 | goto done; |
@@ -3603,7 +4227,7 @@ twobyte_insn: | |||
3603 | c->dst.val = test_cc(c->b, ctxt->eflags); | 4227 | c->dst.val = test_cc(c->b, ctxt->eflags); |
3604 | break; | 4228 | break; |
3605 | case 0xa0: /* push fs */ | 4229 | case 0xa0: /* push fs */ |
3606 | emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); | 4230 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); |
3607 | break; | 4231 | break; |
3608 | case 0xa1: /* pop fs */ | 4232 | case 0xa1: /* pop fs */ |
3609 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); | 4233 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); |
@@ -3620,7 +4244,7 @@ twobyte_insn: | |||
3620 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); | 4244 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); |
3621 | break; | 4245 | break; |
3622 | case 0xa8: /* push gs */ | 4246 | case 0xa8: /* push gs */ |
3623 | emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); | 4247 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); |
3624 | break; | 4248 | break; |
3625 | case 0xa9: /* pop gs */ | 4249 | case 0xa9: /* pop gs */ |
3626 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); | 4250 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); |
@@ -3727,7 +4351,7 @@ twobyte_insn: | |||
3727 | (u64) c->src.val; | 4351 | (u64) c->src.val; |
3728 | break; | 4352 | break; |
3729 | case 0xc7: /* Grp9 (cmpxchg8b) */ | 4353 | case 0xc7: /* Grp9 (cmpxchg8b) */ |
3730 | rc = emulate_grp9(ctxt, ops); | 4354 | rc = em_grp9(ctxt); |
3731 | break; | 4355 | break; |
3732 | default: | 4356 | default: |
3733 | goto cannot_emulate; | 4357 | goto cannot_emulate; |
@@ -3739,5 +4363,5 @@ twobyte_insn: | |||
3739 | goto writeback; | 4363 | goto writeback; |
3740 | 4364 | ||
3741 | cannot_emulate: | 4365 | cannot_emulate: |
3742 | return -1; | 4366 | return EMULATION_FAILED; |
3743 | } | 4367 | } |
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 46d08ca0b48f..51a97426e791 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h | |||
@@ -33,7 +33,6 @@ struct kvm_kpit_state { | |||
33 | }; | 33 | }; |
34 | 34 | ||
35 | struct kvm_pit { | 35 | struct kvm_pit { |
36 | unsigned long base_addresss; | ||
37 | struct kvm_io_device dev; | 36 | struct kvm_io_device dev; |
38 | struct kvm_io_device speaker_dev; | 37 | struct kvm_io_device speaker_dev; |
39 | struct kvm *kvm; | 38 | struct kvm *kvm; |
@@ -51,7 +50,6 @@ struct kvm_pit { | |||
51 | #define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 | 50 | #define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 |
52 | #define KVM_PIT_CHANNEL_MASK 0x3 | 51 | #define KVM_PIT_CHANNEL_MASK 0x3 |
53 | 52 | ||
54 | void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu); | ||
55 | void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start); | 53 | void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start); |
56 | struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags); | 54 | struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags); |
57 | void kvm_free_pit(struct kvm *kvm); | 55 | void kvm_free_pit(struct kvm *kvm); |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index ba910d149410..53e2d084bffb 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -75,7 +75,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm); | |||
75 | void kvm_destroy_pic(struct kvm *kvm); | 75 | void kvm_destroy_pic(struct kvm *kvm); |
76 | int kvm_pic_read_irq(struct kvm *kvm); | 76 | int kvm_pic_read_irq(struct kvm *kvm); |
77 | void kvm_pic_update_irq(struct kvm_pic *s); | 77 | void kvm_pic_update_irq(struct kvm_pic *s); |
78 | void kvm_pic_clear_isr_ack(struct kvm *kvm); | ||
79 | 78 | ||
80 | static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) | 79 | static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) |
81 | { | 80 | { |
@@ -100,7 +99,6 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); | |||
100 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); | 99 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); |
101 | void __kvm_migrate_timers(struct kvm_vcpu *vcpu); | 100 | void __kvm_migrate_timers(struct kvm_vcpu *vcpu); |
102 | 101 | ||
103 | int pit_has_pending_timer(struct kvm_vcpu *vcpu); | ||
104 | int apic_has_pending_timer(struct kvm_vcpu *vcpu); | 102 | int apic_has_pending_timer(struct kvm_vcpu *vcpu); |
105 | 103 | ||
106 | #endif | 104 | #endif |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 22fae7593ee7..bd14bb4c8594 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -1206,7 +1206,7 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) | |||
1206 | 1206 | ||
1207 | static void nonpaging_update_pte(struct kvm_vcpu *vcpu, | 1207 | static void nonpaging_update_pte(struct kvm_vcpu *vcpu, |
1208 | struct kvm_mmu_page *sp, u64 *spte, | 1208 | struct kvm_mmu_page *sp, u64 *spte, |
1209 | const void *pte, unsigned long mmu_seq) | 1209 | const void *pte) |
1210 | { | 1210 | { |
1211 | WARN_ON(1); | 1211 | WARN_ON(1); |
1212 | } | 1212 | } |
@@ -3163,9 +3163,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | |||
3163 | } | 3163 | } |
3164 | 3164 | ||
3165 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | 3165 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, |
3166 | struct kvm_mmu_page *sp, | 3166 | struct kvm_mmu_page *sp, u64 *spte, |
3167 | u64 *spte, | 3167 | const void *new) |
3168 | const void *new, unsigned long mmu_seq) | ||
3169 | { | 3168 | { |
3170 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) { | 3169 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) { |
3171 | ++vcpu->kvm->stat.mmu_pde_zapped; | 3170 | ++vcpu->kvm->stat.mmu_pde_zapped; |
@@ -3173,7 +3172,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
3173 | } | 3172 | } |
3174 | 3173 | ||
3175 | ++vcpu->kvm->stat.mmu_pte_updated; | 3174 | ++vcpu->kvm->stat.mmu_pte_updated; |
3176 | vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq); | 3175 | vcpu->arch.mmu.update_pte(vcpu, sp, spte, new); |
3177 | } | 3176 | } |
3178 | 3177 | ||
3179 | static bool need_remote_flush(u64 old, u64 new) | 3178 | static bool need_remote_flush(u64 old, u64 new) |
@@ -3229,7 +3228,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3229 | struct kvm_mmu_page *sp; | 3228 | struct kvm_mmu_page *sp; |
3230 | struct hlist_node *node; | 3229 | struct hlist_node *node; |
3231 | LIST_HEAD(invalid_list); | 3230 | LIST_HEAD(invalid_list); |
3232 | unsigned long mmu_seq; | ||
3233 | u64 entry, gentry, *spte; | 3231 | u64 entry, gentry, *spte; |
3234 | unsigned pte_size, page_offset, misaligned, quadrant, offset; | 3232 | unsigned pte_size, page_offset, misaligned, quadrant, offset; |
3235 | int level, npte, invlpg_counter, r, flooded = 0; | 3233 | int level, npte, invlpg_counter, r, flooded = 0; |
@@ -3271,9 +3269,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3271 | break; | 3269 | break; |
3272 | } | 3270 | } |
3273 | 3271 | ||
3274 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
3275 | smp_rmb(); | ||
3276 | |||
3277 | spin_lock(&vcpu->kvm->mmu_lock); | 3272 | spin_lock(&vcpu->kvm->mmu_lock); |
3278 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) | 3273 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) |
3279 | gentry = 0; | 3274 | gentry = 0; |
@@ -3345,8 +3340,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3345 | if (gentry && | 3340 | if (gentry && |
3346 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) | 3341 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) |
3347 | & mask.word)) | 3342 | & mask.word)) |
3348 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry, | 3343 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
3349 | mmu_seq); | ||
3350 | if (!remote_flush && need_remote_flush(entry, *spte)) | 3344 | if (!remote_flush && need_remote_flush(entry, *spte)) |
3351 | remote_flush = true; | 3345 | remote_flush = true; |
3352 | ++spte; | 3346 | ++spte; |
@@ -3551,10 +3545,11 @@ static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, | |||
3551 | return kvm_mmu_prepare_zap_page(kvm, page, invalid_list); | 3545 | return kvm_mmu_prepare_zap_page(kvm, page, invalid_list); |
3552 | } | 3546 | } |
3553 | 3547 | ||
3554 | static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | 3548 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) |
3555 | { | 3549 | { |
3556 | struct kvm *kvm; | 3550 | struct kvm *kvm; |
3557 | struct kvm *kvm_freed = NULL; | 3551 | struct kvm *kvm_freed = NULL; |
3552 | int nr_to_scan = sc->nr_to_scan; | ||
3558 | 3553 | ||
3559 | if (nr_to_scan == 0) | 3554 | if (nr_to_scan == 0) |
3560 | goto out; | 3555 | goto out; |
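mmu_shrink() above is converted to the shrinker interface that hands the callback a struct shrink_control instead of a bare nr_to_scan/gfp_mask pair. A minimal sketch of a callback in the new style, assuming the struct shrinker / struct shrink_control declarations of this kernel generation (the example_* names are hypothetical):

#include <linux/mm.h>	/* struct shrinker, struct shrink_control (this era) */

static int example_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
	int nr_to_scan = sc->nr_to_scan;

	if (nr_to_scan == 0)
		return 0;	/* query only: report how many objects are cached */

	/* ... drop up to nr_to_scan cached objects, honouring sc->gfp_mask ... */

	return 0;		/* objects remaining after the scan */
}

static struct shrinker example_shrinker = {
	.shrink = example_shrink,
	.seeks  = DEFAULT_SEEKS,
};

/* Registered once with register_shrinker(&example_shrinker). */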
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index c6397795d865..6c4dc010c4cb 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -78,15 +78,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) | |||
78 | return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; | 78 | return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; |
79 | } | 79 | } |
80 | 80 | ||
81 | static bool FNAME(cmpxchg_gpte)(struct kvm *kvm, | 81 | static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, |
82 | gfn_t table_gfn, unsigned index, | 82 | pt_element_t __user *ptep_user, unsigned index, |
83 | pt_element_t orig_pte, pt_element_t new_pte) | 83 | pt_element_t orig_pte, pt_element_t new_pte) |
84 | { | 84 | { |
85 | int npages; | ||
85 | pt_element_t ret; | 86 | pt_element_t ret; |
86 | pt_element_t *table; | 87 | pt_element_t *table; |
87 | struct page *page; | 88 | struct page *page; |
88 | 89 | ||
89 | page = gfn_to_page(kvm, table_gfn); | 90 | npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page); |
91 | /* Check if the user is doing something meaningless. */ | ||
92 | if (unlikely(npages != 1)) | ||
93 | return -EFAULT; | ||
90 | 94 | ||
91 | table = kmap_atomic(page, KM_USER0); | 95 | table = kmap_atomic(page, KM_USER0); |
92 | ret = CMPXCHG(&table[index], orig_pte, new_pte); | 96 | ret = CMPXCHG(&table[index], orig_pte, new_pte); |
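The reworked cmpxchg_gpte() above updates the guest PTE through its user-space mapping rather than through gfn_to_page(). A condensed sketch of that access pattern with simplified names (sketch_* is hypothetical, a plain u64 cmpxchg is assumed here for a 64-bit build, whereas the real code uses the CMPXCHG macro to pick cmpxchg64 where needed):

#include <linux/kvm_host.h>	/* kvm_release_page_dirty()          */
#include <linux/highmem.h>	/* kmap_atomic() / kunmap_atomic()   */
#include <linux/mm.h>		/* get_user_pages_fast()             */

/* Sketch: atomically update a guest PTE via its user-space mapping. */
static int sketch_cmpxchg_gpte(u64 __user *ptep_user, unsigned index,
			       u64 orig_pte, u64 new_pte)
{
	u64 ret;
	u64 *table;
	struct page *page;
	int npages;

	npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page);
	if (unlikely(npages != 1))
		return -EFAULT;		/* no sane mapping behind the pointer */

	table = kmap_atomic(page, KM_USER0);
	ret = cmpxchg(&table[index], orig_pte, new_pte);
	kunmap_atomic(table, KM_USER0);

	kvm_release_page_dirty(page);

	return ret != orig_pte;		/* non-zero: lost the race, caller re-walks */
}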
@@ -117,6 +121,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, | |||
117 | gva_t addr, u32 access) | 121 | gva_t addr, u32 access) |
118 | { | 122 | { |
119 | pt_element_t pte; | 123 | pt_element_t pte; |
124 | pt_element_t __user *ptep_user; | ||
120 | gfn_t table_gfn; | 125 | gfn_t table_gfn; |
121 | unsigned index, pt_access, uninitialized_var(pte_access); | 126 | unsigned index, pt_access, uninitialized_var(pte_access); |
122 | gpa_t pte_gpa; | 127 | gpa_t pte_gpa; |
@@ -152,6 +157,9 @@ walk: | |||
152 | pt_access = ACC_ALL; | 157 | pt_access = ACC_ALL; |
153 | 158 | ||
154 | for (;;) { | 159 | for (;;) { |
160 | gfn_t real_gfn; | ||
161 | unsigned long host_addr; | ||
162 | |||
155 | index = PT_INDEX(addr, walker->level); | 163 | index = PT_INDEX(addr, walker->level); |
156 | 164 | ||
157 | table_gfn = gpte_to_gfn(pte); | 165 | table_gfn = gpte_to_gfn(pte); |
@@ -160,43 +168,64 @@ walk: | |||
160 | walker->table_gfn[walker->level - 1] = table_gfn; | 168 | walker->table_gfn[walker->level - 1] = table_gfn; |
161 | walker->pte_gpa[walker->level - 1] = pte_gpa; | 169 | walker->pte_gpa[walker->level - 1] = pte_gpa; |
162 | 170 | ||
163 | if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte, | 171 | real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), |
164 | offset, sizeof(pte), | 172 | PFERR_USER_MASK|PFERR_WRITE_MASK); |
165 | PFERR_USER_MASK|PFERR_WRITE_MASK)) { | 173 | if (unlikely(real_gfn == UNMAPPED_GVA)) { |
174 | present = false; | ||
175 | break; | ||
176 | } | ||
177 | real_gfn = gpa_to_gfn(real_gfn); | ||
178 | |||
179 | host_addr = gfn_to_hva(vcpu->kvm, real_gfn); | ||
180 | if (unlikely(kvm_is_error_hva(host_addr))) { | ||
181 | present = false; | ||
182 | break; | ||
183 | } | ||
184 | |||
185 | ptep_user = (pt_element_t __user *)((void *)host_addr + offset); | ||
186 | if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) { | ||
166 | present = false; | 187 | present = false; |
167 | break; | 188 | break; |
168 | } | 189 | } |
169 | 190 | ||
170 | trace_kvm_mmu_paging_element(pte, walker->level); | 191 | trace_kvm_mmu_paging_element(pte, walker->level); |
171 | 192 | ||
172 | if (!is_present_gpte(pte)) { | 193 | if (unlikely(!is_present_gpte(pte))) { |
173 | present = false; | 194 | present = false; |
174 | break; | 195 | break; |
175 | } | 196 | } |
176 | 197 | ||
177 | if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) { | 198 | if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte, |
199 | walker->level))) { | ||
178 | rsvd_fault = true; | 200 | rsvd_fault = true; |
179 | break; | 201 | break; |
180 | } | 202 | } |
181 | 203 | ||
182 | if (write_fault && !is_writable_pte(pte)) | 204 | if (unlikely(write_fault && !is_writable_pte(pte) |
183 | if (user_fault || is_write_protection(vcpu)) | 205 | && (user_fault || is_write_protection(vcpu)))) |
184 | eperm = true; | 206 | eperm = true; |
185 | 207 | ||
186 | if (user_fault && !(pte & PT_USER_MASK)) | 208 | if (unlikely(user_fault && !(pte & PT_USER_MASK))) |
187 | eperm = true; | 209 | eperm = true; |
188 | 210 | ||
189 | #if PTTYPE == 64 | 211 | #if PTTYPE == 64 |
190 | if (fetch_fault && (pte & PT64_NX_MASK)) | 212 | if (unlikely(fetch_fault && (pte & PT64_NX_MASK))) |
191 | eperm = true; | 213 | eperm = true; |
192 | #endif | 214 | #endif |
193 | 215 | ||
194 | if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) { | 216 | if (!eperm && !rsvd_fault |
217 | && unlikely(!(pte & PT_ACCESSED_MASK))) { | ||
218 | int ret; | ||
195 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, | 219 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, |
196 | sizeof(pte)); | 220 | sizeof(pte)); |
197 | if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, | 221 | ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, |
198 | index, pte, pte|PT_ACCESSED_MASK)) | 222 | pte, pte|PT_ACCESSED_MASK); |
223 | if (unlikely(ret < 0)) { | ||
224 | present = false; | ||
225 | break; | ||
226 | } else if (ret) | ||
199 | goto walk; | 227 | goto walk; |
228 | |||
200 | mark_page_dirty(vcpu->kvm, table_gfn); | 229 | mark_page_dirty(vcpu->kvm, table_gfn); |
201 | pte |= PT_ACCESSED_MASK; | 230 | pte |= PT_ACCESSED_MASK; |
202 | } | 231 | } |
@@ -241,17 +270,21 @@ walk: | |||
241 | --walker->level; | 270 | --walker->level; |
242 | } | 271 | } |
243 | 272 | ||
244 | if (!present || eperm || rsvd_fault) | 273 | if (unlikely(!present || eperm || rsvd_fault)) |
245 | goto error; | 274 | goto error; |
246 | 275 | ||
247 | if (write_fault && !is_dirty_gpte(pte)) { | 276 | if (write_fault && unlikely(!is_dirty_gpte(pte))) { |
248 | bool ret; | 277 | int ret; |
249 | 278 | ||
250 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); | 279 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); |
251 | ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, | 280 | ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, |
252 | pte|PT_DIRTY_MASK); | 281 | pte, pte|PT_DIRTY_MASK); |
253 | if (ret) | 282 | if (unlikely(ret < 0)) { |
283 | present = false; | ||
284 | goto error; | ||
285 | } else if (ret) | ||
254 | goto walk; | 286 | goto walk; |
287 | |||
255 | mark_page_dirty(vcpu->kvm, table_gfn); | 288 | mark_page_dirty(vcpu->kvm, table_gfn); |
256 | pte |= PT_DIRTY_MASK; | 289 | pte |= PT_DIRTY_MASK; |
257 | walker->ptes[walker->level - 1] = pte; | 290 | walker->ptes[walker->level - 1] = pte; |
@@ -325,7 +358,7 @@ no_present: | |||
325 | } | 358 | } |
326 | 359 | ||
327 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 360 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
328 | u64 *spte, const void *pte, unsigned long mmu_seq) | 361 | u64 *spte, const void *pte) |
329 | { | 362 | { |
330 | pt_element_t gpte; | 363 | pt_element_t gpte; |
331 | unsigned pte_access; | 364 | unsigned pte_access; |
@@ -342,8 +375,6 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
342 | kvm_release_pfn_clean(pfn); | 375 | kvm_release_pfn_clean(pfn); |
343 | return; | 376 | return; |
344 | } | 377 | } |
345 | if (mmu_notifier_retry(vcpu, mmu_seq)) | ||
346 | return; | ||
347 | 378 | ||
348 | /* | 379 | /* |
349 | * we call mmu_set_spte() with host_writable = true because that | 380 | * we call mmu_set_spte() with host_writable = true because that |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6bb15d583e47..506e4fe23adc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -63,6 +63,10 @@ MODULE_LICENSE("GPL"); | |||
63 | 63 | ||
64 | #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) | 64 | #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) |
65 | 65 | ||
66 | #define TSC_RATIO_RSVD 0xffffff0000000000ULL | ||
67 | #define TSC_RATIO_MIN 0x0000000000000001ULL | ||
68 | #define TSC_RATIO_MAX 0x000000ffffffffffULL | ||
69 | |||
66 | static bool erratum_383_found __read_mostly; | 70 | static bool erratum_383_found __read_mostly; |
67 | 71 | ||
68 | static const u32 host_save_user_msrs[] = { | 72 | static const u32 host_save_user_msrs[] = { |
@@ -93,14 +97,6 @@ struct nested_state { | |||
93 | /* A VMEXIT is required but not yet emulated */ | 97 | /* A VMEXIT is required but not yet emulated */ |
94 | bool exit_required; | 98 | bool exit_required; |
95 | 99 | ||
96 | /* | ||
97 | * If we vmexit during an instruction emulation we need this to restore | ||
98 | * the l1 guest rip after the emulation | ||
99 | */ | ||
100 | unsigned long vmexit_rip; | ||
101 | unsigned long vmexit_rsp; | ||
102 | unsigned long vmexit_rax; | ||
103 | |||
104 | /* cache for intercepts of the guest */ | 100 | /* cache for intercepts of the guest */ |
105 | u32 intercept_cr; | 101 | u32 intercept_cr; |
106 | u32 intercept_dr; | 102 | u32 intercept_dr; |
@@ -144,8 +140,13 @@ struct vcpu_svm { | |||
144 | unsigned int3_injected; | 140 | unsigned int3_injected; |
145 | unsigned long int3_rip; | 141 | unsigned long int3_rip; |
146 | u32 apf_reason; | 142 | u32 apf_reason; |
143 | |||
144 | u64 tsc_ratio; | ||
147 | }; | 145 | }; |
148 | 146 | ||
147 | static DEFINE_PER_CPU(u64, current_tsc_ratio); | ||
148 | #define TSC_RATIO_DEFAULT 0x0100000000ULL | ||
149 | |||
149 | #define MSR_INVALID 0xffffffffU | 150 | #define MSR_INVALID 0xffffffffU |
150 | 151 | ||
151 | static struct svm_direct_access_msrs { | 152 | static struct svm_direct_access_msrs { |
@@ -190,6 +191,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm); | |||
190 | static int nested_svm_vmexit(struct vcpu_svm *svm); | 191 | static int nested_svm_vmexit(struct vcpu_svm *svm); |
191 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 192 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
192 | bool has_error_code, u32 error_code); | 193 | bool has_error_code, u32 error_code); |
194 | static u64 __scale_tsc(u64 ratio, u64 tsc); | ||
193 | 195 | ||
194 | enum { | 196 | enum { |
195 | VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, | 197 | VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, |
@@ -376,7 +378,6 @@ struct svm_cpu_data { | |||
376 | }; | 378 | }; |
377 | 379 | ||
378 | static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); | 380 | static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); |
379 | static uint32_t svm_features; | ||
380 | 381 | ||
381 | struct svm_init_data { | 382 | struct svm_init_data { |
382 | int cpu; | 383 | int cpu; |
@@ -569,6 +570,10 @@ static int has_svm(void) | |||
569 | 570 | ||
570 | static void svm_hardware_disable(void *garbage) | 571 | static void svm_hardware_disable(void *garbage) |
571 | { | 572 | { |
573 | /* Make sure we clean up behind us */ | ||
574 | if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) | ||
575 | wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); | ||
576 | |||
572 | cpu_svm_disable(); | 577 | cpu_svm_disable(); |
573 | } | 578 | } |
574 | 579 | ||
@@ -610,6 +615,11 @@ static int svm_hardware_enable(void *garbage) | |||
610 | 615 | ||
611 | wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); | 616 | wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); |
612 | 617 | ||
618 | if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { | ||
619 | wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); | ||
620 | __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; | ||
621 | } | ||
622 | |||
613 | svm_init_erratum_383(); | 623 | svm_init_erratum_383(); |
614 | 624 | ||
615 | return 0; | 625 | return 0; |
@@ -791,6 +801,23 @@ static __init int svm_hardware_setup(void) | |||
791 | if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) | 801 | if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) |
792 | kvm_enable_efer_bits(EFER_FFXSR); | 802 | kvm_enable_efer_bits(EFER_FFXSR); |
793 | 803 | ||
804 | if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { | ||
805 | u64 max; | ||
806 | |||
807 | kvm_has_tsc_control = true; | ||
808 | |||
809 | /* | ||
810 | * Make sure the user can only configure tsc_khz values that | ||
811 | * fit into a signed integer. | ||
812 | * A min value is not calculated because it will always | ||
813 | * be 1 on all machines and a value of 0 is used to disable | ||
814 | * tsc-scaling for the vcpu. | ||
815 | */ | ||
816 | max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX)); | ||
817 | |||
818 | kvm_max_guest_tsc_khz = max; | ||
819 | } | ||
820 | |||
794 | if (nested) { | 821 | if (nested) { |
795 | printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); | 822 | printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); |
796 | kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); | 823 | kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); |
@@ -802,8 +829,6 @@ static __init int svm_hardware_setup(void) | |||
802 | goto err; | 829 | goto err; |
803 | } | 830 | } |
804 | 831 | ||
805 | svm_features = cpuid_edx(SVM_CPUID_FUNC); | ||
806 | |||
807 | if (!boot_cpu_has(X86_FEATURE_NPT)) | 832 | if (!boot_cpu_has(X86_FEATURE_NPT)) |
808 | npt_enabled = false; | 833 | npt_enabled = false; |
809 | 834 | ||
@@ -854,6 +879,64 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) | |||
854 | seg->base = 0; | 879 | seg->base = 0; |
855 | } | 880 | } |
856 | 881 | ||
882 | static u64 __scale_tsc(u64 ratio, u64 tsc) | ||
883 | { | ||
884 | u64 mult, frac, _tsc; | ||
885 | |||
886 | mult = ratio >> 32; | ||
887 | frac = ratio & ((1ULL << 32) - 1); | ||
888 | |||
889 | _tsc = tsc; | ||
890 | _tsc *= mult; | ||
891 | _tsc += (tsc >> 32) * frac; | ||
892 | _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32; | ||
893 | |||
894 | return _tsc; | ||
895 | } | ||
896 | |||
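[Editorial note] The ratio used by __scale_tsc() above is an 8.32 fixed-point multiplier (integer part in bits 39:32, fraction in bits 31:0, with 1.0 == TSC_RATIO_DEFAULT == 0x0100000000), so the scaled TSC is tsc * ratio / 2^32, computed with 64-bit partial products to avoid overflow. A minimal userspace sketch of the same arithmetic, illustration only and not part of the patch:

	#include <assert.h>
	#include <stdint.h>

	static uint64_t scale_tsc(uint64_t ratio, uint64_t tsc)
	{
		uint64_t mult = ratio >> 32;             /* integer part of the ratio    */
		uint64_t frac = ratio & 0xffffffffULL;   /* fractional part of the ratio */

		return tsc * mult
		       + (tsc >> 32) * frac
		       + (((tsc & 0xffffffffULL) * frac) >> 32);
	}

	int main(void)
	{
		/* ratio 1.5 == 0x180000000: 1000 host cycles scale to 1500 guest cycles */
		assert(scale_tsc(0x180000000ULL, 1000) == 1500);
		return 0;
	}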
897 | static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) | ||
898 | { | ||
899 | struct vcpu_svm *svm = to_svm(vcpu); | ||
900 | u64 _tsc = tsc; | ||
901 | |||
902 | if (svm->tsc_ratio != TSC_RATIO_DEFAULT) | ||
903 | _tsc = __scale_tsc(svm->tsc_ratio, tsc); | ||
904 | |||
905 | return _tsc; | ||
906 | } | ||
907 | |||
908 | static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) | ||
909 | { | ||
910 | struct vcpu_svm *svm = to_svm(vcpu); | ||
911 | u64 ratio; | ||
912 | u64 khz; | ||
913 | |||
914 | /* TSC scaling supported? */ | ||
915 | if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) | ||
916 | return; | ||
917 | |||
918 | /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */ | ||
919 | if (user_tsc_khz == 0) { | ||
920 | vcpu->arch.virtual_tsc_khz = 0; | ||
921 | svm->tsc_ratio = TSC_RATIO_DEFAULT; | ||
922 | return; | ||
923 | } | ||
924 | |||
925 | khz = user_tsc_khz; | ||
926 | |||
927 | /* TSC scaling required - calculate ratio */ | ||
928 | ratio = khz << 32; | ||
929 | do_div(ratio, tsc_khz); | ||
930 | |||
931 | if (ratio == 0 || ratio & TSC_RATIO_RSVD) { | ||
932 | WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n", | ||
933 | user_tsc_khz); | ||
934 | return; | ||
935 | } | ||
936 | vcpu->arch.virtual_tsc_khz = user_tsc_khz; | ||
937 | svm->tsc_ratio = ratio; | ||
938 | } | ||
939 | |||
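[Editorial note] A quick worked example of the ratio computation in svm_set_tsc_khz() above; the kHz values are chosen only for illustration. A guest configured for 3,000,000 kHz on a 2,400,000 kHz host yields 1.25 in 8.32 fixed point, which passes the TSC_RATIO_RSVD check because no bits above bit 39 are set:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t user_khz = 3000000, host_khz = 2400000;
		uint64_t ratio = (user_khz << 32) / host_khz;   /* 8.32 fixed point */

		assert(ratio == 0x140000000ULL);                /* 1.25 */
		assert((ratio & 0xffffff0000000000ULL) == 0);   /* no reserved bits set */
		return 0;
	}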
857 | static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | 940 | static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) |
858 | { | 941 | { |
859 | struct vcpu_svm *svm = to_svm(vcpu); | 942 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -880,6 +963,15 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) | |||
880 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); | 963 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); |
881 | } | 964 | } |
882 | 965 | ||
966 | static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) | ||
967 | { | ||
968 | u64 tsc; | ||
969 | |||
970 | tsc = svm_scale_tsc(vcpu, native_read_tsc()); | ||
971 | |||
972 | return target_tsc - tsc; | ||
973 | } | ||
974 | |||
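[Editorial note] svm_compute_tsc_offset() above subtracts the already-scaled host TSC, so the guest observes guest_tsc = scale(host_tsc) + offset. A worked example with the 1.25 ratio from the previous note (illustration only):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t host_tsc = 1000000, target = 2000000;
		uint64_t scaled = host_tsc * 5 / 4;      /* ratio 1.25 */
		uint64_t offset = target - scaled;       /* what svm_compute_tsc_offset returns */

		assert(scaled + offset == target);
		return 0;
	}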
883 | static void init_vmcb(struct vcpu_svm *svm) | 975 | static void init_vmcb(struct vcpu_svm *svm) |
884 | { | 976 | { |
885 | struct vmcb_control_area *control = &svm->vmcb->control; | 977 | struct vmcb_control_area *control = &svm->vmcb->control; |
@@ -975,7 +1067,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
975 | svm_set_efer(&svm->vcpu, 0); | 1067 | svm_set_efer(&svm->vcpu, 0); |
976 | save->dr6 = 0xffff0ff0; | 1068 | save->dr6 = 0xffff0ff0; |
977 | save->dr7 = 0x400; | 1069 | save->dr7 = 0x400; |
978 | save->rflags = 2; | 1070 | kvm_set_rflags(&svm->vcpu, 2); |
979 | save->rip = 0x0000fff0; | 1071 | save->rip = 0x0000fff0; |
980 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; | 1072 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; |
981 | 1073 | ||
@@ -1048,6 +1140,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
1048 | goto out; | 1140 | goto out; |
1049 | } | 1141 | } |
1050 | 1142 | ||
1143 | svm->tsc_ratio = TSC_RATIO_DEFAULT; | ||
1144 | |||
1051 | err = kvm_vcpu_init(&svm->vcpu, kvm, id); | 1145 | err = kvm_vcpu_init(&svm->vcpu, kvm, id); |
1052 | if (err) | 1146 | if (err) |
1053 | goto free_svm; | 1147 | goto free_svm; |
@@ -1141,6 +1235,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
1141 | 1235 | ||
1142 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 1236 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
1143 | rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); | 1237 | rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
1238 | |||
1239 | if (static_cpu_has(X86_FEATURE_TSCRATEMSR) && | ||
1240 | svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) { | ||
1241 | __get_cpu_var(current_tsc_ratio) = svm->tsc_ratio; | ||
1242 | wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio); | ||
1243 | } | ||
1144 | } | 1244 | } |
1145 | 1245 | ||
1146 | static void svm_vcpu_put(struct kvm_vcpu *vcpu) | 1246 | static void svm_vcpu_put(struct kvm_vcpu *vcpu) |
@@ -1365,31 +1465,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1365 | { | 1465 | { |
1366 | struct vcpu_svm *svm = to_svm(vcpu); | 1466 | struct vcpu_svm *svm = to_svm(vcpu); |
1367 | 1467 | ||
1368 | if (is_guest_mode(vcpu)) { | ||
1369 | /* | ||
1370 | * We are here because we run in nested mode, the host kvm | ||
1371 | * intercepts cr0 writes but the l1 hypervisor does not. | ||
1372 | * But the L1 hypervisor may intercept selective cr0 writes. | ||
1373 | * This needs to be checked here. | ||
1374 | */ | ||
1375 | unsigned long old, new; | ||
1376 | |||
1377 | /* Remove bits that would trigger a real cr0 write intercept */ | ||
1378 | old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; | ||
1379 | new = cr0 & SVM_CR0_SELECTIVE_MASK; | ||
1380 | |||
1381 | if (old == new) { | ||
1382 | /* cr0 write with ts and mp unchanged */ | ||
1383 | svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; | ||
1384 | if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) { | ||
1385 | svm->nested.vmexit_rip = kvm_rip_read(vcpu); | ||
1386 | svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
1387 | svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
1388 | return; | ||
1389 | } | ||
1390 | } | ||
1391 | } | ||
1392 | |||
1393 | #ifdef CONFIG_X86_64 | 1468 | #ifdef CONFIG_X86_64 |
1394 | if (vcpu->arch.efer & EFER_LME) { | 1469 | if (vcpu->arch.efer & EFER_LME) { |
1395 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 1470 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
@@ -2127,7 +2202,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
2127 | nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); | 2202 | nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); |
2128 | nested_vmcb->save.cr2 = vmcb->save.cr2; | 2203 | nested_vmcb->save.cr2 = vmcb->save.cr2; |
2129 | nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; | 2204 | nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; |
2130 | nested_vmcb->save.rflags = vmcb->save.rflags; | 2205 | nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu); |
2131 | nested_vmcb->save.rip = vmcb->save.rip; | 2206 | nested_vmcb->save.rip = vmcb->save.rip; |
2132 | nested_vmcb->save.rsp = vmcb->save.rsp; | 2207 | nested_vmcb->save.rsp = vmcb->save.rsp; |
2133 | nested_vmcb->save.rax = vmcb->save.rax; | 2208 | nested_vmcb->save.rax = vmcb->save.rax; |
@@ -2184,7 +2259,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
2184 | svm->vmcb->save.ds = hsave->save.ds; | 2259 | svm->vmcb->save.ds = hsave->save.ds; |
2185 | svm->vmcb->save.gdtr = hsave->save.gdtr; | 2260 | svm->vmcb->save.gdtr = hsave->save.gdtr; |
2186 | svm->vmcb->save.idtr = hsave->save.idtr; | 2261 | svm->vmcb->save.idtr = hsave->save.idtr; |
2187 | svm->vmcb->save.rflags = hsave->save.rflags; | 2262 | kvm_set_rflags(&svm->vcpu, hsave->save.rflags); |
2188 | svm_set_efer(&svm->vcpu, hsave->save.efer); | 2263 | svm_set_efer(&svm->vcpu, hsave->save.efer); |
2189 | svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); | 2264 | svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); |
2190 | svm_set_cr4(&svm->vcpu, hsave->save.cr4); | 2265 | svm_set_cr4(&svm->vcpu, hsave->save.cr4); |
@@ -2312,7 +2387,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2312 | hsave->save.efer = svm->vcpu.arch.efer; | 2387 | hsave->save.efer = svm->vcpu.arch.efer; |
2313 | hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); | 2388 | hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); |
2314 | hsave->save.cr4 = svm->vcpu.arch.cr4; | 2389 | hsave->save.cr4 = svm->vcpu.arch.cr4; |
2315 | hsave->save.rflags = vmcb->save.rflags; | 2390 | hsave->save.rflags = kvm_get_rflags(&svm->vcpu); |
2316 | hsave->save.rip = kvm_rip_read(&svm->vcpu); | 2391 | hsave->save.rip = kvm_rip_read(&svm->vcpu); |
2317 | hsave->save.rsp = vmcb->save.rsp; | 2392 | hsave->save.rsp = vmcb->save.rsp; |
2318 | hsave->save.rax = vmcb->save.rax; | 2393 | hsave->save.rax = vmcb->save.rax; |
@@ -2323,7 +2398,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2323 | 2398 | ||
2324 | copy_vmcb_control_area(hsave, vmcb); | 2399 | copy_vmcb_control_area(hsave, vmcb); |
2325 | 2400 | ||
2326 | if (svm->vmcb->save.rflags & X86_EFLAGS_IF) | 2401 | if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) |
2327 | svm->vcpu.arch.hflags |= HF_HIF_MASK; | 2402 | svm->vcpu.arch.hflags |= HF_HIF_MASK; |
2328 | else | 2403 | else |
2329 | svm->vcpu.arch.hflags &= ~HF_HIF_MASK; | 2404 | svm->vcpu.arch.hflags &= ~HF_HIF_MASK; |
@@ -2341,7 +2416,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2341 | svm->vmcb->save.ds = nested_vmcb->save.ds; | 2416 | svm->vmcb->save.ds = nested_vmcb->save.ds; |
2342 | svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; | 2417 | svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; |
2343 | svm->vmcb->save.idtr = nested_vmcb->save.idtr; | 2418 | svm->vmcb->save.idtr = nested_vmcb->save.idtr; |
2344 | svm->vmcb->save.rflags = nested_vmcb->save.rflags; | 2419 | kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags); |
2345 | svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); | 2420 | svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); |
2346 | svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); | 2421 | svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); |
2347 | svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); | 2422 | svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); |
@@ -2443,13 +2518,13 @@ static int vmload_interception(struct vcpu_svm *svm) | |||
2443 | if (nested_svm_check_permissions(svm)) | 2518 | if (nested_svm_check_permissions(svm)) |
2444 | return 1; | 2519 | return 1; |
2445 | 2520 | ||
2446 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | ||
2447 | skip_emulated_instruction(&svm->vcpu); | ||
2448 | |||
2449 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); | 2521 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); |
2450 | if (!nested_vmcb) | 2522 | if (!nested_vmcb) |
2451 | return 1; | 2523 | return 1; |
2452 | 2524 | ||
2525 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | ||
2526 | skip_emulated_instruction(&svm->vcpu); | ||
2527 | |||
2453 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); | 2528 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); |
2454 | nested_svm_unmap(page); | 2529 | nested_svm_unmap(page); |
2455 | 2530 | ||
@@ -2464,13 +2539,13 @@ static int vmsave_interception(struct vcpu_svm *svm) | |||
2464 | if (nested_svm_check_permissions(svm)) | 2539 | if (nested_svm_check_permissions(svm)) |
2465 | return 1; | 2540 | return 1; |
2466 | 2541 | ||
2467 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | ||
2468 | skip_emulated_instruction(&svm->vcpu); | ||
2469 | |||
2470 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); | 2542 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); |
2471 | if (!nested_vmcb) | 2543 | if (!nested_vmcb) |
2472 | return 1; | 2544 | return 1; |
2473 | 2545 | ||
2546 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | ||
2547 | skip_emulated_instruction(&svm->vcpu); | ||
2548 | |||
2474 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); | 2549 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); |
2475 | nested_svm_unmap(page); | 2550 | nested_svm_unmap(page); |
2476 | 2551 | ||
@@ -2676,6 +2751,29 @@ static int emulate_on_interception(struct vcpu_svm *svm) | |||
2676 | return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; | 2751 | return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; |
2677 | } | 2752 | } |
2678 | 2753 | ||
2754 | bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) | ||
2755 | { | ||
2756 | unsigned long cr0 = svm->vcpu.arch.cr0; | ||
2757 | bool ret = false; | ||
2758 | u64 intercept; | ||
2759 | |||
2760 | intercept = svm->nested.intercept; | ||
2761 | |||
2762 | if (!is_guest_mode(&svm->vcpu) || | ||
2763 | (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) | ||
2764 | return false; | ||
2765 | |||
2766 | cr0 &= ~SVM_CR0_SELECTIVE_MASK; | ||
2767 | val &= ~SVM_CR0_SELECTIVE_MASK; | ||
2768 | |||
2769 | if (cr0 ^ val) { | ||
2770 | svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; | ||
2771 | ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); | ||
2772 | } | ||
2773 | |||
2774 | return ret; | ||
2775 | } | ||
2776 | |||
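[Editorial note] check_selective_cr0_intercepted() above replaces the open-coded logic removed from svm_set_cr0() further down: when L1 requested the selective CR0 intercept, an L2 write that changes any bit other than CR0.TS or CR0.MP is reported to L1 as SVM_EXIT_CR0_SEL_WRITE. A tiny sketch of the bit test, assuming SVM_CR0_SELECTIVE_MASK covers TS and MP (bit positions per the architectural CR0 layout):

	#include <stdbool.h>

	#define CR0_MP (1UL << 1)
	#define CR0_TS (1UL << 3)
	#define CR0_SELECTIVE_MASK (CR0_TS | CR0_MP)

	/* true when the write changes a bit other than TS or MP */
	static bool cr0_write_needs_sel_exit(unsigned long old_cr0, unsigned long new_cr0)
	{
		return ((old_cr0 ^ new_cr0) & ~CR0_SELECTIVE_MASK) != 0;
	}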
2679 | #define CR_VALID (1ULL << 63) | 2777 | #define CR_VALID (1ULL << 63) |
2680 | 2778 | ||
2681 | static int cr_interception(struct vcpu_svm *svm) | 2779 | static int cr_interception(struct vcpu_svm *svm) |
@@ -2699,7 +2797,11 @@ static int cr_interception(struct vcpu_svm *svm) | |||
2699 | val = kvm_register_read(&svm->vcpu, reg); | 2797 | val = kvm_register_read(&svm->vcpu, reg); |
2700 | switch (cr) { | 2798 | switch (cr) { |
2701 | case 0: | 2799 | case 0: |
2702 | err = kvm_set_cr0(&svm->vcpu, val); | 2800 | if (!check_selective_cr0_intercepted(svm, val)) |
2801 | err = kvm_set_cr0(&svm->vcpu, val); | ||
2802 | else | ||
2803 | return 1; | ||
2804 | |||
2703 | break; | 2805 | break; |
2704 | case 3: | 2806 | case 3: |
2705 | err = kvm_set_cr3(&svm->vcpu, val); | 2807 | err = kvm_set_cr3(&svm->vcpu, val); |
@@ -2744,23 +2846,6 @@ static int cr_interception(struct vcpu_svm *svm) | |||
2744 | return 1; | 2846 | return 1; |
2745 | } | 2847 | } |
2746 | 2848 | ||
2747 | static int cr0_write_interception(struct vcpu_svm *svm) | ||
2748 | { | ||
2749 | struct kvm_vcpu *vcpu = &svm->vcpu; | ||
2750 | int r; | ||
2751 | |||
2752 | r = cr_interception(svm); | ||
2753 | |||
2754 | if (svm->nested.vmexit_rip) { | ||
2755 | kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip); | ||
2756 | kvm_register_write(vcpu, VCPU_REGS_RSP, svm->nested.vmexit_rsp); | ||
2757 | kvm_register_write(vcpu, VCPU_REGS_RAX, svm->nested.vmexit_rax); | ||
2758 | svm->nested.vmexit_rip = 0; | ||
2759 | } | ||
2760 | |||
2761 | return r; | ||
2762 | } | ||
2763 | |||
2764 | static int dr_interception(struct vcpu_svm *svm) | 2849 | static int dr_interception(struct vcpu_svm *svm) |
2765 | { | 2850 | { |
2766 | int reg, dr; | 2851 | int reg, dr; |
@@ -2813,7 +2898,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2813 | case MSR_IA32_TSC: { | 2898 | case MSR_IA32_TSC: { |
2814 | struct vmcb *vmcb = get_host_vmcb(svm); | 2899 | struct vmcb *vmcb = get_host_vmcb(svm); |
2815 | 2900 | ||
2816 | *data = vmcb->control.tsc_offset + native_read_tsc(); | 2901 | *data = vmcb->control.tsc_offset + |
2902 | svm_scale_tsc(vcpu, native_read_tsc()); | ||
2903 | |||
2817 | break; | 2904 | break; |
2818 | } | 2905 | } |
2819 | case MSR_STAR: | 2906 | case MSR_STAR: |
@@ -3048,7 +3135,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
3048 | [SVM_EXIT_READ_CR4] = cr_interception, | 3135 | [SVM_EXIT_READ_CR4] = cr_interception, |
3049 | [SVM_EXIT_READ_CR8] = cr_interception, | 3136 | [SVM_EXIT_READ_CR8] = cr_interception, |
3050 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, | 3137 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, |
3051 | [SVM_EXIT_WRITE_CR0] = cr0_write_interception, | 3138 | [SVM_EXIT_WRITE_CR0] = cr_interception, |
3052 | [SVM_EXIT_WRITE_CR3] = cr_interception, | 3139 | [SVM_EXIT_WRITE_CR3] = cr_interception, |
3053 | [SVM_EXIT_WRITE_CR4] = cr_interception, | 3140 | [SVM_EXIT_WRITE_CR4] = cr_interception, |
3054 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, | 3141 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, |
@@ -3104,97 +3191,109 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
3104 | [SVM_EXIT_NPF] = pf_interception, | 3191 | [SVM_EXIT_NPF] = pf_interception, |
3105 | }; | 3192 | }; |
3106 | 3193 | ||
3107 | void dump_vmcb(struct kvm_vcpu *vcpu) | 3194 | static void dump_vmcb(struct kvm_vcpu *vcpu) |
3108 | { | 3195 | { |
3109 | struct vcpu_svm *svm = to_svm(vcpu); | 3196 | struct vcpu_svm *svm = to_svm(vcpu); |
3110 | struct vmcb_control_area *control = &svm->vmcb->control; | 3197 | struct vmcb_control_area *control = &svm->vmcb->control; |
3111 | struct vmcb_save_area *save = &svm->vmcb->save; | 3198 | struct vmcb_save_area *save = &svm->vmcb->save; |
3112 | 3199 | ||
3113 | pr_err("VMCB Control Area:\n"); | 3200 | pr_err("VMCB Control Area:\n"); |
3114 | pr_err("cr_read: %04x\n", control->intercept_cr & 0xffff); | 3201 | pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); |
3115 | pr_err("cr_write: %04x\n", control->intercept_cr >> 16); | 3202 | pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); |
3116 | pr_err("dr_read: %04x\n", control->intercept_dr & 0xffff); | 3203 | pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff); |
3117 | pr_err("dr_write: %04x\n", control->intercept_dr >> 16); | 3204 | pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16); |
3118 | pr_err("exceptions: %08x\n", control->intercept_exceptions); | 3205 | pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); |
3119 | pr_err("intercepts: %016llx\n", control->intercept); | 3206 | pr_err("%-20s%016llx\n", "intercepts:", control->intercept); |
3120 | pr_err("pause filter count: %d\n", control->pause_filter_count); | 3207 | pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); |
3121 | pr_err("iopm_base_pa: %016llx\n", control->iopm_base_pa); | 3208 | pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); |
3122 | pr_err("msrpm_base_pa: %016llx\n", control->msrpm_base_pa); | 3209 | pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); |
3123 | pr_err("tsc_offset: %016llx\n", control->tsc_offset); | 3210 | pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); |
3124 | pr_err("asid: %d\n", control->asid); | 3211 | pr_err("%-20s%d\n", "asid:", control->asid); |
3125 | pr_err("tlb_ctl: %d\n", control->tlb_ctl); | 3212 | pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl); |
3126 | pr_err("int_ctl: %08x\n", control->int_ctl); | 3213 | pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl); |
3127 | pr_err("int_vector: %08x\n", control->int_vector); | 3214 | pr_err("%-20s%08x\n", "int_vector:", control->int_vector); |
3128 | pr_err("int_state: %08x\n", control->int_state); | 3215 | pr_err("%-20s%08x\n", "int_state:", control->int_state); |
3129 | pr_err("exit_code: %08x\n", control->exit_code); | 3216 | pr_err("%-20s%08x\n", "exit_code:", control->exit_code); |
3130 | pr_err("exit_info1: %016llx\n", control->exit_info_1); | 3217 | pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1); |
3131 | pr_err("exit_info2: %016llx\n", control->exit_info_2); | 3218 | pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2); |
3132 | pr_err("exit_int_info: %08x\n", control->exit_int_info); | 3219 | pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info); |
3133 | pr_err("exit_int_info_err: %08x\n", control->exit_int_info_err); | 3220 | pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); |
3134 | pr_err("nested_ctl: %lld\n", control->nested_ctl); | 3221 | pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); |
3135 | pr_err("nested_cr3: %016llx\n", control->nested_cr3); | 3222 | pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); |
3136 | pr_err("event_inj: %08x\n", control->event_inj); | 3223 | pr_err("%-20s%08x\n", "event_inj:", control->event_inj); |
3137 | pr_err("event_inj_err: %08x\n", control->event_inj_err); | 3224 | pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); |
3138 | pr_err("lbr_ctl: %lld\n", control->lbr_ctl); | 3225 | pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); |
3139 | pr_err("next_rip: %016llx\n", control->next_rip); | 3226 | pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); |
3140 | pr_err("VMCB State Save Area:\n"); | 3227 | pr_err("VMCB State Save Area:\n"); |
3141 | pr_err("es: s: %04x a: %04x l: %08x b: %016llx\n", | 3228 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", |
3142 | save->es.selector, save->es.attrib, | 3229 | "es:", |
3143 | save->es.limit, save->es.base); | 3230 | save->es.selector, save->es.attrib, |
3144 | pr_err("cs: s: %04x a: %04x l: %08x b: %016llx\n", | 3231 | save->es.limit, save->es.base); |
3145 | save->cs.selector, save->cs.attrib, | 3232 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", |
3146 | save->cs.limit, save->cs.base); | 3233 | "cs:", |
3147 | pr_err("ss: s: %04x a: %04x l: %08x b: %016llx\n", | 3234 | save->cs.selector, save->cs.attrib, |
3148 | save->ss.selector, save->ss.attrib, | 3235 | save->cs.limit, save->cs.base); |
3149 | save->ss.limit, save->ss.base); | 3236 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", |
3150 | pr_err("ds: s: %04x a: %04x l: %08x b: %016llx\n", | 3237 | "ss:", |
3151 | save->ds.selector, save->ds.attrib, | 3238 | save->ss.selector, save->ss.attrib, |
3152 | save->ds.limit, save->ds.base); | 3239 | save->ss.limit, save->ss.base); |
3153 | pr_err("fs: s: %04x a: %04x l: %08x b: %016llx\n", | 3240 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", |
3154 | save->fs.selector, save->fs.attrib, | 3241 | "ds:", |
3155 | save->fs.limit, save->fs.base); | 3242 | save->ds.selector, save->ds.attrib, |
3156 | pr_err("gs: s: %04x a: %04x l: %08x b: %016llx\n", | 3243 | save->ds.limit, save->ds.base); |
3157 | save->gs.selector, save->gs.attrib, | 3244 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", |
3158 | save->gs.limit, save->gs.base); | 3245 | "fs:", |
3159 | pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n", | 3246 | save->fs.selector, save->fs.attrib, |
3160 | save->gdtr.selector, save->gdtr.attrib, | 3247 | save->fs.limit, save->fs.base); |
3161 | save->gdtr.limit, save->gdtr.base); | 3248 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", |
3162 | pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n", | 3249 | "gs:", |
3163 | save->ldtr.selector, save->ldtr.attrib, | 3250 | save->gs.selector, save->gs.attrib, |
3164 | save->ldtr.limit, save->ldtr.base); | 3251 | save->gs.limit, save->gs.base); |
3165 | pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n", | 3252 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", |
3166 | save->idtr.selector, save->idtr.attrib, | 3253 | "gdtr:", |
3167 | save->idtr.limit, save->idtr.base); | 3254 | save->gdtr.selector, save->gdtr.attrib, |
3168 | pr_err("tr: s: %04x a: %04x l: %08x b: %016llx\n", | 3255 | save->gdtr.limit, save->gdtr.base); |
3169 | save->tr.selector, save->tr.attrib, | 3256 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", |
3170 | save->tr.limit, save->tr.base); | 3257 | "ldtr:", |
3258 | save->ldtr.selector, save->ldtr.attrib, | ||
3259 | save->ldtr.limit, save->ldtr.base); | ||
3260 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", | ||
3261 | "idtr:", | ||
3262 | save->idtr.selector, save->idtr.attrib, | ||
3263 | save->idtr.limit, save->idtr.base); | ||
3264 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", | ||
3265 | "tr:", | ||
3266 | save->tr.selector, save->tr.attrib, | ||
3267 | save->tr.limit, save->tr.base); | ||
3171 | pr_err("cpl: %d efer: %016llx\n", | 3268 | pr_err("cpl: %d efer: %016llx\n", |
3172 | save->cpl, save->efer); | 3269 | save->cpl, save->efer); |
3173 | pr_err("cr0: %016llx cr2: %016llx\n", | 3270 | pr_err("%-15s %016llx %-13s %016llx\n", |
3174 | save->cr0, save->cr2); | 3271 | "cr0:", save->cr0, "cr2:", save->cr2); |
3175 | pr_err("cr3: %016llx cr4: %016llx\n", | 3272 | pr_err("%-15s %016llx %-13s %016llx\n", |
3176 | save->cr3, save->cr4); | 3273 | "cr3:", save->cr3, "cr4:", save->cr4); |
3177 | pr_err("dr6: %016llx dr7: %016llx\n", | 3274 | pr_err("%-15s %016llx %-13s %016llx\n", |
3178 | save->dr6, save->dr7); | 3275 | "dr6:", save->dr6, "dr7:", save->dr7); |
3179 | pr_err("rip: %016llx rflags: %016llx\n", | 3276 | pr_err("%-15s %016llx %-13s %016llx\n", |
3180 | save->rip, save->rflags); | 3277 | "rip:", save->rip, "rflags:", save->rflags); |
3181 | pr_err("rsp: %016llx rax: %016llx\n", | 3278 | pr_err("%-15s %016llx %-13s %016llx\n", |
3182 | save->rsp, save->rax); | 3279 | "rsp:", save->rsp, "rax:", save->rax); |
3183 | pr_err("star: %016llx lstar: %016llx\n", | 3280 | pr_err("%-15s %016llx %-13s %016llx\n", |
3184 | save->star, save->lstar); | 3281 | "star:", save->star, "lstar:", save->lstar); |
3185 | pr_err("cstar: %016llx sfmask: %016llx\n", | 3282 | pr_err("%-15s %016llx %-13s %016llx\n", |
3186 | save->cstar, save->sfmask); | 3283 | "cstar:", save->cstar, "sfmask:", save->sfmask); |
3187 | pr_err("kernel_gs_base: %016llx sysenter_cs: %016llx\n", | 3284 | pr_err("%-15s %016llx %-13s %016llx\n", |
3188 | save->kernel_gs_base, save->sysenter_cs); | 3285 | "kernel_gs_base:", save->kernel_gs_base, |
3189 | pr_err("sysenter_esp: %016llx sysenter_eip: %016llx\n", | 3286 | "sysenter_cs:", save->sysenter_cs); |
3190 | save->sysenter_esp, save->sysenter_eip); | 3287 | pr_err("%-15s %016llx %-13s %016llx\n", |
3191 | pr_err("gpat: %016llx dbgctl: %016llx\n", | 3288 | "sysenter_esp:", save->sysenter_esp, |
3192 | save->g_pat, save->dbgctl); | 3289 | "sysenter_eip:", save->sysenter_eip); |
3193 | pr_err("br_from: %016llx br_to: %016llx\n", | 3290 | pr_err("%-15s %016llx %-13s %016llx\n", |
3194 | save->br_from, save->br_to); | 3291 | "gpat:", save->g_pat, "dbgctl:", save->dbgctl); |
3195 | pr_err("excp_from: %016llx excp_to: %016llx\n", | 3292 | pr_err("%-15s %016llx %-13s %016llx\n", |
3196 | save->last_excp_from, save->last_excp_to); | 3293 | "br_from:", save->br_from, "br_to:", save->br_to); |
3197 | 3294 | pr_err("%-15s %016llx %-13s %016llx\n", | |
3295 | "excp_from:", save->last_excp_from, | ||
3296 | "excp_to:", save->last_excp_to); | ||
3198 | } | 3297 | } |
3199 | 3298 | ||
3200 | static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) | 3299 | static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) |
@@ -3384,7 +3483,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
3384 | (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) | 3483 | (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) |
3385 | return 0; | 3484 | return 0; |
3386 | 3485 | ||
3387 | ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); | 3486 | ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF); |
3388 | 3487 | ||
3389 | if (is_guest_mode(vcpu)) | 3488 | if (is_guest_mode(vcpu)) |
3390 | return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); | 3489 | return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); |
@@ -3871,6 +3970,186 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | |||
3871 | update_cr0_intercept(svm); | 3970 | update_cr0_intercept(svm); |
3872 | } | 3971 | } |
3873 | 3972 | ||
3973 | #define PRE_EX(exit) { .exit_code = (exit), \ | ||
3974 | .stage = X86_ICPT_PRE_EXCEPT, } | ||
3975 | #define POST_EX(exit) { .exit_code = (exit), \ | ||
3976 | .stage = X86_ICPT_POST_EXCEPT, } | ||
3977 | #define POST_MEM(exit) { .exit_code = (exit), \ | ||
3978 | .stage = X86_ICPT_POST_MEMACCESS, } | ||
3979 | |||
3980 | static struct __x86_intercept { | ||
3981 | u32 exit_code; | ||
3982 | enum x86_intercept_stage stage; | ||
3983 | } x86_intercept_map[] = { | ||
3984 | [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0), | ||
3985 | [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0), | ||
3986 | [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0), | ||
3987 | [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0), | ||
3988 | [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0), | ||
3989 | [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0), | ||
3990 | [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0), | ||
3991 | [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ), | ||
3992 | [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ), | ||
3993 | [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE), | ||
3994 | [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE), | ||
3995 | [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ), | ||
3996 | [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ), | ||
3997 | [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE), | ||
3998 | [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE), | ||
3999 | [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN), | ||
4000 | [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL), | ||
4001 | [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD), | ||
4002 | [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE), | ||
4003 | [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI), | ||
4004 | [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI), | ||
4005 | [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT), | ||
4006 | [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA), | ||
4007 | [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP), | ||
4008 | [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR), | ||
4009 | [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT), | ||
4010 | [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG), | ||
4011 | [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD), | ||
4012 | [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD), | ||
4013 | [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR), | ||
4014 | [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC), | ||
4015 | [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR), | ||
4016 | [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC), | ||
4017 | [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID), | ||
4018 | [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM), | ||
4019 | [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE), | ||
4020 | [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF), | ||
4021 | [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF), | ||
4022 | [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT), | ||
4023 | [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET), | ||
4024 | [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP), | ||
4025 | [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT), | ||
4026 | [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO), | ||
4027 | [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO), | ||
4028 | [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO), | ||
4029 | [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO), | ||
4030 | }; | ||
4031 | |||
4032 | #undef PRE_EX | ||
4033 | #undef POST_EX | ||
4034 | #undef POST_MEM | ||
4035 | |||
4036 | static int svm_check_intercept(struct kvm_vcpu *vcpu, | ||
4037 | struct x86_instruction_info *info, | ||
4038 | enum x86_intercept_stage stage) | ||
4039 | { | ||
4040 | struct vcpu_svm *svm = to_svm(vcpu); | ||
4041 | int vmexit, ret = X86EMUL_CONTINUE; | ||
4042 | struct __x86_intercept icpt_info; | ||
4043 | struct vmcb *vmcb = svm->vmcb; | ||
4044 | |||
4045 | if (info->intercept >= ARRAY_SIZE(x86_intercept_map)) | ||
4046 | goto out; | ||
4047 | |||
4048 | icpt_info = x86_intercept_map[info->intercept]; | ||
4049 | |||
4050 | if (stage != icpt_info.stage) | ||
4051 | goto out; | ||
4052 | |||
4053 | switch (icpt_info.exit_code) { | ||
4054 | case SVM_EXIT_READ_CR0: | ||
4055 | if (info->intercept == x86_intercept_cr_read) | ||
4056 | icpt_info.exit_code += info->modrm_reg; | ||
4057 | break; | ||
4058 | case SVM_EXIT_WRITE_CR0: { | ||
4059 | unsigned long cr0, val; | ||
4060 | u64 intercept; | ||
4061 | |||
4062 | if (info->intercept == x86_intercept_cr_write) | ||
4063 | icpt_info.exit_code += info->modrm_reg; | ||
4064 | |||
4065 | if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0) | ||
4066 | break; | ||
4067 | |||
4068 | intercept = svm->nested.intercept; | ||
4069 | |||
4070 | if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))) | ||
4071 | break; | ||
4072 | |||
4073 | cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK; | ||
4074 | val = info->src_val & ~SVM_CR0_SELECTIVE_MASK; | ||
4075 | |||
4076 | if (info->intercept == x86_intercept_lmsw) { | ||
4077 | cr0 &= 0xfUL; | ||
4078 | val &= 0xfUL; | ||
4079 | /* lmsw can't clear PE - catch this here */ | ||
4080 | if (cr0 & X86_CR0_PE) | ||
4081 | val |= X86_CR0_PE; | ||
4082 | } | ||
4083 | |||
4084 | if (cr0 ^ val) | ||
4085 | icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE; | ||
4086 | |||
4087 | break; | ||
4088 | } | ||
4089 | case SVM_EXIT_READ_DR0: | ||
4090 | case SVM_EXIT_WRITE_DR0: | ||
4091 | icpt_info.exit_code += info->modrm_reg; | ||
4092 | break; | ||
4093 | case SVM_EXIT_MSR: | ||
4094 | if (info->intercept == x86_intercept_wrmsr) | ||
4095 | vmcb->control.exit_info_1 = 1; | ||
4096 | else | ||
4097 | vmcb->control.exit_info_1 = 0; | ||
4098 | break; | ||
4099 | case SVM_EXIT_PAUSE: | ||
4100 | /* | ||
4101 | * We get this for NOP only, but pause | ||
4102 | * is rep nop, so check the rep prefix here | ||
4103 | */ | ||
4104 | if (info->rep_prefix != REPE_PREFIX) | ||
4105 | goto out; | ||
4106 | case SVM_EXIT_IOIO: { | ||
4107 | u64 exit_info; | ||
4108 | u32 bytes; | ||
4109 | |||
4110 | exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16; | ||
4111 | |||
4112 | if (info->intercept == x86_intercept_in || | ||
4113 | info->intercept == x86_intercept_ins) { | ||
4114 | exit_info |= SVM_IOIO_TYPE_MASK; | ||
4115 | bytes = info->src_bytes; | ||
4116 | } else { | ||
4117 | bytes = info->dst_bytes; | ||
4118 | } | ||
4119 | |||
4120 | if (info->intercept == x86_intercept_outs || | ||
4121 | info->intercept == x86_intercept_ins) | ||
4122 | exit_info |= SVM_IOIO_STR_MASK; | ||
4123 | |||
4124 | if (info->rep_prefix) | ||
4125 | exit_info |= SVM_IOIO_REP_MASK; | ||
4126 | |||
4127 | bytes = min(bytes, 4u); | ||
4128 | |||
4129 | exit_info |= bytes << SVM_IOIO_SIZE_SHIFT; | ||
4130 | |||
4131 | exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1); | ||
4132 | |||
4133 | vmcb->control.exit_info_1 = exit_info; | ||
4134 | vmcb->control.exit_info_2 = info->next_rip; | ||
4135 | |||
4136 | break; | ||
4137 | } | ||
4138 | default: | ||
4139 | break; | ||
4140 | } | ||
4141 | |||
4142 | vmcb->control.next_rip = info->next_rip; | ||
4143 | vmcb->control.exit_code = icpt_info.exit_code; | ||
4144 | vmexit = nested_svm_exit_handled(svm); | ||
4145 | |||
4146 | ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED | ||
4147 | : X86EMUL_CONTINUE; | ||
4148 | |||
4149 | out: | ||
4150 | return ret; | ||
4151 | } | ||
4152 | |||
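[Editorial note] In svm_check_intercept() above, the map entries for CR and DR accesses store only the register-0 exit code; the ModRM register field is added on top because the SVM exit codes for CRn/DRn are consecutive. A throwaway example, with the exit-code values assumed from svm.h rather than quoted from this diff:

	#include <assert.h>

	#define SVM_EXIT_WRITE_CR0 0x010	/* assumed value */
	#define SVM_EXIT_WRITE_CR4 0x014	/* assumed value */

	int main(void)
	{
		int modrm_reg = 4;		/* e.g. an emulated "mov %rax, %cr4" */
		assert(SVM_EXIT_WRITE_CR0 + modrm_reg == SVM_EXIT_WRITE_CR4);
		return 0;
	}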
3874 | static struct kvm_x86_ops svm_x86_ops = { | 4153 | static struct kvm_x86_ops svm_x86_ops = { |
3875 | .cpu_has_kvm_support = has_svm, | 4154 | .cpu_has_kvm_support = has_svm, |
3876 | .disabled_by_bios = is_disabled, | 4155 | .disabled_by_bios = is_disabled, |
@@ -3952,10 +4231,14 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
3952 | 4231 | ||
3953 | .has_wbinvd_exit = svm_has_wbinvd_exit, | 4232 | .has_wbinvd_exit = svm_has_wbinvd_exit, |
3954 | 4233 | ||
4234 | .set_tsc_khz = svm_set_tsc_khz, | ||
3955 | .write_tsc_offset = svm_write_tsc_offset, | 4235 | .write_tsc_offset = svm_write_tsc_offset, |
3956 | .adjust_tsc_offset = svm_adjust_tsc_offset, | 4236 | .adjust_tsc_offset = svm_adjust_tsc_offset, |
4237 | .compute_tsc_offset = svm_compute_tsc_offset, | ||
3957 | 4238 | ||
3958 | .set_tdp_cr3 = set_tdp_cr3, | 4239 | .set_tdp_cr3 = set_tdp_cr3, |
4240 | |||
4241 | .check_intercept = svm_check_intercept, | ||
3959 | }; | 4242 | }; |
3960 | 4243 | ||
3961 | static int __init svm_init(void) | 4244 | static int __init svm_init(void) |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5b4cdcbd154c..4c3fa0f67469 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -128,8 +128,11 @@ struct vcpu_vmx { | |||
128 | unsigned long host_rsp; | 128 | unsigned long host_rsp; |
129 | int launched; | 129 | int launched; |
130 | u8 fail; | 130 | u8 fail; |
131 | u8 cpl; | ||
132 | bool nmi_known_unmasked; | ||
131 | u32 exit_intr_info; | 133 | u32 exit_intr_info; |
132 | u32 idt_vectoring_info; | 134 | u32 idt_vectoring_info; |
135 | ulong rflags; | ||
133 | struct shared_msr_entry *guest_msrs; | 136 | struct shared_msr_entry *guest_msrs; |
134 | int nmsrs; | 137 | int nmsrs; |
135 | int save_nmsrs; | 138 | int save_nmsrs; |
@@ -159,6 +162,10 @@ struct vcpu_vmx { | |||
159 | u32 ar; | 162 | u32 ar; |
160 | } tr, es, ds, fs, gs; | 163 | } tr, es, ds, fs, gs; |
161 | } rmode; | 164 | } rmode; |
165 | struct { | ||
166 | u32 bitmask; /* 4 bits per segment (1 bit per field) */ | ||
167 | struct kvm_save_segment seg[8]; | ||
168 | } segment_cache; | ||
162 | int vpid; | 169 | int vpid; |
163 | bool emulation_required; | 170 | bool emulation_required; |
164 | 171 | ||
@@ -171,6 +178,15 @@ struct vcpu_vmx { | |||
171 | bool rdtscp_enabled; | 178 | bool rdtscp_enabled; |
172 | }; | 179 | }; |
173 | 180 | ||
181 | enum segment_cache_field { | ||
182 | SEG_FIELD_SEL = 0, | ||
183 | SEG_FIELD_BASE = 1, | ||
184 | SEG_FIELD_LIMIT = 2, | ||
185 | SEG_FIELD_AR = 3, | ||
186 | |||
187 | SEG_FIELD_NR = 4 | ||
188 | }; | ||
189 | |||
174 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | 190 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) |
175 | { | 191 | { |
176 | return container_of(vcpu, struct vcpu_vmx, vcpu); | 192 | return container_of(vcpu, struct vcpu_vmx, vcpu); |
@@ -643,6 +659,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask) | |||
643 | vmcs_writel(field, vmcs_readl(field) | mask); | 659 | vmcs_writel(field, vmcs_readl(field) | mask); |
644 | } | 660 | } |
645 | 661 | ||
662 | static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) | ||
663 | { | ||
664 | vmx->segment_cache.bitmask = 0; | ||
665 | } | ||
666 | |||
667 | static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, | ||
668 | unsigned field) | ||
669 | { | ||
670 | bool ret; | ||
671 | u32 mask = 1 << (seg * SEG_FIELD_NR + field); | ||
672 | |||
673 | if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) { | ||
674 | vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS); | ||
675 | vmx->segment_cache.bitmask = 0; | ||
676 | } | ||
677 | ret = vmx->segment_cache.bitmask & mask; | ||
678 | vmx->segment_cache.bitmask |= mask; | ||
679 | return ret; | ||
680 | } | ||
681 | |||
682 | static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg) | ||
683 | { | ||
684 | u16 *p = &vmx->segment_cache.seg[seg].selector; | ||
685 | |||
686 | if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL)) | ||
687 | *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector); | ||
688 | return *p; | ||
689 | } | ||
690 | |||
691 | static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg) | ||
692 | { | ||
693 | ulong *p = &vmx->segment_cache.seg[seg].base; | ||
694 | |||
695 | if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE)) | ||
696 | *p = vmcs_readl(kvm_vmx_segment_fields[seg].base); | ||
697 | return *p; | ||
698 | } | ||
699 | |||
700 | static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg) | ||
701 | { | ||
702 | u32 *p = &vmx->segment_cache.seg[seg].limit; | ||
703 | |||
704 | if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT)) | ||
705 | *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit); | ||
706 | return *p; | ||
707 | } | ||
708 | |||
709 | static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg) | ||
710 | { | ||
711 | u32 *p = &vmx->segment_cache.seg[seg].ar; | ||
712 | |||
713 | if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR)) | ||
714 | *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes); | ||
715 | return *p; | ||
716 | } | ||
717 | |||
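[Editorial note] The VMX segment cache introduced above keeps one validity bit per (segment, field) pair, SEG_FIELD_NR = 4 bits per segment, and the whole cache is invalidated by clearing VCPU_EXREG_SEGMENTS from regs_avail. A small illustration of the bit layout; the segment index (CS = 1) follows KVM's VCPU_SREG_* ordering and is an assumption here:

	#include <assert.h>

	#define SEG_FIELD_NR 4	/* SEL, BASE, LIMIT, AR */

	static unsigned seg_cache_bit(unsigned seg, unsigned field)
	{
		return seg * SEG_FIELD_NR + field;
	}

	int main(void)
	{
		/* the CS access-rights field (field 3) maps to bit 7 of the bitmask */
		assert(seg_cache_bit(1, 3) == 7);
		return 0;
	}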
646 | static void update_exception_bitmap(struct kvm_vcpu *vcpu) | 718 | static void update_exception_bitmap(struct kvm_vcpu *vcpu) |
647 | { | 719 | { |
648 | u32 eb; | 720 | u32 eb; |
@@ -970,17 +1042,24 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | |||
970 | { | 1042 | { |
971 | unsigned long rflags, save_rflags; | 1043 | unsigned long rflags, save_rflags; |
972 | 1044 | ||
973 | rflags = vmcs_readl(GUEST_RFLAGS); | 1045 | if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) { |
974 | if (to_vmx(vcpu)->rmode.vm86_active) { | 1046 | __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); |
975 | rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; | 1047 | rflags = vmcs_readl(GUEST_RFLAGS); |
976 | save_rflags = to_vmx(vcpu)->rmode.save_rflags; | 1048 | if (to_vmx(vcpu)->rmode.vm86_active) { |
977 | rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; | 1049 | rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; |
1050 | save_rflags = to_vmx(vcpu)->rmode.save_rflags; | ||
1051 | rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; | ||
1052 | } | ||
1053 | to_vmx(vcpu)->rflags = rflags; | ||
978 | } | 1054 | } |
979 | return rflags; | 1055 | return to_vmx(vcpu)->rflags; |
980 | } | 1056 | } |
981 | 1057 | ||
982 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 1058 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
983 | { | 1059 | { |
1060 | __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); | ||
1061 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | ||
1062 | to_vmx(vcpu)->rflags = rflags; | ||
984 | if (to_vmx(vcpu)->rmode.vm86_active) { | 1063 | if (to_vmx(vcpu)->rmode.vm86_active) { |
985 | to_vmx(vcpu)->rmode.save_rflags = rflags; | 1064 | to_vmx(vcpu)->rmode.save_rflags = rflags; |
986 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; | 1065 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; |
@@ -1053,7 +1132,10 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
1053 | } | 1132 | } |
1054 | 1133 | ||
1055 | if (vmx->rmode.vm86_active) { | 1134 | if (vmx->rmode.vm86_active) { |
1056 | if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE) | 1135 | int inc_eip = 0; |
1136 | if (kvm_exception_is_soft(nr)) | ||
1137 | inc_eip = vcpu->arch.event_exit_inst_len; | ||
1138 | if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE) | ||
1057 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); | 1139 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
1058 | return; | 1140 | return; |
1059 | } | 1141 | } |
@@ -1151,6 +1233,16 @@ static u64 guest_read_tsc(void) | |||
1151 | } | 1233 | } |
1152 | 1234 | ||
1153 | /* | 1235 | /* |
1236 | * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ | ||
1237 | * ioctl. In this case the call-back should update internal vmx state to make | ||
1238 | * the changes effective. | ||
1239 | */ | ||
1240 | static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) | ||
1241 | { | ||
1242 | /* Nothing to do here */ | ||
1243 | } | ||
1244 | |||
1245 | /* | ||
1154 | * writes 'offset' into guest's timestamp counter offset register | 1246 | * writes 'offset' into guest's timestamp counter offset register |
1155 | */ | 1247 | */ |
1156 | static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | 1248 | static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) |
@@ -1164,6 +1256,11 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) | |||
1164 | vmcs_write64(TSC_OFFSET, offset + adjustment); | 1256 | vmcs_write64(TSC_OFFSET, offset + adjustment); |
1165 | } | 1257 | } |
1166 | 1258 | ||
1259 | static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) | ||
1260 | { | ||
1261 | return target_tsc - native_read_tsc(); | ||
1262 | } | ||
1263 | |||
1167 | /* | 1264 | /* |
1168 | * Reads an msr value (of 'msr_index') into 'pdata'. | 1265 | * Reads an msr value (of 'msr_index') into 'pdata'. |
1169 | * Returns 0 on success, non-0 otherwise. | 1266 | * Returns 0 on success, non-0 otherwise. |
@@ -1243,9 +1340,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
1243 | break; | 1340 | break; |
1244 | #ifdef CONFIG_X86_64 | 1341 | #ifdef CONFIG_X86_64 |
1245 | case MSR_FS_BASE: | 1342 | case MSR_FS_BASE: |
1343 | vmx_segment_cache_clear(vmx); | ||
1246 | vmcs_writel(GUEST_FS_BASE, data); | 1344 | vmcs_writel(GUEST_FS_BASE, data); |
1247 | break; | 1345 | break; |
1248 | case MSR_GS_BASE: | 1346 | case MSR_GS_BASE: |
1347 | vmx_segment_cache_clear(vmx); | ||
1249 | vmcs_writel(GUEST_GS_BASE, data); | 1348 | vmcs_writel(GUEST_GS_BASE, data); |
1250 | break; | 1349 | break; |
1251 | case MSR_KERNEL_GS_BASE: | 1350 | case MSR_KERNEL_GS_BASE: |
@@ -1689,6 +1788,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
1689 | vmx->emulation_required = 1; | 1788 | vmx->emulation_required = 1; |
1690 | vmx->rmode.vm86_active = 0; | 1789 | vmx->rmode.vm86_active = 0; |
1691 | 1790 | ||
1791 | vmx_segment_cache_clear(vmx); | ||
1792 | |||
1692 | vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector); | 1793 | vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector); |
1693 | vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); | 1794 | vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); |
1694 | vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); | 1795 | vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); |
@@ -1712,6 +1813,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
1712 | fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs); | 1813 | fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs); |
1713 | fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs); | 1814 | fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs); |
1714 | 1815 | ||
1816 | vmx_segment_cache_clear(vmx); | ||
1817 | |||
1715 | vmcs_write16(GUEST_SS_SELECTOR, 0); | 1818 | vmcs_write16(GUEST_SS_SELECTOR, 0); |
1716 | vmcs_write32(GUEST_SS_AR_BYTES, 0x93); | 1819 | vmcs_write32(GUEST_SS_AR_BYTES, 0x93); |
1717 | 1820 | ||
@@ -1775,6 +1878,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
1775 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 1878 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
1776 | } | 1879 | } |
1777 | 1880 | ||
1881 | vmx_segment_cache_clear(vmx); | ||
1882 | |||
1778 | vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR); | 1883 | vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR); |
1779 | vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); | 1884 | vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); |
1780 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); | 1885 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); |
@@ -1851,6 +1956,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu) | |||
1851 | { | 1956 | { |
1852 | u32 guest_tr_ar; | 1957 | u32 guest_tr_ar; |
1853 | 1958 | ||
1959 | vmx_segment_cache_clear(to_vmx(vcpu)); | ||
1960 | |||
1854 | guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); | 1961 | guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); |
1855 | if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { | 1962 | if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { |
1856 | printk(KERN_DEBUG "%s: tss fixup for long mode. \n", | 1963 | printk(KERN_DEBUG "%s: tss fixup for long mode. \n", |
@@ -1998,6 +2105,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1998 | vmcs_writel(CR0_READ_SHADOW, cr0); | 2105 | vmcs_writel(CR0_READ_SHADOW, cr0); |
1999 | vmcs_writel(GUEST_CR0, hw_cr0); | 2106 | vmcs_writel(GUEST_CR0, hw_cr0); |
2000 | vcpu->arch.cr0 = cr0; | 2107 | vcpu->arch.cr0 = cr0; |
2108 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | ||
2001 | } | 2109 | } |
2002 | 2110 | ||
2003 | static u64 construct_eptp(unsigned long root_hpa) | 2111 | static u64 construct_eptp(unsigned long root_hpa) |
@@ -2053,7 +2161,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
2053 | struct kvm_segment *var, int seg) | 2161 | struct kvm_segment *var, int seg) |
2054 | { | 2162 | { |
2055 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2163 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2056 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | ||
2057 | struct kvm_save_segment *save; | 2164 | struct kvm_save_segment *save; |
2058 | u32 ar; | 2165 | u32 ar; |
2059 | 2166 | ||
@@ -2075,13 +2182,13 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
2075 | var->limit = save->limit; | 2182 | var->limit = save->limit; |
2076 | ar = save->ar; | 2183 | ar = save->ar; |
2077 | if (seg == VCPU_SREG_TR | 2184 | if (seg == VCPU_SREG_TR |
2078 | || var->selector == vmcs_read16(sf->selector)) | 2185 | || var->selector == vmx_read_guest_seg_selector(vmx, seg)) |
2079 | goto use_saved_rmode_seg; | 2186 | goto use_saved_rmode_seg; |
2080 | } | 2187 | } |
2081 | var->base = vmcs_readl(sf->base); | 2188 | var->base = vmx_read_guest_seg_base(vmx, seg); |
2082 | var->limit = vmcs_read32(sf->limit); | 2189 | var->limit = vmx_read_guest_seg_limit(vmx, seg); |
2083 | var->selector = vmcs_read16(sf->selector); | 2190 | var->selector = vmx_read_guest_seg_selector(vmx, seg); |
2084 | ar = vmcs_read32(sf->ar_bytes); | 2191 | ar = vmx_read_guest_seg_ar(vmx, seg); |
2085 | use_saved_rmode_seg: | 2192 | use_saved_rmode_seg: |
2086 | if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) | 2193 | if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) |
2087 | ar = 0; | 2194 | ar = 0; |
@@ -2098,27 +2205,37 @@ use_saved_rmode_seg: | |||
2098 | 2205 | ||
2099 | static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) | 2206 | static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) |
2100 | { | 2207 | { |
2101 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | ||
2102 | struct kvm_segment s; | 2208 | struct kvm_segment s; |
2103 | 2209 | ||
2104 | if (to_vmx(vcpu)->rmode.vm86_active) { | 2210 | if (to_vmx(vcpu)->rmode.vm86_active) { |
2105 | vmx_get_segment(vcpu, &s, seg); | 2211 | vmx_get_segment(vcpu, &s, seg); |
2106 | return s.base; | 2212 | return s.base; |
2107 | } | 2213 | } |
2108 | return vmcs_readl(sf->base); | 2214 | return vmx_read_guest_seg_base(to_vmx(vcpu), seg); |
2109 | } | 2215 | } |
2110 | 2216 | ||
2111 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) | 2217 | static int __vmx_get_cpl(struct kvm_vcpu *vcpu) |
2112 | { | 2218 | { |
2113 | if (!is_protmode(vcpu)) | 2219 | if (!is_protmode(vcpu)) |
2114 | return 0; | 2220 | return 0; |
2115 | 2221 | ||
2116 | if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ | 2222 | if (!is_long_mode(vcpu) |
2223 | && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ | ||
2117 | return 3; | 2224 | return 3; |
2118 | 2225 | ||
2119 | return vmcs_read16(GUEST_CS_SELECTOR) & 3; | 2226 | return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3; |
2120 | } | 2227 | } |
2121 | 2228 | ||
2229 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) | ||
2230 | { | ||
2231 | if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { | ||
2232 | __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | ||
2233 | to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu); | ||
2234 | } | ||
2235 | return to_vmx(vcpu)->cpl; | ||
2236 | } | ||
2237 | |||
2238 | |||
2122 | static u32 vmx_segment_access_rights(struct kvm_segment *var) | 2239 | static u32 vmx_segment_access_rights(struct kvm_segment *var) |
2123 | { | 2240 | { |
2124 | u32 ar; | 2241 | u32 ar; |
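The two functions above split CPL retrieval into __vmx_get_cpl(), which goes to the VMCS, and vmx_get_cpl(), which caches the result behind the VCPU_EXREG_CPL bit in regs_avail; vmx_set_cr0(), vmx_set_segment() and vmx_vcpu_run() drop that bit so the next read recomputes it, and the new VCPU_EXREG_SEGMENTS bit drives the segment cache the same way. The sketch below is a stand-alone user-space model of that avail-bit pattern, not kernel code; the names and the slow_read_cpl() helper are made up for illustration.

#include <stdio.h>

/* Illustrative model of the VCPU_EXREG_CPL caching pattern (not kernel code). */
#define EXREG_CPL 0

static unsigned long regs_avail;   /* bitmap of values that are still valid */
static int cached_cpl;             /* last value read from the "VMCS" */
static int slow_reads;             /* counts how often the slow path runs */

static int slow_read_cpl(void)     /* stands in for vmcs_read16(GUEST_CS_SELECTOR) & 3 */
{
	slow_reads++;
	return 3;
}

static int get_cpl(void)
{
	if (!(regs_avail & (1UL << EXREG_CPL))) {
		regs_avail |= 1UL << EXREG_CPL;
		cached_cpl = slow_read_cpl();
	}
	return cached_cpl;
}

static void invalidate_cpl(void)   /* analogue of __clear_bit() in vmx_set_cr0()/vmx_set_segment() */
{
	regs_avail &= ~(1UL << EXREG_CPL);
}

int main(void)
{
	get_cpl();
	get_cpl();                              /* second call hits the cache */
	invalidate_cpl();                       /* e.g. the guest reloaded a segment register */
	get_cpl();
	printf("slow reads: %d\n", slow_reads); /* prints 2, not 3 */
	return 0;
}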
@@ -2148,6 +2265,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
2148 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 2265 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
2149 | u32 ar; | 2266 | u32 ar; |
2150 | 2267 | ||
2268 | vmx_segment_cache_clear(vmx); | ||
2269 | |||
2151 | if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { | 2270 | if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { |
2152 | vmcs_write16(sf->selector, var->selector); | 2271 | vmcs_write16(sf->selector, var->selector); |
2153 | vmx->rmode.tr.selector = var->selector; | 2272 | vmx->rmode.tr.selector = var->selector; |
@@ -2184,11 +2303,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
2184 | ar |= 0x1; /* Accessed */ | 2303 | ar |= 0x1; /* Accessed */ |
2185 | 2304 | ||
2186 | vmcs_write32(sf->ar_bytes, ar); | 2305 | vmcs_write32(sf->ar_bytes, ar); |
2306 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | ||
2187 | } | 2307 | } |
2188 | 2308 | ||
2189 | static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | 2309 | static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) |
2190 | { | 2310 | { |
2191 | u32 ar = vmcs_read32(GUEST_CS_AR_BYTES); | 2311 | u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS); |
2192 | 2312 | ||
2193 | *db = (ar >> 14) & 1; | 2313 | *db = (ar >> 14) & 1; |
2194 | *l = (ar >> 13) & 1; | 2314 | *l = (ar >> 13) & 1; |
@@ -2775,6 +2895,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2775 | if (ret != 0) | 2895 | if (ret != 0) |
2776 | goto out; | 2896 | goto out; |
2777 | 2897 | ||
2898 | vmx_segment_cache_clear(vmx); | ||
2899 | |||
2778 | seg_setup(VCPU_SREG_CS); | 2900 | seg_setup(VCPU_SREG_CS); |
2779 | /* | 2901 | /* |
2780 | * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode | 2902 | * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode |
@@ -2904,7 +3026,10 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) | |||
2904 | 3026 | ||
2905 | ++vcpu->stat.irq_injections; | 3027 | ++vcpu->stat.irq_injections; |
2906 | if (vmx->rmode.vm86_active) { | 3028 | if (vmx->rmode.vm86_active) { |
2907 | if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE) | 3029 | int inc_eip = 0; |
3030 | if (vcpu->arch.interrupt.soft) | ||
3031 | inc_eip = vcpu->arch.event_exit_inst_len; | ||
3032 | if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE) | ||
2908 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); | 3033 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
2909 | return; | 3034 | return; |
2910 | } | 3035 | } |
@@ -2937,8 +3062,9 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
2937 | } | 3062 | } |
2938 | 3063 | ||
2939 | ++vcpu->stat.nmi_injections; | 3064 | ++vcpu->stat.nmi_injections; |
3065 | vmx->nmi_known_unmasked = false; | ||
2940 | if (vmx->rmode.vm86_active) { | 3066 | if (vmx->rmode.vm86_active) { |
2941 | if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE) | 3067 | if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE) |
2942 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); | 3068 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
2943 | return; | 3069 | return; |
2944 | } | 3070 | } |
@@ -2961,6 +3087,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | |||
2961 | { | 3087 | { |
2962 | if (!cpu_has_virtual_nmis()) | 3088 | if (!cpu_has_virtual_nmis()) |
2963 | return to_vmx(vcpu)->soft_vnmi_blocked; | 3089 | return to_vmx(vcpu)->soft_vnmi_blocked; |
3090 | if (to_vmx(vcpu)->nmi_known_unmasked) | ||
3091 | return false; | ||
2964 | return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; | 3092 | return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; |
2965 | } | 3093 | } |
2966 | 3094 | ||
@@ -2974,6 +3102,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
2974 | vmx->vnmi_blocked_time = 0; | 3102 | vmx->vnmi_blocked_time = 0; |
2975 | } | 3103 | } |
2976 | } else { | 3104 | } else { |
3105 | vmx->nmi_known_unmasked = !masked; | ||
2977 | if (masked) | 3106 | if (masked) |
2978 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | 3107 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, |
2979 | GUEST_INTR_STATE_NMI); | 3108 | GUEST_INTR_STATE_NMI); |
@@ -3091,7 +3220,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
3091 | enum emulation_result er; | 3220 | enum emulation_result er; |
3092 | 3221 | ||
3093 | vect_info = vmx->idt_vectoring_info; | 3222 | vect_info = vmx->idt_vectoring_info; |
3094 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 3223 | intr_info = vmx->exit_intr_info; |
3095 | 3224 | ||
3096 | if (is_machine_check(intr_info)) | 3225 | if (is_machine_check(intr_info)) |
3097 | return handle_machine_check(vcpu); | 3226 | return handle_machine_check(vcpu); |
@@ -3122,7 +3251,6 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
3122 | } | 3251 | } |
3123 | 3252 | ||
3124 | error_code = 0; | 3253 | error_code = 0; |
3125 | rip = kvm_rip_read(vcpu); | ||
3126 | if (intr_info & INTR_INFO_DELIVER_CODE_MASK) | 3254 | if (intr_info & INTR_INFO_DELIVER_CODE_MASK) |
3127 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); | 3255 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); |
3128 | if (is_page_fault(intr_info)) { | 3256 | if (is_page_fault(intr_info)) { |
@@ -3169,6 +3297,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
3169 | vmx->vcpu.arch.event_exit_inst_len = | 3297 | vmx->vcpu.arch.event_exit_inst_len = |
3170 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | 3298 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); |
3171 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 3299 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
3300 | rip = kvm_rip_read(vcpu); | ||
3172 | kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; | 3301 | kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; |
3173 | kvm_run->debug.arch.exception = ex_no; | 3302 | kvm_run->debug.arch.exception = ex_no; |
3174 | break; | 3303 | break; |
@@ -3505,9 +3634,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
3505 | switch (type) { | 3634 | switch (type) { |
3506 | case INTR_TYPE_NMI_INTR: | 3635 | case INTR_TYPE_NMI_INTR: |
3507 | vcpu->arch.nmi_injected = false; | 3636 | vcpu->arch.nmi_injected = false; |
3508 | if (cpu_has_virtual_nmis()) | 3637 | vmx_set_nmi_mask(vcpu, true); |
3509 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
3510 | GUEST_INTR_STATE_NMI); | ||
3511 | break; | 3638 | break; |
3512 | case INTR_TYPE_EXT_INTR: | 3639 | case INTR_TYPE_EXT_INTR: |
3513 | case INTR_TYPE_SOFT_INTR: | 3640 | case INTR_TYPE_SOFT_INTR: |
@@ -3867,12 +3994,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
3867 | 3994 | ||
3868 | static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) | 3995 | static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) |
3869 | { | 3996 | { |
3870 | u32 exit_intr_info = vmx->exit_intr_info; | 3997 | u32 exit_intr_info; |
3998 | |||
3999 | if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY | ||
4000 | || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)) | ||
4001 | return; | ||
4002 | |||
4003 | vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | ||
4004 | exit_intr_info = vmx->exit_intr_info; | ||
3871 | 4005 | ||
3872 | /* Handle machine checks before interrupts are enabled */ | 4006 | /* Handle machine checks before interrupts are enabled */ |
3873 | if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) | 4007 | if (is_machine_check(exit_intr_info)) |
3874 | || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI | ||
3875 | && is_machine_check(exit_intr_info))) | ||
3876 | kvm_machine_check(); | 4008 | kvm_machine_check(); |
3877 | 4009 | ||
3878 | /* We need to handle NMIs before interrupts are enabled */ | 4010 | /* We need to handle NMIs before interrupts are enabled */ |
@@ -3886,7 +4018,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) | |||
3886 | 4018 | ||
3887 | static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) | 4019 | static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) |
3888 | { | 4020 | { |
3889 | u32 exit_intr_info = vmx->exit_intr_info; | 4021 | u32 exit_intr_info; |
3890 | bool unblock_nmi; | 4022 | bool unblock_nmi; |
3891 | u8 vector; | 4023 | u8 vector; |
3892 | bool idtv_info_valid; | 4024 | bool idtv_info_valid; |
@@ -3894,6 +4026,13 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) | |||
3894 | idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; | 4026 | idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; |
3895 | 4027 | ||
3896 | if (cpu_has_virtual_nmis()) { | 4028 | if (cpu_has_virtual_nmis()) { |
4029 | if (vmx->nmi_known_unmasked) | ||
4030 | return; | ||
4031 | /* | ||
4032 | * Can't use vmx->exit_intr_info since we're not sure what | ||
4033 | * the exit reason is. | ||
4034 | */ | ||
4035 | exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | ||
3897 | unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; | 4036 | unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; |
3898 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; | 4037 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; |
3899 | /* | 4038 | /* |
@@ -3910,6 +4049,10 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) | |||
3910 | vector != DF_VECTOR && !idtv_info_valid) | 4049 | vector != DF_VECTOR && !idtv_info_valid) |
3911 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | 4050 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, |
3912 | GUEST_INTR_STATE_NMI); | 4051 | GUEST_INTR_STATE_NMI); |
4052 | else | ||
4053 | vmx->nmi_known_unmasked = | ||
4054 | !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) | ||
4055 | & GUEST_INTR_STATE_NMI); | ||
3913 | } else if (unlikely(vmx->soft_vnmi_blocked)) | 4056 | } else if (unlikely(vmx->soft_vnmi_blocked)) |
3914 | vmx->vnmi_blocked_time += | 4057 | vmx->vnmi_blocked_time += |
3915 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); | 4058 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); |
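The hunk above only touches the VMCS when NMI masking is actually in doubt: vmx_recover_nmi_blocking() returns early while nmi_known_unmasked holds, and otherwise re-reads VM_EXIT_INTR_INFO locally because, as the new comment notes, vmx->exit_intr_info is no longer filled on every exit. Below is a user-space model of that fast path, with a counter standing in for the cost of vmcs_read32(); everything here is illustrative, not kernel code.

#include <stdbool.h>
#include <stdio.h>

static bool nmi_known_unmasked = true;   /* maintained at injection/exit time */
static int vmreads;

static unsigned int vmread_intr_info(void)   /* stands in for vmcs_read32(VM_EXIT_INTR_INFO) */
{
	vmreads++;
	return 0;
}

static void recover_nmi_blocking(void)
{
	if (nmi_known_unmasked)
		return;                  /* common case: no vmread at all */
	(void)vmread_intr_info();        /* slow path: inspect UNBLOCK_NMI, vector, ... */
}

int main(void)
{
	for (int i = 0; i < 1000; i++)   /* one call per simulated VM exit */
		recover_nmi_blocking();
	printf("vmreads on the exit path: %d\n", vmreads);   /* 0 while NMIs stay unmasked */
	return 0;
}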
@@ -3946,8 +4089,7 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, | |||
3946 | * Clear bit "block by NMI" before VM entry if a NMI | 4089 | * Clear bit "block by NMI" before VM entry if a NMI |
3947 | * delivery faulted. | 4090 | * delivery faulted. |
3948 | */ | 4091 | */ |
3949 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, | 4092 | vmx_set_nmi_mask(&vmx->vcpu, false); |
3950 | GUEST_INTR_STATE_NMI); | ||
3951 | break; | 4093 | break; |
3952 | case INTR_TYPE_SOFT_EXCEPTION: | 4094 | case INTR_TYPE_SOFT_EXCEPTION: |
3953 | vmx->vcpu.arch.event_exit_inst_len = | 4095 | vmx->vcpu.arch.event_exit_inst_len = |
@@ -4124,7 +4266,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4124 | ); | 4266 | ); |
4125 | 4267 | ||
4126 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) | 4268 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) |
4269 | | (1 << VCPU_EXREG_RFLAGS) | ||
4270 | | (1 << VCPU_EXREG_CPL) | ||
4127 | | (1 << VCPU_EXREG_PDPTR) | 4271 | | (1 << VCPU_EXREG_PDPTR) |
4272 | | (1 << VCPU_EXREG_SEGMENTS) | ||
4128 | | (1 << VCPU_EXREG_CR3)); | 4273 | | (1 << VCPU_EXREG_CR3)); |
4129 | vcpu->arch.regs_dirty = 0; | 4274 | vcpu->arch.regs_dirty = 0; |
4130 | 4275 | ||
@@ -4134,7 +4279,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4134 | vmx->launched = 1; | 4279 | vmx->launched = 1; |
4135 | 4280 | ||
4136 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); | 4281 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); |
4137 | vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | ||
4138 | 4282 | ||
4139 | vmx_complete_atomic_exit(vmx); | 4283 | vmx_complete_atomic_exit(vmx); |
4140 | vmx_recover_nmi_blocking(vmx); | 4284 | vmx_recover_nmi_blocking(vmx); |
@@ -4195,8 +4339,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
4195 | goto free_vcpu; | 4339 | goto free_vcpu; |
4196 | 4340 | ||
4197 | vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); | 4341 | vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); |
4342 | err = -ENOMEM; | ||
4198 | if (!vmx->guest_msrs) { | 4343 | if (!vmx->guest_msrs) { |
4199 | err = -ENOMEM; | ||
4200 | goto uninit_vcpu; | 4344 | goto uninit_vcpu; |
4201 | } | 4345 | } |
4202 | 4346 | ||
@@ -4215,7 +4359,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
4215 | if (err) | 4359 | if (err) |
4216 | goto free_vmcs; | 4360 | goto free_vmcs; |
4217 | if (vm_need_virtualize_apic_accesses(kvm)) | 4361 | if (vm_need_virtualize_apic_accesses(kvm)) |
4218 | if (alloc_apic_access_page(kvm) != 0) | 4362 | err = alloc_apic_access_page(kvm); |
4363 | if (err) | ||
4219 | goto free_vmcs; | 4364 | goto free_vmcs; |
4220 | 4365 | ||
4221 | if (enable_ept) { | 4366 | if (enable_ept) { |
@@ -4368,6 +4513,13 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | |||
4368 | { | 4513 | { |
4369 | } | 4514 | } |
4370 | 4515 | ||
4516 | static int vmx_check_intercept(struct kvm_vcpu *vcpu, | ||
4517 | struct x86_instruction_info *info, | ||
4518 | enum x86_intercept_stage stage) | ||
4519 | { | ||
4520 | return X86EMUL_CONTINUE; | ||
4521 | } | ||
4522 | |||
4371 | static struct kvm_x86_ops vmx_x86_ops = { | 4523 | static struct kvm_x86_ops vmx_x86_ops = { |
4372 | .cpu_has_kvm_support = cpu_has_kvm_support, | 4524 | .cpu_has_kvm_support = cpu_has_kvm_support, |
4373 | .disabled_by_bios = vmx_disabled_by_bios, | 4525 | .disabled_by_bios = vmx_disabled_by_bios, |
@@ -4449,10 +4601,14 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4449 | 4601 | ||
4450 | .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, | 4602 | .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, |
4451 | 4603 | ||
4604 | .set_tsc_khz = vmx_set_tsc_khz, | ||
4452 | .write_tsc_offset = vmx_write_tsc_offset, | 4605 | .write_tsc_offset = vmx_write_tsc_offset, |
4453 | .adjust_tsc_offset = vmx_adjust_tsc_offset, | 4606 | .adjust_tsc_offset = vmx_adjust_tsc_offset, |
4607 | .compute_tsc_offset = vmx_compute_tsc_offset, | ||
4454 | 4608 | ||
4455 | .set_tdp_cr3 = vmx_set_cr3, | 4609 | .set_tdp_cr3 = vmx_set_cr3, |
4610 | |||
4611 | .check_intercept = vmx_check_intercept, | ||
4456 | }; | 4612 | }; |
4457 | 4613 | ||
4458 | static int __init vmx_init(void) | 4614 | static int __init vmx_init(void) |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 934b4c6b0bf9..77c9d8673dc4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -60,22 +60,12 @@ | |||
60 | #include <asm/div64.h> | 60 | #include <asm/div64.h> |
61 | 61 | ||
62 | #define MAX_IO_MSRS 256 | 62 | #define MAX_IO_MSRS 256 |
63 | #define CR0_RESERVED_BITS \ | ||
64 | (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | ||
65 | | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | ||
66 | | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) | ||
67 | #define CR4_RESERVED_BITS \ | ||
68 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | ||
69 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | ||
70 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ | ||
71 | | X86_CR4_OSXSAVE \ | ||
72 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) | ||
73 | |||
74 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) | ||
75 | |||
76 | #define KVM_MAX_MCE_BANKS 32 | 63 | #define KVM_MAX_MCE_BANKS 32 |
77 | #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) | 64 | #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) |
78 | 65 | ||
66 | #define emul_to_vcpu(ctxt) \ | ||
67 | container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) | ||
68 | |||
79 | /* EFER defaults: | 69 | /* EFER defaults: |
80 | * - enable syscall per default because its emulated by KVM | 70 | * - enable syscall per default because its emulated by KVM |
81 | * - enable LME and LMA per default on 64 bit KVM | 71 | * - enable LME and LMA per default on 64 bit KVM |
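The emul_to_vcpu() macro introduced above is what lets the emulator callbacks later in this diff (emulator_get_cpl(), emulator_read_emulated() and friends) take only an x86_emulate_ctxt pointer: the context is embedded in struct kvm_vcpu, so container_of() recovers the vcpu from it. A minimal user-space illustration of that layout trick, using toy stand-in structs rather than the real KVM types:

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Toy stand-ins for struct x86_emulate_ctxt embedded in struct kvm_vcpu.arch. */
struct emulate_ctxt { unsigned long eflags; };
struct vcpu_arch    { int cpl; struct emulate_ctxt emulate_ctxt; };
struct kvm_vcpu     { int id; struct vcpu_arch arch; };

#define emul_to_vcpu(ctxt) container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)

/* An emulator callback sees only the ctxt, yet can still reach its vcpu. */
static int emulator_get_cpl(struct emulate_ctxt *ctxt)
{
	return emul_to_vcpu(ctxt)->arch.cpl;
}

int main(void)
{
	struct kvm_vcpu vcpu = { .id = 0, .arch = { .cpl = 3 } };

	printf("cpl = %d\n", emulator_get_cpl(&vcpu.arch.emulate_ctxt));
	return 0;
}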
@@ -100,6 +90,11 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); | |||
100 | int ignore_msrs = 0; | 90 | int ignore_msrs = 0; |
101 | module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); | 91 | module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); |
102 | 92 | ||
93 | bool kvm_has_tsc_control; | ||
94 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); | ||
95 | u32 kvm_max_guest_tsc_khz; | ||
96 | EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); | ||
97 | |||
103 | #define KVM_NR_SHARED_MSRS 16 | 98 | #define KVM_NR_SHARED_MSRS 16 |
104 | 99 | ||
105 | struct kvm_shared_msrs_global { | 100 | struct kvm_shared_msrs_global { |
@@ -157,6 +152,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
157 | 152 | ||
158 | u64 __read_mostly host_xcr0; | 153 | u64 __read_mostly host_xcr0; |
159 | 154 | ||
155 | int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); | ||
156 | |||
160 | static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) | 157 | static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) |
161 | { | 158 | { |
162 | int i; | 159 | int i; |
@@ -361,8 +358,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) | |||
361 | 358 | ||
362 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) | 359 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) |
363 | { | 360 | { |
364 | kvm_make_request(KVM_REQ_NMI, vcpu); | ||
365 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 361 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
362 | vcpu->arch.nmi_pending = 1; | ||
366 | } | 363 | } |
367 | EXPORT_SYMBOL_GPL(kvm_inject_nmi); | 364 | EXPORT_SYMBOL_GPL(kvm_inject_nmi); |
368 | 365 | ||
@@ -982,7 +979,15 @@ static inline int kvm_tsc_changes_freq(void) | |||
982 | return ret; | 979 | return ret; |
983 | } | 980 | } |
984 | 981 | ||
985 | static inline u64 nsec_to_cycles(u64 nsec) | 982 | static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) |
983 | { | ||
984 | if (vcpu->arch.virtual_tsc_khz) | ||
985 | return vcpu->arch.virtual_tsc_khz; | ||
986 | else | ||
987 | return __this_cpu_read(cpu_tsc_khz); | ||
988 | } | ||
989 | |||
990 | static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) | ||
986 | { | 991 | { |
987 | u64 ret; | 992 | u64 ret; |
988 | 993 | ||
@@ -990,25 +995,24 @@ static inline u64 nsec_to_cycles(u64 nsec) | |||
990 | if (kvm_tsc_changes_freq()) | 995 | if (kvm_tsc_changes_freq()) |
991 | printk_once(KERN_WARNING | 996 | printk_once(KERN_WARNING |
992 | "kvm: unreliable cycle conversion on adjustable rate TSC\n"); | 997 | "kvm: unreliable cycle conversion on adjustable rate TSC\n"); |
993 | ret = nsec * __this_cpu_read(cpu_tsc_khz); | 998 | ret = nsec * vcpu_tsc_khz(vcpu); |
994 | do_div(ret, USEC_PER_SEC); | 999 | do_div(ret, USEC_PER_SEC); |
995 | return ret; | 1000 | return ret; |
996 | } | 1001 | } |
997 | 1002 | ||
998 | static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz) | 1003 | static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz) |
999 | { | 1004 | { |
1000 | /* Compute a scale to convert nanoseconds in TSC cycles */ | 1005 | /* Compute a scale to convert nanoseconds in TSC cycles */ |
1001 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, | 1006 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, |
1002 | &kvm->arch.virtual_tsc_shift, | 1007 | &vcpu->arch.tsc_catchup_shift, |
1003 | &kvm->arch.virtual_tsc_mult); | 1008 | &vcpu->arch.tsc_catchup_mult); |
1004 | kvm->arch.virtual_tsc_khz = this_tsc_khz; | ||
1005 | } | 1009 | } |
1006 | 1010 | ||
1007 | static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) | 1011 | static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) |
1008 | { | 1012 | { |
1009 | u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, | 1013 | u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, |
1010 | vcpu->kvm->arch.virtual_tsc_mult, | 1014 | vcpu->arch.tsc_catchup_mult, |
1011 | vcpu->kvm->arch.virtual_tsc_shift); | 1015 | vcpu->arch.tsc_catchup_shift); |
1012 | tsc += vcpu->arch.last_tsc_write; | 1016 | tsc += vcpu->arch.last_tsc_write; |
1013 | return tsc; | 1017 | return tsc; |
1014 | } | 1018 | } |
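With vcpu_tsc_khz() in place, nsec_to_cycles() computes cycles = nsec * tsc_khz / USEC_PER_SEC against the per-vcpu rate rather than the host rate. A quick user-space check of that arithmetic, assuming a 2 GHz virtual TSC (an illustrative value, not something taken from the patch):

#include <stdio.h>
#include <stdint.h>

/* cycles = nsec * tsc_khz / USEC_PER_SEC, as in the nsec_to_cycles() change above */
static uint64_t nsec_to_cycles(uint64_t tsc_khz, uint64_t nsec)
{
	return nsec * tsc_khz / 1000000ULL;
}

int main(void)
{
	/* 5 seconds at a 2,000,000 kHz (2 GHz) virtual TSC */
	uint64_t cycles = nsec_to_cycles(2000000, 5000000000ULL);

	/* prints 10000000000: the 5-second tolerance window used by kvm_write_tsc() */
	printf("%llu\n", (unsigned long long)cycles);
	return 0;
}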
@@ -1021,7 +1025,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | |||
1021 | s64 sdiff; | 1025 | s64 sdiff; |
1022 | 1026 | ||
1023 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); | 1027 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); |
1024 | offset = data - native_read_tsc(); | 1028 | offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); |
1025 | ns = get_kernel_ns(); | 1029 | ns = get_kernel_ns(); |
1026 | elapsed = ns - kvm->arch.last_tsc_nsec; | 1030 | elapsed = ns - kvm->arch.last_tsc_nsec; |
1027 | sdiff = data - kvm->arch.last_tsc_write; | 1031 | sdiff = data - kvm->arch.last_tsc_write; |
@@ -1037,13 +1041,13 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | |||
1037 | * In that case, for a reliable TSC, we can match TSC offsets, | 1041 | * In that case, for a reliable TSC, we can match TSC offsets, |
1038 | * or make a best guess using elapsed value. | 1042 | * or make a best guess using elapsed value. |
1039 | */ | 1043 | */ |
1040 | if (sdiff < nsec_to_cycles(5ULL * NSEC_PER_SEC) && | 1044 | if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) && |
1041 | elapsed < 5ULL * NSEC_PER_SEC) { | 1045 | elapsed < 5ULL * NSEC_PER_SEC) { |
1042 | if (!check_tsc_unstable()) { | 1046 | if (!check_tsc_unstable()) { |
1043 | offset = kvm->arch.last_tsc_offset; | 1047 | offset = kvm->arch.last_tsc_offset; |
1044 | pr_debug("kvm: matched tsc offset for %llu\n", data); | 1048 | pr_debug("kvm: matched tsc offset for %llu\n", data); |
1045 | } else { | 1049 | } else { |
1046 | u64 delta = nsec_to_cycles(elapsed); | 1050 | u64 delta = nsec_to_cycles(vcpu, elapsed); |
1047 | offset += delta; | 1051 | offset += delta; |
1048 | pr_debug("kvm: adjusted tsc offset by %llu\n", delta); | 1052 | pr_debug("kvm: adjusted tsc offset by %llu\n", delta); |
1049 | } | 1053 | } |
@@ -1075,8 +1079,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1075 | local_irq_save(flags); | 1079 | local_irq_save(flags); |
1076 | kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); | 1080 | kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); |
1077 | kernel_ns = get_kernel_ns(); | 1081 | kernel_ns = get_kernel_ns(); |
1078 | this_tsc_khz = __this_cpu_read(cpu_tsc_khz); | 1082 | this_tsc_khz = vcpu_tsc_khz(v); |
1079 | |||
1080 | if (unlikely(this_tsc_khz == 0)) { | 1083 | if (unlikely(this_tsc_khz == 0)) { |
1081 | local_irq_restore(flags); | 1084 | local_irq_restore(flags); |
1082 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); | 1085 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); |
@@ -1993,6 +1996,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1993 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | 1996 | case KVM_CAP_X86_ROBUST_SINGLESTEP: |
1994 | case KVM_CAP_XSAVE: | 1997 | case KVM_CAP_XSAVE: |
1995 | case KVM_CAP_ASYNC_PF: | 1998 | case KVM_CAP_ASYNC_PF: |
1999 | case KVM_CAP_GET_TSC_KHZ: | ||
1996 | r = 1; | 2000 | r = 1; |
1997 | break; | 2001 | break; |
1998 | case KVM_CAP_COALESCED_MMIO: | 2002 | case KVM_CAP_COALESCED_MMIO: |
@@ -2019,6 +2023,9 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2019 | case KVM_CAP_XCRS: | 2023 | case KVM_CAP_XCRS: |
2020 | r = cpu_has_xsave; | 2024 | r = cpu_has_xsave; |
2021 | break; | 2025 | break; |
2026 | case KVM_CAP_TSC_CONTROL: | ||
2027 | r = kvm_has_tsc_control; | ||
2028 | break; | ||
2022 | default: | 2029 | default: |
2023 | r = 0; | 2030 | r = 0; |
2024 | break; | 2031 | break; |
@@ -2120,8 +2127,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2120 | kvm_x86_ops->vcpu_load(vcpu, cpu); | 2127 | kvm_x86_ops->vcpu_load(vcpu, cpu); |
2121 | if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { | 2128 | if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { |
2122 | /* Make sure TSC doesn't go backwards */ | 2129 | /* Make sure TSC doesn't go backwards */ |
2123 | s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : | 2130 | s64 tsc_delta; |
2124 | native_read_tsc() - vcpu->arch.last_host_tsc; | 2131 | u64 tsc; |
2132 | |||
2133 | kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc); | ||
2134 | tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : | ||
2135 | tsc - vcpu->arch.last_guest_tsc; | ||
2136 | |||
2125 | if (tsc_delta < 0) | 2137 | if (tsc_delta < 0) |
2126 | mark_tsc_unstable("KVM discovered backwards TSC"); | 2138 | mark_tsc_unstable("KVM discovered backwards TSC"); |
2127 | if (check_tsc_unstable()) { | 2139 | if (check_tsc_unstable()) { |
@@ -2139,7 +2151,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
2139 | { | 2151 | { |
2140 | kvm_x86_ops->vcpu_put(vcpu); | 2152 | kvm_x86_ops->vcpu_put(vcpu); |
2141 | kvm_put_guest_fpu(vcpu); | 2153 | kvm_put_guest_fpu(vcpu); |
2142 | vcpu->arch.last_host_tsc = native_read_tsc(); | 2154 | kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); |
2143 | } | 2155 | } |
2144 | 2156 | ||
2145 | static int is_efer_nx(void) | 2157 | static int is_efer_nx(void) |
@@ -2324,6 +2336,12 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2324 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | | 2336 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | |
2325 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); | 2337 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); |
2326 | 2338 | ||
2339 | /* cpuid 0xC0000001.edx */ | ||
2340 | const u32 kvm_supported_word5_x86_features = | ||
2341 | F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | | ||
2342 | F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | | ||
2343 | F(PMM) | F(PMM_EN); | ||
2344 | |||
2327 | /* all calls to cpuid_count() should be made on the same cpu */ | 2345 | /* all calls to cpuid_count() should be made on the same cpu */ |
2328 | get_cpu(); | 2346 | get_cpu(); |
2329 | do_cpuid_1_ent(entry, function, index); | 2347 | do_cpuid_1_ent(entry, function, index); |
@@ -2418,6 +2436,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2418 | entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | | 2436 | entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | |
2419 | (1 << KVM_FEATURE_NOP_IO_DELAY) | | 2437 | (1 << KVM_FEATURE_NOP_IO_DELAY) | |
2420 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | 2438 | (1 << KVM_FEATURE_CLOCKSOURCE2) | |
2439 | (1 << KVM_FEATURE_ASYNC_PF) | | ||
2421 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | 2440 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); |
2422 | entry->ebx = 0; | 2441 | entry->ebx = 0; |
2423 | entry->ecx = 0; | 2442 | entry->ecx = 0; |
@@ -2432,6 +2451,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2432 | entry->ecx &= kvm_supported_word6_x86_features; | 2451 | entry->ecx &= kvm_supported_word6_x86_features; |
2433 | cpuid_mask(&entry->ecx, 6); | 2452 | cpuid_mask(&entry->ecx, 6); |
2434 | break; | 2453 | break; |
2454 | /*Add support for Centaur's CPUID instruction*/ | ||
2455 | case 0xC0000000: | ||
2456 | /*Just support up to 0xC0000004 now*/ | ||
2457 | entry->eax = min(entry->eax, 0xC0000004); | ||
2458 | break; | ||
2459 | case 0xC0000001: | ||
2460 | entry->edx &= kvm_supported_word5_x86_features; | ||
2461 | cpuid_mask(&entry->edx, 5); | ||
2462 | break; | ||
2463 | case 0xC0000002: | ||
2464 | case 0xC0000003: | ||
2465 | case 0xC0000004: | ||
2466 | /*Now nothing to do, reserved for the future*/ | ||
2467 | break; | ||
2435 | } | 2468 | } |
2436 | 2469 | ||
2437 | kvm_x86_ops->set_supported_cpuid(function, entry); | 2470 | kvm_x86_ops->set_supported_cpuid(function, entry); |
@@ -2478,6 +2511,26 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | |||
2478 | if (nent >= cpuid->nent) | 2511 | if (nent >= cpuid->nent) |
2479 | goto out_free; | 2512 | goto out_free; |
2480 | 2513 | ||
2514 | /* Add support for Centaur's CPUID instruction. */ | ||
2515 | if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) { | ||
2516 | do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0, | ||
2517 | &nent, cpuid->nent); | ||
2518 | |||
2519 | r = -E2BIG; | ||
2520 | if (nent >= cpuid->nent) | ||
2521 | goto out_free; | ||
2522 | |||
2523 | limit = cpuid_entries[nent - 1].eax; | ||
2524 | for (func = 0xC0000001; | ||
2525 | func <= limit && nent < cpuid->nent; ++func) | ||
2526 | do_cpuid_ent(&cpuid_entries[nent], func, 0, | ||
2527 | &nent, cpuid->nent); | ||
2528 | |||
2529 | r = -E2BIG; | ||
2530 | if (nent >= cpuid->nent) | ||
2531 | goto out_free; | ||
2532 | } | ||
2533 | |||
2481 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, | 2534 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, |
2482 | cpuid->nent); | 2535 | cpuid->nent); |
2483 | 2536 | ||
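The two hunks above expose the Centaur leaf range 0xC0000000-0xC0000004 to guests on Centaur hosts, with 0xC0000001.EDX filtered to the PadLock-style feature bits listed in kvm_supported_word5_x86_features. The user-space probe below shows how a guest would query those leaves; __cpuid() from GCC's cpuid.h issues the raw instruction, the range check mirrors the convention the kernel uses for these leaves, and the output is only meaningful on Centaur/VIA hardware or in a guest whose CPUID exposes them.

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Leaf 0xC0000000 reports the highest supported Centaur leaf in EAX. */
	__cpuid(0xC0000000, eax, ebx, ecx, edx);
	if (eax < 0xC0000001 || eax > 0xC0000004) {
		puts("no Centaur leaves visible");
		return 0;
	}

	/* Leaf 0xC0000001 EDX carries the XSTORE/XCRYPT/ACE2/PHE/PMM bits. */
	__cpuid(0xC0000001, eax, ebx, ecx, edx);
	printf("0xC0000001 edx = 0x%08x\n", edx);
	return 0;
}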
@@ -3046,6 +3099,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
3046 | r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); | 3099 | r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); |
3047 | break; | 3100 | break; |
3048 | } | 3101 | } |
3102 | case KVM_SET_TSC_KHZ: { | ||
3103 | u32 user_tsc_khz; | ||
3104 | |||
3105 | r = -EINVAL; | ||
3106 | if (!kvm_has_tsc_control) | ||
3107 | break; | ||
3108 | |||
3109 | user_tsc_khz = (u32)arg; | ||
3110 | |||
3111 | if (user_tsc_khz >= kvm_max_guest_tsc_khz) | ||
3112 | goto out; | ||
3113 | |||
3114 | kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz); | ||
3115 | |||
3116 | r = 0; | ||
3117 | goto out; | ||
3118 | } | ||
3119 | case KVM_GET_TSC_KHZ: { | ||
3120 | r = -EIO; | ||
3121 | if (check_tsc_unstable()) | ||
3122 | goto out; | ||
3123 | |||
3124 | r = vcpu_tsc_khz(vcpu); | ||
3125 | |||
3126 | goto out; | ||
3127 | } | ||
3049 | default: | 3128 | default: |
3050 | r = -EINVAL; | 3129 | r = -EINVAL; |
3051 | } | 3130 | } |
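The new vcpu ioctls take the frequency directly in the ioctl argument: KVM_SET_TSC_KHZ is refused unless kvm_has_tsc_control is set and the requested rate stays below kvm_max_guest_tsc_khz, and KVM_GET_TSC_KHZ fails with EIO on an unstable TSC. A minimal sketch of how a VMM might use them, assuming kernel headers that carry these definitions; error handling is trimmed and the 1.5 GHz value is arbitrary.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);
	int khz;

	if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_TSC_CONTROL) <= 0) {
		puts("TSC scaling not supported on this host");
		return 0;
	}

	khz = ioctl(vcpu, KVM_GET_TSC_KHZ, 0);  /* may fail with EIO on an unstable TSC */
	printf("guest TSC runs at %d kHz\n", khz);

	ioctl(vcpu, KVM_SET_TSC_KHZ, 1500000);  /* ask for a 1.5 GHz guest TSC */
	return 0;
}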
@@ -3595,20 +3674,43 @@ static void kvm_init_msr_list(void) | |||
3595 | static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, | 3674 | static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, |
3596 | const void *v) | 3675 | const void *v) |
3597 | { | 3676 | { |
3598 | if (vcpu->arch.apic && | 3677 | int handled = 0; |
3599 | !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) | 3678 | int n; |
3600 | return 0; | ||
3601 | 3679 | ||
3602 | return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); | 3680 | do { |
3681 | n = min(len, 8); | ||
3682 | if (!(vcpu->arch.apic && | ||
3683 | !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v)) | ||
3684 | && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) | ||
3685 | break; | ||
3686 | handled += n; | ||
3687 | addr += n; | ||
3688 | len -= n; | ||
3689 | v += n; | ||
3690 | } while (len); | ||
3691 | |||
3692 | return handled; | ||
3603 | } | 3693 | } |
3604 | 3694 | ||
3605 | static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | 3695 | static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) |
3606 | { | 3696 | { |
3607 | if (vcpu->arch.apic && | 3697 | int handled = 0; |
3608 | !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) | 3698 | int n; |
3609 | return 0; | 3699 | |
3700 | do { | ||
3701 | n = min(len, 8); | ||
3702 | if (!(vcpu->arch.apic && | ||
3703 | !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v)) | ||
3704 | && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) | ||
3705 | break; | ||
3706 | trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); | ||
3707 | handled += n; | ||
3708 | addr += n; | ||
3709 | len -= n; | ||
3710 | v += n; | ||
3711 | } while (len); | ||
3610 | 3712 | ||
3611 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); | 3713 | return handled; |
3612 | } | 3714 | } |
3613 | 3715 | ||
3614 | static void kvm_set_segment(struct kvm_vcpu *vcpu, | 3716 | static void kvm_set_segment(struct kvm_vcpu *vcpu, |
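The reworked vcpu_mmio_read()/vcpu_mmio_write() above no longer answer yes/no; they walk the access in chunks of at most 8 bytes and report how many bytes the in-kernel devices handled, so a wide access that straddles an emulated device can be completed partly in the kernel and partly via a user-space MMIO exit. A stand-alone user-space model of the write loop, with a toy device in place of kvm_io_bus_write():

#include <stdio.h>

#define CHUNK 8

/* Toy device: only the first 16 bytes of the region are claimed in-kernel. */
static int device_write(unsigned long addr, int len, const void *v)
{
	(void)v;
	return addr + len <= 16 ? 0 : -1;   /* 0 = handled, like kvm_io_bus_write() */
}

/* Mirrors the new vcpu_mmio_write() loop: returns how many bytes were handled. */
static int mmio_write(unsigned long addr, int len, const void *v)
{
	int handled = 0, n;

	do {
		n = len < CHUNK ? len : CHUNK;
		if (device_write(addr, n, v))
			break;
		handled += n;
		addr += n;
		len -= n;
		v = (const char *)v + n;
	} while (len);

	return handled;
}

int main(void)
{
	char buf[24] = "twenty-four byte buffer";

	/* Only 16 of 24 bytes land in the device; the rest becomes an MMIO exit. */
	printf("handled %d of 24 bytes\n", mmio_write(0, (int)sizeof(buf), buf));
	return 0;
}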
@@ -3703,37 +3805,43 @@ out: | |||
3703 | } | 3805 | } |
3704 | 3806 | ||
3705 | /* used for instruction fetching */ | 3807 | /* used for instruction fetching */ |
3706 | static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3808 | static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, |
3707 | struct kvm_vcpu *vcpu, | 3809 | gva_t addr, void *val, unsigned int bytes, |
3708 | struct x86_exception *exception) | 3810 | struct x86_exception *exception) |
3709 | { | 3811 | { |
3812 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3710 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3813 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3814 | |||
3711 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, | 3815 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, |
3712 | access | PFERR_FETCH_MASK, | 3816 | access | PFERR_FETCH_MASK, |
3713 | exception); | 3817 | exception); |
3714 | } | 3818 | } |
3715 | 3819 | ||
3716 | static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3820 | static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, |
3717 | struct kvm_vcpu *vcpu, | 3821 | gva_t addr, void *val, unsigned int bytes, |
3718 | struct x86_exception *exception) | 3822 | struct x86_exception *exception) |
3719 | { | 3823 | { |
3824 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3720 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3825 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3826 | |||
3721 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, | 3827 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, |
3722 | exception); | 3828 | exception); |
3723 | } | 3829 | } |
3724 | 3830 | ||
3725 | static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, | 3831 | static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, |
3726 | struct kvm_vcpu *vcpu, | 3832 | gva_t addr, void *val, unsigned int bytes, |
3727 | struct x86_exception *exception) | 3833 | struct x86_exception *exception) |
3728 | { | 3834 | { |
3835 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3729 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); | 3836 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); |
3730 | } | 3837 | } |
3731 | 3838 | ||
3732 | static int kvm_write_guest_virt_system(gva_t addr, void *val, | 3839 | static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, |
3840 | gva_t addr, void *val, | ||
3733 | unsigned int bytes, | 3841 | unsigned int bytes, |
3734 | struct kvm_vcpu *vcpu, | ||
3735 | struct x86_exception *exception) | 3842 | struct x86_exception *exception) |
3736 | { | 3843 | { |
3844 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3737 | void *data = val; | 3845 | void *data = val; |
3738 | int r = X86EMUL_CONTINUE; | 3846 | int r = X86EMUL_CONTINUE; |
3739 | 3847 | ||
@@ -3761,13 +3869,15 @@ out: | |||
3761 | return r; | 3869 | return r; |
3762 | } | 3870 | } |
3763 | 3871 | ||
3764 | static int emulator_read_emulated(unsigned long addr, | 3872 | static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, |
3873 | unsigned long addr, | ||
3765 | void *val, | 3874 | void *val, |
3766 | unsigned int bytes, | 3875 | unsigned int bytes, |
3767 | struct x86_exception *exception, | 3876 | struct x86_exception *exception) |
3768 | struct kvm_vcpu *vcpu) | ||
3769 | { | 3877 | { |
3878 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3770 | gpa_t gpa; | 3879 | gpa_t gpa; |
3880 | int handled; | ||
3771 | 3881 | ||
3772 | if (vcpu->mmio_read_completed) { | 3882 | if (vcpu->mmio_read_completed) { |
3773 | memcpy(val, vcpu->mmio_data, bytes); | 3883 | memcpy(val, vcpu->mmio_data, bytes); |
@@ -3786,7 +3896,7 @@ static int emulator_read_emulated(unsigned long addr, | |||
3786 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3896 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
3787 | goto mmio; | 3897 | goto mmio; |
3788 | 3898 | ||
3789 | if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception) | 3899 | if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception) |
3790 | == X86EMUL_CONTINUE) | 3900 | == X86EMUL_CONTINUE) |
3791 | return X86EMUL_CONTINUE; | 3901 | return X86EMUL_CONTINUE; |
3792 | 3902 | ||
@@ -3794,18 +3904,24 @@ mmio: | |||
3794 | /* | 3904 | /* |
3795 | * Is this MMIO handled locally? | 3905 | * Is this MMIO handled locally? |
3796 | */ | 3906 | */ |
3797 | if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) { | 3907 | handled = vcpu_mmio_read(vcpu, gpa, bytes, val); |
3798 | trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val); | 3908 | |
3909 | if (handled == bytes) | ||
3799 | return X86EMUL_CONTINUE; | 3910 | return X86EMUL_CONTINUE; |
3800 | } | 3911 | |
3912 | gpa += handled; | ||
3913 | bytes -= handled; | ||
3914 | val += handled; | ||
3801 | 3915 | ||
3802 | trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); | 3916 | trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); |
3803 | 3917 | ||
3804 | vcpu->mmio_needed = 1; | 3918 | vcpu->mmio_needed = 1; |
3805 | vcpu->run->exit_reason = KVM_EXIT_MMIO; | 3919 | vcpu->run->exit_reason = KVM_EXIT_MMIO; |
3806 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; | 3920 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; |
3807 | vcpu->run->mmio.len = vcpu->mmio_size = bytes; | 3921 | vcpu->mmio_size = bytes; |
3922 | vcpu->run->mmio.len = min(vcpu->mmio_size, 8); | ||
3808 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; | 3923 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; |
3924 | vcpu->mmio_index = 0; | ||
3809 | 3925 | ||
3810 | return X86EMUL_IO_NEEDED; | 3926 | return X86EMUL_IO_NEEDED; |
3811 | } | 3927 | } |
@@ -3829,6 +3945,7 @@ static int emulator_write_emulated_onepage(unsigned long addr, | |||
3829 | struct kvm_vcpu *vcpu) | 3945 | struct kvm_vcpu *vcpu) |
3830 | { | 3946 | { |
3831 | gpa_t gpa; | 3947 | gpa_t gpa; |
3948 | int handled; | ||
3832 | 3949 | ||
3833 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception); | 3950 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception); |
3834 | 3951 | ||
@@ -3847,25 +3964,35 @@ mmio: | |||
3847 | /* | 3964 | /* |
3848 | * Is this MMIO handled locally? | 3965 | * Is this MMIO handled locally? |
3849 | */ | 3966 | */ |
3850 | if (!vcpu_mmio_write(vcpu, gpa, bytes, val)) | 3967 | handled = vcpu_mmio_write(vcpu, gpa, bytes, val); |
3968 | if (handled == bytes) | ||
3851 | return X86EMUL_CONTINUE; | 3969 | return X86EMUL_CONTINUE; |
3852 | 3970 | ||
3971 | gpa += handled; | ||
3972 | bytes -= handled; | ||
3973 | val += handled; | ||
3974 | |||
3853 | vcpu->mmio_needed = 1; | 3975 | vcpu->mmio_needed = 1; |
3976 | memcpy(vcpu->mmio_data, val, bytes); | ||
3854 | vcpu->run->exit_reason = KVM_EXIT_MMIO; | 3977 | vcpu->run->exit_reason = KVM_EXIT_MMIO; |
3855 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; | 3978 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; |
3856 | vcpu->run->mmio.len = vcpu->mmio_size = bytes; | 3979 | vcpu->mmio_size = bytes; |
3980 | vcpu->run->mmio.len = min(vcpu->mmio_size, 8); | ||
3857 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; | 3981 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; |
3858 | memcpy(vcpu->run->mmio.data, val, bytes); | 3982 | memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); |
3983 | vcpu->mmio_index = 0; | ||
3859 | 3984 | ||
3860 | return X86EMUL_CONTINUE; | 3985 | return X86EMUL_CONTINUE; |
3861 | } | 3986 | } |
3862 | 3987 | ||
3863 | int emulator_write_emulated(unsigned long addr, | 3988 | int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, |
3989 | unsigned long addr, | ||
3864 | const void *val, | 3990 | const void *val, |
3865 | unsigned int bytes, | 3991 | unsigned int bytes, |
3866 | struct x86_exception *exception, | 3992 | struct x86_exception *exception) |
3867 | struct kvm_vcpu *vcpu) | ||
3868 | { | 3993 | { |
3994 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3995 | |||
3869 | /* Crossing a page boundary? */ | 3996 | /* Crossing a page boundary? */ |
3870 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { | 3997 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { |
3871 | int rc, now; | 3998 | int rc, now; |
@@ -3893,13 +4020,14 @@ int emulator_write_emulated(unsigned long addr, | |||
3893 | (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) | 4020 | (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) |
3894 | #endif | 4021 | #endif |
3895 | 4022 | ||
3896 | static int emulator_cmpxchg_emulated(unsigned long addr, | 4023 | static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, |
4024 | unsigned long addr, | ||
3897 | const void *old, | 4025 | const void *old, |
3898 | const void *new, | 4026 | const void *new, |
3899 | unsigned int bytes, | 4027 | unsigned int bytes, |
3900 | struct x86_exception *exception, | 4028 | struct x86_exception *exception) |
3901 | struct kvm_vcpu *vcpu) | ||
3902 | { | 4029 | { |
4030 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3903 | gpa_t gpa; | 4031 | gpa_t gpa; |
3904 | struct page *page; | 4032 | struct page *page; |
3905 | char *kaddr; | 4033 | char *kaddr; |
@@ -3955,7 +4083,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
3955 | emul_write: | 4083 | emul_write: |
3956 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); | 4084 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); |
3957 | 4085 | ||
3958 | return emulator_write_emulated(addr, new, bytes, exception, vcpu); | 4086 | return emulator_write_emulated(ctxt, addr, new, bytes, exception); |
3959 | } | 4087 | } |
3960 | 4088 | ||
3961 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | 4089 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) |
@@ -3974,9 +4102,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | |||
3974 | } | 4102 | } |
3975 | 4103 | ||
3976 | 4104 | ||
3977 | static int emulator_pio_in_emulated(int size, unsigned short port, void *val, | 4105 | static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, |
3978 | unsigned int count, struct kvm_vcpu *vcpu) | 4106 | int size, unsigned short port, void *val, |
4107 | unsigned int count) | ||
3979 | { | 4108 | { |
4109 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4110 | |||
3980 | if (vcpu->arch.pio.count) | 4111 | if (vcpu->arch.pio.count) |
3981 | goto data_avail; | 4112 | goto data_avail; |
3982 | 4113 | ||
@@ -4004,10 +4135,12 @@ static int emulator_pio_in_emulated(int size, unsigned short port, void *val, | |||
4004 | return 0; | 4135 | return 0; |
4005 | } | 4136 | } |
4006 | 4137 | ||
4007 | static int emulator_pio_out_emulated(int size, unsigned short port, | 4138 | static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, |
4008 | const void *val, unsigned int count, | 4139 | int size, unsigned short port, |
4009 | struct kvm_vcpu *vcpu) | 4140 | const void *val, unsigned int count) |
4010 | { | 4141 | { |
4142 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4143 | |||
4011 | trace_kvm_pio(1, port, size, count); | 4144 | trace_kvm_pio(1, port, size, count); |
4012 | 4145 | ||
4013 | vcpu->arch.pio.port = port; | 4146 | vcpu->arch.pio.port = port; |
@@ -4037,10 +4170,9 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | |||
4037 | return kvm_x86_ops->get_segment_base(vcpu, seg); | 4170 | return kvm_x86_ops->get_segment_base(vcpu, seg); |
4038 | } | 4171 | } |
4039 | 4172 | ||
4040 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) | 4173 | static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address) |
4041 | { | 4174 | { |
4042 | kvm_mmu_invlpg(vcpu, address); | 4175 | kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); |
4043 | return X86EMUL_CONTINUE; | ||
4044 | } | 4176 | } |
4045 | 4177 | ||
4046 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) | 4178 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) |
@@ -4062,22 +4194,20 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) | |||
4062 | } | 4194 | } |
4063 | EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); | 4195 | EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); |
4064 | 4196 | ||
4065 | int emulate_clts(struct kvm_vcpu *vcpu) | 4197 | static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) |
4066 | { | 4198 | { |
4067 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); | 4199 | kvm_emulate_wbinvd(emul_to_vcpu(ctxt)); |
4068 | kvm_x86_ops->fpu_activate(vcpu); | ||
4069 | return X86EMUL_CONTINUE; | ||
4070 | } | 4200 | } |
4071 | 4201 | ||
4072 | int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu) | 4202 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) |
4073 | { | 4203 | { |
4074 | return _kvm_get_dr(vcpu, dr, dest); | 4204 | return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); |
4075 | } | 4205 | } |
4076 | 4206 | ||
4077 | int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu) | 4207 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) |
4078 | { | 4208 | { |
4079 | 4209 | ||
4080 | return __kvm_set_dr(vcpu, dr, value); | 4210 | return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value); |
4081 | } | 4211 | } |
4082 | 4212 | ||
4083 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | 4213 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) |
@@ -4085,8 +4215,9 @@ static u64 mk_cr_64(u64 curr_cr, u32 new_val) | |||
4085 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; | 4215 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; |
4086 | } | 4216 | } |
4087 | 4217 | ||
4088 | static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | 4218 | static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr) |
4089 | { | 4219 | { |
4220 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4090 | unsigned long value; | 4221 | unsigned long value; |
4091 | 4222 | ||
4092 | switch (cr) { | 4223 | switch (cr) { |
@@ -4113,8 +4244,9 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | |||
4113 | return value; | 4244 | return value; |
4114 | } | 4245 | } |
4115 | 4246 | ||
4116 | static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | 4247 | static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val) |
4117 | { | 4248 | { |
4249 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4118 | int res = 0; | 4250 | int res = 0; |
4119 | 4251 | ||
4120 | switch (cr) { | 4252 | switch (cr) { |
@@ -4141,33 +4273,45 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | |||
4141 | return res; | 4273 | return res; |
4142 | } | 4274 | } |
4143 | 4275 | ||
4144 | static int emulator_get_cpl(struct kvm_vcpu *vcpu) | 4276 | static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) |
4277 | { | ||
4278 | return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); | ||
4279 | } | ||
4280 | |||
4281 | static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) | ||
4282 | { | ||
4283 | kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt); | ||
4284 | } | ||
4285 | |||
4286 | static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) | ||
4145 | { | 4287 | { |
4146 | return kvm_x86_ops->get_cpl(vcpu); | 4288 | kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt); |
4147 | } | 4289 | } |
4148 | 4290 | ||
4149 | static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) | 4291 | static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) |
4150 | { | 4292 | { |
4151 | kvm_x86_ops->get_gdt(vcpu, dt); | 4293 | kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt); |
4152 | } | 4294 | } |
4153 | 4295 | ||
4154 | static void emulator_get_idt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) | 4296 | static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) |
4155 | { | 4297 | { |
4156 | kvm_x86_ops->get_idt(vcpu, dt); | 4298 | kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt); |
4157 | } | 4299 | } |
4158 | 4300 | ||
4159 | static unsigned long emulator_get_cached_segment_base(int seg, | 4301 | static unsigned long emulator_get_cached_segment_base( |
4160 | struct kvm_vcpu *vcpu) | 4302 | struct x86_emulate_ctxt *ctxt, int seg) |
4161 | { | 4303 | { |
4162 | return get_segment_base(vcpu, seg); | 4304 | return get_segment_base(emul_to_vcpu(ctxt), seg); |
4163 | } | 4305 | } |
4164 | 4306 | ||
4165 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, | 4307 | static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, |
4166 | int seg, struct kvm_vcpu *vcpu) | 4308 | struct desc_struct *desc, u32 *base3, |
4309 | int seg) | ||
4167 | { | 4310 | { |
4168 | struct kvm_segment var; | 4311 | struct kvm_segment var; |
4169 | 4312 | ||
4170 | kvm_get_segment(vcpu, &var, seg); | 4313 | kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); |
4314 | *selector = var.selector; | ||
4171 | 4315 | ||
4172 | if (var.unusable) | 4316 | if (var.unusable) |
4173 | return false; | 4317 | return false; |
@@ -4192,14 +4336,14 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, | |||
4192 | return true; | 4336 | return true; |
4193 | } | 4337 | } |
4194 | 4338 | ||
4195 | static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, | 4339 | static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, |
4196 | int seg, struct kvm_vcpu *vcpu) | 4340 | struct desc_struct *desc, u32 base3, |
4341 | int seg) | ||
4197 | { | 4342 | { |
4343 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4198 | struct kvm_segment var; | 4344 | struct kvm_segment var; |
4199 | 4345 | ||
4200 | /* needed to preserve selector */ | 4346 | var.selector = selector; |
4201 | kvm_get_segment(vcpu, &var, seg); | ||
4202 | |||
4203 | var.base = get_desc_base(desc); | 4347 | var.base = get_desc_base(desc); |
4204 | #ifdef CONFIG_X86_64 | 4348 | #ifdef CONFIG_X86_64 |
4205 | var.base |= ((u64)base3) << 32; | 4349 | var.base |= ((u64)base3) << 32; |
@@ -4223,22 +4367,44 @@ static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, | |||
4223 | return; | 4367 | return; |
4224 | } | 4368 | } |
4225 | 4369 | ||
4226 | static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) | 4370 | static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, |
4371 | u32 msr_index, u64 *pdata) | ||
4227 | { | 4372 | { |
4228 | struct kvm_segment kvm_seg; | 4373 | return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); |
4374 | } | ||
4229 | 4375 | ||
4230 | kvm_get_segment(vcpu, &kvm_seg, seg); | 4376 | static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, |
4231 | return kvm_seg.selector; | 4377 | u32 msr_index, u64 data) |
4378 | { | ||
4379 | return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); | ||
4232 | } | 4380 | } |
4233 | 4381 | ||
4234 | static void emulator_set_segment_selector(u16 sel, int seg, | 4382 | static void emulator_halt(struct x86_emulate_ctxt *ctxt) |
4235 | struct kvm_vcpu *vcpu) | ||
4236 | { | 4383 | { |
4237 | struct kvm_segment kvm_seg; | 4384 | emul_to_vcpu(ctxt)->arch.halt_request = 1; |
4385 | } | ||
4238 | 4386 | ||
4239 | kvm_get_segment(vcpu, &kvm_seg, seg); | 4387 | static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) |
4240 | kvm_seg.selector = sel; | 4388 | { |
4241 | kvm_set_segment(vcpu, &kvm_seg, seg); | 4389 | preempt_disable(); |
4390 | kvm_load_guest_fpu(emul_to_vcpu(ctxt)); | ||
4391 | /* | ||
4392 | * CR0.TS may reference the host fpu state, not the guest fpu state, | ||
4393 | * so it may be clear at this point. | ||
4394 | */ | ||
4395 | clts(); | ||
4396 | } | ||
4397 | |||
4398 | static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) | ||
4399 | { | ||
4400 | preempt_enable(); | ||
4401 | } | ||
4402 | |||
4403 | static int emulator_intercept(struct x86_emulate_ctxt *ctxt, | ||
4404 | struct x86_instruction_info *info, | ||
4405 | enum x86_intercept_stage stage) | ||
4406 | { | ||
4407 | return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); | ||
4242 | } | 4408 | } |
4243 | 4409 | ||
4244 | static struct x86_emulate_ops emulate_ops = { | 4410 | static struct x86_emulate_ops emulate_ops = { |
@@ -4248,22 +4414,29 @@ static struct x86_emulate_ops emulate_ops = { | |||
4248 | .read_emulated = emulator_read_emulated, | 4414 | .read_emulated = emulator_read_emulated, |
4249 | .write_emulated = emulator_write_emulated, | 4415 | .write_emulated = emulator_write_emulated, |
4250 | .cmpxchg_emulated = emulator_cmpxchg_emulated, | 4416 | .cmpxchg_emulated = emulator_cmpxchg_emulated, |
4417 | .invlpg = emulator_invlpg, | ||
4251 | .pio_in_emulated = emulator_pio_in_emulated, | 4418 | .pio_in_emulated = emulator_pio_in_emulated, |
4252 | .pio_out_emulated = emulator_pio_out_emulated, | 4419 | .pio_out_emulated = emulator_pio_out_emulated, |
4253 | .get_cached_descriptor = emulator_get_cached_descriptor, | 4420 | .get_segment = emulator_get_segment, |
4254 | .set_cached_descriptor = emulator_set_cached_descriptor, | 4421 | .set_segment = emulator_set_segment, |
4255 | .get_segment_selector = emulator_get_segment_selector, | ||
4256 | .set_segment_selector = emulator_set_segment_selector, | ||
4257 | .get_cached_segment_base = emulator_get_cached_segment_base, | 4422 | .get_cached_segment_base = emulator_get_cached_segment_base, |
4258 | .get_gdt = emulator_get_gdt, | 4423 | .get_gdt = emulator_get_gdt, |
4259 | .get_idt = emulator_get_idt, | 4424 | .get_idt = emulator_get_idt, |
4425 | .set_gdt = emulator_set_gdt, | ||
4426 | .set_idt = emulator_set_idt, | ||
4260 | .get_cr = emulator_get_cr, | 4427 | .get_cr = emulator_get_cr, |
4261 | .set_cr = emulator_set_cr, | 4428 | .set_cr = emulator_set_cr, |
4262 | .cpl = emulator_get_cpl, | 4429 | .cpl = emulator_get_cpl, |
4263 | .get_dr = emulator_get_dr, | 4430 | .get_dr = emulator_get_dr, |
4264 | .set_dr = emulator_set_dr, | 4431 | .set_dr = emulator_set_dr, |
4265 | .set_msr = kvm_set_msr, | 4432 | .set_msr = emulator_set_msr, |
4266 | .get_msr = kvm_get_msr, | 4433 | .get_msr = emulator_get_msr, |
4434 | .halt = emulator_halt, | ||
4435 | .wbinvd = emulator_wbinvd, | ||
4436 | .fix_hypercall = emulator_fix_hypercall, | ||
4437 | .get_fpu = emulator_get_fpu, | ||
4438 | .put_fpu = emulator_put_fpu, | ||
4439 | .intercept = emulator_intercept, | ||
4267 | }; | 4440 | }; |
4268 | 4441 | ||
4269 | static void cache_all_regs(struct kvm_vcpu *vcpu) | 4442 | static void cache_all_regs(struct kvm_vcpu *vcpu) |
@@ -4305,12 +4478,17 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) | |||
4305 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | 4478 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; |
4306 | int cs_db, cs_l; | 4479 | int cs_db, cs_l; |
4307 | 4480 | ||
4481 | /* | ||
4482 | * TODO: fix emulate.c to use guest_read/write_register | ||
4483 | * instead of direct ->regs accesses, can save hundreds of cycles | ||
4484 | * on Intel for instructions that don't read/change RSP, | ||
4485 | * for example. | ||
4486 | */ | ||
4308 | cache_all_regs(vcpu); | 4487 | cache_all_regs(vcpu); |
4309 | 4488 | ||
4310 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 4489 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
4311 | 4490 | ||
4312 | vcpu->arch.emulate_ctxt.vcpu = vcpu; | 4491 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); |
4313 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); | ||
4314 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); | 4492 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); |
4315 | vcpu->arch.emulate_ctxt.mode = | 4493 | vcpu->arch.emulate_ctxt.mode = |
4316 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | 4494 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : |
@@ -4318,11 +4496,13 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) | |||
4318 | ? X86EMUL_MODE_VM86 : cs_l | 4496 | ? X86EMUL_MODE_VM86 : cs_l |
4319 | ? X86EMUL_MODE_PROT64 : cs_db | 4497 | ? X86EMUL_MODE_PROT64 : cs_db |
4320 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 4498 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
4499 | vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu); | ||
4321 | memset(c, 0, sizeof(struct decode_cache)); | 4500 | memset(c, 0, sizeof(struct decode_cache)); |
4322 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | 4501 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); |
4502 | vcpu->arch.emulate_regs_need_sync_from_vcpu = false; | ||
4323 | } | 4503 | } |
4324 | 4504 | ||
4325 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) | 4505 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) |
4326 | { | 4506 | { |
4327 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | 4507 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; |
4328 | int ret; | 4508 | int ret; |
@@ -4331,7 +4511,8 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) | |||
4331 | 4511 | ||
4332 | vcpu->arch.emulate_ctxt.decode.op_bytes = 2; | 4512 | vcpu->arch.emulate_ctxt.decode.op_bytes = 2; |
4333 | vcpu->arch.emulate_ctxt.decode.ad_bytes = 2; | 4513 | vcpu->arch.emulate_ctxt.decode.ad_bytes = 2; |
4334 | vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip; | 4514 | vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip + |
4515 | inc_eip; | ||
4335 | ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq); | 4516 | ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq); |
4336 | 4517 | ||
4337 | if (ret != X86EMUL_CONTINUE) | 4518 | if (ret != X86EMUL_CONTINUE) |
@@ -4340,7 +4521,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) | |||
4340 | vcpu->arch.emulate_ctxt.eip = c->eip; | 4521 | vcpu->arch.emulate_ctxt.eip = c->eip; |
4341 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | 4522 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); |
4342 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | 4523 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); |
4343 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 4524 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
4344 | 4525 | ||
4345 | if (irq == NMI_VECTOR) | 4526 | if (irq == NMI_VECTOR) |
4346 | vcpu->arch.nmi_pending = false; | 4527 | vcpu->arch.nmi_pending = false; |
@@ -4402,16 +4583,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4402 | { | 4583 | { |
4403 | int r; | 4584 | int r; |
4404 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | 4585 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; |
4586 | bool writeback = true; | ||
4405 | 4587 | ||
4406 | kvm_clear_exception_queue(vcpu); | 4588 | kvm_clear_exception_queue(vcpu); |
4407 | vcpu->arch.mmio_fault_cr2 = cr2; | ||
4408 | /* | ||
4409 | * TODO: fix emulate.c to use guest_read/write_register | ||
4410 | * instead of direct ->regs accesses, can save hundred cycles | ||
4411 | * on Intel for instructions that don't read/change RSP, for | ||
4412 | * for example. | ||
4413 | */ | ||
4414 | cache_all_regs(vcpu); | ||
4415 | 4589 | ||
4416 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 4590 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
4417 | init_emulate_ctxt(vcpu); | 4591 | init_emulate_ctxt(vcpu); |
@@ -4442,13 +4616,19 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4442 | return EMULATE_DONE; | 4616 | return EMULATE_DONE; |
4443 | } | 4617 | } |
4444 | 4618 | ||
4445 | /* this is needed for vmware backdor interface to work since it | 4619 | /* this is needed for vmware backdoor interface to work since it |
4446 | changes registers values during IO operation */ | 4620 | changes registers values during IO operation */ |
4447 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | 4621 | if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { |
4622 | vcpu->arch.emulate_regs_need_sync_from_vcpu = false; | ||
4623 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | ||
4624 | } | ||
4448 | 4625 | ||
4449 | restart: | 4626 | restart: |
4450 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt); | 4627 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt); |
4451 | 4628 | ||
4629 | if (r == EMULATION_INTERCEPTED) | ||
4630 | return EMULATE_DONE; | ||
4631 | |||
4452 | if (r == EMULATION_FAILED) { | 4632 | if (r == EMULATION_FAILED) { |
4453 | if (reexecute_instruction(vcpu, cr2)) | 4633 | if (reexecute_instruction(vcpu, cr2)) |
4454 | return EMULATE_DONE; | 4634 | return EMULATE_DONE; |
@@ -4462,21 +4642,28 @@ restart: | |||
4462 | } else if (vcpu->arch.pio.count) { | 4642 | } else if (vcpu->arch.pio.count) { |
4463 | if (!vcpu->arch.pio.in) | 4643 | if (!vcpu->arch.pio.in) |
4464 | vcpu->arch.pio.count = 0; | 4644 | vcpu->arch.pio.count = 0; |
4645 | else | ||
4646 | writeback = false; | ||
4465 | r = EMULATE_DO_MMIO; | 4647 | r = EMULATE_DO_MMIO; |
4466 | } else if (vcpu->mmio_needed) { | 4648 | } else if (vcpu->mmio_needed) { |
4467 | if (vcpu->mmio_is_write) | 4649 | if (!vcpu->mmio_is_write) |
4468 | vcpu->mmio_needed = 0; | 4650 | writeback = false; |
4469 | r = EMULATE_DO_MMIO; | 4651 | r = EMULATE_DO_MMIO; |
4470 | } else if (r == EMULATION_RESTART) | 4652 | } else if (r == EMULATION_RESTART) |
4471 | goto restart; | 4653 | goto restart; |
4472 | else | 4654 | else |
4473 | r = EMULATE_DONE; | 4655 | r = EMULATE_DONE; |
4474 | 4656 | ||
4475 | toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility); | 4657 | if (writeback) { |
4476 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 4658 | toggle_interruptibility(vcpu, |
4477 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 4659 | vcpu->arch.emulate_ctxt.interruptibility); |
4478 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | 4660 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
4479 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | 4661 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
4662 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | ||
4663 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | ||
4664 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | ||
4665 | } else | ||
4666 | vcpu->arch.emulate_regs_need_sync_to_vcpu = true; | ||
4480 | 4667 | ||
4481 | return r; | 4668 | return r; |
4482 | } | 4669 | } |
@@ -4485,7 +4672,8 @@ EXPORT_SYMBOL_GPL(x86_emulate_instruction); | |||
4485 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) | 4672 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) |
4486 | { | 4673 | { |
4487 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); | 4674 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
4488 | int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); | 4675 | int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, |
4676 | size, port, &val, 1); | ||
4489 | /* do not return to emulator after return from userspace */ | 4677 | /* do not return to emulator after return from userspace */ |
4490 | vcpu->arch.pio.count = 0; | 4678 | vcpu->arch.pio.count = 0; |
4491 | return ret; | 4679 | return ret; |
@@ -4879,8 +5067,9 @@ out: | |||
4879 | } | 5067 | } |
4880 | EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); | 5068 | EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); |
4881 | 5069 | ||
4882 | int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | 5070 | int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) |
4883 | { | 5071 | { |
5072 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4884 | char instruction[3]; | 5073 | char instruction[3]; |
4885 | unsigned long rip = kvm_rip_read(vcpu); | 5074 | unsigned long rip = kvm_rip_read(vcpu); |
4886 | 5075 | ||
@@ -4893,21 +5082,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
4893 | 5082 | ||
4894 | kvm_x86_ops->patch_hypercall(vcpu, instruction); | 5083 | kvm_x86_ops->patch_hypercall(vcpu, instruction); |
4895 | 5084 | ||
4896 | return emulator_write_emulated(rip, instruction, 3, NULL, vcpu); | 5085 | return emulator_write_emulated(&vcpu->arch.emulate_ctxt, |
4897 | } | 5086 | rip, instruction, 3, NULL); |
4898 | |||
4899 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | ||
4900 | { | ||
4901 | struct desc_ptr dt = { limit, base }; | ||
4902 | |||
4903 | kvm_x86_ops->set_gdt(vcpu, &dt); | ||
4904 | } | ||
4905 | |||
4906 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | ||
4907 | { | ||
4908 | struct desc_ptr dt = { limit, base }; | ||
4909 | |||
4910 | kvm_x86_ops->set_idt(vcpu, &dt); | ||
4911 | } | 5087 | } |
4912 | 5088 | ||
4913 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | 5089 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) |
@@ -5170,6 +5346,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) | |||
5170 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 5346 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
5171 | { | 5347 | { |
5172 | int r; | 5348 | int r; |
5349 | bool nmi_pending; | ||
5173 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 5350 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
5174 | vcpu->run->request_interrupt_window; | 5351 | vcpu->run->request_interrupt_window; |
5175 | 5352 | ||
@@ -5207,19 +5384,25 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5207 | r = 1; | 5384 | r = 1; |
5208 | goto out; | 5385 | goto out; |
5209 | } | 5386 | } |
5210 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) | ||
5211 | vcpu->arch.nmi_pending = true; | ||
5212 | } | 5387 | } |
5213 | 5388 | ||
5214 | r = kvm_mmu_reload(vcpu); | 5389 | r = kvm_mmu_reload(vcpu); |
5215 | if (unlikely(r)) | 5390 | if (unlikely(r)) |
5216 | goto out; | 5391 | goto out; |
5217 | 5392 | ||
5393 | /* | ||
5394 | * An NMI can be injected between local nmi_pending read and | ||
5395 | * vcpu->arch.nmi_pending read inside inject_pending_event(). | ||
5396 | * But in that case, KVM_REQ_EVENT will be set, which makes | ||
5397 | * the race described above benign. | ||
5398 | */ | ||
5399 | nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending); | ||
5400 | |||
5218 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { | 5401 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { |
5219 | inject_pending_event(vcpu); | 5402 | inject_pending_event(vcpu); |
5220 | 5403 | ||
5221 | /* enable NMI/IRQ window open exits if needed */ | 5404 | /* enable NMI/IRQ window open exits if needed */ |
5222 | if (vcpu->arch.nmi_pending) | 5405 | if (nmi_pending) |
5223 | kvm_x86_ops->enable_nmi_window(vcpu); | 5406 | kvm_x86_ops->enable_nmi_window(vcpu); |
5224 | else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) | 5407 | else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) |
5225 | kvm_x86_ops->enable_irq_window(vcpu); | 5408 | kvm_x86_ops->enable_irq_window(vcpu); |
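The snapshot above only works if vcpu->arch.nmi_pending is read exactly once, which is what ACCESS_ONCE() provides. As a reader's sketch (the usual kernel definition, assumed here rather than quoted from this patch), the cast to a volatile lvalue forces the compiler to emit a single load, so the value tested for enable_nmi_window() is the same one sampled before inject_pending_event():

/* Sketch of the assumed ACCESS_ONCE() definition; typeof is a GCC extension. */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

/* Take one stable snapshot of a flag another CPU may flip concurrently;
 * later decisions use the snapshot, never a second read of the flag. */
static int snapshot_flag(int *flag)
{
	return ACCESS_ONCE(*flag);
}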
@@ -5399,6 +5582,41 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5399 | return r; | 5582 | return r; |
5400 | } | 5583 | } |
5401 | 5584 | ||
5585 | static int complete_mmio(struct kvm_vcpu *vcpu) | ||
5586 | { | ||
5587 | struct kvm_run *run = vcpu->run; | ||
5588 | int r; | ||
5589 | |||
5590 | if (!(vcpu->arch.pio.count || vcpu->mmio_needed)) | ||
5591 | return 1; | ||
5592 | |||
5593 | if (vcpu->mmio_needed) { | ||
5594 | vcpu->mmio_needed = 0; | ||
5595 | if (!vcpu->mmio_is_write) | ||
5596 | memcpy(vcpu->mmio_data + vcpu->mmio_index, | ||
5597 | run->mmio.data, 8); | ||
5598 | vcpu->mmio_index += 8; | ||
5599 | if (vcpu->mmio_index < vcpu->mmio_size) { | ||
5600 | run->exit_reason = KVM_EXIT_MMIO; | ||
5601 | run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index; | ||
5602 | memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8); | ||
5603 | run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8); | ||
5604 | run->mmio.is_write = vcpu->mmio_is_write; | ||
5605 | vcpu->mmio_needed = 1; | ||
5606 | return 0; | ||
5607 | } | ||
5608 | if (vcpu->mmio_is_write) | ||
5609 | return 1; | ||
5610 | vcpu->mmio_read_completed = 1; | ||
5611 | } | ||
5612 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
5613 | r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); | ||
5614 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
5615 | if (r != EMULATE_DONE) | ||
5616 | return 0; | ||
5617 | return 1; | ||
5618 | } | ||
5619 | |||
5402 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 5620 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
5403 | { | 5621 | { |
5404 | int r; | 5622 | int r; |
@@ -5425,20 +5643,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
5425 | } | 5643 | } |
5426 | } | 5644 | } |
5427 | 5645 | ||
5428 | if (vcpu->arch.pio.count || vcpu->mmio_needed) { | 5646 | r = complete_mmio(vcpu); |
5429 | if (vcpu->mmio_needed) { | 5647 | if (r <= 0) |
5430 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | 5648 | goto out; |
5431 | vcpu->mmio_read_completed = 1; | 5649 | |
5432 | vcpu->mmio_needed = 0; | ||
5433 | } | ||
5434 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
5435 | r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); | ||
5436 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
5437 | if (r != EMULATE_DONE) { | ||
5438 | r = 0; | ||
5439 | goto out; | ||
5440 | } | ||
5441 | } | ||
5442 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) | 5650 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) |
5443 | kvm_register_write(vcpu, VCPU_REGS_RAX, | 5651 | kvm_register_write(vcpu, VCPU_REGS_RAX, |
5444 | kvm_run->hypercall.ret); | 5652 | kvm_run->hypercall.ret); |
@@ -5455,6 +5663,18 @@ out: | |||
5455 | 5663 | ||
5456 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 5664 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
5457 | { | 5665 | { |
5666 | if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { | ||
5667 | /* | ||
5668 | * We are here if userspace calls get_regs() in the middle of | ||
5669 | * instruction emulation. Register state needs to be copied | ||
5670 | * back from the emulation context to the vcpu. Userspace shouldn't | ||
5671 | * usually do that, but some badly designed PV devices (vmware | ||
5672 | * backdoor interface) need this to work. | ||
5673 | */ | ||
5674 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | ||
5675 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | ||
5676 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | ||
5677 | } | ||
5458 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 5678 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
5459 | regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); | 5679 | regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); |
5460 | regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); | 5680 | regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
@@ -5482,6 +5702,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
5482 | 5702 | ||
5483 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 5703 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
5484 | { | 5704 | { |
5705 | vcpu->arch.emulate_regs_need_sync_from_vcpu = true; | ||
5706 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | ||
5707 | |||
5485 | kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); | 5708 | kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); |
5486 | kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); | 5709 | kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); |
5487 | kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); | 5710 | kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); |
@@ -5592,7 +5815,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | |||
5592 | 5815 | ||
5593 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | 5816 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); |
5594 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | 5817 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); |
5595 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 5818 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
5596 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5819 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5597 | return EMULATE_DONE; | 5820 | return EMULATE_DONE; |
5598 | } | 5821 | } |
@@ -5974,8 +6197,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
5974 | } | 6197 | } |
5975 | vcpu->arch.pio_data = page_address(page); | 6198 | vcpu->arch.pio_data = page_address(page); |
5976 | 6199 | ||
5977 | if (!kvm->arch.virtual_tsc_khz) | 6200 | kvm_init_tsc_catchup(vcpu, max_tsc_khz); |
5978 | kvm_arch_set_tsc_khz(kvm, max_tsc_khz); | ||
5979 | 6201 | ||
5980 | r = kvm_mmu_create(vcpu); | 6202 | r = kvm_mmu_create(vcpu); |
5981 | if (r < 0) | 6203 | if (r < 0) |
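A recurring pattern in the x86.c hunks above is that emulator callbacks now receive only the x86_emulate_ctxt and recover the vcpu through emul_to_vcpu(). A minimal standalone sketch of the idiom presumably behind that helper (the structures below are toy stand-ins, not the real KVM definitions): because the emulation context is embedded in the vcpu, container_of() is enough to get back to it.

#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Toy stand-ins for illustration only. */
struct x86_emulate_ctxt { unsigned long eip; };
struct kvm_vcpu {
	struct {
		struct x86_emulate_ctxt emulate_ctxt;
	} arch;
};

static inline struct kvm_vcpu *emul_to_vcpu(struct x86_emulate_ctxt *ctxt)
{
	return container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt);
}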
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index c600da830ce0..e407ed3df817 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -77,7 +77,7 @@ static inline u32 bit(int bitno) | |||
77 | 77 | ||
78 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | 78 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); |
79 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | 79 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); |
80 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq); | 80 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); |
81 | 81 | ||
82 | void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data); | 82 | void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data); |
83 | 83 | ||
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 1cd608973ce5..e191c096ab90 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -7,7 +7,7 @@ | |||
7 | * kernel and insert a module (lg.ko) which allows us to run other Linux | 7 | * kernel and insert a module (lg.ko) which allows us to run other Linux |
8 | * kernels the same way we'd run processes. We call the first kernel the Host, | 8 | * kernels the same way we'd run processes. We call the first kernel the Host, |
9 | * and the others the Guests. The program which sets up and configures Guests | 9 | * and the others the Guests. The program which sets up and configures Guests |
10 | * (such as the example in Documentation/lguest/lguest.c) is called the | 10 | * (such as the example in Documentation/virtual/lguest/lguest.c) is called the |
11 | * Launcher. | 11 | * Launcher. |
12 | * | 12 | * |
13 | * Secondly, we only run specially modified Guests, not normal kernels: setting | 13 | * Secondly, we only run specially modified Guests, not normal kernels: setting |
@@ -913,8 +913,6 @@ static struct clocksource lguest_clock = { | |||
913 | .rating = 200, | 913 | .rating = 200, |
914 | .read = lguest_clock_read, | 914 | .read = lguest_clock_read, |
915 | .mask = CLOCKSOURCE_MASK(64), | 915 | .mask = CLOCKSOURCE_MASK(64), |
916 | .mult = 1 << 22, | ||
917 | .shift = 22, | ||
918 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 916 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
919 | }; | 917 | }; |
920 | 918 | ||
@@ -997,7 +995,7 @@ static void lguest_time_init(void) | |||
997 | /* Set up the timer interrupt (0) to go to our simple timer routine */ | 995 | /* Set up the timer interrupt (0) to go to our simple timer routine */ |
998 | irq_set_handler(0, lguest_time_irq); | 996 | irq_set_handler(0, lguest_time_irq); |
999 | 997 | ||
1000 | clocksource_register(&lguest_clock); | 998 | clocksource_register_hz(&lguest_clock, NSEC_PER_SEC); |
1001 | 999 | ||
1002 | /* We can't set cpumask in the initializer: damn C limitations! Set it | 1000 | /* We can't set cpumask in the initializer: damn C limitations! Set it |
1003 | * here and register our timer device. */ | 1001 | * here and register our timer device. */ |
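The lguest clocksource change is behaviour preserving: the removed pair mult = 1 << 22, shift = 22 encodes an identity conversion, and the counter already counts nanoseconds, so registering it with clocksource_register_hz() at NSEC_PER_SEC asks the core to derive an equivalent mult/shift instead of hard-coding one. A small standalone check of that identity (userspace C, assumptions noted in the comments):

#include <assert.h>
#include <stdint.h>

/* ns = (cycles * mult) >> shift with mult = 1 << 22 and shift = 22
 * reduces to ns = cycles, i.e. the old constants were a no-op scaling. */
static uint64_t old_cyc2ns(uint64_t cycles)
{
	return (cycles * (1ULL << 22)) >> 22;
}

int main(void)
{
	assert(old_cyc2ns(0) == 0);
	assert(old_cyc2ns(123456789ULL) == 123456789ULL);
	return 0;
}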
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index aa4326bfb24a..f2145cfa12a6 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S | |||
@@ -1,5 +1,6 @@ | |||
1 | #include <linux/linkage.h> | 1 | #include <linux/linkage.h> |
2 | #include <asm/dwarf2.h> | 2 | #include <asm/dwarf2.h> |
3 | #include <asm/alternative-asm.h> | ||
3 | 4 | ||
4 | /* | 5 | /* |
5 | * Zero a page. | 6 | * Zero a page. |
@@ -14,6 +15,15 @@ ENTRY(clear_page_c) | |||
14 | CFI_ENDPROC | 15 | CFI_ENDPROC |
15 | ENDPROC(clear_page_c) | 16 | ENDPROC(clear_page_c) |
16 | 17 | ||
18 | ENTRY(clear_page_c_e) | ||
19 | CFI_STARTPROC | ||
20 | movl $4096,%ecx | ||
21 | xorl %eax,%eax | ||
22 | rep stosb | ||
23 | ret | ||
24 | CFI_ENDPROC | ||
25 | ENDPROC(clear_page_c_e) | ||
26 | |||
17 | ENTRY(clear_page) | 27 | ENTRY(clear_page) |
18 | CFI_STARTPROC | 28 | CFI_STARTPROC |
19 | xorl %eax,%eax | 29 | xorl %eax,%eax |
@@ -38,21 +48,26 @@ ENTRY(clear_page) | |||
38 | .Lclear_page_end: | 48 | .Lclear_page_end: |
39 | ENDPROC(clear_page) | 49 | ENDPROC(clear_page) |
40 | 50 | ||
41 | /* Some CPUs run faster using the string instructions. | 51 | /* |
42 | It is also a lot simpler. Use this when possible */ | 52 | * Some CPUs support enhanced REP MOVSB/STOSB instructions. |
53 | * It is recommended to use this when possible. | ||
54 | * If enhanced REP MOVSB/STOSB is not available, try to use fast string. | ||
55 | * Otherwise, use original function. | ||
56 | * | ||
57 | */ | ||
43 | 58 | ||
44 | #include <asm/cpufeature.h> | 59 | #include <asm/cpufeature.h> |
45 | 60 | ||
46 | .section .altinstr_replacement,"ax" | 61 | .section .altinstr_replacement,"ax" |
47 | 1: .byte 0xeb /* jmp <disp8> */ | 62 | 1: .byte 0xeb /* jmp <disp8> */ |
48 | .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ | 63 | .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ |
49 | 2: | 64 | 2: .byte 0xeb /* jmp <disp8> */ |
65 | .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */ | ||
66 | 3: | ||
50 | .previous | 67 | .previous |
51 | .section .altinstructions,"a" | 68 | .section .altinstructions,"a" |
52 | .align 8 | 69 | altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\ |
53 | .quad clear_page | 70 | .Lclear_page_end-clear_page, 2b-1b |
54 | .quad 1b | 71 | altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \ |
55 | .word X86_FEATURE_REP_GOOD | 72 | .Lclear_page_end-clear_page,3b-2b |
56 | .byte .Lclear_page_end - clear_page | ||
57 | .byte 2b - 1b | ||
58 | .previous | 73 | .previous |
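The open-coded .quad/.quad/.word/.byte/.byte sequence deleted above is exactly the record each altinstruction_entry invocation now emits, one per patch site, to be consumed by boot-time alternatives patching. Read as a C layout (field names are a reader's guess; the authoritative definition is struct alt_instr in asm/alternative.h), the record looks roughly like this:

/* One entry per patchable site; illustrative only. */
struct alt_instr_sketch {
	unsigned long  instr;          /* address of the original code   (.quad) */
	unsigned long  replacement;    /* address of the replacement     (.quad) */
	unsigned short cpuid;          /* feature bit, e.g. X86_FEATURE_ERMS (.word) */
	unsigned char  instrlen;       /* length of the original region  (.byte) */
	unsigned char  replacementlen; /* length of the replacement code (.byte) */
};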
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 99e482615195..024840266ba0 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -15,23 +15,30 @@ | |||
15 | #include <asm/asm-offsets.h> | 15 | #include <asm/asm-offsets.h> |
16 | #include <asm/thread_info.h> | 16 | #include <asm/thread_info.h> |
17 | #include <asm/cpufeature.h> | 17 | #include <asm/cpufeature.h> |
18 | #include <asm/alternative-asm.h> | ||
18 | 19 | ||
19 | .macro ALTERNATIVE_JUMP feature,orig,alt | 20 | /* |
21 | * By placing feature2 after feature1 in altinstructions section, we logically | ||
22 | * implement: | ||
23 | * If CPU has feature2, jmp to alt2 is used | ||
24 | * else if CPU has feature1, jmp to alt1 is used | ||
25 | * else jmp to orig is used. | ||
26 | */ | ||
27 | .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2 | ||
20 | 0: | 28 | 0: |
21 | .byte 0xe9 /* 32bit jump */ | 29 | .byte 0xe9 /* 32bit jump */ |
22 | .long \orig-1f /* by default jump to orig */ | 30 | .long \orig-1f /* by default jump to orig */ |
23 | 1: | 31 | 1: |
24 | .section .altinstr_replacement,"ax" | 32 | .section .altinstr_replacement,"ax" |
25 | 2: .byte 0xe9 /* near jump with 32bit immediate */ | 33 | 2: .byte 0xe9 /* near jump with 32bit immediate */ |
26 | .long \alt-1b /* offset */ /* or alternatively to alt */ | 34 | .long \alt1-1b /* offset */ /* or alternatively to alt1 */ |
35 | 3: .byte 0xe9 /* near jump with 32bit immediate */ | ||
36 | .long \alt2-1b /* offset */ /* or alternatively to alt2 */ | ||
27 | .previous | 37 | .previous |
38 | |||
28 | .section .altinstructions,"a" | 39 | .section .altinstructions,"a" |
29 | .align 8 | 40 | altinstruction_entry 0b,2b,\feature1,5,5 |
30 | .quad 0b | 41 | altinstruction_entry 0b,3b,\feature2,5,5 |
31 | .quad 2b | ||
32 | .word \feature /* when feature is set */ | ||
33 | .byte 5 | ||
34 | .byte 5 | ||
35 | .previous | 42 | .previous |
36 | .endm | 43 | .endm |
37 | 44 | ||
@@ -72,8 +79,10 @@ ENTRY(_copy_to_user) | |||
72 | addq %rdx,%rcx | 79 | addq %rdx,%rcx |
73 | jc bad_to_user | 80 | jc bad_to_user |
74 | cmpq TI_addr_limit(%rax),%rcx | 81 | cmpq TI_addr_limit(%rax),%rcx |
75 | jae bad_to_user | 82 | ja bad_to_user |
76 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | 83 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ |
84 | copy_user_generic_unrolled,copy_user_generic_string, \ | ||
85 | copy_user_enhanced_fast_string | ||
77 | CFI_ENDPROC | 86 | CFI_ENDPROC |
78 | ENDPROC(_copy_to_user) | 87 | ENDPROC(_copy_to_user) |
79 | 88 | ||
@@ -85,8 +94,10 @@ ENTRY(_copy_from_user) | |||
85 | addq %rdx,%rcx | 94 | addq %rdx,%rcx |
86 | jc bad_from_user | 95 | jc bad_from_user |
87 | cmpq TI_addr_limit(%rax),%rcx | 96 | cmpq TI_addr_limit(%rax),%rcx |
88 | jae bad_from_user | 97 | ja bad_from_user |
89 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | 98 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ |
99 | copy_user_generic_unrolled,copy_user_generic_string, \ | ||
100 | copy_user_enhanced_fast_string | ||
90 | CFI_ENDPROC | 101 | CFI_ENDPROC |
91 | ENDPROC(_copy_from_user) | 102 | ENDPROC(_copy_from_user) |
92 | 103 | ||
@@ -255,3 +266,37 @@ ENTRY(copy_user_generic_string) | |||
255 | .previous | 266 | .previous |
256 | CFI_ENDPROC | 267 | CFI_ENDPROC |
257 | ENDPROC(copy_user_generic_string) | 268 | ENDPROC(copy_user_generic_string) |
269 | |||
270 | /* | ||
271 | * Some CPUs are adding enhanced REP MOVSB/STOSB instructions. | ||
272 | * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled. | ||
273 | * | ||
274 | * Input: | ||
275 | * rdi destination | ||
276 | * rsi source | ||
277 | * rdx count | ||
278 | * | ||
279 | * Output: | ||
280 | * eax uncopied bytes or 0 if successful. | ||
281 | */ | ||
282 | ENTRY(copy_user_enhanced_fast_string) | ||
283 | CFI_STARTPROC | ||
284 | andl %edx,%edx | ||
285 | jz 2f | ||
286 | movl %edx,%ecx | ||
287 | 1: rep | ||
288 | movsb | ||
289 | 2: xorl %eax,%eax | ||
290 | ret | ||
291 | |||
292 | .section .fixup,"ax" | ||
293 | 12: movl %ecx,%edx /* ecx is zerorest also */ | ||
294 | jmp copy_user_handle_tail | ||
295 | .previous | ||
296 | |||
297 | .section __ex_table,"a" | ||
298 | .align 8 | ||
299 | .quad 1b,12b | ||
300 | .previous | ||
301 | CFI_ENDPROC | ||
302 | ENDPROC(copy_user_enhanced_fast_string) | ||
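The ALTERNATIVE_JUMP comment above describes a priority scheme: because the ERMS entry comes after the REP_GOOD entry in .altinstructions, it is patched later and therefore wins when both features are present. The same selection order, written out as plain C for clarity (an illustrative model, not kernel code):

typedef unsigned long (*copy_user_fn)(void *to, const void *from, unsigned len);

static copy_user_fn pick_copy_user(int has_rep_good, int has_erms,
				   copy_user_fn unrolled,
				   copy_user_fn string,
				   copy_user_fn enhanced)
{
	if (has_erms)
		return enhanced;   /* copy_user_enhanced_fast_string */
	if (has_rep_good)
		return string;     /* copy_user_generic_string */
	return unrolled;           /* copy_user_generic_unrolled */
}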
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 75ef61e35e38..efbf2a0ecdea 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S | |||
@@ -4,6 +4,7 @@ | |||
4 | 4 | ||
5 | #include <asm/cpufeature.h> | 5 | #include <asm/cpufeature.h> |
6 | #include <asm/dwarf2.h> | 6 | #include <asm/dwarf2.h> |
7 | #include <asm/alternative-asm.h> | ||
7 | 8 | ||
8 | /* | 9 | /* |
9 | * memcpy - Copy a memory block. | 10 | * memcpy - Copy a memory block. |
@@ -37,6 +38,23 @@ | |||
37 | .Lmemcpy_e: | 38 | .Lmemcpy_e: |
38 | .previous | 39 | .previous |
39 | 40 | ||
41 | /* | ||
42 | * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than | ||
43 | * memcpy_c. Use memcpy_c_e when possible. | ||
44 | * | ||
45 | * This gets patched over the unrolled variant (below) via the | ||
46 | * alternative instructions framework: | ||
47 | */ | ||
48 | .section .altinstr_replacement, "ax", @progbits | ||
49 | .Lmemcpy_c_e: | ||
50 | movq %rdi, %rax | ||
51 | |||
52 | movl %edx, %ecx | ||
53 | rep movsb | ||
54 | ret | ||
55 | .Lmemcpy_e_e: | ||
56 | .previous | ||
57 | |||
40 | ENTRY(__memcpy) | 58 | ENTRY(__memcpy) |
41 | ENTRY(memcpy) | 59 | ENTRY(memcpy) |
42 | CFI_STARTPROC | 60 | CFI_STARTPROC |
@@ -49,7 +67,7 @@ ENTRY(memcpy) | |||
49 | jb .Lhandle_tail | 67 | jb .Lhandle_tail |
50 | 68 | ||
51 | /* | 69 | /* |
52 | * We check whether memory false dependece could occur, | 70 | * We check whether memory false dependence could occur, |
53 | * then jump to corresponding copy mode. | 71 | * then jump to corresponding copy mode. |
54 | */ | 72 | */ |
55 | cmp %dil, %sil | 73 | cmp %dil, %sil |
@@ -171,21 +189,22 @@ ENDPROC(memcpy) | |||
171 | ENDPROC(__memcpy) | 189 | ENDPROC(__memcpy) |
172 | 190 | ||
173 | /* | 191 | /* |
174 | * Some CPUs run faster using the string copy instructions. | 192 | * Some CPUs are adding enhanced REP MOVSB/STOSB feature |
175 | * It is also a lot simpler. Use this when possible: | 193 | * If the feature is supported, memcpy_c_e() is the first choice. |
176 | */ | 194 | * If enhanced rep movsb copy is not available, use fast string copy |
177 | 195 | * memcpy_c() when possible. This is faster and code is simpler than | |
178 | .section .altinstructions, "a" | 196 | * original memcpy(). |
179 | .align 8 | 197 | * Otherwise, original memcpy() is used. |
180 | .quad memcpy | 198 | * In .altinstructions section, ERMS feature is placed after REP_GOOD |
181 | .quad .Lmemcpy_c | 199 | * feature to implement the right patch order. |
182 | .word X86_FEATURE_REP_GOOD | 200 | * |
183 | |||
184 | /* | ||
185 | * Replace only beginning, memcpy is used to apply alternatives, | 201 | * Replace only beginning, memcpy is used to apply alternatives, |
186 | * so it is silly to overwrite itself with nops - reboot is the | 202 | * so it is silly to overwrite itself with nops - reboot is the |
187 | * only outcome... | 203 | * only outcome... |
188 | */ | 204 | */ |
189 | .byte .Lmemcpy_e - .Lmemcpy_c | 205 | .section .altinstructions, "a" |
190 | .byte .Lmemcpy_e - .Lmemcpy_c | 206 | altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\ |
207 | .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c | ||
208 | altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \ | ||
209 | .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e | ||
191 | .previous | 210 | .previous |
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 0ecb8433e5a8..d0ec9c2936d7 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S | |||
@@ -8,6 +8,7 @@ | |||
8 | #define _STRING_C | 8 | #define _STRING_C |
9 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
10 | #include <asm/dwarf2.h> | 10 | #include <asm/dwarf2.h> |
11 | #include <asm/cpufeature.h> | ||
11 | 12 | ||
12 | #undef memmove | 13 | #undef memmove |
13 | 14 | ||
@@ -24,6 +25,7 @@ | |||
24 | */ | 25 | */ |
25 | ENTRY(memmove) | 26 | ENTRY(memmove) |
26 | CFI_STARTPROC | 27 | CFI_STARTPROC |
28 | |||
27 | /* Handle more 32bytes in loop */ | 29 | /* Handle more 32bytes in loop */ |
28 | mov %rdi, %rax | 30 | mov %rdi, %rax |
29 | cmp $0x20, %rdx | 31 | cmp $0x20, %rdx |
@@ -31,8 +33,13 @@ ENTRY(memmove) | |||
31 | 33 | ||
32 | /* Decide forward/backward copy mode */ | 34 | /* Decide forward/backward copy mode */ |
33 | cmp %rdi, %rsi | 35 | cmp %rdi, %rsi |
34 | jb 2f | 36 | jge .Lmemmove_begin_forward |
37 | mov %rsi, %r8 | ||
38 | add %rdx, %r8 | ||
39 | cmp %rdi, %r8 | ||
40 | jg 2f | ||
35 | 41 | ||
42 | .Lmemmove_begin_forward: | ||
36 | /* | 43 | /* |
37 | * movsq instruction have many startup latency | 44 | * movsq instruction have many startup latency |
38 | * so we handle small size by general register. | 45 | * so we handle small size by general register. |
@@ -78,6 +85,8 @@ ENTRY(memmove) | |||
78 | rep movsq | 85 | rep movsq |
79 | movq %r11, (%r10) | 86 | movq %r11, (%r10) |
80 | jmp 13f | 87 | jmp 13f |
88 | .Lmemmove_end_forward: | ||
89 | |||
81 | /* | 90 | /* |
82 | * Handle data backward by movsq. | 91 | * Handle data backward by movsq. |
83 | */ | 92 | */ |
@@ -194,4 +203,22 @@ ENTRY(memmove) | |||
194 | 13: | 203 | 13: |
195 | retq | 204 | retq |
196 | CFI_ENDPROC | 205 | CFI_ENDPROC |
206 | |||
207 | .section .altinstr_replacement,"ax" | ||
208 | .Lmemmove_begin_forward_efs: | ||
209 | /* Forward moving data. */ | ||
210 | movq %rdx, %rcx | ||
211 | rep movsb | ||
212 | retq | ||
213 | .Lmemmove_end_forward_efs: | ||
214 | .previous | ||
215 | |||
216 | .section .altinstructions,"a" | ||
217 | .align 8 | ||
218 | .quad .Lmemmove_begin_forward | ||
219 | .quad .Lmemmove_begin_forward_efs | ||
220 | .word X86_FEATURE_ERMS | ||
221 | .byte .Lmemmove_end_forward-.Lmemmove_begin_forward | ||
222 | .byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs | ||
223 | .previous | ||
197 | ENDPROC(memmove) | 224 | ENDPROC(memmove) |
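The new prologue makes the overlap test explicit: the forward path (which the ERMS alternative can later replace with a bare rep movsb) is only taken when the source does not start below the destination while overlapping it. The decision it encodes, modelled in plain C (illustrative only; the real code then dispatches to the movsq/movsb paths):

#include <stddef.h>
#include <stdint.h>

static void *memmove_sketch(void *dst, const void *src, size_t len)
{
	unsigned char *d = dst;
	const unsigned char *s = src;
	size_t i;

	/* Forward copy is safe if src >= dst, or if the ranges do not
	 * overlap at all (src + len <= dst); otherwise copy backwards. */
	if ((uintptr_t)s >= (uintptr_t)d || (uintptr_t)s + len <= (uintptr_t)d) {
		for (i = 0; i < len; i++)
			d[i] = s[i];
	} else {
		for (i = len; i-- > 0; )
			d[i] = s[i];
	}
	return dst;
}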
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 09d344269652..79bd454b78a3 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S | |||
@@ -2,9 +2,13 @@ | |||
2 | 2 | ||
3 | #include <linux/linkage.h> | 3 | #include <linux/linkage.h> |
4 | #include <asm/dwarf2.h> | 4 | #include <asm/dwarf2.h> |
5 | #include <asm/cpufeature.h> | ||
6 | #include <asm/alternative-asm.h> | ||
5 | 7 | ||
6 | /* | 8 | /* |
7 | * ISO C memset - set a memory block to a byte value. | 9 | * ISO C memset - set a memory block to a byte value. This function uses fast |
10 | * string to get better performance than the original function. The code is | ||
11 | * simpler and shorter than the original function as well. | ||
8 | * | 12 | * |
9 | * rdi destination | 13 | * rdi destination |
10 | * rsi value (char) | 14 | * rsi value (char) |
@@ -31,6 +35,28 @@ | |||
31 | .Lmemset_e: | 35 | .Lmemset_e: |
32 | .previous | 36 | .previous |
33 | 37 | ||
38 | /* | ||
39 | * ISO C memset - set a memory block to a byte value. This function uses | ||
40 | * enhanced rep stosb to override the fast string function. | ||
41 | * The code is simpler and shorter than the fast string function as well. | ||
42 | * | ||
43 | * rdi destination | ||
44 | * rsi value (char) | ||
45 | * rdx count (bytes) | ||
46 | * | ||
47 | * rax original destination | ||
48 | */ | ||
49 | .section .altinstr_replacement, "ax", @progbits | ||
50 | .Lmemset_c_e: | ||
51 | movq %rdi,%r9 | ||
52 | movb %sil,%al | ||
53 | movl %edx,%ecx | ||
54 | rep stosb | ||
55 | movq %r9,%rax | ||
56 | ret | ||
57 | .Lmemset_e_e: | ||
58 | .previous | ||
59 | |||
34 | ENTRY(memset) | 60 | ENTRY(memset) |
35 | ENTRY(__memset) | 61 | ENTRY(__memset) |
36 | CFI_STARTPROC | 62 | CFI_STARTPROC |
@@ -112,16 +138,20 @@ ENTRY(__memset) | |||
112 | ENDPROC(memset) | 138 | ENDPROC(memset) |
113 | ENDPROC(__memset) | 139 | ENDPROC(__memset) |
114 | 140 | ||
115 | /* Some CPUs run faster using the string instructions. | 141 | /* Some CPUs support enhanced REP MOVSB/STOSB feature. |
116 | It is also a lot simpler. Use this when possible */ | 142 | * It is recommended to use this when possible. |
117 | 143 | * | |
118 | #include <asm/cpufeature.h> | 144 | * If enhanced REP MOVSB/STOSB feature is not available, use fast string |
119 | 145 | * instructions. | |
146 | * | ||
147 | * Otherwise, use original memset function. | ||
148 | * | ||
149 | * In .altinstructions section, ERMS feature is placed after REP_GOOD | ||
150 | * feature to implement the right patch order. | ||
151 | */ | ||
120 | .section .altinstructions,"a" | 152 | .section .altinstructions,"a" |
121 | .align 8 | 153 | altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\ |
122 | .quad memset | 154 | .Lfinal-memset,.Lmemset_e-.Lmemset_c |
123 | .quad .Lmemset_c | 155 | altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \ |
124 | .word X86_FEATURE_REP_GOOD | 156 | .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e |
125 | .byte .Lfinal - memset | ||
126 | .byte .Lmemset_e - .Lmemset_c | ||
127 | .previous | 157 | .previous |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 3e608edf9958..3d11327c9ab4 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -23,8 +23,8 @@ mmiotrace-y := kmmio.o pf_in.o mmio-mod.o | |||
23 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o | 23 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o |
24 | 24 | ||
25 | obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o | 25 | obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o |
26 | obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o | 26 | obj-$(CONFIG_AMD_NUMA) += amdtopology.o |
27 | obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o | 27 | obj-$(CONFIG_ACPI_NUMA) += srat.o |
28 | obj-$(CONFIG_NUMA_EMU) += numa_emulation.o | 28 | obj-$(CONFIG_NUMA_EMU) += numa_emulation.o |
29 | 29 | ||
30 | obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o | 30 | obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o |
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology.c index 0919c26820d4..5247d01329ca 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | #include <linux/nodemask.h> | 13 | #include <linux/nodemask.h> |
14 | #include <linux/memblock.h> | 14 | #include <linux/memblock.h> |
15 | #include <linux/bootmem.h> | ||
15 | 16 | ||
16 | #include <asm/io.h> | 17 | #include <asm/io.h> |
17 | #include <linux/pci_ids.h> | 18 | #include <linux/pci_ids.h> |
@@ -69,10 +70,10 @@ static __init void early_get_boot_cpu_id(void) | |||
69 | 70 | ||
70 | int __init amd_numa_init(void) | 71 | int __init amd_numa_init(void) |
71 | { | 72 | { |
72 | unsigned long start = PFN_PHYS(0); | 73 | u64 start = PFN_PHYS(0); |
73 | unsigned long end = PFN_PHYS(max_pfn); | 74 | u64 end = PFN_PHYS(max_pfn); |
74 | unsigned numnodes; | 75 | unsigned numnodes; |
75 | unsigned long prevbase; | 76 | u64 prevbase; |
76 | int i, j, nb; | 77 | int i, j, nb; |
77 | u32 nodeid, reg; | 78 | u32 nodeid, reg; |
78 | unsigned int bits, cores, apicid_base; | 79 | unsigned int bits, cores, apicid_base; |
@@ -95,7 +96,7 @@ int __init amd_numa_init(void) | |||
95 | 96 | ||
96 | prevbase = 0; | 97 | prevbase = 0; |
97 | for (i = 0; i < 8; i++) { | 98 | for (i = 0; i < 8; i++) { |
98 | unsigned long base, limit; | 99 | u64 base, limit; |
99 | 100 | ||
100 | base = read_pci_config(0, nb, 1, 0x40 + i*8); | 101 | base = read_pci_config(0, nb, 1, 0x40 + i*8); |
101 | limit = read_pci_config(0, nb, 1, 0x44 + i*8); | 102 | limit = read_pci_config(0, nb, 1, 0x44 + i*8); |
@@ -107,18 +108,18 @@ int __init amd_numa_init(void) | |||
107 | continue; | 108 | continue; |
108 | } | 109 | } |
109 | if (nodeid >= numnodes) { | 110 | if (nodeid >= numnodes) { |
110 | pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid, | 111 | pr_info("Ignoring excess node %d (%Lx:%Lx)\n", nodeid, |
111 | base, limit); | 112 | base, limit); |
112 | continue; | 113 | continue; |
113 | } | 114 | } |
114 | 115 | ||
115 | if (!limit) { | 116 | if (!limit) { |
116 | pr_info("Skipping node entry %d (base %lx)\n", | 117 | pr_info("Skipping node entry %d (base %Lx)\n", |
117 | i, base); | 118 | i, base); |
118 | continue; | 119 | continue; |
119 | } | 120 | } |
120 | if ((base >> 8) & 3 || (limit >> 8) & 3) { | 121 | if ((base >> 8) & 3 || (limit >> 8) & 3) { |
121 | pr_err("Node %d using interleaving mode %lx/%lx\n", | 122 | pr_err("Node %d using interleaving mode %Lx/%Lx\n", |
122 | nodeid, (base >> 8) & 3, (limit >> 8) & 3); | 123 | nodeid, (base >> 8) & 3, (limit >> 8) & 3); |
123 | return -EINVAL; | 124 | return -EINVAL; |
124 | } | 125 | } |
@@ -150,19 +151,19 @@ int __init amd_numa_init(void) | |||
150 | continue; | 151 | continue; |
151 | } | 152 | } |
152 | if (limit < base) { | 153 | if (limit < base) { |
153 | pr_err("Node %d bogus settings %lx-%lx.\n", | 154 | pr_err("Node %d bogus settings %Lx-%Lx.\n", |
154 | nodeid, base, limit); | 155 | nodeid, base, limit); |
155 | continue; | 156 | continue; |
156 | } | 157 | } |
157 | 158 | ||
158 | /* Could sort here, but pun for now. Should not happen anyroads. */ | 159 | /* Could sort here, but pun for now. Should not happen anyroads. */ |
159 | if (prevbase > base) { | 160 | if (prevbase > base) { |
160 | pr_err("Node map not sorted %lx,%lx\n", | 161 | pr_err("Node map not sorted %Lx,%Lx\n", |
161 | prevbase, base); | 162 | prevbase, base); |
162 | return -EINVAL; | 163 | return -EINVAL; |
163 | } | 164 | } |
164 | 165 | ||
165 | pr_info("Node %d MemBase %016lx Limit %016lx\n", | 166 | pr_info("Node %d MemBase %016Lx Limit %016Lx\n", |
166 | nodeid, base, limit); | 167 | nodeid, base, limit); |
167 | 168 | ||
168 | prevbase = base; | 169 | prevbase = base; |
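Widening base and limit to u64 drags the printk formats along with it: %lx is only correct for unsigned long, and the rename to amdtopology.c suggests the file is no longer tied to 64-bit-only builds where the two types happen to be the same width. A tiny standalone illustration of the formatting point (userspace C with the portable PRIx64; the kernel itself uses %Lx/%llx):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t base = 0x123456789ULL;  /* above 4GB, needs all 64 bits */

	/* %lx would truncate this where unsigned long is 32 bits wide. */
	printf("Node 0 MemBase %016" PRIx64 "\n", base);
	return 0;
}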
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 20e3f8702d1e..f7a2a054a3c0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/mmiotrace.h> /* kmmio_handler, ... */ | 12 | #include <linux/mmiotrace.h> /* kmmio_handler, ... */ |
13 | #include <linux/perf_event.h> /* perf_sw_event */ | 13 | #include <linux/perf_event.h> /* perf_sw_event */ |
14 | #include <linux/hugetlb.h> /* hstate_index_to_shift */ | 14 | #include <linux/hugetlb.h> /* hstate_index_to_shift */ |
15 | #include <linux/prefetch.h> /* prefetchw */ | ||
15 | 16 | ||
16 | #include <asm/traps.h> /* dotraplinkage, ... */ | 17 | #include <asm/traps.h> /* dotraplinkage, ... */ |
17 | #include <asm/pgalloc.h> /* pgd_*(), ... */ | 18 | #include <asm/pgalloc.h> /* pgd_*(), ... */ |
@@ -964,7 +965,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
964 | struct mm_struct *mm; | 965 | struct mm_struct *mm; |
965 | int fault; | 966 | int fault; |
966 | int write = error_code & PF_WRITE; | 967 | int write = error_code & PF_WRITE; |
967 | unsigned int flags = FAULT_FLAG_ALLOW_RETRY | | 968 | unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | |
968 | (write ? FAULT_FLAG_WRITE : 0); | 969 | (write ? FAULT_FLAG_WRITE : 0); |
969 | 970 | ||
970 | tsk = current; | 971 | tsk = current; |
@@ -1138,6 +1139,16 @@ good_area: | |||
1138 | } | 1139 | } |
1139 | 1140 | ||
1140 | /* | 1141 | /* |
1142 | * Pagefault was interrupted by SIGKILL. We have no reason to | ||
1143 | * continue pagefault. | ||
1144 | */ | ||
1145 | if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { | ||
1146 | if (!(error_code & PF_USER)) | ||
1147 | no_context(regs, error_code, address); | ||
1148 | return; | ||
1149 | } | ||
1150 | |||
1151 | /* | ||
1141 | * Major/minor page fault accounting is only done on the | 1152 | * Major/minor page fault accounting is only done on the |
1142 | * initial attempt. If we go through a retry, it is extremely | 1153 | * initial attempt. If we go through a retry, it is extremely |
1143 | * likely that the page will be found in page cache at that point. | 1154 | * likely that the page will be found in page cache at that point. |
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index d4203988504a..f581a18c0d4d 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c | |||
@@ -72,7 +72,7 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) | |||
72 | if (!vma_shareable(vma, addr)) | 72 | if (!vma_shareable(vma, addr)) |
73 | return; | 73 | return; |
74 | 74 | ||
75 | spin_lock(&mapping->i_mmap_lock); | 75 | mutex_lock(&mapping->i_mmap_mutex); |
76 | vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { | 76 | vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { |
77 | if (svma == vma) | 77 | if (svma == vma) |
78 | continue; | 78 | continue; |
@@ -97,7 +97,7 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) | |||
97 | put_page(virt_to_page(spte)); | 97 | put_page(virt_to_page(spte)); |
98 | spin_unlock(&mm->page_table_lock); | 98 | spin_unlock(&mm->page_table_lock); |
99 | out: | 99 | out: |
100 | spin_unlock(&mapping->i_mmap_lock); | 100 | mutex_unlock(&mapping->i_mmap_mutex); |
101 | } | 101 | } |
102 | 102 | ||
103 | /* | 103 | /* |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 286d289b039b..30326443ab81 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -16,8 +16,6 @@ | |||
16 | #include <asm/tlb.h> | 16 | #include <asm/tlb.h> |
17 | #include <asm/proto.h> | 17 | #include <asm/proto.h> |
18 | 18 | ||
19 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
20 | |||
21 | unsigned long __initdata pgt_buf_start; | 19 | unsigned long __initdata pgt_buf_start; |
22 | unsigned long __meminitdata pgt_buf_end; | 20 | unsigned long __meminitdata pgt_buf_end; |
23 | unsigned long __meminitdata pgt_buf_top; | 21 | unsigned long __meminitdata pgt_buf_top; |
@@ -81,6 +79,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse, | |||
81 | end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT); | 79 | end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT); |
82 | } | 80 | } |
83 | 81 | ||
82 | void __init native_pagetable_reserve(u64 start, u64 end) | ||
83 | { | ||
84 | memblock_x86_reserve_range(start, end, "PGTABLE"); | ||
85 | } | ||
86 | |||
84 | struct map_range { | 87 | struct map_range { |
85 | unsigned long start; | 88 | unsigned long start; |
86 | unsigned long end; | 89 | unsigned long end; |
@@ -272,9 +275,24 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
272 | 275 | ||
273 | __flush_tlb_all(); | 276 | __flush_tlb_all(); |
274 | 277 | ||
278 | /* | ||
279 | * Reserve the kernel pagetable pages we used (pgt_buf_start - | ||
280 | * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) | ||
281 | * so that they can be reused for other purposes. | ||
282 | * | ||
283 | * On native it just means calling memblock_x86_reserve_range, on Xen it | ||
284 | * also means marking RW the pagetable pages that we allocated before | ||
285 | * but that haven't been used. | ||
286 | * | ||
287 | * In fact on xen we mark RO the whole range pgt_buf_start - | ||
288 | * pgt_buf_top, because we have to make sure that when | ||
289 | * init_memory_mapping reaches the pagetable pages area, it maps | ||
290 | * RO all the pagetable pages, including the ones that are beyond | ||
291 | * pgt_buf_end at that time. | ||
292 | */ | ||
275 | if (!after_bootmem && pgt_buf_end > pgt_buf_start) | 293 | if (!after_bootmem && pgt_buf_end > pgt_buf_start) |
276 | memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT, | 294 | x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start), |
277 | pgt_buf_end << PAGE_SHIFT, "PGTABLE"); | 295 | PFN_PHYS(pgt_buf_end)); |
278 | 296 | ||
279 | if (!after_bootmem) | 297 | if (!after_bootmem) |
280 | early_memtest(start, end); | 298 | early_memtest(start, end); |
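The comment block above explains why the reservation is now routed through x86_init.mapping.pagetable_reserve rather than calling memblock_x86_reserve_range() directly: the native hook only reserves the used range, while Xen also needs to fix up the protections of the pagetable pages it pre-marked read-only. A rough sketch of the indirection this implies (only native_pagetable_reserve and the x86_init.mapping.pagetable_reserve field appear in the hunk; everything else here is an assumed, simplified model):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

struct x86_init_mapping {
	void (*pagetable_reserve)(u64 start, u64 end);
};

static void native_pagetable_reserve_sketch(u64 start, u64 end)
{
	/* Native case: just reserve [start, end) so it is not reused. */
	printf("reserve pagetable range %#llx-%#llx\n",
	       (unsigned long long)start, (unsigned long long)end);
}

/* Default wiring; a Xen guest would install its own hook that additionally
 * re-marks the never-used pagetable pages read-write before freeing them. */
static struct x86_init_mapping mapping_sketch = {
	.pagetable_reserve = native_pagetable_reserve_sketch,
};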
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 80088f994193..29f7c6d98179 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -678,8 +678,10 @@ static void __init zone_sizes_init(void) | |||
678 | { | 678 | { |
679 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | 679 | unsigned long max_zone_pfns[MAX_NR_ZONES]; |
680 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | 680 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); |
681 | #ifdef CONFIG_ZONE_DMA | ||
681 | max_zone_pfns[ZONE_DMA] = | 682 | max_zone_pfns[ZONE_DMA] = |
682 | virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | 683 | virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; |
684 | #endif | ||
683 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; | 685 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; |
684 | #ifdef CONFIG_HIGHMEM | 686 | #ifdef CONFIG_HIGHMEM |
685 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; | 687 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; |
@@ -716,6 +718,7 @@ void __init paging_init(void) | |||
716 | * NOTE: at this point the bootmem allocator is fully available. | 718 | * NOTE: at this point the bootmem allocator is fully available. |
717 | */ | 719 | */ |
718 | olpc_dt_build_devicetree(); | 720 | olpc_dt_build_devicetree(); |
721 | sparse_memory_present_with_active_regions(MAX_NUMNODES); | ||
719 | sparse_init(); | 722 | sparse_init(); |
720 | zone_sizes_init(); | 723 | zone_sizes_init(); |
721 | } | 724 | } |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 794233587287..d865c4aeec55 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -616,7 +616,9 @@ void __init paging_init(void) | |||
616 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | 616 | unsigned long max_zone_pfns[MAX_NR_ZONES]; |
617 | 617 | ||
618 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | 618 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); |
619 | #ifdef CONFIG_ZONE_DMA | ||
619 | max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; | 620 | max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; |
621 | #endif | ||
620 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | 622 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; |
621 | max_zone_pfns[ZONE_NORMAL] = max_pfn; | 623 | max_zone_pfns[ZONE_NORMAL] = max_pfn; |
622 | 624 | ||
@@ -679,14 +681,6 @@ int arch_add_memory(int nid, u64 start, u64 size) | |||
679 | } | 681 | } |
680 | EXPORT_SYMBOL_GPL(arch_add_memory); | 682 | EXPORT_SYMBOL_GPL(arch_add_memory); |
681 | 683 | ||
682 | #if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA) | ||
683 | int memory_add_physaddr_to_nid(u64 start) | ||
684 | { | ||
685 | return 0; | ||
686 | } | ||
687 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | ||
688 | #endif | ||
689 | |||
690 | #endif /* CONFIG_MEMORY_HOTPLUG */ | 684 | #endif /* CONFIG_MEMORY_HOTPLUG */ |
691 | 685 | ||
692 | static struct kcore_list kcore_vsyscall; | 686 | static struct kcore_list kcore_vsyscall; |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 0369843511dc..be1ef574ce9a 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -91,13 +91,6 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
91 | return (__force void __iomem *)phys_to_virt(phys_addr); | 91 | return (__force void __iomem *)phys_to_virt(phys_addr); |
92 | 92 | ||
93 | /* | 93 | /* |
94 | * Check if the request spans more than any BAR in the iomem resource | ||
95 | * tree. | ||
96 | */ | ||
97 | WARN_ONCE(iomem_map_sanity_check(phys_addr, size), | ||
98 | KERN_INFO "Info: mapping multiple BARs. Your kernel is fine."); | ||
99 | |||
100 | /* | ||
101 | * Don't allow anybody to remap normal RAM that we're using.. | 94 | * Don't allow anybody to remap normal RAM that we're using.. |
102 | */ | 95 | */ |
103 | last_pfn = last_addr >> PAGE_SHIFT; | 96 | last_pfn = last_addr >> PAGE_SHIFT; |
@@ -170,6 +163,13 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
170 | ret_addr = (void __iomem *) (vaddr + offset); | 163 | ret_addr = (void __iomem *) (vaddr + offset); |
171 | mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); | 164 | mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); |
172 | 165 | ||
166 | /* | ||
167 | * Check if the request spans more than any BAR in the iomem resource | ||
168 | * tree. | ||
169 | */ | ||
170 | WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size), | ||
171 | KERN_INFO "Info: mapping multiple BARs. Your kernel is fine."); | ||
172 | |||
173 | return ret_addr; | 173 | return ret_addr; |
174 | err_free_area: | 174 | err_free_area: |
175 | free_vm_area(area); | 175 | free_vm_area(area); |
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 9559d360fde7..f5510d889a22 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -1,11 +1,39 @@ | |||
1 | /* Common code for 32 and 64-bit NUMA */ | 1 | /* Common code for 32 and 64-bit NUMA */ |
2 | #include <linux/topology.h> | 2 | #include <linux/kernel.h> |
3 | #include <linux/module.h> | 3 | #include <linux/mm.h> |
4 | #include <linux/string.h> | ||
5 | #include <linux/init.h> | ||
4 | #include <linux/bootmem.h> | 6 | #include <linux/bootmem.h> |
5 | #include <asm/numa.h> | 7 | #include <linux/memblock.h> |
8 | #include <linux/mmzone.h> | ||
9 | #include <linux/ctype.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/nodemask.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/topology.h> | ||
14 | |||
15 | #include <asm/e820.h> | ||
16 | #include <asm/proto.h> | ||
17 | #include <asm/dma.h> | ||
6 | #include <asm/acpi.h> | 18 | #include <asm/acpi.h> |
19 | #include <asm/amd_nb.h> | ||
20 | |||
21 | #include "numa_internal.h" | ||
7 | 22 | ||
8 | int __initdata numa_off; | 23 | int __initdata numa_off; |
24 | nodemask_t numa_nodes_parsed __initdata; | ||
25 | |||
26 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | ||
27 | EXPORT_SYMBOL(node_data); | ||
28 | |||
29 | static struct numa_meminfo numa_meminfo | ||
30 | #ifndef CONFIG_MEMORY_HOTPLUG | ||
31 | __initdata | ||
32 | #endif | ||
33 | ; | ||
34 | |||
35 | static int numa_distance_cnt; | ||
36 | static u8 *numa_distance; | ||
9 | 37 | ||
10 | static __init int numa_setup(char *opt) | 38 | static __init int numa_setup(char *opt) |
11 | { | 39 | { |
@@ -32,6 +60,15 @@ s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | |||
32 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE | 60 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE |
33 | }; | 61 | }; |
34 | 62 | ||
63 | int __cpuinit numa_cpu_node(int cpu) | ||
64 | { | ||
65 | int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); | ||
66 | |||
67 | if (apicid != BAD_APICID) | ||
68 | return __apicid_to_node[apicid]; | ||
69 | return NUMA_NO_NODE; | ||
70 | } | ||
71 | |||
35 | cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; | 72 | cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; |
36 | EXPORT_SYMBOL(node_to_cpumask_map); | 73 | EXPORT_SYMBOL(node_to_cpumask_map); |
37 | 74 | ||
@@ -95,6 +132,407 @@ void __init setup_node_to_cpumask_map(void) | |||
95 | pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); | 132 | pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); |
96 | } | 133 | } |
97 | 134 | ||
135 | static int __init numa_add_memblk_to(int nid, u64 start, u64 end, | ||
136 | struct numa_meminfo *mi) | ||
137 | { | ||
138 | /* ignore zero length blks */ | ||
139 | if (start == end) | ||
140 | return 0; | ||
141 | |||
142 | /* whine about and ignore invalid blks */ | ||
143 | if (start > end || nid < 0 || nid >= MAX_NUMNODES) { | ||
144 | pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n", | ||
145 | nid, start, end); | ||
146 | return 0; | ||
147 | } | ||
148 | |||
149 | if (mi->nr_blks >= NR_NODE_MEMBLKS) { | ||
150 | pr_err("NUMA: too many memblk ranges\n"); | ||
151 | return -EINVAL; | ||
152 | } | ||
153 | |||
154 | mi->blk[mi->nr_blks].start = start; | ||
155 | mi->blk[mi->nr_blks].end = end; | ||
156 | mi->blk[mi->nr_blks].nid = nid; | ||
157 | mi->nr_blks++; | ||
158 | return 0; | ||
159 | } | ||
160 | |||
161 | /** | ||
162 | * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo | ||
163 | * @idx: Index of memblk to remove | ||
164 | * @mi: numa_meminfo to remove memblk from | ||
165 | * | ||
166 | * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and | ||
167 | * decrementing @mi->nr_blks. | ||
168 | */ | ||
169 | void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi) | ||
170 | { | ||
171 | mi->nr_blks--; | ||
172 | memmove(&mi->blk[idx], &mi->blk[idx + 1], | ||
173 | (mi->nr_blks - idx) * sizeof(mi->blk[0])); | ||
174 | } | ||
175 | |||
176 | /** | ||
177 | * numa_add_memblk - Add one numa_memblk to numa_meminfo | ||
178 | * @nid: NUMA node ID of the new memblk | ||
179 | * @start: Start address of the new memblk | ||
180 | * @end: End address of the new memblk | ||
181 | * | ||
182 | * Add a new memblk to the default numa_meminfo. | ||
183 | * | ||
184 | * RETURNS: | ||
185 | * 0 on success, -errno on failure. | ||
186 | */ | ||
187 | int __init numa_add_memblk(int nid, u64 start, u64 end) | ||
188 | { | ||
189 | return numa_add_memblk_to(nid, start, end, &numa_meminfo); | ||
190 | } | ||
191 | |||
192 | /* Initialize NODE_DATA for a node on the local memory */ | ||
193 | static void __init setup_node_data(int nid, u64 start, u64 end) | ||
194 | { | ||
195 | const u64 nd_low = PFN_PHYS(MAX_DMA_PFN); | ||
196 | const u64 nd_high = PFN_PHYS(max_pfn_mapped); | ||
197 | const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | ||
198 | bool remapped = false; | ||
199 | u64 nd_pa; | ||
200 | void *nd; | ||
201 | int tnid; | ||
202 | |||
203 | /* | ||
204 | * Don't confuse VM with a node that doesn't have the | ||
205 | * minimum amount of memory: | ||
206 | */ | ||
207 | if (end && (end - start) < NODE_MIN_SIZE) | ||
208 | return; | ||
209 | |||
210 | /* initialize remap allocator before aligning to ZONE_ALIGN */ | ||
211 | init_alloc_remap(nid, start, end); | ||
212 | |||
213 | start = roundup(start, ZONE_ALIGN); | ||
214 | |||
215 | printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n", | ||
216 | nid, start, end); | ||
217 | |||
218 | /* | ||
219 | * Allocate node data. Try remap allocator first, node-local | ||
220 | * memory and then any node. Never allocate in DMA zone. | ||
221 | */ | ||
222 | nd = alloc_remap(nid, nd_size); | ||
223 | if (nd) { | ||
224 | nd_pa = __pa(nd); | ||
225 | remapped = true; | ||
226 | } else { | ||
227 | nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high, | ||
228 | nd_size, SMP_CACHE_BYTES); | ||
229 | if (nd_pa == MEMBLOCK_ERROR) | ||
230 | nd_pa = memblock_find_in_range(nd_low, nd_high, | ||
231 | nd_size, SMP_CACHE_BYTES); | ||
232 | if (nd_pa == MEMBLOCK_ERROR) { | ||
233 | pr_err("Cannot find %zu bytes in node %d\n", | ||
234 | nd_size, nid); | ||
235 | return; | ||
236 | } | ||
237 | memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA"); | ||
238 | nd = __va(nd_pa); | ||
239 | } | ||
240 | |||
241 | /* report and initialize */ | ||
242 | printk(KERN_INFO " NODE_DATA [%016Lx - %016Lx]%s\n", | ||
243 | nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : ""); | ||
244 | tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); | ||
245 | if (!remapped && tnid != nid) | ||
246 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid); | ||
247 | |||
248 | node_data[nid] = nd; | ||
249 | memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); | ||
250 | NODE_DATA(nid)->node_id = nid; | ||
251 | NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT; | ||
252 | NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT; | ||
253 | |||
254 | node_set_online(nid); | ||
255 | } | ||
256 | |||
257 | /** | ||
258 | * numa_cleanup_meminfo - Cleanup a numa_meminfo | ||
259 | * @mi: numa_meminfo to clean up | ||
260 | * | ||
261 | * Sanitize @mi by merging and removing unnecessary memblks. Also check for | ||
262 | * conflicts and clear unused memblks. | ||
263 | * | ||
264 | * RETURNS: | ||
265 | * 0 on success, -errno on failure. | ||
266 | */ | ||
267 | int __init numa_cleanup_meminfo(struct numa_meminfo *mi) | ||
268 | { | ||
269 | const u64 low = 0; | ||
270 | const u64 high = PFN_PHYS(max_pfn); | ||
271 | int i, j, k; | ||
272 | |||
273 | /* first, trim all entries */ | ||
274 | for (i = 0; i < mi->nr_blks; i++) { | ||
275 | struct numa_memblk *bi = &mi->blk[i]; | ||
276 | |||
277 | /* make sure all blocks are inside the limits */ | ||
278 | bi->start = max(bi->start, low); | ||
279 | bi->end = min(bi->end, high); | ||
280 | |||
281 | /* and there's no empty block */ | ||
282 | if (bi->start >= bi->end) | ||
283 | numa_remove_memblk_from(i--, mi); | ||
284 | } | ||
285 | |||
286 | /* merge neighboring / overlapping entries */ | ||
287 | for (i = 0; i < mi->nr_blks; i++) { | ||
288 | struct numa_memblk *bi = &mi->blk[i]; | ||
289 | |||
290 | for (j = i + 1; j < mi->nr_blks; j++) { | ||
291 | struct numa_memblk *bj = &mi->blk[j]; | ||
292 | u64 start, end; | ||
293 | |||
294 | /* | ||
295 | * See whether there are overlapping blocks. Whine | ||
296 | * about but allow overlaps of the same nid. They | ||
297 | * will be merged below. | ||
298 | */ | ||
299 | if (bi->end > bj->start && bi->start < bj->end) { | ||
300 | if (bi->nid != bj->nid) { | ||
301 | pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n", | ||
302 | bi->nid, bi->start, bi->end, | ||
303 | bj->nid, bj->start, bj->end); | ||
304 | return -EINVAL; | ||
305 | } | ||
306 | pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n", | ||
307 | bi->nid, bi->start, bi->end, | ||
308 | bj->start, bj->end); | ||
309 | } | ||
310 | |||
311 | /* | ||
312 | * Join together blocks on the same node, holes | ||
313 | * between which don't overlap with memory on other | ||
314 | * nodes. | ||
315 | */ | ||
316 | if (bi->nid != bj->nid) | ||
317 | continue; | ||
318 | start = min(bi->start, bj->start); | ||
319 | end = max(bi->end, bj->end); | ||
320 | for (k = 0; k < mi->nr_blks; k++) { | ||
321 | struct numa_memblk *bk = &mi->blk[k]; | ||
322 | |||
323 | if (bi->nid == bk->nid) | ||
324 | continue; | ||
325 | if (start < bk->end && end > bk->start) | ||
326 | break; | ||
327 | } | ||
328 | if (k < mi->nr_blks) | ||
329 | continue; | ||
330 | printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%Lx,%Lx)\n", | ||
331 | bi->nid, bi->start, bi->end, bj->start, bj->end, | ||
332 | start, end); | ||
333 | bi->start = start; | ||
334 | bi->end = end; | ||
335 | numa_remove_memblk_from(j--, mi); | ||
336 | } | ||
337 | } | ||
338 | |||
339 | /* clear unused ones */ | ||
340 | for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) { | ||
341 | mi->blk[i].start = mi->blk[i].end = 0; | ||
342 | mi->blk[i].nid = NUMA_NO_NODE; | ||
343 | } | ||
344 | |||
345 | return 0; | ||
346 | } | ||
347 | |||
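As a concrete (hypothetical) illustration of the merge rule implemented above: two adjacent blocks registered for the same node are collapsed into one, as long as the joined span does not overlap memory that belongs to another node.

	/* Hypothetical input, addresses chosen purely for illustration. */
	numa_add_memblk(0, 0x00000000ULL, 0x40000000ULL);	/* node 0: 0-1G  */
	numa_add_memblk(0, 0x40000000ULL, 0x80000000ULL);	/* node 0: 1G-2G */
	numa_add_memblk(1, 0x80000000ULL, 0xc0000000ULL);	/* node 1: 2G-3G */
	/*
	 * numa_cleanup_meminfo() then merges the two node 0 entries into a
	 * single memblk [0x0, 0x80000000) because their joined span overlaps
	 * no other node; the node 1 entry is left untouched.
	 */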
348 | /* | ||
349 | * Set nodes, which have memory in @mi, in *@nodemask. | ||
350 | */ | ||
351 | static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, | ||
352 | const struct numa_meminfo *mi) | ||
353 | { | ||
354 | int i; | ||
355 | |||
356 | for (i = 0; i < ARRAY_SIZE(mi->blk); i++) | ||
357 | if (mi->blk[i].start != mi->blk[i].end && | ||
358 | mi->blk[i].nid != NUMA_NO_NODE) | ||
359 | node_set(mi->blk[i].nid, *nodemask); | ||
360 | } | ||
361 | |||
362 | /** | ||
363 | * numa_reset_distance - Reset NUMA distance table | ||
364 | * | ||
365 | * The current table is freed. The next numa_set_distance() call will | ||
366 | * create a new one. | ||
367 | */ | ||
368 | void __init numa_reset_distance(void) | ||
369 | { | ||
370 | size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]); | ||
371 | |||
372 | /* numa_distance could be 1LU marking allocation failure, test cnt */ | ||
373 | if (numa_distance_cnt) | ||
374 | memblock_x86_free_range(__pa(numa_distance), | ||
375 | __pa(numa_distance) + size); | ||
376 | numa_distance_cnt = 0; | ||
377 | numa_distance = NULL; /* enable table creation */ | ||
378 | } | ||
379 | |||
380 | static int __init numa_alloc_distance(void) | ||
381 | { | ||
382 | nodemask_t nodes_parsed; | ||
383 | size_t size; | ||
384 | int i, j, cnt = 0; | ||
385 | u64 phys; | ||
386 | |||
387 | /* size the new table and allocate it */ | ||
388 | nodes_parsed = numa_nodes_parsed; | ||
389 | numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo); | ||
390 | |||
391 | for_each_node_mask(i, nodes_parsed) | ||
392 | cnt = i; | ||
393 | cnt++; | ||
394 | size = cnt * cnt * sizeof(numa_distance[0]); | ||
395 | |||
396 | phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), | ||
397 | size, PAGE_SIZE); | ||
398 | if (phys == MEMBLOCK_ERROR) { | ||
399 | pr_warning("NUMA: Warning: can't allocate distance table!\n"); | ||
400 | /* don't retry until explicitly reset */ | ||
401 | numa_distance = (void *)1LU; | ||
402 | return -ENOMEM; | ||
403 | } | ||
404 | memblock_x86_reserve_range(phys, phys + size, "NUMA DIST"); | ||
405 | |||
406 | numa_distance = __va(phys); | ||
407 | numa_distance_cnt = cnt; | ||
408 | |||
409 | /* fill with the default distances */ | ||
410 | for (i = 0; i < cnt; i++) | ||
411 | for (j = 0; j < cnt; j++) | ||
412 | numa_distance[i * cnt + j] = i == j ? | ||
413 | LOCAL_DISTANCE : REMOTE_DISTANCE; | ||
414 | printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt); | ||
415 | |||
416 | return 0; | ||
417 | } | ||
418 | |||
419 | /** | ||
420 | * numa_set_distance - Set NUMA distance from one NUMA to another | ||
421 | * @from: the 'from' node to set distance | ||
422 | * @to: the 'to' node to set distance | ||
423 | * @distance: NUMA distance | ||
424 | * | ||
425 | * Set the distance from node @from to @to to @distance. If distance table | ||
426 | * doesn't exist, one which is large enough to accommodate all the currently | ||
427 | * known nodes will be created. | ||
428 | * | ||
429 | * If such table cannot be allocated, a warning is printed and further | ||
430 | * calls are ignored until the distance table is reset with | ||
431 | * numa_reset_distance(). | ||
432 | * | ||
433 | * If @from or @to is higher than the highest known node at the time of | ||
434 | * table creation or @distance doesn't make sense, the call is ignored. | ||
435 | * This is to allow simplification of specific NUMA config implementations. | ||
436 | */ | ||
437 | void __init numa_set_distance(int from, int to, int distance) | ||
438 | { | ||
439 | if (!numa_distance && numa_alloc_distance() < 0) | ||
440 | return; | ||
441 | |||
442 | if (from >= numa_distance_cnt || to >= numa_distance_cnt) { | ||
443 | printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n", | ||
444 | from, to, distance); | ||
445 | return; | ||
446 | } | ||
447 | |||
448 | if ((u8)distance != distance || | ||
449 | (from == to && distance != LOCAL_DISTANCE)) { | ||
450 | pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n", | ||
451 | from, to, distance); | ||
452 | return; | ||
453 | } | ||
454 | |||
455 | numa_distance[from * numa_distance_cnt + to] = distance; | ||
456 | } | ||
457 | |||
458 | int __node_distance(int from, int to) | ||
459 | { | ||
460 | if (from >= numa_distance_cnt || to >= numa_distance_cnt) | ||
461 | return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; | ||
462 | return numa_distance[from * numa_distance_cnt + to]; | ||
463 | } | ||
464 | EXPORT_SYMBOL(__node_distance); | ||
465 | |||
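The distance bookkeeping above is a flat cnt x cnt byte table indexed as from * cnt + to, pre-filled with LOCAL_DISTANCE on the diagonal and REMOTE_DISTANCE everywhere else; numa_set_distance() overwrites individual cells and __node_distance() reads them back. A standalone sketch of the same layout (table size and the 10/20 default values are assumptions for the example):

	#include <stdio.h>

	#define LOCAL_DISTANCE	10	/* assumed defaults for the sketch */
	#define REMOTE_DISTANCE	20

	int main(void)
	{
		const int cnt = 4;		/* highest parsed node id + 1 */
		unsigned char dist[4 * 4];

		for (int i = 0; i < cnt; i++)	/* default fill */
			for (int j = 0; j < cnt; j++)
				dist[i * cnt + j] = (i == j) ? LOCAL_DISTANCE
							     : REMOTE_DISTANCE;

		dist[0 * cnt + 2] = 21;			/* numa_set_distance(0, 2, 21) */
		printf("%u\n", dist[0 * cnt + 2]);	/* __node_distance(0, 2) -> 21 */
		return 0;
	}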
466 | /* | ||
467 | * Sanity check to catch more bad NUMA configurations (they are amazingly | ||
468 | * common). Make sure the nodes cover all memory. | ||
469 | */ | ||
470 | static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) | ||
471 | { | ||
472 | u64 numaram, e820ram; | ||
473 | int i; | ||
474 | |||
475 | numaram = 0; | ||
476 | for (i = 0; i < mi->nr_blks; i++) { | ||
477 | u64 s = mi->blk[i].start >> PAGE_SHIFT; | ||
478 | u64 e = mi->blk[i].end >> PAGE_SHIFT; | ||
479 | numaram += e - s; | ||
480 | numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); | ||
481 | if ((s64)numaram < 0) | ||
482 | numaram = 0; | ||
483 | } | ||
484 | |||
485 | e820ram = max_pfn - (memblock_x86_hole_size(0, | ||
486 | PFN_PHYS(max_pfn)) >> PAGE_SHIFT); | ||
487 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ | ||
488 | if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { | ||
489 | printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n", | ||
490 | (numaram << PAGE_SHIFT) >> 20, | ||
491 | (e820ram << PAGE_SHIFT) >> 20); | ||
492 | return false; | ||
493 | } | ||
494 | return true; | ||
495 | } | ||
496 | |||
497 | static int __init numa_register_memblks(struct numa_meminfo *mi) | ||
498 | { | ||
499 | int i, nid; | ||
500 | |||
501 | /* Account for nodes with cpus and no memory */ | ||
502 | node_possible_map = numa_nodes_parsed; | ||
503 | numa_nodemask_from_meminfo(&node_possible_map, mi); | ||
504 | if (WARN_ON(nodes_empty(node_possible_map))) | ||
505 | return -EINVAL; | ||
506 | |||
507 | for (i = 0; i < mi->nr_blks; i++) | ||
508 | memblock_x86_register_active_regions(mi->blk[i].nid, | ||
509 | mi->blk[i].start >> PAGE_SHIFT, | ||
510 | mi->blk[i].end >> PAGE_SHIFT); | ||
511 | |||
512 | /* for out of order entries */ | ||
513 | sort_node_map(); | ||
514 | if (!numa_meminfo_cover_memory(mi)) | ||
515 | return -EINVAL; | ||
516 | |||
517 | /* Finally register nodes. */ | ||
518 | for_each_node_mask(nid, node_possible_map) { | ||
519 | u64 start = PFN_PHYS(max_pfn); | ||
520 | u64 end = 0; | ||
521 | |||
522 | for (i = 0; i < mi->nr_blks; i++) { | ||
523 | if (nid != mi->blk[i].nid) | ||
524 | continue; | ||
525 | start = min(mi->blk[i].start, start); | ||
526 | end = max(mi->blk[i].end, end); | ||
527 | } | ||
528 | |||
529 | if (start < end) | ||
530 | setup_node_data(nid, start, end); | ||
531 | } | ||
532 | |||
533 | return 0; | ||
534 | } | ||
535 | |||
98 | /* | 536 | /* |
99 | * There are unfortunately some poorly designed mainboards around that | 537 | * There are unfortunately some poorly designed mainboards around that |
100 | * only connect memory to a single CPU. This breaks the 1:1 cpu->node | 538 | * only connect memory to a single CPU. This breaks the 1:1 cpu->node |
@@ -102,7 +540,7 @@ void __init setup_node_to_cpumask_map(void) | |||
102 | * as the number of CPUs is not known yet. We round robin the existing | 540 | * as the number of CPUs is not known yet. We round robin the existing |
103 | * nodes. | 541 | * nodes. |
104 | */ | 542 | */ |
105 | void __init numa_init_array(void) | 543 | static void __init numa_init_array(void) |
106 | { | 544 | { |
107 | int rr, i; | 545 | int rr, i; |
108 | 546 | ||
@@ -117,6 +555,95 @@ void __init numa_init_array(void) | |||
117 | } | 555 | } |
118 | } | 556 | } |
119 | 557 | ||
558 | static int __init numa_init(int (*init_func)(void)) | ||
559 | { | ||
560 | int i; | ||
561 | int ret; | ||
562 | |||
563 | for (i = 0; i < MAX_LOCAL_APIC; i++) | ||
564 | set_apicid_to_node(i, NUMA_NO_NODE); | ||
565 | |||
566 | nodes_clear(numa_nodes_parsed); | ||
567 | nodes_clear(node_possible_map); | ||
568 | nodes_clear(node_online_map); | ||
569 | memset(&numa_meminfo, 0, sizeof(numa_meminfo)); | ||
570 | remove_all_active_ranges(); | ||
571 | numa_reset_distance(); | ||
572 | |||
573 | ret = init_func(); | ||
574 | if (ret < 0) | ||
575 | return ret; | ||
576 | ret = numa_cleanup_meminfo(&numa_meminfo); | ||
577 | if (ret < 0) | ||
578 | return ret; | ||
579 | |||
580 | numa_emulation(&numa_meminfo, numa_distance_cnt); | ||
581 | |||
582 | ret = numa_register_memblks(&numa_meminfo); | ||
583 | if (ret < 0) | ||
584 | return ret; | ||
585 | |||
586 | for (i = 0; i < nr_cpu_ids; i++) { | ||
587 | int nid = early_cpu_to_node(i); | ||
588 | |||
589 | if (nid == NUMA_NO_NODE) | ||
590 | continue; | ||
591 | if (!node_online(nid)) | ||
592 | numa_clear_node(i); | ||
593 | } | ||
594 | numa_init_array(); | ||
595 | return 0; | ||
596 | } | ||
597 | |||
598 | /** | ||
599 | * dummy_numa_init - Fallback dummy NUMA init | ||
600 | * | ||
601 | * Used if there's no underlying NUMA architecture, NUMA initialization | ||
602 | * fails, or NUMA is disabled on the command line. | ||
603 | * | ||
604 | * Must online at least one node and add memory blocks that cover all | ||
605 | * allowed memory. This function must not fail. | ||
606 | */ | ||
607 | static int __init dummy_numa_init(void) | ||
608 | { | ||
609 | printk(KERN_INFO "%s\n", | ||
610 | numa_off ? "NUMA turned off" : "No NUMA configuration found"); | ||
611 | printk(KERN_INFO "Faking a node at %016Lx-%016Lx\n", | ||
612 | 0LLU, PFN_PHYS(max_pfn)); | ||
613 | |||
614 | node_set(0, numa_nodes_parsed); | ||
615 | numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); | ||
616 | |||
617 | return 0; | ||
618 | } | ||
619 | |||
620 | /** | ||
621 | * x86_numa_init - Initialize NUMA | ||
622 | * | ||
623 | * Try each configured NUMA initialization method until one succeeds. The | ||
624 | * last fallback is a dummy single node config encompassing whole memory and | ||
625 | * never fails. | ||
626 | */ | ||
627 | void __init x86_numa_init(void) | ||
628 | { | ||
629 | if (!numa_off) { | ||
630 | #ifdef CONFIG_X86_NUMAQ | ||
631 | if (!numa_init(numaq_numa_init)) | ||
632 | return; | ||
633 | #endif | ||
634 | #ifdef CONFIG_ACPI_NUMA | ||
635 | if (!numa_init(x86_acpi_numa_init)) | ||
636 | return; | ||
637 | #endif | ||
638 | #ifdef CONFIG_AMD_NUMA | ||
639 | if (!numa_init(amd_numa_init)) | ||
640 | return; | ||
641 | #endif | ||
642 | } | ||
643 | |||
644 | numa_init(dummy_numa_init); | ||
645 | } | ||
646 | |||
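x86_numa_init() above is a simple priority chain: each compiled-in backend is driven through the common numa_init() wrapper, the first one that parses a configuration and survives cleanup and registration wins, and dummy_numa_init() is the last resort that may not fail. The same pattern reduced to its core (hypothetical names, not kernel API):

	typedef int (*numa_backend_fn)(void);	/* 0 on success, <0 on failure */

	static int try_numa_backends(const numa_backend_fn *backends, int nr,
				     numa_backend_fn fallback)
	{
		for (int i = 0; i < nr; i++)
			if (backends[i] && backends[i]() == 0)
				return 0;	/* first working backend wins */
		return fallback();		/* dummy single-node config   */
	}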
120 | static __init int find_near_online_node(int node) | 647 | static __init int find_near_online_node(int node) |
121 | { | 648 | { |
122 | int n, val; | 649 | int n, val; |
@@ -213,53 +740,48 @@ int early_cpu_to_node(int cpu) | |||
213 | return per_cpu(x86_cpu_to_node_map, cpu); | 740 | return per_cpu(x86_cpu_to_node_map, cpu); |
214 | } | 741 | } |
215 | 742 | ||
216 | struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable) | 743 | void debug_cpumask_set_cpu(int cpu, int node, bool enable) |
217 | { | 744 | { |
218 | int node = early_cpu_to_node(cpu); | ||
219 | struct cpumask *mask; | 745 | struct cpumask *mask; |
220 | char buf[64]; | 746 | char buf[64]; |
221 | 747 | ||
222 | if (node == NUMA_NO_NODE) { | 748 | if (node == NUMA_NO_NODE) { |
223 | /* early_cpu_to_node() already emits a warning and trace */ | 749 | /* early_cpu_to_node() already emits a warning and trace */ |
224 | return NULL; | 750 | return; |
225 | } | 751 | } |
226 | mask = node_to_cpumask_map[node]; | 752 | mask = node_to_cpumask_map[node]; |
227 | if (!mask) { | 753 | if (!mask) { |
228 | pr_err("node_to_cpumask_map[%i] NULL\n", node); | 754 | pr_err("node_to_cpumask_map[%i] NULL\n", node); |
229 | dump_stack(); | 755 | dump_stack(); |
230 | return NULL; | 756 | return; |
231 | } | 757 | } |
232 | 758 | ||
759 | if (enable) | ||
760 | cpumask_set_cpu(cpu, mask); | ||
761 | else | ||
762 | cpumask_clear_cpu(cpu, mask); | ||
763 | |||
233 | cpulist_scnprintf(buf, sizeof(buf), mask); | 764 | cpulist_scnprintf(buf, sizeof(buf), mask); |
234 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", | 765 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", |
235 | enable ? "numa_add_cpu" : "numa_remove_cpu", | 766 | enable ? "numa_add_cpu" : "numa_remove_cpu", |
236 | cpu, node, buf); | 767 | cpu, node, buf); |
237 | return mask; | 768 | return; |
238 | } | 769 | } |
239 | 770 | ||
240 | # ifndef CONFIG_NUMA_EMU | 771 | # ifndef CONFIG_NUMA_EMU |
241 | static void __cpuinit numa_set_cpumask(int cpu, int enable) | 772 | static void __cpuinit numa_set_cpumask(int cpu, bool enable) |
242 | { | 773 | { |
243 | struct cpumask *mask; | 774 | debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable); |
244 | |||
245 | mask = debug_cpumask_set_cpu(cpu, enable); | ||
246 | if (!mask) | ||
247 | return; | ||
248 | |||
249 | if (enable) | ||
250 | cpumask_set_cpu(cpu, mask); | ||
251 | else | ||
252 | cpumask_clear_cpu(cpu, mask); | ||
253 | } | 775 | } |
254 | 776 | ||
255 | void __cpuinit numa_add_cpu(int cpu) | 777 | void __cpuinit numa_add_cpu(int cpu) |
256 | { | 778 | { |
257 | numa_set_cpumask(cpu, 1); | 779 | numa_set_cpumask(cpu, true); |
258 | } | 780 | } |
259 | 781 | ||
260 | void __cpuinit numa_remove_cpu(int cpu) | 782 | void __cpuinit numa_remove_cpu(int cpu) |
261 | { | 783 | { |
262 | numa_set_cpumask(cpu, 0); | 784 | numa_set_cpumask(cpu, false); |
263 | } | 785 | } |
264 | # endif /* !CONFIG_NUMA_EMU */ | 786 | # endif /* !CONFIG_NUMA_EMU */ |
265 | 787 | ||
@@ -287,3 +809,18 @@ const struct cpumask *cpumask_of_node(int node) | |||
287 | EXPORT_SYMBOL(cpumask_of_node); | 809 | EXPORT_SYMBOL(cpumask_of_node); |
288 | 810 | ||
289 | #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ | 811 | #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ |
812 | |||
813 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
814 | int memory_add_physaddr_to_nid(u64 start) | ||
815 | { | ||
816 | struct numa_meminfo *mi = &numa_meminfo; | ||
817 | int nid = mi->blk[0].nid; | ||
818 | int i; | ||
819 | |||
820 | for (i = 0; i < mi->nr_blks; i++) | ||
821 | if (mi->blk[i].start <= start && mi->blk[i].end > start) | ||
822 | nid = mi->blk[i].nid; | ||
823 | return nid; | ||
824 | } | ||
825 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | ||
826 | #endif | ||
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index bde3906420df..849a975d3fa0 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -22,39 +22,11 @@ | |||
22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/mm.h> | ||
26 | #include <linux/bootmem.h> | 25 | #include <linux/bootmem.h> |
27 | #include <linux/memblock.h> | 26 | #include <linux/memblock.h> |
28 | #include <linux/mmzone.h> | ||
29 | #include <linux/highmem.h> | ||
30 | #include <linux/initrd.h> | ||
31 | #include <linux/nodemask.h> | ||
32 | #include <linux/module.h> | 27 | #include <linux/module.h> |
33 | #include <linux/kexec.h> | ||
34 | #include <linux/pfn.h> | ||
35 | #include <linux/swap.h> | ||
36 | #include <linux/acpi.h> | ||
37 | |||
38 | #include <asm/e820.h> | ||
39 | #include <asm/setup.h> | ||
40 | #include <asm/mmzone.h> | ||
41 | #include <asm/bios_ebda.h> | ||
42 | #include <asm/proto.h> | ||
43 | |||
44 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | ||
45 | EXPORT_SYMBOL(node_data); | ||
46 | |||
47 | /* | ||
48 | * numa interface - we expect the numa architecture specific code to have | ||
49 | * populated the following initialisation. | ||
50 | * | ||
51 | * 1) node_online_map - the map of all nodes configured (online) in the system | ||
52 | * 2) node_start_pfn - the starting page frame number for a node | ||
53 | * 3) node_end_pfn - the ending page frame number for a node | ||
54 | */ | ||
55 | unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly; | ||
56 | unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; | ||
57 | 28 | ||
29 | #include "numa_internal.h" | ||
58 | 30 | ||
59 | #ifdef CONFIG_DISCONTIGMEM | 31 | #ifdef CONFIG_DISCONTIGMEM |
60 | /* | 32 | /* |
@@ -99,108 +71,46 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, | |||
99 | } | 71 | } |
100 | #endif | 72 | #endif |
101 | 73 | ||
102 | extern unsigned long find_max_low_pfn(void); | ||
103 | extern unsigned long highend_pfn, highstart_pfn; | 74 | extern unsigned long highend_pfn, highstart_pfn; |
104 | 75 | ||
105 | #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) | 76 | #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) |
106 | 77 | ||
107 | unsigned long node_remap_size[MAX_NUMNODES]; | ||
108 | static void *node_remap_start_vaddr[MAX_NUMNODES]; | 78 | static void *node_remap_start_vaddr[MAX_NUMNODES]; |
109 | void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | 79 | void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); |
110 | 80 | ||
111 | static unsigned long kva_start_pfn; | ||
112 | static unsigned long kva_pages; | ||
113 | |||
114 | int __cpuinit numa_cpu_node(int cpu) | ||
115 | { | ||
116 | return apic->x86_32_numa_cpu_node(cpu); | ||
117 | } | ||
118 | |||
119 | /* | ||
120 | * FLAT - support for basic PC memory model with discontig enabled, essentially | ||
121 | * a single node with all available processors in it with a flat | ||
122 | * memory map. | ||
123 | */ | ||
124 | int __init get_memcfg_numa_flat(void) | ||
125 | { | ||
126 | printk(KERN_DEBUG "NUMA - single node, flat memory mode\n"); | ||
127 | |||
128 | node_start_pfn[0] = 0; | ||
129 | node_end_pfn[0] = max_pfn; | ||
130 | memblock_x86_register_active_regions(0, 0, max_pfn); | ||
131 | memory_present(0, 0, max_pfn); | ||
132 | node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn); | ||
133 | |||
134 | /* Indicate there is one node available. */ | ||
135 | nodes_clear(node_online_map); | ||
136 | node_set_online(0); | ||
137 | return 1; | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * Find the highest page frame number we have available for the node | ||
142 | */ | ||
143 | static void __init propagate_e820_map_node(int nid) | ||
144 | { | ||
145 | if (node_end_pfn[nid] > max_pfn) | ||
146 | node_end_pfn[nid] = max_pfn; | ||
147 | /* | ||
148 | * if a user has given mem=XXXX, then we need to make sure | ||
149 | * that the node _starts_ before that, too, not just ends | ||
150 | */ | ||
151 | if (node_start_pfn[nid] > max_pfn) | ||
152 | node_start_pfn[nid] = max_pfn; | ||
153 | BUG_ON(node_start_pfn[nid] > node_end_pfn[nid]); | ||
154 | } | ||
155 | |||
156 | /* | ||
157 | * Allocate memory for the pg_data_t for this node via a crude pre-bootmem | ||
158 | * method. For node zero take this from the bottom of memory, for | ||
159 | * subsequent nodes place them at node_remap_start_vaddr which contains | ||
160 | * node local data in physically node local memory. See setup_memory() | ||
161 | * for details. | ||
162 | */ | ||
163 | static void __init allocate_pgdat(int nid) | ||
164 | { | ||
165 | char buf[16]; | ||
166 | |||
167 | if (node_has_online_mem(nid) && node_remap_start_vaddr[nid]) | ||
168 | NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; | ||
169 | else { | ||
170 | unsigned long pgdat_phys; | ||
171 | pgdat_phys = memblock_find_in_range(min_low_pfn<<PAGE_SHIFT, | ||
172 | max_pfn_mapped<<PAGE_SHIFT, | ||
173 | sizeof(pg_data_t), | ||
174 | PAGE_SIZE); | ||
175 | NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT)); | ||
176 | memset(buf, 0, sizeof(buf)); | ||
177 | sprintf(buf, "NODE_DATA %d", nid); | ||
178 | memblock_x86_reserve_range(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf); | ||
179 | } | ||
180 | printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", | ||
181 | nid, (unsigned long)NODE_DATA(nid)); | ||
182 | } | ||
183 | |||
184 | /* | 81 | /* |
185 | * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel | 82 | * Remap memory allocator |
186 | * virtual address space (KVA) is reserved and portions of nodes are mapped | ||
187 | * using it. This is to allow node-local memory to be allocated for | ||
188 | * structures that would normally require ZONE_NORMAL. The memory is | ||
189 | * allocated with alloc_remap() and callers should be prepared to allocate | ||
190 | * from the bootmem allocator instead. | ||
191 | */ | 83 | */ |
192 | static unsigned long node_remap_start_pfn[MAX_NUMNODES]; | 84 | static unsigned long node_remap_start_pfn[MAX_NUMNODES]; |
193 | static void *node_remap_end_vaddr[MAX_NUMNODES]; | 85 | static void *node_remap_end_vaddr[MAX_NUMNODES]; |
194 | static void *node_remap_alloc_vaddr[MAX_NUMNODES]; | 86 | static void *node_remap_alloc_vaddr[MAX_NUMNODES]; |
195 | static unsigned long node_remap_offset[MAX_NUMNODES]; | ||
196 | 87 | ||
88 | /** | ||
89 | * alloc_remap - Allocate remapped memory | ||
90 | * @nid: NUMA node to allocate memory from | ||
91 | * @size: The size of allocation | ||
92 | * | ||
93 | * Allocate @size bytes from the remap area of NUMA node @nid. The | ||
94 | * size of the remap area is predetermined by init_alloc_remap() and | ||
95 | * only the callers considered there should call this function. For | ||
96 | * more info, please read the comment on top of init_alloc_remap(). | ||
97 | * | ||
98 | * The caller must be ready to handle allocation failure from this | ||
99 | * function and fall back to regular memory allocator in such cases. | ||
100 | * | ||
101 | * CONTEXT: | ||
102 | * Single CPU early boot context. | ||
103 | * | ||
104 | * RETURNS: | ||
105 | * Pointer to the allocated memory on success, %NULL on failure. | ||
106 | */ | ||
197 | void *alloc_remap(int nid, unsigned long size) | 107 | void *alloc_remap(int nid, unsigned long size) |
198 | { | 108 | { |
199 | void *allocation = node_remap_alloc_vaddr[nid]; | 109 | void *allocation = node_remap_alloc_vaddr[nid]; |
200 | 110 | ||
201 | size = ALIGN(size, L1_CACHE_BYTES); | 111 | size = ALIGN(size, L1_CACHE_BYTES); |
202 | 112 | ||
203 | if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) | 113 | if (!allocation || (allocation + size) > node_remap_end_vaddr[nid]) |
204 | return NULL; | 114 | return NULL; |
205 | 115 | ||
206 | node_remap_alloc_vaddr[nid] += size; | 116 | node_remap_alloc_vaddr[nid] += size; |
@@ -209,26 +119,6 @@ void *alloc_remap(int nid, unsigned long size) | |||
209 | return allocation; | 119 | return allocation; |
210 | } | 120 | } |
211 | 121 | ||
212 | static void __init remap_numa_kva(void) | ||
213 | { | ||
214 | void *vaddr; | ||
215 | unsigned long pfn; | ||
216 | int node; | ||
217 | |||
218 | for_each_online_node(node) { | ||
219 | printk(KERN_DEBUG "remap_numa_kva: node %d\n", node); | ||
220 | for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { | ||
221 | vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); | ||
222 | printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n", | ||
223 | (unsigned long)vaddr, | ||
224 | node_remap_start_pfn[node] + pfn); | ||
225 | set_pmd_pfn((ulong) vaddr, | ||
226 | node_remap_start_pfn[node] + pfn, | ||
227 | PAGE_KERNEL_LARGE); | ||
228 | } | ||
229 | } | ||
230 | } | ||
231 | |||
232 | #ifdef CONFIG_HIBERNATION | 122 | #ifdef CONFIG_HIBERNATION |
233 | /** | 123 | /** |
234 | * resume_map_numa_kva - add KVA mapping to the temporary page tables created | 124 | * resume_map_numa_kva - add KVA mapping to the temporary page tables created |
@@ -240,15 +130,16 @@ void resume_map_numa_kva(pgd_t *pgd_base) | |||
240 | int node; | 130 | int node; |
241 | 131 | ||
242 | for_each_online_node(node) { | 132 | for_each_online_node(node) { |
243 | unsigned long start_va, start_pfn, size, pfn; | 133 | unsigned long start_va, start_pfn, nr_pages, pfn; |
244 | 134 | ||
245 | start_va = (unsigned long)node_remap_start_vaddr[node]; | 135 | start_va = (unsigned long)node_remap_start_vaddr[node]; |
246 | start_pfn = node_remap_start_pfn[node]; | 136 | start_pfn = node_remap_start_pfn[node]; |
247 | size = node_remap_size[node]; | 137 | nr_pages = (node_remap_end_vaddr[node] - |
138 | node_remap_start_vaddr[node]) >> PAGE_SHIFT; | ||
248 | 139 | ||
249 | printk(KERN_DEBUG "%s: node %d\n", __func__, node); | 140 | printk(KERN_DEBUG "%s: node %d\n", __func__, node); |
250 | 141 | ||
251 | for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) { | 142 | for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) { |
252 | unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); | 143 | unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); |
253 | pgd_t *pgd = pgd_base + pgd_index(vaddr); | 144 | pgd_t *pgd = pgd_base + pgd_index(vaddr); |
254 | pud_t *pud = pud_offset(pgd, vaddr); | 145 | pud_t *pud = pud_offset(pgd, vaddr); |
@@ -264,132 +155,89 @@ void resume_map_numa_kva(pgd_t *pgd_base) | |||
264 | } | 155 | } |
265 | #endif | 156 | #endif |
266 | 157 | ||
267 | static __init unsigned long calculate_numa_remap_pages(void) | 158 | /** |
159 | * init_alloc_remap - Initialize remap allocator for a NUMA node | ||
160 | * @nid: NUMA node to initialize remap allocator for | ||
161 | * | ||
162 | * NUMA nodes may end up without any lowmem. As allocating pgdat and | ||
163 | * memmap on a different node with lowmem is inefficient, a special | ||
164 | * remap allocator is implemented which can be used by alloc_remap(). | ||
165 | * | ||
166 | * For each node, the amount of memory which will be necessary for | ||
167 | * pgdat and memmap is calculated and two memory areas of the size are | ||
168 | * allocated - one in the node and the other in lowmem; then, the area | ||
169 | * in the node is remapped to the lowmem area. | ||
170 | * | ||
171 | * As pgdat and memmap must be allocated in lowmem anyway, this | ||
172 | * doesn't waste lowmem address space; however, the actual lowmem | ||
173 | * which gets remapped over is wasted. The amount shouldn't be | ||
174 | * problematic on machines where this feature will be used. | ||
175 | * | ||
176 | * Initialization failure isn't fatal. alloc_remap() is used | ||
177 | * opportunistically and the callers will fall back to other memory | ||
178 | * allocation mechanisms on failure. | ||
179 | */ | ||
180 | void __init init_alloc_remap(int nid, u64 start, u64 end) | ||
268 | { | 181 | { |
269 | int nid; | 182 | unsigned long start_pfn = start >> PAGE_SHIFT; |
270 | unsigned long size, reserve_pages = 0; | 183 | unsigned long end_pfn = end >> PAGE_SHIFT; |
271 | 184 | unsigned long size, pfn; | |
272 | for_each_online_node(nid) { | 185 | u64 node_pa, remap_pa; |
273 | u64 node_kva_target; | 186 | void *remap_va; |
274 | u64 node_kva_final; | ||
275 | |||
276 | /* | ||
277 | * The acpi/srat node info can show hot-add memroy zones | ||
278 | * where memory could be added but not currently present. | ||
279 | */ | ||
280 | printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n", | ||
281 | nid, node_start_pfn[nid], node_end_pfn[nid]); | ||
282 | if (node_start_pfn[nid] > max_pfn) | ||
283 | continue; | ||
284 | if (!node_end_pfn[nid]) | ||
285 | continue; | ||
286 | if (node_end_pfn[nid] > max_pfn) | ||
287 | node_end_pfn[nid] = max_pfn; | ||
288 | |||
289 | /* ensure the remap includes space for the pgdat. */ | ||
290 | size = node_remap_size[nid] + sizeof(pg_data_t); | ||
291 | |||
292 | /* convert size to large (pmd size) pages, rounding up */ | ||
293 | size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; | ||
294 | /* now the roundup is correct, convert to PAGE_SIZE pages */ | ||
295 | size = size * PTRS_PER_PTE; | ||
296 | |||
297 | node_kva_target = round_down(node_end_pfn[nid] - size, | ||
298 | PTRS_PER_PTE); | ||
299 | node_kva_target <<= PAGE_SHIFT; | ||
300 | do { | ||
301 | node_kva_final = memblock_find_in_range(node_kva_target, | ||
302 | ((u64)node_end_pfn[nid])<<PAGE_SHIFT, | ||
303 | ((u64)size)<<PAGE_SHIFT, | ||
304 | LARGE_PAGE_BYTES); | ||
305 | node_kva_target -= LARGE_PAGE_BYTES; | ||
306 | } while (node_kva_final == MEMBLOCK_ERROR && | ||
307 | (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid])); | ||
308 | |||
309 | if (node_kva_final == MEMBLOCK_ERROR) | ||
310 | panic("Can not get kva ram\n"); | ||
311 | |||
312 | node_remap_size[nid] = size; | ||
313 | node_remap_offset[nid] = reserve_pages; | ||
314 | reserve_pages += size; | ||
315 | printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of" | ||
316 | " node %d at %llx\n", | ||
317 | size, nid, node_kva_final>>PAGE_SHIFT); | ||
318 | |||
319 | /* | ||
320 | * prevent kva address below max_low_pfn want it on system | ||
321 | * with less memory later. | ||
322 | * layout will be: KVA address , KVA RAM | ||
323 | * | ||
324 | * we are supposed to only record the one less then max_low_pfn | ||
325 | * but we could have some hole in high memory, and it will only | ||
326 | * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide | ||
327 | * to use it as free. | ||
328 | * So memblock_x86_reserve_range here, hope we don't run out of that array | ||
329 | */ | ||
330 | memblock_x86_reserve_range(node_kva_final, | ||
331 | node_kva_final+(((u64)size)<<PAGE_SHIFT), | ||
332 | "KVA RAM"); | ||
333 | |||
334 | node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT; | ||
335 | } | ||
336 | printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n", | ||
337 | reserve_pages); | ||
338 | return reserve_pages; | ||
339 | } | ||
340 | 187 | ||
341 | static void init_remap_allocator(int nid) | 188 | /* |
342 | { | 189 | * The acpi/srat node info can show hot-add memory zones where |
343 | node_remap_start_vaddr[nid] = pfn_to_kaddr( | 190 | * memory could be added but not currently present. |
344 | kva_start_pfn + node_remap_offset[nid]); | 191 | */ |
345 | node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + | 192 | printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n", |
346 | (node_remap_size[nid] * PAGE_SIZE); | 193 | nid, start_pfn, end_pfn); |
347 | node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + | 194 | |
348 | ALIGN(sizeof(pg_data_t), PAGE_SIZE); | 195 | /* calculate the necessary space aligned to large page size */ |
349 | 196 | size = node_memmap_size_bytes(nid, start_pfn, end_pfn); | |
350 | printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid, | 197 | size += ALIGN(sizeof(pg_data_t), PAGE_SIZE); |
351 | (ulong) node_remap_start_vaddr[nid], | 198 | size = ALIGN(size, LARGE_PAGE_BYTES); |
352 | (ulong) node_remap_end_vaddr[nid]); | 199 | |
200 | /* allocate node memory and the lowmem remap area */ | ||
201 | node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES); | ||
202 | if (node_pa == MEMBLOCK_ERROR) { | ||
203 | pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", | ||
204 | size, nid); | ||
205 | return; | ||
206 | } | ||
207 | memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM"); | ||
208 | |||
209 | remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, | ||
210 | max_low_pfn << PAGE_SHIFT, | ||
211 | size, LARGE_PAGE_BYTES); | ||
212 | if (remap_pa == MEMBLOCK_ERROR) { | ||
213 | pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", | ||
214 | size, nid); | ||
215 | memblock_x86_free_range(node_pa, node_pa + size); | ||
216 | return; | ||
217 | } | ||
218 | memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG"); | ||
219 | remap_va = phys_to_virt(remap_pa); | ||
220 | |||
221 | /* perform actual remap */ | ||
222 | for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE) | ||
223 | set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT), | ||
224 | (node_pa >> PAGE_SHIFT) + pfn, | ||
225 | PAGE_KERNEL_LARGE); | ||
226 | |||
227 | /* initialize remap allocator parameters */ | ||
228 | node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT; | ||
229 | node_remap_start_vaddr[nid] = remap_va; | ||
230 | node_remap_end_vaddr[nid] = remap_va + size; | ||
231 | node_remap_alloc_vaddr[nid] = remap_va; | ||
232 | |||
233 | printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n", | ||
234 | nid, node_pa, node_pa + size, remap_va, remap_va + size); | ||
353 | } | 235 | } |
354 | 236 | ||
355 | void __init initmem_init(void) | 237 | void __init initmem_init(void) |
356 | { | 238 | { |
357 | int nid; | 239 | x86_numa_init(); |
358 | long kva_target_pfn; | ||
359 | |||
360 | /* | ||
361 | * When mapping a NUMA machine we allocate the node_mem_map arrays | ||
362 | * from node local memory. They are then mapped directly into KVA | ||
363 | * between zone normal and vmalloc space. Calculate the size of | ||
364 | * this space and use it to adjust the boundary between ZONE_NORMAL | ||
365 | * and ZONE_HIGHMEM. | ||
366 | */ | ||
367 | |||
368 | get_memcfg_numa(); | ||
369 | numa_init_array(); | ||
370 | |||
371 | kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE); | ||
372 | 240 | ||
373 | kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); | ||
374 | do { | ||
375 | kva_start_pfn = memblock_find_in_range(kva_target_pfn<<PAGE_SHIFT, | ||
376 | max_low_pfn<<PAGE_SHIFT, | ||
377 | kva_pages<<PAGE_SHIFT, | ||
378 | PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT; | ||
379 | kva_target_pfn -= PTRS_PER_PTE; | ||
380 | } while (kva_start_pfn == MEMBLOCK_ERROR && kva_target_pfn > min_low_pfn); | ||
381 | |||
382 | if (kva_start_pfn == MEMBLOCK_ERROR) | ||
383 | panic("Can not get kva space\n"); | ||
384 | |||
385 | printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n", | ||
386 | kva_start_pfn, max_low_pfn); | ||
387 | printk(KERN_INFO "max_pfn = %lx\n", max_pfn); | ||
388 | |||
389 | /* avoid clash with initrd */ | ||
390 | memblock_x86_reserve_range(kva_start_pfn<<PAGE_SHIFT, | ||
391 | (kva_start_pfn + kva_pages)<<PAGE_SHIFT, | ||
392 | "KVA PG"); | ||
393 | #ifdef CONFIG_HIGHMEM | 241 | #ifdef CONFIG_HIGHMEM |
394 | highstart_pfn = highend_pfn = max_pfn; | 242 | highstart_pfn = highend_pfn = max_pfn; |
395 | if (max_pfn > max_low_pfn) | 243 | if (max_pfn > max_low_pfn) |
@@ -409,51 +257,9 @@ void __init initmem_init(void) | |||
409 | 257 | ||
410 | printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", | 258 | printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", |
411 | (ulong) pfn_to_kaddr(max_low_pfn)); | 259 | (ulong) pfn_to_kaddr(max_low_pfn)); |
412 | for_each_online_node(nid) { | ||
413 | init_remap_allocator(nid); | ||
414 | |||
415 | allocate_pgdat(nid); | ||
416 | } | ||
417 | remap_numa_kva(); | ||
418 | 260 | ||
419 | printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", | 261 | printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", |
420 | (ulong) pfn_to_kaddr(highstart_pfn)); | 262 | (ulong) pfn_to_kaddr(highstart_pfn)); |
421 | for_each_online_node(nid) | ||
422 | propagate_e820_map_node(nid); | ||
423 | |||
424 | for_each_online_node(nid) { | ||
425 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); | ||
426 | NODE_DATA(nid)->node_id = nid; | ||
427 | } | ||
428 | 263 | ||
429 | setup_bootmem_allocator(); | 264 | setup_bootmem_allocator(); |
430 | } | 265 | } |
431 | |||
432 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
433 | static int paddr_to_nid(u64 addr) | ||
434 | { | ||
435 | int nid; | ||
436 | unsigned long pfn = PFN_DOWN(addr); | ||
437 | |||
438 | for_each_node(nid) | ||
439 | if (node_start_pfn[nid] <= pfn && | ||
440 | pfn < node_end_pfn[nid]) | ||
441 | return nid; | ||
442 | |||
443 | return -1; | ||
444 | } | ||
445 | |||
446 | /* | ||
447 | * This function is used to ask node id BEFORE memmap and mem_section's | ||
448 | * initialization (pfn_to_nid() can't be used yet). | ||
449 | * If _PXM is not defined on ACPI's DSDT, node id must be found by this. | ||
450 | */ | ||
451 | int memory_add_physaddr_to_nid(u64 addr) | ||
452 | { | ||
453 | int nid = paddr_to_nid(addr); | ||
454 | return (nid >= 0) ? nid : 0; | ||
455 | } | ||
456 | |||
457 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | ||
458 | #endif | ||
459 | |||
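The numa_32.c rework above replaces the old KVA reservation dance with init_alloc_remap(): per node it sizes pgdat plus memmap, reserves one area inside the node and an equally sized lowmem window, remaps the former over the latter with set_pmd_pfn(), and alloc_remap() then degenerates into a cache-line-aligned bump allocator over that window (the bounds check is also relaxed from >= to > so a request that exactly fills the window still succeeds). A stripped-down sketch of that final allocation step, with invented globals:

	#include <stddef.h>
	#include <stdint.h>

	#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((uintptr_t)(a) - 1))

	static char *remap_alloc_ptr;	/* current cursor inside the window */
	static char *remap_end;		/* end of the remapped window       */

	static void *remap_bump_alloc(size_t size)
	{
		void *p = remap_alloc_ptr;

		size = ALIGN_UP(size, 64);	/* L1_CACHE_BYTES-style alignment */
		if (!remap_alloc_ptr || remap_alloc_ptr + size > remap_end)
			return NULL;		/* caller falls back to bootmem   */
		remap_alloc_ptr += size;
		return p;
	}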
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index e8c00cc72033..dd27f401f0a0 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -2,646 +2,13 @@ | |||
2 | * Generic VM initialization for x86-64 NUMA setups. | 2 | * Generic VM initialization for x86-64 NUMA setups. |
3 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. | 3 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. |
4 | */ | 4 | */ |
5 | #include <linux/kernel.h> | ||
6 | #include <linux/mm.h> | ||
7 | #include <linux/string.h> | ||
8 | #include <linux/init.h> | ||
9 | #include <linux/bootmem.h> | 5 | #include <linux/bootmem.h> |
10 | #include <linux/memblock.h> | ||
11 | #include <linux/mmzone.h> | ||
12 | #include <linux/ctype.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/nodemask.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/acpi.h> | ||
17 | |||
18 | #include <asm/e820.h> | ||
19 | #include <asm/proto.h> | ||
20 | #include <asm/dma.h> | ||
21 | #include <asm/acpi.h> | ||
22 | #include <asm/amd_nb.h> | ||
23 | 6 | ||
24 | #include "numa_internal.h" | 7 | #include "numa_internal.h" |
25 | 8 | ||
26 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | ||
27 | EXPORT_SYMBOL(node_data); | ||
28 | |||
29 | nodemask_t numa_nodes_parsed __initdata; | ||
30 | |||
31 | struct memnode memnode; | ||
32 | |||
33 | static unsigned long __initdata nodemap_addr; | ||
34 | static unsigned long __initdata nodemap_size; | ||
35 | |||
36 | static struct numa_meminfo numa_meminfo __initdata; | ||
37 | |||
38 | static int numa_distance_cnt; | ||
39 | static u8 *numa_distance; | ||
40 | |||
41 | /* | ||
42 | * Given a shift value, try to populate memnodemap[] | ||
43 | * Returns : | ||
44 | * 1 if OK | ||
45 | * 0 if memnodmap[] too small (of shift too small) | ||
46 | * -1 if node overlap or lost ram (shift too big) | ||
47 | */ | ||
48 | static int __init populate_memnodemap(const struct numa_meminfo *mi, int shift) | ||
49 | { | ||
50 | unsigned long addr, end; | ||
51 | int i, res = -1; | ||
52 | |||
53 | memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize); | ||
54 | for (i = 0; i < mi->nr_blks; i++) { | ||
55 | addr = mi->blk[i].start; | ||
56 | end = mi->blk[i].end; | ||
57 | if (addr >= end) | ||
58 | continue; | ||
59 | if ((end >> shift) >= memnodemapsize) | ||
60 | return 0; | ||
61 | do { | ||
62 | if (memnodemap[addr >> shift] != NUMA_NO_NODE) | ||
63 | return -1; | ||
64 | memnodemap[addr >> shift] = mi->blk[i].nid; | ||
65 | addr += (1UL << shift); | ||
66 | } while (addr < end); | ||
67 | res = 1; | ||
68 | } | ||
69 | return res; | ||
70 | } | ||
71 | |||
72 | static int __init allocate_cachealigned_memnodemap(void) | ||
73 | { | ||
74 | unsigned long addr; | ||
75 | |||
76 | memnodemap = memnode.embedded_map; | ||
77 | if (memnodemapsize <= ARRAY_SIZE(memnode.embedded_map)) | ||
78 | return 0; | ||
79 | |||
80 | addr = 0x8000; | ||
81 | nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); | ||
82 | nodemap_addr = memblock_find_in_range(addr, get_max_mapped(), | ||
83 | nodemap_size, L1_CACHE_BYTES); | ||
84 | if (nodemap_addr == MEMBLOCK_ERROR) { | ||
85 | printk(KERN_ERR | ||
86 | "NUMA: Unable to allocate Memory to Node hash map\n"); | ||
87 | nodemap_addr = nodemap_size = 0; | ||
88 | return -1; | ||
89 | } | ||
90 | memnodemap = phys_to_virt(nodemap_addr); | ||
91 | memblock_x86_reserve_range(nodemap_addr, nodemap_addr + nodemap_size, "MEMNODEMAP"); | ||
92 | |||
93 | printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n", | ||
94 | nodemap_addr, nodemap_addr + nodemap_size); | ||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | /* | ||
99 | * The LSB of all start and end addresses in the node map is the value of the | ||
100 | * maximum possible shift. | ||
101 | */ | ||
102 | static int __init extract_lsb_from_nodes(const struct numa_meminfo *mi) | ||
103 | { | ||
104 | int i, nodes_used = 0; | ||
105 | unsigned long start, end; | ||
106 | unsigned long bitfield = 0, memtop = 0; | ||
107 | |||
108 | for (i = 0; i < mi->nr_blks; i++) { | ||
109 | start = mi->blk[i].start; | ||
110 | end = mi->blk[i].end; | ||
111 | if (start >= end) | ||
112 | continue; | ||
113 | bitfield |= start; | ||
114 | nodes_used++; | ||
115 | if (end > memtop) | ||
116 | memtop = end; | ||
117 | } | ||
118 | if (nodes_used <= 1) | ||
119 | i = 63; | ||
120 | else | ||
121 | i = find_first_bit(&bitfield, sizeof(unsigned long)*8); | ||
122 | memnodemapsize = (memtop >> i)+1; | ||
123 | return i; | ||
124 | } | ||
125 | |||
126 | static int __init compute_hash_shift(const struct numa_meminfo *mi) | ||
127 | { | ||
128 | int shift; | ||
129 | |||
130 | shift = extract_lsb_from_nodes(mi); | ||
131 | if (allocate_cachealigned_memnodemap()) | ||
132 | return -1; | ||
133 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", | ||
134 | shift); | ||
135 | |||
136 | if (populate_memnodemap(mi, shift) != 1) { | ||
137 | printk(KERN_INFO "Your memory is not aligned you need to " | ||
138 | "rebuild your kernel with a bigger NODEMAPSIZE " | ||
139 | "shift=%d\n", shift); | ||
140 | return -1; | ||
141 | } | ||
142 | return shift; | ||
143 | } | ||
144 | |||
145 | int __meminit __early_pfn_to_nid(unsigned long pfn) | ||
146 | { | ||
147 | return phys_to_nid(pfn << PAGE_SHIFT); | ||
148 | } | ||
149 | |||
150 | static void * __init early_node_mem(int nodeid, unsigned long start, | ||
151 | unsigned long end, unsigned long size, | ||
152 | unsigned long align) | ||
153 | { | ||
154 | unsigned long mem; | ||
155 | |||
156 | /* | ||
157 | * put it on high as possible | ||
158 | * something will go with NODE_DATA | ||
159 | */ | ||
160 | if (start < (MAX_DMA_PFN<<PAGE_SHIFT)) | ||
161 | start = MAX_DMA_PFN<<PAGE_SHIFT; | ||
162 | if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) && | ||
163 | end > (MAX_DMA32_PFN<<PAGE_SHIFT)) | ||
164 | start = MAX_DMA32_PFN<<PAGE_SHIFT; | ||
165 | mem = memblock_x86_find_in_range_node(nodeid, start, end, size, align); | ||
166 | if (mem != MEMBLOCK_ERROR) | ||
167 | return __va(mem); | ||
168 | |||
169 | /* extend the search scope */ | ||
170 | end = max_pfn_mapped << PAGE_SHIFT; | ||
171 | start = MAX_DMA_PFN << PAGE_SHIFT; | ||
172 | mem = memblock_find_in_range(start, end, size, align); | ||
173 | if (mem != MEMBLOCK_ERROR) | ||
174 | return __va(mem); | ||
175 | |||
176 | printk(KERN_ERR "Cannot find %lu bytes in node %d\n", | ||
177 | size, nodeid); | ||
178 | |||
179 | return NULL; | ||
180 | } | ||
181 | |||
182 | static int __init numa_add_memblk_to(int nid, u64 start, u64 end, | ||
183 | struct numa_meminfo *mi) | ||
184 | { | ||
185 | /* ignore zero length blks */ | ||
186 | if (start == end) | ||
187 | return 0; | ||
188 | |||
189 | /* whine about and ignore invalid blks */ | ||
190 | if (start > end || nid < 0 || nid >= MAX_NUMNODES) { | ||
191 | pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n", | ||
192 | nid, start, end); | ||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | if (mi->nr_blks >= NR_NODE_MEMBLKS) { | ||
197 | pr_err("NUMA: too many memblk ranges\n"); | ||
198 | return -EINVAL; | ||
199 | } | ||
200 | |||
201 | mi->blk[mi->nr_blks].start = start; | ||
202 | mi->blk[mi->nr_blks].end = end; | ||
203 | mi->blk[mi->nr_blks].nid = nid; | ||
204 | mi->nr_blks++; | ||
205 | return 0; | ||
206 | } | ||
207 | |||
208 | /** | ||
209 | * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo | ||
210 | * @idx: Index of memblk to remove | ||
211 | * @mi: numa_meminfo to remove memblk from | ||
212 | * | ||
213 | * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and | ||
214 | * decrementing @mi->nr_blks. | ||
215 | */ | ||
216 | void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi) | ||
217 | { | ||
218 | mi->nr_blks--; | ||
219 | memmove(&mi->blk[idx], &mi->blk[idx + 1], | ||
220 | (mi->nr_blks - idx) * sizeof(mi->blk[0])); | ||
221 | } | ||
222 | |||
223 | /** | ||
224 | * numa_add_memblk - Add one numa_memblk to numa_meminfo | ||
225 | * @nid: NUMA node ID of the new memblk | ||
226 | * @start: Start address of the new memblk | ||
227 | * @end: End address of the new memblk | ||
228 | * | ||
229 | * Add a new memblk to the default numa_meminfo. | ||
230 | * | ||
231 | * RETURNS: | ||
232 | * 0 on success, -errno on failure. | ||
233 | */ | ||
234 | int __init numa_add_memblk(int nid, u64 start, u64 end) | ||
235 | { | ||
236 | return numa_add_memblk_to(nid, start, end, &numa_meminfo); | ||
237 | } | ||
238 | |||
239 | /* Initialize bootmem allocator for a node */ | ||
240 | void __init | ||
241 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | ||
242 | { | ||
243 | unsigned long start_pfn, last_pfn, nodedata_phys; | ||
244 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | ||
245 | int nid; | ||
246 | |||
247 | if (!end) | ||
248 | return; | ||
249 | |||
250 | /* | ||
251 | * Don't confuse VM with a node that doesn't have the | ||
252 | * minimum amount of memory: | ||
253 | */ | ||
254 | if (end && (end - start) < NODE_MIN_SIZE) | ||
255 | return; | ||
256 | |||
257 | start = roundup(start, ZONE_ALIGN); | ||
258 | |||
259 | printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid, | ||
260 | start, end); | ||
261 | |||
262 | start_pfn = start >> PAGE_SHIFT; | ||
263 | last_pfn = end >> PAGE_SHIFT; | ||
264 | |||
265 | node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size, | ||
266 | SMP_CACHE_BYTES); | ||
267 | if (node_data[nodeid] == NULL) | ||
268 | return; | ||
269 | nodedata_phys = __pa(node_data[nodeid]); | ||
270 | memblock_x86_reserve_range(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA"); | ||
271 | printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, | ||
272 | nodedata_phys + pgdat_size - 1); | ||
273 | nid = phys_to_nid(nodedata_phys); | ||
274 | if (nid != nodeid) | ||
275 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); | ||
276 | |||
277 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); | ||
278 | NODE_DATA(nodeid)->node_id = nodeid; | ||
279 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | ||
280 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; | ||
281 | |||
282 | node_set_online(nodeid); | ||
283 | } | ||
284 | |||
285 | /** | ||
286 | * numa_cleanup_meminfo - Cleanup a numa_meminfo | ||
287 | * @mi: numa_meminfo to clean up | ||
288 | * | ||
289 | * Sanitize @mi by merging and removing unnecessary memblks. Also check for | ||
290 | * conflicts and clear unused memblks. | ||
291 | * | ||
292 | * RETURNS: | ||
293 | * 0 on success, -errno on failure. | ||
294 | */ | ||
295 | int __init numa_cleanup_meminfo(struct numa_meminfo *mi) | ||
296 | { | ||
297 | const u64 low = 0; | ||
298 | const u64 high = (u64)max_pfn << PAGE_SHIFT; | ||
299 | int i, j, k; | ||
300 | |||
301 | for (i = 0; i < mi->nr_blks; i++) { | ||
302 | struct numa_memblk *bi = &mi->blk[i]; | ||
303 | |||
304 | /* make sure all blocks are inside the limits */ | ||
305 | bi->start = max(bi->start, low); | ||
306 | bi->end = min(bi->end, high); | ||
307 | |||
308 | /* and there's no empty block */ | ||
309 | if (bi->start == bi->end) { | ||
310 | numa_remove_memblk_from(i--, mi); | ||
311 | continue; | ||
312 | } | ||
313 | |||
314 | for (j = i + 1; j < mi->nr_blks; j++) { | ||
315 | struct numa_memblk *bj = &mi->blk[j]; | ||
316 | unsigned long start, end; | ||
317 | |||
318 | /* | ||
319 | * See whether there are overlapping blocks. Whine | ||
320 | * about but allow overlaps of the same nid. They | ||
321 | * will be merged below. | ||
322 | */ | ||
323 | if (bi->end > bj->start && bi->start < bj->end) { | ||
324 | if (bi->nid != bj->nid) { | ||
325 | pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n", | ||
326 | bi->nid, bi->start, bi->end, | ||
327 | bj->nid, bj->start, bj->end); | ||
328 | return -EINVAL; | ||
329 | } | ||
330 | pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n", | ||
331 | bi->nid, bi->start, bi->end, | ||
332 | bj->start, bj->end); | ||
333 | } | ||
334 | |||
335 | /* | ||
336 | * Join together blocks on the same node, holes | ||
337 | * between which don't overlap with memory on other | ||
338 | * nodes. | ||
339 | */ | ||
340 | if (bi->nid != bj->nid) | ||
341 | continue; | ||
342 | start = max(min(bi->start, bj->start), low); | ||
343 | end = min(max(bi->end, bj->end), high); | ||
344 | for (k = 0; k < mi->nr_blks; k++) { | ||
345 | struct numa_memblk *bk = &mi->blk[k]; | ||
346 | |||
347 | if (bi->nid == bk->nid) | ||
348 | continue; | ||
349 | if (start < bk->end && end > bk->start) | ||
350 | break; | ||
351 | } | ||
352 | if (k < mi->nr_blks) | ||
353 | continue; | ||
354 | printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n", | ||
355 | bi->nid, bi->start, bi->end, bj->start, bj->end, | ||
356 | start, end); | ||
357 | bi->start = start; | ||
358 | bi->end = end; | ||
359 | numa_remove_memblk_from(j--, mi); | ||
360 | } | ||
361 | } | ||
362 | |||
363 | for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) { | ||
364 | mi->blk[i].start = mi->blk[i].end = 0; | ||
365 | mi->blk[i].nid = NUMA_NO_NODE; | ||
366 | } | ||
367 | |||
368 | return 0; | ||
369 | } | ||
370 | |||
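For readers skimming the hunk above, the merge rule in numa_cleanup_meminfo() boils down to a few lines: two blocks may be joined only when they share a nid and the combined range would not swallow memory owned by another nid. The sketch below is illustrative only (simplified types, not part of the patch):

	/* Illustrative sketch of the merge rule above; 'struct blk' is a
	 * simplified stand-in for struct numa_memblk. */
	struct blk { unsigned long long start, end; int nid; };

	static int can_merge(const struct blk *a, const struct blk *b,
			     const struct blk *others, int n_others)
	{
		unsigned long long start = a->start < b->start ? a->start : b->start;
		unsigned long long end   = a->end   > b->end   ? a->end   : b->end;
		int k;

		if (a->nid != b->nid)
			return 0;
		for (k = 0; k < n_others; k++) {
			if (others[k].nid == a->nid)
				continue;
			/* the joined range must not cover another node's memory */
			if (start < others[k].end && end > others[k].start)
				return 0;
		}
		return 1;
	}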
371 | /* | ||
372 | * Set nodes, which have memory in @mi, in *@nodemask. | ||
373 | */ | ||
374 | static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, | ||
375 | const struct numa_meminfo *mi) | ||
376 | { | ||
377 | int i; | ||
378 | |||
379 | for (i = 0; i < ARRAY_SIZE(mi->blk); i++) | ||
380 | if (mi->blk[i].start != mi->blk[i].end && | ||
381 | mi->blk[i].nid != NUMA_NO_NODE) | ||
382 | node_set(mi->blk[i].nid, *nodemask); | ||
383 | } | ||
384 | |||
385 | /** | ||
386 | * numa_reset_distance - Reset NUMA distance table | ||
387 | * | ||
388 | * The current table is freed. The next numa_set_distance() call will | ||
389 | * create a new one. | ||
390 | */ | ||
391 | void __init numa_reset_distance(void) | ||
392 | { | ||
393 | size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]); | ||
394 | |||
395 | /* numa_distance could be 1LU marking allocation failure, test cnt */ | ||
396 | if (numa_distance_cnt) | ||
397 | memblock_x86_free_range(__pa(numa_distance), | ||
398 | __pa(numa_distance) + size); | ||
399 | numa_distance_cnt = 0; | ||
400 | numa_distance = NULL; /* enable table creation */ | ||
401 | } | ||
402 | |||
403 | static int __init numa_alloc_distance(void) | ||
404 | { | ||
405 | nodemask_t nodes_parsed; | ||
406 | size_t size; | ||
407 | int i, j, cnt = 0; | ||
408 | u64 phys; | ||
409 | |||
410 | /* size the new table and allocate it */ | ||
411 | nodes_parsed = numa_nodes_parsed; | ||
412 | numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo); | ||
413 | |||
414 | for_each_node_mask(i, nodes_parsed) | ||
415 | cnt = i; | ||
416 | cnt++; | ||
417 | size = cnt * cnt * sizeof(numa_distance[0]); | ||
418 | |||
419 | phys = memblock_find_in_range(0, (u64)max_pfn_mapped << PAGE_SHIFT, | ||
420 | size, PAGE_SIZE); | ||
421 | if (phys == MEMBLOCK_ERROR) { | ||
422 | pr_warning("NUMA: Warning: can't allocate distance table!\n"); | ||
423 | /* don't retry until explicitly reset */ | ||
424 | numa_distance = (void *)1LU; | ||
425 | return -ENOMEM; | ||
426 | } | ||
427 | memblock_x86_reserve_range(phys, phys + size, "NUMA DIST"); | ||
428 | |||
429 | numa_distance = __va(phys); | ||
430 | numa_distance_cnt = cnt; | ||
431 | |||
432 | /* fill with the default distances */ | ||
433 | for (i = 0; i < cnt; i++) | ||
434 | for (j = 0; j < cnt; j++) | ||
435 | numa_distance[i * cnt + j] = i == j ? | ||
436 | LOCAL_DISTANCE : REMOTE_DISTANCE; | ||
437 | printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt); | ||
438 | |||
439 | return 0; | ||
440 | } | ||
441 | |||
442 | /** | ||
443 | * numa_set_distance - Set NUMA distance from one NUMA to another | ||
444 | * @from: the 'from' node to set distance | ||
445 | * @to: the 'to' node to set distance | ||
446 | * @distance: NUMA distance | ||
447 | * | ||
448 | * Set the distance from node @from to @to to @distance. If distance table | ||
449 | * doesn't exist, one which is large enough to accommodate all the currently | ||
450 | * known nodes will be created. | ||
451 | * | ||
452 | * If such table cannot be allocated, a warning is printed and further | ||
453 | * calls are ignored until the distance table is reset with | ||
454 | * numa_reset_distance(). | ||
455 | * | ||
456 | * If @from or @to is higher than the highest known node at the time of | ||
457 | * table creation or @distance doesn't make sense, the call is ignored. | ||
458 | * This is to allow simplification of specific NUMA config implementations. | ||
459 | */ | ||
460 | void __init numa_set_distance(int from, int to, int distance) | ||
461 | { | ||
462 | if (!numa_distance && numa_alloc_distance() < 0) | ||
463 | return; | ||
464 | |||
465 | if (from >= numa_distance_cnt || to >= numa_distance_cnt) { | ||
466 | printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n", | ||
467 | from, to, distance); | ||
468 | return; | ||
469 | } | ||
470 | |||
471 | if ((u8)distance != distance || | ||
472 | (from == to && distance != LOCAL_DISTANCE)) { | ||
473 | pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n", | ||
474 | from, to, distance); | ||
475 | return; | ||
476 | } | ||
477 | |||
478 | numa_distance[from * numa_distance_cnt + to] = distance; | ||
479 | } | ||
480 | |||
481 | int __node_distance(int from, int to) | ||
482 | { | ||
483 | if (from >= numa_distance_cnt || to >= numa_distance_cnt) | ||
484 | return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; | ||
485 | return numa_distance[from * numa_distance_cnt + to]; | ||
486 | } | ||
487 | EXPORT_SYMBOL(__node_distance); | ||
488 | |||
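As the kernel-doc above spells out, the distance table is a flat cnt x cnt byte matrix indexed as table[from * cnt + to], and numa_set_distance() silently drops out-of-range or nonsensical entries. A minimal sketch of how a SLIT-style parser could feed it ('slit' and 'nr' are hypothetical inputs, not symbols from this patch):

	/* Sketch only: feed a square distance matrix into the table above.
	 * numa_set_distance() is the real helper and already rejects bogus
	 * values itself. */
	static void __init fill_numa_distances(const u8 *slit, int nr)
	{
		int from, to;

		for (from = 0; from < nr; from++)
			for (to = 0; to < nr; to++)
				numa_set_distance(from, to, slit[from * nr + to]);
	}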
489 | /* | ||
490 | * Sanity check to catch more bad NUMA configurations (they are amazingly | ||
491 | * common). Make sure the nodes cover all memory. | ||
492 | */ | ||
493 | static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) | ||
494 | { | ||
495 | unsigned long numaram, e820ram; | ||
496 | int i; | ||
497 | |||
498 | numaram = 0; | ||
499 | for (i = 0; i < mi->nr_blks; i++) { | ||
500 | unsigned long s = mi->blk[i].start >> PAGE_SHIFT; | ||
501 | unsigned long e = mi->blk[i].end >> PAGE_SHIFT; | ||
502 | numaram += e - s; | ||
503 | numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); | ||
504 | if ((long)numaram < 0) | ||
505 | numaram = 0; | ||
506 | } | ||
507 | |||
508 | e820ram = max_pfn - (memblock_x86_hole_size(0, | ||
509 | max_pfn << PAGE_SHIFT) >> PAGE_SHIFT); | ||
510 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ | ||
511 | if ((long)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { | ||
512 | printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n", | ||
513 | (numaram << PAGE_SHIFT) >> 20, | ||
514 | (e820ram << PAGE_SHIFT) >> 20); | ||
515 | return false; | ||
516 | } | ||
517 | return true; | ||
518 | } | ||
519 | |||
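For concreteness (assuming 4 KiB pages, i.e. PAGE_SHIFT = 12), the slack used above works out to 1 << (20 - 12) = 256 pages = 256 * 4096 bytes = 1 MiB: losing the usual few stray pages still passes the check, while a NUMA setup that misses a whole node's worth of e820 RAM is rejected.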
520 | static int __init numa_register_memblks(struct numa_meminfo *mi) | ||
521 | { | ||
522 | int i, nid; | ||
523 | |||
524 | /* Account for nodes with cpus and no memory */ | ||
525 | node_possible_map = numa_nodes_parsed; | ||
526 | numa_nodemask_from_meminfo(&node_possible_map, mi); | ||
527 | if (WARN_ON(nodes_empty(node_possible_map))) | ||
528 | return -EINVAL; | ||
529 | |||
530 | memnode_shift = compute_hash_shift(mi); | ||
531 | if (memnode_shift < 0) { | ||
532 | printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n"); | ||
533 | return -EINVAL; | ||
534 | } | ||
535 | |||
536 | for (i = 0; i < mi->nr_blks; i++) | ||
537 | memblock_x86_register_active_regions(mi->blk[i].nid, | ||
538 | mi->blk[i].start >> PAGE_SHIFT, | ||
539 | mi->blk[i].end >> PAGE_SHIFT); | ||
540 | |||
541 | /* for out of order entries */ | ||
542 | sort_node_map(); | ||
543 | if (!numa_meminfo_cover_memory(mi)) | ||
544 | return -EINVAL; | ||
545 | |||
546 | /* Finally register nodes. */ | ||
547 | for_each_node_mask(nid, node_possible_map) { | ||
548 | u64 start = (u64)max_pfn << PAGE_SHIFT; | ||
549 | u64 end = 0; | ||
550 | |||
551 | for (i = 0; i < mi->nr_blks; i++) { | ||
552 | if (nid != mi->blk[i].nid) | ||
553 | continue; | ||
554 | start = min(mi->blk[i].start, start); | ||
555 | end = max(mi->blk[i].end, end); | ||
556 | } | ||
557 | |||
558 | if (start < end) | ||
559 | setup_node_bootmem(nid, start, end); | ||
560 | } | ||
561 | |||
562 | return 0; | ||
563 | } | ||
564 | |||
565 | /** | ||
566 | * dummy_numa_init - Fallback dummy NUMA init | ||
567 | * | ||
568 | * Used if there's no underlying NUMA architecture, NUMA initialization | ||
569 | * fails, or NUMA is disabled on the command line. | ||
570 | * | ||
571 | * Must online at least one node and add memory blocks that cover all | ||
572 | * allowed memory. This function must not fail. | ||
573 | */ | ||
574 | static int __init dummy_numa_init(void) | ||
575 | { | ||
576 | printk(KERN_INFO "%s\n", | ||
577 | numa_off ? "NUMA turned off" : "No NUMA configuration found"); | ||
578 | printk(KERN_INFO "Faking a node at %016lx-%016lx\n", | ||
579 | 0LU, max_pfn << PAGE_SHIFT); | ||
580 | |||
581 | node_set(0, numa_nodes_parsed); | ||
582 | numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT); | ||
583 | |||
584 | return 0; | ||
585 | } | ||
586 | |||
587 | static int __init numa_init(int (*init_func)(void)) | ||
588 | { | ||
589 | int i; | ||
590 | int ret; | ||
591 | |||
592 | for (i = 0; i < MAX_LOCAL_APIC; i++) | ||
593 | set_apicid_to_node(i, NUMA_NO_NODE); | ||
594 | |||
595 | nodes_clear(numa_nodes_parsed); | ||
596 | nodes_clear(node_possible_map); | ||
597 | nodes_clear(node_online_map); | ||
598 | memset(&numa_meminfo, 0, sizeof(numa_meminfo)); | ||
599 | remove_all_active_ranges(); | ||
600 | numa_reset_distance(); | ||
601 | |||
602 | ret = init_func(); | ||
603 | if (ret < 0) | ||
604 | return ret; | ||
605 | ret = numa_cleanup_meminfo(&numa_meminfo); | ||
606 | if (ret < 0) | ||
607 | return ret; | ||
608 | |||
609 | numa_emulation(&numa_meminfo, numa_distance_cnt); | ||
610 | |||
611 | ret = numa_register_memblks(&numa_meminfo); | ||
612 | if (ret < 0) | ||
613 | return ret; | ||
614 | |||
615 | for (i = 0; i < nr_cpu_ids; i++) { | ||
616 | int nid = early_cpu_to_node(i); | ||
617 | |||
618 | if (nid == NUMA_NO_NODE) | ||
619 | continue; | ||
620 | if (!node_online(nid)) | ||
621 | numa_clear_node(i); | ||
622 | } | ||
623 | numa_init_array(); | ||
624 | return 0; | ||
625 | } | ||
626 | |||
627 | void __init initmem_init(void) | 9 | void __init initmem_init(void) |
628 | { | 10 | { |
629 | int ret; | 11 | x86_numa_init(); |
630 | |||
631 | if (!numa_off) { | ||
632 | #ifdef CONFIG_ACPI_NUMA | ||
633 | ret = numa_init(x86_acpi_numa_init); | ||
634 | if (!ret) | ||
635 | return; | ||
636 | #endif | ||
637 | #ifdef CONFIG_AMD_NUMA | ||
638 | ret = numa_init(amd_numa_init); | ||
639 | if (!ret) | ||
640 | return; | ||
641 | #endif | ||
642 | } | ||
643 | |||
644 | numa_init(dummy_numa_init); | ||
645 | } | 12 | } |
646 | 13 | ||
647 | unsigned long __init numa_free_all_bootmem(void) | 14 | unsigned long __init numa_free_all_bootmem(void) |
@@ -656,12 +23,3 @@ unsigned long __init numa_free_all_bootmem(void) | |||
656 | 23 | ||
657 | return pages; | 24 | return pages; |
658 | } | 25 | } |
659 | |||
660 | int __cpuinit numa_cpu_node(int cpu) | ||
661 | { | ||
662 | int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); | ||
663 | |||
664 | if (apicid != BAD_APICID) | ||
665 | return __apicid_to_node[apicid]; | ||
666 | return NUMA_NO_NODE; | ||
667 | } | ||
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index ad091e4cff17..d0ed086b6247 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/errno.h> | 5 | #include <linux/errno.h> |
6 | #include <linux/topology.h> | 6 | #include <linux/topology.h> |
7 | #include <linux/memblock.h> | 7 | #include <linux/memblock.h> |
8 | #include <linux/bootmem.h> | ||
8 | #include <asm/dma.h> | 9 | #include <asm/dma.h> |
9 | 10 | ||
10 | #include "numa_internal.h" | 11 | #include "numa_internal.h" |
@@ -84,7 +85,13 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei, | |||
84 | nr_nodes = MAX_NUMNODES; | 85 | nr_nodes = MAX_NUMNODES; |
85 | } | 86 | } |
86 | 87 | ||
87 | size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes; | 88 | /* |
89 | * Calculate target node size. x86_32 freaks on __udivdi3() so do | ||
90 | * the division in ulong number of pages and convert back. | ||
91 | */ | ||
92 | size = max_addr - addr - memblock_x86_hole_size(addr, max_addr); | ||
93 | size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes); | ||
94 | |||
88 | /* | 95 | /* |
89 | * Calculate the number of big nodes that can be allocated as a result | 96 | * Calculate the number of big nodes that can be allocated as a result |
90 | * of consolidating the remainder. | 97 | * of consolidating the remainder. |
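The comment added in the hunk above deserves a concrete illustration: dividing a u64 by an int would drag in __udivdi3() on x86_32, so the byte count is first converted to pages (an unsigned long), divided there, and converted back. A hedged sketch of the same idea with explicit types ('bytes' and 'nr_nodes' are illustrative names):

	/* Sketch: 32-bit-safe way to split 'bytes' into nr_nodes equal,
	 * page-aligned chunks without a 64-bit division. */
	static unsigned long long chunk_bytes(unsigned long long bytes, int nr_nodes)
	{
		unsigned long pages = bytes >> PAGE_SHIFT;	/* fits an unsigned long */

		return (unsigned long long)(pages / nr_nodes) << PAGE_SHIFT;
	}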
@@ -226,7 +233,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei, | |||
226 | */ | 233 | */ |
227 | while (nodes_weight(physnode_mask)) { | 234 | while (nodes_weight(physnode_mask)) { |
228 | for_each_node_mask(i, physnode_mask) { | 235 | for_each_node_mask(i, physnode_mask) { |
229 | u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT; | 236 | u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN); |
230 | u64 start, limit, end; | 237 | u64 start, limit, end; |
231 | int phys_blk; | 238 | int phys_blk; |
232 | 239 | ||
@@ -298,7 +305,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) | |||
298 | { | 305 | { |
299 | static struct numa_meminfo ei __initdata; | 306 | static struct numa_meminfo ei __initdata; |
300 | static struct numa_meminfo pi __initdata; | 307 | static struct numa_meminfo pi __initdata; |
301 | const u64 max_addr = max_pfn << PAGE_SHIFT; | 308 | const u64 max_addr = PFN_PHYS(max_pfn); |
302 | u8 *phys_dist = NULL; | 309 | u8 *phys_dist = NULL; |
303 | size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]); | 310 | size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]); |
304 | int max_emu_nid, dfl_phys_nid; | 311 | int max_emu_nid, dfl_phys_nid; |
@@ -342,8 +349,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) | |||
342 | if (numa_dist_cnt) { | 349 | if (numa_dist_cnt) { |
343 | u64 phys; | 350 | u64 phys; |
344 | 351 | ||
345 | phys = memblock_find_in_range(0, | 352 | phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), |
346 | (u64)max_pfn_mapped << PAGE_SHIFT, | ||
347 | phys_size, PAGE_SIZE); | 353 | phys_size, PAGE_SIZE); |
348 | if (phys == MEMBLOCK_ERROR) { | 354 | if (phys == MEMBLOCK_ERROR) { |
349 | pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); | 355 | pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); |
@@ -454,10 +460,9 @@ void __cpuinit numa_remove_cpu(int cpu) | |||
454 | cpumask_clear_cpu(cpu, node_to_cpumask_map[i]); | 460 | cpumask_clear_cpu(cpu, node_to_cpumask_map[i]); |
455 | } | 461 | } |
456 | #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ | 462 | #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ |
457 | static void __cpuinit numa_set_cpumask(int cpu, int enable) | 463 | static void __cpuinit numa_set_cpumask(int cpu, bool enable) |
458 | { | 464 | { |
459 | struct cpumask *mask; | 465 | int nid, physnid; |
460 | int nid, physnid, i; | ||
461 | 466 | ||
462 | nid = early_cpu_to_node(cpu); | 467 | nid = early_cpu_to_node(cpu); |
463 | if (nid == NUMA_NO_NODE) { | 468 | if (nid == NUMA_NO_NODE) { |
@@ -467,28 +472,21 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable) | |||
467 | 472 | ||
468 | physnid = emu_nid_to_phys[nid]; | 473 | physnid = emu_nid_to_phys[nid]; |
469 | 474 | ||
470 | for_each_online_node(i) { | 475 | for_each_online_node(nid) { |
471 | if (emu_nid_to_phys[nid] != physnid) | 476 | if (emu_nid_to_phys[nid] != physnid) |
472 | continue; | 477 | continue; |
473 | 478 | ||
474 | mask = debug_cpumask_set_cpu(cpu, enable); | 479 | debug_cpumask_set_cpu(cpu, nid, enable); |
475 | if (!mask) | ||
476 | return; | ||
477 | |||
478 | if (enable) | ||
479 | cpumask_set_cpu(cpu, mask); | ||
480 | else | ||
481 | cpumask_clear_cpu(cpu, mask); | ||
482 | } | 480 | } |
483 | } | 481 | } |
484 | 482 | ||
485 | void __cpuinit numa_add_cpu(int cpu) | 483 | void __cpuinit numa_add_cpu(int cpu) |
486 | { | 484 | { |
487 | numa_set_cpumask(cpu, 1); | 485 | numa_set_cpumask(cpu, true); |
488 | } | 486 | } |
489 | 487 | ||
490 | void __cpuinit numa_remove_cpu(int cpu) | 488 | void __cpuinit numa_remove_cpu(int cpu) |
491 | { | 489 | { |
492 | numa_set_cpumask(cpu, 0); | 490 | numa_set_cpumask(cpu, false); |
493 | } | 491 | } |
494 | #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ | 492 | #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ |
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h index ef2d97377d7c..7178c3afe05e 100644 --- a/arch/x86/mm/numa_internal.h +++ b/arch/x86/mm/numa_internal.h | |||
@@ -19,6 +19,14 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi); | |||
19 | int __init numa_cleanup_meminfo(struct numa_meminfo *mi); | 19 | int __init numa_cleanup_meminfo(struct numa_meminfo *mi); |
20 | void __init numa_reset_distance(void); | 20 | void __init numa_reset_distance(void); |
21 | 21 | ||
22 | void __init x86_numa_init(void); | ||
23 | |||
24 | #ifdef CONFIG_X86_64 | ||
25 | static inline void init_alloc_remap(int nid, u64 start, u64 end) { } | ||
26 | #else | ||
27 | void __init init_alloc_remap(int nid, u64 start, u64 end); | ||
28 | #endif | ||
29 | |||
22 | #ifdef CONFIG_NUMA_EMU | 30 | #ifdef CONFIG_NUMA_EMU |
23 | void __init numa_emulation(struct numa_meminfo *numa_meminfo, | 31 | void __init numa_emulation(struct numa_meminfo *numa_meminfo, |
24 | int numa_dist_cnt); | 32 | int numa_dist_cnt); |
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c index 38e6d174c497..9f0614daea85 100644 --- a/arch/x86/mm/pf_in.c +++ b/arch/x86/mm/pf_in.c | |||
@@ -414,22 +414,17 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) | |||
414 | unsigned char *p; | 414 | unsigned char *p; |
415 | struct prefix_bits prf; | 415 | struct prefix_bits prf; |
416 | int i; | 416 | int i; |
417 | unsigned long rv; | ||
418 | 417 | ||
419 | p = (unsigned char *)ins_addr; | 418 | p = (unsigned char *)ins_addr; |
420 | p += skip_prefix(p, &prf); | 419 | p += skip_prefix(p, &prf); |
421 | p += get_opcode(p, &opcode); | 420 | p += get_opcode(p, &opcode); |
422 | for (i = 0; i < ARRAY_SIZE(reg_rop); i++) | 421 | for (i = 0; i < ARRAY_SIZE(reg_rop); i++) |
423 | if (reg_rop[i] == opcode) { | 422 | if (reg_rop[i] == opcode) |
424 | rv = REG_READ; | ||
425 | goto do_work; | 423 | goto do_work; |
426 | } | ||
427 | 424 | ||
428 | for (i = 0; i < ARRAY_SIZE(reg_wop); i++) | 425 | for (i = 0; i < ARRAY_SIZE(reg_wop); i++) |
429 | if (reg_wop[i] == opcode) { | 426 | if (reg_wop[i] == opcode) |
430 | rv = REG_WRITE; | ||
431 | goto do_work; | 427 | goto do_work; |
432 | } | ||
433 | 428 | ||
434 | printk(KERN_ERR "mmiotrace: Not a register instruction, opcode " | 429 | printk(KERN_ERR "mmiotrace: Not a register instruction, opcode " |
435 | "0x%02x\n", opcode); | 430 | "0x%02x\n", opcode); |
@@ -474,16 +469,13 @@ unsigned long get_ins_imm_val(unsigned long ins_addr) | |||
474 | unsigned char *p; | 469 | unsigned char *p; |
475 | struct prefix_bits prf; | 470 | struct prefix_bits prf; |
476 | int i; | 471 | int i; |
477 | unsigned long rv; | ||
478 | 472 | ||
479 | p = (unsigned char *)ins_addr; | 473 | p = (unsigned char *)ins_addr; |
480 | p += skip_prefix(p, &prf); | 474 | p += skip_prefix(p, &prf); |
481 | p += get_opcode(p, &opcode); | 475 | p += get_opcode(p, &opcode); |
482 | for (i = 0; i < ARRAY_SIZE(imm_wop); i++) | 476 | for (i = 0; i < ARRAY_SIZE(imm_wop); i++) |
483 | if (imm_wop[i] == opcode) { | 477 | if (imm_wop[i] == opcode) |
484 | rv = IMM_WRITE; | ||
485 | goto do_work; | 478 | goto do_work; |
486 | } | ||
487 | 479 | ||
488 | printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode " | 480 | printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode " |
489 | "0x%02x\n", opcode); | 481 | "0x%02x\n", opcode); |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat.c index 8e9d3394f6d4..81dbfdeb080d 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat.c | |||
@@ -26,8 +26,6 @@ | |||
26 | 26 | ||
27 | int acpi_numa __initdata; | 27 | int acpi_numa __initdata; |
28 | 28 | ||
29 | static struct bootnode nodes_add[MAX_NUMNODES]; | ||
30 | |||
31 | static __init int setup_node(int pxm) | 29 | static __init int setup_node(int pxm) |
32 | { | 30 | { |
33 | return acpi_map_pxm_to_node(pxm); | 31 | return acpi_map_pxm_to_node(pxm); |
@@ -37,7 +35,6 @@ static __init void bad_srat(void) | |||
37 | { | 35 | { |
38 | printk(KERN_ERR "SRAT: SRAT not used.\n"); | 36 | printk(KERN_ERR "SRAT: SRAT not used.\n"); |
39 | acpi_numa = -1; | 37 | acpi_numa = -1; |
40 | memset(nodes_add, 0, sizeof(nodes_add)); | ||
41 | } | 38 | } |
42 | 39 | ||
43 | static __init inline int srat_disabled(void) | 40 | static __init inline int srat_disabled(void) |
@@ -131,73 +128,17 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
131 | pxm, apic_id, node); | 128 | pxm, apic_id, node); |
132 | } | 129 | } |
133 | 130 | ||
134 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE | 131 | #ifdef CONFIG_MEMORY_HOTPLUG |
135 | static inline int save_add_info(void) {return 1;} | 132 | static inline int save_add_info(void) {return 1;} |
136 | #else | 133 | #else |
137 | static inline int save_add_info(void) {return 0;} | 134 | static inline int save_add_info(void) {return 0;} |
138 | #endif | 135 | #endif |
139 | /* | ||
140 | * Update nodes_add[] | ||
141 | * This code supports one contiguous hot add area per node | ||
142 | */ | ||
143 | static void __init | ||
144 | update_nodes_add(int node, unsigned long start, unsigned long end) | ||
145 | { | ||
146 | unsigned long s_pfn = start >> PAGE_SHIFT; | ||
147 | unsigned long e_pfn = end >> PAGE_SHIFT; | ||
148 | int changed = 0; | ||
149 | struct bootnode *nd = &nodes_add[node]; | ||
150 | |||
151 | /* I had some trouble with strange memory hotadd regions breaking | ||
152 | the boot. Be very strict here and reject anything unexpected. | ||
153 | If you want working memory hotadd write correct SRATs. | ||
154 | |||
155 | The node size check is a basic sanity check to guard against | ||
156 | mistakes */ | ||
157 | if ((signed long)(end - start) < NODE_MIN_SIZE) { | ||
158 | printk(KERN_ERR "SRAT: Hotplug area too small\n"); | ||
159 | return; | ||
160 | } | ||
161 | |||
162 | /* This check might be a bit too strict, but I'm keeping it for now. */ | ||
163 | if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) { | ||
164 | printk(KERN_ERR | ||
165 | "SRAT: Hotplug area %lu -> %lu has existing memory\n", | ||
166 | s_pfn, e_pfn); | ||
167 | return; | ||
168 | } | ||
169 | |||
170 | /* Looks good */ | ||
171 | |||
172 | if (nd->start == nd->end) { | ||
173 | nd->start = start; | ||
174 | nd->end = end; | ||
175 | changed = 1; | ||
176 | } else { | ||
177 | if (nd->start == end) { | ||
178 | nd->start = start; | ||
179 | changed = 1; | ||
180 | } | ||
181 | if (nd->end == start) { | ||
182 | nd->end = end; | ||
183 | changed = 1; | ||
184 | } | ||
185 | if (!changed) | ||
186 | printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); | ||
187 | } | ||
188 | |||
189 | if (changed) { | ||
190 | node_set(node, numa_nodes_parsed); | ||
191 | printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", | ||
192 | nd->start, nd->end); | ||
193 | } | ||
194 | } | ||
195 | 136 | ||
196 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ | 137 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ |
197 | void __init | 138 | void __init |
198 | acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | 139 | acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) |
199 | { | 140 | { |
200 | unsigned long start, end; | 141 | u64 start, end; |
201 | int node, pxm; | 142 | int node, pxm; |
202 | 143 | ||
203 | if (srat_disabled()) | 144 | if (srat_disabled()) |
@@ -226,11 +167,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
226 | return; | 167 | return; |
227 | } | 168 | } |
228 | 169 | ||
229 | printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, | 170 | printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, |
230 | start, end); | 171 | start, end); |
231 | |||
232 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) | ||
233 | update_nodes_add(node, start, end); | ||
234 | } | 172 | } |
235 | 173 | ||
236 | void __init acpi_numa_arch_fixup(void) {} | 174 | void __init acpi_numa_arch_fixup(void) {} |
@@ -244,17 +182,3 @@ int __init x86_acpi_numa_init(void) | |||
244 | return ret; | 182 | return ret; |
245 | return srat_disabled() ? -EINVAL : 0; | 183 | return srat_disabled() ? -EINVAL : 0; |
246 | } | 184 | } |
247 | |||
248 | #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) | ||
249 | int memory_add_physaddr_to_nid(u64 start) | ||
250 | { | ||
251 | int i, ret = 0; | ||
252 | |||
253 | for_each_node(i) | ||
254 | if (nodes_add[i].start <= start && nodes_add[i].end > start) | ||
255 | ret = i; | ||
256 | |||
257 | return ret; | ||
258 | } | ||
259 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | ||
260 | #endif | ||
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c deleted file mode 100644 index 364f36bdfad8..000000000000 --- a/arch/x86/mm/srat_32.c +++ /dev/null | |||
@@ -1,288 +0,0 @@ | |||
1 | /* | ||
2 | * Some of the code in this file has been gleaned from the 64 bit | ||
3 | * discontigmem support code base. | ||
4 | * | ||
5 | * Copyright (C) 2002, IBM Corp. | ||
6 | * | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, but | ||
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
17 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
18 | * details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
23 | * | ||
24 | * Send feedback to Pat Gaughen <gone@us.ibm.com> | ||
25 | */ | ||
26 | #include <linux/mm.h> | ||
27 | #include <linux/bootmem.h> | ||
28 | #include <linux/memblock.h> | ||
29 | #include <linux/mmzone.h> | ||
30 | #include <linux/acpi.h> | ||
31 | #include <linux/nodemask.h> | ||
32 | #include <asm/srat.h> | ||
33 | #include <asm/topology.h> | ||
34 | #include <asm/smp.h> | ||
35 | #include <asm/e820.h> | ||
36 | |||
37 | /* | ||
38 | * proximity macros and definitions | ||
39 | */ | ||
40 | #define NODE_ARRAY_INDEX(x) ((x) / 8) /* 8 bits/char */ | ||
41 | #define NODE_ARRAY_OFFSET(x) ((x) % 8) /* 8 bits/char */ | ||
42 | #define BMAP_SET(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit)) | ||
43 | #define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit))) | ||
44 | /* bitmap length; _PXM is at most 255 */ | ||
45 | #define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) | ||
46 | static u8 __initdata pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ | ||
47 | |||
48 | #define MAX_CHUNKS_PER_NODE 3 | ||
49 | #define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES) | ||
50 | struct node_memory_chunk_s { | ||
51 | unsigned long start_pfn; | ||
52 | unsigned long end_pfn; | ||
53 | u8 pxm; // proximity domain of node | ||
54 | u8 nid; // which cnode contains this chunk? | ||
55 | u8 bank; // which mem bank on this node | ||
56 | }; | ||
57 | static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS]; | ||
58 | |||
59 | static int __initdata num_memory_chunks; /* total number of memory chunks */ | ||
60 | static u8 __initdata apicid_to_pxm[MAX_LOCAL_APIC]; | ||
61 | |||
62 | int acpi_numa __initdata; | ||
63 | |||
64 | static __init void bad_srat(void) | ||
65 | { | ||
66 | printk(KERN_ERR "SRAT: SRAT not used.\n"); | ||
67 | acpi_numa = -1; | ||
68 | num_memory_chunks = 0; | ||
69 | } | ||
70 | |||
71 | static __init inline int srat_disabled(void) | ||
72 | { | ||
73 | return numa_off || acpi_numa < 0; | ||
74 | } | ||
75 | |||
76 | /* Identify CPU proximity domains */ | ||
77 | void __init | ||
78 | acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity) | ||
79 | { | ||
80 | if (srat_disabled()) | ||
81 | return; | ||
82 | if (cpu_affinity->header.length != | ||
83 | sizeof(struct acpi_srat_cpu_affinity)) { | ||
84 | bad_srat(); | ||
85 | return; | ||
86 | } | ||
87 | |||
88 | if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0) | ||
89 | return; /* empty entry */ | ||
90 | |||
91 | /* mark this node as "seen" in node bitmap */ | ||
92 | BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo); | ||
93 | |||
94 | /* don't need to check apic_id here, because it is always 8 bits */ | ||
95 | apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo; | ||
96 | |||
97 | printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n", | ||
98 | cpu_affinity->apic_id, cpu_affinity->proximity_domain_lo); | ||
99 | } | ||
100 | |||
101 | /* | ||
102 | * Identify memory proximity domains and hot-remove capabilities. | ||
103 | * Fill node memory chunk list structure. | ||
104 | */ | ||
105 | void __init | ||
106 | acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity) | ||
107 | { | ||
108 | unsigned long long paddr, size; | ||
109 | unsigned long start_pfn, end_pfn; | ||
110 | u8 pxm; | ||
111 | struct node_memory_chunk_s *p, *q, *pend; | ||
112 | |||
113 | if (srat_disabled()) | ||
114 | return; | ||
115 | if (memory_affinity->header.length != | ||
116 | sizeof(struct acpi_srat_mem_affinity)) { | ||
117 | bad_srat(); | ||
118 | return; | ||
119 | } | ||
120 | |||
121 | if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0) | ||
122 | return; /* empty entry */ | ||
123 | |||
124 | pxm = memory_affinity->proximity_domain & 0xff; | ||
125 | |||
126 | /* mark this node as "seen" in node bitmap */ | ||
127 | BMAP_SET(pxm_bitmap, pxm); | ||
128 | |||
129 | /* calculate info for memory chunk structure */ | ||
130 | paddr = memory_affinity->base_address; | ||
131 | size = memory_affinity->length; | ||
132 | |||
133 | start_pfn = paddr >> PAGE_SHIFT; | ||
134 | end_pfn = (paddr + size) >> PAGE_SHIFT; | ||
135 | |||
136 | |||
137 | if (num_memory_chunks >= MAXCHUNKS) { | ||
138 | printk(KERN_WARNING "Too many mem chunks in SRAT." | ||
139 | " Ignoring %lld MBytes at %llx\n", | ||
140 | size/(1024*1024), paddr); | ||
141 | return; | ||
142 | } | ||
143 | |||
144 | /* Insertion sort based on base address */ | ||
145 | pend = &node_memory_chunk[num_memory_chunks]; | ||
146 | for (p = &node_memory_chunk[0]; p < pend; p++) { | ||
147 | if (start_pfn < p->start_pfn) | ||
148 | break; | ||
149 | } | ||
150 | if (p < pend) { | ||
151 | for (q = pend; q >= p; q--) | ||
152 | *(q + 1) = *q; | ||
153 | } | ||
154 | p->start_pfn = start_pfn; | ||
155 | p->end_pfn = end_pfn; | ||
156 | p->pxm = pxm; | ||
157 | |||
158 | num_memory_chunks++; | ||
159 | |||
160 | printk(KERN_DEBUG "Memory range %08lx to %08lx" | ||
161 | " in proximity domain %02x %s\n", | ||
162 | start_pfn, end_pfn, | ||
163 | pxm, | ||
164 | ((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ? | ||
165 | "enabled and removable" : "enabled" ) ); | ||
166 | } | ||
167 | |||
168 | /* Callback for SLIT parsing */ | ||
169 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) | ||
170 | { | ||
171 | } | ||
172 | |||
173 | void acpi_numa_arch_fixup(void) | ||
174 | { | ||
175 | } | ||
176 | /* | ||
177 | * The SRAT table always lists ascending addresses, so can always | ||
178 | * assume that the first "start" address that you see is the real | ||
179 | * start of the node, and that the current "end" address is after | ||
180 | * the previous one. | ||
181 | */ | ||
182 | static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk) | ||
183 | { | ||
184 | /* | ||
185 | * Only add present memory as told by the e820. | ||
186 | * There is no guarantee from the SRAT that the memory it | ||
187 | * enumerates is present at boot time because it represents | ||
188 | * *possible* memory hotplug areas the same as normal RAM. | ||
189 | */ | ||
190 | if (memory_chunk->start_pfn >= max_pfn) { | ||
191 | printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n", | ||
192 | memory_chunk->start_pfn, memory_chunk->end_pfn); | ||
193 | return -1; | ||
194 | } | ||
195 | if (memory_chunk->nid != nid) | ||
196 | return -1; | ||
197 | |||
198 | if (!node_has_online_mem(nid)) | ||
199 | node_start_pfn[nid] = memory_chunk->start_pfn; | ||
200 | |||
201 | if (node_start_pfn[nid] > memory_chunk->start_pfn) | ||
202 | node_start_pfn[nid] = memory_chunk->start_pfn; | ||
203 | |||
204 | if (node_end_pfn[nid] < memory_chunk->end_pfn) | ||
205 | node_end_pfn[nid] = memory_chunk->end_pfn; | ||
206 | |||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | int __init get_memcfg_from_srat(void) | ||
211 | { | ||
212 | int i, j, nid; | ||
213 | |||
214 | if (srat_disabled()) | ||
215 | goto out_fail; | ||
216 | |||
217 | if (acpi_numa_init() < 0) | ||
218 | goto out_fail; | ||
219 | |||
220 | if (num_memory_chunks == 0) { | ||
221 | printk(KERN_DEBUG | ||
222 | "could not find any ACPI SRAT memory areas.\n"); | ||
223 | goto out_fail; | ||
224 | } | ||
225 | |||
226 | /* Calculate total number of nodes in system from PXM bitmap and create | ||
227 | * a set of sequential node IDs starting at zero. (ACPI doesn't seem | ||
228 | * to specify the range of _PXM values.) | ||
229 | */ | ||
230 | /* | ||
231 | * MCD - we no longer HAVE to number nodes sequentially. PXM domain | ||
232 | * numbers could go as high as 256, and MAX_NUMNODES for i386 is typically | ||
233 | * 32, so we will continue numbering them in this manner until MAX_NUMNODES | ||
234 | * approaches MAX_PXM_DOMAINS for i386. | ||
235 | */ | ||
236 | nodes_clear(node_online_map); | ||
237 | for (i = 0; i < MAX_PXM_DOMAINS; i++) { | ||
238 | if (BMAP_TEST(pxm_bitmap, i)) { | ||
239 | int nid = acpi_map_pxm_to_node(i); | ||
240 | node_set_online(nid); | ||
241 | } | ||
242 | } | ||
243 | BUG_ON(num_online_nodes() == 0); | ||
244 | |||
245 | /* set cnode id in memory chunk structure */ | ||
246 | for (i = 0; i < num_memory_chunks; i++) | ||
247 | node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm); | ||
248 | |||
249 | printk(KERN_DEBUG "pxm bitmap: "); | ||
250 | for (i = 0; i < sizeof(pxm_bitmap); i++) { | ||
251 | printk(KERN_CONT "%02x ", pxm_bitmap[i]); | ||
252 | } | ||
253 | printk(KERN_CONT "\n"); | ||
254 | printk(KERN_DEBUG "Number of logical nodes in system = %d\n", | ||
255 | num_online_nodes()); | ||
256 | printk(KERN_DEBUG "Number of memory chunks in system = %d\n", | ||
257 | num_memory_chunks); | ||
258 | |||
259 | for (i = 0; i < MAX_LOCAL_APIC; i++) | ||
260 | set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i])); | ||
261 | |||
262 | for (j = 0; j < num_memory_chunks; j++){ | ||
263 | struct node_memory_chunk_s * chunk = &node_memory_chunk[j]; | ||
264 | printk(KERN_DEBUG | ||
265 | "chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", | ||
266 | j, chunk->nid, chunk->start_pfn, chunk->end_pfn); | ||
267 | if (node_read_chunk(chunk->nid, chunk)) | ||
268 | continue; | ||
269 | |||
270 | memblock_x86_register_active_regions(chunk->nid, chunk->start_pfn, | ||
271 | min(chunk->end_pfn, max_pfn)); | ||
272 | } | ||
273 | /* for out of order entries in SRAT */ | ||
274 | sort_node_map(); | ||
275 | |||
276 | for_each_online_node(nid) { | ||
277 | unsigned long start = node_start_pfn[nid]; | ||
278 | unsigned long end = min(node_end_pfn[nid], max_pfn); | ||
279 | |||
280 | memory_present(nid, start, end); | ||
281 | node_remap_size[nid] = node_memmap_size_bytes(nid, start, end); | ||
282 | } | ||
283 | return 1; | ||
284 | out_fail: | ||
285 | printk(KERN_DEBUG "failed to get NUMA memory information from SRAT" | ||
286 | " table\n"); | ||
287 | return 0; | ||
288 | } | ||
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile new file mode 100644 index 000000000000..90568c33ddb0 --- /dev/null +++ b/arch/x86/net/Makefile | |||
@@ -0,0 +1,4 @@ | |||
1 | # | ||
2 | # Arch-specific network modules | ||
3 | # | ||
4 | obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o | ||
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S new file mode 100644 index 000000000000..66870223f8c5 --- /dev/null +++ b/arch/x86/net/bpf_jit.S | |||
@@ -0,0 +1,140 @@ | |||
1 | /* bpf_jit.S : BPF JIT helper functions | ||
2 | * | ||
3 | * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; version 2 | ||
8 | * of the License. | ||
9 | */ | ||
10 | #include <linux/linkage.h> | ||
11 | #include <asm/dwarf2.h> | ||
12 | |||
13 | /* | ||
14 | * Calling convention : | ||
15 | * rdi : skb pointer | ||
16 | * esi : offset of byte(s) to fetch in skb (can be scratched) | ||
17 | * r8 : copy of skb->data | ||
18 | * r9d : hlen = skb->len - skb->data_len | ||
19 | */ | ||
20 | #define SKBDATA %r8 | ||
21 | |||
22 | sk_load_word_ind: | ||
23 | .globl sk_load_word_ind | ||
24 | |||
25 | add %ebx,%esi /* offset += X */ | ||
26 | # test %esi,%esi /* if (offset < 0) goto bpf_error; */ | ||
27 | js bpf_error | ||
28 | |||
29 | sk_load_word: | ||
30 | .globl sk_load_word | ||
31 | |||
32 | mov %r9d,%eax # hlen | ||
33 | sub %esi,%eax # hlen - offset | ||
34 | cmp $3,%eax | ||
35 | jle bpf_slow_path_word | ||
36 | mov (SKBDATA,%rsi),%eax | ||
37 | bswap %eax /* ntohl() */ | ||
38 | ret | ||
39 | |||
40 | |||
41 | sk_load_half_ind: | ||
42 | .globl sk_load_half_ind | ||
43 | |||
44 | add %ebx,%esi /* offset += X */ | ||
45 | js bpf_error | ||
46 | |||
47 | sk_load_half: | ||
48 | .globl sk_load_half | ||
49 | |||
50 | mov %r9d,%eax | ||
51 | sub %esi,%eax # hlen - offset | ||
52 | cmp $1,%eax | ||
53 | jle bpf_slow_path_half | ||
54 | movzwl (SKBDATA,%rsi),%eax | ||
55 | rol $8,%ax # ntohs() | ||
56 | ret | ||
57 | |||
58 | sk_load_byte_ind: | ||
59 | .globl sk_load_byte_ind | ||
60 | add %ebx,%esi /* offset += X */ | ||
61 | js bpf_error | ||
62 | |||
63 | sk_load_byte: | ||
64 | .globl sk_load_byte | ||
65 | |||
66 | cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */ | ||
67 | jle bpf_slow_path_byte | ||
68 | movzbl (SKBDATA,%rsi),%eax | ||
69 | ret | ||
70 | |||
71 | /** | ||
72 | * sk_load_byte_msh - BPF_S_LDX_B_MSH helper | ||
73 | * | ||
74 | * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf) | ||
75 | * Must preserve A accumulator (%eax) | ||
76 | * Inputs : %esi is the offset value, already known positive | ||
77 | */ | ||
78 | ENTRY(sk_load_byte_msh) | ||
79 | CFI_STARTPROC | ||
80 | cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */ | ||
81 | jle bpf_slow_path_byte_msh | ||
82 | movzbl (SKBDATA,%rsi),%ebx | ||
83 | and $15,%bl | ||
84 | shl $2,%bl | ||
85 | ret | ||
86 | CFI_ENDPROC | ||
87 | ENDPROC(sk_load_byte_msh) | ||
88 | |||
89 | bpf_error: | ||
90 | # force a return 0 from jit handler | ||
91 | xor %eax,%eax | ||
92 | mov -8(%rbp),%rbx | ||
93 | leaveq | ||
94 | ret | ||
95 | |||
96 | /* rsi contains offset and can be scratched */ | ||
97 | #define bpf_slow_path_common(LEN) \ | ||
98 | push %rdi; /* save skb */ \ | ||
99 | push %r9; \ | ||
100 | push SKBDATA; \ | ||
101 | /* rsi already has offset */ \ | ||
102 | mov $LEN,%ecx; /* len */ \ | ||
103 | lea -12(%rbp),%rdx; \ | ||
104 | call skb_copy_bits; \ | ||
105 | test %eax,%eax; \ | ||
106 | pop SKBDATA; \ | ||
107 | pop %r9; \ | ||
108 | pop %rdi | ||
109 | |||
110 | |||
111 | bpf_slow_path_word: | ||
112 | bpf_slow_path_common(4) | ||
113 | js bpf_error | ||
114 | mov -12(%rbp),%eax | ||
115 | bswap %eax | ||
116 | ret | ||
117 | |||
118 | bpf_slow_path_half: | ||
119 | bpf_slow_path_common(2) | ||
120 | js bpf_error | ||
121 | mov -12(%rbp),%ax | ||
122 | rol $8,%ax | ||
123 | movzwl %ax,%eax | ||
124 | ret | ||
125 | |||
126 | bpf_slow_path_byte: | ||
127 | bpf_slow_path_common(1) | ||
128 | js bpf_error | ||
129 | movzbl -12(%rbp),%eax | ||
130 | ret | ||
131 | |||
132 | bpf_slow_path_byte_msh: | ||
133 | xchg %eax,%ebx /* don't lose A, X is about to be scratched */ | ||
134 | bpf_slow_path_common(1) | ||
135 | js bpf_error | ||
136 | movzbl -12(%rbp),%eax | ||
137 | and $15,%al | ||
138 | shl $2,%al | ||
139 | xchg %eax,%ebx | ||
140 | ret | ||
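To make the helpers above easier to follow, here is a rough C equivalent (a sketch, not code from this patch) of what sk_load_word does for a JITed filter: read the four bytes straight from the linear skb head when they are all there, otherwise fall back to skb_copy_bits(), and on failure make the whole filter return 0, which is what bpf_error arranges by unwinding the frame:

	/* Sketch: C-level behaviour of sk_load_word above.  skb_copy_bits()
	 * and ntohl() are the real kernel helpers; 'err' flags the bpf_error
	 * path. */
	static u32 load_word_sketch(const struct sk_buff *skb, int offset, int *err)
	{
		unsigned int hlen = skb->len - skb->data_len;	/* %r9d */
		__be32 raw;

		if (offset >= 0 && (unsigned int)offset + 4 <= hlen) {
			memcpy(&raw, skb->data + offset, 4);	/* fast path */
			return ntohl(raw);
		}
		if (skb_copy_bits(skb, offset, &raw, 4) < 0) {	/* slow path */
			*err = 1;		/* bpf_error: filter returns 0 */
			return 0;
		}
		return ntohl(raw);
	}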
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c new file mode 100644 index 000000000000..bfab3fa10edc --- /dev/null +++ b/arch/x86/net/bpf_jit_comp.c | |||
@@ -0,0 +1,654 @@ | |||
1 | /* bpf_jit_comp.c : BPF JIT compiler | ||
2 | * | ||
3 | * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; version 2 | ||
8 | * of the License. | ||
9 | */ | ||
10 | #include <linux/moduleloader.h> | ||
11 | #include <asm/cacheflush.h> | ||
12 | #include <linux/netdevice.h> | ||
13 | #include <linux/filter.h> | ||
14 | |||
15 | /* | ||
16 | * Conventions : | ||
17 | * EAX : BPF A accumulator | ||
18 | * EBX : BPF X accumulator | ||
19 | * RDI : pointer to skb (first argument given to JIT function) | ||
20 | * RBP : frame pointer (even if CONFIG_FRAME_POINTER=n) | ||
21 | * ECX,EDX,ESI : scratch registers | ||
22 | * r9d : skb->len - skb->data_len (headlen) | ||
23 | * r8 : skb->data | ||
24 | * -8(RBP) : saved RBX value | ||
25 | * -16(RBP)..-80(RBP) : BPF_MEMWORDS values | ||
26 | */ | ||
27 | int bpf_jit_enable __read_mostly; | ||
28 | |||
29 | /* | ||
30 | * assembly code in arch/x86/net/bpf_jit.S | ||
31 | */ | ||
32 | extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; | ||
33 | extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[]; | ||
34 | |||
35 | static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) | ||
36 | { | ||
37 | if (len == 1) | ||
38 | *ptr = bytes; | ||
39 | else if (len == 2) | ||
40 | *(u16 *)ptr = bytes; | ||
41 | else { | ||
42 | *(u32 *)ptr = bytes; | ||
43 | barrier(); | ||
44 | } | ||
45 | return ptr + len; | ||
46 | } | ||
47 | |||
48 | #define EMIT(bytes, len) do { prog = emit_code(prog, bytes, len); } while (0) | ||
49 | |||
50 | #define EMIT1(b1) EMIT(b1, 1) | ||
51 | #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) | ||
52 | #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) | ||
53 | #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) | ||
54 | #define EMIT1_off32(b1, off) do { EMIT1(b1); EMIT(off, 4);} while (0) | ||
55 | |||
56 | #define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */ | ||
57 | #define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */ | ||
58 | |||
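The EMIT*() macros above just append little-endian bytes at 'prog'. As a worked example (a sketch, not part of the patch), the five bytes that EMIT1_off32(0xb8, K) produces for "mov $K,%eax" could be written out by hand like this:

	/* Sketch: hand-encoding of mov $imm32,%eax, i.e. what
	 * EMIT1_off32(0xb8, imm) appends to the output buffer. */
	static u8 *emit_mov_eax_imm32(u8 *prog, u32 imm)
	{
		*prog++ = 0xb8;			/* opcode byte */
		*prog++ = imm & 0xff;		/* imm32, least significant byte first */
		*prog++ = (imm >> 8) & 0xff;
		*prog++ = (imm >> 16) & 0xff;
		*prog++ = (imm >> 24) & 0xff;
		return prog;
	}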
59 | static inline bool is_imm8(int value) | ||
60 | { | ||
61 | return value <= 127 && value >= -128; | ||
62 | } | ||
63 | |||
64 | static inline bool is_near(int offset) | ||
65 | { | ||
66 | return offset <= 127 && offset >= -128; | ||
67 | } | ||
68 | |||
69 | #define EMIT_JMP(offset) \ | ||
70 | do { \ | ||
71 | if (offset) { \ | ||
72 | if (is_near(offset)) \ | ||
73 | EMIT2(0xeb, offset); /* jmp .+off8 */ \ | ||
74 | else \ | ||
75 | EMIT1_off32(0xe9, offset); /* jmp .+off32 */ \ | ||
76 | } \ | ||
77 | } while (0) | ||
78 | |||
79 | /* list of x86 cond jump opcodes (. + s8) | ||
80 | * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) | ||
81 | */ | ||
82 | #define X86_JB 0x72 | ||
83 | #define X86_JAE 0x73 | ||
84 | #define X86_JE 0x74 | ||
85 | #define X86_JNE 0x75 | ||
86 | #define X86_JBE 0x76 | ||
87 | #define X86_JA 0x77 | ||
88 | |||
89 | #define EMIT_COND_JMP(op, offset) \ | ||
90 | do { \ | ||
91 | if (is_near(offset)) \ | ||
92 | EMIT2(op, offset); /* jxx .+off8 */ \ | ||
93 | else { \ | ||
94 | EMIT2(0x0f, op + 0x10); \ | ||
95 | EMIT(offset, 4); /* jxx .+off32 */ \ | ||
96 | } \ | ||
97 | } while (0) | ||
98 | |||
99 | #define COND_SEL(CODE, TOP, FOP) \ | ||
100 | case CODE: \ | ||
101 | t_op = TOP; \ | ||
102 | f_op = FOP; \ | ||
103 | goto cond_branch | ||
104 | |||
105 | |||
106 | #define SEEN_DATAREF 1 /* might call external helpers */ | ||
107 | #define SEEN_XREG 2 /* ebx is used */ | ||
108 | #define SEEN_MEM 4 /* use mem[] for temporary storage */ | ||
109 | |||
110 | static inline void bpf_flush_icache(void *start, void *end) | ||
111 | { | ||
112 | mm_segment_t old_fs = get_fs(); | ||
113 | |||
114 | set_fs(KERNEL_DS); | ||
115 | smp_wmb(); | ||
116 | flush_icache_range((unsigned long)start, (unsigned long)end); | ||
117 | set_fs(old_fs); | ||
118 | } | ||
119 | |||
120 | |||
121 | void bpf_jit_compile(struct sk_filter *fp) | ||
122 | { | ||
123 | u8 temp[64]; | ||
124 | u8 *prog; | ||
125 | unsigned int proglen, oldproglen = 0; | ||
126 | int ilen, i; | ||
127 | int t_offset, f_offset; | ||
128 | u8 t_op, f_op, seen = 0, pass; | ||
129 | u8 *image = NULL; | ||
130 | u8 *func; | ||
131 | int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */ | ||
132 | unsigned int cleanup_addr; /* epilogue code offset */ | ||
133 | unsigned int *addrs; | ||
134 | const struct sock_filter *filter = fp->insns; | ||
135 | int flen = fp->len; | ||
136 | |||
137 | if (!bpf_jit_enable) | ||
138 | return; | ||
139 | |||
140 | addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL); | ||
141 | if (addrs == NULL) | ||
142 | return; | ||
143 | |||
144 | /* Before the first pass, make a rough estimate of addrs[]: | ||
145 | * each BPF instruction is translated to less than 64 bytes | ||
146 | */ | ||
147 | for (proglen = 0, i = 0; i < flen; i++) { | ||
148 | proglen += 64; | ||
149 | addrs[i] = proglen; | ||
150 | } | ||
151 | cleanup_addr = proglen; /* epilogue address */ | ||
152 | |||
153 | for (pass = 0; pass < 10; pass++) { | ||
154 | /* no prologue/epilogue for trivial filters (RET something) */ | ||
155 | proglen = 0; | ||
156 | prog = temp; | ||
157 | |||
158 | if (seen) { | ||
159 | EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */ | ||
160 | EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */ | ||
161 | /* note : must save %rbx in case bpf_error is hit */ | ||
162 | if (seen & (SEEN_XREG | SEEN_DATAREF)) | ||
163 | EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */ | ||
164 | if (seen & SEEN_XREG) | ||
165 | CLEAR_X(); /* make sure we don't leak kernel memory */ | ||
166 | |||
167 | /* | ||
168 | * If this filter needs to access skb data, | ||
169 | * load r9 and r8 with: | ||
170 | * r9 = skb->len - skb->data_len | ||
171 | * r8 = skb->data | ||
172 | */ | ||
173 | if (seen & SEEN_DATAREF) { | ||
174 | if (offsetof(struct sk_buff, len) <= 127) | ||
175 | /* mov off8(%rdi),%r9d */ | ||
176 | EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len)); | ||
177 | else { | ||
178 | /* mov off32(%rdi),%r9d */ | ||
179 | EMIT3(0x44, 0x8b, 0x8f); | ||
180 | EMIT(offsetof(struct sk_buff, len), 4); | ||
181 | } | ||
182 | if (is_imm8(offsetof(struct sk_buff, data_len))) | ||
183 | /* sub off8(%rdi),%r9d */ | ||
184 | EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len)); | ||
185 | else { | ||
186 | EMIT3(0x44, 0x2b, 0x8f); | ||
187 | EMIT(offsetof(struct sk_buff, data_len), 4); | ||
188 | } | ||
189 | |||
190 | if (is_imm8(offsetof(struct sk_buff, data))) | ||
191 | /* mov off8(%rdi),%r8 */ | ||
192 | EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data)); | ||
193 | else { | ||
194 | /* mov off32(%rdi),%r8 */ | ||
195 | EMIT3(0x4c, 0x8b, 0x87); | ||
196 | EMIT(offsetof(struct sk_buff, data), 4); | ||
197 | } | ||
198 | } | ||
199 | } | ||
200 | |||
201 | switch (filter[0].code) { | ||
202 | case BPF_S_RET_K: | ||
203 | case BPF_S_LD_W_LEN: | ||
204 | case BPF_S_ANC_PROTOCOL: | ||
205 | case BPF_S_ANC_IFINDEX: | ||
206 | case BPF_S_ANC_MARK: | ||
207 | case BPF_S_ANC_RXHASH: | ||
208 | case BPF_S_ANC_CPU: | ||
209 | case BPF_S_ANC_QUEUE: | ||
210 | case BPF_S_LD_W_ABS: | ||
211 | case BPF_S_LD_H_ABS: | ||
212 | case BPF_S_LD_B_ABS: | ||
213 | /* first instruction sets A register (or is RET 'constant') */ | ||
214 | break; | ||
215 | default: | ||
216 | /* make sure we don't leak kernel information to user */ | ||
217 | CLEAR_A(); /* A = 0 */ | ||
218 | } | ||
219 | |||
220 | for (i = 0; i < flen; i++) { | ||
221 | unsigned int K = filter[i].k; | ||
222 | |||
223 | switch (filter[i].code) { | ||
224 | case BPF_S_ALU_ADD_X: /* A += X; */ | ||
225 | seen |= SEEN_XREG; | ||
226 | EMIT2(0x01, 0xd8); /* add %ebx,%eax */ | ||
227 | break; | ||
228 | case BPF_S_ALU_ADD_K: /* A += K; */ | ||
229 | if (!K) | ||
230 | break; | ||
231 | if (is_imm8(K)) | ||
232 | EMIT3(0x83, 0xc0, K); /* add imm8,%eax */ | ||
233 | else | ||
234 | EMIT1_off32(0x05, K); /* add imm32,%eax */ | ||
235 | break; | ||
236 | case BPF_S_ALU_SUB_X: /* A -= X; */ | ||
237 | seen |= SEEN_XREG; | ||
238 | EMIT2(0x29, 0xd8); /* sub %ebx,%eax */ | ||
239 | break; | ||
240 | case BPF_S_ALU_SUB_K: /* A -= K */ | ||
241 | if (!K) | ||
242 | break; | ||
243 | if (is_imm8(K)) | ||
244 | EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */ | ||
245 | else | ||
246 | EMIT1_off32(0x2d, K); /* sub imm32,%eax */ | ||
247 | break; | ||
248 | case BPF_S_ALU_MUL_X: /* A *= X; */ | ||
249 | seen |= SEEN_XREG; | ||
250 | EMIT3(0x0f, 0xaf, 0xc3); /* imul %ebx,%eax */ | ||
251 | break; | ||
252 | case BPF_S_ALU_MUL_K: /* A *= K */ | ||
253 | if (is_imm8(K)) | ||
254 | EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */ | ||
255 | else { | ||
256 | EMIT2(0x69, 0xc0); /* imul imm32,%eax */ | ||
257 | EMIT(K, 4); | ||
258 | } | ||
259 | break; | ||
260 | case BPF_S_ALU_DIV_X: /* A /= X; */ | ||
261 | seen |= SEEN_XREG; | ||
262 | EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ | ||
263 | if (pc_ret0 != -1) | ||
264 | EMIT_COND_JMP(X86_JE, addrs[pc_ret0] - (addrs[i] - 4)); | ||
265 | else { | ||
266 | EMIT_COND_JMP(X86_JNE, 2 + 5); | ||
267 | CLEAR_A(); | ||
268 | EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */ | ||
269 | } | ||
270 | EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */ | ||
271 | break; | ||
272 | case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */ | ||
273 | EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */ | ||
274 | EMIT(K, 4); | ||
275 | EMIT4(0x48, 0xc1, 0xe8, 0x20); /* shr $0x20,%rax */ | ||
276 | break; | ||
277 | case BPF_S_ALU_AND_X: | ||
278 | seen |= SEEN_XREG; | ||
279 | EMIT2(0x21, 0xd8); /* and %ebx,%eax */ | ||
280 | break; | ||
281 | case BPF_S_ALU_AND_K: | ||
282 | if (K >= 0xFFFFFF00) { | ||
283 | EMIT2(0x24, K & 0xFF); /* and imm8,%al */ | ||
284 | } else if (K >= 0xFFFF0000) { | ||
285 | EMIT2(0x66, 0x25); /* and imm16,%ax */ | ||
286 | EMIT2(K, 2); | ||
287 | } else { | ||
288 | EMIT1_off32(0x25, K); /* and imm32,%eax */ | ||
289 | } | ||
290 | break; | ||
291 | case BPF_S_ALU_OR_X: | ||
292 | seen |= SEEN_XREG; | ||
293 | EMIT2(0x09, 0xd8); /* or %ebx,%eax */ | ||
294 | break; | ||
295 | case BPF_S_ALU_OR_K: | ||
296 | if (is_imm8(K)) | ||
297 | EMIT3(0x83, 0xc8, K); /* or imm8,%eax */ | ||
298 | else | ||
299 | EMIT1_off32(0x0d, K); /* or imm32,%eax */ | ||
300 | break; | ||
301 | case BPF_S_ALU_LSH_X: /* A <<= X; */ | ||
302 | seen |= SEEN_XREG; | ||
303 | EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */ | ||
304 | break; | ||
305 | case BPF_S_ALU_LSH_K: | ||
306 | if (K == 0) | ||
307 | break; | ||
308 | else if (K == 1) | ||
309 | EMIT2(0xd1, 0xe0); /* shl %eax */ | ||
310 | else | ||
311 | EMIT3(0xc1, 0xe0, K); | ||
312 | break; | ||
313 | case BPF_S_ALU_RSH_X: /* A >>= X; */ | ||
314 | seen |= SEEN_XREG; | ||
315 | EMIT4(0x89, 0xd9, 0xd3, 0xe8); /* mov %ebx,%ecx; shr %cl,%eax */ | ||
316 | break; | ||
317 | case BPF_S_ALU_RSH_K: /* A >>= K; */ | ||
318 | if (K == 0) | ||
319 | break; | ||
320 | else if (K == 1) | ||
321 | EMIT2(0xd1, 0xe8); /* shr %eax */ | ||
322 | else | ||
323 | EMIT3(0xc1, 0xe8, K); | ||
324 | break; | ||
325 | case BPF_S_ALU_NEG: | ||
326 | EMIT2(0xf7, 0xd8); /* neg %eax */ | ||
327 | break; | ||
328 | case BPF_S_RET_K: | ||
329 | if (!K) { | ||
330 | if (pc_ret0 == -1) | ||
331 | pc_ret0 = i; | ||
332 | CLEAR_A(); | ||
333 | } else { | ||
334 | EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ | ||
335 | } | ||
336 | /* fall through */ | ||
337 | case BPF_S_RET_A: | ||
338 | if (seen) { | ||
339 | if (i != flen - 1) { | ||
340 | EMIT_JMP(cleanup_addr - addrs[i]); | ||
341 | break; | ||
342 | } | ||
343 | if (seen & SEEN_XREG) | ||
344 | EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */ | ||
345 | EMIT1(0xc9); /* leaveq */ | ||
346 | } | ||
347 | EMIT1(0xc3); /* ret */ | ||
348 | break; | ||
349 | case BPF_S_MISC_TAX: /* X = A */ | ||
350 | seen |= SEEN_XREG; | ||
351 | EMIT2(0x89, 0xc3); /* mov %eax,%ebx */ | ||
352 | break; | ||
353 | case BPF_S_MISC_TXA: /* A = X */ | ||
354 | seen |= SEEN_XREG; | ||
355 | EMIT2(0x89, 0xd8); /* mov %ebx,%eax */ | ||
356 | break; | ||
357 | case BPF_S_LD_IMM: /* A = K */ | ||
358 | if (!K) | ||
359 | CLEAR_A(); | ||
360 | else | ||
361 | EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ | ||
362 | break; | ||
363 | case BPF_S_LDX_IMM: /* X = K */ | ||
364 | seen |= SEEN_XREG; | ||
365 | if (!K) | ||
366 | CLEAR_X(); | ||
367 | else | ||
368 | EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */ | ||
369 | break; | ||
370 | case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */ | ||
371 | seen |= SEEN_MEM; | ||
372 | EMIT3(0x8b, 0x45, 0xf0 - K*4); | ||
373 | break; | ||
374 | case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */ | ||
375 | seen |= SEEN_XREG | SEEN_MEM; | ||
376 | EMIT3(0x8b, 0x5d, 0xf0 - K*4); | ||
377 | break; | ||
378 | case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */ | ||
379 | seen |= SEEN_MEM; | ||
380 | EMIT3(0x89, 0x45, 0xf0 - K*4); | ||
381 | break; | ||
382 | case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ | ||
383 | seen |= SEEN_XREG | SEEN_MEM; | ||
384 | EMIT3(0x89, 0x5d, 0xf0 - K*4); | ||
385 | break; | ||
386 | case BPF_S_LD_W_LEN: /* A = skb->len; */ | ||
387 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); | ||
388 | if (is_imm8(offsetof(struct sk_buff, len))) | ||
389 | /* mov off8(%rdi),%eax */ | ||
390 | EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len)); | ||
391 | else { | ||
392 | EMIT2(0x8b, 0x87); | ||
393 | EMIT(offsetof(struct sk_buff, len), 4); | ||
394 | } | ||
395 | break; | ||
396 | case BPF_S_LDX_W_LEN: /* X = skb->len; */ | ||
397 | seen |= SEEN_XREG; | ||
398 | if (is_imm8(offsetof(struct sk_buff, len))) | ||
399 | /* mov off8(%rdi),%ebx */ | ||
400 | EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len)); | ||
401 | else { | ||
402 | EMIT2(0x8b, 0x9f); | ||
403 | EMIT(offsetof(struct sk_buff, len), 4); | ||
404 | } | ||
405 | break; | ||
406 | case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ | ||
407 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); | ||
408 | if (is_imm8(offsetof(struct sk_buff, protocol))) { | ||
409 | /* movzwl off8(%rdi),%eax */ | ||
410 | EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol)); | ||
411 | } else { | ||
412 | EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ | ||
413 | EMIT(offsetof(struct sk_buff, protocol), 4); | ||
414 | } | ||
415 | EMIT2(0x86, 0xc4); /* ntohs() : xchg %al,%ah */ | ||
416 | break; | ||
417 | case BPF_S_ANC_IFINDEX: | ||
418 | if (is_imm8(offsetof(struct sk_buff, dev))) { | ||
419 | /* movq off8(%rdi),%rax */ | ||
420 | EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev)); | ||
421 | } else { | ||
422 | EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */ | ||
423 | EMIT(offsetof(struct sk_buff, dev), 4); | ||
424 | } | ||
425 | EMIT3(0x48, 0x85, 0xc0); /* test %rax,%rax */ | ||
426 | EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6)); | ||
427 | BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); | ||
428 | EMIT2(0x8b, 0x80); /* mov off32(%rax),%eax */ | ||
429 | EMIT(offsetof(struct net_device, ifindex), 4); | ||
430 | break; | ||
431 | case BPF_S_ANC_MARK: | ||
432 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); | ||
433 | if (is_imm8(offsetof(struct sk_buff, mark))) { | ||
434 | /* mov off8(%rdi),%eax */ | ||
435 | EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark)); | ||
436 | } else { | ||
437 | EMIT2(0x8b, 0x87); | ||
438 | EMIT(offsetof(struct sk_buff, mark), 4); | ||
439 | } | ||
440 | break; | ||
441 | case BPF_S_ANC_RXHASH: | ||
442 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); | ||
443 | if (is_imm8(offsetof(struct sk_buff, rxhash))) { | ||
444 | /* mov off8(%rdi),%eax */ | ||
445 | EMIT3(0x8b, 0x47, offsetof(struct sk_buff, rxhash)); | ||
446 | } else { | ||
447 | EMIT2(0x8b, 0x87); | ||
448 | EMIT(offsetof(struct sk_buff, rxhash), 4); | ||
449 | } | ||
450 | break; | ||
451 | case BPF_S_ANC_QUEUE: | ||
452 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); | ||
453 | if (is_imm8(offsetof(struct sk_buff, queue_mapping))) { | ||
454 | /* movzwl off8(%rdi),%eax */ | ||
455 | EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping)); | ||
456 | } else { | ||
457 | EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ | ||
458 | EMIT(offsetof(struct sk_buff, queue_mapping), 4); | ||
459 | } | ||
460 | break; | ||
461 | case BPF_S_ANC_CPU: | ||
462 | #ifdef CONFIG_SMP | ||
463 | EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */ | ||
464 | EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */ | ||
465 | #else | ||
466 | CLEAR_A(); | ||
467 | #endif | ||
468 | break; | ||
469 | case BPF_S_LD_W_ABS: | ||
470 | func = sk_load_word; | ||
471 | common_load: seen |= SEEN_DATAREF; | ||
472 | if ((int)K < 0) | ||
473 | goto out; | ||
474 | t_offset = func - (image + addrs[i]); | ||
475 | EMIT1_off32(0xbe, K); /* mov imm32,%esi */ | ||
476 | EMIT1_off32(0xe8, t_offset); /* call */ | ||
477 | break; | ||
478 | case BPF_S_LD_H_ABS: | ||
479 | func = sk_load_half; | ||
480 | goto common_load; | ||
481 | case BPF_S_LD_B_ABS: | ||
482 | func = sk_load_byte; | ||
483 | goto common_load; | ||
484 | case BPF_S_LDX_B_MSH: | ||
485 | if ((int)K < 0) { | ||
486 | if (pc_ret0 != -1) { | ||
487 | EMIT_JMP(addrs[pc_ret0] - addrs[i]); | ||
488 | break; | ||
489 | } | ||
490 | CLEAR_A(); | ||
491 | EMIT_JMP(cleanup_addr - addrs[i]); | ||
492 | break; | ||
493 | } | ||
494 | seen |= SEEN_DATAREF | SEEN_XREG; | ||
495 | t_offset = sk_load_byte_msh - (image + addrs[i]); | ||
496 | EMIT1_off32(0xbe, K); /* mov imm32,%esi */ | ||
497 | EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */ | ||
498 | break; | ||
499 | case BPF_S_LD_W_IND: | ||
500 | func = sk_load_word_ind; | ||
501 | common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; | ||
502 | t_offset = func - (image + addrs[i]); | ||
503 | EMIT1_off32(0xbe, K); /* mov imm32,%esi */ | ||
504 | EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */ | ||
505 | break; | ||
506 | case BPF_S_LD_H_IND: | ||
507 | func = sk_load_half_ind; | ||
508 | goto common_load_ind; | ||
509 | case BPF_S_LD_B_IND: | ||
510 | func = sk_load_byte_ind; | ||
511 | goto common_load_ind; | ||
512 | case BPF_S_JMP_JA: | ||
513 | t_offset = addrs[i + K] - addrs[i]; | ||
514 | EMIT_JMP(t_offset); | ||
515 | break; | ||
516 | COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE); | ||
517 | COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB); | ||
518 | COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE); | ||
519 | COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE); | ||
520 | COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE); | ||
521 | COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB); | ||
522 | COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE); | ||
523 | COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE); | ||
524 | |||
525 | cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; | ||
526 | t_offset = addrs[i + filter[i].jt] - addrs[i]; | ||
527 | |||
528 | /* same targets, can avoid doing the test :) */ | ||
529 | if (filter[i].jt == filter[i].jf) { | ||
530 | EMIT_JMP(t_offset); | ||
531 | break; | ||
532 | } | ||
533 | |||
534 | switch (filter[i].code) { | ||
535 | case BPF_S_JMP_JGT_X: | ||
536 | case BPF_S_JMP_JGE_X: | ||
537 | case BPF_S_JMP_JEQ_X: | ||
538 | seen |= SEEN_XREG; | ||
539 | EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */ | ||
540 | break; | ||
541 | case BPF_S_JMP_JSET_X: | ||
542 | seen |= SEEN_XREG; | ||
543 | EMIT2(0x85, 0xd8); /* test %ebx,%eax */ | ||
544 | break; | ||
545 | case BPF_S_JMP_JEQ_K: | ||
546 | if (K == 0) { | ||
547 | EMIT2(0x85, 0xc0); /* test %eax,%eax */ | ||
548 | break; | ||
549 | } | ||
550 | case BPF_S_JMP_JGT_K: | ||
551 | case BPF_S_JMP_JGE_K: | ||
552 | if (K <= 127) | ||
553 | EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */ | ||
554 | else | ||
555 | EMIT1_off32(0x3d, K); /* cmp imm32,%eax */ | ||
556 | break; | ||
557 | case BPF_S_JMP_JSET_K: | ||
558 | if (K <= 0xFF) | ||
559 | EMIT2(0xa8, K); /* test imm8,%al */ | ||
560 | else if (!(K & 0xFFFF00FF)) | ||
561 | EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */ | ||
562 | else if (K <= 0xFFFF) { | ||
563 | EMIT2(0x66, 0xa9); /* test imm16,%ax */ | ||
564 | EMIT(K, 2); | ||
565 | } else { | ||
566 | EMIT1_off32(0xa9, K); /* test imm32,%eax */ | ||
567 | } | ||
568 | break; | ||
569 | } | ||
570 | if (filter[i].jt != 0) { | ||
571 | if (filter[i].jf) | ||
572 | t_offset += is_near(f_offset) ? 2 : 6; | ||
573 | EMIT_COND_JMP(t_op, t_offset); | ||
574 | if (filter[i].jf) | ||
575 | EMIT_JMP(f_offset); | ||
576 | break; | ||
577 | } | ||
578 | EMIT_COND_JMP(f_op, f_offset); | ||
579 | break; | ||
580 | default: | ||
581 | /* hmm, too complex filter, give up with jit compiler */ | ||
582 | goto out; | ||
583 | } | ||
584 | ilen = prog - temp; | ||
585 | if (image) { | ||
586 | if (unlikely(proglen + ilen > oldproglen)) { | ||
587 | pr_err("bpf_jit_compile fatal error\n"); | ||
588 | kfree(addrs); | ||
589 | module_free(NULL, image); | ||
590 | return; | ||
591 | } | ||
592 | memcpy(image + proglen, temp, ilen); | ||
593 | } | ||
594 | proglen += ilen; | ||
595 | addrs[i] = proglen; | ||
596 | prog = temp; | ||
597 | } | ||
598 | /* last bpf instruction is always a RET : | ||
599 | * use it to give the cleanup instruction(s) addr | ||
600 | */ | ||
601 | cleanup_addr = proglen - 1; /* ret */ | ||
602 | if (seen) | ||
603 | cleanup_addr -= 1; /* leaveq */ | ||
604 | if (seen & SEEN_XREG) | ||
605 | cleanup_addr -= 4; /* mov -8(%rbp),%rbx */ | ||
606 | |||
607 | if (image) { | ||
608 | WARN_ON(proglen != oldproglen); | ||
609 | break; | ||
610 | } | ||
611 | if (proglen == oldproglen) { | ||
612 | image = module_alloc(max_t(unsigned int, | ||
613 | proglen, | ||
614 | sizeof(struct work_struct))); | ||
615 | if (!image) | ||
616 | goto out; | ||
617 | } | ||
618 | oldproglen = proglen; | ||
619 | } | ||
620 | if (bpf_jit_enable > 1) | ||
621 | pr_err("flen=%d proglen=%u pass=%d image=%p\n", | ||
622 | flen, proglen, pass, image); | ||
623 | |||
624 | if (image) { | ||
625 | if (bpf_jit_enable > 1) | ||
626 | print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS, | ||
627 | 16, 1, image, proglen, false); | ||
628 | |||
629 | bpf_flush_icache(image, image + proglen); | ||
630 | |||
631 | fp->bpf_func = (void *)image; | ||
632 | } | ||
633 | out: | ||
634 | kfree(addrs); | ||
635 | return; | ||
636 | } | ||
637 | |||
638 | static void jit_free_defer(struct work_struct *arg) | ||
639 | { | ||
640 | module_free(NULL, arg); | ||
641 | } | ||
642 | |||
643 | /* run from softirq, we must use a work_struct to call | ||
644 | * module_free() from process context | ||
645 | */ | ||
646 | void bpf_jit_free(struct sk_filter *fp) | ||
647 | { | ||
648 | if (fp->bpf_func != sk_run_filter) { | ||
649 | struct work_struct *work = (struct work_struct *)fp->bpf_func; | ||
650 | |||
651 | INIT_WORK(work, jit_free_defer); | ||
652 | schedule_work(work); | ||
653 | } | ||
654 | } | ||
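The module_alloc() size above is max(proglen, sizeof(struct work_struct)) so that bpf_jit_free() can later reuse the start of the image as a work item: module_free() must not be called from softirq context, so the free is deferred to a workqueue. A minimal sketch of that deferred-free pattern, assuming only the workqueue and module-loader APIs already visible in the diff (jit_image_free() is a hypothetical wrapper, not a kernel function):

	#include <linux/workqueue.h>
	#include <linux/moduleloader.h>

	static void jit_image_free_work(struct work_struct *w)
	{
		/* process context: safe to release module space here */
		module_free(NULL, w);
	}

	static void jit_image_free(void *image)
	{
		/* the image was allocated with size >= sizeof(struct work_struct),
		 * so its first bytes can be overlaid with the work item itself */
		struct work_struct *work = image;

		INIT_WORK(work, jit_image_free_work);
		schedule_work(work);
	}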
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 2d49d4e19a36..a5b64ab4cd6e 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c | |||
@@ -16,17 +16,6 @@ | |||
16 | #include <asm/stacktrace.h> | 16 | #include <asm/stacktrace.h> |
17 | #include <linux/compat.h> | 17 | #include <linux/compat.h> |
18 | 18 | ||
19 | static void backtrace_warning_symbol(void *data, char *msg, | ||
20 | unsigned long symbol) | ||
21 | { | ||
22 | /* Ignore warnings */ | ||
23 | } | ||
24 | |||
25 | static void backtrace_warning(void *data, char *msg) | ||
26 | { | ||
27 | /* Ignore warnings */ | ||
28 | } | ||
29 | |||
30 | static int backtrace_stack(void *data, char *name) | 19 | static int backtrace_stack(void *data, char *name) |
31 | { | 20 | { |
32 | /* Yes, we want all stacks */ | 21 | /* Yes, we want all stacks */ |
@@ -42,8 +31,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
42 | } | 31 | } |
43 | 32 | ||
44 | static struct stacktrace_ops backtrace_ops = { | 33 | static struct stacktrace_ops backtrace_ops = { |
45 | .warning = backtrace_warning, | ||
46 | .warning_symbol = backtrace_warning_symbol, | ||
47 | .stack = backtrace_stack, | 34 | .stack = backtrace_stack, |
48 | .address = backtrace_address, | 35 | .address = backtrace_address, |
49 | .walk_stack = print_context_stack, | 36 | .walk_stack = print_context_stack, |
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index bd33620b0071..e6fd8473fb7b 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c | |||
@@ -280,12 +280,9 @@ void __init pci_direct_init(int type) | |||
280 | 280 | ||
281 | int __init pci_direct_probe(void) | 281 | int __init pci_direct_probe(void) |
282 | { | 282 | { |
283 | struct resource *region, *region2; | ||
284 | |||
285 | if ((pci_probe & PCI_PROBE_CONF1) == 0) | 283 | if ((pci_probe & PCI_PROBE_CONF1) == 0) |
286 | goto type2; | 284 | goto type2; |
287 | region = request_region(0xCF8, 8, "PCI conf1"); | 285 | if (!request_region(0xCF8, 8, "PCI conf1")) |
288 | if (!region) | ||
289 | goto type2; | 286 | goto type2; |
290 | 287 | ||
291 | if (pci_check_type1()) { | 288 | if (pci_check_type1()) { |
@@ -293,16 +290,14 @@ int __init pci_direct_probe(void) | |||
293 | port_cf9_safe = true; | 290 | port_cf9_safe = true; |
294 | return 1; | 291 | return 1; |
295 | } | 292 | } |
296 | release_resource(region); | 293 | release_region(0xCF8, 8); |
297 | 294 | ||
298 | type2: | 295 | type2: |
299 | if ((pci_probe & PCI_PROBE_CONF2) == 0) | 296 | if ((pci_probe & PCI_PROBE_CONF2) == 0) |
300 | return 0; | 297 | return 0; |
301 | region = request_region(0xCF8, 4, "PCI conf2"); | 298 | if (!request_region(0xCF8, 4, "PCI conf2")) |
302 | if (!region) | ||
303 | return 0; | 299 | return 0; |
304 | region2 = request_region(0xC000, 0x1000, "PCI conf2"); | 300 | if (!request_region(0xC000, 0x1000, "PCI conf2")) |
305 | if (!region2) | ||
306 | goto fail2; | 301 | goto fail2; |
307 | 302 | ||
308 | if (pci_check_type2()) { | 303 | if (pci_check_type2()) { |
@@ -311,8 +306,8 @@ int __init pci_direct_probe(void) | |||
311 | return 2; | 306 | return 2; |
312 | } | 307 | } |
313 | 308 | ||
314 | release_resource(region2); | 309 | release_region(0xC000, 0x1000); |
315 | fail2: | 310 | fail2: |
316 | release_resource(region); | 311 | release_region(0xCF8, 4); |
317 | return 0; | 312 | return 0; |
318 | } | 313 | } |
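The pci_direct_probe() rewrite above treats request_region() as a simple claim-or-fail test and releases by port range, instead of carrying struct resource pointers around. A minimal sketch of the resulting probe shape, assuming pci_check_type1() from the same file; this is an illustration of the pattern, not a drop-in replacement:

	static int __init probe_conf1(void)
	{
		if (!request_region(0xCF8, 8, "PCI conf1"))	/* ports already claimed */
			return 0;
		if (pci_check_type1())
			return 1;				/* keep the region while conf1 is in use */
		release_region(0xCF8, 8);			/* undo the claim on failure */
		return 0;
	}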
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 8201165bae28..372e9b8989b3 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
@@ -602,7 +602,9 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route | |||
602 | || (device >= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN && | 602 | || (device >= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN && |
603 | device <= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX) | 603 | device <= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX) |
604 | || (device >= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN && | 604 | || (device >= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN && |
605 | device <= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX)) { | 605 | device <= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX) |
606 | || (device >= PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN && | ||
607 | device <= PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX)) { | ||
606 | r->name = "PIIX/ICH"; | 608 | r->name = "PIIX/ICH"; |
607 | r->get = pirq_piix_get; | 609 | r->get = pirq_piix_get; |
608 | r->set = pirq_piix_set; | 610 | r->set = pirq_piix_set; |
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index e282886616a0..750c346ef50a 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
@@ -606,6 +606,16 @@ static void __init __pci_mmcfg_init(int early) | |||
606 | if (list_empty(&pci_mmcfg_list)) | 606 | if (list_empty(&pci_mmcfg_list)) |
607 | return; | 607 | return; |
608 | 608 | ||
609 | if (pcibios_last_bus < 0) { | ||
610 | const struct pci_mmcfg_region *cfg; | ||
611 | |||
612 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { | ||
613 | if (cfg->segment) | ||
614 | break; | ||
615 | pcibios_last_bus = cfg->end_bus; | ||
616 | } | ||
617 | } | ||
618 | |||
609 | if (pci_mmcfg_arch_init()) | 619 | if (pci_mmcfg_arch_init()) |
610 | pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; | 620 | pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; |
611 | else { | 621 | else { |
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index e37b407a0ee8..8214724ce54d 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c | |||
@@ -108,7 +108,8 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
108 | } | 108 | } |
109 | irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0, | 109 | irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0, |
110 | (type == PCI_CAP_ID_MSIX) ? | 110 | (type == PCI_CAP_ID_MSIX) ? |
111 | "msi-x" : "msi"); | 111 | "msi-x" : "msi", |
112 | DOMID_SELF); | ||
112 | if (irq < 0) | 113 | if (irq < 0) |
113 | goto error; | 114 | goto error; |
114 | dev_dbg(&dev->dev, | 115 | dev_dbg(&dev->dev, |
@@ -148,7 +149,8 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
148 | irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0, | 149 | irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0, |
149 | (type == PCI_CAP_ID_MSIX) ? | 150 | (type == PCI_CAP_ID_MSIX) ? |
150 | "pcifront-msi-x" : | 151 | "pcifront-msi-x" : |
151 | "pcifront-msi"); | 152 | "pcifront-msi", |
153 | DOMID_SELF); | ||
152 | if (irq < 0) | 154 | if (irq < 0) |
153 | goto free; | 155 | goto free; |
154 | i++; | 156 | i++; |
@@ -190,9 +192,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
190 | 192 | ||
191 | list_for_each_entry(msidesc, &dev->msi_list, list) { | 193 | list_for_each_entry(msidesc, &dev->msi_list, list) { |
192 | struct physdev_map_pirq map_irq; | 194 | struct physdev_map_pirq map_irq; |
195 | domid_t domid; | ||
196 | |||
197 | domid = ret = xen_find_device_domain_owner(dev); | ||
198 | /* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED, | ||
199 | * hence check ret value for < 0. */ | ||
200 | if (ret < 0) | ||
201 | domid = DOMID_SELF; | ||
193 | 202 | ||
194 | memset(&map_irq, 0, sizeof(map_irq)); | 203 | memset(&map_irq, 0, sizeof(map_irq)); |
195 | map_irq.domid = DOMID_SELF; | 204 | map_irq.domid = domid; |
196 | map_irq.type = MAP_PIRQ_TYPE_MSI; | 205 | map_irq.type = MAP_PIRQ_TYPE_MSI; |
197 | map_irq.index = -1; | 206 | map_irq.index = -1; |
198 | map_irq.pirq = -1; | 207 | map_irq.pirq = -1; |
@@ -215,14 +224,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
215 | 224 | ||
216 | ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); | 225 | ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); |
217 | if (ret) { | 226 | if (ret) { |
218 | dev_warn(&dev->dev, "xen map irq failed %d\n", ret); | 227 | dev_warn(&dev->dev, "xen map irq failed %d for domain %d\n", |
228 | ret, domid); | ||
219 | goto out; | 229 | goto out; |
220 | } | 230 | } |
221 | 231 | ||
222 | ret = xen_bind_pirq_msi_to_irq(dev, msidesc, | 232 | ret = xen_bind_pirq_msi_to_irq(dev, msidesc, |
223 | map_irq.pirq, map_irq.index, | 233 | map_irq.pirq, map_irq.index, |
224 | (type == PCI_CAP_ID_MSIX) ? | 234 | (type == PCI_CAP_ID_MSIX) ? |
225 | "msi-x" : "msi"); | 235 | "msi-x" : "msi", |
236 | domid); | ||
226 | if (ret < 0) | 237 | if (ret < 0) |
227 | goto out; | 238 | goto out; |
228 | } | 239 | } |
@@ -461,3 +472,78 @@ void __init xen_setup_pirqs(void) | |||
461 | } | 472 | } |
462 | } | 473 | } |
463 | #endif | 474 | #endif |
475 | |||
476 | #ifdef CONFIG_XEN_DOM0 | ||
477 | struct xen_device_domain_owner { | ||
478 | domid_t domain; | ||
479 | struct pci_dev *dev; | ||
480 | struct list_head list; | ||
481 | }; | ||
482 | |||
483 | static DEFINE_SPINLOCK(dev_domain_list_spinlock); | ||
484 | static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list); | ||
485 | |||
486 | static struct xen_device_domain_owner *find_device(struct pci_dev *dev) | ||
487 | { | ||
488 | struct xen_device_domain_owner *owner; | ||
489 | |||
490 | list_for_each_entry(owner, &dev_domain_list, list) { | ||
491 | if (owner->dev == dev) | ||
492 | return owner; | ||
493 | } | ||
494 | return NULL; | ||
495 | } | ||
496 | |||
497 | int xen_find_device_domain_owner(struct pci_dev *dev) | ||
498 | { | ||
499 | struct xen_device_domain_owner *owner; | ||
500 | int domain = -ENODEV; | ||
501 | |||
502 | spin_lock(&dev_domain_list_spinlock); | ||
503 | owner = find_device(dev); | ||
504 | if (owner) | ||
505 | domain = owner->domain; | ||
506 | spin_unlock(&dev_domain_list_spinlock); | ||
507 | return domain; | ||
508 | } | ||
509 | EXPORT_SYMBOL_GPL(xen_find_device_domain_owner); | ||
510 | |||
511 | int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain) | ||
512 | { | ||
513 | struct xen_device_domain_owner *owner; | ||
514 | |||
515 | owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL); | ||
516 | if (!owner) | ||
517 | return -ENODEV; | ||
518 | |||
519 | spin_lock(&dev_domain_list_spinlock); | ||
520 | if (find_device(dev)) { | ||
521 | spin_unlock(&dev_domain_list_spinlock); | ||
522 | kfree(owner); | ||
523 | return -EEXIST; | ||
524 | } | ||
525 | owner->domain = domain; | ||
526 | owner->dev = dev; | ||
527 | list_add_tail(&owner->list, &dev_domain_list); | ||
528 | spin_unlock(&dev_domain_list_spinlock); | ||
529 | return 0; | ||
530 | } | ||
531 | EXPORT_SYMBOL_GPL(xen_register_device_domain_owner); | ||
532 | |||
533 | int xen_unregister_device_domain_owner(struct pci_dev *dev) | ||
534 | { | ||
535 | struct xen_device_domain_owner *owner; | ||
536 | |||
537 | spin_lock(&dev_domain_list_spinlock); | ||
538 | owner = find_device(dev); | ||
539 | if (!owner) { | ||
540 | spin_unlock(&dev_domain_list_spinlock); | ||
541 | return -ENODEV; | ||
542 | } | ||
543 | list_del(&owner->list); | ||
544 | spin_unlock(&dev_domain_list_spinlock); | ||
545 | kfree(owner); | ||
546 | return 0; | ||
547 | } | ||
548 | EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner); | ||
549 | #endif | ||
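The N.B. in xen_initdom_setup_msi_irqs() is worth spelling out: a negative errno assigned to a 16-bit domid_t silently becomes a large, valid-looking domain id, so the sign check has to happen on the int before the narrowing store. A standalone, runnable illustration (values invented; only -ENODEV = -19 is taken from the kernel):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		int ret = -19;			/* -ENODEV */
		uint16_t domid = ret;		/* truncates to 0xFFED (65517) */

		printf("ret=%d, naive domid=0x%04X\n", ret, domid);

		if (ret < 0)			/* correct: test before narrowing */
			domid = 0;		/* e.g. fall back to DOMID_SELF */
		printf("domid after check=%u\n", domid);
		return 0;
	}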
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts index dc701ea58546..e70be38ce039 100644 --- a/arch/x86/platform/ce4100/falconfalls.dts +++ b/arch/x86/platform/ce4100/falconfalls.dts | |||
@@ -74,6 +74,7 @@ | |||
74 | compatible = "intel,ce4100-pci", "pci"; | 74 | compatible = "intel,ce4100-pci", "pci"; |
75 | device_type = "pci"; | 75 | device_type = "pci"; |
76 | bus-range = <1 1>; | 76 | bus-range = <1 1>; |
77 | reg = <0x0800 0x0 0x0 0x0 0x0>; | ||
77 | ranges = <0x2000000 0 0xdffe0000 0x2000000 0 0xdffe0000 0 0x1000>; | 78 | ranges = <0x2000000 0 0xdffe0000 0x2000000 0 0xdffe0000 0 0x1000>; |
78 | 79 | ||
79 | interrupt-parent = <&ioapic2>; | 80 | interrupt-parent = <&ioapic2>; |
@@ -346,7 +347,7 @@ | |||
346 | "pciclass0c03"; | 347 | "pciclass0c03"; |
347 | 348 | ||
348 | reg = <0x16800 0x0 0x0 0x0 0x0>; | 349 | reg = <0x16800 0x0 0x0 0x0 0x0>; |
349 | interrupts = <22 3>; | 350 | interrupts = <22 1>; |
350 | }; | 351 | }; |
351 | 352 | ||
352 | usb@d,1 { | 353 | usb@d,1 { |
@@ -356,7 +357,7 @@ | |||
356 | "pciclass0c03"; | 357 | "pciclass0c03"; |
357 | 358 | ||
358 | reg = <0x16900 0x0 0x0 0x0 0x0>; | 359 | reg = <0x16900 0x0 0x0 0x0 0x0>; |
359 | interrupts = <22 3>; | 360 | interrupts = <22 1>; |
360 | }; | 361 | }; |
361 | 362 | ||
362 | sata@e,0 { | 363 | sata@e,0 { |
@@ -366,7 +367,7 @@ | |||
366 | "pciclass0106"; | 367 | "pciclass0106"; |
367 | 368 | ||
368 | reg = <0x17000 0x0 0x0 0x0 0x0>; | 369 | reg = <0x17000 0x0 0x0 0x0 0x0>; |
369 | interrupts = <23 3>; | 370 | interrupts = <23 1>; |
370 | }; | 371 | }; |
371 | 372 | ||
372 | flash@f,0 { | 373 | flash@f,0 { |
@@ -412,6 +413,7 @@ | |||
412 | #address-cells = <2>; | 413 | #address-cells = <2>; |
413 | #size-cells = <1>; | 414 | #size-cells = <1>; |
414 | compatible = "isa"; | 415 | compatible = "isa"; |
416 | reg = <0xf800 0x0 0x0 0x0 0x0>; | ||
415 | ranges = <1 0 0 0 0 0x100>; | 417 | ranges = <1 0 0 0 0 0x100>; |
416 | 418 | ||
417 | rtc@70 { | 419 | rtc@70 { |
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 0fe27d7c6258..b30aa26a8df2 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -145,17 +145,6 @@ static void virt_efi_reset_system(int reset_type, | |||
145 | data_size, data); | 145 | data_size, data); |
146 | } | 146 | } |
147 | 147 | ||
148 | static efi_status_t virt_efi_set_virtual_address_map( | ||
149 | unsigned long memory_map_size, | ||
150 | unsigned long descriptor_size, | ||
151 | u32 descriptor_version, | ||
152 | efi_memory_desc_t *virtual_map) | ||
153 | { | ||
154 | return efi_call_virt4(set_virtual_address_map, | ||
155 | memory_map_size, descriptor_size, | ||
156 | descriptor_version, virtual_map); | ||
157 | } | ||
158 | |||
159 | static efi_status_t __init phys_efi_set_virtual_address_map( | 148 | static efi_status_t __init phys_efi_set_virtual_address_map( |
160 | unsigned long memory_map_size, | 149 | unsigned long memory_map_size, |
161 | unsigned long descriptor_size, | 150 | unsigned long descriptor_size, |
@@ -468,11 +457,25 @@ void __init efi_init(void) | |||
468 | #endif | 457 | #endif |
469 | } | 458 | } |
470 | 459 | ||
460 | void __init efi_set_executable(efi_memory_desc_t *md, bool executable) | ||
461 | { | ||
462 | u64 addr, npages; | ||
463 | |||
464 | addr = md->virt_addr; | ||
465 | npages = md->num_pages; | ||
466 | |||
467 | memrange_efi_to_native(&addr, &npages); | ||
468 | |||
469 | if (executable) | ||
470 | set_memory_x(addr, npages); | ||
471 | else | ||
472 | set_memory_nx(addr, npages); | ||
473 | } | ||
474 | |||
471 | static void __init runtime_code_page_mkexec(void) | 475 | static void __init runtime_code_page_mkexec(void) |
472 | { | 476 | { |
473 | efi_memory_desc_t *md; | 477 | efi_memory_desc_t *md; |
474 | void *p; | 478 | void *p; |
475 | u64 addr, npages; | ||
476 | 479 | ||
477 | /* Make EFI runtime service code area executable */ | 480 | /* Make EFI runtime service code area executable */ |
478 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | 481 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { |
@@ -481,10 +484,7 @@ static void __init runtime_code_page_mkexec(void) | |||
481 | if (md->type != EFI_RUNTIME_SERVICES_CODE) | 484 | if (md->type != EFI_RUNTIME_SERVICES_CODE) |
482 | continue; | 485 | continue; |
483 | 486 | ||
484 | addr = md->virt_addr; | 487 | efi_set_executable(md, true); |
485 | npages = md->num_pages; | ||
486 | memrange_efi_to_native(&addr, &npages); | ||
487 | set_memory_x(addr, npages); | ||
488 | } | 488 | } |
489 | } | 489 | } |
490 | 490 | ||
@@ -498,13 +498,42 @@ static void __init runtime_code_page_mkexec(void) | |||
498 | */ | 498 | */ |
499 | void __init efi_enter_virtual_mode(void) | 499 | void __init efi_enter_virtual_mode(void) |
500 | { | 500 | { |
501 | efi_memory_desc_t *md; | 501 | efi_memory_desc_t *md, *prev_md = NULL; |
502 | efi_status_t status; | 502 | efi_status_t status; |
503 | unsigned long size; | 503 | unsigned long size; |
504 | u64 end, systab, addr, npages, end_pfn; | 504 | u64 end, systab, addr, npages, end_pfn; |
505 | void *p, *va; | 505 | void *p, *va, *new_memmap = NULL; |
506 | int count = 0; | ||
506 | 507 | ||
507 | efi.systab = NULL; | 508 | efi.systab = NULL; |
509 | |||
510 | /* Merge contiguous regions of the same type and attribute */ | ||
511 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
512 | u64 prev_size; | ||
513 | md = p; | ||
514 | |||
515 | if (!prev_md) { | ||
516 | prev_md = md; | ||
517 | continue; | ||
518 | } | ||
519 | |||
520 | if (prev_md->type != md->type || | ||
521 | prev_md->attribute != md->attribute) { | ||
522 | prev_md = md; | ||
523 | continue; | ||
524 | } | ||
525 | |||
526 | prev_size = prev_md->num_pages << EFI_PAGE_SHIFT; | ||
527 | |||
528 | if (md->phys_addr == (prev_md->phys_addr + prev_size)) { | ||
529 | prev_md->num_pages += md->num_pages; | ||
530 | md->type = EFI_RESERVED_TYPE; | ||
531 | md->attribute = 0; | ||
532 | continue; | ||
533 | } | ||
534 | prev_md = md; | ||
535 | } | ||
536 | |||
508 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | 537 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { |
509 | md = p; | 538 | md = p; |
510 | if (!(md->attribute & EFI_MEMORY_RUNTIME)) | 539 | if (!(md->attribute & EFI_MEMORY_RUNTIME)) |
@@ -541,15 +570,21 @@ void __init efi_enter_virtual_mode(void) | |||
541 | systab += md->virt_addr - md->phys_addr; | 570 | systab += md->virt_addr - md->phys_addr; |
542 | efi.systab = (efi_system_table_t *) (unsigned long) systab; | 571 | efi.systab = (efi_system_table_t *) (unsigned long) systab; |
543 | } | 572 | } |
573 | new_memmap = krealloc(new_memmap, | ||
574 | (count + 1) * memmap.desc_size, | ||
575 | GFP_KERNEL); | ||
576 | memcpy(new_memmap + (count * memmap.desc_size), md, | ||
577 | memmap.desc_size); | ||
578 | count++; | ||
544 | } | 579 | } |
545 | 580 | ||
546 | BUG_ON(!efi.systab); | 581 | BUG_ON(!efi.systab); |
547 | 582 | ||
548 | status = phys_efi_set_virtual_address_map( | 583 | status = phys_efi_set_virtual_address_map( |
549 | memmap.desc_size * memmap.nr_map, | 584 | memmap.desc_size * count, |
550 | memmap.desc_size, | 585 | memmap.desc_size, |
551 | memmap.desc_version, | 586 | memmap.desc_version, |
552 | memmap.phys_map); | 587 | (efi_memory_desc_t *)__pa(new_memmap)); |
553 | 588 | ||
554 | if (status != EFI_SUCCESS) { | 589 | if (status != EFI_SUCCESS) { |
555 | printk(KERN_ALERT "Unable to switch EFI into virtual mode " | 590 | printk(KERN_ALERT "Unable to switch EFI into virtual mode " |
@@ -572,11 +607,12 @@ void __init efi_enter_virtual_mode(void) | |||
572 | efi.set_variable = virt_efi_set_variable; | 607 | efi.set_variable = virt_efi_set_variable; |
573 | efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; | 608 | efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; |
574 | efi.reset_system = virt_efi_reset_system; | 609 | efi.reset_system = virt_efi_reset_system; |
575 | efi.set_virtual_address_map = virt_efi_set_virtual_address_map; | 610 | efi.set_virtual_address_map = NULL; |
576 | if (__supported_pte_mask & _PAGE_NX) | 611 | if (__supported_pte_mask & _PAGE_NX) |
577 | runtime_code_page_mkexec(); | 612 | runtime_code_page_mkexec(); |
578 | early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); | 613 | early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); |
579 | memmap.map = NULL; | 614 | memmap.map = NULL; |
615 | kfree(new_memmap); | ||
580 | } | 616 | } |
581 | 617 | ||
582 | /* | 618 | /* |
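The new merge loop at the top of efi_enter_virtual_mode() folds physically adjacent descriptors of identical type and attribute into one, shrinking the map that is handed to SetVirtualAddressMap(). A standalone sketch of the same folding over a plain array (the struct and values are invented for illustration; field names mirror the EFI descriptor):

	#include <stdio.h>
	#include <stdint.h>

	#define PAGE_SHIFT 12

	struct region {
		uint64_t phys_addr;
		uint64_t num_pages;
		uint32_t type;
		uint64_t attribute;
	};

	static void merge_contiguous(struct region *r, int n)
	{
		struct region *prev = NULL;
		int i;

		for (i = 0; i < n; i++) {
			if (prev && prev->type == r[i].type &&
			    prev->attribute == r[i].attribute &&
			    r[i].phys_addr ==
			    prev->phys_addr + (prev->num_pages << PAGE_SHIFT)) {
				prev->num_pages += r[i].num_pages;
				r[i].num_pages = 0;	/* mark absorbed */
				continue;
			}
			prev = &r[i];
		}
	}

	int main(void)
	{
		struct region map[] = {
			{ 0x100000, 16, 4, 0x8 },
			{ 0x110000, 16, 4, 0x8 },	/* adjacent, same type/attr */
			{ 0x200000,  8, 4, 0x8 },	/* not adjacent: kept separate */
		};

		merge_contiguous(map, 3);
		printf("first region now spans %llu pages\n",
		       (unsigned long long)map[0].num_pages);
		return 0;
	}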
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index ac0621a7ac3d..2649426a7905 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c | |||
@@ -41,22 +41,7 @@ | |||
41 | static pgd_t save_pgd __initdata; | 41 | static pgd_t save_pgd __initdata; |
42 | static unsigned long efi_flags __initdata; | 42 | static unsigned long efi_flags __initdata; |
43 | 43 | ||
44 | static void __init early_mapping_set_exec(unsigned long start, | 44 | static void __init early_code_mapping_set_exec(int executable) |
45 | unsigned long end, | ||
46 | int executable) | ||
47 | { | ||
48 | unsigned long num_pages; | ||
49 | |||
50 | start &= PMD_MASK; | ||
51 | end = (end + PMD_SIZE - 1) & PMD_MASK; | ||
52 | num_pages = (end - start) >> PAGE_SHIFT; | ||
53 | if (executable) | ||
54 | set_memory_x((unsigned long)__va(start), num_pages); | ||
55 | else | ||
56 | set_memory_nx((unsigned long)__va(start), num_pages); | ||
57 | } | ||
58 | |||
59 | static void __init early_runtime_code_mapping_set_exec(int executable) | ||
60 | { | 45 | { |
61 | efi_memory_desc_t *md; | 46 | efi_memory_desc_t *md; |
62 | void *p; | 47 | void *p; |
@@ -67,11 +52,8 @@ static void __init early_runtime_code_mapping_set_exec(int executable) | |||
67 | /* Make EFI runtime service code area executable */ | 52 | /* Make EFI runtime service code area executable */ |
68 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | 53 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { |
69 | md = p; | 54 | md = p; |
70 | if (md->type == EFI_RUNTIME_SERVICES_CODE) { | 55 | if (md->type == EFI_RUNTIME_SERVICES_CODE) |
71 | unsigned long end; | 56 | efi_set_executable(md, executable); |
72 | end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); | ||
73 | early_mapping_set_exec(md->phys_addr, end, executable); | ||
74 | } | ||
75 | } | 57 | } |
76 | } | 58 | } |
77 | 59 | ||
@@ -79,7 +61,7 @@ void __init efi_call_phys_prelog(void) | |||
79 | { | 61 | { |
80 | unsigned long vaddress; | 62 | unsigned long vaddress; |
81 | 63 | ||
82 | early_runtime_code_mapping_set_exec(1); | 64 | early_code_mapping_set_exec(1); |
83 | local_irq_save(efi_flags); | 65 | local_irq_save(efi_flags); |
84 | vaddress = (unsigned long)__va(0x0UL); | 66 | vaddress = (unsigned long)__va(0x0UL); |
85 | save_pgd = *pgd_offset_k(0x0UL); | 67 | save_pgd = *pgd_offset_k(0x0UL); |
@@ -95,7 +77,7 @@ void __init efi_call_phys_epilog(void) | |||
95 | set_pgd(pgd_offset_k(0x0UL), save_pgd); | 77 | set_pgd(pgd_offset_k(0x0UL), save_pgd); |
96 | __flush_tlb_all(); | 78 | __flush_tlb_all(); |
97 | local_irq_restore(efi_flags); | 79 | local_irq_restore(efi_flags); |
98 | early_runtime_code_mapping_set_exec(0); | 80 | early_code_mapping_set_exec(0); |
99 | } | 81 | } |
100 | 82 | ||
101 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, | 83 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, |
@@ -107,8 +89,10 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, | |||
107 | return ioremap(phys_addr, size); | 89 | return ioremap(phys_addr, size); |
108 | 90 | ||
109 | last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); | 91 | last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); |
110 | if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) | 92 | if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { |
111 | return NULL; | 93 | unsigned long top = last_map_pfn << PAGE_SHIFT; |
94 | efi_ioremap(top, size - (top - phys_addr), type); | ||
95 | } | ||
112 | 96 | ||
113 | return (void __iomem *)__va(phys_addr); | 97 | return (void __iomem *)__va(phys_addr); |
114 | } | 98 | } |
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index 5c0207bf959b..7000e74b3087 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c | |||
@@ -97,11 +97,11 @@ static int __init sfi_parse_mtmr(struct sfi_table_header *table) | |||
97 | pentry->freq_hz, pentry->irq); | 97 | pentry->freq_hz, pentry->irq); |
98 | if (!pentry->irq) | 98 | if (!pentry->irq) |
99 | continue; | 99 | continue; |
100 | mp_irq.type = MP_IOAPIC; | 100 | mp_irq.type = MP_INTSRC; |
101 | mp_irq.irqtype = mp_INT; | 101 | mp_irq.irqtype = mp_INT; |
102 | /* triggering mode edge bit 2-3, active high polarity bit 0-1 */ | 102 | /* triggering mode edge bit 2-3, active high polarity bit 0-1 */ |
103 | mp_irq.irqflag = 5; | 103 | mp_irq.irqflag = 5; |
104 | mp_irq.srcbus = 0; | 104 | mp_irq.srcbus = MP_BUS_ISA; |
105 | mp_irq.srcbusirq = pentry->irq; /* IRQ */ | 105 | mp_irq.srcbusirq = pentry->irq; /* IRQ */ |
106 | mp_irq.dstapic = MP_APIC_ALL; | 106 | mp_irq.dstapic = MP_APIC_ALL; |
107 | mp_irq.dstirq = pentry->irq; | 107 | mp_irq.dstirq = pentry->irq; |
@@ -168,10 +168,10 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table) | |||
168 | for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { | 168 | for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { |
169 | pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n", | 169 | pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n", |
170 | totallen, (u32)pentry->phys_addr, pentry->irq); | 170 | totallen, (u32)pentry->phys_addr, pentry->irq); |
171 | mp_irq.type = MP_IOAPIC; | 171 | mp_irq.type = MP_INTSRC; |
172 | mp_irq.irqtype = mp_INT; | 172 | mp_irq.irqtype = mp_INT; |
173 | mp_irq.irqflag = 0xf; /* level trigger and active low */ | 173 | mp_irq.irqflag = 0xf; /* level trigger and active low */ |
174 | mp_irq.srcbus = 0; | 174 | mp_irq.srcbus = MP_BUS_ISA; |
175 | mp_irq.srcbusirq = pentry->irq; /* IRQ */ | 175 | mp_irq.srcbusirq = pentry->irq; /* IRQ */ |
176 | mp_irq.dstapic = MP_APIC_ALL; | 176 | mp_irq.dstapic = MP_APIC_ALL; |
177 | mp_irq.dstirq = pentry->irq; | 177 | mp_irq.dstirq = pentry->irq; |
@@ -194,7 +194,7 @@ static unsigned long __init mrst_calibrate_tsc(void) | |||
194 | return 0; | 194 | return 0; |
195 | } | 195 | } |
196 | 196 | ||
197 | void __init mrst_time_init(void) | 197 | static void __init mrst_time_init(void) |
198 | { | 198 | { |
199 | sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); | 199 | sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); |
200 | switch (mrst_timer_options) { | 200 | switch (mrst_timer_options) { |
@@ -216,7 +216,7 @@ void __init mrst_time_init(void) | |||
216 | apbt_time_init(); | 216 | apbt_time_init(); |
217 | } | 217 | } |
218 | 218 | ||
219 | void __cpuinit mrst_arch_setup(void) | 219 | static void __cpuinit mrst_arch_setup(void) |
220 | { | 220 | { |
221 | if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) | 221 | if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) |
222 | __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; | 222 | __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; |
@@ -282,7 +282,7 @@ void __init x86_mrst_early_setup(void) | |||
282 | /* Avoid searching for BIOS MP tables */ | 282 | /* Avoid searching for BIOS MP tables */ |
283 | x86_init.mpparse.find_smp_config = x86_init_noop; | 283 | x86_init.mpparse.find_smp_config = x86_init_noop; |
284 | x86_init.mpparse.get_smp_config = x86_init_uint_noop; | 284 | x86_init.mpparse.get_smp_config = x86_init_uint_noop; |
285 | 285 | set_bit(MP_BUS_ISA, mp_bus_not_pci); | |
286 | } | 286 | } |
287 | 287 | ||
288 | /* | 288 | /* |
diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile index c2a8cab65e5d..81c5e2165c24 100644 --- a/arch/x86/platform/olpc/Makefile +++ b/arch/x86/platform/olpc/Makefile | |||
@@ -1,4 +1,2 @@ | |||
1 | obj-$(CONFIG_OLPC) += olpc.o | 1 | obj-$(CONFIG_OLPC) += olpc.o olpc_ofw.o olpc_dt.o |
2 | obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o | 2 | obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o |
3 | obj-$(CONFIG_OLPC) += olpc_ofw.o | ||
4 | obj-$(CONFIG_OF_PROMTREE) += olpc_dt.o | ||
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c index edaf3fe8dc5e..0060fd59ea00 100644 --- a/arch/x86/platform/olpc/olpc.c +++ b/arch/x86/platform/olpc/olpc.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/io.h> | 18 | #include <linux/io.h> |
19 | #include <linux/string.h> | 19 | #include <linux/string.h> |
20 | #include <linux/platform_device.h> | 20 | #include <linux/platform_device.h> |
21 | #include <linux/of.h> | ||
21 | 22 | ||
22 | #include <asm/geode.h> | 23 | #include <asm/geode.h> |
23 | #include <asm/setup.h> | 24 | #include <asm/setup.h> |
@@ -187,41 +188,43 @@ err: | |||
187 | } | 188 | } |
188 | EXPORT_SYMBOL_GPL(olpc_ec_cmd); | 189 | EXPORT_SYMBOL_GPL(olpc_ec_cmd); |
189 | 190 | ||
190 | static bool __init check_ofw_architecture(void) | 191 | static bool __init check_ofw_architecture(struct device_node *root) |
191 | { | 192 | { |
192 | size_t propsize; | 193 | const char *olpc_arch; |
193 | char olpc_arch[5]; | 194 | int propsize; |
194 | const void *args[] = { NULL, "architecture", olpc_arch, (void *)5 }; | ||
195 | void *res[] = { &propsize }; | ||
196 | 195 | ||
197 | if (olpc_ofw("getprop", args, res)) { | 196 | olpc_arch = of_get_property(root, "architecture", &propsize); |
198 | printk(KERN_ERR "ofw: getprop call failed!\n"); | ||
199 | return false; | ||
200 | } | ||
201 | return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0; | 197 | return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0; |
202 | } | 198 | } |
203 | 199 | ||
204 | static u32 __init get_board_revision(void) | 200 | static u32 __init get_board_revision(struct device_node *root) |
205 | { | 201 | { |
206 | size_t propsize; | 202 | int propsize; |
207 | __be32 rev; | 203 | const __be32 *rev; |
208 | const void *args[] = { NULL, "board-revision-int", &rev, (void *)4 }; | 204 | |
209 | void *res[] = { &propsize }; | 205 | rev = of_get_property(root, "board-revision-int", &propsize); |
210 | 206 | if (propsize != 4) | |
211 | if (olpc_ofw("getprop", args, res) || propsize != 4) { | 207 | return 0; |
212 | printk(KERN_ERR "ofw: getprop call failed!\n"); | 208 | |
213 | return cpu_to_be32(0); | 209 | return be32_to_cpu(*rev); |
214 | } | ||
215 | return be32_to_cpu(rev); | ||
216 | } | 210 | } |
217 | 211 | ||
218 | static bool __init platform_detect(void) | 212 | static bool __init platform_detect(void) |
219 | { | 213 | { |
220 | if (!check_ofw_architecture()) | 214 | struct device_node *root = of_find_node_by_path("/"); |
215 | bool success; | ||
216 | |||
217 | if (!root) | ||
221 | return false; | 218 | return false; |
222 | olpc_platform_info.flags |= OLPC_F_PRESENT; | 219 | |
223 | olpc_platform_info.boardrev = get_board_revision(); | 220 | success = check_ofw_architecture(root); |
224 | return true; | 221 | if (success) { |
222 | olpc_platform_info.boardrev = get_board_revision(root); | ||
223 | olpc_platform_info.flags |= OLPC_F_PRESENT; | ||
224 | } | ||
225 | |||
226 | of_node_put(root); | ||
227 | return success; | ||
225 | } | 228 | } |
226 | 229 | ||
227 | static int __init add_xo1_platform_devices(void) | 230 | static int __init add_xo1_platform_devices(void) |
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c index 044bda5b3174..d39f63d017d2 100644 --- a/arch/x86/platform/olpc/olpc_dt.c +++ b/arch/x86/platform/olpc/olpc_dt.c | |||
@@ -19,7 +19,9 @@ | |||
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/bootmem.h> | 20 | #include <linux/bootmem.h> |
21 | #include <linux/of.h> | 21 | #include <linux/of.h> |
22 | #include <linux/of_platform.h> | ||
22 | #include <linux/of_pdt.h> | 23 | #include <linux/of_pdt.h> |
24 | #include <asm/olpc.h> | ||
23 | #include <asm/olpc_ofw.h> | 25 | #include <asm/olpc_ofw.h> |
24 | 26 | ||
25 | static phandle __init olpc_dt_getsibling(phandle node) | 27 | static phandle __init olpc_dt_getsibling(phandle node) |
@@ -180,3 +182,20 @@ void __init olpc_dt_build_devicetree(void) | |||
180 | pr_info("PROM DT: Built device tree with %u bytes of memory.\n", | 182 | pr_info("PROM DT: Built device tree with %u bytes of memory.\n", |
181 | prom_early_allocated); | 183 | prom_early_allocated); |
182 | } | 184 | } |
185 | |||
186 | /* A list of DT node/bus matches that we want to expose as platform devices */ | ||
187 | static struct of_device_id __initdata of_ids[] = { | ||
188 | { .compatible = "olpc,xo1-battery" }, | ||
189 | { .compatible = "olpc,xo1-dcon" }, | ||
190 | { .compatible = "olpc,xo1-rtc" }, | ||
191 | {}, | ||
192 | }; | ||
193 | |||
194 | static int __init olpc_create_platform_devices(void) | ||
195 | { | ||
196 | if (machine_is_olpc()) | ||
197 | return of_platform_bus_probe(NULL, of_ids, NULL); | ||
198 | else | ||
199 | return 0; | ||
200 | } | ||
201 | device_initcall(olpc_create_platform_devices); | ||
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 7cb6424317f6..c58e0ea39ef5 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c | |||
@@ -699,16 +699,17 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
699 | struct mm_struct *mm, | 699 | struct mm_struct *mm, |
700 | unsigned long va, unsigned int cpu) | 700 | unsigned long va, unsigned int cpu) |
701 | { | 701 | { |
702 | int tcpu; | ||
703 | int uvhub; | ||
704 | int locals = 0; | 702 | int locals = 0; |
705 | int remotes = 0; | 703 | int remotes = 0; |
706 | int hubs = 0; | 704 | int hubs = 0; |
705 | int tcpu; | ||
706 | int tpnode; | ||
707 | struct bau_desc *bau_desc; | 707 | struct bau_desc *bau_desc; |
708 | struct cpumask *flush_mask; | 708 | struct cpumask *flush_mask; |
709 | struct ptc_stats *stat; | 709 | struct ptc_stats *stat; |
710 | struct bau_control *bcp; | 710 | struct bau_control *bcp; |
711 | struct bau_control *tbcp; | 711 | struct bau_control *tbcp; |
712 | struct hub_and_pnode *hpp; | ||
712 | 713 | ||
713 | /* kernel was booted 'nobau' */ | 714 | /* kernel was booted 'nobau' */ |
714 | if (nobau) | 715 | if (nobau) |
@@ -750,11 +751,18 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
750 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; | 751 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; |
751 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); | 752 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); |
752 | 753 | ||
753 | /* cpu statistics */ | ||
754 | for_each_cpu(tcpu, flush_mask) { | 754 | for_each_cpu(tcpu, flush_mask) { |
755 | uvhub = uv_cpu_to_blade_id(tcpu); | 755 | /* |
756 | bau_uvhub_set(uvhub, &bau_desc->distribution); | 756 | * The distribution vector is a bit map of pnodes, relative |
757 | if (uvhub == bcp->uvhub) | 757 | * to the partition base pnode (and the partition base nasid |
758 | * in the header). | ||
759 | * Translate cpu to pnode and hub using an array stored | ||
760 | * in local memory. | ||
761 | */ | ||
762 | hpp = &bcp->socket_master->target_hub_and_pnode[tcpu]; | ||
763 | tpnode = hpp->pnode - bcp->partition_base_pnode; | ||
764 | bau_uvhub_set(tpnode, &bau_desc->distribution); | ||
765 | if (hpp->uvhub == bcp->uvhub) | ||
758 | locals++; | 766 | locals++; |
759 | else | 767 | else |
760 | remotes++; | 768 | remotes++; |
@@ -855,7 +863,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs) | |||
855 | * an interrupt, but causes an error message to be returned to | 863 | * an interrupt, but causes an error message to be returned to |
856 | * the sender. | 864 | * the sender. |
857 | */ | 865 | */ |
858 | static void uv_enable_timeouts(void) | 866 | static void __init uv_enable_timeouts(void) |
859 | { | 867 | { |
860 | int uvhub; | 868 | int uvhub; |
861 | int nuvhubs; | 869 | int nuvhubs; |
@@ -1326,10 +1334,10 @@ static int __init uv_ptc_init(void) | |||
1326 | } | 1334 | } |
1327 | 1335 | ||
1328 | /* | 1336 | /* |
1329 | * initialize the sending side's sending buffers | 1337 | * Initialize the sending side's sending buffers. |
1330 | */ | 1338 | */ |
1331 | static void | 1339 | static void |
1332 | uv_activation_descriptor_init(int node, int pnode) | 1340 | uv_activation_descriptor_init(int node, int pnode, int base_pnode) |
1333 | { | 1341 | { |
1334 | int i; | 1342 | int i; |
1335 | int cpu; | 1343 | int cpu; |
@@ -1352,11 +1360,11 @@ uv_activation_descriptor_init(int node, int pnode) | |||
1352 | n = pa >> uv_nshift; | 1360 | n = pa >> uv_nshift; |
1353 | m = pa & uv_mmask; | 1361 | m = pa & uv_mmask; |
1354 | 1362 | ||
1363 | /* the 14-bit pnode */ | ||
1355 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, | 1364 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, |
1356 | (n << UV_DESC_BASE_PNODE_SHIFT | m)); | 1365 | (n << UV_DESC_BASE_PNODE_SHIFT | m)); |
1357 | |||
1358 | /* | 1366 | /* |
1359 | * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each | 1367 | * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each |
1360 | * cpu even though we only use the first one; one descriptor can | 1368 | * cpu even though we only use the first one; one descriptor can |
1361 | * describe a broadcast to 256 uv hubs. | 1369 | * describe a broadcast to 256 uv hubs. |
1362 | */ | 1370 | */ |
@@ -1365,12 +1373,13 @@ uv_activation_descriptor_init(int node, int pnode) | |||
1365 | memset(bd2, 0, sizeof(struct bau_desc)); | 1373 | memset(bd2, 0, sizeof(struct bau_desc)); |
1366 | bd2->header.sw_ack_flag = 1; | 1374 | bd2->header.sw_ack_flag = 1; |
1367 | /* | 1375 | /* |
1368 | * base_dest_nodeid is the nasid of the first uvhub | 1376 | * The base_dest_nasid set in the message header is the nasid |
1369 | * in the partition. The bit map will indicate uvhub numbers, | 1377 | * of the first uvhub in the partition. The bit map will |
1370 | * which are 0-N in a partition. Pnodes are unique system-wide. | 1378 | * indicate destination pnode numbers relative to that base. |
1379 | * They may not be consecutive if nasid striding is being used. | ||
1371 | */ | 1380 | */ |
1372 | bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode); | 1381 | bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode); |
1373 | bd2->header.dest_subnodeid = 0x10; /* the LB */ | 1382 | bd2->header.dest_subnodeid = UV_LB_SUBNODEID; |
1374 | bd2->header.command = UV_NET_ENDPOINT_INTD; | 1383 | bd2->header.command = UV_NET_ENDPOINT_INTD; |
1375 | bd2->header.int_both = 1; | 1384 | bd2->header.int_both = 1; |
1376 | /* | 1385 | /* |
@@ -1442,7 +1451,7 @@ uv_payload_queue_init(int node, int pnode) | |||
1442 | /* | 1451 | /* |
1443 | * Initialization of each UV hub's structures | 1452 | * Initialization of each UV hub's structures |
1444 | */ | 1453 | */ |
1445 | static void __init uv_init_uvhub(int uvhub, int vector) | 1454 | static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode) |
1446 | { | 1455 | { |
1447 | int node; | 1456 | int node; |
1448 | int pnode; | 1457 | int pnode; |
@@ -1450,11 +1459,11 @@ static void __init uv_init_uvhub(int uvhub, int vector) | |||
1450 | 1459 | ||
1451 | node = uvhub_to_first_node(uvhub); | 1460 | node = uvhub_to_first_node(uvhub); |
1452 | pnode = uv_blade_to_pnode(uvhub); | 1461 | pnode = uv_blade_to_pnode(uvhub); |
1453 | uv_activation_descriptor_init(node, pnode); | 1462 | uv_activation_descriptor_init(node, pnode, base_pnode); |
1454 | uv_payload_queue_init(node, pnode); | 1463 | uv_payload_queue_init(node, pnode); |
1455 | /* | 1464 | /* |
1456 | * the below initialization can't be in firmware because the | 1465 | * The below initialization can't be in firmware because the |
1457 | * messaging IRQ will be determined by the OS | 1466 | * messaging IRQ will be determined by the OS. |
1458 | */ | 1467 | */ |
1459 | apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; | 1468 | apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; |
1460 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, | 1469 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, |
@@ -1491,10 +1500,11 @@ calculate_destination_timeout(void) | |||
1491 | /* | 1500 | /* |
1492 | * initialize the bau_control structure for each cpu | 1501 | * initialize the bau_control structure for each cpu |
1493 | */ | 1502 | */ |
1494 | static int __init uv_init_per_cpu(int nuvhubs) | 1503 | static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode) |
1495 | { | 1504 | { |
1496 | int i; | 1505 | int i; |
1497 | int cpu; | 1506 | int cpu; |
1507 | int tcpu; | ||
1498 | int pnode; | 1508 | int pnode; |
1499 | int uvhub; | 1509 | int uvhub; |
1500 | int have_hmaster; | 1510 | int have_hmaster; |
@@ -1528,6 +1538,15 @@ static int __init uv_init_per_cpu(int nuvhubs) | |||
1528 | bcp = &per_cpu(bau_control, cpu); | 1538 | bcp = &per_cpu(bau_control, cpu); |
1529 | memset(bcp, 0, sizeof(struct bau_control)); | 1539 | memset(bcp, 0, sizeof(struct bau_control)); |
1530 | pnode = uv_cpu_hub_info(cpu)->pnode; | 1540 | pnode = uv_cpu_hub_info(cpu)->pnode; |
1541 | if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) { | ||
1542 | printk(KERN_EMERG | ||
1543 | "cpu %d pnode %d-%d beyond %d; BAU disabled\n", | ||
1544 | cpu, pnode, base_part_pnode, | ||
1545 | UV_DISTRIBUTION_SIZE); | ||
1546 | return 1; | ||
1547 | } | ||
1548 | bcp->osnode = cpu_to_node(cpu); | ||
1549 | bcp->partition_base_pnode = uv_partition_base_pnode; | ||
1531 | uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; | 1550 | uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; |
1532 | *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); | 1551 | *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); |
1533 | bdp = &uvhub_descs[uvhub]; | 1552 | bdp = &uvhub_descs[uvhub]; |
@@ -1536,7 +1555,7 @@ static int __init uv_init_per_cpu(int nuvhubs) | |||
1536 | bdp->pnode = pnode; | 1555 | bdp->pnode = pnode; |
1537 | /* kludge: 'assuming' one node per socket, and assuming that | 1556 | /* kludge: 'assuming' one node per socket, and assuming that |
1538 | disabling a socket just leaves a gap in node numbers */ | 1557 | disabling a socket just leaves a gap in node numbers */ |
1539 | socket = (cpu_to_node(cpu) & 1); | 1558 | socket = bcp->osnode & 1; |
1540 | bdp->socket_mask |= (1 << socket); | 1559 | bdp->socket_mask |= (1 << socket); |
1541 | sdp = &bdp->socket[socket]; | 1560 | sdp = &bdp->socket[socket]; |
1542 | sdp->cpu_number[sdp->num_cpus] = cpu; | 1561 | sdp->cpu_number[sdp->num_cpus] = cpu; |
@@ -1585,6 +1604,20 @@ static int __init uv_init_per_cpu(int nuvhubs) | |||
1585 | nextsocket: | 1604 | nextsocket: |
1586 | socket++; | 1605 | socket++; |
1587 | socket_mask = (socket_mask >> 1); | 1606 | socket_mask = (socket_mask >> 1); |
1607 | /* each socket gets a local array of pnodes/hubs */ | ||
1608 | bcp = smaster; | ||
1609 | bcp->target_hub_and_pnode = kmalloc_node( | ||
1610 | sizeof(struct hub_and_pnode) * | ||
1611 | num_possible_cpus(), GFP_KERNEL, bcp->osnode); | ||
1612 | memset(bcp->target_hub_and_pnode, 0, | ||
1613 | sizeof(struct hub_and_pnode) * | ||
1614 | num_possible_cpus()); | ||
1615 | for_each_present_cpu(tcpu) { | ||
1616 | bcp->target_hub_and_pnode[tcpu].pnode = | ||
1617 | uv_cpu_hub_info(tcpu)->pnode; | ||
1618 | bcp->target_hub_and_pnode[tcpu].uvhub = | ||
1619 | uv_cpu_hub_info(tcpu)->numa_blade_id; | ||
1620 | } | ||
1588 | } | 1621 | } |
1589 | } | 1622 | } |
1590 | kfree(uvhub_descs); | 1623 | kfree(uvhub_descs); |
@@ -1637,21 +1670,22 @@ static int __init uv_bau_init(void) | |||
1637 | spin_lock_init(&disable_lock); | 1670 | spin_lock_init(&disable_lock); |
1638 | congested_cycles = microsec_2_cycles(congested_response_us); | 1671 | congested_cycles = microsec_2_cycles(congested_response_us); |
1639 | 1672 | ||
1640 | if (uv_init_per_cpu(nuvhubs)) { | ||
1641 | nobau = 1; | ||
1642 | return 0; | ||
1643 | } | ||
1644 | |||
1645 | uv_partition_base_pnode = 0x7fffffff; | 1673 | uv_partition_base_pnode = 0x7fffffff; |
1646 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) | 1674 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { |
1647 | if (uv_blade_nr_possible_cpus(uvhub) && | 1675 | if (uv_blade_nr_possible_cpus(uvhub) && |
1648 | (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) | 1676 | (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) |
1649 | uv_partition_base_pnode = uv_blade_to_pnode(uvhub); | 1677 | uv_partition_base_pnode = uv_blade_to_pnode(uvhub); |
1678 | } | ||
1679 | |||
1680 | if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) { | ||
1681 | nobau = 1; | ||
1682 | return 0; | ||
1683 | } | ||
1650 | 1684 | ||
1651 | vector = UV_BAU_MESSAGE; | 1685 | vector = UV_BAU_MESSAGE; |
1652 | for_each_possible_blade(uvhub) | 1686 | for_each_possible_blade(uvhub) |
1653 | if (uv_blade_nr_possible_cpus(uvhub)) | 1687 | if (uv_blade_nr_possible_cpus(uvhub)) |
1654 | uv_init_uvhub(uvhub, vector); | 1688 | uv_init_uvhub(uvhub, vector, uv_partition_base_pnode); |
1655 | 1689 | ||
1656 | uv_enable_timeouts(); | 1690 | uv_enable_timeouts(); |
1657 | alloc_intr_gate(vector, uv_bau_message_intr1); | 1691 | alloc_intr_gate(vector, uv_bau_message_intr1); |
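The uv_flush_tlb_others() change above switches the distribution vector from blade ids to pnodes expressed relative to the partition base pnode, looked up through the per-socket target_hub_and_pnode[] array. A standalone sketch of that translation and bitmap fill (table contents and sizes are invented for illustration):

	#include <stdio.h>

	#define UV_DISTRIBUTION_SIZE 256

	struct hub_and_pnode {
		short uvhub;
		short pnode;
	};

	static unsigned char distribution[UV_DISTRIBUTION_SIZE / 8];

	static void bau_target_set(int bit)
	{
		distribution[bit / 8] |= 1 << (bit % 8);
	}

	int main(void)
	{
		/* hypothetical per-socket translation table, indexed by cpu */
		struct hub_and_pnode target[4] = {
			{ 0, 8 }, { 0, 8 }, { 1, 10 }, { 2, 12 }
		};
		int base_pnode = 8;	/* lowest pnode in the partition */
		int cpu;

		for (cpu = 0; cpu < 4; cpu++)
			bau_target_set(target[cpu].pnode - base_pnode);

		for (cpu = 0; cpu < 8; cpu++)
			printf("bit %d: %d\n", cpu, (distribution[0] >> cpu) & 1);
		return 0;
	}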
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index 9daf5d1af9f1..0eb90184515f 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c | |||
@@ -40,7 +40,6 @@ static struct clocksource clocksource_uv = { | |||
40 | .rating = 400, | 40 | .rating = 400, |
41 | .read = uv_read_rtc, | 41 | .read = uv_read_rtc, |
42 | .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK, | 42 | .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK, |
43 | .shift = 10, | ||
44 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 43 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
45 | }; | 44 | }; |
46 | 45 | ||
@@ -372,14 +371,11 @@ static __init int uv_rtc_setup_clock(void) | |||
372 | if (!is_uv_system()) | 371 | if (!is_uv_system()) |
373 | return -ENODEV; | 372 | return -ENODEV; |
374 | 373 | ||
375 | clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, | ||
376 | clocksource_uv.shift); | ||
377 | |||
378 | /* If single blade, prefer tsc */ | 374 | /* If single blade, prefer tsc */ |
379 | if (uv_num_possible_blades() == 1) | 375 | if (uv_num_possible_blades() == 1) |
380 | clocksource_uv.rating = 250; | 376 | clocksource_uv.rating = 250; |
381 | 377 | ||
382 | rc = clocksource_register(&clocksource_uv); | 378 | rc = clocksource_register_hz(&clocksource_uv, sn_rtc_cycles_per_second); |
383 | if (rc) | 379 | if (rc) |
384 | printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc); | 380 | printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc); |
385 | else | 381 | else |
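Dropping .shift and the clocksource_hz2mult() call works because clocksource_register_hz() lets the core derive mult/shift from the counter frequency. A minimal sketch of that registration pattern, assuming a hypothetical example_read() callback and an invented 1 MHz counter:

	#include <linux/clocksource.h>

	static cycle_t example_read(struct clocksource *cs);	/* hypothetical counter read */

	static struct clocksource example_cs = {
		.name	= "example",
		.rating	= 100,
		.read	= example_read,
		.mask	= CLOCKSOURCE_MASK(56),
		.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
	};

	static int __init example_cs_init(void)
	{
		/* previously: example_cs.mult = clocksource_hz2mult(hz, example_cs.shift); */
		return clocksource_register_hz(&example_cs, 1000000);
	}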
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index b6552b189bcd..bef0bc962400 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile | |||
@@ -11,7 +11,7 @@ vdso-install-$(VDSO32-y) += $(vdso32-images) | |||
11 | 11 | ||
12 | 12 | ||
13 | # files to link into the vdso | 13 | # files to link into the vdso |
14 | vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vvar.o | 14 | vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o |
15 | 15 | ||
16 | # files to link into kernel | 16 | # files to link into kernel |
17 | obj-$(VDSO64-y) += vma.o vdso.o | 17 | obj-$(VDSO64-y) += vma.o vdso.o |
@@ -37,11 +37,24 @@ $(obj)/%.so: OBJCOPYFLAGS := -S | |||
37 | $(obj)/%.so: $(obj)/%.so.dbg FORCE | 37 | $(obj)/%.so: $(obj)/%.so.dbg FORCE |
38 | $(call if_changed,objcopy) | 38 | $(call if_changed,objcopy) |
39 | 39 | ||
40 | # | ||
41 | # Don't omit frame pointers for ease of userspace debugging, but do | ||
42 | # optimize sibling calls. | ||
43 | # | ||
40 | CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ | 44 | CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ |
41 | $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) | 45 | $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ |
46 | -fno-omit-frame-pointer -foptimize-sibling-calls | ||
42 | 47 | ||
43 | $(vobjs): KBUILD_CFLAGS += $(CFL) | 48 | $(vobjs): KBUILD_CFLAGS += $(CFL) |
44 | 49 | ||
50 | # | ||
51 | # vDSO code runs in userspace and -pg doesn't help with profiling anyway. | ||
52 | # | ||
53 | CFLAGS_REMOVE_vdso-note.o = -pg | ||
54 | CFLAGS_REMOVE_vclock_gettime.o = -pg | ||
55 | CFLAGS_REMOVE_vgetcpu.o = -pg | ||
56 | CFLAGS_REMOVE_vvar.o = -pg | ||
57 | |||
45 | targets += vdso-syms.lds | 58 | targets += vdso-syms.lds |
46 | obj-$(VDSO64-y) += vdso-syms.lds | 59 | obj-$(VDSO64-y) += vdso-syms.lds |
47 | 60 | ||
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index ee55754cc3c5..a724905fdae7 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * Copyright 2006 Andi Kleen, SUSE Labs. | 2 | * Copyright 2006 Andi Kleen, SUSE Labs. |
3 | * Subject to the GNU Public License, v.2 | 3 | * Subject to the GNU Public License, v.2 |
4 | * | 4 | * |
5 | * Fast user context implementation of clock_gettime and gettimeofday. | 5 | * Fast user context implementation of clock_gettime, gettimeofday, and time. |
6 | * | 6 | * |
7 | * The code should have no internal unresolved relocations. | 7 | * The code should have no internal unresolved relocations. |
8 | * Check with readelf after changing. | 8 | * Check with readelf after changing. |
@@ -22,9 +22,8 @@ | |||
22 | #include <asm/hpet.h> | 22 | #include <asm/hpet.h> |
23 | #include <asm/unistd.h> | 23 | #include <asm/unistd.h> |
24 | #include <asm/io.h> | 24 | #include <asm/io.h> |
25 | #include "vextern.h" | ||
26 | 25 | ||
27 | #define gtod vdso_vsyscall_gtod_data | 26 | #define gtod (&VVAR(vsyscall_gtod_data)) |
28 | 27 | ||
29 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) | 28 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) |
30 | { | 29 | { |
@@ -56,22 +55,6 @@ notrace static noinline int do_realtime(struct timespec *ts) | |||
56 | return 0; | 55 | return 0; |
57 | } | 56 | } |
58 | 57 | ||
59 | /* Copy of the version in kernel/time.c which we cannot directly access */ | ||
60 | notrace static void | ||
61 | vset_normalized_timespec(struct timespec *ts, long sec, long nsec) | ||
62 | { | ||
63 | while (nsec >= NSEC_PER_SEC) { | ||
64 | nsec -= NSEC_PER_SEC; | ||
65 | ++sec; | ||
66 | } | ||
67 | while (nsec < 0) { | ||
68 | nsec += NSEC_PER_SEC; | ||
69 | --sec; | ||
70 | } | ||
71 | ts->tv_sec = sec; | ||
72 | ts->tv_nsec = nsec; | ||
73 | } | ||
74 | |||
75 | notrace static noinline int do_monotonic(struct timespec *ts) | 58 | notrace static noinline int do_monotonic(struct timespec *ts) |
76 | { | 59 | { |
77 | unsigned long seq, ns, secs; | 60 | unsigned long seq, ns, secs; |
@@ -82,7 +65,17 @@ notrace static noinline int do_monotonic(struct timespec *ts) | |||
82 | secs += gtod->wall_to_monotonic.tv_sec; | 65 | secs += gtod->wall_to_monotonic.tv_sec; |
83 | ns += gtod->wall_to_monotonic.tv_nsec; | 66 | ns += gtod->wall_to_monotonic.tv_nsec; |
84 | } while (unlikely(read_seqretry(&gtod->lock, seq))); | 67 | } while (unlikely(read_seqretry(&gtod->lock, seq)));
85 | vset_normalized_timespec(ts, secs, ns); | 68 | |
69 | /* wall_time_nsec, vgetns(), and wall_to_monotonic.tv_nsec | ||
70 | * are all guaranteed to be nonnegative. | ||
71 | */ | ||
72 | while (ns >= NSEC_PER_SEC) { | ||
73 | ns -= NSEC_PER_SEC; | ||
74 | ++secs; | ||
75 | } | ||
76 | ts->tv_sec = secs; | ||
77 | ts->tv_nsec = ns; | ||
78 | |||
86 | return 0; | 79 | return 0; |
87 | } | 80 | } |
88 | 81 | ||
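Because wall_time_nsec, vgetns(), and wall_to_monotonic.tv_nsec are all nonnegative, the general-purpose normalization helper is unnecessary; a carry loop in one direction is enough. The same logic as a small standalone sketch, runnable in userspace (names here are generic, not the vDSO's):

#include <stdio.h>

#define NSEC_PER_SEC 1000000000L

/* Fold an overshooting nanosecond count into whole seconds; assumes ns >= 0,
 * mirroring the guarantee the vDSO code relies on above. */
static void normalize(long *secs, long *ns)
{
        while (*ns >= NSEC_PER_SEC) {
                *ns -= NSEC_PER_SEC;
                ++*secs;
        }
}

int main(void)
{
        long s = 10, n = 1500000000L;   /* 1.5 s worth of nanoseconds */
        normalize(&s, &n);
        printf("%ld.%09ld\n", s, n);    /* prints 11.500000000 */
        return 0;
}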
@@ -107,7 +100,17 @@ notrace static noinline int do_monotonic_coarse(struct timespec *ts) | |||
107 | secs += gtod->wall_to_monotonic.tv_sec; | 100 | secs += gtod->wall_to_monotonic.tv_sec; |
108 | ns += gtod->wall_to_monotonic.tv_nsec; | 101 | ns += gtod->wall_to_monotonic.tv_nsec; |
109 | } while (unlikely(read_seqretry(&gtod->lock, seq))); | 102 | } while (unlikely(read_seqretry(&gtod->lock, seq)));
110 | vset_normalized_timespec(ts, secs, ns); | 103 | |
104 | /* wall_time_nsec and wall_to_monotonic.tv_nsec are | ||
105 | * guaranteed to be between 0 and NSEC_PER_SEC. | ||
106 | */ | ||
107 | if (ns >= NSEC_PER_SEC) { | ||
108 | ns -= NSEC_PER_SEC; | ||
109 | ++secs; | ||
110 | } | ||
111 | ts->tv_sec = secs; | ||
112 | ts->tv_nsec = ns; | ||
113 | |||
111 | return 0; | 114 | return 0; |
112 | } | 115 | } |
113 | 116 | ||
@@ -157,3 +160,32 @@ notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) | |||
157 | } | 160 | } |
158 | int gettimeofday(struct timeval *, struct timezone *) | 161 | int gettimeofday(struct timeval *, struct timezone *) |
159 | __attribute__((weak, alias("__vdso_gettimeofday"))); | 162 | __attribute__((weak, alias("__vdso_gettimeofday"))); |
163 | |||
164 | /* This will break when the xtime seconds get inaccurate, but that is | ||
165 | * unlikely */ | ||
166 | |||
167 | static __always_inline long time_syscall(long *t) | ||
168 | { | ||
169 | long secs; | ||
170 | asm volatile("syscall" | ||
171 | : "=a" (secs) | ||
172 | : "0" (__NR_time), "D" (t) : "cc", "r11", "cx", "memory"); | ||
173 | return secs; | ||
174 | } | ||
175 | |||
176 | notrace time_t __vdso_time(time_t *t) | ||
177 | { | ||
178 | time_t result; | ||
179 | |||
180 | if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled)) | ||
181 | return time_syscall(t); | ||
182 | |||
183 | /* This is atomic on x86_64 so we don't need any locks. */ | ||
184 | result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec); | ||
185 | |||
186 | if (t) | ||
187 | *t = result; | ||
188 | return result; | ||
189 | } | ||
190 | int time(time_t *t) | ||
191 | __attribute__((weak, alias("__vdso_time"))); | ||
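The new __vdso_time reads wall_time_sec with a single ACCESS_ONCE load, which is naturally atomic on x86-64, so time() can complete without entering the kernel; when sysctl_enabled is clear it falls back to the real syscall. From userspace nothing changes except the cost. A quick check one can build and run (illustrative; whether these calls are actually satisfied by the vDSO depends on the glibc and kernel in use, and older glibc needs -lrt for clock_gettime):

#include <stdio.h>
#include <time.h>

int main(void)
{
        struct timespec ts;
        time_t t = time(NULL);                /* may resolve to __vdso_time */

        clock_gettime(CLOCK_MONOTONIC, &ts);  /* may resolve to __vdso_clock_gettime */
        printf("time()=%ld monotonic=%ld.%09ld\n",
               (long)t, (long)ts.tv_sec, ts.tv_nsec);
        return 0;
}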
diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/vdso/vdso.lds.S index 4e5dd3b4de7f..b96b2677cad8 100644 --- a/arch/x86/vdso/vdso.lds.S +++ b/arch/x86/vdso/vdso.lds.S | |||
@@ -23,15 +23,10 @@ VERSION { | |||
23 | __vdso_gettimeofday; | 23 | __vdso_gettimeofday; |
24 | getcpu; | 24 | getcpu; |
25 | __vdso_getcpu; | 25 | __vdso_getcpu; |
26 | time; | ||
27 | __vdso_time; | ||
26 | local: *; | 28 | local: *; |
27 | }; | 29 | }; |
28 | } | 30 | } |
29 | 31 | ||
30 | VDSO64_PRELINK = VDSO_PRELINK; | 32 | VDSO64_PRELINK = VDSO_PRELINK; |
31 | |||
32 | /* | ||
33 | * Define VDSO64_x for each VEXTERN(x), for use via VDSO64_SYMBOL. | ||
34 | */ | ||
35 | #define VEXTERN(x) VDSO64_ ## x = vdso_ ## x; | ||
36 | #include "vextern.h" | ||
37 | #undef VEXTERN | ||
diff --git a/arch/x86/vdso/vextern.h b/arch/x86/vdso/vextern.h deleted file mode 100644 index 1683ba2ae3e8..000000000000 --- a/arch/x86/vdso/vextern.h +++ /dev/null | |||
@@ -1,16 +0,0 @@ | |||
1 | #ifndef VEXTERN | ||
2 | #include <asm/vsyscall.h> | ||
3 | #define VEXTERN(x) \ | ||
4 | extern typeof(x) *vdso_ ## x __attribute__((visibility("hidden"))); | ||
5 | #endif | ||
6 | |||
7 | #define VMAGIC 0xfeedbabeabcdefabUL | ||
8 | |||
9 | /* Any kernel variables used in the vDSO must be exported in the main | ||
10 | kernel's vmlinux.lds.S/vsyscall.h/proper __section and | ||
11 | put into vextern.h and be referenced as a pointer with vdso prefix. | ||
12 | The main kernel later fills in the values. */ | ||
13 | |||
14 | VEXTERN(jiffies) | ||
15 | VEXTERN(vgetcpu_mode) | ||
16 | VEXTERN(vsyscall_gtod_data) | ||
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c index 9fbc6b20026b..5463ad558573 100644 --- a/arch/x86/vdso/vgetcpu.c +++ b/arch/x86/vdso/vgetcpu.c | |||
@@ -11,14 +11,13 @@ | |||
11 | #include <linux/time.h> | 11 | #include <linux/time.h> |
12 | #include <asm/vsyscall.h> | 12 | #include <asm/vsyscall.h> |
13 | #include <asm/vgtod.h> | 13 | #include <asm/vgtod.h> |
14 | #include "vextern.h" | ||
15 | 14 | ||
16 | notrace long | 15 | notrace long |
17 | __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) | 16 | __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) |
18 | { | 17 | { |
19 | unsigned int p; | 18 | unsigned int p; |
20 | 19 | ||
21 | if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) { | 20 | if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) { |
22 | /* Load per CPU data from RDTSCP */ | 21 | /* Load per CPU data from RDTSCP */ |
23 | native_read_tscp(&p); | 22 | native_read_tscp(&p); |
24 | } else { | 23 | } else { |
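With the vextern pointer indirection gone, the mode flag is read straight from the vvar page via VVAR(vgetcpu_mode). When RDTSCP is available, the IA32_TSC_AUX value it returns packs the CPU and node into one word; the unpacking used by the rest of __vdso_getcpu (not shown in this hunk) amounts to the following, written out as a standalone helper for illustration:

/* Decode the value read by RDTSCP: 12 bits of CPU number, node above it,
 * matching the layout the kernel writes into IA32_TSC_AUX. */
static void decode_tscp_aux(unsigned int p, unsigned int *cpu, unsigned int *node)
{
        if (cpu)
                *cpu = p & 0xfff;
        if (node)
                *node = p >> 12;
}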
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 4b5d26f108bb..7abd2be0f9b9 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c | |||
@@ -15,9 +15,6 @@ | |||
15 | #include <asm/proto.h> | 15 | #include <asm/proto.h> |
16 | #include <asm/vdso.h> | 16 | #include <asm/vdso.h> |
17 | 17 | ||
18 | #include "vextern.h" /* Just for VMAGIC. */ | ||
19 | #undef VEXTERN | ||
20 | |||
21 | unsigned int __read_mostly vdso_enabled = 1; | 18 | unsigned int __read_mostly vdso_enabled = 1; |
22 | 19 | ||
23 | extern char vdso_start[], vdso_end[]; | 20 | extern char vdso_start[], vdso_end[]; |
@@ -26,20 +23,10 @@ extern unsigned short vdso_sync_cpuid; | |||
26 | static struct page **vdso_pages; | 23 | static struct page **vdso_pages; |
27 | static unsigned vdso_size; | 24 | static unsigned vdso_size; |
28 | 25 | ||
29 | static inline void *var_ref(void *p, char *name) | ||
30 | { | ||
31 | if (*(void **)p != (void *)VMAGIC) { | ||
32 | printk("VDSO: variable %s broken\n", name); | ||
33 | vdso_enabled = 0; | ||
34 | } | ||
35 | return p; | ||
36 | } | ||
37 | |||
38 | static int __init init_vdso_vars(void) | 26 | static int __init init_vdso_vars(void) |
39 | { | 27 | { |
40 | int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE; | 28 | int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE; |
41 | int i; | 29 | int i; |
42 | char *vbase; | ||
43 | 30 | ||
44 | vdso_size = npages << PAGE_SHIFT; | 31 | vdso_size = npages << PAGE_SHIFT; |
45 | vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL); | 32 | vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL); |
@@ -54,20 +41,6 @@ static int __init init_vdso_vars(void) | |||
54 | copy_page(page_address(p), vdso_start + i*PAGE_SIZE); | 41 | copy_page(page_address(p), vdso_start + i*PAGE_SIZE); |
55 | } | 42 | } |
56 | 43 | ||
57 | vbase = vmap(vdso_pages, npages, 0, PAGE_KERNEL); | ||
58 | if (!vbase) | ||
59 | goto oom; | ||
60 | |||
61 | if (memcmp(vbase, "\177ELF", 4)) { | ||
62 | printk("VDSO: I'm broken; not ELF\n"); | ||
63 | vdso_enabled = 0; | ||
64 | } | ||
65 | |||
66 | #define VEXTERN(x) \ | ||
67 | *(typeof(__ ## x) **) var_ref(VDSO64_SYMBOL(vbase, x), #x) = &__ ## x; | ||
68 | #include "vextern.h" | ||
69 | #undef VEXTERN | ||
70 | vunmap(vbase); | ||
71 | return 0; | 44 | return 0; |
72 | 45 | ||
73 | oom: | 46 | oom: |
diff --git a/arch/x86/vdso/vvar.c b/arch/x86/vdso/vvar.c deleted file mode 100644 index 1b7e703684f9..000000000000 --- a/arch/x86/vdso/vvar.c +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | /* Define pointer to external vDSO variables. | ||
2 | These are part of the vDSO. The kernel fills in the real addresses | ||
3 | at boot time. This is done because when the vdso is linked the | ||
4 | kernel isn't yet and we don't know the final addresses. */ | ||
5 | #include <linux/kernel.h> | ||
6 | #include <linux/time.h> | ||
7 | #include <asm/vsyscall.h> | ||
8 | #include <asm/timex.h> | ||
9 | #include <asm/vgtod.h> | ||
10 | |||
11 | #define VEXTERN(x) typeof (__ ## x) *const vdso_ ## x = (void *)VMAGIC; | ||
12 | #include "vextern.h" | ||
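With vextern.h and vvar.c removed, the vDSO no longer carries magic-valued pointers for the kernel to patch at boot; instead it reads kernel-maintained variables through the VVAR() accessor, which resolves to a fixed address mapped into every process. A minimal sketch of that pattern, with the macro names, base address, and offsets chosen here purely for illustration (they are assumptions, not the kernel's actual vvar definitions):

/* Sketch of a fixed-address variable accessor in the style of VVAR().
 * VVAR_BASE and the offsets are placeholders, not the kernel's values. */
#define VVAR_BASE 0xffffffffff600000UL

#define DECLARE_VVAR(offset, type, name) \
        static type const * const vvar_ ## name = (void *)(VVAR_BASE + (offset));

#define VVAR(name) (*vvar_ ## name)

/* Example: DECLARE_VVAR(8, int, vgetcpu_mode) would let vDSO code write
 * VVAR(vgetcpu_mode) and read the kernel-maintained value directly,
 * with no boot-time fixup step. */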
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 1c7121ba18ff..5cc821cb2e09 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -39,6 +39,7 @@ config XEN_MAX_DOMAIN_MEMORY | |||
39 | config XEN_SAVE_RESTORE | 39 | config XEN_SAVE_RESTORE |
40 | bool | 40 | bool |
41 | depends on XEN | 41 | depends on XEN |
42 | select HIBERNATE_CALLBACKS | ||
42 | default y | 43 | default y |
43 | 44 | ||
44 | config XEN_DEBUG_FS | 45 | config XEN_DEBUG_FS |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 49dbd78ec3cb..dd7b88f2ec7a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -235,9 +235,10 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, | |||
235 | *dx &= maskedx; | 235 | *dx &= maskedx; |
236 | } | 236 | } |
237 | 237 | ||
238 | static __init void xen_init_cpuid_mask(void) | 238 | static void __init xen_init_cpuid_mask(void) |
239 | { | 239 | { |
240 | unsigned int ax, bx, cx, dx; | 240 | unsigned int ax, bx, cx, dx; |
241 | unsigned int xsave_mask; | ||
241 | 242 | ||
242 | cpuid_leaf1_edx_mask = | 243 | cpuid_leaf1_edx_mask = |
243 | ~((1 << X86_FEATURE_MCE) | /* disable MCE */ | 244 | ~((1 << X86_FEATURE_MCE) | /* disable MCE */ |
@@ -249,24 +250,16 @@ static __init void xen_init_cpuid_mask(void) | |||
249 | cpuid_leaf1_edx_mask &= | 250 | cpuid_leaf1_edx_mask &= |
250 | ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ | 251 | ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ |
251 | (1 << X86_FEATURE_ACPI)); /* disable ACPI */ | 252 | (1 << X86_FEATURE_ACPI)); /* disable ACPI */ |
252 | |||
253 | ax = 1; | 253 | ax = 1; |
254 | cx = 0; | ||
255 | xen_cpuid(&ax, &bx, &cx, &dx); | 254 | xen_cpuid(&ax, &bx, &cx, &dx); |
256 | 255 | ||
257 | /* cpuid claims we support xsave; try enabling it to see what happens */ | 256 | xsave_mask = |
258 | if (cx & (1 << (X86_FEATURE_XSAVE % 32))) { | 257 | (1 << (X86_FEATURE_XSAVE % 32)) | |
259 | unsigned long cr4; | 258 | (1 << (X86_FEATURE_OSXSAVE % 32)); |
260 | |||
261 | set_in_cr4(X86_CR4_OSXSAVE); | ||
262 | |||
263 | cr4 = read_cr4(); | ||
264 | 259 | ||
265 | if ((cr4 & X86_CR4_OSXSAVE) == 0) | 260 | /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ |
266 | cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32)); | 261 | if ((cx & xsave_mask) != xsave_mask) |
267 | 262 | cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */ | |
268 | clear_in_cr4(X86_CR4_OSXSAVE); | ||
269 | } | ||
270 | } | 263 | } |
271 | 264 | ||
272 | static void xen_set_debugreg(int reg, unsigned long val) | 265 | static void xen_set_debugreg(int reg, unsigned long val) |
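Instead of probing CR4.OSXSAVE by hand, the mask logic now trusts what Xen reports through CPUID leaf 1: both the XSAVE and OSXSAVE bits in ECX must be set, or the pair is hidden from the guest. The "% 32" arithmetic works because X86_FEATURE_* values encode word*32 + bit, so the remainder is the bit position within the ECX word. The test in isolation (bit positions per the CPUID definition; the helper name is ours):

#include <stdbool.h>

#define CPUID1_ECX_XSAVE_BIT    26  /* CPUID.1:ECX.XSAVE */
#define CPUID1_ECX_OSXSAVE_BIT  27  /* CPUID.1:ECX.OSXSAVE */

/* Return true only if the hypervisor reports both XSAVE and OSXSAVE,
 * mirroring the all-or-nothing test applied to cpuid_leaf1_ecx_mask. */
static bool xsave_usable(unsigned int ecx)
{
        unsigned int mask = (1u << CPUID1_ECX_XSAVE_BIT) |
                            (1u << CPUID1_ECX_OSXSAVE_BIT);

        return (ecx & mask) == mask;
}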
@@ -407,7 +400,7 @@ static void xen_load_gdt(const struct desc_ptr *dtr) | |||
407 | /* | 400 | /* |
408 | * load_gdt for early boot, when the gdt is only mapped once | 401 | * load_gdt for early boot, when the gdt is only mapped once |
409 | */ | 402 | */ |
410 | static __init void xen_load_gdt_boot(const struct desc_ptr *dtr) | 403 | static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) |
411 | { | 404 | { |
412 | unsigned long va = dtr->address; | 405 | unsigned long va = dtr->address; |
413 | unsigned int size = dtr->size + 1; | 406 | unsigned int size = dtr->size + 1; |
@@ -669,7 +662,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry, | |||
669 | * Version of write_gdt_entry for use at early boot-time needed to | 662 | * Version of write_gdt_entry for use at early boot-time needed to |
670 | * update an entry as simply as possible. | 663 | * update an entry as simply as possible. |
671 | */ | 664 | */ |
672 | static __init void xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, | 665 | static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, |
673 | const void *desc, int type) | 666 | const void *desc, int type) |
674 | { | 667 | { |
675 | switch (type) { | 668 | switch (type) { |
@@ -940,18 +933,18 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, | |||
940 | return ret; | 933 | return ret; |
941 | } | 934 | } |
942 | 935 | ||
943 | static const struct pv_info xen_info __initdata = { | 936 | static const struct pv_info xen_info __initconst = { |
944 | .paravirt_enabled = 1, | 937 | .paravirt_enabled = 1, |
945 | .shared_kernel_pmd = 0, | 938 | .shared_kernel_pmd = 0, |
946 | 939 | ||
947 | .name = "Xen", | 940 | .name = "Xen", |
948 | }; | 941 | }; |
949 | 942 | ||
950 | static const struct pv_init_ops xen_init_ops __initdata = { | 943 | static const struct pv_init_ops xen_init_ops __initconst = { |
951 | .patch = xen_patch, | 944 | .patch = xen_patch, |
952 | }; | 945 | }; |
953 | 946 | ||
954 | static const struct pv_cpu_ops xen_cpu_ops __initdata = { | 947 | static const struct pv_cpu_ops xen_cpu_ops __initconst = { |
955 | .cpuid = xen_cpuid, | 948 | .cpuid = xen_cpuid, |
956 | 949 | ||
957 | .set_debugreg = xen_set_debugreg, | 950 | .set_debugreg = xen_set_debugreg, |
@@ -1011,7 +1004,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
1011 | .end_context_switch = xen_end_context_switch, | 1004 | .end_context_switch = xen_end_context_switch, |
1012 | }; | 1005 | }; |
1013 | 1006 | ||
1014 | static const struct pv_apic_ops xen_apic_ops __initdata = { | 1007 | static const struct pv_apic_ops xen_apic_ops __initconst = { |
1015 | #ifdef CONFIG_X86_LOCAL_APIC | 1008 | #ifdef CONFIG_X86_LOCAL_APIC |
1016 | .startup_ipi_hook = paravirt_nop, | 1009 | .startup_ipi_hook = paravirt_nop, |
1017 | #endif | 1010 | #endif |
@@ -1062,7 +1055,7 @@ int xen_panic_handler_init(void) | |||
1062 | return 0; | 1055 | return 0; |
1063 | } | 1056 | } |
1064 | 1057 | ||
1065 | static const struct machine_ops __initdata xen_machine_ops = { | 1058 | static const struct machine_ops xen_machine_ops __initconst = { |
1066 | .restart = xen_restart, | 1059 | .restart = xen_restart, |
1067 | .halt = xen_machine_halt, | 1060 | .halt = xen_machine_halt, |
1068 | .power_off = xen_machine_halt, | 1061 | .power_off = xen_machine_halt, |
@@ -1339,7 +1332,7 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, | |||
1339 | return NOTIFY_OK; | 1332 | return NOTIFY_OK; |
1340 | } | 1333 | } |
1341 | 1334 | ||
1342 | static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = { | 1335 | static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = { |
1343 | .notifier_call = xen_hvm_cpu_notify, | 1336 | .notifier_call = xen_hvm_cpu_notify, |
1344 | }; | 1337 | }; |
1345 | 1338 | ||
@@ -1388,7 +1381,7 @@ bool xen_hvm_need_lapic(void) | |||
1388 | } | 1381 | } |
1389 | EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); | 1382 | EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); |
1390 | 1383 | ||
1391 | const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = { | 1384 | const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { |
1392 | .name = "Xen HVM", | 1385 | .name = "Xen HVM", |
1393 | .detect = xen_hvm_platform, | 1386 | .detect = xen_hvm_platform, |
1394 | .init_platform = xen_hvm_guest_init, | 1387 | .init_platform = xen_hvm_guest_init, |
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 6a6fe8939645..8bbb465b6f0a 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c | |||
@@ -113,7 +113,7 @@ static void xen_halt(void) | |||
113 | xen_safe_halt(); | 113 | xen_safe_halt(); |
114 | } | 114 | } |
115 | 115 | ||
116 | static const struct pv_irq_ops xen_irq_ops __initdata = { | 116 | static const struct pv_irq_ops xen_irq_ops __initconst = { |
117 | .save_fl = PV_CALLEE_SAVE(xen_save_fl), | 117 | .save_fl = PV_CALLEE_SAVE(xen_save_fl), |
118 | .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), | 118 | .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), |
119 | .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), | 119 | .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), |
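The repeated __initdata to __initconst conversions in this series follow from the structures being const-qualified: __initdata places an object in the writable init data section, while __initconst uses the read-only one, so a const object belongs with __initconst; both are discarded after boot. A sketch of the pattern with a made-up ops structure (the section placement described in the comments reflects the usual include/linux/init.h definitions and should be read as an assumption here):

#include <linux/init.h>

struct example_ops {
        void (*hook)(void);
};

static void example_hook(void) { }

/* const object + __initconst: read-only init section (assumed .init.rodata) */
static const struct example_ops demo_ops __initconst = {
        .hook = example_hook,
};

/* non-const object + __initdata: writable init section (assumed .init.data) */
static int boot_tunable __initdata = 42;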
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index c82df6c9c0f0..dc708dcc62f1 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -75,67 +75,12 @@ | |||
75 | #include "mmu.h" | 75 | #include "mmu.h" |
76 | #include "debugfs.h" | 76 | #include "debugfs.h" |
77 | 77 | ||
78 | #define MMU_UPDATE_HISTO 30 | ||
79 | |||
80 | /* | 78 | /* |
81 | * Protects atomic reservation decrease/increase against concurrent increases. | 79 | * Protects atomic reservation decrease/increase against concurrent increases. |
82 | * Also protects non-atomic updates of current_pages and balloon lists. | 80 | * Also protects non-atomic updates of current_pages and balloon lists. |
83 | */ | 81 | */ |
84 | DEFINE_SPINLOCK(xen_reservation_lock); | 82 | DEFINE_SPINLOCK(xen_reservation_lock); |
85 | 83 | ||
86 | #ifdef CONFIG_XEN_DEBUG_FS | ||
87 | |||
88 | static struct { | ||
89 | u32 pgd_update; | ||
90 | u32 pgd_update_pinned; | ||
91 | u32 pgd_update_batched; | ||
92 | |||
93 | u32 pud_update; | ||
94 | u32 pud_update_pinned; | ||
95 | u32 pud_update_batched; | ||
96 | |||
97 | u32 pmd_update; | ||
98 | u32 pmd_update_pinned; | ||
99 | u32 pmd_update_batched; | ||
100 | |||
101 | u32 pte_update; | ||
102 | u32 pte_update_pinned; | ||
103 | u32 pte_update_batched; | ||
104 | |||
105 | u32 mmu_update; | ||
106 | u32 mmu_update_extended; | ||
107 | u32 mmu_update_histo[MMU_UPDATE_HISTO]; | ||
108 | |||
109 | u32 prot_commit; | ||
110 | u32 prot_commit_batched; | ||
111 | |||
112 | u32 set_pte_at; | ||
113 | u32 set_pte_at_batched; | ||
114 | u32 set_pte_at_pinned; | ||
115 | u32 set_pte_at_current; | ||
116 | u32 set_pte_at_kernel; | ||
117 | } mmu_stats; | ||
118 | |||
119 | static u8 zero_stats; | ||
120 | |||
121 | static inline void check_zero(void) | ||
122 | { | ||
123 | if (unlikely(zero_stats)) { | ||
124 | memset(&mmu_stats, 0, sizeof(mmu_stats)); | ||
125 | zero_stats = 0; | ||
126 | } | ||
127 | } | ||
128 | |||
129 | #define ADD_STATS(elem, val) \ | ||
130 | do { check_zero(); mmu_stats.elem += (val); } while(0) | ||
131 | |||
132 | #else /* !CONFIG_XEN_DEBUG_FS */ | ||
133 | |||
134 | #define ADD_STATS(elem, val) do { (void)(val); } while(0) | ||
135 | |||
136 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
137 | |||
138 | |||
139 | /* | 84 | /* |
140 | * Identity map, in addition to plain kernel map. This needs to be | 85 | * Identity map, in addition to plain kernel map. This needs to be |
141 | * large enough to allocate page table pages to allocate the rest. | 86 | * large enough to allocate page table pages to allocate the rest. |
@@ -243,11 +188,6 @@ static bool xen_page_pinned(void *ptr) | |||
243 | return PagePinned(page); | 188 | return PagePinned(page); |
244 | } | 189 | } |
245 | 190 | ||
246 | static bool xen_iomap_pte(pte_t pte) | ||
247 | { | ||
248 | return pte_flags(pte) & _PAGE_IOMAP; | ||
249 | } | ||
250 | |||
251 | void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid) | 191 | void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid) |
252 | { | 192 | { |
253 | struct multicall_space mcs; | 193 | struct multicall_space mcs; |
@@ -257,7 +197,7 @@ void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid) | |||
257 | u = mcs.args; | 197 | u = mcs.args; |
258 | 198 | ||
259 | /* ptep might be kmapped when using 32-bit HIGHPTE */ | 199 | /* ptep might be kmapped when using 32-bit HIGHPTE */ |
260 | u->ptr = arbitrary_virt_to_machine(ptep).maddr; | 200 | u->ptr = virt_to_machine(ptep).maddr; |
261 | u->val = pte_val_ma(pteval); | 201 | u->val = pte_val_ma(pteval); |
262 | 202 | ||
263 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid); | 203 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid); |
@@ -266,11 +206,6 @@ void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid) | |||
266 | } | 206 | } |
267 | EXPORT_SYMBOL_GPL(xen_set_domain_pte); | 207 | EXPORT_SYMBOL_GPL(xen_set_domain_pte); |
268 | 208 | ||
269 | static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval) | ||
270 | { | ||
271 | xen_set_domain_pte(ptep, pteval, DOMID_IO); | ||
272 | } | ||
273 | |||
274 | static void xen_extend_mmu_update(const struct mmu_update *update) | 209 | static void xen_extend_mmu_update(const struct mmu_update *update) |
275 | { | 210 | { |
276 | struct multicall_space mcs; | 211 | struct multicall_space mcs; |
@@ -279,27 +214,17 @@ static void xen_extend_mmu_update(const struct mmu_update *update) | |||
279 | mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); | 214 | mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); |
280 | 215 | ||
281 | if (mcs.mc != NULL) { | 216 | if (mcs.mc != NULL) { |
282 | ADD_STATS(mmu_update_extended, 1); | ||
283 | ADD_STATS(mmu_update_histo[mcs.mc->args[1]], -1); | ||
284 | |||
285 | mcs.mc->args[1]++; | 217 | mcs.mc->args[1]++; |
286 | |||
287 | if (mcs.mc->args[1] < MMU_UPDATE_HISTO) | ||
288 | ADD_STATS(mmu_update_histo[mcs.mc->args[1]], 1); | ||
289 | else | ||
290 | ADD_STATS(mmu_update_histo[0], 1); | ||
291 | } else { | 218 | } else { |
292 | ADD_STATS(mmu_update, 1); | ||
293 | mcs = __xen_mc_entry(sizeof(*u)); | 219 | mcs = __xen_mc_entry(sizeof(*u)); |
294 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); | 220 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); |
295 | ADD_STATS(mmu_update_histo[1], 1); | ||
296 | } | 221 | } |
297 | 222 | ||
298 | u = mcs.args; | 223 | u = mcs.args; |
299 | *u = *update; | 224 | *u = *update; |
300 | } | 225 | } |
301 | 226 | ||
302 | void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | 227 | static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) |
303 | { | 228 | { |
304 | struct mmu_update u; | 229 | struct mmu_update u; |
305 | 230 | ||
@@ -312,17 +237,13 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | |||
312 | u.val = pmd_val_ma(val); | 237 | u.val = pmd_val_ma(val); |
313 | xen_extend_mmu_update(&u); | 238 | xen_extend_mmu_update(&u); |
314 | 239 | ||
315 | ADD_STATS(pmd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
316 | |||
317 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 240 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
318 | 241 | ||
319 | preempt_enable(); | 242 | preempt_enable(); |
320 | } | 243 | } |
321 | 244 | ||
322 | void xen_set_pmd(pmd_t *ptr, pmd_t val) | 245 | static void xen_set_pmd(pmd_t *ptr, pmd_t val) |
323 | { | 246 | { |
324 | ADD_STATS(pmd_update, 1); | ||
325 | |||
326 | /* If page is not pinned, we can just update the entry | 247 | /* If page is not pinned, we can just update the entry |
327 | directly */ | 248 | directly */ |
328 | if (!xen_page_pinned(ptr)) { | 249 | if (!xen_page_pinned(ptr)) { |
@@ -330,8 +251,6 @@ void xen_set_pmd(pmd_t *ptr, pmd_t val) | |||
330 | return; | 251 | return; |
331 | } | 252 | } |
332 | 253 | ||
333 | ADD_STATS(pmd_update_pinned, 1); | ||
334 | |||
335 | xen_set_pmd_hyper(ptr, val); | 254 | xen_set_pmd_hyper(ptr, val); |
336 | } | 255 | } |
337 | 256 | ||
@@ -344,35 +263,34 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) | |||
344 | set_pte_vaddr(vaddr, mfn_pte(mfn, flags)); | 263 | set_pte_vaddr(vaddr, mfn_pte(mfn, flags)); |
345 | } | 264 | } |
346 | 265 | ||
347 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | 266 | static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) |
348 | pte_t *ptep, pte_t pteval) | ||
349 | { | 267 | { |
350 | if (xen_iomap_pte(pteval)) { | 268 | struct mmu_update u; |
351 | xen_set_iomap_pte(ptep, pteval); | 269 | |
352 | goto out; | 270 | if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) |
353 | } | 271 | return false; |
354 | 272 | ||
355 | ADD_STATS(set_pte_at, 1); | 273 | xen_mc_batch(); |
356 | // ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep)); | ||
357 | ADD_STATS(set_pte_at_current, mm == current->mm); | ||
358 | ADD_STATS(set_pte_at_kernel, mm == &init_mm); | ||
359 | 274 | ||
360 | if (mm == current->mm || mm == &init_mm) { | 275 | u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE; |
361 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { | 276 | u.val = pte_val_ma(pteval); |
362 | struct multicall_space mcs; | 277 | xen_extend_mmu_update(&u); |
363 | mcs = xen_mc_entry(0); | ||
364 | 278 | ||
365 | MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); | 279 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
366 | ADD_STATS(set_pte_at_batched, 1); | 280 | |
367 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 281 | return true; |
368 | goto out; | 282 | } |
369 | } else | 283 | |
370 | if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0) | 284 | static void xen_set_pte(pte_t *ptep, pte_t pteval) |
371 | goto out; | 285 | { |
372 | } | 286 | if (!xen_batched_set_pte(ptep, pteval)) |
373 | xen_set_pte(ptep, pteval); | 287 | native_set_pte(ptep, pteval); |
288 | } | ||
374 | 289 | ||
375 | out: return; | 290 | static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, |
291 | pte_t *ptep, pte_t pteval) | ||
292 | { | ||
293 | xen_set_pte(ptep, pteval); | ||
376 | } | 294 | } |
377 | 295 | ||
378 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, | 296 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, |
@@ -389,13 +307,10 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | |||
389 | 307 | ||
390 | xen_mc_batch(); | 308 | xen_mc_batch(); |
391 | 309 | ||
392 | u.ptr = arbitrary_virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; | 310 | u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; |
393 | u.val = pte_val_ma(pte); | 311 | u.val = pte_val_ma(pte); |
394 | xen_extend_mmu_update(&u); | 312 | xen_extend_mmu_update(&u); |
395 | 313 | ||
396 | ADD_STATS(prot_commit, 1); | ||
397 | ADD_STATS(prot_commit_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
398 | |||
399 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 314 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
400 | } | 315 | } |
401 | 316 | ||
@@ -463,7 +378,7 @@ static pteval_t iomap_pte(pteval_t val) | |||
463 | return val; | 378 | return val; |
464 | } | 379 | } |
465 | 380 | ||
466 | pteval_t xen_pte_val(pte_t pte) | 381 | static pteval_t xen_pte_val(pte_t pte) |
467 | { | 382 | { |
468 | pteval_t pteval = pte.pte; | 383 | pteval_t pteval = pte.pte; |
469 | 384 | ||
@@ -480,7 +395,7 @@ pteval_t xen_pte_val(pte_t pte) | |||
480 | } | 395 | } |
481 | PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); | 396 | PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); |
482 | 397 | ||
483 | pgdval_t xen_pgd_val(pgd_t pgd) | 398 | static pgdval_t xen_pgd_val(pgd_t pgd) |
484 | { | 399 | { |
485 | return pte_mfn_to_pfn(pgd.pgd); | 400 | return pte_mfn_to_pfn(pgd.pgd); |
486 | } | 401 | } |
@@ -511,7 +426,7 @@ void xen_set_pat(u64 pat) | |||
511 | WARN_ON(pat != 0x0007010600070106ull); | 426 | WARN_ON(pat != 0x0007010600070106ull); |
512 | } | 427 | } |
513 | 428 | ||
514 | pte_t xen_make_pte(pteval_t pte) | 429 | static pte_t xen_make_pte(pteval_t pte) |
515 | { | 430 | { |
516 | phys_addr_t addr = (pte & PTE_PFN_MASK); | 431 | phys_addr_t addr = (pte & PTE_PFN_MASK); |
517 | 432 | ||
@@ -565,13 +480,13 @@ pte_t xen_make_pte_debug(pteval_t pte) | |||
565 | if (io_page && | 480 | if (io_page && |
566 | (xen_initial_domain() || addr >= ISA_END_ADDRESS)) { | 481 | (xen_initial_domain() || addr >= ISA_END_ADDRESS)) { |
567 | other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT; | 482 | other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT; |
568 | WARN(addr != other_addr, | 483 | WARN_ONCE(addr != other_addr, |
569 | "0x%lx is using VM_IO, but it is 0x%lx!\n", | 484 | "0x%lx is using VM_IO, but it is 0x%lx!\n", |
570 | (unsigned long)addr, (unsigned long)other_addr); | 485 | (unsigned long)addr, (unsigned long)other_addr); |
571 | } else { | 486 | } else { |
572 | pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP; | 487 | pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP; |
573 | other_addr = (_pte.pte & PTE_PFN_MASK); | 488 | other_addr = (_pte.pte & PTE_PFN_MASK); |
574 | WARN((addr == other_addr) && (!io_page) && (!iomap_set), | 489 | WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set), |
575 | "0x%lx is missing VM_IO (and wasn't fixed)!\n", | 490 | "0x%lx is missing VM_IO (and wasn't fixed)!\n", |
576 | (unsigned long)addr); | 491 | (unsigned long)addr); |
577 | } | 492 | } |
@@ -581,20 +496,20 @@ pte_t xen_make_pte_debug(pteval_t pte) | |||
581 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug); | 496 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug); |
582 | #endif | 497 | #endif |
583 | 498 | ||
584 | pgd_t xen_make_pgd(pgdval_t pgd) | 499 | static pgd_t xen_make_pgd(pgdval_t pgd) |
585 | { | 500 | { |
586 | pgd = pte_pfn_to_mfn(pgd); | 501 | pgd = pte_pfn_to_mfn(pgd); |
587 | return native_make_pgd(pgd); | 502 | return native_make_pgd(pgd); |
588 | } | 503 | } |
589 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd); | 504 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd); |
590 | 505 | ||
591 | pmdval_t xen_pmd_val(pmd_t pmd) | 506 | static pmdval_t xen_pmd_val(pmd_t pmd) |
592 | { | 507 | { |
593 | return pte_mfn_to_pfn(pmd.pmd); | 508 | return pte_mfn_to_pfn(pmd.pmd); |
594 | } | 509 | } |
595 | PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val); | 510 | PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val); |
596 | 511 | ||
597 | void xen_set_pud_hyper(pud_t *ptr, pud_t val) | 512 | static void xen_set_pud_hyper(pud_t *ptr, pud_t val) |
598 | { | 513 | { |
599 | struct mmu_update u; | 514 | struct mmu_update u; |
600 | 515 | ||
@@ -607,17 +522,13 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val) | |||
607 | u.val = pud_val_ma(val); | 522 | u.val = pud_val_ma(val); |
608 | xen_extend_mmu_update(&u); | 523 | xen_extend_mmu_update(&u); |
609 | 524 | ||
610 | ADD_STATS(pud_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
611 | |||
612 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 525 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
613 | 526 | ||
614 | preempt_enable(); | 527 | preempt_enable(); |
615 | } | 528 | } |
616 | 529 | ||
617 | void xen_set_pud(pud_t *ptr, pud_t val) | 530 | static void xen_set_pud(pud_t *ptr, pud_t val) |
618 | { | 531 | { |
619 | ADD_STATS(pud_update, 1); | ||
620 | |||
621 | /* If page is not pinned, we can just update the entry | 532 | /* If page is not pinned, we can just update the entry |
622 | directly */ | 533 | directly */ |
623 | if (!xen_page_pinned(ptr)) { | 534 | if (!xen_page_pinned(ptr)) { |
@@ -625,56 +536,28 @@ void xen_set_pud(pud_t *ptr, pud_t val) | |||
625 | return; | 536 | return; |
626 | } | 537 | } |
627 | 538 | ||
628 | ADD_STATS(pud_update_pinned, 1); | ||
629 | |||
630 | xen_set_pud_hyper(ptr, val); | 539 | xen_set_pud_hyper(ptr, val); |
631 | } | 540 | } |
632 | 541 | ||
633 | void xen_set_pte(pte_t *ptep, pte_t pte) | ||
634 | { | ||
635 | if (xen_iomap_pte(pte)) { | ||
636 | xen_set_iomap_pte(ptep, pte); | ||
637 | return; | ||
638 | } | ||
639 | |||
640 | ADD_STATS(pte_update, 1); | ||
641 | // ADD_STATS(pte_update_pinned, xen_page_pinned(ptep)); | ||
642 | ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
643 | |||
644 | #ifdef CONFIG_X86_PAE | 542 | #ifdef CONFIG_X86_PAE |
645 | ptep->pte_high = pte.pte_high; | 543 | static void xen_set_pte_atomic(pte_t *ptep, pte_t pte) |
646 | smp_wmb(); | ||
647 | ptep->pte_low = pte.pte_low; | ||
648 | #else | ||
649 | *ptep = pte; | ||
650 | #endif | ||
651 | } | ||
652 | |||
653 | #ifdef CONFIG_X86_PAE | ||
654 | void xen_set_pte_atomic(pte_t *ptep, pte_t pte) | ||
655 | { | 544 | { |
656 | if (xen_iomap_pte(pte)) { | ||
657 | xen_set_iomap_pte(ptep, pte); | ||
658 | return; | ||
659 | } | ||
660 | |||
661 | set_64bit((u64 *)ptep, native_pte_val(pte)); | 545 | set_64bit((u64 *)ptep, native_pte_val(pte)); |
662 | } | 546 | } |
663 | 547 | ||
664 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 548 | static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
665 | { | 549 | { |
666 | ptep->pte_low = 0; | 550 | if (!xen_batched_set_pte(ptep, native_make_pte(0))) |
667 | smp_wmb(); /* make sure low gets written first */ | 551 | native_pte_clear(mm, addr, ptep); |
668 | ptep->pte_high = 0; | ||
669 | } | 552 | } |
670 | 553 | ||
671 | void xen_pmd_clear(pmd_t *pmdp) | 554 | static void xen_pmd_clear(pmd_t *pmdp) |
672 | { | 555 | { |
673 | set_pmd(pmdp, __pmd(0)); | 556 | set_pmd(pmdp, __pmd(0)); |
674 | } | 557 | } |
675 | #endif /* CONFIG_X86_PAE */ | 558 | #endif /* CONFIG_X86_PAE */ |
676 | 559 | ||
677 | pmd_t xen_make_pmd(pmdval_t pmd) | 560 | static pmd_t xen_make_pmd(pmdval_t pmd) |
678 | { | 561 | { |
679 | pmd = pte_pfn_to_mfn(pmd); | 562 | pmd = pte_pfn_to_mfn(pmd); |
680 | return native_make_pmd(pmd); | 563 | return native_make_pmd(pmd); |
@@ -682,13 +565,13 @@ pmd_t xen_make_pmd(pmdval_t pmd) | |||
682 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); | 565 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); |
683 | 566 | ||
684 | #if PAGETABLE_LEVELS == 4 | 567 | #if PAGETABLE_LEVELS == 4 |
685 | pudval_t xen_pud_val(pud_t pud) | 568 | static pudval_t xen_pud_val(pud_t pud) |
686 | { | 569 | { |
687 | return pte_mfn_to_pfn(pud.pud); | 570 | return pte_mfn_to_pfn(pud.pud); |
688 | } | 571 | } |
689 | PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); | 572 | PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); |
690 | 573 | ||
691 | pud_t xen_make_pud(pudval_t pud) | 574 | static pud_t xen_make_pud(pudval_t pud) |
692 | { | 575 | { |
693 | pud = pte_pfn_to_mfn(pud); | 576 | pud = pte_pfn_to_mfn(pud); |
694 | 577 | ||
@@ -696,7 +579,7 @@ pud_t xen_make_pud(pudval_t pud) | |||
696 | } | 579 | } |
697 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud); | 580 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud); |
698 | 581 | ||
699 | pgd_t *xen_get_user_pgd(pgd_t *pgd) | 582 | static pgd_t *xen_get_user_pgd(pgd_t *pgd) |
700 | { | 583 | { |
701 | pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK); | 584 | pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK); |
702 | unsigned offset = pgd - pgd_page; | 585 | unsigned offset = pgd - pgd_page; |
@@ -728,7 +611,7 @@ static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) | |||
728 | * 2. It is always pinned | 611 | * 2. It is always pinned |
729 | * 3. It has no user pagetable attached to it | 612 | * 3. It has no user pagetable attached to it |
730 | */ | 613 | */ |
731 | void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) | 614 | static void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) |
732 | { | 615 | { |
733 | preempt_disable(); | 616 | preempt_disable(); |
734 | 617 | ||
@@ -741,12 +624,10 @@ void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) | |||
741 | preempt_enable(); | 624 | preempt_enable(); |
742 | } | 625 | } |
743 | 626 | ||
744 | void xen_set_pgd(pgd_t *ptr, pgd_t val) | 627 | static void xen_set_pgd(pgd_t *ptr, pgd_t val) |
745 | { | 628 | { |
746 | pgd_t *user_ptr = xen_get_user_pgd(ptr); | 629 | pgd_t *user_ptr = xen_get_user_pgd(ptr); |
747 | 630 | ||
748 | ADD_STATS(pgd_update, 1); | ||
749 | |||
750 | /* If page is not pinned, we can just update the entry | 631 | /* If page is not pinned, we can just update the entry |
751 | directly */ | 632 | directly */ |
752 | if (!xen_page_pinned(ptr)) { | 633 | if (!xen_page_pinned(ptr)) { |
@@ -758,9 +639,6 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val) | |||
758 | return; | 639 | return; |
759 | } | 640 | } |
760 | 641 | ||
761 | ADD_STATS(pgd_update_pinned, 1); | ||
762 | ADD_STATS(pgd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
763 | |||
764 | /* If it's pinned, then we can at least batch the kernel and | 642 | /* If it's pinned, then we can at least batch the kernel and |
765 | user updates together. */ | 643 | user updates together. */ |
766 | xen_mc_batch(); | 644 | xen_mc_batch(); |
@@ -1054,7 +932,7 @@ void xen_mm_pin_all(void) | |||
1054 | * that's before we have page structures to store the bits. So do all | 932 | * that's before we have page structures to store the bits. So do all |
1055 | * the book-keeping now. | 933 | * the book-keeping now. |
1056 | */ | 934 | */ |
1057 | static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page, | 935 | static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, |
1058 | enum pt_level level) | 936 | enum pt_level level) |
1059 | { | 937 | { |
1060 | SetPagePinned(page); | 938 | SetPagePinned(page); |
@@ -1162,14 +1040,14 @@ void xen_mm_unpin_all(void) | |||
1162 | spin_unlock(&pgd_lock); | 1040 | spin_unlock(&pgd_lock); |
1163 | } | 1041 | } |
1164 | 1042 | ||
1165 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) | 1043 | static void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) |
1166 | { | 1044 | { |
1167 | spin_lock(&next->page_table_lock); | 1045 | spin_lock(&next->page_table_lock); |
1168 | xen_pgd_pin(next); | 1046 | xen_pgd_pin(next); |
1169 | spin_unlock(&next->page_table_lock); | 1047 | spin_unlock(&next->page_table_lock); |
1170 | } | 1048 | } |
1171 | 1049 | ||
1172 | void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) | 1050 | static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) |
1173 | { | 1051 | { |
1174 | spin_lock(&mm->page_table_lock); | 1052 | spin_lock(&mm->page_table_lock); |
1175 | xen_pgd_pin(mm); | 1053 | xen_pgd_pin(mm); |
@@ -1187,7 +1065,7 @@ static void drop_other_mm_ref(void *info) | |||
1187 | 1065 | ||
1188 | active_mm = percpu_read(cpu_tlbstate.active_mm); | 1066 | active_mm = percpu_read(cpu_tlbstate.active_mm); |
1189 | 1067 | ||
1190 | if (active_mm == mm) | 1068 | if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK) |
1191 | leave_mm(smp_processor_id()); | 1069 | leave_mm(smp_processor_id()); |
1192 | 1070 | ||
1193 | /* If this cpu still has a stale cr3 reference, then make sure | 1071 | /* If this cpu still has a stale cr3 reference, then make sure |
@@ -1256,7 +1134,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm) | |||
1256 | * pagetable because of lazy tlb flushing. This means we need to | 1134 | * pagetable because of lazy tlb flushing. This means we need to
1257 | * switch all CPUs off this pagetable before we can unpin it. | 1135 | * switch all CPUs off this pagetable before we can unpin it. |
1258 | */ | 1136 | */ |
1259 | void xen_exit_mmap(struct mm_struct *mm) | 1137 | static void xen_exit_mmap(struct mm_struct *mm) |
1260 | { | 1138 | { |
1261 | get_cpu(); /* make sure we don't move around */ | 1139 | get_cpu(); /* make sure we don't move around */ |
1262 | xen_drop_mm_ref(mm); | 1140 | xen_drop_mm_ref(mm); |
@@ -1271,13 +1149,27 @@ void xen_exit_mmap(struct mm_struct *mm) | |||
1271 | spin_unlock(&mm->page_table_lock); | 1149 | spin_unlock(&mm->page_table_lock); |
1272 | } | 1150 | } |
1273 | 1151 | ||
1274 | static __init void xen_pagetable_setup_start(pgd_t *base) | 1152 | static void __init xen_pagetable_setup_start(pgd_t *base) |
1275 | { | 1153 | { |
1276 | } | 1154 | } |
1277 | 1155 | ||
1156 | static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) | ||
1157 | { | ||
1158 | /* reserve the range used */ | ||
1159 | native_pagetable_reserve(start, end); | ||
1160 | |||
1161 | /* set as RW the rest */ | ||
1162 | printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end, | ||
1163 | PFN_PHYS(pgt_buf_top)); | ||
1164 | while (end < PFN_PHYS(pgt_buf_top)) { | ||
1165 | make_lowmem_page_readwrite(__va(end)); | ||
1166 | end += PAGE_SIZE; | ||
1167 | } | ||
1168 | } | ||
1169 | |||
1278 | static void xen_post_allocator_init(void); | 1170 | static void xen_post_allocator_init(void); |
1279 | 1171 | ||
1280 | static __init void xen_pagetable_setup_done(pgd_t *base) | 1172 | static void __init xen_pagetable_setup_done(pgd_t *base) |
1281 | { | 1173 | { |
1282 | xen_setup_shared_info(); | 1174 | xen_setup_shared_info(); |
1283 | xen_post_allocator_init(); | 1175 | xen_post_allocator_init(); |
@@ -1473,16 +1365,20 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) | |||
1473 | #endif | 1365 | #endif |
1474 | } | 1366 | } |
1475 | 1367 | ||
1476 | static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) | ||
1477 | { | ||
1478 | unsigned long pfn = pte_pfn(pte); | ||
1479 | |||
1480 | #ifdef CONFIG_X86_32 | 1368 | #ifdef CONFIG_X86_32 |
1369 | static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) | ||
1370 | { | ||
1481 | /* If there's an existing pte, then don't allow _PAGE_RW to be set */ | 1371 | /* If there's an existing pte, then don't allow _PAGE_RW to be set */ |
1482 | if (pte_val_ma(*ptep) & _PAGE_PRESENT) | 1372 | if (pte_val_ma(*ptep) & _PAGE_PRESENT) |
1483 | pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & | 1373 | pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & |
1484 | pte_val_ma(pte)); | 1374 | pte_val_ma(pte)); |
1485 | #endif | 1375 | |
1376 | return pte; | ||
1377 | } | ||
1378 | #else /* CONFIG_X86_64 */ | ||
1379 | static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) | ||
1380 | { | ||
1381 | unsigned long pfn = pte_pfn(pte); | ||
1486 | 1382 | ||
1487 | /* | 1383 | /* |
1488 | * If the new pfn is within the range of the newly allocated | 1384 | * If the new pfn is within the range of the newly allocated |
@@ -1491,16 +1387,17 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) | |||
1491 | * it is RO. | 1387 | * it is RO. |
1492 | */ | 1388 | */ |
1493 | if (((!is_early_ioremap_ptep(ptep) && | 1389 | if (((!is_early_ioremap_ptep(ptep) && |
1494 | pfn >= pgt_buf_start && pfn < pgt_buf_end)) || | 1390 | pfn >= pgt_buf_start && pfn < pgt_buf_top)) || |
1495 | (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) | 1391 | (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) |
1496 | pte = pte_wrprotect(pte); | 1392 | pte = pte_wrprotect(pte); |
1497 | 1393 | ||
1498 | return pte; | 1394 | return pte; |
1499 | } | 1395 | } |
1396 | #endif /* CONFIG_X86_64 */ | ||
1500 | 1397 | ||
1501 | /* Init-time set_pte while constructing initial pagetables, which | 1398 | /* Init-time set_pte while constructing initial pagetables, which |
1502 | doesn't allow RO pagetable pages to be remapped RW */ | 1399 | doesn't allow RO pagetable pages to be remapped RW */ |
1503 | static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) | 1400 | static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) |
1504 | { | 1401 | { |
1505 | pte = mask_rw_pte(ptep, pte); | 1402 | pte = mask_rw_pte(ptep, pte); |
1506 | 1403 | ||
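On 64-bit, mask_rw_pte now write-protects any pfn in the whole pagetable allocation window, up to pgt_buf_top rather than only the consumed part below pgt_buf_end; this pairs with the xen_mapping_pagetable_reserve callback added earlier in this file, which later returns the unused tail of that window to read-write. The range check restated as a standalone predicate (helper name is ours):

/* Should this pfn be mapped read-only while the initial page tables are
 * being built? True for anything inside the whole allocation window
 * [pgt_buf_start, pgt_buf_top), not just the part already consumed. */
static bool pfn_in_pgt_buf(unsigned long pfn)
{
        return pfn >= pgt_buf_start && pfn < pgt_buf_top;
}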
@@ -1518,7 +1415,7 @@ static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) | |||
1518 | 1415 | ||
1519 | /* Early in boot, while setting up the initial pagetable, assume | 1416 | /* Early in boot, while setting up the initial pagetable, assume |
1520 | everything is pinned. */ | 1417 | everything is pinned. */ |
1521 | static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) | 1418 | static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) |
1522 | { | 1419 | { |
1523 | #ifdef CONFIG_FLATMEM | 1420 | #ifdef CONFIG_FLATMEM |
1524 | BUG_ON(mem_map); /* should only be used early */ | 1421 | BUG_ON(mem_map); /* should only be used early */ |
@@ -1528,7 +1425,7 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) | |||
1528 | } | 1425 | } |
1529 | 1426 | ||
1530 | /* Used for pmd and pud */ | 1427 | /* Used for pmd and pud */ |
1531 | static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) | 1428 | static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) |
1532 | { | 1429 | { |
1533 | #ifdef CONFIG_FLATMEM | 1430 | #ifdef CONFIG_FLATMEM |
1534 | BUG_ON(mem_map); /* should only be used early */ | 1431 | BUG_ON(mem_map); /* should only be used early */ |
@@ -1538,13 +1435,13 @@ static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) | |||
1538 | 1435 | ||
1539 | /* Early release_pte assumes that all pts are pinned, since there's | 1436 | /* Early release_pte assumes that all pts are pinned, since there's |
1540 | only init_mm and anything attached to that is pinned. */ | 1437 | only init_mm and anything attached to that is pinned. */ |
1541 | static __init void xen_release_pte_init(unsigned long pfn) | 1438 | static void __init xen_release_pte_init(unsigned long pfn) |
1542 | { | 1439 | { |
1543 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); | 1440 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); |
1544 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | 1441 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); |
1545 | } | 1442 | } |
1546 | 1443 | ||
1547 | static __init void xen_release_pmd_init(unsigned long pfn) | 1444 | static void __init xen_release_pmd_init(unsigned long pfn) |
1548 | { | 1445 | { |
1549 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | 1446 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); |
1550 | } | 1447 | } |
@@ -1670,7 +1567,7 @@ static void set_page_prot(void *addr, pgprot_t prot) | |||
1670 | BUG(); | 1567 | BUG(); |
1671 | } | 1568 | } |
1672 | 1569 | ||
1673 | static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | 1570 | static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) |
1674 | { | 1571 | { |
1675 | unsigned pmdidx, pteidx; | 1572 | unsigned pmdidx, pteidx; |
1676 | unsigned ident_pte; | 1573 | unsigned ident_pte; |
@@ -1753,7 +1650,7 @@ static void convert_pfn_mfn(void *v) | |||
1753 | * of the physical mapping once some sort of allocator has been set | 1650 | * of the physical mapping once some sort of allocator has been set |
1754 | * up. | 1651 | * up. |
1755 | */ | 1652 | */ |
1756 | __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | 1653 | pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, |
1757 | unsigned long max_pfn) | 1654 | unsigned long max_pfn) |
1758 | { | 1655 | { |
1759 | pud_t *l3; | 1656 | pud_t *l3; |
@@ -1824,7 +1721,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | |||
1824 | static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); | 1721 | static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); |
1825 | static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); | 1722 | static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); |
1826 | 1723 | ||
1827 | static __init void xen_write_cr3_init(unsigned long cr3) | 1724 | static void __init xen_write_cr3_init(unsigned long cr3) |
1828 | { | 1725 | { |
1829 | unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); | 1726 | unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); |
1830 | 1727 | ||
@@ -1861,7 +1758,7 @@ static __init void xen_write_cr3_init(unsigned long cr3) | |||
1861 | pv_mmu_ops.write_cr3 = &xen_write_cr3; | 1758 | pv_mmu_ops.write_cr3 = &xen_write_cr3; |
1862 | } | 1759 | } |
1863 | 1760 | ||
1864 | __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | 1761 | pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, |
1865 | unsigned long max_pfn) | 1762 | unsigned long max_pfn) |
1866 | { | 1763 | { |
1867 | pmd_t *kernel_pmd; | 1764 | pmd_t *kernel_pmd; |
@@ -1967,7 +1864,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | |||
1967 | #endif | 1864 | #endif |
1968 | } | 1865 | } |
1969 | 1866 | ||
1970 | __init void xen_ident_map_ISA(void) | 1867 | void __init xen_ident_map_ISA(void) |
1971 | { | 1868 | { |
1972 | unsigned long pa; | 1869 | unsigned long pa; |
1973 | 1870 | ||
@@ -1990,7 +1887,7 @@ __init void xen_ident_map_ISA(void) | |||
1990 | xen_flush_tlb(); | 1887 | xen_flush_tlb(); |
1991 | } | 1888 | } |
1992 | 1889 | ||
1993 | static __init void xen_post_allocator_init(void) | 1890 | static void __init xen_post_allocator_init(void) |
1994 | { | 1891 | { |
1995 | #ifdef CONFIG_XEN_DEBUG | 1892 | #ifdef CONFIG_XEN_DEBUG |
1996 | pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); | 1893 | pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); |
@@ -2027,7 +1924,7 @@ static void xen_leave_lazy_mmu(void) | |||
2027 | preempt_enable(); | 1924 | preempt_enable(); |
2028 | } | 1925 | } |
2029 | 1926 | ||
2030 | static const struct pv_mmu_ops xen_mmu_ops __initdata = { | 1927 | static const struct pv_mmu_ops xen_mmu_ops __initconst = { |
2031 | .read_cr2 = xen_read_cr2, | 1928 | .read_cr2 = xen_read_cr2, |
2032 | .write_cr2 = xen_write_cr2, | 1929 | .write_cr2 = xen_write_cr2, |
2033 | 1930 | ||
@@ -2100,6 +1997,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
2100 | 1997 | ||
2101 | void __init xen_init_mmu_ops(void) | 1998 | void __init xen_init_mmu_ops(void) |
2102 | { | 1999 | { |
2000 | x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; | ||
2103 | x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; | 2001 | x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; |
2104 | x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; | 2002 | x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; |
2105 | pv_mmu_ops = xen_mmu_ops; | 2003 | pv_mmu_ops = xen_mmu_ops; |
@@ -2351,7 +2249,7 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, | |||
2351 | struct remap_data *rmd = data; | 2249 | struct remap_data *rmd = data; |
2352 | pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); | 2250 | pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); |
2353 | 2251 | ||
2354 | rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr; | 2252 | rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; |
2355 | rmd->mmu_update->val = pte_val_ma(pte); | 2253 | rmd->mmu_update->val = pte_val_ma(pte); |
2356 | rmd->mmu_update++; | 2254 | rmd->mmu_update++; |
2357 | 2255 | ||
@@ -2405,7 +2303,6 @@ out: | |||
2405 | EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); | 2303 | EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); |
2406 | 2304 | ||
2407 | #ifdef CONFIG_XEN_DEBUG_FS | 2305 | #ifdef CONFIG_XEN_DEBUG_FS |
2408 | |||
2409 | static int p2m_dump_open(struct inode *inode, struct file *filp) | 2306 | static int p2m_dump_open(struct inode *inode, struct file *filp) |
2410 | { | 2307 | { |
2411 | return single_open(filp, p2m_dump_show, NULL); | 2308 | return single_open(filp, p2m_dump_show, NULL); |
@@ -2417,65 +2314,4 @@ static const struct file_operations p2m_dump_fops = { | |||
2417 | .llseek = seq_lseek, | 2314 | .llseek = seq_lseek, |
2418 | .release = single_release, | 2315 | .release = single_release, |
2419 | }; | 2316 | }; |
2420 | 2317 | #endif /* CONFIG_XEN_DEBUG_FS */ | |
2421 | static struct dentry *d_mmu_debug; | ||
2422 | |||
2423 | static int __init xen_mmu_debugfs(void) | ||
2424 | { | ||
2425 | struct dentry *d_xen = xen_init_debugfs(); | ||
2426 | |||
2427 | if (d_xen == NULL) | ||
2428 | return -ENOMEM; | ||
2429 | |||
2430 | d_mmu_debug = debugfs_create_dir("mmu", d_xen); | ||
2431 | |||
2432 | debugfs_create_u8("zero_stats", 0644, d_mmu_debug, &zero_stats); | ||
2433 | |||
2434 | debugfs_create_u32("pgd_update", 0444, d_mmu_debug, &mmu_stats.pgd_update); | ||
2435 | debugfs_create_u32("pgd_update_pinned", 0444, d_mmu_debug, | ||
2436 | &mmu_stats.pgd_update_pinned); | ||
2437 | debugfs_create_u32("pgd_update_batched", 0444, d_mmu_debug, | ||
2438 | &mmu_stats.pgd_update_pinned); | ||
2439 | |||
2440 | debugfs_create_u32("pud_update", 0444, d_mmu_debug, &mmu_stats.pud_update); | ||
2441 | debugfs_create_u32("pud_update_pinned", 0444, d_mmu_debug, | ||
2442 | &mmu_stats.pud_update_pinned); | ||
2443 | debugfs_create_u32("pud_update_batched", 0444, d_mmu_debug, | ||
2444 | &mmu_stats.pud_update_pinned); | ||
2445 | |||
2446 | debugfs_create_u32("pmd_update", 0444, d_mmu_debug, &mmu_stats.pmd_update); | ||
2447 | debugfs_create_u32("pmd_update_pinned", 0444, d_mmu_debug, | ||
2448 | &mmu_stats.pmd_update_pinned); | ||
2449 | debugfs_create_u32("pmd_update_batched", 0444, d_mmu_debug, | ||
2450 | &mmu_stats.pmd_update_pinned); | ||
2451 | |||
2452 | debugfs_create_u32("pte_update", 0444, d_mmu_debug, &mmu_stats.pte_update); | ||
2453 | // debugfs_create_u32("pte_update_pinned", 0444, d_mmu_debug, | ||
2454 | // &mmu_stats.pte_update_pinned); | ||
2455 | debugfs_create_u32("pte_update_batched", 0444, d_mmu_debug, | ||
2456 | &mmu_stats.pte_update_pinned); | ||
2457 | |||
2458 | debugfs_create_u32("mmu_update", 0444, d_mmu_debug, &mmu_stats.mmu_update); | ||
2459 | debugfs_create_u32("mmu_update_extended", 0444, d_mmu_debug, | ||
2460 | &mmu_stats.mmu_update_extended); | ||
2461 | xen_debugfs_create_u32_array("mmu_update_histo", 0444, d_mmu_debug, | ||
2462 | mmu_stats.mmu_update_histo, 20); | ||
2463 | |||
2464 | debugfs_create_u32("set_pte_at", 0444, d_mmu_debug, &mmu_stats.set_pte_at); | ||
2465 | debugfs_create_u32("set_pte_at_batched", 0444, d_mmu_debug, | ||
2466 | &mmu_stats.set_pte_at_batched); | ||
2467 | debugfs_create_u32("set_pte_at_current", 0444, d_mmu_debug, | ||
2468 | &mmu_stats.set_pte_at_current); | ||
2469 | debugfs_create_u32("set_pte_at_kernel", 0444, d_mmu_debug, | ||
2470 | &mmu_stats.set_pte_at_kernel); | ||
2471 | |||
2472 | debugfs_create_u32("prot_commit", 0444, d_mmu_debug, &mmu_stats.prot_commit); | ||
2473 | debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug, | ||
2474 | &mmu_stats.prot_commit_batched); | ||
2475 | |||
2476 | debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops); | ||
2477 | return 0; | ||
2478 | } | ||
2479 | fs_initcall(xen_mmu_debugfs); | ||
2480 | |||
2481 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 537bb9aab777..73809bb951b4 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h | |||
@@ -15,43 +15,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); | |||
15 | 15 | ||
16 | void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | 16 | void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); |
17 | 17 | ||
18 | |||
19 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next); | ||
20 | void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); | ||
21 | void xen_exit_mmap(struct mm_struct *mm); | ||
22 | |||
23 | pteval_t xen_pte_val(pte_t); | ||
24 | pmdval_t xen_pmd_val(pmd_t); | ||
25 | pgdval_t xen_pgd_val(pgd_t); | ||
26 | |||
27 | pte_t xen_make_pte(pteval_t); | ||
28 | pmd_t xen_make_pmd(pmdval_t); | ||
29 | pgd_t xen_make_pgd(pgdval_t); | ||
30 | |||
31 | void xen_set_pte(pte_t *ptep, pte_t pteval); | ||
32 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | ||
33 | pte_t *ptep, pte_t pteval); | ||
34 | |||
35 | #ifdef CONFIG_X86_PAE | ||
36 | void xen_set_pte_atomic(pte_t *ptep, pte_t pte); | ||
37 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); | ||
38 | void xen_pmd_clear(pmd_t *pmdp); | ||
39 | #endif /* CONFIG_X86_PAE */ | ||
40 | |||
41 | void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); | ||
42 | void xen_set_pud(pud_t *ptr, pud_t val); | ||
43 | void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval); | ||
44 | void xen_set_pud_hyper(pud_t *ptr, pud_t val); | ||
45 | |||
46 | #if PAGETABLE_LEVELS == 4 | ||
47 | pudval_t xen_pud_val(pud_t pud); | ||
48 | pud_t xen_make_pud(pudval_t pudval); | ||
49 | void xen_set_pgd(pgd_t *pgdp, pgd_t pgd); | ||
50 | void xen_set_pgd_hyper(pgd_t *pgdp, pgd_t pgd); | ||
51 | #endif | ||
52 | |||
53 | pgd_t *xen_get_user_pgd(pgd_t *pgd); | ||
54 | |||
55 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); | 18 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); |
56 | void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | 19 | void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, |
57 | pte_t *ptep, pte_t pte); | 20 | pte_t *ptep, pte_t pte); |
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 141eb0de8b06..58efeb9d5440 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -522,11 +522,20 @@ static bool __init __early_alloc_p2m(unsigned long pfn) | |||
522 | /* Boundary cross-over for the edges: */ | 522 | /* Boundary cross-over for the edges: */ |
523 | if (idx) { | 523 | if (idx) { |
524 | unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); | 524 | unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); |
525 | unsigned long *mid_mfn_p; | ||
525 | 526 | ||
526 | p2m_init(p2m); | 527 | p2m_init(p2m); |
527 | 528 | ||
528 | p2m_top[topidx][mididx] = p2m; | 529 | p2m_top[topidx][mididx] = p2m; |
529 | 530 | ||
531 | /* For save/restore we need the MFN of the saved P2M */ | ||
532 | |||
533 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
534 | WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing), | ||
535 | "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n", | ||
536 | topidx, mididx); | ||
537 | mid_mfn_p[mididx] = virt_to_mfn(p2m); | ||
538 | |||
530 | } | 539 | } |
531 | return idx != 0; | 540 | return idx != 0; |
532 | } | 541 | } |
@@ -549,12 +558,29 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, | |||
549 | pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) | 558 | pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) |
550 | { | 559 | { |
551 | unsigned topidx = p2m_top_index(pfn); | 560 | unsigned topidx = p2m_top_index(pfn); |
552 | if (p2m_top[topidx] == p2m_mid_missing) { | 561 | unsigned long *mid_mfn_p; |
553 | unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | 562 | unsigned long **mid; |
563 | |||
564 | mid = p2m_top[topidx]; | ||
565 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
566 | if (mid == p2m_mid_missing) { | ||
567 | mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
554 | 568 | ||
555 | p2m_mid_init(mid); | 569 | p2m_mid_init(mid); |
556 | 570 | ||
557 | p2m_top[topidx] = mid; | 571 | p2m_top[topidx] = mid; |
572 | |||
573 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | ||
574 | } | ||
575 | /* And the save/restore P2M tables.. */ | ||
576 | if (mid_mfn_p == p2m_mid_missing_mfn) { | ||
577 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
578 | p2m_mid_mfn_init(mid_mfn_p); | ||
579 | |||
580 | p2m_top_mfn_p[topidx] = mid_mfn_p; | ||
581 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | ||
582 | /* Note: we don't set mid_mfn_p[mididx] here, | ||
583 | * look in __early_alloc_p2m */ | ||
558 | } | 584 | } |
559 | } | 585 | } |
560 | 586 | ||
@@ -650,7 +676,7 @@ static unsigned long mfn_hash(unsigned long mfn) | |||
650 | } | 676 | } |
651 | 677 | ||
652 | /* Add an MFN override for a particular page */ | 678 | /* Add an MFN override for a particular page */ |
653 | int m2p_add_override(unsigned long mfn, struct page *page) | 679 | int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte) |
654 | { | 680 | { |
655 | unsigned long flags; | 681 | unsigned long flags; |
656 | unsigned long pfn; | 682 | unsigned long pfn; |
@@ -662,7 +688,6 @@ int m2p_add_override(unsigned long mfn, struct page *page) | |||
662 | if (!PageHighMem(page)) { | 688 | if (!PageHighMem(page)) { |
663 | address = (unsigned long)__va(pfn << PAGE_SHIFT); | 689 | address = (unsigned long)__va(pfn << PAGE_SHIFT); |
664 | ptep = lookup_address(address, &level); | 690 | ptep = lookup_address(address, &level); |
665 | |||
666 | if (WARN(ptep == NULL || level != PG_LEVEL_4K, | 691 | if (WARN(ptep == NULL || level != PG_LEVEL_4K, |
667 | "m2p_add_override: pfn %lx not mapped", pfn)) | 692 | "m2p_add_override: pfn %lx not mapped", pfn)) |
668 | return -EINVAL; | 693 | return -EINVAL; |
@@ -674,18 +699,17 @@ int m2p_add_override(unsigned long mfn, struct page *page) | |||
674 | if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) | 699 | if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) |
675 | return -ENOMEM; | 700 | return -ENOMEM; |
676 | 701 | ||
677 | if (!PageHighMem(page)) | 702 | if (clear_pte && !PageHighMem(page)) |
678 | /* Just zap old mapping for now */ | 703 | /* Just zap old mapping for now */ |
679 | pte_clear(&init_mm, address, ptep); | 704 | pte_clear(&init_mm, address, ptep); |
680 | |||
681 | spin_lock_irqsave(&m2p_override_lock, flags); | 705 | spin_lock_irqsave(&m2p_override_lock, flags); |
682 | list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); | 706 | list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); |
683 | spin_unlock_irqrestore(&m2p_override_lock, flags); | 707 | spin_unlock_irqrestore(&m2p_override_lock, flags); |
684 | 708 | ||
685 | return 0; | 709 | return 0; |
686 | } | 710 | } |
687 | 711 | EXPORT_SYMBOL_GPL(m2p_add_override); | |
688 | int m2p_remove_override(struct page *page) | 712 | int m2p_remove_override(struct page *page, bool clear_pte) |
689 | { | 713 | { |
690 | unsigned long flags; | 714 | unsigned long flags; |
691 | unsigned long mfn; | 715 | unsigned long mfn; |
@@ -713,7 +737,7 @@ int m2p_remove_override(struct page *page) | |||
713 | spin_unlock_irqrestore(&m2p_override_lock, flags); | 737 | spin_unlock_irqrestore(&m2p_override_lock, flags); |
714 | set_phys_to_machine(pfn, page->index); | 738 | set_phys_to_machine(pfn, page->index); |
715 | 739 | ||
716 | if (!PageHighMem(page)) | 740 | if (clear_pte && !PageHighMem(page)) |
717 | set_pte_at(&init_mm, address, ptep, | 741 | set_pte_at(&init_mm, address, ptep, |
718 | pfn_pte(pfn, PAGE_KERNEL)); | 742 | pfn_pte(pfn, PAGE_KERNEL)); |
719 | /* No tlb flush necessary because the caller already | 743 | /* No tlb flush necessary because the caller already |
@@ -721,6 +745,7 @@ int m2p_remove_override(struct page *page) | |||
721 | 745 | ||
722 | return 0; | 746 | return 0; |
723 | } | 747 | } |
748 | EXPORT_SYMBOL_GPL(m2p_remove_override); | ||
724 | 749 | ||
725 | struct page *m2p_find_override(unsigned long mfn) | 750 | struct page *m2p_find_override(unsigned long mfn) |
726 | { | 751 | { |
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index bfd0632fe65e..b480d4207a4c 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c | |||
@@ -36,7 +36,7 @@ int __init pci_xen_swiotlb_detect(void) | |||
36 | 36 | ||
37 | /* If running as PV guest, either iommu=soft, or swiotlb=force will | 37 | /* If running as PV guest, either iommu=soft, or swiotlb=force will |
38 | * activate this IOMMU. If running as PV privileged, activate it | 38 | * activate this IOMMU. If running as PV privileged, activate it |
39 | * irregardlesss. | 39 | * irregardless. |
40 | */ | 40 | */ |
41 | if ((xen_initial_domain() || swiotlb || swiotlb_force) && | 41 | if ((xen_initial_domain() || swiotlb || swiotlb_force) && |
42 | (xen_pv_domain())) | 42 | (xen_pv_domain())) |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index fa0269a99377..be1a464f6d66 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -50,7 +50,7 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size; | |||
50 | */ | 50 | */ |
51 | #define EXTRA_MEM_RATIO (10) | 51 | #define EXTRA_MEM_RATIO (10) |
52 | 52 | ||
53 | static __init void xen_add_extra_mem(unsigned long pages) | 53 | static void __init xen_add_extra_mem(unsigned long pages) |
54 | { | 54 | { |
55 | unsigned long pfn; | 55 | unsigned long pfn; |
56 | 56 | ||
@@ -166,7 +166,7 @@ static unsigned long __init xen_set_identity(const struct e820entry *list, | |||
166 | if (last > end) | 166 | if (last > end) |
167 | continue; | 167 | continue; |
168 | 168 | ||
169 | if (entry->type == E820_RAM) { | 169 | if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) { |
170 | if (start > start_pci) | 170 | if (start > start_pci) |
171 | identity += set_phys_range_identity( | 171 | identity += set_phys_range_identity( |
172 | PFN_UP(start_pci), PFN_DOWN(start)); | 172 | PFN_UP(start_pci), PFN_DOWN(start)); |
@@ -227,7 +227,11 @@ char * __init xen_memory_setup(void) | |||
227 | 227 | ||
228 | memcpy(map_raw, map, sizeof(map)); | 228 | memcpy(map_raw, map, sizeof(map)); |
229 | e820.nr_map = 0; | 229 | e820.nr_map = 0; |
230 | #ifdef CONFIG_X86_32 | ||
230 | xen_extra_mem_start = mem_end; | 231 | xen_extra_mem_start = mem_end; |
232 | #else | ||
233 | xen_extra_mem_start = max((1ULL << 32), mem_end); | ||
234 | #endif | ||
231 | for (i = 0; i < memmap.nr_entries; i++) { | 235 | for (i = 0; i < memmap.nr_entries; i++) { |
232 | unsigned long long end; | 236 | unsigned long long end; |
233 | 237 | ||
@@ -336,7 +340,7 @@ static void __init fiddle_vdso(void) | |||
336 | #endif | 340 | #endif |
337 | } | 341 | } |
338 | 342 | ||
339 | static __cpuinit int register_callback(unsigned type, const void *func) | 343 | static int __cpuinit register_callback(unsigned type, const void *func) |
340 | { | 344 | { |
341 | struct callback_register callback = { | 345 | struct callback_register callback = { |
342 | .type = type, | 346 | .type = type, |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 30612441ed99..41038c01de40 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -46,18 +46,17 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); | |||
46 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); | 46 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); |
47 | 47 | ||
48 | /* | 48 | /* |
49 | * Reschedule call back. Nothing to do, | 49 | * Reschedule call back. |
50 | * all the work is done automatically when | ||
51 | * we return from the interrupt. | ||
52 | */ | 50 | */ |
53 | static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) | 51 | static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) |
54 | { | 52 | { |
55 | inc_irq_stat(irq_resched_count); | 53 | inc_irq_stat(irq_resched_count); |
54 | scheduler_ipi(); | ||
56 | 55 | ||
57 | return IRQ_HANDLED; | 56 | return IRQ_HANDLED; |
58 | } | 57 | } |
59 | 58 | ||
60 | static __cpuinit void cpu_bringup(void) | 59 | static void __cpuinit cpu_bringup(void) |
61 | { | 60 | { |
62 | int cpu = smp_processor_id(); | 61 | int cpu = smp_processor_id(); |
63 | 62 | ||
@@ -85,7 +84,7 @@ static __cpuinit void cpu_bringup(void) | |||
85 | wmb(); /* make sure everything is out */ | 84 | wmb(); /* make sure everything is out */ |
86 | } | 85 | } |
87 | 86 | ||
88 | static __cpuinit void cpu_bringup_and_idle(void) | 87 | static void __cpuinit cpu_bringup_and_idle(void) |
89 | { | 88 | { |
90 | cpu_bringup(); | 89 | cpu_bringup(); |
91 | cpu_idle(); | 90 | cpu_idle(); |
@@ -242,7 +241,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) | |||
242 | } | 241 | } |
243 | } | 242 | } |
244 | 243 | ||
245 | static __cpuinit int | 244 | static int __cpuinit |
246 | cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | 245 | cpu_initialize_context(unsigned int cpu, struct task_struct *idle) |
247 | { | 246 | { |
248 | struct vcpu_guest_context *ctxt; | 247 | struct vcpu_guest_context *ctxt; |
@@ -486,7 +485,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) | |||
486 | return IRQ_HANDLED; | 485 | return IRQ_HANDLED; |
487 | } | 486 | } |
488 | 487 | ||
489 | static const struct smp_ops xen_smp_ops __initdata = { | 488 | static const struct smp_ops xen_smp_ops __initconst = { |
490 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, | 489 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, |
491 | .smp_prepare_cpus = xen_smp_prepare_cpus, | 490 | .smp_prepare_cpus = xen_smp_prepare_cpus, |
492 | .smp_cpus_done = xen_smp_cpus_done, | 491 | .smp_cpus_done = xen_smp_cpus_done, |
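The smp.c hunk also moves the const xen_smp_ops from __initdata to __initconst (the time.c hunk below does the same for xen_time_ops): const data that is only needed during init belongs in .init.rodata. A minimal sketch of the convention, with hypothetical names:

#include <linux/init.h>

/* const init-only data goes in .init.rodata via __initconst;
 * writable init-only data keeps __initdata. */
static const int demo_params[] __initconst = { 10, 20, 30 };
static int demo_chosen __initdata;

static int __init demo_pick_param(void)
{
	demo_chosen = demo_params[1];
	return 0;
}
device_initcall(demo_pick_param);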
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 2e2d370a47b1..5158c505bef9 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -26,8 +26,6 @@ | |||
26 | 26 | ||
27 | #include "xen-ops.h" | 27 | #include "xen-ops.h" |
28 | 28 | ||
29 | #define XEN_SHIFT 22 | ||
30 | |||
31 | /* Xen may fire a timer up to this many ns early */ | 29 | /* Xen may fire a timer up to this many ns early */ |
32 | #define TIMER_SLOP 100000 | 30 | #define TIMER_SLOP 100000 |
33 | #define NS_PER_TICK (1000000000LL / HZ) | 31 | #define NS_PER_TICK (1000000000LL / HZ) |
@@ -211,8 +209,6 @@ static struct clocksource xen_clocksource __read_mostly = { | |||
211 | .rating = 400, | 209 | .rating = 400, |
212 | .read = xen_clocksource_get_cycles, | 210 | .read = xen_clocksource_get_cycles, |
213 | .mask = ~0, | 211 | .mask = ~0, |
214 | .mult = 1<<XEN_SHIFT, /* time directly in nanoseconds */ | ||
215 | .shift = XEN_SHIFT, | ||
216 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 212 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
217 | }; | 213 | }; |
218 | 214 | ||
@@ -439,16 +435,16 @@ void xen_timer_resume(void) | |||
439 | } | 435 | } |
440 | } | 436 | } |
441 | 437 | ||
442 | static const struct pv_time_ops xen_time_ops __initdata = { | 438 | static const struct pv_time_ops xen_time_ops __initconst = { |
443 | .sched_clock = xen_clocksource_read, | 439 | .sched_clock = xen_clocksource_read, |
444 | }; | 440 | }; |
445 | 441 | ||
446 | static __init void xen_time_init(void) | 442 | static void __init xen_time_init(void) |
447 | { | 443 | { |
448 | int cpu = smp_processor_id(); | 444 | int cpu = smp_processor_id(); |
449 | struct timespec tp; | 445 | struct timespec tp; |
450 | 446 | ||
451 | clocksource_register(&xen_clocksource); | 447 | clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC); |
452 | 448 | ||
453 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { | 449 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { |
454 | /* Successfully turned off 100Hz tick, so we have the | 450 | /* Successfully turned off 100Hz tick, so we have the |
@@ -468,7 +464,7 @@ static __init void xen_time_init(void) | |||
468 | xen_setup_cpu_clockevents(); | 464 | xen_setup_cpu_clockevents(); |
469 | } | 465 | } |
470 | 466 | ||
471 | __init void xen_init_time_ops(void) | 467 | void __init xen_init_time_ops(void) |
472 | { | 468 | { |
473 | pv_time_ops = xen_time_ops; | 469 | pv_time_ops = xen_time_ops; |
474 | 470 | ||
@@ -490,7 +486,7 @@ static void xen_hvm_setup_cpu_clockevents(void) | |||
490 | xen_setup_cpu_clockevents(); | 486 | xen_setup_cpu_clockevents(); |
491 | } | 487 | } |
492 | 488 | ||
493 | __init void xen_hvm_init_time_ops(void) | 489 | void __init xen_hvm_init_time_ops(void) |
494 | { | 490 | { |
495 | /* vector callback is needed otherwise we cannot receive interrupts | 491 | /* vector callback is needed otherwise we cannot receive interrupts |
496 | * on cpu > 0 and at this point we don't know how many cpus are | 492 | * on cpu > 0 and at this point we don't know how many cpus are |
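The time.c hunk drops the hand-rolled XEN_SHIFT mult/shift pair and instead registers the clocksource with clocksource_register_hz(), letting the core derive mult and shift from the stated frequency (NSEC_PER_SEC, since the counter already ticks in nanoseconds). A hedged sketch of the same registration pattern, with a dummy read callback and hypothetical names:

#include <linux/clocksource.h>
#include <linux/time.h>
#include <linux/init.h>

static cycle_t demo_cs_read(struct clocksource *cs)
{
	/* Would return a free-running nanosecond counter. */
	return 0;
}

static struct clocksource demo_clocksource __read_mostly = {
	.name	= "demo",
	.rating	= 400,
	.read	= demo_cs_read,
	.mask	= CLOCKSOURCE_MASK(64),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
};

static int __init demo_clocksource_init(void)
{
	/* Counter resolution is 1 ns, so its frequency is NSEC_PER_SEC. */
	return clocksource_register_hz(&demo_clocksource, NSEC_PER_SEC);
}
device_initcall(demo_clocksource_init);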
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 3112f55638c4..97dfdc8757b3 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -74,7 +74,7 @@ static inline void xen_hvm_smp_init(void) {} | |||
74 | 74 | ||
75 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | 75 | #ifdef CONFIG_PARAVIRT_SPINLOCKS |
76 | void __init xen_init_spinlocks(void); | 76 | void __init xen_init_spinlocks(void); |
77 | __cpuinit void xen_init_lock_cpu(int cpu); | 77 | void __cpuinit xen_init_lock_cpu(int cpu); |
78 | void xen_uninit_lock_cpu(int cpu); | 78 | void xen_uninit_lock_cpu(int cpu); |
79 | #else | 79 | #else |
80 | static inline void xen_init_spinlocks(void) | 80 | static inline void xen_init_spinlocks(void) |