diff options
Diffstat (limited to 'arch/x86')
167 files changed, 3355 insertions, 13266 deletions
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 0e103236b754..0e9dec6cadd1 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild | |||
@@ -15,3 +15,4 @@ obj-y += vdso/ | |||
15 | obj-$(CONFIG_IA32_EMULATION) += ia32/ | 15 | obj-$(CONFIG_IA32_EMULATION) += ia32/ |
16 | 16 | ||
17 | obj-y += platform/ | 17 | obj-y += platform/ |
18 | obj-y += net/ | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cc6c53a95bfd..880fcb6c86f4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -8,6 +8,7 @@ config 64BIT | |||
8 | 8 | ||
9 | config X86_32 | 9 | config X86_32 |
10 | def_bool !64BIT | 10 | def_bool !64BIT |
11 | select CLKSRC_I8253 | ||
11 | 12 | ||
12 | config X86_64 | 13 | config X86_64 |
13 | def_bool 64BIT | 14 | def_bool 64BIT |
@@ -71,7 +72,7 @@ config X86 | |||
71 | select GENERIC_IRQ_SHOW | 72 | select GENERIC_IRQ_SHOW |
72 | select IRQ_FORCED_THREADING | 73 | select IRQ_FORCED_THREADING |
73 | select USE_GENERIC_SMP_HELPERS if SMP | 74 | select USE_GENERIC_SMP_HELPERS if SMP |
74 | select ARCH_NO_SYSDEV_OPS | 75 | select HAVE_BPF_JIT if (X86_64 && NET) |
75 | 76 | ||
76 | config INSTRUCTION_DECODER | 77 | config INSTRUCTION_DECODER |
77 | def_bool (KPROBES || PERF_EVENTS) | 78 | def_bool (KPROBES || PERF_EVENTS) |
@@ -112,7 +113,14 @@ config MMU | |||
112 | def_bool y | 113 | def_bool y |
113 | 114 | ||
114 | config ZONE_DMA | 115 | config ZONE_DMA |
115 | def_bool y | 116 | bool "DMA memory allocation support" if EXPERT |
117 | default y | ||
118 | help | ||
119 | DMA memory allocation support allows devices with less than 32-bit | ||
120 | addressing to allocate within the first 16MB of address space. | ||
121 | Disable if no such devices will be used. | ||
122 | |||
123 | If unsure, say Y. | ||
116 | 124 | ||
117 | config SBUS | 125 | config SBUS |
118 | bool | 126 | bool |
@@ -365,17 +373,6 @@ config X86_UV | |||
365 | # Following is an alphabetically sorted list of 32 bit extended platforms | 373 | # Following is an alphabetically sorted list of 32 bit extended platforms |
366 | # Please maintain the alphabetic order if and when there are additions | 374 | # Please maintain the alphabetic order if and when there are additions |
367 | 375 | ||
368 | config X86_ELAN | ||
369 | bool "AMD Elan" | ||
370 | depends on X86_32 | ||
371 | depends on X86_EXTENDED_PLATFORM | ||
372 | ---help--- | ||
373 | Select this for an AMD Elan processor. | ||
374 | |||
375 | Do not use this option for K6/Athlon/Opteron processors! | ||
376 | |||
377 | If unsure, choose "PC-compatible" instead. | ||
378 | |||
379 | config X86_INTEL_CE | 376 | config X86_INTEL_CE |
380 | bool "CE4100 TV platform" | 377 | bool "CE4100 TV platform" |
381 | depends on PCI | 378 | depends on PCI |
@@ -690,6 +687,7 @@ config AMD_IOMMU | |||
690 | bool "AMD IOMMU support" | 687 | bool "AMD IOMMU support" |
691 | select SWIOTLB | 688 | select SWIOTLB |
692 | select PCI_MSI | 689 | select PCI_MSI |
690 | select PCI_IOV | ||
693 | depends on X86_64 && PCI && ACPI | 691 | depends on X86_64 && PCI && ACPI |
694 | ---help--- | 692 | ---help--- |
695 | With this option you can enable support for AMD IOMMU hardware in | 693 | With this option you can enable support for AMD IOMMU hardware in |
@@ -1174,7 +1172,7 @@ comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" | |||
1174 | config AMD_NUMA | 1172 | config AMD_NUMA |
1175 | def_bool y | 1173 | def_bool y |
1176 | prompt "Old style AMD Opteron NUMA detection" | 1174 | prompt "Old style AMD Opteron NUMA detection" |
1177 | depends on X86_64 && NUMA && PCI | 1175 | depends on NUMA && PCI |
1178 | ---help--- | 1176 | ---help--- |
1179 | Enable AMD NUMA node topology detection. You should say Y here if | 1177 | Enable AMD NUMA node topology detection. You should say Y here if |
1180 | you have a multi processor AMD system. This uses an old method to | 1178 | you have a multi processor AMD system. This uses an old method to |
@@ -1201,7 +1199,7 @@ config NODES_SPAN_OTHER_NODES | |||
1201 | 1199 | ||
1202 | config NUMA_EMU | 1200 | config NUMA_EMU |
1203 | bool "NUMA emulation" | 1201 | bool "NUMA emulation" |
1204 | depends on X86_64 && NUMA | 1202 | depends on NUMA |
1205 | ---help--- | 1203 | ---help--- |
1206 | Enable NUMA emulation. A flat machine will be split | 1204 | Enable NUMA emulation. A flat machine will be split |
1207 | into virtual nodes when booted with "numa=fake=N", where N is the | 1205 | into virtual nodes when booted with "numa=fake=N", where N is the |
@@ -1223,6 +1221,10 @@ config HAVE_ARCH_BOOTMEM | |||
1223 | def_bool y | 1221 | def_bool y |
1224 | depends on X86_32 && NUMA | 1222 | depends on X86_32 && NUMA |
1225 | 1223 | ||
1224 | config HAVE_ARCH_ALLOC_REMAP | ||
1225 | def_bool y | ||
1226 | depends on X86_32 && NUMA | ||
1227 | |||
1226 | config ARCH_HAVE_MEMORY_PRESENT | 1228 | config ARCH_HAVE_MEMORY_PRESENT |
1227 | def_bool y | 1229 | def_bool y |
1228 | depends on X86_32 && DISCONTIGMEM | 1230 | depends on X86_32 && DISCONTIGMEM |
@@ -1231,13 +1233,9 @@ config NEED_NODE_MEMMAP_SIZE | |||
1231 | def_bool y | 1233 | def_bool y |
1232 | depends on X86_32 && (DISCONTIGMEM || SPARSEMEM) | 1234 | depends on X86_32 && (DISCONTIGMEM || SPARSEMEM) |
1233 | 1235 | ||
1234 | config HAVE_ARCH_ALLOC_REMAP | ||
1235 | def_bool y | ||
1236 | depends on X86_32 && NUMA | ||
1237 | |||
1238 | config ARCH_FLATMEM_ENABLE | 1236 | config ARCH_FLATMEM_ENABLE |
1239 | def_bool y | 1237 | def_bool y |
1240 | depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA | 1238 | depends on X86_32 && !NUMA |
1241 | 1239 | ||
1242 | config ARCH_DISCONTIGMEM_ENABLE | 1240 | config ARCH_DISCONTIGMEM_ENABLE |
1243 | def_bool y | 1241 | def_bool y |
@@ -1247,20 +1245,16 @@ config ARCH_DISCONTIGMEM_DEFAULT | |||
1247 | def_bool y | 1245 | def_bool y |
1248 | depends on NUMA && X86_32 | 1246 | depends on NUMA && X86_32 |
1249 | 1247 | ||
1250 | config ARCH_PROC_KCORE_TEXT | ||
1251 | def_bool y | ||
1252 | depends on X86_64 && PROC_KCORE | ||
1253 | |||
1254 | config ARCH_SPARSEMEM_DEFAULT | ||
1255 | def_bool y | ||
1256 | depends on X86_64 | ||
1257 | |||
1258 | config ARCH_SPARSEMEM_ENABLE | 1248 | config ARCH_SPARSEMEM_ENABLE |
1259 | def_bool y | 1249 | def_bool y |
1260 | depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD | 1250 | depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD |
1261 | select SPARSEMEM_STATIC if X86_32 | 1251 | select SPARSEMEM_STATIC if X86_32 |
1262 | select SPARSEMEM_VMEMMAP_ENABLE if X86_64 | 1252 | select SPARSEMEM_VMEMMAP_ENABLE if X86_64 |
1263 | 1253 | ||
1254 | config ARCH_SPARSEMEM_DEFAULT | ||
1255 | def_bool y | ||
1256 | depends on X86_64 | ||
1257 | |||
1264 | config ARCH_SELECT_MEMORY_MODEL | 1258 | config ARCH_SELECT_MEMORY_MODEL |
1265 | def_bool y | 1259 | def_bool y |
1266 | depends on ARCH_SPARSEMEM_ENABLE | 1260 | depends on ARCH_SPARSEMEM_ENABLE |
@@ -1269,6 +1263,10 @@ config ARCH_MEMORY_PROBE | |||
1269 | def_bool X86_64 | 1263 | def_bool X86_64 |
1270 | depends on MEMORY_HOTPLUG | 1264 | depends on MEMORY_HOTPLUG |
1271 | 1265 | ||
1266 | config ARCH_PROC_KCORE_TEXT | ||
1267 | def_bool y | ||
1268 | depends on X86_64 && PROC_KCORE | ||
1269 | |||
1272 | config ILLEGAL_POINTER_VALUE | 1270 | config ILLEGAL_POINTER_VALUE |
1273 | hex | 1271 | hex |
1274 | default 0 if X86_32 | 1272 | default 0 if X86_32 |
@@ -1703,10 +1701,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE | |||
1703 | def_bool y | 1701 | def_bool y |
1704 | depends on MEMORY_HOTPLUG | 1702 | depends on MEMORY_HOTPLUG |
1705 | 1703 | ||
1706 | config HAVE_ARCH_EARLY_PFN_TO_NID | ||
1707 | def_bool X86_64 | ||
1708 | depends on NUMA | ||
1709 | |||
1710 | config USE_PERCPU_NUMA_NODE_ID | 1704 | config USE_PERCPU_NUMA_NODE_ID |
1711 | def_bool y | 1705 | def_bool y |
1712 | depends on NUMA | 1706 | depends on NUMA |
@@ -1848,7 +1842,7 @@ config APM_ALLOW_INTS | |||
1848 | 1842 | ||
1849 | endif # APM | 1843 | endif # APM |
1850 | 1844 | ||
1851 | source "arch/x86/kernel/cpu/cpufreq/Kconfig" | 1845 | source "drivers/cpufreq/Kconfig" |
1852 | 1846 | ||
1853 | source "drivers/cpuidle/Kconfig" | 1847 | source "drivers/cpuidle/Kconfig" |
1854 | 1848 | ||
@@ -2076,7 +2070,7 @@ config OLPC | |||
2076 | depends on !X86_PAE | 2070 | depends on !X86_PAE |
2077 | select GPIOLIB | 2071 | select GPIOLIB |
2078 | select OF | 2072 | select OF |
2079 | select OF_PROMTREE if PROC_DEVICETREE | 2073 | select OF_PROMTREE |
2080 | ---help--- | 2074 | ---help--- |
2081 | Add support for detecting the unique features of the OLPC | 2075 | Add support for detecting the unique features of the OLPC |
2082 | XO hardware. | 2076 | XO hardware. |
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index d161e939df62..6a7cfdf8ff69 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu | |||
@@ -1,6 +1,4 @@ | |||
1 | # Put here option for CPU selection and depending optimization | 1 | # Put here option for CPU selection and depending optimization |
2 | if !X86_ELAN | ||
3 | |||
4 | choice | 2 | choice |
5 | prompt "Processor family" | 3 | prompt "Processor family" |
6 | default M686 if X86_32 | 4 | default M686 if X86_32 |
@@ -203,6 +201,14 @@ config MWINCHIP3D | |||
203 | stores for this CPU, which can increase performance of some | 201 | stores for this CPU, which can increase performance of some |
204 | operations. | 202 | operations. |
205 | 203 | ||
204 | config MELAN | ||
205 | bool "AMD Elan" | ||
206 | depends on X86_32 | ||
207 | ---help--- | ||
208 | Select this for an AMD Elan processor. | ||
209 | |||
210 | Do not use this option for K6/Athlon/Opteron processors! | ||
211 | |||
206 | config MGEODEGX1 | 212 | config MGEODEGX1 |
207 | bool "GeodeGX1" | 213 | bool "GeodeGX1" |
208 | depends on X86_32 | 214 | depends on X86_32 |
@@ -292,8 +298,6 @@ config X86_GENERIC | |||
292 | This is really intended for distributors who need more | 298 | This is really intended for distributors who need more |
293 | generic optimizations. | 299 | generic optimizations. |
294 | 300 | ||
295 | endif | ||
296 | |||
297 | # | 301 | # |
298 | # Define implied options from the CPU selection here | 302 | # Define implied options from the CPU selection here |
299 | config X86_INTERNODE_CACHE_SHIFT | 303 | config X86_INTERNODE_CACHE_SHIFT |
@@ -312,7 +316,7 @@ config X86_L1_CACHE_SHIFT | |||
312 | int | 316 | int |
313 | default "7" if MPENTIUM4 || MPSC | 317 | default "7" if MPENTIUM4 || MPSC |
314 | default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU | 318 | default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU |
315 | default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 | 319 | default "4" if MELAN || M486 || M386 || MGEODEGX1 |
316 | default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX | 320 | default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX |
317 | 321 | ||
318 | config X86_XADD | 322 | config X86_XADD |
@@ -358,7 +362,7 @@ config X86_POPAD_OK | |||
358 | 362 | ||
359 | config X86_ALIGNMENT_16 | 363 | config X86_ALIGNMENT_16 |
360 | def_bool y | 364 | def_bool y |
361 | depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 | 365 | depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 |
362 | 366 | ||
363 | config X86_INTEL_USERCOPY | 367 | config X86_INTEL_USERCOPY |
364 | def_bool y | 368 | def_bool y |
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index f2ee1abb1df9..86cee7b749e1 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu | |||
@@ -37,7 +37,7 @@ cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march= | |||
37 | $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) | 37 | $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) |
38 | 38 | ||
39 | # AMD Elan support | 39 | # AMD Elan support |
40 | cflags-$(CONFIG_X86_ELAN) += -march=i486 | 40 | cflags-$(CONFIG_MELAN) += -march=i486 |
41 | 41 | ||
42 | # Geode GX1 support | 42 | # Geode GX1 support |
43 | cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx | 43 | cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx |
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 1a58ad89fdf7..c04f1b7a9139 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile | |||
@@ -2,8 +2,6 @@ | |||
2 | # Arch-specific CryptoAPI modules. | 2 | # Arch-specific CryptoAPI modules. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_CRYPTO_FPU) += fpu.o | ||
6 | |||
7 | obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o | 5 | obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o |
8 | obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o | 6 | obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o |
9 | obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o | 7 | obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o |
@@ -24,6 +22,6 @@ aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o | |||
24 | twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o | 22 | twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o |
25 | salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o | 23 | salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o |
26 | 24 | ||
27 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o | 25 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o |
28 | 26 | ||
29 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o | 27 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o |
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 2577613fb32b..feee8ff1d05e 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -94,6 +94,10 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, | |||
94 | const u8 *in, unsigned int len, u8 *iv); | 94 | const u8 *in, unsigned int len, u8 *iv); |
95 | asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, | 95 | asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, |
96 | const u8 *in, unsigned int len, u8 *iv); | 96 | const u8 *in, unsigned int len, u8 *iv); |
97 | |||
98 | int crypto_fpu_init(void); | ||
99 | void crypto_fpu_exit(void); | ||
100 | |||
97 | #ifdef CONFIG_X86_64 | 101 | #ifdef CONFIG_X86_64 |
98 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, | 102 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, |
99 | const u8 *in, unsigned int len, u8 *iv); | 103 | const u8 *in, unsigned int len, u8 *iv); |
@@ -1257,6 +1261,8 @@ static int __init aesni_init(void) | |||
1257 | return -ENODEV; | 1261 | return -ENODEV; |
1258 | } | 1262 | } |
1259 | 1263 | ||
1264 | if ((err = crypto_fpu_init())) | ||
1265 | goto fpu_err; | ||
1260 | if ((err = crypto_register_alg(&aesni_alg))) | 1266 | if ((err = crypto_register_alg(&aesni_alg))) |
1261 | goto aes_err; | 1267 | goto aes_err; |
1262 | if ((err = crypto_register_alg(&__aesni_alg))) | 1268 | if ((err = crypto_register_alg(&__aesni_alg))) |
@@ -1334,6 +1340,7 @@ blk_ecb_err: | |||
1334 | __aes_err: | 1340 | __aes_err: |
1335 | crypto_unregister_alg(&aesni_alg); | 1341 | crypto_unregister_alg(&aesni_alg); |
1336 | aes_err: | 1342 | aes_err: |
1343 | fpu_err: | ||
1337 | return err; | 1344 | return err; |
1338 | } | 1345 | } |
1339 | 1346 | ||
@@ -1363,6 +1370,8 @@ static void __exit aesni_exit(void) | |||
1363 | crypto_unregister_alg(&blk_ecb_alg); | 1370 | crypto_unregister_alg(&blk_ecb_alg); |
1364 | crypto_unregister_alg(&__aesni_alg); | 1371 | crypto_unregister_alg(&__aesni_alg); |
1365 | crypto_unregister_alg(&aesni_alg); | 1372 | crypto_unregister_alg(&aesni_alg); |
1373 | |||
1374 | crypto_fpu_exit(); | ||
1366 | } | 1375 | } |
1367 | 1376 | ||
1368 | module_init(aesni_init); | 1377 | module_init(aesni_init); |
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c index 1a8f8649c035..98d7a188f46b 100644 --- a/arch/x86/crypto/fpu.c +++ b/arch/x86/crypto/fpu.c | |||
@@ -150,18 +150,12 @@ static struct crypto_template crypto_fpu_tmpl = { | |||
150 | .module = THIS_MODULE, | 150 | .module = THIS_MODULE, |
151 | }; | 151 | }; |
152 | 152 | ||
153 | static int __init crypto_fpu_module_init(void) | 153 | int __init crypto_fpu_init(void) |
154 | { | 154 | { |
155 | return crypto_register_template(&crypto_fpu_tmpl); | 155 | return crypto_register_template(&crypto_fpu_tmpl); |
156 | } | 156 | } |
157 | 157 | ||
158 | static void __exit crypto_fpu_module_exit(void) | 158 | void __exit crypto_fpu_exit(void) |
159 | { | 159 | { |
160 | crypto_unregister_template(&crypto_fpu_tmpl); | 160 | crypto_unregister_template(&crypto_fpu_tmpl); |
161 | } | 161 | } |
162 | |||
163 | module_init(crypto_fpu_module_init); | ||
164 | module_exit(crypto_fpu_module_exit); | ||
165 | |||
166 | MODULE_LICENSE("GPL"); | ||
167 | MODULE_DESCRIPTION("FPU block cipher wrapper"); | ||
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 849a9d23c71d..95f5826be458 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -848,4 +848,5 @@ ia32_sys_call_table: | |||
848 | .quad compat_sys_open_by_handle_at | 848 | .quad compat_sys_open_by_handle_at |
849 | .quad compat_sys_clock_adjtime | 849 | .quad compat_sys_clock_adjtime |
850 | .quad sys_syncfs | 850 | .quad sys_syncfs |
851 | .quad compat_sys_sendmmsg /* 345 */ | ||
851 | ia32_syscall_end: | 852 | ia32_syscall_end: |
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 12e0e7dd869c..416d865eae39 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h | |||
@@ -183,8 +183,6 @@ static inline void disable_acpi(void) { } | |||
183 | 183 | ||
184 | #define ARCH_HAS_POWER_INIT 1 | 184 | #define ARCH_HAS_POWER_INIT 1 |
185 | 185 | ||
186 | struct bootnode; | ||
187 | |||
188 | #ifdef CONFIG_ACPI_NUMA | 186 | #ifdef CONFIG_ACPI_NUMA |
189 | extern int acpi_numa; | 187 | extern int acpi_numa; |
190 | extern int x86_acpi_numa_init(void); | 188 | extern int x86_acpi_numa_init(void); |
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index a63a68be1cce..94d420b360d1 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h | |||
@@ -15,4 +15,13 @@ | |||
15 | .endm | 15 | .endm |
16 | #endif | 16 | #endif |
17 | 17 | ||
18 | .macro altinstruction_entry orig alt feature orig_len alt_len | ||
19 | .align 8 | ||
20 | .quad \orig | ||
21 | .quad \alt | ||
22 | .word \feature | ||
23 | .byte \orig_len | ||
24 | .byte \alt_len | ||
25 | .endm | ||
26 | |||
18 | #endif /* __ASSEMBLY__ */ | 27 | #endif /* __ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 13009d1af99a..bf535f947e8c 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h | |||
@@ -4,7 +4,6 @@ | |||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | #include <linux/stddef.h> | 5 | #include <linux/stddef.h> |
6 | #include <linux/stringify.h> | 6 | #include <linux/stringify.h> |
7 | #include <linux/jump_label.h> | ||
8 | #include <asm/asm.h> | 7 | #include <asm/asm.h> |
9 | 8 | ||
10 | /* | 9 | /* |
@@ -191,12 +190,4 @@ extern void *text_poke(void *addr, const void *opcode, size_t len); | |||
191 | extern void *text_poke_smp(void *addr, const void *opcode, size_t len); | 190 | extern void *text_poke_smp(void *addr, const void *opcode, size_t len); |
192 | extern void text_poke_smp_batch(struct text_poke_param *params, int n); | 191 | extern void text_poke_smp_batch(struct text_poke_param *params, int n); |
193 | 192 | ||
194 | #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) | ||
195 | #define IDEAL_NOP_SIZE_5 5 | ||
196 | extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; | ||
197 | extern void arch_init_ideal_nop5(void); | ||
198 | #else | ||
199 | static inline void arch_init_ideal_nop5(void) {} | ||
200 | #endif | ||
201 | |||
202 | #endif /* _ASM_X86_ALTERNATIVE_H */ | 193 | #endif /* _ASM_X86_ALTERNATIVE_H */ |
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index 916bc8111a01..55d95eb789b3 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h | |||
@@ -19,13 +19,12 @@ | |||
19 | #ifndef _ASM_X86_AMD_IOMMU_PROTO_H | 19 | #ifndef _ASM_X86_AMD_IOMMU_PROTO_H |
20 | #define _ASM_X86_AMD_IOMMU_PROTO_H | 20 | #define _ASM_X86_AMD_IOMMU_PROTO_H |
21 | 21 | ||
22 | struct amd_iommu; | 22 | #include <asm/amd_iommu_types.h> |
23 | 23 | ||
24 | extern int amd_iommu_init_dma_ops(void); | 24 | extern int amd_iommu_init_dma_ops(void); |
25 | extern int amd_iommu_init_passthrough(void); | 25 | extern int amd_iommu_init_passthrough(void); |
26 | extern irqreturn_t amd_iommu_int_thread(int irq, void *data); | ||
26 | extern irqreturn_t amd_iommu_int_handler(int irq, void *data); | 27 | extern irqreturn_t amd_iommu_int_handler(int irq, void *data); |
27 | extern void amd_iommu_flush_all_domains(void); | ||
28 | extern void amd_iommu_flush_all_devices(void); | ||
29 | extern void amd_iommu_apply_erratum_63(u16 devid); | 28 | extern void amd_iommu_apply_erratum_63(u16 devid); |
30 | extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); | 29 | extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); |
31 | extern int amd_iommu_init_devices(void); | 30 | extern int amd_iommu_init_devices(void); |
@@ -44,4 +43,12 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev) | |||
44 | (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); | 43 | (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); |
45 | } | 44 | } |
46 | 45 | ||
46 | static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) | ||
47 | { | ||
48 | if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) | ||
49 | return false; | ||
50 | |||
51 | return !!(iommu->features & f); | ||
52 | } | ||
53 | |||
47 | #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ | 54 | #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ |
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index e3509fc303bf..4c9982995414 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h | |||
@@ -68,12 +68,25 @@ | |||
68 | #define MMIO_CONTROL_OFFSET 0x0018 | 68 | #define MMIO_CONTROL_OFFSET 0x0018 |
69 | #define MMIO_EXCL_BASE_OFFSET 0x0020 | 69 | #define MMIO_EXCL_BASE_OFFSET 0x0020 |
70 | #define MMIO_EXCL_LIMIT_OFFSET 0x0028 | 70 | #define MMIO_EXCL_LIMIT_OFFSET 0x0028 |
71 | #define MMIO_EXT_FEATURES 0x0030 | ||
71 | #define MMIO_CMD_HEAD_OFFSET 0x2000 | 72 | #define MMIO_CMD_HEAD_OFFSET 0x2000 |
72 | #define MMIO_CMD_TAIL_OFFSET 0x2008 | 73 | #define MMIO_CMD_TAIL_OFFSET 0x2008 |
73 | #define MMIO_EVT_HEAD_OFFSET 0x2010 | 74 | #define MMIO_EVT_HEAD_OFFSET 0x2010 |
74 | #define MMIO_EVT_TAIL_OFFSET 0x2018 | 75 | #define MMIO_EVT_TAIL_OFFSET 0x2018 |
75 | #define MMIO_STATUS_OFFSET 0x2020 | 76 | #define MMIO_STATUS_OFFSET 0x2020 |
76 | 77 | ||
78 | |||
79 | /* Extended Feature Bits */ | ||
80 | #define FEATURE_PREFETCH (1ULL<<0) | ||
81 | #define FEATURE_PPR (1ULL<<1) | ||
82 | #define FEATURE_X2APIC (1ULL<<2) | ||
83 | #define FEATURE_NX (1ULL<<3) | ||
84 | #define FEATURE_GT (1ULL<<4) | ||
85 | #define FEATURE_IA (1ULL<<6) | ||
86 | #define FEATURE_GA (1ULL<<7) | ||
87 | #define FEATURE_HE (1ULL<<8) | ||
88 | #define FEATURE_PC (1ULL<<9) | ||
89 | |||
77 | /* MMIO status bits */ | 90 | /* MMIO status bits */ |
78 | #define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 | 91 | #define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 |
79 | 92 | ||
@@ -113,7 +126,9 @@ | |||
113 | /* command specific defines */ | 126 | /* command specific defines */ |
114 | #define CMD_COMPL_WAIT 0x01 | 127 | #define CMD_COMPL_WAIT 0x01 |
115 | #define CMD_INV_DEV_ENTRY 0x02 | 128 | #define CMD_INV_DEV_ENTRY 0x02 |
116 | #define CMD_INV_IOMMU_PAGES 0x03 | 129 | #define CMD_INV_IOMMU_PAGES 0x03 |
130 | #define CMD_INV_IOTLB_PAGES 0x04 | ||
131 | #define CMD_INV_ALL 0x08 | ||
117 | 132 | ||
118 | #define CMD_COMPL_WAIT_STORE_MASK 0x01 | 133 | #define CMD_COMPL_WAIT_STORE_MASK 0x01 |
119 | #define CMD_COMPL_WAIT_INT_MASK 0x02 | 134 | #define CMD_COMPL_WAIT_INT_MASK 0x02 |
@@ -215,6 +230,8 @@ | |||
215 | #define IOMMU_PTE_IR (1ULL << 61) | 230 | #define IOMMU_PTE_IR (1ULL << 61) |
216 | #define IOMMU_PTE_IW (1ULL << 62) | 231 | #define IOMMU_PTE_IW (1ULL << 62) |
217 | 232 | ||
233 | #define DTE_FLAG_IOTLB 0x01 | ||
234 | |||
218 | #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) | 235 | #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) |
219 | #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) | 236 | #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) |
220 | #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) | 237 | #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) |
@@ -227,6 +244,7 @@ | |||
227 | /* IOMMU capabilities */ | 244 | /* IOMMU capabilities */ |
228 | #define IOMMU_CAP_IOTLB 24 | 245 | #define IOMMU_CAP_IOTLB 24 |
229 | #define IOMMU_CAP_NPCACHE 26 | 246 | #define IOMMU_CAP_NPCACHE 26 |
247 | #define IOMMU_CAP_EFR 27 | ||
230 | 248 | ||
231 | #define MAX_DOMAIN_ID 65536 | 249 | #define MAX_DOMAIN_ID 65536 |
232 | 250 | ||
@@ -249,6 +267,8 @@ extern bool amd_iommu_dump; | |||
249 | 267 | ||
250 | /* global flag if IOMMUs cache non-present entries */ | 268 | /* global flag if IOMMUs cache non-present entries */ |
251 | extern bool amd_iommu_np_cache; | 269 | extern bool amd_iommu_np_cache; |
270 | /* Only true if all IOMMUs support device IOTLBs */ | ||
271 | extern bool amd_iommu_iotlb_sup; | ||
252 | 272 | ||
253 | /* | 273 | /* |
254 | * Make iterating over all IOMMUs easier | 274 | * Make iterating over all IOMMUs easier |
@@ -371,6 +391,9 @@ struct amd_iommu { | |||
371 | /* flags read from acpi table */ | 391 | /* flags read from acpi table */ |
372 | u8 acpi_flags; | 392 | u8 acpi_flags; |
373 | 393 | ||
394 | /* Extended features */ | ||
395 | u64 features; | ||
396 | |||
374 | /* | 397 | /* |
375 | * Capability pointer. There could be more than one IOMMU per PCI | 398 | * Capability pointer. There could be more than one IOMMU per PCI |
376 | * device function if there are more than one AMD IOMMU capability | 399 | * device function if there are more than one AMD IOMMU capability |
@@ -409,9 +432,6 @@ struct amd_iommu { | |||
409 | /* if one, we need to send a completion wait command */ | 432 | /* if one, we need to send a completion wait command */ |
410 | bool need_sync; | 433 | bool need_sync; |
411 | 434 | ||
412 | /* becomes true if a command buffer reset is running */ | ||
413 | bool reset_in_progress; | ||
414 | |||
415 | /* default dma_ops domain for that IOMMU */ | 435 | /* default dma_ops domain for that IOMMU */ |
416 | struct dma_ops_domain *default_dom; | 436 | struct dma_ops_domain *default_dom; |
417 | 437 | ||
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 331682231bb4..67f87f257611 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h | |||
@@ -11,7 +11,6 @@ struct amd_nb_bus_dev_range { | |||
11 | 11 | ||
12 | extern const struct pci_device_id amd_nb_misc_ids[]; | 12 | extern const struct pci_device_id amd_nb_misc_ids[]; |
13 | extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; | 13 | extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; |
14 | struct bootnode; | ||
15 | 14 | ||
16 | extern bool early_is_amd_nb(u32 value); | 15 | extern bool early_is_amd_nb(u32 value); |
17 | extern int amd_cache_northbridges(void); | 16 | extern int amd_cache_northbridges(void); |
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 2b7d573be549..a0c46f061210 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -363,7 +363,12 @@ struct apic { | |||
363 | */ | 363 | */ |
364 | int (*x86_32_early_logical_apicid)(int cpu); | 364 | int (*x86_32_early_logical_apicid)(int cpu); |
365 | 365 | ||
366 | /* determine CPU -> NUMA node mapping */ | 366 | /* |
367 | * Optional method called from setup_local_APIC() after logical | ||
368 | * apicid is guaranteed to be known to initialize apicid -> node | ||
369 | * mapping if NUMA initialization hasn't done so already. Don't | ||
370 | * add new users. | ||
371 | */ | ||
367 | int (*x86_32_numa_cpu_node)(int cpu); | 372 | int (*x86_32_numa_cpu_node)(int cpu); |
368 | #endif | 373 | #endif |
369 | }; | 374 | }; |
@@ -537,8 +542,6 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) | |||
537 | return cpuid_apic >> index_msb; | 542 | return cpuid_apic >> index_msb; |
538 | } | 543 | } |
539 | 544 | ||
540 | extern int default_x86_32_numa_cpu_node(int cpu); | ||
541 | |||
542 | #endif | 545 | #endif |
543 | 546 | ||
544 | static inline unsigned int | 547 | static inline unsigned int |
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index d87988bacf3e..34595d5e1038 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h | |||
@@ -78,6 +78,7 @@ | |||
78 | #define APIC_DEST_LOGICAL 0x00800 | 78 | #define APIC_DEST_LOGICAL 0x00800 |
79 | #define APIC_DEST_PHYSICAL 0x00000 | 79 | #define APIC_DEST_PHYSICAL 0x00000 |
80 | #define APIC_DM_FIXED 0x00000 | 80 | #define APIC_DM_FIXED 0x00000 |
81 | #define APIC_DM_FIXED_MASK 0x00700 | ||
81 | #define APIC_DM_LOWEST 0x00100 | 82 | #define APIC_DM_LOWEST 0x00100 |
82 | #define APIC_DM_SMI 0x00200 | 83 | #define APIC_DM_SMI 0x00200 |
83 | #define APIC_DM_REMRD 0x00300 | 84 | #define APIC_DM_REMRD 0x00300 |
diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h index 3c7521063d3f..aa6a3170ab5a 100644 --- a/arch/x86/include/asm/bios_ebda.h +++ b/arch/x86/include/asm/bios_ebda.h | |||
@@ -4,16 +4,40 @@ | |||
4 | #include <asm/io.h> | 4 | #include <asm/io.h> |
5 | 5 | ||
6 | /* | 6 | /* |
7 | * there is a real-mode segmented pointer pointing to the | 7 | * Returns physical address of EBDA. Returns 0 if there is no EBDA. |
8 | * 4K EBDA area at 0x40E. | ||
9 | */ | 8 | */ |
10 | static inline unsigned int get_bios_ebda(void) | 9 | static inline unsigned int get_bios_ebda(void) |
11 | { | 10 | { |
11 | /* | ||
12 | * There is a real-mode segmented pointer pointing to the | ||
13 | * 4K EBDA area at 0x40E. | ||
14 | */ | ||
12 | unsigned int address = *(unsigned short *)phys_to_virt(0x40E); | 15 | unsigned int address = *(unsigned short *)phys_to_virt(0x40E); |
13 | address <<= 4; | 16 | address <<= 4; |
14 | return address; /* 0 means none */ | 17 | return address; /* 0 means none */ |
15 | } | 18 | } |
16 | 19 | ||
20 | /* | ||
21 | * Return the sanitized length of the EBDA in bytes, if it exists. | ||
22 | */ | ||
23 | static inline unsigned int get_bios_ebda_length(void) | ||
24 | { | ||
25 | unsigned int address; | ||
26 | unsigned int length; | ||
27 | |||
28 | address = get_bios_ebda(); | ||
29 | if (!address) | ||
30 | return 0; | ||
31 | |||
32 | /* EBDA length is byte 0 of the EBDA (stored in KiB) */ | ||
33 | length = *(unsigned char *)phys_to_virt(address); | ||
34 | length <<= 10; | ||
35 | |||
36 | /* Trim the length if it extends beyond 640KiB */ | ||
37 | length = min_t(unsigned int, (640 * 1024) - address, length); | ||
38 | return length; | ||
39 | } | ||
40 | |||
17 | void reserve_ebda_region(void); | 41 | void reserve_ebda_region(void); |
18 | 42 | ||
19 | #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION | 43 | #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 91f3e087cf21..5dc6acc98dbd 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -195,6 +195,8 @@ | |||
195 | 195 | ||
196 | /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ | 196 | /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ |
197 | #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ | 197 | #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ |
198 | #define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */ | ||
199 | #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ | ||
198 | 200 | ||
199 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) | 201 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) |
200 | 202 | ||
@@ -207,8 +209,7 @@ extern const char * const x86_power_flags[32]; | |||
207 | #define test_cpu_cap(c, bit) \ | 209 | #define test_cpu_cap(c, bit) \ |
208 | test_bit(bit, (unsigned long *)((c)->x86_capability)) | 210 | test_bit(bit, (unsigned long *)((c)->x86_capability)) |
209 | 211 | ||
210 | #define cpu_has(c, bit) \ | 212 | #define REQUIRED_MASK_BIT_SET(bit) \ |
211 | (__builtin_constant_p(bit) && \ | ||
212 | ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \ | 213 | ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \ |
213 | (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \ | 214 | (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \ |
214 | (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \ | 215 | (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \ |
@@ -218,10 +219,16 @@ extern const char * const x86_power_flags[32]; | |||
218 | (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \ | 219 | (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \ |
219 | (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) || \ | 220 | (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) || \ |
220 | (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \ | 221 | (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \ |
221 | (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) ) \ | 222 | (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) ) |
222 | ? 1 : \ | 223 | |
224 | #define cpu_has(c, bit) \ | ||
225 | (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ | ||
223 | test_cpu_cap(c, bit)) | 226 | test_cpu_cap(c, bit)) |
224 | 227 | ||
228 | #define this_cpu_has(bit) \ | ||
229 | (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ | ||
230 | x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability)) | ||
231 | |||
225 | #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) | 232 | #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) |
226 | 233 | ||
227 | #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) | 234 | #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) |
diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h index 057099e5faba..0bdb0c54d9a1 100644 --- a/arch/x86/include/asm/dma.h +++ b/arch/x86/include/asm/dma.h | |||
@@ -69,22 +69,18 @@ | |||
69 | 69 | ||
70 | #define MAX_DMA_CHANNELS 8 | 70 | #define MAX_DMA_CHANNELS 8 |
71 | 71 | ||
72 | #ifdef CONFIG_X86_32 | ||
73 | |||
74 | /* The maximum address that we can perform a DMA transfer to on this platform */ | ||
75 | #define MAX_DMA_ADDRESS (PAGE_OFFSET + 0x1000000) | ||
76 | |||
77 | #else | ||
78 | |||
79 | /* 16MB ISA DMA zone */ | 72 | /* 16MB ISA DMA zone */ |
80 | #define MAX_DMA_PFN ((16 * 1024 * 1024) >> PAGE_SHIFT) | 73 | #define MAX_DMA_PFN ((16 * 1024 * 1024) >> PAGE_SHIFT) |
81 | 74 | ||
82 | /* 4GB broken PCI/AGP hardware bus master zone */ | 75 | /* 4GB broken PCI/AGP hardware bus master zone */ |
83 | #define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) | 76 | #define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) |
84 | 77 | ||
78 | #ifdef CONFIG_X86_32 | ||
79 | /* The maximum address that we can perform a DMA transfer to on this platform */ | ||
80 | #define MAX_DMA_ADDRESS (PAGE_OFFSET + 0x1000000) | ||
81 | #else | ||
85 | /* Compat define for old dma zone */ | 82 | /* Compat define for old dma zone */ |
86 | #define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT)) | 83 | #define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT)) |
87 | |||
88 | #endif | 84 | #endif |
89 | 85 | ||
90 | /* 8237 DMA controllers */ | 86 | /* 8237 DMA controllers */ |
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 8e4a16508d4e..7093e4a6a0bc 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h | |||
@@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, | |||
90 | #endif /* CONFIG_X86_32 */ | 90 | #endif /* CONFIG_X86_32 */ |
91 | 91 | ||
92 | extern int add_efi_memmap; | 92 | extern int add_efi_memmap; |
93 | extern void efi_set_executable(efi_memory_desc_t *md, bool executable); | ||
93 | extern void efi_memblock_x86_reserve_range(void); | 94 | extern void efi_memblock_x86_reserve_range(void); |
94 | extern void efi_call_phys_prelog(void); | 95 | extern void efi_call_phys_prelog(void); |
95 | extern void efi_call_phys_epilog(void); | 96 | extern void efi_call_phys_epilog(void); |
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index db24c2278be0..268c783ab1c0 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h | |||
@@ -38,11 +38,10 @@ extern void mcount(void); | |||
38 | static inline unsigned long ftrace_call_adjust(unsigned long addr) | 38 | static inline unsigned long ftrace_call_adjust(unsigned long addr) |
39 | { | 39 | { |
40 | /* | 40 | /* |
41 | * call mcount is "e8 <4 byte offset>" | 41 | * addr is the address of the mcount call instruction. |
42 | * The addr points to the 4 byte offset and the caller of this | 42 | * recordmcount does the necessary offset calculation. |
43 | * function wants the pointer to e8. Simply subtract one. | ||
44 | */ | 43 | */ |
45 | return addr - 1; | 44 | return addr; |
46 | } | 45 | } |
47 | 46 | ||
48 | #ifdef CONFIG_DYNAMIC_FTRACE | 47 | #ifdef CONFIG_DYNAMIC_FTRACE |
diff --git a/arch/x86/include/asm/i8253.h b/arch/x86/include/asm/i8253.h index fc1f579fb965..65aaa91d5850 100644 --- a/arch/x86/include/asm/i8253.h +++ b/arch/x86/include/asm/i8253.h | |||
@@ -6,6 +6,8 @@ | |||
6 | #define PIT_CH0 0x40 | 6 | #define PIT_CH0 0x40 |
7 | #define PIT_CH2 0x42 | 7 | #define PIT_CH2 0x42 |
8 | 8 | ||
9 | #define PIT_LATCH LATCH | ||
10 | |||
9 | extern raw_spinlock_t i8253_lock; | 11 | extern raw_spinlock_t i8253_lock; |
10 | 12 | ||
11 | extern struct clock_event_device *global_clock_event; | 13 | extern struct clock_event_device *global_clock_event; |
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 574dbc22893a..a32b18ce6ead 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h | |||
@@ -5,20 +5,25 @@ | |||
5 | 5 | ||
6 | #include <linux/types.h> | 6 | #include <linux/types.h> |
7 | #include <asm/nops.h> | 7 | #include <asm/nops.h> |
8 | #include <asm/asm.h> | ||
8 | 9 | ||
9 | #define JUMP_LABEL_NOP_SIZE 5 | 10 | #define JUMP_LABEL_NOP_SIZE 5 |
10 | 11 | ||
11 | # define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" | 12 | #define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" |
12 | 13 | ||
13 | # define JUMP_LABEL(key, label) \ | 14 | static __always_inline bool arch_static_branch(struct jump_label_key *key) |
14 | do { \ | 15 | { |
15 | asm goto("1:" \ | 16 | asm goto("1:" |
16 | JUMP_LABEL_INITIAL_NOP \ | 17 | JUMP_LABEL_INITIAL_NOP |
17 | ".pushsection __jump_table, \"aw\" \n\t"\ | 18 | ".pushsection __jump_table, \"aw\" \n\t" |
18 | _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \ | 19 | _ASM_ALIGN "\n\t" |
19 | ".popsection \n\t" \ | 20 | _ASM_PTR "1b, %l[l_yes], %c0 \n\t" |
20 | : : "i" (key) : : label); \ | 21 | ".popsection \n\t" |
21 | } while (0) | 22 | : : "i" (key) : : l_yes); |
23 | return false; | ||
24 | l_yes: | ||
25 | return true; | ||
26 | } | ||
22 | 27 | ||
23 | #endif /* __KERNEL__ */ | 28 | #endif /* __KERNEL__ */ |
24 | 29 | ||
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index eb16e94ae04f..021979a6e23f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -142,8 +142,6 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} | |||
142 | static inline void enable_p5_mce(void) {} | 142 | static inline void enable_p5_mce(void) {} |
143 | #endif | 143 | #endif |
144 | 144 | ||
145 | extern void (*x86_mce_decode_callback)(struct mce *m); | ||
146 | |||
147 | void mce_setup(struct mce *m); | 145 | void mce_setup(struct mce *m); |
148 | void mce_log(struct mce *m); | 146 | void mce_log(struct mce *m); |
149 | DECLARE_PER_CPU(struct sys_device, mce_dev); | 147 | DECLARE_PER_CPU(struct sys_device, mce_dev); |
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 91df7c51806c..5e83a416eca8 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h | |||
@@ -13,31 +13,11 @@ extern struct pglist_data *node_data[]; | |||
13 | #define NODE_DATA(nid) (node_data[nid]) | 13 | #define NODE_DATA(nid) (node_data[nid]) |
14 | 14 | ||
15 | #include <asm/numaq.h> | 15 | #include <asm/numaq.h> |
16 | /* summit or generic arch */ | ||
17 | #include <asm/srat.h> | ||
18 | |||
19 | extern int get_memcfg_numa_flat(void); | ||
20 | /* | ||
21 | * This allows any one NUMA architecture to be compiled | ||
22 | * for, and still fall back to the flat function if it | ||
23 | * fails. | ||
24 | */ | ||
25 | static inline void get_memcfg_numa(void) | ||
26 | { | ||
27 | |||
28 | if (get_memcfg_numaq()) | ||
29 | return; | ||
30 | if (get_memcfg_from_srat()) | ||
31 | return; | ||
32 | get_memcfg_numa_flat(); | ||
33 | } | ||
34 | 16 | ||
35 | extern void resume_map_numa_kva(pgd_t *pgd); | 17 | extern void resume_map_numa_kva(pgd_t *pgd); |
36 | 18 | ||
37 | #else /* !CONFIG_NUMA */ | 19 | #else /* !CONFIG_NUMA */ |
38 | 20 | ||
39 | #define get_memcfg_numa get_memcfg_numa_flat | ||
40 | |||
41 | static inline void resume_map_numa_kva(pgd_t *pgd) {} | 21 | static inline void resume_map_numa_kva(pgd_t *pgd) {} |
42 | 22 | ||
43 | #endif /* CONFIG_NUMA */ | 23 | #endif /* CONFIG_NUMA */ |
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h index 288b96f815a6..b3f88d7867c7 100644 --- a/arch/x86/include/asm/mmzone_64.h +++ b/arch/x86/include/asm/mmzone_64.h | |||
@@ -4,36 +4,13 @@ | |||
4 | #ifndef _ASM_X86_MMZONE_64_H | 4 | #ifndef _ASM_X86_MMZONE_64_H |
5 | #define _ASM_X86_MMZONE_64_H | 5 | #define _ASM_X86_MMZONE_64_H |
6 | 6 | ||
7 | |||
8 | #ifdef CONFIG_NUMA | 7 | #ifdef CONFIG_NUMA |
9 | 8 | ||
10 | #include <linux/mmdebug.h> | 9 | #include <linux/mmdebug.h> |
11 | |||
12 | #include <asm/smp.h> | 10 | #include <asm/smp.h> |
13 | 11 | ||
14 | /* Simple perfect hash to map physical addresses to node numbers */ | ||
15 | struct memnode { | ||
16 | int shift; | ||
17 | unsigned int mapsize; | ||
18 | s16 *map; | ||
19 | s16 embedded_map[64 - 8]; | ||
20 | } ____cacheline_aligned; /* total size = 128 bytes */ | ||
21 | extern struct memnode memnode; | ||
22 | #define memnode_shift memnode.shift | ||
23 | #define memnodemap memnode.map | ||
24 | #define memnodemapsize memnode.mapsize | ||
25 | |||
26 | extern struct pglist_data *node_data[]; | 12 | extern struct pglist_data *node_data[]; |
27 | 13 | ||
28 | static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) | ||
29 | { | ||
30 | unsigned nid; | ||
31 | VIRTUAL_BUG_ON(!memnodemap); | ||
32 | nid = memnodemap[addr >> memnode_shift]; | ||
33 | VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); | ||
34 | return nid; | ||
35 | } | ||
36 | |||
37 | #define NODE_DATA(nid) (node_data[nid]) | 14 | #define NODE_DATA(nid) (node_data[nid]) |
38 | 15 | ||
39 | #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) | 16 | #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) |
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 67763c5d8b4e..9eae7752ae9b 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h | |||
@@ -35,7 +35,7 @@ | |||
35 | #define MODULE_PROC_FAMILY "K7 " | 35 | #define MODULE_PROC_FAMILY "K7 " |
36 | #elif defined CONFIG_MK8 | 36 | #elif defined CONFIG_MK8 |
37 | #define MODULE_PROC_FAMILY "K8 " | 37 | #define MODULE_PROC_FAMILY "K8 " |
38 | #elif defined CONFIG_X86_ELAN | 38 | #elif defined CONFIG_MELAN |
39 | #define MODULE_PROC_FAMILY "ELAN " | 39 | #define MODULE_PROC_FAMILY "ELAN " |
40 | #elif defined CONFIG_MCRUSOE | 40 | #elif defined CONFIG_MCRUSOE |
41 | #define MODULE_PROC_FAMILY "CRUSOE " | 41 | #define MODULE_PROC_FAMILY "CRUSOE " |
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h index af788496020b..405b4032a60b 100644 --- a/arch/x86/include/asm/nops.h +++ b/arch/x86/include/asm/nops.h | |||
@@ -1,7 +1,13 @@ | |||
1 | #ifndef _ASM_X86_NOPS_H | 1 | #ifndef _ASM_X86_NOPS_H |
2 | #define _ASM_X86_NOPS_H | 2 | #define _ASM_X86_NOPS_H |
3 | 3 | ||
4 | /* Define nops for use with alternative() */ | 4 | /* |
5 | * Define nops for use with alternative() and for tracing. | ||
6 | * | ||
7 | * *_NOP5_ATOMIC must be a single instruction. | ||
8 | */ | ||
9 | |||
10 | #define NOP_DS_PREFIX 0x3e | ||
5 | 11 | ||
6 | /* generic versions from gas | 12 | /* generic versions from gas |
7 | 1: nop | 13 | 1: nop |
@@ -13,14 +19,15 @@ | |||
13 | 6: leal 0x00000000(%esi),%esi | 19 | 6: leal 0x00000000(%esi),%esi |
14 | 7: leal 0x00000000(,%esi,1),%esi | 20 | 7: leal 0x00000000(,%esi,1),%esi |
15 | */ | 21 | */ |
16 | #define GENERIC_NOP1 ".byte 0x90\n" | 22 | #define GENERIC_NOP1 0x90 |
17 | #define GENERIC_NOP2 ".byte 0x89,0xf6\n" | 23 | #define GENERIC_NOP2 0x89,0xf6 |
18 | #define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n" | 24 | #define GENERIC_NOP3 0x8d,0x76,0x00 |
19 | #define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n" | 25 | #define GENERIC_NOP4 0x8d,0x74,0x26,0x00 |
20 | #define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4 | 26 | #define GENERIC_NOP5 GENERIC_NOP1,GENERIC_NOP4 |
21 | #define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n" | 27 | #define GENERIC_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00 |
22 | #define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n" | 28 | #define GENERIC_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00 |
23 | #define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7 | 29 | #define GENERIC_NOP8 GENERIC_NOP1,GENERIC_NOP7 |
30 | #define GENERIC_NOP5_ATOMIC NOP_DS_PREFIX,GENERIC_NOP4 | ||
24 | 31 | ||
25 | /* Opteron 64bit nops | 32 | /* Opteron 64bit nops |
26 | 1: nop | 33 | 1: nop |
@@ -29,13 +36,14 @@ | |||
29 | 4: osp osp osp nop | 36 | 4: osp osp osp nop |
30 | */ | 37 | */ |
31 | #define K8_NOP1 GENERIC_NOP1 | 38 | #define K8_NOP1 GENERIC_NOP1 |
32 | #define K8_NOP2 ".byte 0x66,0x90\n" | 39 | #define K8_NOP2 0x66,K8_NOP1 |
33 | #define K8_NOP3 ".byte 0x66,0x66,0x90\n" | 40 | #define K8_NOP3 0x66,K8_NOP2 |
34 | #define K8_NOP4 ".byte 0x66,0x66,0x66,0x90\n" | 41 | #define K8_NOP4 0x66,K8_NOP3 |
35 | #define K8_NOP5 K8_NOP3 K8_NOP2 | 42 | #define K8_NOP5 K8_NOP3,K8_NOP2 |
36 | #define K8_NOP6 K8_NOP3 K8_NOP3 | 43 | #define K8_NOP6 K8_NOP3,K8_NOP3 |
37 | #define K8_NOP7 K8_NOP4 K8_NOP3 | 44 | #define K8_NOP7 K8_NOP4,K8_NOP3 |
38 | #define K8_NOP8 K8_NOP4 K8_NOP4 | 45 | #define K8_NOP8 K8_NOP4,K8_NOP4 |
46 | #define K8_NOP5_ATOMIC 0x66,K8_NOP4 | ||
39 | 47 | ||
40 | /* K7 nops | 48 | /* K7 nops |
41 | uses eax dependencies (arbitrary choice) | 49 | uses eax dependencies (arbitrary choice) |
@@ -47,13 +55,14 @@ | |||
47 | 7: leal 0x00000000(,%eax,1),%eax | 55 | 7: leal 0x00000000(,%eax,1),%eax |
48 | */ | 56 | */ |
49 | #define K7_NOP1 GENERIC_NOP1 | 57 | #define K7_NOP1 GENERIC_NOP1 |
50 | #define K7_NOP2 ".byte 0x8b,0xc0\n" | 58 | #define K7_NOP2 0x8b,0xc0 |
51 | #define K7_NOP3 ".byte 0x8d,0x04,0x20\n" | 59 | #define K7_NOP3 0x8d,0x04,0x20 |
52 | #define K7_NOP4 ".byte 0x8d,0x44,0x20,0x00\n" | 60 | #define K7_NOP4 0x8d,0x44,0x20,0x00 |
53 | #define K7_NOP5 K7_NOP4 ASM_NOP1 | 61 | #define K7_NOP5 K7_NOP4,K7_NOP1 |
54 | #define K7_NOP6 ".byte 0x8d,0x80,0,0,0,0\n" | 62 | #define K7_NOP6 0x8d,0x80,0,0,0,0 |
55 | #define K7_NOP7 ".byte 0x8D,0x04,0x05,0,0,0,0\n" | 63 | #define K7_NOP7 0x8D,0x04,0x05,0,0,0,0 |
56 | #define K7_NOP8 K7_NOP7 ASM_NOP1 | 64 | #define K7_NOP8 K7_NOP7,K7_NOP1 |
65 | #define K7_NOP5_ATOMIC NOP_DS_PREFIX,K7_NOP4 | ||
57 | 66 | ||
58 | /* P6 nops | 67 | /* P6 nops |
59 | uses eax dependencies (Intel-recommended choice) | 68 | uses eax dependencies (Intel-recommended choice) |
@@ -69,52 +78,65 @@ | |||
69 | There is kernel code that depends on this. | 78 | There is kernel code that depends on this. |
70 | */ | 79 | */ |
71 | #define P6_NOP1 GENERIC_NOP1 | 80 | #define P6_NOP1 GENERIC_NOP1 |
72 | #define P6_NOP2 ".byte 0x66,0x90\n" | 81 | #define P6_NOP2 0x66,0x90 |
73 | #define P6_NOP3 ".byte 0x0f,0x1f,0x00\n" | 82 | #define P6_NOP3 0x0f,0x1f,0x00 |
74 | #define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n" | 83 | #define P6_NOP4 0x0f,0x1f,0x40,0 |
75 | #define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n" | 84 | #define P6_NOP5 0x0f,0x1f,0x44,0x00,0 |
76 | #define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n" | 85 | #define P6_NOP6 0x66,0x0f,0x1f,0x44,0x00,0 |
77 | #define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n" | 86 | #define P6_NOP7 0x0f,0x1f,0x80,0,0,0,0 |
78 | #define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n" | 87 | #define P6_NOP8 0x0f,0x1f,0x84,0x00,0,0,0,0 |
88 | #define P6_NOP5_ATOMIC P6_NOP5 | ||
89 | |||
90 | #define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n" | ||
79 | 91 | ||
80 | #if defined(CONFIG_MK7) | 92 | #if defined(CONFIG_MK7) |
81 | #define ASM_NOP1 K7_NOP1 | 93 | #define ASM_NOP1 _ASM_MK_NOP(K7_NOP1) |
82 | #define ASM_NOP2 K7_NOP2 | 94 | #define ASM_NOP2 _ASM_MK_NOP(K7_NOP2) |
83 | #define ASM_NOP3 K7_NOP3 | 95 | #define ASM_NOP3 _ASM_MK_NOP(K7_NOP3) |
84 | #define ASM_NOP4 K7_NOP4 | 96 | #define ASM_NOP4 _ASM_MK_NOP(K7_NOP4) |
85 | #define ASM_NOP5 K7_NOP5 | 97 | #define ASM_NOP5 _ASM_MK_NOP(K7_NOP5) |
86 | #define ASM_NOP6 K7_NOP6 | 98 | #define ASM_NOP6 _ASM_MK_NOP(K7_NOP6) |
87 | #define ASM_NOP7 K7_NOP7 | 99 | #define ASM_NOP7 _ASM_MK_NOP(K7_NOP7) |
88 | #define ASM_NOP8 K7_NOP8 | 100 | #define ASM_NOP8 _ASM_MK_NOP(K7_NOP8) |
101 | #define ASM_NOP5_ATOMIC _ASM_MK_NOP(K7_NOP5_ATOMIC) | ||
89 | #elif defined(CONFIG_X86_P6_NOP) | 102 | #elif defined(CONFIG_X86_P6_NOP) |
90 | #define ASM_NOP1 P6_NOP1 | 103 | #define ASM_NOP1 _ASM_MK_NOP(P6_NOP1) |
91 | #define ASM_NOP2 P6_NOP2 | 104 | #define ASM_NOP2 _ASM_MK_NOP(P6_NOP2) |
92 | #define ASM_NOP3 P6_NOP3 | 105 | #define ASM_NOP3 _ASM_MK_NOP(P6_NOP3) |
93 | #define ASM_NOP4 P6_NOP4 | 106 | #define ASM_NOP4 _ASM_MK_NOP(P6_NOP4) |
94 | #define ASM_NOP5 P6_NOP5 | 107 | #define ASM_NOP5 _ASM_MK_NOP(P6_NOP5) |
95 | #define ASM_NOP6 P6_NOP6 | 108 | #define ASM_NOP6 _ASM_MK_NOP(P6_NOP6) |
96 | #define ASM_NOP7 P6_NOP7 | 109 | #define ASM_NOP7 _ASM_MK_NOP(P6_NOP7) |
97 | #define ASM_NOP8 P6_NOP8 | 110 | #define ASM_NOP8 _ASM_MK_NOP(P6_NOP8) |
111 | #define ASM_NOP5_ATOMIC _ASM_MK_NOP(P6_NOP5_ATOMIC) | ||
98 | #elif defined(CONFIG_X86_64) | 112 | #elif defined(CONFIG_X86_64) |
99 | #define ASM_NOP1 K8_NOP1 | 113 | #define ASM_NOP1 _ASM_MK_NOP(K8_NOP1) |
100 | #define ASM_NOP2 K8_NOP2 | 114 | #define ASM_NOP2 _ASM_MK_NOP(K8_NOP2) |
101 | #define ASM_NOP3 K8_NOP3 | 115 | #define ASM_NOP3 _ASM_MK_NOP(K8_NOP3) |
102 | #define ASM_NOP4 K8_NOP4 | 116 | #define ASM_NOP4 _ASM_MK_NOP(K8_NOP4) |
103 | #define ASM_NOP5 K8_NOP5 | 117 | #define ASM_NOP5 _ASM_MK_NOP(K8_NOP5) |
104 | #define ASM_NOP6 K8_NOP6 | 118 | #define ASM_NOP6 _ASM_MK_NOP(K8_NOP6) |
105 | #define ASM_NOP7 K8_NOP7 | 119 | #define ASM_NOP7 _ASM_MK_NOP(K8_NOP7) |
106 | #define ASM_NOP8 K8_NOP8 | 120 | #define ASM_NOP8 _ASM_MK_NOP(K8_NOP8) |
121 | #define ASM_NOP5_ATOMIC _ASM_MK_NOP(K8_NOP5_ATOMIC) | ||
107 | #else | 122 | #else |
108 | #define ASM_NOP1 GENERIC_NOP1 | 123 | #define ASM_NOP1 _ASM_MK_NOP(GENERIC_NOP1) |
109 | #define ASM_NOP2 GENERIC_NOP2 | 124 | #define ASM_NOP2 _ASM_MK_NOP(GENERIC_NOP2) |
110 | #define ASM_NOP3 GENERIC_NOP3 | 125 | #define ASM_NOP3 _ASM_MK_NOP(GENERIC_NOP3) |
111 | #define ASM_NOP4 GENERIC_NOP4 | 126 | #define ASM_NOP4 _ASM_MK_NOP(GENERIC_NOP4) |
112 | #define ASM_NOP5 GENERIC_NOP5 | 127 | #define ASM_NOP5 _ASM_MK_NOP(GENERIC_NOP5) |
113 | #define ASM_NOP6 GENERIC_NOP6 | 128 | #define ASM_NOP6 _ASM_MK_NOP(GENERIC_NOP6) |
114 | #define ASM_NOP7 GENERIC_NOP7 | 129 | #define ASM_NOP7 _ASM_MK_NOP(GENERIC_NOP7) |
115 | #define ASM_NOP8 GENERIC_NOP8 | 130 | #define ASM_NOP8 _ASM_MK_NOP(GENERIC_NOP8) |
131 | #define ASM_NOP5_ATOMIC _ASM_MK_NOP(GENERIC_NOP5_ATOMIC) | ||
116 | #endif | 132 | #endif |
117 | 133 | ||
118 | #define ASM_NOP_MAX 8 | 134 | #define ASM_NOP_MAX 8 |
135 | #define NOP_ATOMIC5 (ASM_NOP_MAX+1) /* Entry for the 5-byte atomic NOP */ | ||
136 | |||
137 | #ifndef __ASSEMBLY__ | ||
138 | extern const unsigned char * const *ideal_nops; | ||
139 | extern void arch_init_ideal_nops(void); | ||
140 | #endif | ||
119 | 141 | ||
120 | #endif /* _ASM_X86_NOPS_H */ | 142 | #endif /* _ASM_X86_NOPS_H */ |
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index a50fc9f493b3..bfacd2ccf651 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h | |||
@@ -1,12 +1,24 @@ | |||
1 | #ifndef _ASM_X86_NUMA_H | 1 | #ifndef _ASM_X86_NUMA_H |
2 | #define _ASM_X86_NUMA_H | 2 | #define _ASM_X86_NUMA_H |
3 | 3 | ||
4 | #include <linux/nodemask.h> | ||
5 | |||
4 | #include <asm/topology.h> | 6 | #include <asm/topology.h> |
5 | #include <asm/apicdef.h> | 7 | #include <asm/apicdef.h> |
6 | 8 | ||
7 | #ifdef CONFIG_NUMA | 9 | #ifdef CONFIG_NUMA |
8 | 10 | ||
9 | #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) | 11 | #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) |
12 | #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) | ||
13 | |||
14 | /* | ||
15 | * Too small node sizes may confuse the VM badly. Usually they | ||
16 | * result from BIOS bugs. So dont recognize nodes as standalone | ||
17 | * NUMA entities that have less than this amount of RAM listed: | ||
18 | */ | ||
19 | #define NODE_MIN_SIZE (4*1024*1024) | ||
20 | |||
21 | extern int numa_off; | ||
10 | 22 | ||
11 | /* | 23 | /* |
12 | * __apicid_to_node[] stores the raw mapping between physical apicid and | 24 | * __apicid_to_node[] stores the raw mapping between physical apicid and |
@@ -17,15 +29,27 @@ | |||
17 | * numa_cpu_node(). | 29 | * numa_cpu_node(). |
18 | */ | 30 | */ |
19 | extern s16 __apicid_to_node[MAX_LOCAL_APIC]; | 31 | extern s16 __apicid_to_node[MAX_LOCAL_APIC]; |
32 | extern nodemask_t numa_nodes_parsed __initdata; | ||
33 | |||
34 | extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); | ||
35 | extern void __init numa_set_distance(int from, int to, int distance); | ||
20 | 36 | ||
21 | static inline void set_apicid_to_node(int apicid, s16 node) | 37 | static inline void set_apicid_to_node(int apicid, s16 node) |
22 | { | 38 | { |
23 | __apicid_to_node[apicid] = node; | 39 | __apicid_to_node[apicid] = node; |
24 | } | 40 | } |
41 | |||
42 | extern int __cpuinit numa_cpu_node(int cpu); | ||
43 | |||
25 | #else /* CONFIG_NUMA */ | 44 | #else /* CONFIG_NUMA */ |
26 | static inline void set_apicid_to_node(int apicid, s16 node) | 45 | static inline void set_apicid_to_node(int apicid, s16 node) |
27 | { | 46 | { |
28 | } | 47 | } |
48 | |||
49 | static inline int numa_cpu_node(int cpu) | ||
50 | { | ||
51 | return NUMA_NO_NODE; | ||
52 | } | ||
29 | #endif /* CONFIG_NUMA */ | 53 | #endif /* CONFIG_NUMA */ |
30 | 54 | ||
31 | #ifdef CONFIG_X86_32 | 55 | #ifdef CONFIG_X86_32 |
@@ -37,14 +61,12 @@ static inline void set_apicid_to_node(int apicid, s16 node) | |||
37 | #ifdef CONFIG_NUMA | 61 | #ifdef CONFIG_NUMA |
38 | extern void __cpuinit numa_set_node(int cpu, int node); | 62 | extern void __cpuinit numa_set_node(int cpu, int node); |
39 | extern void __cpuinit numa_clear_node(int cpu); | 63 | extern void __cpuinit numa_clear_node(int cpu); |
40 | extern void __init numa_init_array(void); | ||
41 | extern void __init init_cpu_to_node(void); | 64 | extern void __init init_cpu_to_node(void); |
42 | extern void __cpuinit numa_add_cpu(int cpu); | 65 | extern void __cpuinit numa_add_cpu(int cpu); |
43 | extern void __cpuinit numa_remove_cpu(int cpu); | 66 | extern void __cpuinit numa_remove_cpu(int cpu); |
44 | #else /* CONFIG_NUMA */ | 67 | #else /* CONFIG_NUMA */ |
45 | static inline void numa_set_node(int cpu, int node) { } | 68 | static inline void numa_set_node(int cpu, int node) { } |
46 | static inline void numa_clear_node(int cpu) { } | 69 | static inline void numa_clear_node(int cpu) { } |
47 | static inline void numa_init_array(void) { } | ||
48 | static inline void init_cpu_to_node(void) { } | 70 | static inline void init_cpu_to_node(void) { } |
49 | static inline void numa_add_cpu(int cpu) { } | 71 | static inline void numa_add_cpu(int cpu) { } |
50 | static inline void numa_remove_cpu(int cpu) { } | 72 | static inline void numa_remove_cpu(int cpu) { } |
@@ -54,4 +76,10 @@ static inline void numa_remove_cpu(int cpu) { } | |||
54 | void debug_cpumask_set_cpu(int cpu, int node, bool enable); | 76 | void debug_cpumask_set_cpu(int cpu, int node, bool enable); |
55 | #endif | 77 | #endif |
56 | 78 | ||
79 | #ifdef CONFIG_NUMA_EMU | ||
80 | #define FAKE_NODE_MIN_SIZE ((u64)32 << 20) | ||
81 | #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) | ||
82 | void numa_emu_cmdline(char *); | ||
83 | #endif /* CONFIG_NUMA_EMU */ | ||
84 | |||
57 | #endif /* _ASM_X86_NUMA_H */ | 85 | #endif /* _ASM_X86_NUMA_H */ |
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h index c6beed1ef103..e7d6b8254742 100644 --- a/arch/x86/include/asm/numa_32.h +++ b/arch/x86/include/asm/numa_32.h | |||
@@ -1,16 +1,6 @@ | |||
1 | #ifndef _ASM_X86_NUMA_32_H | 1 | #ifndef _ASM_X86_NUMA_32_H |
2 | #define _ASM_X86_NUMA_32_H | 2 | #define _ASM_X86_NUMA_32_H |
3 | 3 | ||
4 | extern int numa_off; | ||
5 | |||
6 | extern int pxm_to_nid(int pxm); | ||
7 | |||
8 | #ifdef CONFIG_NUMA | ||
9 | extern int __cpuinit numa_cpu_node(int cpu); | ||
10 | #else /* CONFIG_NUMA */ | ||
11 | static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; } | ||
12 | #endif /* CONFIG_NUMA */ | ||
13 | |||
14 | #ifdef CONFIG_HIGHMEM | 4 | #ifdef CONFIG_HIGHMEM |
15 | extern void set_highmem_pages_init(void); | 5 | extern void set_highmem_pages_init(void); |
16 | #else | 6 | #else |
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 344eb1790b46..0c05f7ae46e8 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h | |||
@@ -1,42 +1,6 @@ | |||
1 | #ifndef _ASM_X86_NUMA_64_H | 1 | #ifndef _ASM_X86_NUMA_64_H |
2 | #define _ASM_X86_NUMA_64_H | 2 | #define _ASM_X86_NUMA_64_H |
3 | 3 | ||
4 | #include <linux/nodemask.h> | ||
5 | |||
6 | struct bootnode { | ||
7 | u64 start; | ||
8 | u64 end; | ||
9 | }; | ||
10 | |||
11 | #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) | ||
12 | |||
13 | extern int numa_off; | ||
14 | |||
15 | extern unsigned long numa_free_all_bootmem(void); | 4 | extern unsigned long numa_free_all_bootmem(void); |
16 | extern void setup_node_bootmem(int nodeid, unsigned long start, | ||
17 | unsigned long end); | ||
18 | |||
19 | #ifdef CONFIG_NUMA | ||
20 | /* | ||
21 | * Too small node sizes may confuse the VM badly. Usually they | ||
22 | * result from BIOS bugs. So dont recognize nodes as standalone | ||
23 | * NUMA entities that have less than this amount of RAM listed: | ||
24 | */ | ||
25 | #define NODE_MIN_SIZE (4*1024*1024) | ||
26 | |||
27 | extern nodemask_t numa_nodes_parsed __initdata; | ||
28 | |||
29 | extern int __cpuinit numa_cpu_node(int cpu); | ||
30 | extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); | ||
31 | extern void __init numa_set_distance(int from, int to, int distance); | ||
32 | |||
33 | #ifdef CONFIG_NUMA_EMU | ||
34 | #define FAKE_NODE_MIN_SIZE ((u64)32 << 20) | ||
35 | #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) | ||
36 | void numa_emu_cmdline(char *); | ||
37 | #endif /* CONFIG_NUMA_EMU */ | ||
38 | #else | ||
39 | static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; } | ||
40 | #endif | ||
41 | 5 | ||
42 | #endif /* _ASM_X86_NUMA_64_H */ | 6 | #endif /* _ASM_X86_NUMA_64_H */ |
diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h index 37c516545ec8..c3b3c322fd87 100644 --- a/arch/x86/include/asm/numaq.h +++ b/arch/x86/include/asm/numaq.h | |||
@@ -29,7 +29,7 @@ | |||
29 | #ifdef CONFIG_X86_NUMAQ | 29 | #ifdef CONFIG_X86_NUMAQ |
30 | 30 | ||
31 | extern int found_numaq; | 31 | extern int found_numaq; |
32 | extern int get_memcfg_numaq(void); | 32 | extern int numaq_numa_init(void); |
33 | extern int pci_numaq_init(void); | 33 | extern int pci_numaq_init(void); |
34 | 34 | ||
35 | extern void *xquad_portio; | 35 | extern void *xquad_portio; |
@@ -166,11 +166,6 @@ struct sys_cfg_data { | |||
166 | 166 | ||
167 | void numaq_tsc_disable(void); | 167 | void numaq_tsc_disable(void); |
168 | 168 | ||
169 | #else | ||
170 | static inline int get_memcfg_numaq(void) | ||
171 | { | ||
172 | return 0; | ||
173 | } | ||
174 | #endif /* CONFIG_X86_NUMAQ */ | 169 | #endif /* CONFIG_X86_NUMAQ */ |
175 | #endif /* _ASM_X86_NUMAQ_H */ | 170 | #endif /* _ASM_X86_NUMAQ_H */ |
176 | 171 | ||
diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h index c5d3a5abbb9f..24487712e0b1 100644 --- a/arch/x86/include/asm/olpc_ofw.h +++ b/arch/x86/include/asm/olpc_ofw.h | |||
@@ -26,15 +26,12 @@ extern void setup_olpc_ofw_pgd(void); | |||
26 | /* check if OFW was detected during boot */ | 26 | /* check if OFW was detected during boot */ |
27 | extern bool olpc_ofw_present(void); | 27 | extern bool olpc_ofw_present(void); |
28 | 28 | ||
29 | extern void olpc_dt_build_devicetree(void); | ||
30 | |||
29 | #else /* !CONFIG_OLPC */ | 31 | #else /* !CONFIG_OLPC */ |
30 | static inline void olpc_ofw_detect(void) { } | 32 | static inline void olpc_ofw_detect(void) { } |
31 | static inline void setup_olpc_ofw_pgd(void) { } | 33 | static inline void setup_olpc_ofw_pgd(void) { } |
32 | #endif /* !CONFIG_OLPC */ | ||
33 | |||
34 | #ifdef CONFIG_OF_PROMTREE | ||
35 | extern void olpc_dt_build_devicetree(void); | ||
36 | #else | ||
37 | static inline void olpc_dt_build_devicetree(void) { } | 34 | static inline void olpc_dt_build_devicetree(void) { } |
38 | #endif | 35 | #endif /* !CONFIG_OLPC */ |
39 | 36 | ||
40 | #endif /* _ASM_X86_OLPC_OFW_H */ | 37 | #endif /* _ASM_X86_OLPC_OFW_H */ |
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index d475b4398d8b..53278b0dfdf6 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -517,7 +517,7 @@ do { \ | |||
517 | typeof(o2) __o2 = o2; \ | 517 | typeof(o2) __o2 = o2; \ |
518 | typeof(o2) __n2 = n2; \ | 518 | typeof(o2) __n2 = n2; \ |
519 | typeof(o2) __dummy; \ | 519 | typeof(o2) __dummy; \ |
520 | alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \ | 520 | alternative_io("call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP4, \ |
521 | "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \ | 521 | "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \ |
522 | X86_FEATURE_CX16, \ | 522 | X86_FEATURE_CX16, \ |
523 | ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ | 523 | ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ |
@@ -542,6 +542,33 @@ do { \ | |||
542 | old__; \ | 542 | old__; \ |
543 | }) | 543 | }) |
544 | 544 | ||
545 | static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, | ||
546 | const unsigned long __percpu *addr) | ||
547 | { | ||
548 | unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG; | ||
549 | |||
550 | return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0; | ||
551 | } | ||
552 | |||
553 | static inline int x86_this_cpu_variable_test_bit(int nr, | ||
554 | const unsigned long __percpu *addr) | ||
555 | { | ||
556 | int oldbit; | ||
557 | |||
558 | asm volatile("bt "__percpu_arg(2)",%1\n\t" | ||
559 | "sbb %0,%0" | ||
560 | : "=r" (oldbit) | ||
561 | : "m" (*(unsigned long *)addr), "Ir" (nr)); | ||
562 | |||
563 | return oldbit; | ||
564 | } | ||
565 | |||
566 | #define x86_this_cpu_test_bit(nr, addr) \ | ||
567 | (__builtin_constant_p((nr)) \ | ||
568 | ? x86_this_cpu_constant_test_bit((nr), (addr)) \ | ||
569 | : x86_this_cpu_variable_test_bit((nr), (addr))) | ||
570 | |||
571 | |||
545 | #include <asm-generic/percpu.h> | 572 | #include <asm-generic/percpu.h> |
546 | 573 | ||
547 | /* We can use this directly for local CPU (faster). */ | 574 | /* We can use this directly for local CPU (faster). */ |
diff --git a/arch/x86/include/asm/probe_roms.h b/arch/x86/include/asm/probe_roms.h new file mode 100644 index 000000000000..4950a0b1d09c --- /dev/null +++ b/arch/x86/include/asm/probe_roms.h | |||
@@ -0,0 +1,8 @@ | |||
1 | #ifndef _PROBE_ROMS_H_ | ||
2 | #define _PROBE_ROMS_H_ | ||
3 | struct pci_dev; | ||
4 | |||
5 | extern void __iomem *pci_map_biosrom(struct pci_dev *pdev); | ||
6 | extern void pci_unmap_biosrom(void __iomem *rom); | ||
7 | extern size_t pci_biosrom_size(struct pci_dev *pdev); | ||
8 | #endif | ||
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index a898a2b6e10c..59ab4dffa377 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h | |||
@@ -60,6 +60,7 @@ | |||
60 | #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ | 60 | #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ |
61 | #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ | 61 | #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ |
62 | #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ | 62 | #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ |
63 | #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ | ||
63 | 64 | ||
64 | /* | 65 | /* |
65 | * x86-64 Task Priority Register, CR8 | 66 | * x86-64 Task Priority Register, CR8 |
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index db8aa19a08a2..9756551ec760 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h | |||
@@ -88,7 +88,7 @@ void *extend_brk(size_t size, size_t align); | |||
88 | * executable.) | 88 | * executable.) |
89 | */ | 89 | */ |
90 | #define RESERVE_BRK(name,sz) \ | 90 | #define RESERVE_BRK(name,sz) \ |
91 | static void __section(.discard.text) __used \ | 91 | static void __section(.discard.text) __used notrace \ |
92 | __brk_reservation_fn_##name##__(void) { \ | 92 | __brk_reservation_fn_##name##__(void) { \ |
93 | asm volatile ( \ | 93 | asm volatile ( \ |
94 | ".pushsection .brk_reservation,\"aw\",@nobits;" \ | 94 | ".pushsection .brk_reservation,\"aw\",@nobits;" \ |
@@ -104,10 +104,10 @@ void *extend_brk(size_t size, size_t align); | |||
104 | type *name; \ | 104 | type *name; \ |
105 | RESERVE_BRK(name, sizeof(type) * entries) | 105 | RESERVE_BRK(name, sizeof(type) * entries) |
106 | 106 | ||
107 | extern void probe_roms(void); | ||
107 | #ifdef __i386__ | 108 | #ifdef __i386__ |
108 | 109 | ||
109 | void __init i386_start_kernel(void); | 110 | void __init i386_start_kernel(void); |
110 | extern void probe_roms(void); | ||
111 | 111 | ||
112 | #else | 112 | #else |
113 | void __init x86_64_start_kernel(char *real_mode); | 113 | void __init x86_64_start_kernel(char *real_mode); |
diff --git a/arch/x86/include/asm/srat.h b/arch/x86/include/asm/srat.h deleted file mode 100644 index b508d639d1a7..000000000000 --- a/arch/x86/include/asm/srat.h +++ /dev/null | |||
@@ -1,39 +0,0 @@ | |||
1 | /* | ||
2 | * Some of the code in this file has been gleaned from the 64 bit | ||
3 | * discontigmem support code base. | ||
4 | * | ||
5 | * Copyright (C) 2002, IBM Corp. | ||
6 | * | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, but | ||
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
17 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
18 | * details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
23 | * | ||
24 | * Send feedback to Pat Gaughen <gone@us.ibm.com> | ||
25 | */ | ||
26 | |||
27 | #ifndef _ASM_X86_SRAT_H | ||
28 | #define _ASM_X86_SRAT_H | ||
29 | |||
30 | #ifdef CONFIG_ACPI_NUMA | ||
31 | extern int get_memcfg_from_srat(void); | ||
32 | #else | ||
33 | static inline int get_memcfg_from_srat(void) | ||
34 | { | ||
35 | return 0; | ||
36 | } | ||
37 | #endif | ||
38 | |||
39 | #endif /* _ASM_X86_SRAT_H */ | ||
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index d7e89c83645d..70bbe39043a9 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h | |||
@@ -37,9 +37,6 @@ print_context_stack_bp(struct thread_info *tinfo, | |||
37 | /* Generic stack tracer with callbacks */ | 37 | /* Generic stack tracer with callbacks */ |
38 | 38 | ||
39 | struct stacktrace_ops { | 39 | struct stacktrace_ops { |
40 | void (*warning)(void *data, char *msg); | ||
41 | /* msg must contain %s for the symbol */ | ||
42 | void (*warning_symbol)(void *data, char *msg, unsigned long symbol); | ||
43 | void (*address)(void *data, unsigned long address, int reliable); | 40 | void (*address)(void *data, unsigned long address, int reliable); |
44 | /* On negative return stop dumping */ | 41 | /* On negative return stop dumping */ |
45 | int (*stack)(void *data, char *name); | 42 | int (*stack)(void *data, char *name); |
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 12569e691ce3..c2ff2a1d845e 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h | |||
@@ -303,24 +303,81 @@ static inline void native_wbinvd(void) | |||
303 | #ifdef CONFIG_PARAVIRT | 303 | #ifdef CONFIG_PARAVIRT |
304 | #include <asm/paravirt.h> | 304 | #include <asm/paravirt.h> |
305 | #else | 305 | #else |
306 | #define read_cr0() (native_read_cr0()) | 306 | |
307 | #define write_cr0(x) (native_write_cr0(x)) | 307 | static inline unsigned long read_cr0(void) |
308 | #define read_cr2() (native_read_cr2()) | 308 | { |
309 | #define write_cr2(x) (native_write_cr2(x)) | 309 | return native_read_cr0(); |
310 | #define read_cr3() (native_read_cr3()) | 310 | } |
311 | #define write_cr3(x) (native_write_cr3(x)) | 311 | |
312 | #define read_cr4() (native_read_cr4()) | 312 | static inline void write_cr0(unsigned long x) |
313 | #define read_cr4_safe() (native_read_cr4_safe()) | 313 | { |
314 | #define write_cr4(x) (native_write_cr4(x)) | 314 | native_write_cr0(x); |
315 | #define wbinvd() (native_wbinvd()) | 315 | } |
316 | |||
317 | static inline unsigned long read_cr2(void) | ||
318 | { | ||
319 | return native_read_cr2(); | ||
320 | } | ||
321 | |||
322 | static inline void write_cr2(unsigned long x) | ||
323 | { | ||
324 | native_write_cr2(x); | ||
325 | } | ||
326 | |||
327 | static inline unsigned long read_cr3(void) | ||
328 | { | ||
329 | return native_read_cr3(); | ||
330 | } | ||
331 | |||
332 | static inline void write_cr3(unsigned long x) | ||
333 | { | ||
334 | native_write_cr3(x); | ||
335 | } | ||
336 | |||
337 | static inline unsigned long read_cr4(void) | ||
338 | { | ||
339 | return native_read_cr4(); | ||
340 | } | ||
341 | |||
342 | static inline unsigned long read_cr4_safe(void) | ||
343 | { | ||
344 | return native_read_cr4_safe(); | ||
345 | } | ||
346 | |||
347 | static inline void write_cr4(unsigned long x) | ||
348 | { | ||
349 | native_write_cr4(x); | ||
350 | } | ||
351 | |||
352 | static inline void wbinvd(void) | ||
353 | { | ||
354 | native_wbinvd(); | ||
355 | } | ||
356 | |||
316 | #ifdef CONFIG_X86_64 | 357 | #ifdef CONFIG_X86_64 |
317 | #define read_cr8() (native_read_cr8()) | 358 | |
318 | #define write_cr8(x) (native_write_cr8(x)) | 359 | static inline unsigned long read_cr8(void) |
319 | #define load_gs_index native_load_gs_index | 360 | { |
361 | return native_read_cr8(); | ||
362 | } | ||
363 | |||
364 | static inline void write_cr8(unsigned long x) | ||
365 | { | ||
366 | native_write_cr8(x); | ||
367 | } | ||
368 | |||
369 | static inline void load_gs_index(unsigned selector) | ||
370 | { | ||
371 | native_load_gs_index(selector); | ||
372 | } | ||
373 | |||
320 | #endif | 374 | #endif |
321 | 375 | ||
322 | /* Clear the 'TS' bit */ | 376 | /* Clear the 'TS' bit */ |
323 | #define clts() (native_clts()) | 377 | static inline void clts(void) |
378 | { | ||
379 | native_clts(); | ||
380 | } | ||
324 | 381 | ||
325 | #endif/* CONFIG_PARAVIRT */ | 382 | #endif/* CONFIG_PARAVIRT */ |
326 | 383 | ||
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 910a7084f7f2..c00692476e9f 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
@@ -93,19 +93,11 @@ extern void setup_node_to_cpumask_map(void); | |||
93 | #define pcibus_to_node(bus) __pcibus_to_node(bus) | 93 | #define pcibus_to_node(bus) __pcibus_to_node(bus) |
94 | 94 | ||
95 | #ifdef CONFIG_X86_32 | 95 | #ifdef CONFIG_X86_32 |
96 | extern unsigned long node_start_pfn[]; | ||
97 | extern unsigned long node_end_pfn[]; | ||
98 | extern unsigned long node_remap_size[]; | ||
99 | #define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid]) | ||
100 | |||
101 | # define SD_CACHE_NICE_TRIES 1 | 96 | # define SD_CACHE_NICE_TRIES 1 |
102 | # define SD_IDLE_IDX 1 | 97 | # define SD_IDLE_IDX 1 |
103 | |||
104 | #else | 98 | #else |
105 | |||
106 | # define SD_CACHE_NICE_TRIES 2 | 99 | # define SD_CACHE_NICE_TRIES 2 |
107 | # define SD_IDLE_IDX 2 | 100 | # define SD_IDLE_IDX 2 |
108 | |||
109 | #endif | 101 | #endif |
110 | 102 | ||
111 | /* sched_domains SD_NODE_INIT for NUMA machines */ | 103 | /* sched_domains SD_NODE_INIT for NUMA machines */ |
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index abd3e0ea762a..99ddd148a760 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -6,7 +6,6 @@ | |||
6 | #include <linux/errno.h> | 6 | #include <linux/errno.h> |
7 | #include <linux/compiler.h> | 7 | #include <linux/compiler.h> |
8 | #include <linux/thread_info.h> | 8 | #include <linux/thread_info.h> |
9 | #include <linux/prefetch.h> | ||
10 | #include <linux/string.h> | 9 | #include <linux/string.h> |
11 | #include <asm/asm.h> | 10 | #include <asm/asm.h> |
12 | #include <asm/page.h> | 11 | #include <asm/page.h> |
@@ -42,7 +41,7 @@ | |||
42 | * Returns 0 if the range is valid, nonzero otherwise. | 41 | * Returns 0 if the range is valid, nonzero otherwise. |
43 | * | 42 | * |
44 | * This is equivalent to the following test: | 43 | * This is equivalent to the following test: |
45 | * (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64) | 44 | * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64) |
46 | * | 45 | * |
47 | * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... | 46 | * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... |
48 | */ | 47 | */ |
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 088d09fb1615..566e803cc602 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h | |||
@@ -6,7 +6,6 @@ | |||
6 | */ | 6 | */ |
7 | #include <linux/errno.h> | 7 | #include <linux/errno.h> |
8 | #include <linux/thread_info.h> | 8 | #include <linux/thread_info.h> |
9 | #include <linux/prefetch.h> | ||
10 | #include <linux/string.h> | 9 | #include <linux/string.h> |
11 | #include <asm/asm.h> | 10 | #include <asm/asm.h> |
12 | #include <asm/page.h> | 11 | #include <asm/page.h> |
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 316708d5af92..1c66d30971ad 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h | |||
@@ -6,7 +6,6 @@ | |||
6 | */ | 6 | */ |
7 | #include <linux/compiler.h> | 7 | #include <linux/compiler.h> |
8 | #include <linux/errno.h> | 8 | #include <linux/errno.h> |
9 | #include <linux/prefetch.h> | ||
10 | #include <linux/lockdep.h> | 9 | #include <linux/lockdep.h> |
11 | #include <asm/alternative.h> | 10 | #include <asm/alternative.h> |
12 | #include <asm/cpufeature.h> | 11 | #include <asm/cpufeature.h> |
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index a755ef5e5977..fb6a625c99bf 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h | |||
@@ -350,10 +350,11 @@ | |||
350 | #define __NR_open_by_handle_at 342 | 350 | #define __NR_open_by_handle_at 342 |
351 | #define __NR_clock_adjtime 343 | 351 | #define __NR_clock_adjtime 343 |
352 | #define __NR_syncfs 344 | 352 | #define __NR_syncfs 344 |
353 | #define __NR_sendmmsg 345 | ||
353 | 354 | ||
354 | #ifdef __KERNEL__ | 355 | #ifdef __KERNEL__ |
355 | 356 | ||
356 | #define NR_syscalls 345 | 357 | #define NR_syscalls 346 |
357 | 358 | ||
358 | #define __ARCH_WANT_IPC_PARSE_VERSION | 359 | #define __ARCH_WANT_IPC_PARSE_VERSION |
359 | #define __ARCH_WANT_OLD_READDIR | 360 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 160fa76bd578..79f90eb15aad 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
@@ -677,6 +677,8 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) | |||
677 | __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) | 677 | __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) |
678 | #define __NR_syncfs 306 | 678 | #define __NR_syncfs 306 |
679 | __SYSCALL(__NR_syncfs, sys_syncfs) | 679 | __SYSCALL(__NR_syncfs, sys_syncfs) |
680 | #define __NR_sendmmsg 307 | ||
681 | __SYSCALL(__NR_sendmmsg, sys_sendmmsg) | ||
680 | 682 | ||
681 | #ifndef __NO_STUBS | 683 | #ifndef __NO_STUBS |
682 | #define __ARCH_WANT_OLD_READDIR | 684 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 3e094af443c3..130f1eeee5fe 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h | |||
@@ -94,6 +94,8 @@ | |||
94 | /* after this # consecutive successes, bump up the throttle if it was lowered */ | 94 | /* after this # consecutive successes, bump up the throttle if it was lowered */ |
95 | #define COMPLETE_THRESHOLD 5 | 95 | #define COMPLETE_THRESHOLD 5 |
96 | 96 | ||
97 | #define UV_LB_SUBNODEID 0x10 | ||
98 | |||
97 | /* | 99 | /* |
98 | * number of entries in the destination side payload queue | 100 | * number of entries in the destination side payload queue |
99 | */ | 101 | */ |
@@ -124,7 +126,7 @@ | |||
124 | * The distribution specification (32 bytes) is interpreted as a 256-bit | 126 | * The distribution specification (32 bytes) is interpreted as a 256-bit |
125 | * distribution vector. Adjacent bits correspond to consecutive even numbered | 127 | * distribution vector. Adjacent bits correspond to consecutive even numbered |
126 | * nodeIDs. The result of adding the index of a given bit to the 15-bit | 128 | * nodeIDs. The result of adding the index of a given bit to the 15-bit |
127 | * 'base_dest_nodeid' field of the header corresponds to the | 129 | * 'base_dest_nasid' field of the header corresponds to the |
128 | * destination nodeID associated with that specified bit. | 130 | * destination nodeID associated with that specified bit. |
129 | */ | 131 | */ |
130 | struct bau_target_uvhubmask { | 132 | struct bau_target_uvhubmask { |
@@ -176,7 +178,7 @@ struct bau_msg_payload { | |||
176 | struct bau_msg_header { | 178 | struct bau_msg_header { |
177 | unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ | 179 | unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ |
178 | /* bits 5:0 */ | 180 | /* bits 5:0 */ |
179 | unsigned int base_dest_nodeid:15; /* nasid of the */ | 181 | unsigned int base_dest_nasid:15; /* nasid of the */ |
180 | /* bits 20:6 */ /* first bit in uvhub map */ | 182 | /* bits 20:6 */ /* first bit in uvhub map */ |
181 | unsigned int command:8; /* message type */ | 183 | unsigned int command:8; /* message type */ |
182 | /* bits 28:21 */ | 184 | /* bits 28:21 */ |
@@ -378,6 +380,10 @@ struct ptc_stats { | |||
378 | unsigned long d_rcanceled; /* number of messages canceled by resets */ | 380 | unsigned long d_rcanceled; /* number of messages canceled by resets */ |
379 | }; | 381 | }; |
380 | 382 | ||
383 | struct hub_and_pnode { | ||
384 | short uvhub; | ||
385 | short pnode; | ||
386 | }; | ||
381 | /* | 387 | /* |
382 | * one per-cpu; to locate the software tables | 388 | * one per-cpu; to locate the software tables |
383 | */ | 389 | */ |
@@ -399,10 +405,12 @@ struct bau_control { | |||
399 | int baudisabled; | 405 | int baudisabled; |
400 | int set_bau_off; | 406 | int set_bau_off; |
401 | short cpu; | 407 | short cpu; |
408 | short osnode; | ||
402 | short uvhub_cpu; | 409 | short uvhub_cpu; |
403 | short uvhub; | 410 | short uvhub; |
404 | short cpus_in_socket; | 411 | short cpus_in_socket; |
405 | short cpus_in_uvhub; | 412 | short cpus_in_uvhub; |
413 | short partition_base_pnode; | ||
406 | unsigned short message_number; | 414 | unsigned short message_number; |
407 | unsigned short uvhub_quiesce; | 415 | unsigned short uvhub_quiesce; |
408 | short socket_acknowledge_count[DEST_Q_SIZE]; | 416 | short socket_acknowledge_count[DEST_Q_SIZE]; |
@@ -422,15 +430,16 @@ struct bau_control { | |||
422 | int congested_period; | 430 | int congested_period; |
423 | cycles_t period_time; | 431 | cycles_t period_time; |
424 | long period_requests; | 432 | long period_requests; |
433 | struct hub_and_pnode *target_hub_and_pnode; | ||
425 | }; | 434 | }; |
426 | 435 | ||
427 | static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) | 436 | static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) |
428 | { | 437 | { |
429 | return constant_test_bit(uvhub, &dstp->bits[0]); | 438 | return constant_test_bit(uvhub, &dstp->bits[0]); |
430 | } | 439 | } |
431 | static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp) | 440 | static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp) |
432 | { | 441 | { |
433 | __set_bit(uvhub, &dstp->bits[0]); | 442 | __set_bit(pnode, &dstp->bits[0]); |
434 | } | 443 | } |
435 | static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, | 444 | static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, |
436 | int nbits) | 445 | int nbits) |
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index a501741c2335..4298002d0c83 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h | |||
@@ -398,6 +398,8 @@ struct uv_blade_info { | |||
398 | unsigned short nr_online_cpus; | 398 | unsigned short nr_online_cpus; |
399 | unsigned short pnode; | 399 | unsigned short pnode; |
400 | short memory_nid; | 400 | short memory_nid; |
401 | spinlock_t nmi_lock; | ||
402 | unsigned long nmi_count; | ||
401 | }; | 403 | }; |
402 | extern struct uv_blade_info *uv_blade_info; | 404 | extern struct uv_blade_info *uv_blade_info; |
403 | extern short *uv_node_to_blade; | 405 | extern short *uv_node_to_blade; |
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 20cafeac7455..f5bb64a823d7 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h | |||
@@ -5,7 +5,7 @@ | |||
5 | * | 5 | * |
6 | * SGI UV MMR definitions | 6 | * SGI UV MMR definitions |
7 | * | 7 | * |
8 | * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. | 8 | * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #ifndef _ASM_X86_UV_UV_MMRS_H | 11 | #ifndef _ASM_X86_UV_UV_MMRS_H |
@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u { | |||
1099 | } s; | 1099 | } s; |
1100 | }; | 1100 | }; |
1101 | 1101 | ||
1102 | /* ========================================================================= */ | ||
1103 | /* UVH_SCRATCH5 */ | ||
1104 | /* ========================================================================= */ | ||
1105 | #define UVH_SCRATCH5 0x2d0200UL | ||
1106 | #define UVH_SCRATCH5_32 0x00778 | ||
1107 | |||
1108 | #define UVH_SCRATCH5_SCRATCH5_SHFT 0 | ||
1109 | #define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL | ||
1110 | union uvh_scratch5_u { | ||
1111 | unsigned long v; | ||
1112 | struct uvh_scratch5_s { | ||
1113 | unsigned long scratch5 : 64; /* RW, W1CS */ | ||
1114 | } s; | ||
1115 | }; | ||
1102 | 1116 | ||
1103 | #endif /* __ASM_UV_MMRS_X86_H__ */ | 1117 | #endif /* __ASM_UV_MMRS_X86_H__ */ |
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index c61934fbf22a..64a619d47d34 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
@@ -47,8 +47,9 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); | |||
47 | extern unsigned long set_phys_range_identity(unsigned long pfn_s, | 47 | extern unsigned long set_phys_range_identity(unsigned long pfn_s, |
48 | unsigned long pfn_e); | 48 | unsigned long pfn_e); |
49 | 49 | ||
50 | extern int m2p_add_override(unsigned long mfn, struct page *page); | 50 | extern int m2p_add_override(unsigned long mfn, struct page *page, |
51 | extern int m2p_remove_override(struct page *page); | 51 | bool clear_pte); |
52 | extern int m2p_remove_override(struct page *page, bool clear_pte); | ||
52 | extern struct page *m2p_find_override(unsigned long mfn); | 53 | extern struct page *m2p_find_override(unsigned long mfn); |
53 | extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); | 54 | extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); |
54 | 55 | ||
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index aa8620989162..4fbda9a3f339 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h | |||
@@ -15,10 +15,26 @@ static inline int pci_xen_hvm_init(void) | |||
15 | #endif | 15 | #endif |
16 | #if defined(CONFIG_XEN_DOM0) | 16 | #if defined(CONFIG_XEN_DOM0) |
17 | void __init xen_setup_pirqs(void); | 17 | void __init xen_setup_pirqs(void); |
18 | int xen_find_device_domain_owner(struct pci_dev *dev); | ||
19 | int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); | ||
20 | int xen_unregister_device_domain_owner(struct pci_dev *dev); | ||
18 | #else | 21 | #else |
19 | static inline void __init xen_setup_pirqs(void) | 22 | static inline void __init xen_setup_pirqs(void) |
20 | { | 23 | { |
21 | } | 24 | } |
25 | static inline int xen_find_device_domain_owner(struct pci_dev *dev) | ||
26 | { | ||
27 | return -1; | ||
28 | } | ||
29 | static inline int xen_register_device_domain_owner(struct pci_dev *dev, | ||
30 | uint16_t domain) | ||
31 | { | ||
32 | return -1; | ||
33 | } | ||
34 | static inline int xen_unregister_device_domain_owner(struct pci_dev *dev) | ||
35 | { | ||
36 | return -1; | ||
37 | } | ||
22 | #endif | 38 | #endif |
23 | 39 | ||
24 | #if defined(CONFIG_PCI_MSI) | 40 | #if defined(CONFIG_PCI_MSI) |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 7338ef2218bc..250806472a7e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -36,7 +36,7 @@ obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o | |||
36 | obj-y += time.o ioport.o ldt.o dumpstack.o | 36 | obj-y += time.o ioport.o ldt.o dumpstack.o |
37 | obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o | 37 | obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o |
38 | obj-$(CONFIG_IRQ_WORK) += irq_work.o | 38 | obj-$(CONFIG_IRQ_WORK) += irq_work.o |
39 | obj-$(CONFIG_X86_32) += probe_roms_32.o | 39 | obj-y += probe_roms.o |
40 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o | 40 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o |
41 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | 41 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o |
42 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o | 42 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o |
@@ -117,7 +117,7 @@ obj-$(CONFIG_OF) += devicetree.o | |||
117 | ifeq ($(CONFIG_X86_64),y) | 117 | ifeq ($(CONFIG_X86_64),y) |
118 | obj-$(CONFIG_AUDIT) += audit_64.o | 118 | obj-$(CONFIG_AUDIT) += audit_64.o |
119 | 119 | ||
120 | obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o | 120 | obj-$(CONFIG_GART_IOMMU) += amd_gart_64.o aperture_64.o |
121 | obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o | 121 | obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o |
122 | obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o | 122 | obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o |
123 | 123 | ||
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index ff93bc1b09c3..18a857ba7a25 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -112,11 +112,6 @@ static int __init acpi_sleep_setup(char *str) | |||
112 | #ifdef CONFIG_HIBERNATION | 112 | #ifdef CONFIG_HIBERNATION |
113 | if (strncmp(str, "s4_nohwsig", 10) == 0) | 113 | if (strncmp(str, "s4_nohwsig", 10) == 0) |
114 | acpi_no_s4_hw_signature(); | 114 | acpi_no_s4_hw_signature(); |
115 | if (strncmp(str, "s4_nonvs", 8) == 0) { | ||
116 | pr_warning("ACPI: acpi_sleep=s4_nonvs is deprecated, " | ||
117 | "please use acpi_sleep=nonvs instead"); | ||
118 | acpi_nvs_nosave(); | ||
119 | } | ||
120 | #endif | 115 | #endif |
121 | if (strncmp(str, "nonvs", 5) == 0) | 116 | if (strncmp(str, "nonvs", 5) == 0) |
122 | acpi_nvs_nosave(); | 117 | acpi_nvs_nosave(); |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 4a234677e213..a81f2d52f869 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -67,17 +67,30 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt); | |||
67 | #define DPRINTK(fmt, args...) if (debug_alternative) \ | 67 | #define DPRINTK(fmt, args...) if (debug_alternative) \ |
68 | printk(KERN_DEBUG fmt, args) | 68 | printk(KERN_DEBUG fmt, args) |
69 | 69 | ||
70 | /* | ||
71 | * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes | ||
72 | * that correspond to that nop. Getting from one nop to the next, we | ||
73 | * add to the array the offset that is equal to the sum of all sizes of | ||
74 | * nops preceding the one we are after. | ||
75 | * | ||
76 | * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the | ||
77 | * nice symmetry of sizes of the previous nops. | ||
78 | */ | ||
70 | #if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64) | 79 | #if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64) |
71 | /* Use inline assembly to define this because the nops are defined | 80 | static const unsigned char intelnops[] = |
72 | as inline assembly strings in the include files and we cannot | 81 | { |
73 | get them easily into strings. */ | 82 | GENERIC_NOP1, |
74 | asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nintelnops: " | 83 | GENERIC_NOP2, |
75 | GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 | 84 | GENERIC_NOP3, |
76 | GENERIC_NOP7 GENERIC_NOP8 | 85 | GENERIC_NOP4, |
77 | "\t.previous"); | 86 | GENERIC_NOP5, |
78 | extern const unsigned char intelnops[]; | 87 | GENERIC_NOP6, |
79 | static const unsigned char *const __initconst_or_module | 88 | GENERIC_NOP7, |
80 | intel_nops[ASM_NOP_MAX+1] = { | 89 | GENERIC_NOP8, |
90 | GENERIC_NOP5_ATOMIC | ||
91 | }; | ||
92 | static const unsigned char * const intel_nops[ASM_NOP_MAX+2] = | ||
93 | { | ||
81 | NULL, | 94 | NULL, |
82 | intelnops, | 95 | intelnops, |
83 | intelnops + 1, | 96 | intelnops + 1, |
@@ -87,17 +100,25 @@ intel_nops[ASM_NOP_MAX+1] = { | |||
87 | intelnops + 1 + 2 + 3 + 4 + 5, | 100 | intelnops + 1 + 2 + 3 + 4 + 5, |
88 | intelnops + 1 + 2 + 3 + 4 + 5 + 6, | 101 | intelnops + 1 + 2 + 3 + 4 + 5 + 6, |
89 | intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | 102 | intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, |
103 | intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, | ||
90 | }; | 104 | }; |
91 | #endif | 105 | #endif |
92 | 106 | ||
93 | #ifdef K8_NOP1 | 107 | #ifdef K8_NOP1 |
94 | asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk8nops: " | 108 | static const unsigned char k8nops[] = |
95 | K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 | 109 | { |
96 | K8_NOP7 K8_NOP8 | 110 | K8_NOP1, |
97 | "\t.previous"); | 111 | K8_NOP2, |
98 | extern const unsigned char k8nops[]; | 112 | K8_NOP3, |
99 | static const unsigned char *const __initconst_or_module | 113 | K8_NOP4, |
100 | k8_nops[ASM_NOP_MAX+1] = { | 114 | K8_NOP5, |
115 | K8_NOP6, | ||
116 | K8_NOP7, | ||
117 | K8_NOP8, | ||
118 | K8_NOP5_ATOMIC | ||
119 | }; | ||
120 | static const unsigned char * const k8_nops[ASM_NOP_MAX+2] = | ||
121 | { | ||
101 | NULL, | 122 | NULL, |
102 | k8nops, | 123 | k8nops, |
103 | k8nops + 1, | 124 | k8nops + 1, |
@@ -107,17 +128,25 @@ k8_nops[ASM_NOP_MAX+1] = { | |||
107 | k8nops + 1 + 2 + 3 + 4 + 5, | 128 | k8nops + 1 + 2 + 3 + 4 + 5, |
108 | k8nops + 1 + 2 + 3 + 4 + 5 + 6, | 129 | k8nops + 1 + 2 + 3 + 4 + 5 + 6, |
109 | k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | 130 | k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, |
131 | k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, | ||
110 | }; | 132 | }; |
111 | #endif | 133 | #endif |
112 | 134 | ||
113 | #if defined(K7_NOP1) && !defined(CONFIG_X86_64) | 135 | #if defined(K7_NOP1) && !defined(CONFIG_X86_64) |
114 | asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk7nops: " | 136 | static const unsigned char k7nops[] = |
115 | K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 | 137 | { |
116 | K7_NOP7 K7_NOP8 | 138 | K7_NOP1, |
117 | "\t.previous"); | 139 | K7_NOP2, |
118 | extern const unsigned char k7nops[]; | 140 | K7_NOP3, |
119 | static const unsigned char *const __initconst_or_module | 141 | K7_NOP4, |
120 | k7_nops[ASM_NOP_MAX+1] = { | 142 | K7_NOP5, |
143 | K7_NOP6, | ||
144 | K7_NOP7, | ||
145 | K7_NOP8, | ||
146 | K7_NOP5_ATOMIC | ||
147 | }; | ||
148 | static const unsigned char * const k7_nops[ASM_NOP_MAX+2] = | ||
149 | { | ||
121 | NULL, | 150 | NULL, |
122 | k7nops, | 151 | k7nops, |
123 | k7nops + 1, | 152 | k7nops + 1, |
@@ -127,17 +156,25 @@ k7_nops[ASM_NOP_MAX+1] = { | |||
127 | k7nops + 1 + 2 + 3 + 4 + 5, | 156 | k7nops + 1 + 2 + 3 + 4 + 5, |
128 | k7nops + 1 + 2 + 3 + 4 + 5 + 6, | 157 | k7nops + 1 + 2 + 3 + 4 + 5 + 6, |
129 | k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | 158 | k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, |
159 | k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, | ||
130 | }; | 160 | }; |
131 | #endif | 161 | #endif |
132 | 162 | ||
133 | #ifdef P6_NOP1 | 163 | #ifdef P6_NOP1 |
134 | asm("\t" __stringify(__INITRODATA_OR_MODULE) "\np6nops: " | 164 | static const unsigned char __initconst_or_module p6nops[] = |
135 | P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 | 165 | { |
136 | P6_NOP7 P6_NOP8 | 166 | P6_NOP1, |
137 | "\t.previous"); | 167 | P6_NOP2, |
138 | extern const unsigned char p6nops[]; | 168 | P6_NOP3, |
139 | static const unsigned char *const __initconst_or_module | 169 | P6_NOP4, |
140 | p6_nops[ASM_NOP_MAX+1] = { | 170 | P6_NOP5, |
171 | P6_NOP6, | ||
172 | P6_NOP7, | ||
173 | P6_NOP8, | ||
174 | P6_NOP5_ATOMIC | ||
175 | }; | ||
176 | static const unsigned char * const p6_nops[ASM_NOP_MAX+2] = | ||
177 | { | ||
141 | NULL, | 178 | NULL, |
142 | p6nops, | 179 | p6nops, |
143 | p6nops + 1, | 180 | p6nops + 1, |
@@ -147,47 +184,65 @@ p6_nops[ASM_NOP_MAX+1] = { | |||
147 | p6nops + 1 + 2 + 3 + 4 + 5, | 184 | p6nops + 1 + 2 + 3 + 4 + 5, |
148 | p6nops + 1 + 2 + 3 + 4 + 5 + 6, | 185 | p6nops + 1 + 2 + 3 + 4 + 5 + 6, |
149 | p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | 186 | p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, |
187 | p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, | ||
150 | }; | 188 | }; |
151 | #endif | 189 | #endif |
152 | 190 | ||
191 | /* Initialize these to a safe default */ | ||
153 | #ifdef CONFIG_X86_64 | 192 | #ifdef CONFIG_X86_64 |
193 | const unsigned char * const *ideal_nops = p6_nops; | ||
194 | #else | ||
195 | const unsigned char * const *ideal_nops = intel_nops; | ||
196 | #endif | ||
154 | 197 | ||
155 | extern char __vsyscall_0; | 198 | void __init arch_init_ideal_nops(void) |
156 | static const unsigned char *const *__init_or_module find_nop_table(void) | ||
157 | { | 199 | { |
158 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && | 200 | switch (boot_cpu_data.x86_vendor) { |
159 | boot_cpu_has(X86_FEATURE_NOPL)) | 201 | case X86_VENDOR_INTEL: |
160 | return p6_nops; | 202 | /* |
161 | else | 203 | * Due to a decoder implementation quirk, some |
162 | return k8_nops; | 204 | * specific Intel CPUs actually perform better with |
163 | } | 205 | * the "k8_nops" than with the SDM-recommended NOPs. |
164 | 206 | */ | |
165 | #else /* CONFIG_X86_64 */ | 207 | if (boot_cpu_data.x86 == 6 && |
208 | boot_cpu_data.x86_model >= 0x0f && | ||
209 | boot_cpu_data.x86_model != 0x1c && | ||
210 | boot_cpu_data.x86_model != 0x26 && | ||
211 | boot_cpu_data.x86_model != 0x27 && | ||
212 | boot_cpu_data.x86_model < 0x30) { | ||
213 | ideal_nops = k8_nops; | ||
214 | } else if (boot_cpu_has(X86_FEATURE_NOPL)) { | ||
215 | ideal_nops = p6_nops; | ||
216 | } else { | ||
217 | #ifdef CONFIG_X86_64 | ||
218 | ideal_nops = k8_nops; | ||
219 | #else | ||
220 | ideal_nops = intel_nops; | ||
221 | #endif | ||
222 | } | ||
166 | 223 | ||
167 | static const unsigned char *const *__init_or_module find_nop_table(void) | 224 | default: |
168 | { | 225 | #ifdef CONFIG_X86_64 |
169 | if (boot_cpu_has(X86_FEATURE_K8)) | 226 | ideal_nops = k8_nops; |
170 | return k8_nops; | 227 | #else |
171 | else if (boot_cpu_has(X86_FEATURE_K7)) | 228 | if (boot_cpu_has(X86_FEATURE_K8)) |
172 | return k7_nops; | 229 | ideal_nops = k8_nops; |
173 | else if (boot_cpu_has(X86_FEATURE_NOPL)) | 230 | else if (boot_cpu_has(X86_FEATURE_K7)) |
174 | return p6_nops; | 231 | ideal_nops = k7_nops; |
175 | else | 232 | else |
176 | return intel_nops; | 233 | ideal_nops = intel_nops; |
234 | #endif | ||
235 | } | ||
177 | } | 236 | } |
178 | 237 | ||
179 | #endif /* CONFIG_X86_64 */ | ||
180 | |||
181 | /* Use this to add nops to a buffer, then text_poke the whole buffer. */ | 238 | /* Use this to add nops to a buffer, then text_poke the whole buffer. */ |
182 | static void __init_or_module add_nops(void *insns, unsigned int len) | 239 | static void __init_or_module add_nops(void *insns, unsigned int len) |
183 | { | 240 | { |
184 | const unsigned char *const *noptable = find_nop_table(); | ||
185 | |||
186 | while (len > 0) { | 241 | while (len > 0) { |
187 | unsigned int noplen = len; | 242 | unsigned int noplen = len; |
188 | if (noplen > ASM_NOP_MAX) | 243 | if (noplen > ASM_NOP_MAX) |
189 | noplen = ASM_NOP_MAX; | 244 | noplen = ASM_NOP_MAX; |
190 | memcpy(insns, noptable[noplen], noplen); | 245 | memcpy(insns, ideal_nops[noplen], noplen); |
191 | insns += noplen; | 246 | insns += noplen; |
192 | len -= noplen; | 247 | len -= noplen; |
193 | } | 248 | } |
@@ -195,6 +250,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) | |||
195 | 250 | ||
196 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | 251 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; |
197 | extern s32 __smp_locks[], __smp_locks_end[]; | 252 | extern s32 __smp_locks[], __smp_locks_end[]; |
253 | extern char __vsyscall_0; | ||
198 | void *text_poke_early(void *addr, const void *opcode, size_t len); | 254 | void *text_poke_early(void *addr, const void *opcode, size_t len); |
199 | 255 | ||
200 | /* Replace instructions with better alternatives for this CPU type. | 256 | /* Replace instructions with better alternatives for this CPU type. |
@@ -210,6 +266,15 @@ void __init_or_module apply_alternatives(struct alt_instr *start, | |||
210 | u8 insnbuf[MAX_PATCH_LEN]; | 266 | u8 insnbuf[MAX_PATCH_LEN]; |
211 | 267 | ||
212 | DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); | 268 | DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); |
269 | /* | ||
270 | * The scan order should be from start to end. A later scanned | ||
271 | * alternative code can overwrite a previous scanned alternative code. | ||
272 | * Some kernel functions (e.g. memcpy, memset, etc) use this order to | ||
273 | * patch code. | ||
274 | * | ||
275 | * So be careful if you want to change the scan order to any other | ||
276 | * order. | ||
277 | */ | ||
213 | for (a = start; a < end; a++) { | 278 | for (a = start; a < end; a++) { |
214 | u8 *instr = a->instr; | 279 | u8 *instr = a->instr; |
215 | BUG_ON(a->replacementlen > a->instrlen); | 280 | BUG_ON(a->replacementlen > a->instrlen); |
@@ -678,29 +743,3 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n) | |||
678 | wrote_text = 0; | 743 | wrote_text = 0; |
679 | __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); | 744 | __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); |
680 | } | 745 | } |
681 | |||
682 | #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) | ||
683 | |||
684 | #ifdef CONFIG_X86_64 | ||
685 | unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 }; | ||
686 | #else | ||
687 | unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 }; | ||
688 | #endif | ||
689 | |||
690 | void __init arch_init_ideal_nop5(void) | ||
691 | { | ||
692 | /* | ||
693 | * There is no good nop for all x86 archs. This selection | ||
694 | * algorithm should be unified with the one in find_nop_table(), | ||
695 | * but this should be good enough for now. | ||
696 | * | ||
697 | * For cases other than the ones below, use the safe (as in | ||
698 | * always functional) defaults above. | ||
699 | */ | ||
700 | #ifdef CONFIG_X86_64 | ||
701 | /* Don't use these on 32 bits due to broken virtualizers */ | ||
702 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | ||
703 | memcpy(ideal_nop5, p6_nops[5], 5); | ||
704 | #endif | ||
705 | } | ||
706 | #endif | ||
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/amd_gart_64.c index b117efd24f71..b117efd24f71 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c | |||
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 57ca77787220..873e7e1ead7b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -18,6 +18,7 @@ | |||
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include <linux/pci.h> | 20 | #include <linux/pci.h> |
21 | #include <linux/pci-ats.h> | ||
21 | #include <linux/bitmap.h> | 22 | #include <linux/bitmap.h> |
22 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
23 | #include <linux/debugfs.h> | 24 | #include <linux/debugfs.h> |
@@ -25,6 +26,7 @@ | |||
25 | #include <linux/dma-mapping.h> | 26 | #include <linux/dma-mapping.h> |
26 | #include <linux/iommu-helper.h> | 27 | #include <linux/iommu-helper.h> |
27 | #include <linux/iommu.h> | 28 | #include <linux/iommu.h> |
29 | #include <linux/delay.h> | ||
28 | #include <asm/proto.h> | 30 | #include <asm/proto.h> |
29 | #include <asm/iommu.h> | 31 | #include <asm/iommu.h> |
30 | #include <asm/gart.h> | 32 | #include <asm/gart.h> |
@@ -34,7 +36,7 @@ | |||
34 | 36 | ||
35 | #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) | 37 | #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) |
36 | 38 | ||
37 | #define EXIT_LOOP_COUNT 10000000 | 39 | #define LOOP_TIMEOUT 100000 |
38 | 40 | ||
39 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); | 41 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); |
40 | 42 | ||
@@ -57,7 +59,6 @@ struct iommu_cmd { | |||
57 | u32 data[4]; | 59 | u32 data[4]; |
58 | }; | 60 | }; |
59 | 61 | ||
60 | static void reset_iommu_command_buffer(struct amd_iommu *iommu); | ||
61 | static void update_domain(struct protection_domain *domain); | 62 | static void update_domain(struct protection_domain *domain); |
62 | 63 | ||
63 | /**************************************************************************** | 64 | /**************************************************************************** |
@@ -322,8 +323,6 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) | |||
322 | break; | 323 | break; |
323 | case EVENT_TYPE_ILL_CMD: | 324 | case EVENT_TYPE_ILL_CMD: |
324 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); | 325 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); |
325 | iommu->reset_in_progress = true; | ||
326 | reset_iommu_command_buffer(iommu); | ||
327 | dump_command(address); | 326 | dump_command(address); |
328 | break; | 327 | break; |
329 | case EVENT_TYPE_CMD_HARD_ERR: | 328 | case EVENT_TYPE_CMD_HARD_ERR: |
@@ -367,7 +366,7 @@ static void iommu_poll_events(struct amd_iommu *iommu) | |||
367 | spin_unlock_irqrestore(&iommu->lock, flags); | 366 | spin_unlock_irqrestore(&iommu->lock, flags); |
368 | } | 367 | } |
369 | 368 | ||
370 | irqreturn_t amd_iommu_int_handler(int irq, void *data) | 369 | irqreturn_t amd_iommu_int_thread(int irq, void *data) |
371 | { | 370 | { |
372 | struct amd_iommu *iommu; | 371 | struct amd_iommu *iommu; |
373 | 372 | ||
@@ -377,192 +376,300 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data) | |||
377 | return IRQ_HANDLED; | 376 | return IRQ_HANDLED; |
378 | } | 377 | } |
379 | 378 | ||
379 | irqreturn_t amd_iommu_int_handler(int irq, void *data) | ||
380 | { | ||
381 | return IRQ_WAKE_THREAD; | ||
382 | } | ||
383 | |||
380 | /**************************************************************************** | 384 | /**************************************************************************** |
381 | * | 385 | * |
382 | * IOMMU command queuing functions | 386 | * IOMMU command queuing functions |
383 | * | 387 | * |
384 | ****************************************************************************/ | 388 | ****************************************************************************/ |
385 | 389 | ||
386 | /* | 390 | static int wait_on_sem(volatile u64 *sem) |
387 | * Writes the command to the IOMMUs command buffer and informs the | 391 | { |
388 | * hardware about the new command. Must be called with iommu->lock held. | 392 | int i = 0; |
389 | */ | 393 | |
390 | static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | 394 | while (*sem == 0 && i < LOOP_TIMEOUT) { |
395 | udelay(1); | ||
396 | i += 1; | ||
397 | } | ||
398 | |||
399 | if (i == LOOP_TIMEOUT) { | ||
400 | pr_alert("AMD-Vi: Completion-Wait loop timed out\n"); | ||
401 | return -EIO; | ||
402 | } | ||
403 | |||
404 | return 0; | ||
405 | } | ||
406 | |||
407 | static void copy_cmd_to_buffer(struct amd_iommu *iommu, | ||
408 | struct iommu_cmd *cmd, | ||
409 | u32 tail) | ||
391 | { | 410 | { |
392 | u32 tail, head; | ||
393 | u8 *target; | 411 | u8 *target; |
394 | 412 | ||
395 | WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); | ||
396 | tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | ||
397 | target = iommu->cmd_buf + tail; | 413 | target = iommu->cmd_buf + tail; |
398 | memcpy_toio(target, cmd, sizeof(*cmd)); | 414 | tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; |
399 | tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; | 415 | |
400 | head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | 416 | /* Copy command to buffer */ |
401 | if (tail == head) | 417 | memcpy(target, cmd, sizeof(*cmd)); |
402 | return -ENOMEM; | 418 | |
419 | /* Tell the IOMMU about it */ | ||
403 | writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | 420 | writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); |
421 | } | ||
404 | 422 | ||
405 | return 0; | 423 | static void build_completion_wait(struct iommu_cmd *cmd, u64 address) |
424 | { | ||
425 | WARN_ON(address & 0x7ULL); | ||
426 | |||
427 | memset(cmd, 0, sizeof(*cmd)); | ||
428 | cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK; | ||
429 | cmd->data[1] = upper_32_bits(__pa(address)); | ||
430 | cmd->data[2] = 1; | ||
431 | CMD_SET_TYPE(cmd, CMD_COMPL_WAIT); | ||
432 | } | ||
433 | |||
434 | static void build_inv_dte(struct iommu_cmd *cmd, u16 devid) | ||
435 | { | ||
436 | memset(cmd, 0, sizeof(*cmd)); | ||
437 | cmd->data[0] = devid; | ||
438 | CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY); | ||
439 | } | ||
440 | |||
441 | static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, | ||
442 | size_t size, u16 domid, int pde) | ||
443 | { | ||
444 | u64 pages; | ||
445 | int s; | ||
446 | |||
447 | pages = iommu_num_pages(address, size, PAGE_SIZE); | ||
448 | s = 0; | ||
449 | |||
450 | if (pages > 1) { | ||
451 | /* | ||
452 | * If we have to flush more than one page, flush all | ||
453 | * TLB entries for this domain | ||
454 | */ | ||
455 | address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | ||
456 | s = 1; | ||
457 | } | ||
458 | |||
459 | address &= PAGE_MASK; | ||
460 | |||
461 | memset(cmd, 0, sizeof(*cmd)); | ||
462 | cmd->data[1] |= domid; | ||
463 | cmd->data[2] = lower_32_bits(address); | ||
464 | cmd->data[3] = upper_32_bits(address); | ||
465 | CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); | ||
466 | if (s) /* size bit - we flush more than one 4kb page */ | ||
467 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; | ||
468 | if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ | ||
469 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; | ||
470 | } | ||
471 | |||
472 | static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep, | ||
473 | u64 address, size_t size) | ||
474 | { | ||
475 | u64 pages; | ||
476 | int s; | ||
477 | |||
478 | pages = iommu_num_pages(address, size, PAGE_SIZE); | ||
479 | s = 0; | ||
480 | |||
481 | if (pages > 1) { | ||
482 | /* | ||
483 | * If we have to flush more than one page, flush all | ||
484 | * TLB entries for this domain | ||
485 | */ | ||
486 | address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | ||
487 | s = 1; | ||
488 | } | ||
489 | |||
490 | address &= PAGE_MASK; | ||
491 | |||
492 | memset(cmd, 0, sizeof(*cmd)); | ||
493 | cmd->data[0] = devid; | ||
494 | cmd->data[0] |= (qdep & 0xff) << 24; | ||
495 | cmd->data[1] = devid; | ||
496 | cmd->data[2] = lower_32_bits(address); | ||
497 | cmd->data[3] = upper_32_bits(address); | ||
498 | CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); | ||
499 | if (s) | ||
500 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; | ||
501 | } | ||
502 | |||
503 | static void build_inv_all(struct iommu_cmd *cmd) | ||
504 | { | ||
505 | memset(cmd, 0, sizeof(*cmd)); | ||
506 | CMD_SET_TYPE(cmd, CMD_INV_ALL); | ||
406 | } | 507 | } |
407 | 508 | ||
408 | /* | 509 | /* |
409 | * General queuing function for commands. Takes iommu->lock and calls | 510 | * Writes the command to the IOMMUs command buffer and informs the |
410 | * __iommu_queue_command(). | 511 | * hardware about the new command. |
411 | */ | 512 | */ |
412 | static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | 513 | static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) |
413 | { | 514 | { |
515 | u32 left, tail, head, next_tail; | ||
414 | unsigned long flags; | 516 | unsigned long flags; |
415 | int ret; | ||
416 | 517 | ||
518 | WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); | ||
519 | |||
520 | again: | ||
417 | spin_lock_irqsave(&iommu->lock, flags); | 521 | spin_lock_irqsave(&iommu->lock, flags); |
418 | ret = __iommu_queue_command(iommu, cmd); | ||
419 | if (!ret) | ||
420 | iommu->need_sync = true; | ||
421 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
422 | 522 | ||
423 | return ret; | 523 | head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); |
424 | } | 524 | tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); |
525 | next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; | ||
526 | left = (head - next_tail) % iommu->cmd_buf_size; | ||
425 | 527 | ||
426 | /* | 528 | if (left <= 2) { |
427 | * This function waits until an IOMMU has completed a completion | 529 | struct iommu_cmd sync_cmd; |
428 | * wait command | 530 | volatile u64 sem = 0; |
429 | */ | 531 | int ret; |
430 | static void __iommu_wait_for_completion(struct amd_iommu *iommu) | ||
431 | { | ||
432 | int ready = 0; | ||
433 | unsigned status = 0; | ||
434 | unsigned long i = 0; | ||
435 | 532 | ||
436 | INC_STATS_COUNTER(compl_wait); | 533 | build_completion_wait(&sync_cmd, (u64)&sem); |
534 | copy_cmd_to_buffer(iommu, &sync_cmd, tail); | ||
437 | 535 | ||
438 | while (!ready && (i < EXIT_LOOP_COUNT)) { | 536 | spin_unlock_irqrestore(&iommu->lock, flags); |
439 | ++i; | 537 | |
440 | /* wait for the bit to become one */ | 538 | if ((ret = wait_on_sem(&sem)) != 0) |
441 | status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); | 539 | return ret; |
442 | ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; | 540 | |
541 | goto again; | ||
443 | } | 542 | } |
444 | 543 | ||
445 | /* set bit back to zero */ | 544 | copy_cmd_to_buffer(iommu, cmd, tail); |
446 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; | 545 | |
447 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); | 546 | /* We need to sync now to make sure all commands are processed */ |
547 | iommu->need_sync = true; | ||
548 | |||
549 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
448 | 550 | ||
449 | if (unlikely(i == EXIT_LOOP_COUNT)) | 551 | return 0; |
450 | iommu->reset_in_progress = true; | ||
451 | } | 552 | } |
452 | 553 | ||
453 | /* | 554 | /* |
454 | * This function queues a completion wait command into the command | 555 | * This function queues a completion wait command into the command |
455 | * buffer of an IOMMU | 556 | * buffer of an IOMMU |
456 | */ | 557 | */ |
457 | static int __iommu_completion_wait(struct amd_iommu *iommu) | 558 | static int iommu_completion_wait(struct amd_iommu *iommu) |
458 | { | 559 | { |
459 | struct iommu_cmd cmd; | 560 | struct iommu_cmd cmd; |
561 | volatile u64 sem = 0; | ||
562 | int ret; | ||
460 | 563 | ||
461 | memset(&cmd, 0, sizeof(cmd)); | 564 | if (!iommu->need_sync) |
462 | cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; | 565 | return 0; |
463 | CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); | ||
464 | 566 | ||
465 | return __iommu_queue_command(iommu, &cmd); | 567 | build_completion_wait(&cmd, (u64)&sem); |
568 | |||
569 | ret = iommu_queue_command(iommu, &cmd); | ||
570 | if (ret) | ||
571 | return ret; | ||
572 | |||
573 | return wait_on_sem(&sem); | ||
466 | } | 574 | } |
467 | 575 | ||
468 | /* | 576 | static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) |
469 | * This function is called whenever we need to ensure that the IOMMU has | ||
470 | * completed execution of all commands we sent. It sends a | ||
471 | * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs | ||
472 | * us about that by writing a value to a physical address we pass with | ||
473 | * the command. | ||
474 | */ | ||
475 | static int iommu_completion_wait(struct amd_iommu *iommu) | ||
476 | { | 577 | { |
477 | int ret = 0; | 578 | struct iommu_cmd cmd; |
478 | unsigned long flags; | ||
479 | 579 | ||
480 | spin_lock_irqsave(&iommu->lock, flags); | 580 | build_inv_dte(&cmd, devid); |
481 | 581 | ||
482 | if (!iommu->need_sync) | 582 | return iommu_queue_command(iommu, &cmd); |
483 | goto out; | 583 | } |
484 | 584 | ||
485 | ret = __iommu_completion_wait(iommu); | 585 | static void iommu_flush_dte_all(struct amd_iommu *iommu) |
586 | { | ||
587 | u32 devid; | ||
486 | 588 | ||
487 | iommu->need_sync = false; | 589 | for (devid = 0; devid <= 0xffff; ++devid) |
590 | iommu_flush_dte(iommu, devid); | ||
488 | 591 | ||
489 | if (ret) | 592 | iommu_completion_wait(iommu); |
490 | goto out; | 593 | } |
491 | |||
492 | __iommu_wait_for_completion(iommu); | ||
493 | 594 | ||
494 | out: | 595 | /* |
495 | spin_unlock_irqrestore(&iommu->lock, flags); | 596 | * This function uses heavy locking and may disable irqs for some time. But |
597 | * this is no issue because it is only called during resume. | ||
598 | */ | ||
599 | static void iommu_flush_tlb_all(struct amd_iommu *iommu) | ||
600 | { | ||
601 | u32 dom_id; | ||
496 | 602 | ||
497 | if (iommu->reset_in_progress) | 603 | for (dom_id = 0; dom_id <= 0xffff; ++dom_id) { |
498 | reset_iommu_command_buffer(iommu); | 604 | struct iommu_cmd cmd; |
605 | build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | ||
606 | dom_id, 1); | ||
607 | iommu_queue_command(iommu, &cmd); | ||
608 | } | ||
499 | 609 | ||
500 | return 0; | 610 | iommu_completion_wait(iommu); |
501 | } | 611 | } |
502 | 612 | ||
503 | static void iommu_flush_complete(struct protection_domain *domain) | 613 | static void iommu_flush_all(struct amd_iommu *iommu) |
504 | { | 614 | { |
505 | int i; | 615 | struct iommu_cmd cmd; |
506 | 616 | ||
507 | for (i = 0; i < amd_iommus_present; ++i) { | 617 | build_inv_all(&cmd); |
508 | if (!domain->dev_iommu[i]) | ||
509 | continue; | ||
510 | 618 | ||
511 | /* | 619 | iommu_queue_command(iommu, &cmd); |
512 | * Devices of this domain are behind this IOMMU | 620 | iommu_completion_wait(iommu); |
513 | * We need to wait for completion of all commands. | 621 | } |
514 | */ | 622 | |
515 | iommu_completion_wait(amd_iommus[i]); | 623 | void iommu_flush_all_caches(struct amd_iommu *iommu) |
624 | { | ||
625 | if (iommu_feature(iommu, FEATURE_IA)) { | ||
626 | iommu_flush_all(iommu); | ||
627 | } else { | ||
628 | iommu_flush_dte_all(iommu); | ||
629 | iommu_flush_tlb_all(iommu); | ||
516 | } | 630 | } |
517 | } | 631 | } |
518 | 632 | ||
519 | /* | 633 | /* |
520 | * Command send function for invalidating a device table entry | 634 | * Command send function for flushing on-device TLB |
521 | */ | 635 | */ |
522 | static int iommu_flush_device(struct device *dev) | 636 | static int device_flush_iotlb(struct device *dev, u64 address, size_t size) |
523 | { | 637 | { |
638 | struct pci_dev *pdev = to_pci_dev(dev); | ||
524 | struct amd_iommu *iommu; | 639 | struct amd_iommu *iommu; |
525 | struct iommu_cmd cmd; | 640 | struct iommu_cmd cmd; |
526 | u16 devid; | 641 | u16 devid; |
642 | int qdep; | ||
527 | 643 | ||
644 | qdep = pci_ats_queue_depth(pdev); | ||
528 | devid = get_device_id(dev); | 645 | devid = get_device_id(dev); |
529 | iommu = amd_iommu_rlookup_table[devid]; | 646 | iommu = amd_iommu_rlookup_table[devid]; |
530 | 647 | ||
531 | /* Build command */ | 648 | build_inv_iotlb_pages(&cmd, devid, qdep, address, size); |
532 | memset(&cmd, 0, sizeof(cmd)); | ||
533 | CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); | ||
534 | cmd.data[0] = devid; | ||
535 | 649 | ||
536 | return iommu_queue_command(iommu, &cmd); | 650 | return iommu_queue_command(iommu, &cmd); |
537 | } | 651 | } |
538 | 652 | ||
539 | static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, | ||
540 | u16 domid, int pde, int s) | ||
541 | { | ||
542 | memset(cmd, 0, sizeof(*cmd)); | ||
543 | address &= PAGE_MASK; | ||
544 | CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); | ||
545 | cmd->data[1] |= domid; | ||
546 | cmd->data[2] = lower_32_bits(address); | ||
547 | cmd->data[3] = upper_32_bits(address); | ||
548 | if (s) /* size bit - we flush more than one 4kb page */ | ||
549 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; | ||
550 | if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ | ||
551 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; | ||
552 | } | ||
553 | |||
554 | /* | 653 | /* |
555 | * Generic command send function for invalidaing TLB entries | 654 | * Command send function for invalidating a device table entry |
556 | */ | 655 | */ |
557 | static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, | 656 | static int device_flush_dte(struct device *dev) |
558 | u64 address, u16 domid, int pde, int s) | ||
559 | { | 657 | { |
560 | struct iommu_cmd cmd; | 658 | struct amd_iommu *iommu; |
659 | struct pci_dev *pdev; | ||
660 | u16 devid; | ||
561 | int ret; | 661 | int ret; |
562 | 662 | ||
563 | __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s); | 663 | pdev = to_pci_dev(dev); |
664 | devid = get_device_id(dev); | ||
665 | iommu = amd_iommu_rlookup_table[devid]; | ||
564 | 666 | ||
565 | ret = iommu_queue_command(iommu, &cmd); | 667 | ret = iommu_flush_dte(iommu, devid); |
668 | if (ret) | ||
669 | return ret; | ||
670 | |||
671 | if (pci_ats_enabled(pdev)) | ||
672 | ret = device_flush_iotlb(dev, 0, ~0UL); | ||
566 | 673 | ||
567 | return ret; | 674 | return ret; |
568 | } | 675 | } |
@@ -572,23 +679,14 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, | |||
572 | * It invalidates a single PTE if the range to flush is within a single | 679 | * It invalidates a single PTE if the range to flush is within a single |
573 | * page. Otherwise it flushes the whole TLB of the IOMMU. | 680 | * page. Otherwise it flushes the whole TLB of the IOMMU. |
574 | */ | 681 | */ |
575 | static void __iommu_flush_pages(struct protection_domain *domain, | 682 | static void __domain_flush_pages(struct protection_domain *domain, |
576 | u64 address, size_t size, int pde) | 683 | u64 address, size_t size, int pde) |
577 | { | 684 | { |
578 | int s = 0, i; | 685 | struct iommu_dev_data *dev_data; |
579 | unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE); | 686 | struct iommu_cmd cmd; |
580 | 687 | int ret = 0, i; | |
581 | address &= PAGE_MASK; | ||
582 | |||
583 | if (pages > 1) { | ||
584 | /* | ||
585 | * If we have to flush more than one page, flush all | ||
586 | * TLB entries for this domain | ||
587 | */ | ||
588 | address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | ||
589 | s = 1; | ||
590 | } | ||
591 | 688 | ||
689 | build_inv_iommu_pages(&cmd, address, size, domain->id, pde); | ||
592 | 690 | ||
593 | for (i = 0; i < amd_iommus_present; ++i) { | 691 | for (i = 0; i < amd_iommus_present; ++i) { |
594 | if (!domain->dev_iommu[i]) | 692 | if (!domain->dev_iommu[i]) |
@@ -598,101 +696,70 @@ static void __iommu_flush_pages(struct protection_domain *domain, | |||
598 | * Devices of this domain are behind this IOMMU | 696 | * Devices of this domain are behind this IOMMU |
599 | * We need a TLB flush | 697 | * We need a TLB flush |
600 | */ | 698 | */ |
601 | iommu_queue_inv_iommu_pages(amd_iommus[i], address, | 699 | ret |= iommu_queue_command(amd_iommus[i], &cmd); |
602 | domain->id, pde, s); | 700 | } |
701 | |||
702 | list_for_each_entry(dev_data, &domain->dev_list, list) { | ||
703 | struct pci_dev *pdev = to_pci_dev(dev_data->dev); | ||
704 | |||
705 | if (!pci_ats_enabled(pdev)) | ||
706 | continue; | ||
707 | |||
708 | ret |= device_flush_iotlb(dev_data->dev, address, size); | ||
603 | } | 709 | } |
604 | 710 | ||
605 | return; | 711 | WARN_ON(ret); |
606 | } | 712 | } |
607 | 713 | ||
608 | static void iommu_flush_pages(struct protection_domain *domain, | 714 | static void domain_flush_pages(struct protection_domain *domain, |
609 | u64 address, size_t size) | 715 | u64 address, size_t size) |
610 | { | 716 | { |
611 | __iommu_flush_pages(domain, address, size, 0); | 717 | __domain_flush_pages(domain, address, size, 0); |
612 | } | 718 | } |
613 | 719 | ||
614 | /* Flush the whole IO/TLB for a given protection domain */ | 720 | /* Flush the whole IO/TLB for a given protection domain */ |
615 | static void iommu_flush_tlb(struct protection_domain *domain) | 721 | static void domain_flush_tlb(struct protection_domain *domain) |
616 | { | 722 | { |
617 | __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); | 723 | __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); |
618 | } | 724 | } |
619 | 725 | ||
620 | /* Flush the whole IO/TLB for a given protection domain - including PDE */ | 726 | /* Flush the whole IO/TLB for a given protection domain - including PDE */ |
621 | static void iommu_flush_tlb_pde(struct protection_domain *domain) | 727 | static void domain_flush_tlb_pde(struct protection_domain *domain) |
622 | { | 728 | { |
623 | __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); | 729 | __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); |
624 | } | ||
625 | |||
626 | |||
627 | /* | ||
628 | * This function flushes the DTEs for all devices in domain | ||
629 | */ | ||
630 | static void iommu_flush_domain_devices(struct protection_domain *domain) | ||
631 | { | ||
632 | struct iommu_dev_data *dev_data; | ||
633 | unsigned long flags; | ||
634 | |||
635 | spin_lock_irqsave(&domain->lock, flags); | ||
636 | |||
637 | list_for_each_entry(dev_data, &domain->dev_list, list) | ||
638 | iommu_flush_device(dev_data->dev); | ||
639 | |||
640 | spin_unlock_irqrestore(&domain->lock, flags); | ||
641 | } | 730 | } |
642 | 731 | ||
643 | static void iommu_flush_all_domain_devices(void) | 732 | static void domain_flush_complete(struct protection_domain *domain) |
644 | { | 733 | { |
645 | struct protection_domain *domain; | 734 | int i; |
646 | unsigned long flags; | ||
647 | 735 | ||
648 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); | 736 | for (i = 0; i < amd_iommus_present; ++i) { |
737 | if (!domain->dev_iommu[i]) | ||
738 | continue; | ||
649 | 739 | ||
650 | list_for_each_entry(domain, &amd_iommu_pd_list, list) { | 740 | /* |
651 | iommu_flush_domain_devices(domain); | 741 | * Devices of this domain are behind this IOMMU |
652 | iommu_flush_complete(domain); | 742 | * We need to wait for completion of all commands. |
743 | */ | ||
744 | iommu_completion_wait(amd_iommus[i]); | ||
653 | } | 745 | } |
654 | |||
655 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
656 | } | 746 | } |
657 | 747 | ||
658 | void amd_iommu_flush_all_devices(void) | ||
659 | { | ||
660 | iommu_flush_all_domain_devices(); | ||
661 | } | ||
662 | 748 | ||
663 | /* | 749 | /* |
664 | * This function uses heavy locking and may disable irqs for some time. But | 750 | * This function flushes the DTEs for all devices in domain |
665 | * this is no issue because it is only called during resume. | ||
666 | */ | 751 | */ |
667 | void amd_iommu_flush_all_domains(void) | 752 | static void domain_flush_devices(struct protection_domain *domain) |
668 | { | 753 | { |
669 | struct protection_domain *domain; | 754 | struct iommu_dev_data *dev_data; |
670 | unsigned long flags; | 755 | unsigned long flags; |
671 | 756 | ||
672 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); | 757 | spin_lock_irqsave(&domain->lock, flags); |
673 | |||
674 | list_for_each_entry(domain, &amd_iommu_pd_list, list) { | ||
675 | spin_lock(&domain->lock); | ||
676 | iommu_flush_tlb_pde(domain); | ||
677 | iommu_flush_complete(domain); | ||
678 | spin_unlock(&domain->lock); | ||
679 | } | ||
680 | |||
681 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
682 | } | ||
683 | |||
684 | static void reset_iommu_command_buffer(struct amd_iommu *iommu) | ||
685 | { | ||
686 | pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); | ||
687 | |||
688 | if (iommu->reset_in_progress) | ||
689 | panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); | ||
690 | 758 | ||
691 | amd_iommu_reset_cmd_buffer(iommu); | 759 | list_for_each_entry(dev_data, &domain->dev_list, list) |
692 | amd_iommu_flush_all_devices(); | 760 | device_flush_dte(dev_data->dev); |
693 | amd_iommu_flush_all_domains(); | ||
694 | 761 | ||
695 | iommu->reset_in_progress = false; | 762 | spin_unlock_irqrestore(&domain->lock, flags); |
696 | } | 763 | } |
697 | 764 | ||
698 | /**************************************************************************** | 765 | /**************************************************************************** |
@@ -1410,17 +1477,22 @@ static bool dma_ops_domain(struct protection_domain *domain) | |||
1410 | return domain->flags & PD_DMA_OPS_MASK; | 1477 | return domain->flags & PD_DMA_OPS_MASK; |
1411 | } | 1478 | } |
1412 | 1479 | ||
1413 | static void set_dte_entry(u16 devid, struct protection_domain *domain) | 1480 | static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) |
1414 | { | 1481 | { |
1415 | u64 pte_root = virt_to_phys(domain->pt_root); | 1482 | u64 pte_root = virt_to_phys(domain->pt_root); |
1483 | u32 flags = 0; | ||
1416 | 1484 | ||
1417 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) | 1485 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) |
1418 | << DEV_ENTRY_MODE_SHIFT; | 1486 | << DEV_ENTRY_MODE_SHIFT; |
1419 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | 1487 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; |
1420 | 1488 | ||
1421 | amd_iommu_dev_table[devid].data[2] = domain->id; | 1489 | if (ats) |
1422 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | 1490 | flags |= DTE_FLAG_IOTLB; |
1423 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | 1491 | |
1492 | amd_iommu_dev_table[devid].data[3] |= flags; | ||
1493 | amd_iommu_dev_table[devid].data[2] = domain->id; | ||
1494 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | ||
1495 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | ||
1424 | } | 1496 | } |
1425 | 1497 | ||
1426 | static void clear_dte_entry(u16 devid) | 1498 | static void clear_dte_entry(u16 devid) |
@@ -1437,34 +1509,42 @@ static void do_attach(struct device *dev, struct protection_domain *domain) | |||
1437 | { | 1509 | { |
1438 | struct iommu_dev_data *dev_data; | 1510 | struct iommu_dev_data *dev_data; |
1439 | struct amd_iommu *iommu; | 1511 | struct amd_iommu *iommu; |
1512 | struct pci_dev *pdev; | ||
1513 | bool ats = false; | ||
1440 | u16 devid; | 1514 | u16 devid; |
1441 | 1515 | ||
1442 | devid = get_device_id(dev); | 1516 | devid = get_device_id(dev); |
1443 | iommu = amd_iommu_rlookup_table[devid]; | 1517 | iommu = amd_iommu_rlookup_table[devid]; |
1444 | dev_data = get_dev_data(dev); | 1518 | dev_data = get_dev_data(dev); |
1519 | pdev = to_pci_dev(dev); | ||
1520 | |||
1521 | if (amd_iommu_iotlb_sup) | ||
1522 | ats = pci_ats_enabled(pdev); | ||
1445 | 1523 | ||
1446 | /* Update data structures */ | 1524 | /* Update data structures */ |
1447 | dev_data->domain = domain; | 1525 | dev_data->domain = domain; |
1448 | list_add(&dev_data->list, &domain->dev_list); | 1526 | list_add(&dev_data->list, &domain->dev_list); |
1449 | set_dte_entry(devid, domain); | 1527 | set_dte_entry(devid, domain, ats); |
1450 | 1528 | ||
1451 | /* Do reference counting */ | 1529 | /* Do reference counting */ |
1452 | domain->dev_iommu[iommu->index] += 1; | 1530 | domain->dev_iommu[iommu->index] += 1; |
1453 | domain->dev_cnt += 1; | 1531 | domain->dev_cnt += 1; |
1454 | 1532 | ||
1455 | /* Flush the DTE entry */ | 1533 | /* Flush the DTE entry */ |
1456 | iommu_flush_device(dev); | 1534 | device_flush_dte(dev); |
1457 | } | 1535 | } |
1458 | 1536 | ||
1459 | static void do_detach(struct device *dev) | 1537 | static void do_detach(struct device *dev) |
1460 | { | 1538 | { |
1461 | struct iommu_dev_data *dev_data; | 1539 | struct iommu_dev_data *dev_data; |
1462 | struct amd_iommu *iommu; | 1540 | struct amd_iommu *iommu; |
1541 | struct pci_dev *pdev; | ||
1463 | u16 devid; | 1542 | u16 devid; |
1464 | 1543 | ||
1465 | devid = get_device_id(dev); | 1544 | devid = get_device_id(dev); |
1466 | iommu = amd_iommu_rlookup_table[devid]; | 1545 | iommu = amd_iommu_rlookup_table[devid]; |
1467 | dev_data = get_dev_data(dev); | 1546 | dev_data = get_dev_data(dev); |
1547 | pdev = to_pci_dev(dev); | ||
1468 | 1548 | ||
1469 | /* decrease reference counters */ | 1549 | /* decrease reference counters */ |
1470 | dev_data->domain->dev_iommu[iommu->index] -= 1; | 1550 | dev_data->domain->dev_iommu[iommu->index] -= 1; |
@@ -1476,7 +1556,7 @@ static void do_detach(struct device *dev) | |||
1476 | clear_dte_entry(devid); | 1556 | clear_dte_entry(devid); |
1477 | 1557 | ||
1478 | /* Flush the DTE entry */ | 1558 | /* Flush the DTE entry */ |
1479 | iommu_flush_device(dev); | 1559 | device_flush_dte(dev); |
1480 | } | 1560 | } |
1481 | 1561 | ||
1482 | /* | 1562 | /* |
@@ -1539,9 +1619,13 @@ out_unlock: | |||
1539 | static int attach_device(struct device *dev, | 1619 | static int attach_device(struct device *dev, |
1540 | struct protection_domain *domain) | 1620 | struct protection_domain *domain) |
1541 | { | 1621 | { |
1622 | struct pci_dev *pdev = to_pci_dev(dev); | ||
1542 | unsigned long flags; | 1623 | unsigned long flags; |
1543 | int ret; | 1624 | int ret; |
1544 | 1625 | ||
1626 | if (amd_iommu_iotlb_sup) | ||
1627 | pci_enable_ats(pdev, PAGE_SHIFT); | ||
1628 | |||
1545 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 1629 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
1546 | ret = __attach_device(dev, domain); | 1630 | ret = __attach_device(dev, domain); |
1547 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1631 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
@@ -1551,7 +1635,7 @@ static int attach_device(struct device *dev, | |||
1551 | * left the caches in the IOMMU dirty. So we have to flush | 1635 | * left the caches in the IOMMU dirty. So we have to flush |
1552 | * here to evict all dirty stuff. | 1636 | * here to evict all dirty stuff. |
1553 | */ | 1637 | */ |
1554 | iommu_flush_tlb_pde(domain); | 1638 | domain_flush_tlb_pde(domain); |
1555 | 1639 | ||
1556 | return ret; | 1640 | return ret; |
1557 | } | 1641 | } |
@@ -1598,12 +1682,16 @@ static void __detach_device(struct device *dev) | |||
1598 | */ | 1682 | */ |
1599 | static void detach_device(struct device *dev) | 1683 | static void detach_device(struct device *dev) |
1600 | { | 1684 | { |
1685 | struct pci_dev *pdev = to_pci_dev(dev); | ||
1601 | unsigned long flags; | 1686 | unsigned long flags; |
1602 | 1687 | ||
1603 | /* lock device table */ | 1688 | /* lock device table */ |
1604 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 1689 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
1605 | __detach_device(dev); | 1690 | __detach_device(dev); |
1606 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1691 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1692 | |||
1693 | if (amd_iommu_iotlb_sup && pci_ats_enabled(pdev)) | ||
1694 | pci_disable_ats(pdev); | ||
1607 | } | 1695 | } |
1608 | 1696 | ||
1609 | /* | 1697 | /* |
@@ -1692,7 +1780,7 @@ static int device_change_notifier(struct notifier_block *nb, | |||
1692 | goto out; | 1780 | goto out; |
1693 | } | 1781 | } |
1694 | 1782 | ||
1695 | iommu_flush_device(dev); | 1783 | device_flush_dte(dev); |
1696 | iommu_completion_wait(iommu); | 1784 | iommu_completion_wait(iommu); |
1697 | 1785 | ||
1698 | out: | 1786 | out: |
@@ -1753,8 +1841,9 @@ static void update_device_table(struct protection_domain *domain) | |||
1753 | struct iommu_dev_data *dev_data; | 1841 | struct iommu_dev_data *dev_data; |
1754 | 1842 | ||
1755 | list_for_each_entry(dev_data, &domain->dev_list, list) { | 1843 | list_for_each_entry(dev_data, &domain->dev_list, list) { |
1844 | struct pci_dev *pdev = to_pci_dev(dev_data->dev); | ||
1756 | u16 devid = get_device_id(dev_data->dev); | 1845 | u16 devid = get_device_id(dev_data->dev); |
1757 | set_dte_entry(devid, domain); | 1846 | set_dte_entry(devid, domain, pci_ats_enabled(pdev)); |
1758 | } | 1847 | } |
1759 | } | 1848 | } |
1760 | 1849 | ||
@@ -1764,8 +1853,9 @@ static void update_domain(struct protection_domain *domain) | |||
1764 | return; | 1853 | return; |
1765 | 1854 | ||
1766 | update_device_table(domain); | 1855 | update_device_table(domain); |
1767 | iommu_flush_domain_devices(domain); | 1856 | |
1768 | iommu_flush_tlb_pde(domain); | 1857 | domain_flush_devices(domain); |
1858 | domain_flush_tlb_pde(domain); | ||
1769 | 1859 | ||
1770 | domain->updated = false; | 1860 | domain->updated = false; |
1771 | } | 1861 | } |
@@ -1924,10 +2014,10 @@ retry: | |||
1924 | ADD_STATS_COUNTER(alloced_io_mem, size); | 2014 | ADD_STATS_COUNTER(alloced_io_mem, size); |
1925 | 2015 | ||
1926 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { | 2016 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { |
1927 | iommu_flush_tlb(&dma_dom->domain); | 2017 | domain_flush_tlb(&dma_dom->domain); |
1928 | dma_dom->need_flush = false; | 2018 | dma_dom->need_flush = false; |
1929 | } else if (unlikely(amd_iommu_np_cache)) | 2019 | } else if (unlikely(amd_iommu_np_cache)) |
1930 | iommu_flush_pages(&dma_dom->domain, address, size); | 2020 | domain_flush_pages(&dma_dom->domain, address, size); |
1931 | 2021 | ||
1932 | out: | 2022 | out: |
1933 | return address; | 2023 | return address; |
@@ -1976,7 +2066,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, | |||
1976 | dma_ops_free_addresses(dma_dom, dma_addr, pages); | 2066 | dma_ops_free_addresses(dma_dom, dma_addr, pages); |
1977 | 2067 | ||
1978 | if (amd_iommu_unmap_flush || dma_dom->need_flush) { | 2068 | if (amd_iommu_unmap_flush || dma_dom->need_flush) { |
1979 | iommu_flush_pages(&dma_dom->domain, flush_addr, size); | 2069 | domain_flush_pages(&dma_dom->domain, flush_addr, size); |
1980 | dma_dom->need_flush = false; | 2070 | dma_dom->need_flush = false; |
1981 | } | 2071 | } |
1982 | } | 2072 | } |
@@ -2012,7 +2102,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, | |||
2012 | if (addr == DMA_ERROR_CODE) | 2102 | if (addr == DMA_ERROR_CODE) |
2013 | goto out; | 2103 | goto out; |
2014 | 2104 | ||
2015 | iommu_flush_complete(domain); | 2105 | domain_flush_complete(domain); |
2016 | 2106 | ||
2017 | out: | 2107 | out: |
2018 | spin_unlock_irqrestore(&domain->lock, flags); | 2108 | spin_unlock_irqrestore(&domain->lock, flags); |
@@ -2039,7 +2129,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, | |||
2039 | 2129 | ||
2040 | __unmap_single(domain->priv, dma_addr, size, dir); | 2130 | __unmap_single(domain->priv, dma_addr, size, dir); |
2041 | 2131 | ||
2042 | iommu_flush_complete(domain); | 2132 | domain_flush_complete(domain); |
2043 | 2133 | ||
2044 | spin_unlock_irqrestore(&domain->lock, flags); | 2134 | spin_unlock_irqrestore(&domain->lock, flags); |
2045 | } | 2135 | } |
@@ -2104,7 +2194,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
2104 | goto unmap; | 2194 | goto unmap; |
2105 | } | 2195 | } |
2106 | 2196 | ||
2107 | iommu_flush_complete(domain); | 2197 | domain_flush_complete(domain); |
2108 | 2198 | ||
2109 | out: | 2199 | out: |
2110 | spin_unlock_irqrestore(&domain->lock, flags); | 2200 | spin_unlock_irqrestore(&domain->lock, flags); |
@@ -2150,7 +2240,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
2150 | s->dma_address = s->dma_length = 0; | 2240 | s->dma_address = s->dma_length = 0; |
2151 | } | 2241 | } |
2152 | 2242 | ||
2153 | iommu_flush_complete(domain); | 2243 | domain_flush_complete(domain); |
2154 | 2244 | ||
2155 | spin_unlock_irqrestore(&domain->lock, flags); | 2245 | spin_unlock_irqrestore(&domain->lock, flags); |
2156 | } | 2246 | } |
@@ -2200,7 +2290,7 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
2200 | goto out_free; | 2290 | goto out_free; |
2201 | } | 2291 | } |
2202 | 2292 | ||
2203 | iommu_flush_complete(domain); | 2293 | domain_flush_complete(domain); |
2204 | 2294 | ||
2205 | spin_unlock_irqrestore(&domain->lock, flags); | 2295 | spin_unlock_irqrestore(&domain->lock, flags); |
2206 | 2296 | ||
@@ -2232,7 +2322,7 @@ static void free_coherent(struct device *dev, size_t size, | |||
2232 | 2322 | ||
2233 | __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); | 2323 | __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); |
2234 | 2324 | ||
2235 | iommu_flush_complete(domain); | 2325 | domain_flush_complete(domain); |
2236 | 2326 | ||
2237 | spin_unlock_irqrestore(&domain->lock, flags); | 2327 | spin_unlock_irqrestore(&domain->lock, flags); |
2238 | 2328 | ||
@@ -2476,7 +2566,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, | |||
2476 | if (!iommu) | 2566 | if (!iommu) |
2477 | return; | 2567 | return; |
2478 | 2568 | ||
2479 | iommu_flush_device(dev); | 2569 | device_flush_dte(dev); |
2480 | iommu_completion_wait(iommu); | 2570 | iommu_completion_wait(iommu); |
2481 | } | 2571 | } |
2482 | 2572 | ||
@@ -2542,7 +2632,7 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, | |||
2542 | unmap_size = iommu_unmap_page(domain, iova, page_size); | 2632 | unmap_size = iommu_unmap_page(domain, iova, page_size); |
2543 | mutex_unlock(&domain->api_lock); | 2633 | mutex_unlock(&domain->api_lock); |
2544 | 2634 | ||
2545 | iommu_flush_tlb_pde(domain); | 2635 | domain_flush_tlb_pde(domain); |
2546 | 2636 | ||
2547 | return get_order(unmap_size); | 2637 | return get_order(unmap_size); |
2548 | } | 2638 | } |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 246d727b65b7..9179c21120a8 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -137,6 +137,7 @@ int amd_iommus_present; | |||
137 | 137 | ||
138 | /* IOMMUs have a non-present cache? */ | 138 | /* IOMMUs have a non-present cache? */ |
139 | bool amd_iommu_np_cache __read_mostly; | 139 | bool amd_iommu_np_cache __read_mostly; |
140 | bool amd_iommu_iotlb_sup __read_mostly = true; | ||
140 | 141 | ||
141 | /* | 142 | /* |
142 | * The ACPI table parsing functions set this variable on an error | 143 | * The ACPI table parsing functions set this variable on an error |
@@ -180,6 +181,12 @@ static u32 dev_table_size; /* size of the device table */ | |||
180 | static u32 alias_table_size; /* size of the alias table */ | 181 | static u32 alias_table_size; /* size of the alias table */ |
181 | static u32 rlookup_table_size; /* size of the rlookup table */ | 182 | static u32 rlookup_table_size; /* size of the rlookup table */ |
182 | 183 | ||
184 | /* | ||
185 | * This function flushes all internal caches of | ||
186 | * the IOMMU used by this driver. | ||
187 | */ | ||
188 | extern void iommu_flush_all_caches(struct amd_iommu *iommu); | ||
189 | |||
183 | static inline void update_last_devid(u16 devid) | 190 | static inline void update_last_devid(u16 devid) |
184 | { | 191 | { |
185 | if (devid > amd_iommu_last_bdf) | 192 | if (devid > amd_iommu_last_bdf) |
@@ -293,9 +300,23 @@ static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
293 | /* Function to enable the hardware */ | 300 | /* Function to enable the hardware */ |
294 | static void iommu_enable(struct amd_iommu *iommu) | 301 | static void iommu_enable(struct amd_iommu *iommu) |
295 | { | 302 | { |
296 | printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n", | 303 | static const char * const feat_str[] = { |
304 | "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", | ||
305 | "IA", "GA", "HE", "PC", NULL | ||
306 | }; | ||
307 | int i; | ||
308 | |||
309 | printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx", | ||
297 | dev_name(&iommu->dev->dev), iommu->cap_ptr); | 310 | dev_name(&iommu->dev->dev), iommu->cap_ptr); |
298 | 311 | ||
312 | if (iommu->cap & (1 << IOMMU_CAP_EFR)) { | ||
313 | printk(KERN_CONT " extended features: "); | ||
314 | for (i = 0; feat_str[i]; ++i) | ||
315 | if (iommu_feature(iommu, (1ULL << i))) | ||
316 | printk(KERN_CONT " %s", feat_str[i]); | ||
317 | } | ||
318 | printk(KERN_CONT "\n"); | ||
319 | |||
299 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); | 320 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); |
300 | } | 321 | } |
301 | 322 | ||
@@ -651,7 +672,7 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) | |||
651 | static void __init init_iommu_from_pci(struct amd_iommu *iommu) | 672 | static void __init init_iommu_from_pci(struct amd_iommu *iommu) |
652 | { | 673 | { |
653 | int cap_ptr = iommu->cap_ptr; | 674 | int cap_ptr = iommu->cap_ptr; |
654 | u32 range, misc; | 675 | u32 range, misc, low, high; |
655 | int i, j; | 676 | int i, j; |
656 | 677 | ||
657 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, | 678 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, |
@@ -667,6 +688,15 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) | |||
667 | MMIO_GET_LD(range)); | 688 | MMIO_GET_LD(range)); |
668 | iommu->evt_msi_num = MMIO_MSI_NUM(misc); | 689 | iommu->evt_msi_num = MMIO_MSI_NUM(misc); |
669 | 690 | ||
691 | if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) | ||
692 | amd_iommu_iotlb_sup = false; | ||
693 | |||
694 | /* read extended feature bits */ | ||
695 | low = readl(iommu->mmio_base + MMIO_EXT_FEATURES); | ||
696 | high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4); | ||
697 | |||
698 | iommu->features = ((u64)high << 32) | low; | ||
699 | |||
670 | if (!is_rd890_iommu(iommu->dev)) | 700 | if (!is_rd890_iommu(iommu->dev)) |
671 | return; | 701 | return; |
672 | 702 | ||
@@ -1004,10 +1034,11 @@ static int iommu_setup_msi(struct amd_iommu *iommu) | |||
1004 | if (pci_enable_msi(iommu->dev)) | 1034 | if (pci_enable_msi(iommu->dev)) |
1005 | return 1; | 1035 | return 1; |
1006 | 1036 | ||
1007 | r = request_irq(iommu->dev->irq, amd_iommu_int_handler, | 1037 | r = request_threaded_irq(iommu->dev->irq, |
1008 | IRQF_SAMPLE_RANDOM, | 1038 | amd_iommu_int_handler, |
1009 | "AMD-Vi", | 1039 | amd_iommu_int_thread, |
1010 | NULL); | 1040 | 0, "AMD-Vi", |
1041 | iommu->dev); | ||
1011 | 1042 | ||
1012 | if (r) { | 1043 | if (r) { |
1013 | pci_disable_msi(iommu->dev); | 1044 | pci_disable_msi(iommu->dev); |
@@ -1244,6 +1275,7 @@ static void enable_iommus(void) | |||
1244 | iommu_set_exclusion_range(iommu); | 1275 | iommu_set_exclusion_range(iommu); |
1245 | iommu_init_msi(iommu); | 1276 | iommu_init_msi(iommu); |
1246 | iommu_enable(iommu); | 1277 | iommu_enable(iommu); |
1278 | iommu_flush_all_caches(iommu); | ||
1247 | } | 1279 | } |
1248 | } | 1280 | } |
1249 | 1281 | ||
@@ -1274,8 +1306,8 @@ static void amd_iommu_resume(void) | |||
1274 | * we have to flush after the IOMMUs are enabled because a | 1306 | * we have to flush after the IOMMUs are enabled because a |
1275 | * disabled IOMMU will never execute the commands we send | 1307 | * disabled IOMMU will never execute the commands we send |
1276 | */ | 1308 | */ |
1277 | amd_iommu_flush_all_devices(); | 1309 | for_each_iommu(iommu) |
1278 | amd_iommu_flush_all_domains(); | 1310 | iommu_flush_all_caches(iommu); |
1279 | } | 1311 | } |
1280 | 1312 | ||
1281 | static int amd_iommu_suspend(void) | 1313 | static int amd_iommu_suspend(void) |
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index cd1ffed4ee22..289e92862fd9 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c | |||
@@ -177,7 +177,6 @@ static struct clocksource clocksource_apbt = { | |||
177 | .rating = APBT_CLOCKSOURCE_RATING, | 177 | .rating = APBT_CLOCKSOURCE_RATING, |
178 | .read = apbt_read_clocksource, | 178 | .read = apbt_read_clocksource, |
179 | .mask = APBT_MASK, | 179 | .mask = APBT_MASK, |
180 | .shift = APBT_SHIFT, | ||
181 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 180 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
182 | .resume = apbt_restart_clocksource, | 181 | .resume = apbt_restart_clocksource, |
183 | }; | 182 | }; |
@@ -543,14 +542,7 @@ static int apbt_clocksource_register(void) | |||
543 | if (t1 == apbt_read_clocksource(&clocksource_apbt)) | 542 | if (t1 == apbt_read_clocksource(&clocksource_apbt)) |
544 | panic("APBT counter not counting. APBT disabled\n"); | 543 | panic("APBT counter not counting. APBT disabled\n"); |
545 | 544 | ||
546 | /* | 545 | clocksource_register_khz(&clocksource_apbt, (u32)apbt_freq*1000); |
547 | * initialize and register APBT clocksource | ||
548 | * convert that to ns/clock cycle | ||
549 | * mult = (ns/c) * 2^APBT_SHIFT | ||
550 | */ | ||
551 | clocksource_apbt.mult = div_sc(MSEC_PER_SEC, | ||
552 | (unsigned long) apbt_freq, APBT_SHIFT); | ||
553 | clocksource_register(&clocksource_apbt); | ||
554 | 546 | ||
555 | return 0; | 547 | return 0; |
556 | } | 548 | } |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 73fb469908c6..3d2661ca6542 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -30,6 +30,22 @@ | |||
30 | #include <asm/amd_nb.h> | 30 | #include <asm/amd_nb.h> |
31 | #include <asm/x86_init.h> | 31 | #include <asm/x86_init.h> |
32 | 32 | ||
33 | /* | ||
34 | * Using 512M as goal, in case kexec will load kernel_big | ||
35 | * that will do the on-position decompress, and could overlap with | ||
36 | * with the gart aperture that is used. | ||
37 | * Sequence: | ||
38 | * kernel_small | ||
39 | * ==> kexec (with kdump trigger path or gart still enabled) | ||
40 | * ==> kernel_small (gart area become e820_reserved) | ||
41 | * ==> kexec (with kdump trigger path or gart still enabled) | ||
42 | * ==> kerne_big (uncompressed size will be big than 64M or 128M) | ||
43 | * So don't use 512M below as gart iommu, leave the space for kernel | ||
44 | * code for safe. | ||
45 | */ | ||
46 | #define GART_MIN_ADDR (512ULL << 20) | ||
47 | #define GART_MAX_ADDR (1ULL << 32) | ||
48 | |||
33 | int gart_iommu_aperture; | 49 | int gart_iommu_aperture; |
34 | int gart_iommu_aperture_disabled __initdata; | 50 | int gart_iommu_aperture_disabled __initdata; |
35 | int gart_iommu_aperture_allowed __initdata; | 51 | int gart_iommu_aperture_allowed __initdata; |
@@ -70,21 +86,9 @@ static u32 __init allocate_aperture(void) | |||
70 | * memory. Unfortunately we cannot move it up because that would | 86 | * memory. Unfortunately we cannot move it up because that would |
71 | * make the IOMMU useless. | 87 | * make the IOMMU useless. |
72 | */ | 88 | */ |
73 | /* | 89 | addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR, |
74 | * using 512M as goal, in case kexec will load kernel_big | 90 | aper_size, aper_size); |
75 | * that will do the on position decompress, and could overlap with | 91 | if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) { |
76 | * that position with gart that is used. | ||
77 | * sequende: | ||
78 | * kernel_small | ||
79 | * ==> kexec (with kdump trigger path or previous doesn't shutdown gart) | ||
80 | * ==> kernel_small(gart area become e820_reserved) | ||
81 | * ==> kexec (with kdump trigger path or previous doesn't shutdown gart) | ||
82 | * ==> kerne_big (uncompressed size will be big than 64M or 128M) | ||
83 | * so don't use 512M below as gart iommu, leave the space for kernel | ||
84 | * code for safe | ||
85 | */ | ||
86 | addr = memblock_find_in_range(0, 1ULL<<32, aper_size, 512ULL<<20); | ||
87 | if (addr == MEMBLOCK_ERROR || addr + aper_size > 0xffffffff) { | ||
88 | printk(KERN_ERR | 92 | printk(KERN_ERR |
89 | "Cannot allocate aperture memory hole (%lx,%uK)\n", | 93 | "Cannot allocate aperture memory hole (%lx,%uK)\n", |
90 | addr, aper_size>>10); | 94 | addr, aper_size>>10); |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index fabf01eff771..f92a8e5d1e21 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -505,7 +505,7 @@ static void __cpuinit setup_APIC_timer(void) | |||
505 | { | 505 | { |
506 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); | 506 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); |
507 | 507 | ||
508 | if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) { | 508 | if (this_cpu_has(X86_FEATURE_ARAT)) { |
509 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; | 509 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; |
510 | /* Make LAPIC timer preferable over percpu HPET */ | 510 | /* Make LAPIC timer preferable over percpu HPET */ |
511 | lapic_clockevent.rating = 150; | 511 | lapic_clockevent.rating = 150; |
@@ -1237,6 +1237,17 @@ void __cpuinit setup_local_APIC(void) | |||
1237 | /* always use the value from LDR */ | 1237 | /* always use the value from LDR */ |
1238 | early_per_cpu(x86_cpu_to_logical_apicid, cpu) = | 1238 | early_per_cpu(x86_cpu_to_logical_apicid, cpu) = |
1239 | logical_smp_processor_id(); | 1239 | logical_smp_processor_id(); |
1240 | |||
1241 | /* | ||
1242 | * Some NUMA implementations (NUMAQ) don't initialize apicid to | ||
1243 | * node mapping during NUMA init. Now that logical apicid is | ||
1244 | * guaranteed to be known, give it another chance. This is already | ||
1245 | * a bit too late - percpu allocation has already happened without | ||
1246 | * proper NUMA affinity. | ||
1247 | */ | ||
1248 | if (apic->x86_32_numa_cpu_node) | ||
1249 | set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu), | ||
1250 | apic->x86_32_numa_cpu_node(cpu)); | ||
1240 | #endif | 1251 | #endif |
1241 | 1252 | ||
1242 | /* | 1253 | /* |
@@ -1812,30 +1823,41 @@ void smp_spurious_interrupt(struct pt_regs *regs) | |||
1812 | */ | 1823 | */ |
1813 | void smp_error_interrupt(struct pt_regs *regs) | 1824 | void smp_error_interrupt(struct pt_regs *regs) |
1814 | { | 1825 | { |
1815 | u32 v, v1; | 1826 | u32 v0, v1; |
1827 | u32 i = 0; | ||
1828 | static const char * const error_interrupt_reason[] = { | ||
1829 | "Send CS error", /* APIC Error Bit 0 */ | ||
1830 | "Receive CS error", /* APIC Error Bit 1 */ | ||
1831 | "Send accept error", /* APIC Error Bit 2 */ | ||
1832 | "Receive accept error", /* APIC Error Bit 3 */ | ||
1833 | "Redirectable IPI", /* APIC Error Bit 4 */ | ||
1834 | "Send illegal vector", /* APIC Error Bit 5 */ | ||
1835 | "Received illegal vector", /* APIC Error Bit 6 */ | ||
1836 | "Illegal register address", /* APIC Error Bit 7 */ | ||
1837 | }; | ||
1816 | 1838 | ||
1817 | exit_idle(); | 1839 | exit_idle(); |
1818 | irq_enter(); | 1840 | irq_enter(); |
1819 | /* First tickle the hardware, only then report what went on. -- REW */ | 1841 | /* First tickle the hardware, only then report what went on. -- REW */ |
1820 | v = apic_read(APIC_ESR); | 1842 | v0 = apic_read(APIC_ESR); |
1821 | apic_write(APIC_ESR, 0); | 1843 | apic_write(APIC_ESR, 0); |
1822 | v1 = apic_read(APIC_ESR); | 1844 | v1 = apic_read(APIC_ESR); |
1823 | ack_APIC_irq(); | 1845 | ack_APIC_irq(); |
1824 | atomic_inc(&irq_err_count); | 1846 | atomic_inc(&irq_err_count); |
1825 | 1847 | ||
1826 | /* | 1848 | apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)", |
1827 | * Here is what the APIC error bits mean: | 1849 | smp_processor_id(), v0 , v1); |
1828 | * 0: Send CS error | 1850 | |
1829 | * 1: Receive CS error | 1851 | v1 = v1 & 0xff; |
1830 | * 2: Send accept error | 1852 | while (v1) { |
1831 | * 3: Receive accept error | 1853 | if (v1 & 0x1) |
1832 | * 4: Reserved | 1854 | apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); |
1833 | * 5: Send illegal vector | 1855 | i++; |
1834 | * 6: Received illegal vector | 1856 | v1 >>= 1; |
1835 | * 7: Illegal register address | 1857 | }; |
1836 | */ | 1858 | |
1837 | pr_debug("APIC error on CPU%d: %02x(%02x)\n", | 1859 | apic_printk(APIC_DEBUG, KERN_CONT "\n"); |
1838 | smp_processor_id(), v , v1); | 1860 | |
1839 | irq_exit(); | 1861 | irq_exit(); |
1840 | } | 1862 | } |
1841 | 1863 | ||
@@ -2003,21 +2025,6 @@ void default_init_apic_ldr(void) | |||
2003 | apic_write(APIC_LDR, val); | 2025 | apic_write(APIC_LDR, val); |
2004 | } | 2026 | } |
2005 | 2027 | ||
2006 | #ifdef CONFIG_X86_32 | ||
2007 | int default_x86_32_numa_cpu_node(int cpu) | ||
2008 | { | ||
2009 | #ifdef CONFIG_NUMA | ||
2010 | int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); | ||
2011 | |||
2012 | if (apicid != BAD_APICID) | ||
2013 | return __apicid_to_node[apicid]; | ||
2014 | return NUMA_NO_NODE; | ||
2015 | #else | ||
2016 | return 0; | ||
2017 | #endif | ||
2018 | } | ||
2019 | #endif | ||
2020 | |||
2021 | /* | 2028 | /* |
2022 | * Power management | 2029 | * Power management |
2023 | */ | 2030 | */ |
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index f1baa2dc087a..775b82bc655c 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c | |||
@@ -119,14 +119,6 @@ static void noop_apic_write(u32 reg, u32 v) | |||
119 | WARN_ON_ONCE(cpu_has_apic && !disable_apic); | 119 | WARN_ON_ONCE(cpu_has_apic && !disable_apic); |
120 | } | 120 | } |
121 | 121 | ||
122 | #ifdef CONFIG_X86_32 | ||
123 | static int noop_x86_32_numa_cpu_node(int cpu) | ||
124 | { | ||
125 | /* we're always on node 0 */ | ||
126 | return 0; | ||
127 | } | ||
128 | #endif | ||
129 | |||
130 | struct apic apic_noop = { | 122 | struct apic apic_noop = { |
131 | .name = "noop", | 123 | .name = "noop", |
132 | .probe = noop_probe, | 124 | .probe = noop_probe, |
@@ -195,6 +187,5 @@ struct apic apic_noop = { | |||
195 | 187 | ||
196 | #ifdef CONFIG_X86_32 | 188 | #ifdef CONFIG_X86_32 |
197 | .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid, | 189 | .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid, |
198 | .x86_32_numa_cpu_node = noop_x86_32_numa_cpu_node, | ||
199 | #endif | 190 | #endif |
200 | }; | 191 | }; |
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 541a2e431659..d84ac5a584b5 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c | |||
@@ -253,5 +253,4 @@ struct apic apic_bigsmp = { | |||
253 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | 253 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, |
254 | 254 | ||
255 | .x86_32_early_logical_apicid = bigsmp_early_logical_apicid, | 255 | .x86_32_early_logical_apicid = bigsmp_early_logical_apicid, |
256 | .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node, | ||
257 | }; | 256 | }; |
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 3e9de4854c5b..70533de5bd29 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
@@ -510,11 +510,6 @@ static void es7000_setup_apic_routing(void) | |||
510 | nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); | 510 | nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); |
511 | } | 511 | } |
512 | 512 | ||
513 | static int es7000_numa_cpu_node(int cpu) | ||
514 | { | ||
515 | return 0; | ||
516 | } | ||
517 | |||
518 | static int es7000_cpu_present_to_apicid(int mps_cpu) | 513 | static int es7000_cpu_present_to_apicid(int mps_cpu) |
519 | { | 514 | { |
520 | if (!mps_cpu) | 515 | if (!mps_cpu) |
@@ -688,7 +683,6 @@ struct apic __refdata apic_es7000_cluster = { | |||
688 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | 683 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, |
689 | 684 | ||
690 | .x86_32_early_logical_apicid = es7000_early_logical_apicid, | 685 | .x86_32_early_logical_apicid = es7000_early_logical_apicid, |
691 | .x86_32_numa_cpu_node = es7000_numa_cpu_node, | ||
692 | }; | 686 | }; |
693 | 687 | ||
694 | struct apic __refdata apic_es7000 = { | 688 | struct apic __refdata apic_es7000 = { |
@@ -752,5 +746,4 @@ struct apic __refdata apic_es7000 = { | |||
752 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | 746 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, |
753 | 747 | ||
754 | .x86_32_early_logical_apicid = es7000_early_logical_apicid, | 748 | .x86_32_early_logical_apicid = es7000_early_logical_apicid, |
755 | .x86_32_numa_cpu_node = es7000_numa_cpu_node, | ||
756 | }; | 749 | }; |
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 6273eee5134b..30f13319e24b 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c | |||
@@ -48,8 +48,6 @@ | |||
48 | #include <asm/e820.h> | 48 | #include <asm/e820.h> |
49 | #include <asm/ipi.h> | 49 | #include <asm/ipi.h> |
50 | 50 | ||
51 | #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) | ||
52 | |||
53 | int found_numaq; | 51 | int found_numaq; |
54 | 52 | ||
55 | /* | 53 | /* |
@@ -79,31 +77,20 @@ int quad_local_to_mp_bus_id[NR_CPUS/4][4]; | |||
79 | static inline void numaq_register_node(int node, struct sys_cfg_data *scd) | 77 | static inline void numaq_register_node(int node, struct sys_cfg_data *scd) |
80 | { | 78 | { |
81 | struct eachquadmem *eq = scd->eq + node; | 79 | struct eachquadmem *eq = scd->eq + node; |
80 | u64 start = (u64)(eq->hi_shrd_mem_start - eq->priv_mem_size) << 20; | ||
81 | u64 end = (u64)(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size) << 20; | ||
82 | int ret; | ||
82 | 83 | ||
83 | node_set_online(node); | 84 | node_set(node, numa_nodes_parsed); |
84 | 85 | ret = numa_add_memblk(node, start, end); | |
85 | /* Convert to pages */ | 86 | BUG_ON(ret < 0); |
86 | node_start_pfn[node] = | ||
87 | MB_TO_PAGES(eq->hi_shrd_mem_start - eq->priv_mem_size); | ||
88 | |||
89 | node_end_pfn[node] = | ||
90 | MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); | ||
91 | |||
92 | memblock_x86_register_active_regions(node, node_start_pfn[node], | ||
93 | node_end_pfn[node]); | ||
94 | |||
95 | memory_present(node, node_start_pfn[node], node_end_pfn[node]); | ||
96 | |||
97 | node_remap_size[node] = node_memmap_size_bytes(node, | ||
98 | node_start_pfn[node], | ||
99 | node_end_pfn[node]); | ||
100 | } | 87 | } |
101 | 88 | ||
102 | /* | 89 | /* |
103 | * Function: smp_dump_qct() | 90 | * Function: smp_dump_qct() |
104 | * | 91 | * |
105 | * Description: gets memory layout from the quad config table. This | 92 | * Description: gets memory layout from the quad config table. This |
106 | * function also updates node_online_map with the nodes (quads) present. | 93 | * function also updates numa_nodes_parsed with the nodes (quads) present. |
107 | */ | 94 | */ |
108 | static void __init smp_dump_qct(void) | 95 | static void __init smp_dump_qct(void) |
109 | { | 96 | { |
@@ -112,7 +99,6 @@ static void __init smp_dump_qct(void) | |||
112 | 99 | ||
113 | scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR); | 100 | scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR); |
114 | 101 | ||
115 | nodes_clear(node_online_map); | ||
116 | for_each_node(node) { | 102 | for_each_node(node) { |
117 | if (scd->quads_present31_0 & (1 << node)) | 103 | if (scd->quads_present31_0 & (1 << node)) |
118 | numaq_register_node(node, scd); | 104 | numaq_register_node(node, scd); |
@@ -282,14 +268,14 @@ static __init void early_check_numaq(void) | |||
282 | } | 268 | } |
283 | } | 269 | } |
284 | 270 | ||
285 | int __init get_memcfg_numaq(void) | 271 | int __init numaq_numa_init(void) |
286 | { | 272 | { |
287 | early_check_numaq(); | 273 | early_check_numaq(); |
288 | if (!found_numaq) | 274 | if (!found_numaq) |
289 | return 0; | 275 | return -ENOENT; |
290 | smp_dump_qct(); | 276 | smp_dump_qct(); |
291 | 277 | ||
292 | return 1; | 278 | return 0; |
293 | } | 279 | } |
294 | 280 | ||
295 | #define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) | 281 | #define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) |
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index fc84c7b61108..6541e471fd91 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c | |||
@@ -172,7 +172,6 @@ struct apic apic_default = { | |||
172 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | 172 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, |
173 | 173 | ||
174 | .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid, | 174 | .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid, |
175 | .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node, | ||
176 | }; | 175 | }; |
177 | 176 | ||
178 | extern struct apic apic_numaq; | 177 | extern struct apic apic_numaq; |
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index e4b8059b414a..35bcd7d995a1 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c | |||
@@ -551,5 +551,4 @@ struct apic apic_summit = { | |||
551 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | 551 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, |
552 | 552 | ||
553 | .x86_32_early_logical_apicid = summit_early_logical_apicid, | 553 | .x86_32_early_logical_apicid = summit_early_logical_apicid, |
554 | .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node, | ||
555 | }; | 554 | }; |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 33b10a0fc095..7acd2d2ac965 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -37,6 +37,13 @@ | |||
37 | #include <asm/smp.h> | 37 | #include <asm/smp.h> |
38 | #include <asm/x86_init.h> | 38 | #include <asm/x86_init.h> |
39 | #include <asm/emergency-restart.h> | 39 | #include <asm/emergency-restart.h> |
40 | #include <asm/nmi.h> | ||
41 | |||
42 | /* BMC sets the NMI-pending bit in this MMR before sending an NMI */ | ||
43 | #define UVH_NMI_MMR UVH_SCRATCH5 | ||
44 | #define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8) | ||
45 | #define UV_NMI_PENDING_MASK (1UL << 63) | ||
46 | DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count); | ||
40 | 47 | ||
41 | DEFINE_PER_CPU(int, x2apic_extra_bits); | 48 | DEFINE_PER_CPU(int, x2apic_extra_bits); |
42 | 49 | ||
@@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void) | |||
642 | */ | 649 | */ |
643 | int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) | 650 | int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) |
644 | { | 651 | { |
652 | unsigned long real_uv_nmi; | ||
653 | int bid; | ||
654 | |||
645 | if (reason != DIE_NMIUNKNOWN) | 655 | if (reason != DIE_NMIUNKNOWN) |
646 | return NOTIFY_OK; | 656 | return NOTIFY_OK; |
647 | 657 | ||
648 | if (in_crash_kexec) | 658 | if (in_crash_kexec) |
649 | /* do nothing if entering the crash kernel */ | 659 | /* do nothing if entering the crash kernel */ |
650 | return NOTIFY_OK; | 660 | return NOTIFY_OK; |
661 | |||
651 | /* | 662 | /* |
652 | * Use a lock so only one cpu prints at a time | 663 | * Each blade has an MMR that indicates when an NMI has been sent |
653 | * to prevent intermixed output. | 664 | * to cpus on the blade. If an NMI is detected, atomically |
665 | * clear the MMR and update a per-blade NMI count used to | ||
666 | * cause each cpu on the blade to notice a new NMI. | ||
667 | */ | ||
668 | bid = uv_numa_blade_id(); | ||
669 | real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); | ||
670 | |||
671 | if (unlikely(real_uv_nmi)) { | ||
672 | spin_lock(&uv_blade_info[bid].nmi_lock); | ||
673 | real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); | ||
674 | if (real_uv_nmi) { | ||
675 | uv_blade_info[bid].nmi_count++; | ||
676 | uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK); | ||
677 | } | ||
678 | spin_unlock(&uv_blade_info[bid].nmi_lock); | ||
679 | } | ||
680 | |||
681 | if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count)) | ||
682 | return NOTIFY_DONE; | ||
683 | |||
684 | __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; | ||
685 | |||
686 | /* | ||
687 | * Use a lock so only one cpu prints at a time. | ||
688 | * This prevents intermixed output. | ||
654 | */ | 689 | */ |
655 | spin_lock(&uv_nmi_lock); | 690 | spin_lock(&uv_nmi_lock); |
656 | pr_info("NMI stack dump cpu %u:\n", smp_processor_id()); | 691 | pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id()); |
657 | dump_stack(); | 692 | dump_stack(); |
658 | spin_unlock(&uv_nmi_lock); | 693 | spin_unlock(&uv_nmi_lock); |
659 | 694 | ||
@@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) | |||
661 | } | 696 | } |
662 | 697 | ||
663 | static struct notifier_block uv_dump_stack_nmi_nb = { | 698 | static struct notifier_block uv_dump_stack_nmi_nb = { |
664 | .notifier_call = uv_handle_nmi | 699 | .notifier_call = uv_handle_nmi, |
700 | .priority = NMI_LOCAL_LOW_PRIOR - 1, | ||
665 | }; | 701 | }; |
666 | 702 | ||
667 | void uv_register_nmi_notifier(void) | 703 | void uv_register_nmi_notifier(void) |
@@ -720,8 +756,9 @@ void __init uv_system_init(void) | |||
720 | printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); | 756 | printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); |
721 | 757 | ||
722 | bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); | 758 | bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); |
723 | uv_blade_info = kmalloc(bytes, GFP_KERNEL); | 759 | uv_blade_info = kzalloc(bytes, GFP_KERNEL); |
724 | BUG_ON(!uv_blade_info); | 760 | BUG_ON(!uv_blade_info); |
761 | |||
725 | for (blade = 0; blade < uv_num_possible_blades(); blade++) | 762 | for (blade = 0; blade < uv_num_possible_blades(); blade++) |
726 | uv_blade_info[blade].memory_nid = -1; | 763 | uv_blade_info[blade].memory_nid = -1; |
727 | 764 | ||
@@ -747,6 +784,7 @@ void __init uv_system_init(void) | |||
747 | uv_blade_info[blade].pnode = pnode; | 784 | uv_blade_info[blade].pnode = pnode; |
748 | uv_blade_info[blade].nr_possible_cpus = 0; | 785 | uv_blade_info[blade].nr_possible_cpus = 0; |
749 | uv_blade_info[blade].nr_online_cpus = 0; | 786 | uv_blade_info[blade].nr_online_cpus = 0; |
787 | spin_lock_init(&uv_blade_info[blade].nmi_lock); | ||
750 | max_pnode = max(pnode, max_pnode); | 788 | max_pnode = max(pnode, max_pnode); |
751 | blade++; | 789 | blade++; |
752 | } | 790 | } |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index adee12e0da1f..3bfa02235965 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -1238,7 +1238,6 @@ static int suspend(int vetoable) | |||
1238 | dpm_suspend_noirq(PMSG_SUSPEND); | 1238 | dpm_suspend_noirq(PMSG_SUSPEND); |
1239 | 1239 | ||
1240 | local_irq_disable(); | 1240 | local_irq_disable(); |
1241 | sysdev_suspend(PMSG_SUSPEND); | ||
1242 | syscore_suspend(); | 1241 | syscore_suspend(); |
1243 | 1242 | ||
1244 | local_irq_enable(); | 1243 | local_irq_enable(); |
@@ -1258,7 +1257,6 @@ static int suspend(int vetoable) | |||
1258 | err = (err == APM_SUCCESS) ? 0 : -EIO; | 1257 | err = (err == APM_SUCCESS) ? 0 : -EIO; |
1259 | 1258 | ||
1260 | syscore_resume(); | 1259 | syscore_resume(); |
1261 | sysdev_resume(); | ||
1262 | local_irq_enable(); | 1260 | local_irq_enable(); |
1263 | 1261 | ||
1264 | dpm_resume_noirq(PMSG_RESUME); | 1262 | dpm_resume_noirq(PMSG_RESUME); |
@@ -1282,7 +1280,6 @@ static void standby(void) | |||
1282 | dpm_suspend_noirq(PMSG_SUSPEND); | 1280 | dpm_suspend_noirq(PMSG_SUSPEND); |
1283 | 1281 | ||
1284 | local_irq_disable(); | 1282 | local_irq_disable(); |
1285 | sysdev_suspend(PMSG_SUSPEND); | ||
1286 | syscore_suspend(); | 1283 | syscore_suspend(); |
1287 | local_irq_enable(); | 1284 | local_irq_enable(); |
1288 | 1285 | ||
@@ -1292,7 +1289,6 @@ static void standby(void) | |||
1292 | 1289 | ||
1293 | local_irq_disable(); | 1290 | local_irq_disable(); |
1294 | syscore_resume(); | 1291 | syscore_resume(); |
1295 | sysdev_resume(); | ||
1296 | local_irq_enable(); | 1292 | local_irq_enable(); |
1297 | 1293 | ||
1298 | dpm_resume_noirq(PMSG_RESUME); | 1294 | dpm_resume_noirq(PMSG_RESUME); |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 3f0ebe429a01..6042981d0309 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -30,7 +30,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o | |||
30 | 30 | ||
31 | obj-$(CONFIG_X86_MCE) += mcheck/ | 31 | obj-$(CONFIG_X86_MCE) += mcheck/ |
32 | obj-$(CONFIG_MTRR) += mtrr/ | 32 | obj-$(CONFIG_MTRR) += mtrr/ |
33 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | ||
34 | 33 | ||
35 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o | 34 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o |
36 | 35 | ||
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bb9eb29a52dd..6f9d1f6063e9 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -613,7 +613,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
613 | #endif | 613 | #endif |
614 | 614 | ||
615 | /* As a rule processors have APIC timer running in deep C states */ | 615 | /* As a rule processors have APIC timer running in deep C states */ |
616 | if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400)) | 616 | if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400)) |
617 | set_cpu_cap(c, X86_FEATURE_ARAT); | 617 | set_cpu_cap(c, X86_FEATURE_ARAT); |
618 | 618 | ||
619 | /* | 619 | /* |
@@ -698,7 +698,7 @@ cpu_dev_register(amd_cpu_dev); | |||
698 | */ | 698 | */ |
699 | 699 | ||
700 | const int amd_erratum_400[] = | 700 | const int amd_erratum_400[] = |
701 | AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf), | 701 | AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), |
702 | AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); | 702 | AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); |
703 | EXPORT_SYMBOL_GPL(amd_erratum_400); | 703 | EXPORT_SYMBOL_GPL(amd_erratum_400); |
704 | 704 | ||
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e2ced0074a45..cbc70a27430c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -254,6 +254,25 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | |||
254 | } | 254 | } |
255 | #endif | 255 | #endif |
256 | 256 | ||
257 | static int disable_smep __cpuinitdata; /* set by "nosmep"; __cpuinitdata because setup_smep() still reads it on CPU bring-up after boot */ | ||
258 | static __init int setup_disable_smep(char *arg) /* "nosmep" boot-parameter handler; arg is unused */ | ||
259 | { | ||
260 | disable_smep = 1; | ||
261 | return 1; | ||
262 | } | ||
263 | __setup("nosmep", setup_disable_smep); | ||
264 | |||
265 | static __cpuinit void setup_smep(struct cpuinfo_x86 *c) /* __cpuinit, not __init: also called from generic_identify(), which is __cpuinit */ | ||
266 | { | ||
267 | if (cpu_has(c, X86_FEATURE_SMEP)) { /* nothing to do when the CPU does not report SMEP */ | ||
268 | if (unlikely(disable_smep)) { /* "nosmep" given: hide the feature and clear the CR4 bit */ | ||
269 | setup_clear_cpu_cap(X86_FEATURE_SMEP); | ||
270 | clear_in_cr4(X86_CR4_SMEP); | ||
271 | } else /* default: turn SMEP on in CR4 */ | ||
272 | set_in_cr4(X86_CR4_SMEP); | ||
273 | } | ||
274 | } | ||
275 | |||
257 | /* | 276 | /* |
258 | * Some CPU features depend on higher CPUID levels, which may not always | 277 | * Some CPU features depend on higher CPUID levels, which may not always |
259 | * be available due to CPUID level capping or broken virtualization | 278 | * be available due to CPUID level capping or broken virtualization |
@@ -565,8 +584,7 @@ void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | |||
565 | 584 | ||
566 | cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); | 585 | cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); |
567 | 586 | ||
568 | if (eax > 0) | 587 | c->x86_capability[9] = ebx; |
569 | c->x86_capability[9] = ebx; | ||
570 | } | 588 | } |
571 | 589 | ||
572 | /* AMD-defined flags: level 0x80000001 */ | 590 | /* AMD-defined flags: level 0x80000001 */ |
@@ -668,6 +686,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) | |||
668 | c->cpu_index = 0; | 686 | c->cpu_index = 0; |
669 | #endif | 687 | #endif |
670 | filter_cpuid_features(c, false); | 688 | filter_cpuid_features(c, false); |
689 | |||
690 | setup_smep(c); | ||
671 | } | 691 | } |
672 | 692 | ||
673 | void __init early_cpu_init(void) | 693 | void __init early_cpu_init(void) |
@@ -753,6 +773,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | |||
753 | #endif | 773 | #endif |
754 | } | 774 | } |
755 | 775 | ||
776 | setup_smep(c); | ||
777 | |||
756 | get_model_name(c); /* Default name */ | 778 | get_model_name(c); /* Default name */ |
757 | 779 | ||
758 | detect_nopl(c); | 780 | detect_nopl(c); |
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig deleted file mode 100644 index 870e6cc6ad28..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ /dev/null | |||
@@ -1,266 +0,0 @@ | |||
1 | # | ||
2 | # CPU Frequency scaling | ||
3 | # | ||
4 | |||
5 | menu "CPU Frequency scaling" | ||
6 | |||
7 | source "drivers/cpufreq/Kconfig" | ||
8 | |||
9 | if CPU_FREQ | ||
10 | |||
11 | comment "CPUFreq processor drivers" | ||
12 | |||
13 | config X86_PCC_CPUFREQ | ||
14 | tristate "Processor Clocking Control interface driver" | ||
15 | depends on ACPI && ACPI_PROCESSOR | ||
16 | help | ||
17 | This driver adds support for the PCC interface. | ||
18 | |||
19 | For details, take a look at: | ||
20 | <file:Documentation/cpu-freq/pcc-cpufreq.txt>. | ||
21 | |||
22 | To compile this driver as a module, choose M here: the | ||
23 | module will be called pcc-cpufreq. | ||
24 | |||
25 | If in doubt, say N. | ||
26 | |||
27 | config X86_ACPI_CPUFREQ | ||
28 | tristate "ACPI Processor P-States driver" | ||
29 | select CPU_FREQ_TABLE | ||
30 | depends on ACPI_PROCESSOR | ||
31 | help | ||
32 | This driver adds a CPUFreq driver which utilizes the ACPI | ||
33 | Processor Performance States. | ||
34 | This driver also supports Intel Enhanced Speedstep. | ||
35 | |||
36 | To compile this driver as a module, choose M here: the | ||
37 | module will be called acpi-cpufreq. | ||
38 | |||
39 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
40 | |||
41 | If in doubt, say N. | ||
42 | |||
43 | config ELAN_CPUFREQ | ||
44 | tristate "AMD Elan SC400 and SC410" | ||
45 | select CPU_FREQ_TABLE | ||
46 | depends on X86_ELAN | ||
47 | ---help--- | ||
48 | This adds the CPUFreq driver for AMD Elan SC400 and SC410 | ||
49 | processors. | ||
50 | |||
51 | You need to specify the processor maximum speed as boot | ||
52 | parameter: elanfreq=maxspeed (in kHz) or as module | ||
53 | parameter "max_freq". | ||
54 | |||
55 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
56 | |||
57 | If in doubt, say N. | ||
58 | |||
59 | config SC520_CPUFREQ | ||
60 | tristate "AMD Elan SC520" | ||
61 | select CPU_FREQ_TABLE | ||
62 | depends on X86_ELAN | ||
63 | ---help--- | ||
64 | This adds the CPUFreq driver for AMD Elan SC520 processor. | ||
65 | |||
66 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
67 | |||
68 | If in doubt, say N. | ||
69 | |||
70 | |||
71 | config X86_POWERNOW_K6 | ||
72 | tristate "AMD Mobile K6-2/K6-3 PowerNow!" | ||
73 | select CPU_FREQ_TABLE | ||
74 | depends on X86_32 | ||
75 | help | ||
76 | This adds the CPUFreq driver for mobile AMD K6-2+ and mobile | ||
77 | AMD K6-3+ processors. | ||
78 | |||
79 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
80 | |||
81 | If in doubt, say N. | ||
82 | |||
83 | config X86_POWERNOW_K7 | ||
84 | tristate "AMD Mobile Athlon/Duron PowerNow!" | ||
85 | select CPU_FREQ_TABLE | ||
86 | depends on X86_32 | ||
87 | help | ||
88 | This adds the CPUFreq driver for mobile AMD K7 mobile processors. | ||
89 | |||
90 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
91 | |||
92 | If in doubt, say N. | ||
93 | |||
94 | config X86_POWERNOW_K7_ACPI | ||
95 | bool | ||
96 | depends on X86_POWERNOW_K7 && ACPI_PROCESSOR | ||
97 | depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m) | ||
98 | depends on X86_32 | ||
99 | default y | ||
100 | |||
101 | config X86_POWERNOW_K8 | ||
102 | tristate "AMD Opteron/Athlon64 PowerNow!" | ||
103 | select CPU_FREQ_TABLE | ||
104 | depends on ACPI && ACPI_PROCESSOR | ||
105 | help | ||
106 | This adds the CPUFreq driver for K8/K10 Opteron/Athlon64 processors. | ||
107 | |||
108 | To compile this driver as a module, choose M here: the | ||
109 | module will be called powernow-k8. | ||
110 | |||
111 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
112 | |||
113 | config X86_GX_SUSPMOD | ||
114 | tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" | ||
115 | depends on X86_32 && PCI | ||
116 | help | ||
117 | This add the CPUFreq driver for NatSemi Geode processors which | ||
118 | support suspend modulation. | ||
119 | |||
120 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
121 | |||
122 | If in doubt, say N. | ||
123 | |||
124 | config X86_SPEEDSTEP_CENTRINO | ||
125 | tristate "Intel Enhanced SpeedStep (deprecated)" | ||
126 | select CPU_FREQ_TABLE | ||
127 | select X86_SPEEDSTEP_CENTRINO_TABLE if X86_32 | ||
128 | depends on X86_32 || (X86_64 && ACPI_PROCESSOR) | ||
129 | help | ||
130 | This is deprecated and this functionality is now merged into | ||
131 | acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of | ||
132 | speedstep_centrino. | ||
133 | This adds the CPUFreq driver for Enhanced SpeedStep enabled | ||
134 | mobile CPUs. This means Intel Pentium M (Centrino) CPUs | ||
135 | or 64bit enabled Intel Xeons. | ||
136 | |||
137 | To compile this driver as a module, choose M here: the | ||
138 | module will be called speedstep-centrino. | ||
139 | |||
140 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
141 | |||
142 | If in doubt, say N. | ||
143 | |||
144 | config X86_SPEEDSTEP_CENTRINO_TABLE | ||
145 | bool "Built-in tables for Banias CPUs" | ||
146 | depends on X86_32 && X86_SPEEDSTEP_CENTRINO | ||
147 | default y | ||
148 | help | ||
149 | Use built-in tables for Banias CPUs if ACPI encoding | ||
150 | is not available. | ||
151 | |||
152 | If in doubt, say N. | ||
153 | |||
154 | config X86_SPEEDSTEP_ICH | ||
155 | tristate "Intel Speedstep on ICH-M chipsets (ioport interface)" | ||
156 | select CPU_FREQ_TABLE | ||
157 | depends on X86_32 | ||
158 | help | ||
159 | This adds the CPUFreq driver for certain mobile Intel Pentium III | ||
160 | (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all | ||
161 | mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2, | ||
162 | ICH3 or ICH4 southbridge. | ||
163 | |||
164 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
165 | |||
166 | If in doubt, say N. | ||
167 | |||
168 | config X86_SPEEDSTEP_SMI | ||
169 | tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)" | ||
170 | select CPU_FREQ_TABLE | ||
171 | depends on X86_32 && EXPERIMENTAL | ||
172 | help | ||
173 | This adds the CPUFreq driver for certain mobile Intel Pentium III | ||
174 | (Coppermine), all mobile Intel Pentium III-M (Tualatin) | ||
175 | on systems which have an Intel 440BX/ZX/MX southbridge. | ||
176 | |||
177 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
178 | |||
179 | If in doubt, say N. | ||
180 | |||
181 | config X86_P4_CLOCKMOD | ||
182 | tristate "Intel Pentium 4 clock modulation" | ||
183 | select CPU_FREQ_TABLE | ||
184 | help | ||
185 | This adds the CPUFreq driver for Intel Pentium 4 / XEON | ||
186 | processors. When enabled it will lower CPU temperature by skipping | ||
187 | clocks. | ||
188 | |||
189 | This driver should be only used in exceptional | ||
190 | circumstances when very low power is needed because it causes severe | ||
191 | slowdowns and noticeable latencies. Normally Speedstep should be used | ||
192 | instead. | ||
193 | |||
194 | To compile this driver as a module, choose M here: the | ||
195 | module will be called p4-clockmod. | ||
196 | |||
197 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
198 | |||
199 | Unless you are absolutely sure say N. | ||
200 | |||
201 | config X86_CPUFREQ_NFORCE2 | ||
202 | tristate "nVidia nForce2 FSB changing" | ||
203 | depends on X86_32 && EXPERIMENTAL | ||
204 | help | ||
205 | This adds the CPUFreq driver for FSB changing on nVidia nForce2 | ||
206 | platforms. | ||
207 | |||
208 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
209 | |||
210 | If in doubt, say N. | ||
211 | |||
212 | config X86_LONGRUN | ||
213 | tristate "Transmeta LongRun" | ||
214 | depends on X86_32 | ||
215 | help | ||
216 | This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors | ||
217 | which support LongRun. | ||
218 | |||
219 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
220 | |||
221 | If in doubt, say N. | ||
222 | |||
223 | config X86_LONGHAUL | ||
224 | tristate "VIA Cyrix III Longhaul" | ||
225 | select CPU_FREQ_TABLE | ||
226 | depends on X86_32 && ACPI_PROCESSOR | ||
227 | help | ||
228 | This adds the CPUFreq driver for VIA Samuel/CyrixIII, | ||
229 | VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T | ||
230 | processors. | ||
231 | |||
232 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
233 | |||
234 | If in doubt, say N. | ||
235 | |||
236 | config X86_E_POWERSAVER | ||
237 | tristate "VIA C7 Enhanced PowerSaver (DANGEROUS)" | ||
238 | select CPU_FREQ_TABLE | ||
239 | depends on X86_32 && EXPERIMENTAL | ||
240 | help | ||
241 | This adds the CPUFreq driver for VIA C7 processors. However, this driver | ||
242 | does not have any safeguards to prevent operating the CPU out of spec | ||
243 | and is thus considered dangerous. Please use the regular ACPI cpufreq | ||
244 | driver, enabled by CONFIG_X86_ACPI_CPUFREQ. | ||
245 | |||
246 | If in doubt, say N. | ||
247 | |||
248 | comment "shared options" | ||
249 | |||
250 | config X86_SPEEDSTEP_LIB | ||
251 | tristate | ||
252 | default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD) | ||
253 | |||
254 | config X86_SPEEDSTEP_RELAXED_CAP_CHECK | ||
255 | bool "Relaxed speedstep capability checks" | ||
256 | depends on X86_32 && (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH) | ||
257 | help | ||
258 | Don't perform all checks for a speedstep capable system which would | ||
259 | normally be done. Some ancient or strange systems, though speedstep | ||
260 | capable, don't always indicate that they are speedstep capable. This | ||
261 | option lets the probing code bypass some of those checks if the | ||
262 | parameter "relaxed_check=1" is passed to the module. | ||
263 | |||
264 | endif # CPU_FREQ | ||
265 | |||
266 | endmenu | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile deleted file mode 100644 index bd54bf67e6fb..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/Makefile +++ /dev/null | |||
@@ -1,21 +0,0 @@ | |||
1 | # Link order matters. K8 is preferred to ACPI because of firmware bugs in early | ||
2 | # K8 systems. ACPI is preferred to all other hardware-specific drivers. | ||
3 | # speedstep-* is preferred over p4-clockmod. | ||
4 | |||
5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o | ||
6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o | ||
7 | obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o | ||
8 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o | ||
9 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o | ||
10 | obj-$(CONFIG_X86_LONGHAUL) += longhaul.o | ||
11 | obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o | ||
12 | obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o | ||
13 | obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o | ||
14 | obj-$(CONFIG_X86_LONGRUN) += longrun.o | ||
15 | obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o | ||
16 | obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o | ||
17 | obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o | ||
18 | obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o | ||
19 | obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o | ||
20 | obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o | ||
21 | obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c deleted file mode 100644 index a2baafb2fe6d..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ /dev/null | |||
@@ -1,776 +0,0 @@ | |||
1 | /* | ||
2 | * acpi-cpufreq.c - ACPI Processor P-States Driver | ||
3 | * | ||
4 | * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> | ||
5 | * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> | ||
6 | * Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de> | ||
7 | * Copyright (C) 2006 Denis Sadykov <denis.m.sadykov@intel.com> | ||
8 | * | ||
9 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2 of the License, or (at | ||
14 | * your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, but | ||
17 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
19 | * General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License along | ||
22 | * with this program; if not, write to the Free Software Foundation, Inc., | ||
23 | * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. | ||
24 | * | ||
25 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
26 | */ | ||
27 | |||
28 | #include <linux/kernel.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/init.h> | ||
31 | #include <linux/smp.h> | ||
32 | #include <linux/sched.h> | ||
33 | #include <linux/cpufreq.h> | ||
34 | #include <linux/compiler.h> | ||
35 | #include <linux/dmi.h> | ||
36 | #include <linux/slab.h> | ||
37 | |||
38 | #include <linux/acpi.h> | ||
39 | #include <linux/io.h> | ||
40 | #include <linux/delay.h> | ||
41 | #include <linux/uaccess.h> | ||
42 | |||
43 | #include <acpi/processor.h> | ||
44 | |||
45 | #include <asm/msr.h> | ||
46 | #include <asm/processor.h> | ||
47 | #include <asm/cpufeature.h> | ||
48 | #include "mperf.h" | ||
49 | |||
/* Driver-level debug output, tagged with this driver's name. */
#define dprintk(msg...) \
	cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
52 | |||
53 | MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); | ||
54 | MODULE_DESCRIPTION("ACPI Processor P-States Driver"); | ||
55 | MODULE_LICENSE("GPL"); | ||
56 | |||
/* How the P-state registers of a CPU are accessed (acpi_cpufreq_data.cpu_feature). */
enum {
	UNDEFINED_CAPABLE		= 0,	/* access method not determined */
	SYSTEM_INTEL_MSR_CAPABLE	= 1,	/* through the IA32 PERF MSRs */
	SYSTEM_IO_CAPABLE		= 2,	/* through an I/O port */
};
62 | |||
/* Bits of the PERF MSR value that this driver compares and modifies. */
#define INTEL_MSR_RANGE		(0xffff)
64 | |||
/* Driver state for one CPU, allocated in acpi_cpufreq_cpu_init(). */
struct acpi_cpufreq_data {
	/* ACPI performance (P-state) data of this CPU */
	struct acpi_processor_performance *acpi_data;
	/* cpufreq table built from the ACPI states, CPUFREQ_TABLE_END terminated */
	struct cpufreq_frequency_table *freq_table;
	/* non-zero: force a register write on the next ->target() call */
	unsigned int resume;
	/* register access method, one of the *_CAPABLE values */
	unsigned int cpu_feature;
};
71 | |||
72 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); | ||
73 | |||
74 | /* acpi_perf_data is a pointer to percpu data. */ | ||
75 | static struct acpi_processor_performance __percpu *acpi_perf_data; | ||
76 | |||
77 | static struct cpufreq_driver acpi_cpufreq_driver; | ||
78 | |||
79 | static unsigned int acpi_pstate_strict; | ||
80 | |||
81 | static int check_est_cpu(unsigned int cpuid) | ||
82 | { | ||
83 | struct cpuinfo_x86 *cpu = &cpu_data(cpuid); | ||
84 | |||
85 | return cpu_has(cpu, X86_FEATURE_EST); | ||
86 | } | ||
87 | |||
88 | static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) | ||
89 | { | ||
90 | struct acpi_processor_performance *perf; | ||
91 | int i; | ||
92 | |||
93 | perf = data->acpi_data; | ||
94 | |||
95 | for (i = 0; i < perf->state_count; i++) { | ||
96 | if (value == perf->states[i].status) | ||
97 | return data->freq_table[i].frequency; | ||
98 | } | ||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) | ||
103 | { | ||
104 | int i; | ||
105 | struct acpi_processor_performance *perf; | ||
106 | |||
107 | msr &= INTEL_MSR_RANGE; | ||
108 | perf = data->acpi_data; | ||
109 | |||
110 | for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { | ||
111 | if (msr == perf->states[data->freq_table[i].index].status) | ||
112 | return data->freq_table[i].frequency; | ||
113 | } | ||
114 | return data->freq_table[0].frequency; | ||
115 | } | ||
116 | |||
117 | static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data) | ||
118 | { | ||
119 | switch (data->cpu_feature) { | ||
120 | case SYSTEM_INTEL_MSR_CAPABLE: | ||
121 | return extract_msr(val, data); | ||
122 | case SYSTEM_IO_CAPABLE: | ||
123 | return extract_io(val, data); | ||
124 | default: | ||
125 | return 0; | ||
126 | } | ||
127 | } | ||
128 | |||
129 | struct msr_addr { | ||
130 | u32 reg; | ||
131 | }; | ||
132 | |||
133 | struct io_addr { | ||
134 | u16 port; | ||
135 | u8 bit_width; | ||
136 | }; | ||
137 | |||
138 | struct drv_cmd { | ||
139 | unsigned int type; | ||
140 | const struct cpumask *mask; | ||
141 | union { | ||
142 | struct msr_addr msr; | ||
143 | struct io_addr io; | ||
144 | } addr; | ||
145 | u32 val; | ||
146 | }; | ||
147 | |||
148 | /* Called via smp_call_function_single(), on the target CPU */ | ||
149 | static void do_drv_read(void *_cmd) | ||
150 | { | ||
151 | struct drv_cmd *cmd = _cmd; | ||
152 | u32 h; | ||
153 | |||
154 | switch (cmd->type) { | ||
155 | case SYSTEM_INTEL_MSR_CAPABLE: | ||
156 | rdmsr(cmd->addr.msr.reg, cmd->val, h); | ||
157 | break; | ||
158 | case SYSTEM_IO_CAPABLE: | ||
159 | acpi_os_read_port((acpi_io_address)cmd->addr.io.port, | ||
160 | &cmd->val, | ||
161 | (u32)cmd->addr.io.bit_width); | ||
162 | break; | ||
163 | default: | ||
164 | break; | ||
165 | } | ||
166 | } | ||
167 | |||
168 | /* Called via smp_call_function_many(), on the target CPUs */ | ||
169 | static void do_drv_write(void *_cmd) | ||
170 | { | ||
171 | struct drv_cmd *cmd = _cmd; | ||
172 | u32 lo, hi; | ||
173 | |||
174 | switch (cmd->type) { | ||
175 | case SYSTEM_INTEL_MSR_CAPABLE: | ||
176 | rdmsr(cmd->addr.msr.reg, lo, hi); | ||
177 | lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE); | ||
178 | wrmsr(cmd->addr.msr.reg, lo, hi); | ||
179 | break; | ||
180 | case SYSTEM_IO_CAPABLE: | ||
181 | acpi_os_write_port((acpi_io_address)cmd->addr.io.port, | ||
182 | cmd->val, | ||
183 | (u32)cmd->addr.io.bit_width); | ||
184 | break; | ||
185 | default: | ||
186 | break; | ||
187 | } | ||
188 | } | ||
189 | |||
190 | static void drv_read(struct drv_cmd *cmd) | ||
191 | { | ||
192 | int err; | ||
193 | cmd->val = 0; | ||
194 | |||
195 | err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1); | ||
196 | WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? */ | ||
197 | } | ||
198 | |||
199 | static void drv_write(struct drv_cmd *cmd) | ||
200 | { | ||
201 | int this_cpu; | ||
202 | |||
203 | this_cpu = get_cpu(); | ||
204 | if (cpumask_test_cpu(this_cpu, cmd->mask)) | ||
205 | do_drv_write(cmd); | ||
206 | smp_call_function_many(cmd->mask, do_drv_write, cmd, 1); | ||
207 | put_cpu(); | ||
208 | } | ||
209 | |||
210 | static u32 get_cur_val(const struct cpumask *mask) | ||
211 | { | ||
212 | struct acpi_processor_performance *perf; | ||
213 | struct drv_cmd cmd; | ||
214 | |||
215 | if (unlikely(cpumask_empty(mask))) | ||
216 | return 0; | ||
217 | |||
218 | switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) { | ||
219 | case SYSTEM_INTEL_MSR_CAPABLE: | ||
220 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; | ||
221 | cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; | ||
222 | break; | ||
223 | case SYSTEM_IO_CAPABLE: | ||
224 | cmd.type = SYSTEM_IO_CAPABLE; | ||
225 | perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data; | ||
226 | cmd.addr.io.port = perf->control_register.address; | ||
227 | cmd.addr.io.bit_width = perf->control_register.bit_width; | ||
228 | break; | ||
229 | default: | ||
230 | return 0; | ||
231 | } | ||
232 | |||
233 | cmd.mask = mask; | ||
234 | drv_read(&cmd); | ||
235 | |||
236 | dprintk("get_cur_val = %u\n", cmd.val); | ||
237 | |||
238 | return cmd.val; | ||
239 | } | ||
240 | |||
241 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) | ||
242 | { | ||
243 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); | ||
244 | unsigned int freq; | ||
245 | unsigned int cached_freq; | ||
246 | |||
247 | dprintk("get_cur_freq_on_cpu (%d)\n", cpu); | ||
248 | |||
249 | if (unlikely(data == NULL || | ||
250 | data->acpi_data == NULL || data->freq_table == NULL)) { | ||
251 | return 0; | ||
252 | } | ||
253 | |||
254 | cached_freq = data->freq_table[data->acpi_data->state].frequency; | ||
255 | freq = extract_freq(get_cur_val(cpumask_of(cpu)), data); | ||
256 | if (freq != cached_freq) { | ||
257 | /* | ||
258 | * The dreaded BIOS frequency change behind our back. | ||
259 | * Force set the frequency on next target call. | ||
260 | */ | ||
261 | data->resume = 1; | ||
262 | } | ||
263 | |||
264 | dprintk("cur freq = %u\n", freq); | ||
265 | |||
266 | return freq; | ||
267 | } | ||
268 | |||
/*
 * Poll the hardware until it reports @freq.
 *
 * Makes up to 100 attempts with a 10 us delay after each failed one.
 * Returns 1 once the frequency matches, 0 if it never does.
 */
static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
				struct acpi_cpufreq_data *data)
{
	unsigned int tries_left = 100;

	while (tries_left--) {
		if (extract_freq(get_cur_val(mask), data) == freq)
			return 1;
		udelay(10);
	}

	return 0;
}
283 | |||
284 | static int acpi_cpufreq_target(struct cpufreq_policy *policy, | ||
285 | unsigned int target_freq, unsigned int relation) | ||
286 | { | ||
287 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); | ||
288 | struct acpi_processor_performance *perf; | ||
289 | struct cpufreq_freqs freqs; | ||
290 | struct drv_cmd cmd; | ||
291 | unsigned int next_state = 0; /* Index into freq_table */ | ||
292 | unsigned int next_perf_state = 0; /* Index into perf table */ | ||
293 | unsigned int i; | ||
294 | int result = 0; | ||
295 | |||
296 | dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); | ||
297 | |||
298 | if (unlikely(data == NULL || | ||
299 | data->acpi_data == NULL || data->freq_table == NULL)) { | ||
300 | return -ENODEV; | ||
301 | } | ||
302 | |||
303 | perf = data->acpi_data; | ||
304 | result = cpufreq_frequency_table_target(policy, | ||
305 | data->freq_table, | ||
306 | target_freq, | ||
307 | relation, &next_state); | ||
308 | if (unlikely(result)) { | ||
309 | result = -ENODEV; | ||
310 | goto out; | ||
311 | } | ||
312 | |||
313 | next_perf_state = data->freq_table[next_state].index; | ||
314 | if (perf->state == next_perf_state) { | ||
315 | if (unlikely(data->resume)) { | ||
316 | dprintk("Called after resume, resetting to P%d\n", | ||
317 | next_perf_state); | ||
318 | data->resume = 0; | ||
319 | } else { | ||
320 | dprintk("Already at target state (P%d)\n", | ||
321 | next_perf_state); | ||
322 | goto out; | ||
323 | } | ||
324 | } | ||
325 | |||
326 | switch (data->cpu_feature) { | ||
327 | case SYSTEM_INTEL_MSR_CAPABLE: | ||
328 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; | ||
329 | cmd.addr.msr.reg = MSR_IA32_PERF_CTL; | ||
330 | cmd.val = (u32) perf->states[next_perf_state].control; | ||
331 | break; | ||
332 | case SYSTEM_IO_CAPABLE: | ||
333 | cmd.type = SYSTEM_IO_CAPABLE; | ||
334 | cmd.addr.io.port = perf->control_register.address; | ||
335 | cmd.addr.io.bit_width = perf->control_register.bit_width; | ||
336 | cmd.val = (u32) perf->states[next_perf_state].control; | ||
337 | break; | ||
338 | default: | ||
339 | result = -ENODEV; | ||
340 | goto out; | ||
341 | } | ||
342 | |||
343 | /* cpufreq holds the hotplug lock, so we are safe from here on */ | ||
344 | if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) | ||
345 | cmd.mask = policy->cpus; | ||
346 | else | ||
347 | cmd.mask = cpumask_of(policy->cpu); | ||
348 | |||
349 | freqs.old = perf->states[perf->state].core_frequency * 1000; | ||
350 | freqs.new = data->freq_table[next_state].frequency; | ||
351 | for_each_cpu(i, policy->cpus) { | ||
352 | freqs.cpu = i; | ||
353 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
354 | } | ||
355 | |||
356 | drv_write(&cmd); | ||
357 | |||
358 | if (acpi_pstate_strict) { | ||
359 | if (!check_freqs(cmd.mask, freqs.new, data)) { | ||
360 | dprintk("acpi_cpufreq_target failed (%d)\n", | ||
361 | policy->cpu); | ||
362 | result = -EAGAIN; | ||
363 | goto out; | ||
364 | } | ||
365 | } | ||
366 | |||
367 | for_each_cpu(i, policy->cpus) { | ||
368 | freqs.cpu = i; | ||
369 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
370 | } | ||
371 | perf->state = next_perf_state; | ||
372 | |||
373 | out: | ||
374 | return result; | ||
375 | } | ||
376 | |||
377 | static int acpi_cpufreq_verify(struct cpufreq_policy *policy) | ||
378 | { | ||
379 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); | ||
380 | |||
381 | dprintk("acpi_cpufreq_verify\n"); | ||
382 | |||
383 | return cpufreq_frequency_table_verify(policy, data->freq_table); | ||
384 | } | ||
385 | |||
386 | static unsigned long | ||
387 | acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) | ||
388 | { | ||
389 | struct acpi_processor_performance *perf = data->acpi_data; | ||
390 | |||
391 | if (cpu_khz) { | ||
392 | /* search the closest match to cpu_khz */ | ||
393 | unsigned int i; | ||
394 | unsigned long freq; | ||
395 | unsigned long freqn = perf->states[0].core_frequency * 1000; | ||
396 | |||
397 | for (i = 0; i < (perf->state_count-1); i++) { | ||
398 | freq = freqn; | ||
399 | freqn = perf->states[i+1].core_frequency * 1000; | ||
400 | if ((2 * cpu_khz) > (freqn + freq)) { | ||
401 | perf->state = i; | ||
402 | return freq; | ||
403 | } | ||
404 | } | ||
405 | perf->state = perf->state_count-1; | ||
406 | return freqn; | ||
407 | } else { | ||
408 | /* assume CPU is at P0... */ | ||
409 | perf->state = 0; | ||
410 | return perf->states[0].core_frequency * 1000; | ||
411 | } | ||
412 | } | ||
413 | |||
414 | static void free_acpi_perf_data(void) | ||
415 | { | ||
416 | unsigned int i; | ||
417 | |||
418 | /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */ | ||
419 | for_each_possible_cpu(i) | ||
420 | free_cpumask_var(per_cpu_ptr(acpi_perf_data, i) | ||
421 | ->shared_cpu_map); | ||
422 | free_percpu(acpi_perf_data); | ||
423 | } | ||
424 | |||
425 | /* | ||
426 | * acpi_cpufreq_early_init - initialize ACPI P-States library | ||
427 | * | ||
428 | * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c) | ||
429 | * in order to determine correct frequency and voltage pairings. We can | ||
430 | * do _PDC and _PSD and find out the processor dependency for the | ||
431 | * actual init that will happen later... | ||
432 | */ | ||
433 | static int __init acpi_cpufreq_early_init(void) | ||
434 | { | ||
435 | unsigned int i; | ||
436 | dprintk("acpi_cpufreq_early_init\n"); | ||
437 | |||
438 | acpi_perf_data = alloc_percpu(struct acpi_processor_performance); | ||
439 | if (!acpi_perf_data) { | ||
440 | dprintk("Memory allocation error for acpi_perf_data.\n"); | ||
441 | return -ENOMEM; | ||
442 | } | ||
443 | for_each_possible_cpu(i) { | ||
444 | if (!zalloc_cpumask_var_node( | ||
445 | &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map, | ||
446 | GFP_KERNEL, cpu_to_node(i))) { | ||
447 | |||
448 | /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */ | ||
449 | free_acpi_perf_data(); | ||
450 | return -ENOMEM; | ||
451 | } | ||
452 | } | ||
453 | |||
454 | /* Do initialization in ACPI core */ | ||
455 | acpi_processor_preregister_performance(acpi_perf_data); | ||
456 | return 0; | ||
457 | } | ||
458 | |||
459 | #ifdef CONFIG_SMP | ||
/*
 * Some BIOSes do SW_ANY coordination internally, either by setting it up in
 * hardware or by doing it in BIOS firmware, without telling the OS.  If
 * that goes undetected the CPU ends up running at a different speed than
 * the OS intended.  This flag is set when such a BIOS is detected so that
 * the case can be handled cleanly.
 */
static int bios_with_sw_any_bug;
467 | |||
468 | static int sw_any_bug_found(const struct dmi_system_id *d) | ||
469 | { | ||
470 | bios_with_sw_any_bug = 1; | ||
471 | return 0; | ||
472 | } | ||
473 | |||
474 | static const struct dmi_system_id sw_any_bug_dmi_table[] = { | ||
475 | { | ||
476 | .callback = sw_any_bug_found, | ||
477 | .ident = "Supermicro Server X6DLP", | ||
478 | .matches = { | ||
479 | DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), | ||
480 | DMI_MATCH(DMI_BIOS_VERSION, "080010"), | ||
481 | DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), | ||
482 | }, | ||
483 | }, | ||
484 | { } | ||
485 | }; | ||
486 | |||
487 | static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) | ||
488 | { | ||
489 | /* Intel Xeon Processor 7100 Series Specification Update | ||
490 | * http://www.intel.com/Assets/PDF/specupdate/314554.pdf | ||
491 | * AL30: A Machine Check Exception (MCE) Occurring during an | ||
492 | * Enhanced Intel SpeedStep Technology Ratio Change May Cause | ||
493 | * Both Processor Cores to Lock Up. */ | ||
494 | if (c->x86_vendor == X86_VENDOR_INTEL) { | ||
495 | if ((c->x86 == 15) && | ||
496 | (c->x86_model == 6) && | ||
497 | (c->x86_mask == 8)) { | ||
498 | printk(KERN_INFO "acpi-cpufreq: Intel(R) " | ||
499 | "Xeon(R) 7100 Errata AL30, processors may " | ||
500 | "lock up on frequency changes: disabling " | ||
501 | "acpi-cpufreq.\n"); | ||
502 | return -ENODEV; | ||
503 | } | ||
504 | } | ||
505 | return 0; | ||
506 | } | ||
507 | #endif | ||
508 | |||
509 | static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | ||
510 | { | ||
511 | unsigned int i; | ||
512 | unsigned int valid_states = 0; | ||
513 | unsigned int cpu = policy->cpu; | ||
514 | struct acpi_cpufreq_data *data; | ||
515 | unsigned int result = 0; | ||
516 | struct cpuinfo_x86 *c = &cpu_data(policy->cpu); | ||
517 | struct acpi_processor_performance *perf; | ||
518 | #ifdef CONFIG_SMP | ||
519 | static int blacklisted; | ||
520 | #endif | ||
521 | |||
522 | dprintk("acpi_cpufreq_cpu_init\n"); | ||
523 | |||
524 | #ifdef CONFIG_SMP | ||
525 | if (blacklisted) | ||
526 | return blacklisted; | ||
527 | blacklisted = acpi_cpufreq_blacklist(c); | ||
528 | if (blacklisted) | ||
529 | return blacklisted; | ||
530 | #endif | ||
531 | |||
532 | data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); | ||
533 | if (!data) | ||
534 | return -ENOMEM; | ||
535 | |||
536 | data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu); | ||
537 | per_cpu(acfreq_data, cpu) = data; | ||
538 | |||
539 | if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) | ||
540 | acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; | ||
541 | |||
542 | result = acpi_processor_register_performance(data->acpi_data, cpu); | ||
543 | if (result) | ||
544 | goto err_free; | ||
545 | |||
546 | perf = data->acpi_data; | ||
547 | policy->shared_type = perf->shared_type; | ||
548 | |||
549 | /* | ||
550 | * Will let policy->cpus know about dependency only when software | ||
551 | * coordination is required. | ||
552 | */ | ||
553 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || | ||
554 | policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { | ||
555 | cpumask_copy(policy->cpus, perf->shared_cpu_map); | ||
556 | } | ||
557 | cpumask_copy(policy->related_cpus, perf->shared_cpu_map); | ||
558 | |||
559 | #ifdef CONFIG_SMP | ||
560 | dmi_check_system(sw_any_bug_dmi_table); | ||
561 | if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) { | ||
562 | policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; | ||
563 | cpumask_copy(policy->cpus, cpu_core_mask(cpu)); | ||
564 | } | ||
565 | #endif | ||
566 | |||
567 | /* capability check */ | ||
568 | if (perf->state_count <= 1) { | ||
569 | dprintk("No P-States\n"); | ||
570 | result = -ENODEV; | ||
571 | goto err_unreg; | ||
572 | } | ||
573 | |||
574 | if (perf->control_register.space_id != perf->status_register.space_id) { | ||
575 | result = -ENODEV; | ||
576 | goto err_unreg; | ||
577 | } | ||
578 | |||
579 | switch (perf->control_register.space_id) { | ||
580 | case ACPI_ADR_SPACE_SYSTEM_IO: | ||
581 | dprintk("SYSTEM IO addr space\n"); | ||
582 | data->cpu_feature = SYSTEM_IO_CAPABLE; | ||
583 | break; | ||
584 | case ACPI_ADR_SPACE_FIXED_HARDWARE: | ||
585 | dprintk("HARDWARE addr space\n"); | ||
586 | if (!check_est_cpu(cpu)) { | ||
587 | result = -ENODEV; | ||
588 | goto err_unreg; | ||
589 | } | ||
590 | data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; | ||
591 | break; | ||
592 | default: | ||
593 | dprintk("Unknown addr space %d\n", | ||
594 | (u32) (perf->control_register.space_id)); | ||
595 | result = -ENODEV; | ||
596 | goto err_unreg; | ||
597 | } | ||
598 | |||
599 | data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * | ||
600 | (perf->state_count+1), GFP_KERNEL); | ||
601 | if (!data->freq_table) { | ||
602 | result = -ENOMEM; | ||
603 | goto err_unreg; | ||
604 | } | ||
605 | |||
606 | /* detect transition latency */ | ||
607 | policy->cpuinfo.transition_latency = 0; | ||
608 | for (i = 0; i < perf->state_count; i++) { | ||
609 | if ((perf->states[i].transition_latency * 1000) > | ||
610 | policy->cpuinfo.transition_latency) | ||
611 | policy->cpuinfo.transition_latency = | ||
612 | perf->states[i].transition_latency * 1000; | ||
613 | } | ||
614 | |||
615 | /* Check for high latency (>20uS) from buggy BIOSes, like on T42 */ | ||
616 | if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE && | ||
617 | policy->cpuinfo.transition_latency > 20 * 1000) { | ||
618 | policy->cpuinfo.transition_latency = 20 * 1000; | ||
619 | printk_once(KERN_INFO | ||
620 | "P-state transition latency capped at 20 uS\n"); | ||
621 | } | ||
622 | |||
623 | /* table init */ | ||
624 | for (i = 0; i < perf->state_count; i++) { | ||
625 | if (i > 0 && perf->states[i].core_frequency >= | ||
626 | data->freq_table[valid_states-1].frequency / 1000) | ||
627 | continue; | ||
628 | |||
629 | data->freq_table[valid_states].index = i; | ||
630 | data->freq_table[valid_states].frequency = | ||
631 | perf->states[i].core_frequency * 1000; | ||
632 | valid_states++; | ||
633 | } | ||
634 | data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END; | ||
635 | perf->state = 0; | ||
636 | |||
637 | result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table); | ||
638 | if (result) | ||
639 | goto err_freqfree; | ||
640 | |||
641 | if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq) | ||
642 | printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n"); | ||
643 | |||
644 | switch (perf->control_register.space_id) { | ||
645 | case ACPI_ADR_SPACE_SYSTEM_IO: | ||
646 | /* Current speed is unknown and not detectable by IO port */ | ||
647 | policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu); | ||
648 | break; | ||
649 | case ACPI_ADR_SPACE_FIXED_HARDWARE: | ||
650 | acpi_cpufreq_driver.get = get_cur_freq_on_cpu; | ||
651 | policy->cur = get_cur_freq_on_cpu(cpu); | ||
652 | break; | ||
653 | default: | ||
654 | break; | ||
655 | } | ||
656 | |||
657 | /* notify BIOS that we exist */ | ||
658 | acpi_processor_notify_smm(THIS_MODULE); | ||
659 | |||
660 | /* Check for APERF/MPERF support in hardware */ | ||
661 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) | ||
662 | acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf; | ||
663 | |||
664 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); | ||
665 | for (i = 0; i < perf->state_count; i++) | ||
666 | dprintk(" %cP%d: %d MHz, %d mW, %d uS\n", | ||
667 | (i == perf->state ? '*' : ' '), i, | ||
668 | (u32) perf->states[i].core_frequency, | ||
669 | (u32) perf->states[i].power, | ||
670 | (u32) perf->states[i].transition_latency); | ||
671 | |||
672 | cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu); | ||
673 | |||
674 | /* | ||
675 | * the first call to ->target() should result in us actually | ||
676 | * writing something to the appropriate registers. | ||
677 | */ | ||
678 | data->resume = 1; | ||
679 | |||
680 | return result; | ||
681 | |||
682 | err_freqfree: | ||
683 | kfree(data->freq_table); | ||
684 | err_unreg: | ||
685 | acpi_processor_unregister_performance(perf, cpu); | ||
686 | err_free: | ||
687 | kfree(data); | ||
688 | per_cpu(acfreq_data, cpu) = NULL; | ||
689 | |||
690 | return result; | ||
691 | } | ||
692 | |||
693 | static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) | ||
694 | { | ||
695 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); | ||
696 | |||
697 | dprintk("acpi_cpufreq_cpu_exit\n"); | ||
698 | |||
699 | if (data) { | ||
700 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
701 | per_cpu(acfreq_data, policy->cpu) = NULL; | ||
702 | acpi_processor_unregister_performance(data->acpi_data, | ||
703 | policy->cpu); | ||
704 | kfree(data->freq_table); | ||
705 | kfree(data); | ||
706 | } | ||
707 | |||
708 | return 0; | ||
709 | } | ||
710 | |||
711 | static int acpi_cpufreq_resume(struct cpufreq_policy *policy) | ||
712 | { | ||
713 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); | ||
714 | |||
715 | dprintk("acpi_cpufreq_resume\n"); | ||
716 | |||
717 | data->resume = 1; | ||
718 | |||
719 | return 0; | ||
720 | } | ||
721 | |||
722 | static struct freq_attr *acpi_cpufreq_attr[] = { | ||
723 | &cpufreq_freq_attr_scaling_available_freqs, | ||
724 | NULL, | ||
725 | }; | ||
726 | |||
727 | static struct cpufreq_driver acpi_cpufreq_driver = { | ||
728 | .verify = acpi_cpufreq_verify, | ||
729 | .target = acpi_cpufreq_target, | ||
730 | .bios_limit = acpi_processor_get_bios_limit, | ||
731 | .init = acpi_cpufreq_cpu_init, | ||
732 | .exit = acpi_cpufreq_cpu_exit, | ||
733 | .resume = acpi_cpufreq_resume, | ||
734 | .name = "acpi-cpufreq", | ||
735 | .owner = THIS_MODULE, | ||
736 | .attr = acpi_cpufreq_attr, | ||
737 | }; | ||
738 | |||
739 | static int __init acpi_cpufreq_init(void) | ||
740 | { | ||
741 | int ret; | ||
742 | |||
743 | if (acpi_disabled) | ||
744 | return 0; | ||
745 | |||
746 | dprintk("acpi_cpufreq_init\n"); | ||
747 | |||
748 | ret = acpi_cpufreq_early_init(); | ||
749 | if (ret) | ||
750 | return ret; | ||
751 | |||
752 | ret = cpufreq_register_driver(&acpi_cpufreq_driver); | ||
753 | if (ret) | ||
754 | free_acpi_perf_data(); | ||
755 | |||
756 | return ret; | ||
757 | } | ||
758 | |||
759 | static void __exit acpi_cpufreq_exit(void) | ||
760 | { | ||
761 | dprintk("acpi_cpufreq_exit\n"); | ||
762 | |||
763 | cpufreq_unregister_driver(&acpi_cpufreq_driver); | ||
764 | |||
765 | free_percpu(acpi_perf_data); | ||
766 | } | ||
767 | |||
768 | module_param(acpi_pstate_strict, uint, 0644); | ||
769 | MODULE_PARM_DESC(acpi_pstate_strict, | ||
770 | "value 0 or non-zero. non-zero -> strict ACPI checks are " | ||
771 | "performed during frequency changes."); | ||
772 | |||
773 | late_initcall(acpi_cpufreq_init); | ||
774 | module_exit(acpi_cpufreq_exit); | ||
775 | |||
776 | MODULE_ALIAS("acpi"); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c deleted file mode 100644 index 141abebc4516..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ /dev/null | |||
@@ -1,446 +0,0 @@ | |||
1 | /* | ||
2 | * (C) 2004-2006 Sebastian Witt <se.witt@gmx.net> | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * Based upon reverse engineered information | ||
6 | * | ||
7 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
8 | */ | ||
9 | |||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/moduleparam.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/cpufreq.h> | ||
15 | #include <linux/pci.h> | ||
16 | #include <linux/delay.h> | ||
17 | |||
/* Factor used by nforce2_calc_fsb(): FSB = NFORCE2_XTAL * mul / div */
#define NFORCE2_XTAL 25
/* PCI config offset (dword) of the boot-time FSB on the 0x01EF device */
#define NFORCE2_BOOTFSB 0x48
/* PCI config offsets on nforce2_dev: enable byte, PLL value, PLL address */
#define NFORCE2_PLLENABLE 0xa8
#define NFORCE2_PLLREG 0xa4
#define NFORCE2_PLLADR 0xa0
/*
 * Build a PLL register value: multiplier in bits 8-15, divider in bits 0-7.
 * Arguments are parenthesized so that expression arguments expand correctly.
 */
#define NFORCE2_PLL(mul, div) (0x100000 | ((mul) << 8) | (div))

/* Lowest FSB accepted by nforce2_set_fsb() */
#define NFORCE2_MIN_FSB 50
/* NOTE(review): presumably the default distance of min_fsb below the
 * current FSB, as the min_fsb parameter description suggests - confirm. */
#define NFORCE2_SAFE_DISTANCE 50
27 | |||
28 | /* Delay in ms between FSB changes */ | ||
29 | /* #define NFORCE2_DELAY 10 */ | ||
30 | |||
/*
 * nforce2_dev:
 * chipset PCI device through which the FSB is changed
 */
static struct pci_dev *nforce2_dev;

/*
 * fid:
 * CPU multiplier * 10 (module parameter)
 */
static int fid;

/*
 * min_fsb, max_fsb:
 * minimum and maximum FSB (maximum = FSB at boot time)
 */
static int min_fsb;
static int max_fsb;
47 | |||
48 | MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>"); | ||
49 | MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver"); | ||
50 | MODULE_LICENSE("GPL"); | ||
51 | |||
52 | module_param(fid, int, 0444); | ||
53 | module_param(min_fsb, int, 0444); | ||
54 | |||
55 | MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); | ||
56 | MODULE_PARM_DESC(min_fsb, | ||
57 | "Minimum FSB to use, if not defined: current FSB - 50"); | ||
58 | |||
/* Prefix for this driver's printk() messages. */
#define PFX "cpufreq-nforce2: "
/* Driver-level debug output, tagged with this driver's name. */
#define dprintk(msg...) \
	cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg)
62 | |||
63 | /** | ||
64 | * nforce2_calc_fsb - calculate FSB | ||
65 | * @pll: PLL value | ||
66 | * | ||
67 | * Calculates FSB from PLL value | ||
68 | */ | ||
69 | static int nforce2_calc_fsb(int pll) | ||
70 | { | ||
71 | unsigned char mul, div; | ||
72 | |||
73 | mul = (pll >> 8) & 0xff; | ||
74 | div = pll & 0xff; | ||
75 | |||
76 | if (div > 0) | ||
77 | return NFORCE2_XTAL * mul / div; | ||
78 | |||
79 | return 0; | ||
80 | } | ||
81 | |||
/**
 * nforce2_calc_pll - calculate PLL value
 * @fsb: FSB
 *
 * Searches all dividers 2..0x80 and multipliers 1..0xfe for a pair that
 * nforce2_calc_fsb() maps to @fsb.  If none exists, @fsb + 1, @fsb + 2 and
 * @fsb + 3 are tried in turn.  Returns the PLL value of the pair found, or
 * -1 when all four attempts fail.
 */
static int nforce2_calc_pll(unsigned int fsb)
{
	unsigned char mul = 0, div = 0;
	unsigned char m, d;
	int attempt;

	/* Try to calculate multiplier and divider up to 4 times */
	for (attempt = 0;
	     attempt <= 3 && (mul == 0 || div == 0); attempt++) {
		for (d = 2; d <= 0x80; d++) {
			for (m = 1; m <= 0xfe; m++) {
				if (nforce2_calc_fsb(NFORCE2_PLL(m, d)) !=
				    fsb + attempt)
					continue;

				/* no break: the last matching pair wins */
				mul = m;
				div = d;
			}
		}
	}

	if (mul == 0 || div == 0)
		return -1;

	return NFORCE2_PLL(mul, div);
}
111 | |||
112 | /** | ||
113 | * nforce2_write_pll - write PLL value to chipset | ||
114 | * @pll: PLL value | ||
115 | * | ||
116 | * Writes new FSB PLL value to chipset | ||
117 | */ | ||
118 | static void nforce2_write_pll(int pll) | ||
119 | { | ||
120 | int temp; | ||
121 | |||
122 | /* Set the pll addr. to 0x00 */ | ||
123 | pci_write_config_dword(nforce2_dev, NFORCE2_PLLADR, 0); | ||
124 | |||
125 | /* Now write the value in all 64 registers */ | ||
126 | for (temp = 0; temp <= 0x3f; temp++) | ||
127 | pci_write_config_dword(nforce2_dev, NFORCE2_PLLREG, pll); | ||
128 | |||
129 | return; | ||
130 | } | ||
131 | |||
132 | /** | ||
133 | * nforce2_fsb_read - Read FSB | ||
134 | * | ||
135 | * Read FSB from chipset | ||
136 | * If bootfsb != 0, return FSB at boot-time | ||
137 | */ | ||
138 | static unsigned int nforce2_fsb_read(int bootfsb) | ||
139 | { | ||
140 | struct pci_dev *nforce2_sub5; | ||
141 | u32 fsb, temp = 0; | ||
142 | |||
143 | /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ | ||
144 | nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 0x01EF, | ||
145 | PCI_ANY_ID, PCI_ANY_ID, NULL); | ||
146 | if (!nforce2_sub5) | ||
147 | return 0; | ||
148 | |||
149 | pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb); | ||
150 | fsb /= 1000000; | ||
151 | |||
152 | /* Check if PLL register is already set */ | ||
153 | pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp); | ||
154 | |||
155 | if (bootfsb || !temp) | ||
156 | return fsb; | ||
157 | |||
158 | /* Use PLL register FSB value */ | ||
159 | pci_read_config_dword(nforce2_dev, NFORCE2_PLLREG, &temp); | ||
160 | fsb = nforce2_calc_fsb(temp); | ||
161 | |||
162 | return fsb; | ||
163 | } | ||
164 | |||
165 | /** | ||
166 | * nforce2_set_fsb - set new FSB | ||
167 | * @fsb: New FSB | ||
168 | * | ||
169 | * Sets new FSB | ||
170 | */ | ||
171 | static int nforce2_set_fsb(unsigned int fsb) | ||
172 | { | ||
173 | u32 temp = 0; | ||
174 | unsigned int tfsb; | ||
175 | int diff; | ||
176 | int pll = 0; | ||
177 | |||
178 | if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) { | ||
179 | printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb); | ||
180 | return -EINVAL; | ||
181 | } | ||
182 | |||
183 | tfsb = nforce2_fsb_read(0); | ||
184 | if (!tfsb) { | ||
185 | printk(KERN_ERR PFX "Error while reading the FSB\n"); | ||
186 | return -EINVAL; | ||
187 | } | ||
188 | |||
189 | /* First write? Then set actual value */ | ||
190 | pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp); | ||
191 | if (!temp) { | ||
192 | pll = nforce2_calc_pll(tfsb); | ||
193 | |||
194 | if (pll < 0) | ||
195 | return -EINVAL; | ||
196 | |||
197 | nforce2_write_pll(pll); | ||
198 | } | ||
199 | |||
200 | /* Enable write access */ | ||
201 | temp = 0x01; | ||
202 | pci_write_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8)temp); | ||
203 | |||
204 | diff = tfsb - fsb; | ||
205 | |||
206 | if (!diff) | ||
207 | return 0; | ||
208 | |||
209 | while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) { | ||
210 | if (diff < 0) | ||
211 | tfsb++; | ||
212 | else | ||
213 | tfsb--; | ||
214 | |||
215 | /* Calculate the PLL reg. value */ | ||
216 | pll = nforce2_calc_pll(tfsb); | ||
217 | if (pll == -1) | ||
218 | return -EINVAL; | ||
219 | |||
220 | nforce2_write_pll(pll); | ||
221 | #ifdef NFORCE2_DELAY | ||
222 | mdelay(NFORCE2_DELAY); | ||
223 | #endif | ||
224 | } | ||
225 | |||
226 | temp = 0x40; | ||
227 | pci_write_config_byte(nforce2_dev, NFORCE2_PLLADR, (u8)temp); | ||
228 | |||
229 | return 0; | ||
230 | } | ||
231 | |||
232 | /** | ||
233 | * nforce2_get - get the CPU frequency | ||
234 | * @cpu: CPU number | ||
235 | * | ||
236 | * Returns the CPU frequency | ||
237 | */ | ||
238 | static unsigned int nforce2_get(unsigned int cpu) | ||
239 | { | ||
240 | if (cpu) | ||
241 | return 0; | ||
242 | return nforce2_fsb_read(0) * fid * 100; | ||
243 | } | ||
244 | |||
245 | /** | ||
246 | * nforce2_target - set a new CPUFreq policy | ||
247 | * @policy: new policy | ||
248 | * @target_freq: the target frequency | ||
249 | * @relation: how that frequency relates to achieved frequency | ||
250 | * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | ||
251 | * | ||
252 | * Sets a new CPUFreq policy. | ||
253 | */ | ||
254 | static int nforce2_target(struct cpufreq_policy *policy, | ||
255 | unsigned int target_freq, unsigned int relation) | ||
256 | { | ||
257 | /* unsigned long flags; */ | ||
258 | struct cpufreq_freqs freqs; | ||
259 | unsigned int target_fsb; | ||
260 | |||
261 | if ((target_freq > policy->max) || (target_freq < policy->min)) | ||
262 | return -EINVAL; | ||
263 | |||
264 | target_fsb = target_freq / (fid * 100); | ||
265 | |||
266 | freqs.old = nforce2_get(policy->cpu); | ||
267 | freqs.new = target_fsb * fid * 100; | ||
268 | freqs.cpu = 0; /* Only one CPU on nForce2 platforms */ | ||
269 | |||
270 | if (freqs.old == freqs.new) | ||
271 | return 0; | ||
272 | |||
273 | dprintk("Old CPU frequency %d kHz, new %d kHz\n", | ||
274 | freqs.old, freqs.new); | ||
275 | |||
276 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
277 | |||
278 | /* Disable IRQs */ | ||
279 | /* local_irq_save(flags); */ | ||
280 | |||
281 | if (nforce2_set_fsb(target_fsb) < 0) | ||
282 | printk(KERN_ERR PFX "Changing FSB to %d failed\n", | ||
283 | target_fsb); | ||
284 | else | ||
285 | dprintk("Changed FSB successfully to %d\n", | ||
286 | target_fsb); | ||
287 | |||
288 | /* Enable IRQs */ | ||
289 | /* local_irq_restore(flags); */ | ||
290 | |||
291 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
292 | |||
293 | return 0; | ||
294 | } | ||
295 | |||
296 | /** | ||
297 | * nforce2_verify - verifies a new CPUFreq policy | ||
298 | * @policy: new policy | ||
299 | */ | ||
300 | static int nforce2_verify(struct cpufreq_policy *policy) | ||
301 | { | ||
302 | unsigned int fsb_pol_max; | ||
303 | |||
304 | fsb_pol_max = policy->max / (fid * 100); | ||
305 | |||
306 | if (policy->min < (fsb_pol_max * fid * 100)) | ||
307 | policy->max = (fsb_pol_max + 1) * fid * 100; | ||
308 | |||
309 | cpufreq_verify_within_limits(policy, | ||
310 | policy->cpuinfo.min_freq, | ||
311 | policy->cpuinfo.max_freq); | ||
312 | return 0; | ||
313 | } | ||
314 | |||
315 | static int nforce2_cpu_init(struct cpufreq_policy *policy) | ||
316 | { | ||
317 | unsigned int fsb; | ||
318 | unsigned int rfid; | ||
319 | |||
320 | /* capability check */ | ||
321 | if (policy->cpu != 0) | ||
322 | return -ENODEV; | ||
323 | |||
324 | /* Get current FSB */ | ||
325 | fsb = nforce2_fsb_read(0); | ||
326 | |||
327 | if (!fsb) | ||
328 | return -EIO; | ||
329 | |||
330 | /* FIX: Get FID from CPU */ | ||
331 | if (!fid) { | ||
332 | if (!cpu_khz) { | ||
333 | printk(KERN_WARNING PFX | ||
334 | "cpu_khz not set, can't calculate multiplier!\n"); | ||
335 | return -ENODEV; | ||
336 | } | ||
337 | |||
338 | fid = cpu_khz / (fsb * 100); | ||
339 | rfid = fid % 5; | ||
340 | |||
341 | if (rfid) { | ||
342 | if (rfid > 2) | ||
343 | fid += 5 - rfid; | ||
344 | else | ||
345 | fid -= rfid; | ||
346 | } | ||
347 | } | ||
348 | |||
349 | printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb, | ||
350 | fid / 10, fid % 10); | ||
351 | |||
352 | /* Set maximum FSB to FSB at boot time */ | ||
353 | max_fsb = nforce2_fsb_read(1); | ||
354 | |||
355 | if (!max_fsb) | ||
356 | return -EIO; | ||
357 | |||
358 | if (!min_fsb) | ||
359 | min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE; | ||
360 | |||
361 | if (min_fsb < NFORCE2_MIN_FSB) | ||
362 | min_fsb = NFORCE2_MIN_FSB; | ||
363 | |||
364 | /* cpuinfo and default policy values */ | ||
365 | policy->cpuinfo.min_freq = min_fsb * fid * 100; | ||
366 | policy->cpuinfo.max_freq = max_fsb * fid * 100; | ||
367 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
368 | policy->cur = nforce2_get(policy->cpu); | ||
369 | policy->min = policy->cpuinfo.min_freq; | ||
370 | policy->max = policy->cpuinfo.max_freq; | ||
371 | |||
372 | return 0; | ||
373 | } | ||
374 | |||
/* nforce2_cpu_exit - per-policy teardown; nothing to undo, always 0 */
static int nforce2_cpu_exit(struct cpufreq_policy *policy)
{
	(void)policy;	/* unused */

	return 0;
}
379 | |||
380 | static struct cpufreq_driver nforce2_driver = { | ||
381 | .name = "nforce2", | ||
382 | .verify = nforce2_verify, | ||
383 | .target = nforce2_target, | ||
384 | .get = nforce2_get, | ||
385 | .init = nforce2_cpu_init, | ||
386 | .exit = nforce2_cpu_exit, | ||
387 | .owner = THIS_MODULE, | ||
388 | }; | ||
389 | |||
390 | /** | ||
391 | * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic | ||
392 | * | ||
393 | * Detects nForce2 A2 and C1 stepping | ||
394 | * | ||
395 | */ | ||
396 | static int nforce2_detect_chipset(void) | ||
397 | { | ||
398 | nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, | ||
399 | PCI_DEVICE_ID_NVIDIA_NFORCE2, | ||
400 | PCI_ANY_ID, PCI_ANY_ID, NULL); | ||
401 | |||
402 | if (nforce2_dev == NULL) | ||
403 | return -ENODEV; | ||
404 | |||
405 | printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n", | ||
406 | nforce2_dev->revision); | ||
407 | printk(KERN_INFO PFX | ||
408 | "FSB changing is maybe unstable and can lead to " | ||
409 | "crashes and data loss.\n"); | ||
410 | |||
411 | return 0; | ||
412 | } | ||
413 | |||
414 | /** | ||
415 | * nforce2_init - initializes the nForce2 CPUFreq driver | ||
416 | * | ||
417 | * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported | ||
418 | * devices, -EINVAL on problems during initiatization, and zero on | ||
419 | * success. | ||
420 | */ | ||
421 | static int __init nforce2_init(void) | ||
422 | { | ||
423 | /* TODO: do we need to detect the processor? */ | ||
424 | |||
425 | /* detect chipset */ | ||
426 | if (nforce2_detect_chipset()) { | ||
427 | printk(KERN_INFO PFX "No nForce2 chipset.\n"); | ||
428 | return -ENODEV; | ||
429 | } | ||
430 | |||
431 | return cpufreq_register_driver(&nforce2_driver); | ||
432 | } | ||
433 | |||
434 | /** | ||
435 | * nforce2_exit - unregisters cpufreq module | ||
436 | * | ||
437 | * Unregisters nForce2 FSB change support. | ||
438 | */ | ||
439 | static void __exit nforce2_exit(void) | ||
440 | { | ||
441 | cpufreq_unregister_driver(&nforce2_driver); | ||
442 | } | ||
443 | |||
444 | module_init(nforce2_init); | ||
445 | module_exit(nforce2_exit); | ||
446 | |||
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c deleted file mode 100644 index 35a257dd4bb7..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c +++ /dev/null | |||
@@ -1,367 +0,0 @@ | |||
1 | /* | ||
2 | * Based on documentation provided by Dave Jones. Thanks! | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * | ||
6 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
7 | */ | ||
8 | |||
9 | #include <linux/kernel.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/cpufreq.h> | ||
13 | #include <linux/ioport.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include <linux/timex.h> | ||
16 | #include <linux/io.h> | ||
17 | #include <linux/delay.h> | ||
18 | |||
19 | #include <asm/msr.h> | ||
20 | #include <asm/tsc.h> | ||
21 | |||
22 | #define EPS_BRAND_C7M 0 | ||
23 | #define EPS_BRAND_C7 1 | ||
24 | #define EPS_BRAND_EDEN 2 | ||
25 | #define EPS_BRAND_C3 3 | ||
26 | #define EPS_BRAND_C7D 4 | ||
27 | |||
28 | struct eps_cpu_data { | ||
29 | u32 fsb; | ||
30 | struct cpufreq_frequency_table freq_table[]; | ||
31 | }; | ||
32 | |||
33 | static struct eps_cpu_data *eps_cpu[NR_CPUS]; | ||
34 | |||
35 | |||
36 | static unsigned int eps_get(unsigned int cpu) | ||
37 | { | ||
38 | struct eps_cpu_data *centaur; | ||
39 | u32 lo, hi; | ||
40 | |||
41 | if (cpu) | ||
42 | return 0; | ||
43 | centaur = eps_cpu[cpu]; | ||
44 | if (centaur == NULL) | ||
45 | return 0; | ||
46 | |||
47 | /* Return current frequency */ | ||
48 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
49 | return centaur->fsb * ((lo >> 8) & 0xff); | ||
50 | } | ||
51 | |||
52 | static int eps_set_state(struct eps_cpu_data *centaur, | ||
53 | unsigned int cpu, | ||
54 | u32 dest_state) | ||
55 | { | ||
56 | struct cpufreq_freqs freqs; | ||
57 | u32 lo, hi; | ||
58 | int err = 0; | ||
59 | int i; | ||
60 | |||
61 | freqs.old = eps_get(cpu); | ||
62 | freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff); | ||
63 | freqs.cpu = cpu; | ||
64 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
65 | |||
66 | /* Wait while CPU is busy */ | ||
67 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
68 | i = 0; | ||
69 | while (lo & ((1 << 16) | (1 << 17))) { | ||
70 | udelay(16); | ||
71 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
72 | i++; | ||
73 | if (unlikely(i > 64)) { | ||
74 | err = -ENODEV; | ||
75 | goto postchange; | ||
76 | } | ||
77 | } | ||
78 | /* Set new multiplier and voltage */ | ||
79 | wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0); | ||
80 | /* Wait until transition end */ | ||
81 | i = 0; | ||
82 | do { | ||
83 | udelay(16); | ||
84 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
85 | i++; | ||
86 | if (unlikely(i > 64)) { | ||
87 | err = -ENODEV; | ||
88 | goto postchange; | ||
89 | } | ||
90 | } while (lo & ((1 << 16) | (1 << 17))); | ||
91 | |||
92 | /* Return current frequency */ | ||
93 | postchange: | ||
94 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
95 | freqs.new = centaur->fsb * ((lo >> 8) & 0xff); | ||
96 | |||
97 | #ifdef DEBUG | ||
98 | { | ||
99 | u8 current_multiplier, current_voltage; | ||
100 | |||
101 | /* Print voltage and multiplier */ | ||
102 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
103 | current_voltage = lo & 0xff; | ||
104 | printk(KERN_INFO "eps: Current voltage = %dmV\n", | ||
105 | current_voltage * 16 + 700); | ||
106 | current_multiplier = (lo >> 8) & 0xff; | ||
107 | printk(KERN_INFO "eps: Current multiplier = %d\n", | ||
108 | current_multiplier); | ||
109 | } | ||
110 | #endif | ||
111 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
112 | return err; | ||
113 | } | ||
114 | |||
115 | static int eps_target(struct cpufreq_policy *policy, | ||
116 | unsigned int target_freq, | ||
117 | unsigned int relation) | ||
118 | { | ||
119 | struct eps_cpu_data *centaur; | ||
120 | unsigned int newstate = 0; | ||
121 | unsigned int cpu = policy->cpu; | ||
122 | unsigned int dest_state; | ||
123 | int ret; | ||
124 | |||
125 | if (unlikely(eps_cpu[cpu] == NULL)) | ||
126 | return -ENODEV; | ||
127 | centaur = eps_cpu[cpu]; | ||
128 | |||
129 | if (unlikely(cpufreq_frequency_table_target(policy, | ||
130 | &eps_cpu[cpu]->freq_table[0], | ||
131 | target_freq, | ||
132 | relation, | ||
133 | &newstate))) { | ||
134 | return -EINVAL; | ||
135 | } | ||
136 | |||
137 | /* Make frequency transition */ | ||
138 | dest_state = centaur->freq_table[newstate].index & 0xffff; | ||
139 | ret = eps_set_state(centaur, cpu, dest_state); | ||
140 | if (ret) | ||
141 | printk(KERN_ERR "eps: Timeout!\n"); | ||
142 | return ret; | ||
143 | } | ||
144 | |||
145 | static int eps_verify(struct cpufreq_policy *policy) | ||
146 | { | ||
147 | return cpufreq_frequency_table_verify(policy, | ||
148 | &eps_cpu[policy->cpu]->freq_table[0]); | ||
149 | } | ||
150 | |||
151 | static int eps_cpu_init(struct cpufreq_policy *policy) | ||
152 | { | ||
153 | unsigned int i; | ||
154 | u32 lo, hi; | ||
155 | u64 val; | ||
156 | u8 current_multiplier, current_voltage; | ||
157 | u8 max_multiplier, max_voltage; | ||
158 | u8 min_multiplier, min_voltage; | ||
159 | u8 brand = 0; | ||
160 | u32 fsb; | ||
161 | struct eps_cpu_data *centaur; | ||
162 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
163 | struct cpufreq_frequency_table *f_table; | ||
164 | int k, step, voltage; | ||
165 | int ret; | ||
166 | int states; | ||
167 | |||
168 | if (policy->cpu != 0) | ||
169 | return -ENODEV; | ||
170 | |||
171 | /* Check brand */ | ||
172 | printk(KERN_INFO "eps: Detected VIA "); | ||
173 | |||
174 | switch (c->x86_model) { | ||
175 | case 10: | ||
176 | rdmsr(0x1153, lo, hi); | ||
177 | brand = (((lo >> 2) ^ lo) >> 18) & 3; | ||
178 | printk(KERN_CONT "Model A "); | ||
179 | break; | ||
180 | case 13: | ||
181 | rdmsr(0x1154, lo, hi); | ||
182 | brand = (((lo >> 4) ^ (lo >> 2))) & 0x000000ff; | ||
183 | printk(KERN_CONT "Model D "); | ||
184 | break; | ||
185 | } | ||
186 | |||
187 | switch (brand) { | ||
188 | case EPS_BRAND_C7M: | ||
189 | printk(KERN_CONT "C7-M\n"); | ||
190 | break; | ||
191 | case EPS_BRAND_C7: | ||
192 | printk(KERN_CONT "C7\n"); | ||
193 | break; | ||
194 | case EPS_BRAND_EDEN: | ||
195 | printk(KERN_CONT "Eden\n"); | ||
196 | break; | ||
197 | case EPS_BRAND_C7D: | ||
198 | printk(KERN_CONT "C7-D\n"); | ||
199 | break; | ||
200 | case EPS_BRAND_C3: | ||
201 | printk(KERN_CONT "C3\n"); | ||
202 | return -ENODEV; | ||
203 | break; | ||
204 | } | ||
205 | /* Enable Enhanced PowerSaver */ | ||
206 | rdmsrl(MSR_IA32_MISC_ENABLE, val); | ||
207 | if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { | ||
208 | val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; | ||
209 | wrmsrl(MSR_IA32_MISC_ENABLE, val); | ||
210 | /* Can be locked at 0 */ | ||
211 | rdmsrl(MSR_IA32_MISC_ENABLE, val); | ||
212 | if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { | ||
213 | printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n"); | ||
214 | return -ENODEV; | ||
215 | } | ||
216 | } | ||
217 | |||
218 | /* Print voltage and multiplier */ | ||
219 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
220 | current_voltage = lo & 0xff; | ||
221 | printk(KERN_INFO "eps: Current voltage = %dmV\n", | ||
222 | current_voltage * 16 + 700); | ||
223 | current_multiplier = (lo >> 8) & 0xff; | ||
224 | printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier); | ||
225 | |||
226 | /* Print limits */ | ||
227 | max_voltage = hi & 0xff; | ||
228 | printk(KERN_INFO "eps: Highest voltage = %dmV\n", | ||
229 | max_voltage * 16 + 700); | ||
230 | max_multiplier = (hi >> 8) & 0xff; | ||
231 | printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier); | ||
232 | min_voltage = (hi >> 16) & 0xff; | ||
233 | printk(KERN_INFO "eps: Lowest voltage = %dmV\n", | ||
234 | min_voltage * 16 + 700); | ||
235 | min_multiplier = (hi >> 24) & 0xff; | ||
236 | printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier); | ||
237 | |||
238 | /* Sanity checks */ | ||
239 | if (current_multiplier == 0 || max_multiplier == 0 | ||
240 | || min_multiplier == 0) | ||
241 | return -EINVAL; | ||
242 | if (current_multiplier > max_multiplier | ||
243 | || max_multiplier <= min_multiplier) | ||
244 | return -EINVAL; | ||
245 | if (current_voltage > 0x1f || max_voltage > 0x1f) | ||
246 | return -EINVAL; | ||
247 | if (max_voltage < min_voltage) | ||
248 | return -EINVAL; | ||
249 | |||
250 | /* Calc FSB speed */ | ||
251 | fsb = cpu_khz / current_multiplier; | ||
252 | /* Calc number of p-states supported */ | ||
253 | if (brand == EPS_BRAND_C7M) | ||
254 | states = max_multiplier - min_multiplier + 1; | ||
255 | else | ||
256 | states = 2; | ||
257 | |||
258 | /* Allocate private data and frequency table for current cpu */ | ||
259 | centaur = kzalloc(sizeof(struct eps_cpu_data) | ||
260 | + (states + 1) * sizeof(struct cpufreq_frequency_table), | ||
261 | GFP_KERNEL); | ||
262 | if (!centaur) | ||
263 | return -ENOMEM; | ||
264 | eps_cpu[0] = centaur; | ||
265 | |||
266 | /* Copy basic values */ | ||
267 | centaur->fsb = fsb; | ||
268 | |||
269 | /* Fill frequency and MSR value table */ | ||
270 | f_table = ¢aur->freq_table[0]; | ||
271 | if (brand != EPS_BRAND_C7M) { | ||
272 | f_table[0].frequency = fsb * min_multiplier; | ||
273 | f_table[0].index = (min_multiplier << 8) | min_voltage; | ||
274 | f_table[1].frequency = fsb * max_multiplier; | ||
275 | f_table[1].index = (max_multiplier << 8) | max_voltage; | ||
276 | f_table[2].frequency = CPUFREQ_TABLE_END; | ||
277 | } else { | ||
278 | k = 0; | ||
279 | step = ((max_voltage - min_voltage) * 256) | ||
280 | / (max_multiplier - min_multiplier); | ||
281 | for (i = min_multiplier; i <= max_multiplier; i++) { | ||
282 | voltage = (k * step) / 256 + min_voltage; | ||
283 | f_table[k].frequency = fsb * i; | ||
284 | f_table[k].index = (i << 8) | voltage; | ||
285 | k++; | ||
286 | } | ||
287 | f_table[k].frequency = CPUFREQ_TABLE_END; | ||
288 | } | ||
289 | |||
290 | policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */ | ||
291 | policy->cur = fsb * current_multiplier; | ||
292 | |||
293 | ret = cpufreq_frequency_table_cpuinfo(policy, ¢aur->freq_table[0]); | ||
294 | if (ret) { | ||
295 | kfree(centaur); | ||
296 | return ret; | ||
297 | } | ||
298 | |||
299 | cpufreq_frequency_table_get_attr(¢aur->freq_table[0], policy->cpu); | ||
300 | return 0; | ||
301 | } | ||
302 | |||
303 | static int eps_cpu_exit(struct cpufreq_policy *policy) | ||
304 | { | ||
305 | unsigned int cpu = policy->cpu; | ||
306 | struct eps_cpu_data *centaur; | ||
307 | u32 lo, hi; | ||
308 | |||
309 | if (eps_cpu[cpu] == NULL) | ||
310 | return -ENODEV; | ||
311 | centaur = eps_cpu[cpu]; | ||
312 | |||
313 | /* Get max frequency */ | ||
314 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
315 | /* Set max frequency */ | ||
316 | eps_set_state(centaur, cpu, hi & 0xffff); | ||
317 | /* Bye */ | ||
318 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
319 | kfree(eps_cpu[cpu]); | ||
320 | eps_cpu[cpu] = NULL; | ||
321 | return 0; | ||
322 | } | ||
323 | |||
324 | static struct freq_attr *eps_attr[] = { | ||
325 | &cpufreq_freq_attr_scaling_available_freqs, | ||
326 | NULL, | ||
327 | }; | ||
328 | |||
329 | static struct cpufreq_driver eps_driver = { | ||
330 | .verify = eps_verify, | ||
331 | .target = eps_target, | ||
332 | .init = eps_cpu_init, | ||
333 | .exit = eps_cpu_exit, | ||
334 | .get = eps_get, | ||
335 | .name = "e_powersaver", | ||
336 | .owner = THIS_MODULE, | ||
337 | .attr = eps_attr, | ||
338 | }; | ||
339 | |||
340 | static int __init eps_init(void) | ||
341 | { | ||
342 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
343 | |||
344 | /* This driver will work only on Centaur C7 processors with | ||
345 | * Enhanced SpeedStep/PowerSaver registers */ | ||
346 | if (c->x86_vendor != X86_VENDOR_CENTAUR | ||
347 | || c->x86 != 6 || c->x86_model < 10) | ||
348 | return -ENODEV; | ||
349 | if (!cpu_has(c, X86_FEATURE_EST)) | ||
350 | return -ENODEV; | ||
351 | |||
352 | if (cpufreq_register_driver(&eps_driver)) | ||
353 | return -EINVAL; | ||
354 | return 0; | ||
355 | } | ||
356 | |||
357 | static void __exit eps_exit(void) | ||
358 | { | ||
359 | cpufreq_unregister_driver(&eps_driver); | ||
360 | } | ||
361 | |||
362 | MODULE_AUTHOR("Rafal Bilski <rafalbilski@interia.pl>"); | ||
363 | MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's."); | ||
364 | MODULE_LICENSE("GPL"); | ||
365 | |||
366 | module_init(eps_init); | ||
367 | module_exit(eps_exit); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c deleted file mode 100644 index c587db472a75..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ /dev/null | |||
@@ -1,309 +0,0 @@ | |||
1 | /* | ||
2 | * elanfreq: cpufreq driver for the AMD ELAN family | ||
3 | * | ||
4 | * (c) Copyright 2002 Robert Schwebel <r.schwebel@pengutronix.de> | ||
5 | * | ||
6 | * Parts of this code are (c) Sven Geggus <sven@geggus.net> | ||
7 | * | ||
8 | * All Rights Reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * 2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel | ||
16 | * | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/init.h> | ||
22 | |||
23 | #include <linux/delay.h> | ||
24 | #include <linux/cpufreq.h> | ||
25 | |||
26 | #include <asm/msr.h> | ||
27 | #include <linux/timex.h> | ||
28 | #include <linux/io.h> | ||
29 | |||
30 | #define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ | ||
31 | #define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ | ||
32 | |||
33 | /* Module parameter */ | ||
34 | static int max_freq; | ||
35 | |||
/* Register settings that select one CPU clock */
struct s_elan_multiplier {
	int clock;		/* frequency in kHz */
	int val40h;		/* PMU Force Mode register */
	int val80h;		/* CPU Clock Speed Register */
};

/*
 * It is important that the frequencies
 * are listed in ascending order here!
 */
static struct s_elan_multiplier elan_multiplier[] = {
	{ .clock =  1000, .val40h = 0x02, .val80h = 0x18 },
	{ .clock =  2000, .val40h = 0x02, .val80h = 0x10 },
	{ .clock =  4000, .val40h = 0x02, .val80h = 0x08 },
	{ .clock =  8000, .val40h = 0x00, .val80h = 0x00 },
	{ .clock = 16000, .val40h = 0x00, .val80h = 0x02 },
	{ .clock = 33000, .val40h = 0x00, .val80h = 0x04 },
	{ .clock = 66000, .val40h = 0x01, .val80h = 0x04 },
	{ .clock = 99000, .val40h = 0x01, .val80h = 0x05 },
};
56 | |||
57 | static struct cpufreq_frequency_table elanfreq_table[] = { | ||
58 | {0, 1000}, | ||
59 | {1, 2000}, | ||
60 | {2, 4000}, | ||
61 | {3, 8000}, | ||
62 | {4, 16000}, | ||
63 | {5, 33000}, | ||
64 | {6, 66000}, | ||
65 | {7, 99000}, | ||
66 | {0, CPUFREQ_TABLE_END}, | ||
67 | }; | ||
68 | |||
69 | |||
70 | /** | ||
71 | * elanfreq_get_cpu_frequency: determine current cpu speed | ||
72 | * | ||
73 | * Finds out at which frequency the CPU of the Elan SOC runs | ||
74 | * at the moment. Frequencies from 1 to 33 MHz are generated | ||
75 | * the normal way, 66 and 99 MHz are called "Hyperspeed Mode" | ||
76 | * and have the rest of the chip running with 33 MHz. | ||
77 | */ | ||
78 | |||
79 | static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) | ||
80 | { | ||
81 | u8 clockspeed_reg; /* Clock Speed Register */ | ||
82 | |||
83 | local_irq_disable(); | ||
84 | outb_p(0x80, REG_CSCIR); | ||
85 | clockspeed_reg = inb_p(REG_CSCDR); | ||
86 | local_irq_enable(); | ||
87 | |||
88 | if ((clockspeed_reg & 0xE0) == 0xE0) | ||
89 | return 0; | ||
90 | |||
91 | /* Are we in CPU clock multiplied mode (66/99 MHz)? */ | ||
92 | if ((clockspeed_reg & 0xE0) == 0xC0) { | ||
93 | if ((clockspeed_reg & 0x01) == 0) | ||
94 | return 66000; | ||
95 | else | ||
96 | return 99000; | ||
97 | } | ||
98 | |||
99 | /* 33 MHz is not 32 MHz... */ | ||
100 | if ((clockspeed_reg & 0xE0) == 0xA0) | ||
101 | return 33000; | ||
102 | |||
103 | return (1<<((clockspeed_reg & 0xE0) >> 5)) * 1000; | ||
104 | } | ||
105 | |||
106 | |||
107 | /** | ||
108 | * elanfreq_set_cpu_frequency: Change the CPU core frequency | ||
109 | * @cpu: cpu number | ||
110 | * @freq: frequency in kHz | ||
111 | * | ||
112 | * This function takes a frequency value and changes the CPU frequency | ||
113 | * according to this. Note that the frequency has to be checked by | ||
114 | * elanfreq_validatespeed() for correctness! | ||
115 | * | ||
116 | * There is no return value. | ||
117 | */ | ||
118 | |||
119 | static void elanfreq_set_cpu_state(unsigned int state) | ||
120 | { | ||
121 | struct cpufreq_freqs freqs; | ||
122 | |||
123 | freqs.old = elanfreq_get_cpu_frequency(0); | ||
124 | freqs.new = elan_multiplier[state].clock; | ||
125 | freqs.cpu = 0; /* elanfreq.c is UP only driver */ | ||
126 | |||
127 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
128 | |||
129 | printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n", | ||
130 | elan_multiplier[state].clock); | ||
131 | |||
132 | |||
133 | /* | ||
134 | * Access to the Elan's internal registers is indexed via | ||
135 | * 0x22: Chip Setup & Control Register Index Register (CSCI) | ||
136 | * 0x23: Chip Setup & Control Register Data Register (CSCD) | ||
137 | * | ||
138 | */ | ||
139 | |||
140 | /* | ||
141 | * 0x40 is the Power Management Unit's Force Mode Register. | ||
142 | * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency) | ||
143 | */ | ||
144 | |||
145 | local_irq_disable(); | ||
146 | outb_p(0x40, REG_CSCIR); /* Disable hyperspeed mode */ | ||
147 | outb_p(0x00, REG_CSCDR); | ||
148 | local_irq_enable(); /* wait till internal pipelines and */ | ||
149 | udelay(1000); /* buffers have cleaned up */ | ||
150 | |||
151 | local_irq_disable(); | ||
152 | |||
153 | /* now, set the CPU clock speed register (0x80) */ | ||
154 | outb_p(0x80, REG_CSCIR); | ||
155 | outb_p(elan_multiplier[state].val80h, REG_CSCDR); | ||
156 | |||
157 | /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */ | ||
158 | outb_p(0x40, REG_CSCIR); | ||
159 | outb_p(elan_multiplier[state].val40h, REG_CSCDR); | ||
160 | udelay(10000); | ||
161 | local_irq_enable(); | ||
162 | |||
163 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
164 | }; | ||
165 | |||
166 | |||
167 | /** | ||
168 | * elanfreq_validatespeed: test if frequency range is valid | ||
169 | * @policy: the policy to validate | ||
170 | * | ||
171 | * This function checks if a given frequency range in kHz is valid | ||
172 | * for the hardware supported by the driver. | ||
173 | */ | ||
174 | |||
175 | static int elanfreq_verify(struct cpufreq_policy *policy) | ||
176 | { | ||
177 | return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); | ||
178 | } | ||
179 | |||
180 | static int elanfreq_target(struct cpufreq_policy *policy, | ||
181 | unsigned int target_freq, | ||
182 | unsigned int relation) | ||
183 | { | ||
184 | unsigned int newstate = 0; | ||
185 | |||
186 | if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], | ||
187 | target_freq, relation, &newstate)) | ||
188 | return -EINVAL; | ||
189 | |||
190 | elanfreq_set_cpu_state(newstate); | ||
191 | |||
192 | return 0; | ||
193 | } | ||
194 | |||
195 | |||
196 | /* | ||
197 | * Module init and exit code | ||
198 | */ | ||
199 | |||
200 | static int elanfreq_cpu_init(struct cpufreq_policy *policy) | ||
201 | { | ||
202 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
203 | unsigned int i; | ||
204 | int result; | ||
205 | |||
206 | /* capability check */ | ||
207 | if ((c->x86_vendor != X86_VENDOR_AMD) || | ||
208 | (c->x86 != 4) || (c->x86_model != 10)) | ||
209 | return -ENODEV; | ||
210 | |||
211 | /* max freq */ | ||
212 | if (!max_freq) | ||
213 | max_freq = elanfreq_get_cpu_frequency(0); | ||
214 | |||
215 | /* table init */ | ||
216 | for (i = 0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { | ||
217 | if (elanfreq_table[i].frequency > max_freq) | ||
218 | elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; | ||
219 | } | ||
220 | |||
221 | /* cpuinfo and default policy values */ | ||
222 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
223 | policy->cur = elanfreq_get_cpu_frequency(0); | ||
224 | |||
225 | result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table); | ||
226 | if (result) | ||
227 | return result; | ||
228 | |||
229 | cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); | ||
230 | return 0; | ||
231 | } | ||
232 | |||
233 | |||
234 | static int elanfreq_cpu_exit(struct cpufreq_policy *policy) | ||
235 | { | ||
236 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
237 | return 0; | ||
238 | } | ||
239 | |||
240 | |||
241 | #ifndef MODULE | ||
242 | /** | ||
243 | * elanfreq_setup - elanfreq command line parameter parsing | ||
244 | * | ||
245 | * elanfreq command line parameter. Use: | ||
246 | * elanfreq=66000 | ||
247 | * to set the maximum CPU frequency to 66 MHz. Note that in | ||
248 | * case you do not give this boot parameter, the maximum | ||
249 | * frequency will fall back to _current_ CPU frequency which | ||
250 | * might be lower. If you build this as a module, use the | ||
251 | * max_freq module parameter instead. | ||
252 | */ | ||
253 | static int __init elanfreq_setup(char *str) | ||
254 | { | ||
255 | max_freq = simple_strtoul(str, &str, 0); | ||
256 | printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n"); | ||
257 | return 1; | ||
258 | } | ||
259 | __setup("elanfreq=", elanfreq_setup); | ||
260 | #endif | ||
261 | |||
262 | |||
263 | static struct freq_attr *elanfreq_attr[] = { | ||
264 | &cpufreq_freq_attr_scaling_available_freqs, | ||
265 | NULL, | ||
266 | }; | ||
267 | |||
268 | |||
269 | static struct cpufreq_driver elanfreq_driver = { | ||
270 | .get = elanfreq_get_cpu_frequency, | ||
271 | .verify = elanfreq_verify, | ||
272 | .target = elanfreq_target, | ||
273 | .init = elanfreq_cpu_init, | ||
274 | .exit = elanfreq_cpu_exit, | ||
275 | .name = "elanfreq", | ||
276 | .owner = THIS_MODULE, | ||
277 | .attr = elanfreq_attr, | ||
278 | }; | ||
279 | |||
280 | |||
281 | static int __init elanfreq_init(void) | ||
282 | { | ||
283 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
284 | |||
285 | /* Test if we have the right hardware */ | ||
286 | if ((c->x86_vendor != X86_VENDOR_AMD) || | ||
287 | (c->x86 != 4) || (c->x86_model != 10)) { | ||
288 | printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); | ||
289 | return -ENODEV; | ||
290 | } | ||
291 | return cpufreq_register_driver(&elanfreq_driver); | ||
292 | } | ||
293 | |||
294 | |||
295 | static void __exit elanfreq_exit(void) | ||
296 | { | ||
297 | cpufreq_unregister_driver(&elanfreq_driver); | ||
298 | } | ||
299 | |||
300 | |||
301 | module_param(max_freq, int, 0444); | ||
302 | |||
303 | MODULE_LICENSE("GPL"); | ||
304 | MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, " | ||
305 | "Sven Geggus <sven@geggus.net>"); | ||
306 | MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs"); | ||
307 | |||
308 | module_init(elanfreq_init); | ||
309 | module_exit(elanfreq_exit); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c deleted file mode 100644 index 32974cf84232..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ /dev/null | |||
@@ -1,517 +0,0 @@ | |||
1 | /* | ||
2 | * Cyrix MediaGX and NatSemi Geode Suspend Modulation | ||
3 | * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com> | ||
4 | * (C) 2002 Hiroshi Miura <miura@da-cha.org> | ||
5 | * All Rights Reserved | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * version 2 as published by the Free Software Foundation | ||
10 | * | ||
11 | * The author(s) of this software shall not be held liable for damages | ||
12 | * of any nature resulting due to the use of this software. This | ||
13 | * software is provided AS-IS with no warranties. | ||
14 | * | ||
15 | * Theoretical note: | ||
16 | * | ||
17 | * (see Geode(tm) CS5530 manual (rev.4.1) page.56) | ||
18 | * | ||
19 | * CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0 | ||
20 | * are based on Suspend Modulation. | ||
21 | * | ||
22 | * Suspend Modulation works by asserting and de-asserting the SUSP# pin | ||
23 | * to CPU(GX1/GXLV) for configurable durations. When asserting SUSP# | ||
24 | * the CPU enters an idle state. GX1 stops its core clock when SUSP# is | ||
25 | * asserted then power consumption is reduced. | ||
26 | * | ||
27 | * Suspend Modulation's OFF/ON duration are configurable | ||
28 | * with 'Suspend Modulation OFF Count Register' | ||
29 | * and 'Suspend Modulation ON Count Register'. | ||
30 | * These registers are 8bit counters that represent the number of | ||
31 | * 32us intervals which the SUSP# pin is asserted(ON)/de-asserted(OFF) | ||
32 | * to the processor. | ||
33 | * | ||
34 | * These counters define a ratio which is the effective frequency | ||
35 | * of operation of the system. | ||
36 | * | ||
37 | * OFF Count | ||
38 | * F_eff = Fgx * ---------------------- | ||
39 | * OFF Count + ON Count | ||
40 | * | ||
41 | * 0 <= On Count, Off Count <= 255 | ||
42 | * | ||
43 | * From these limits, we can get register values | ||
44 | * | ||
45 | * off_duration + on_duration <= MAX_DURATION | ||
46 | * on_duration = off_duration * (stock_freq - freq) / freq | ||
47 | * | ||
48 | * off_duration = (freq * DURATION) / stock_freq | ||
49 | * on_duration = DURATION - off_duration | ||
50 | * | ||
51 | * | ||
52 | *--------------------------------------------------------------------------- | ||
53 | * | ||
54 | * ChangeLog: | ||
55 | * Dec. 12, 2003 Hiroshi Miura <miura@da-cha.org> | ||
56 | * - fix on/off register mistake | ||
57 | * - fix cpu_khz calc when it stops cpu modulation. | ||
58 | * | ||
59 | * Dec. 11, 2002 Hiroshi Miura <miura@da-cha.org> | ||
60 | * - rewrite for Cyrix MediaGX Cx5510/5520 and | ||
61 | * NatSemi Geode Cs5530(A). | ||
62 | * | ||
63 | * Jul. ??, 2002 Zwane Mwaikambo <zwane@commfireservices.com> | ||
64 | * - cs5530_mod patch for 2.4.19-rc1. | ||
65 | * | ||
66 | *--------------------------------------------------------------------------- | ||
67 | * | ||
68 | * Todo | ||
69 | * Test on machines with 5510, 5530, 5530A | ||
70 | */ | ||
71 | |||
72 | /************************************************************************ | ||
73 | * Suspend Modulation - Definitions * | ||
74 | ************************************************************************/ | ||
75 | |||
76 | #include <linux/kernel.h> | ||
77 | #include <linux/module.h> | ||
78 | #include <linux/init.h> | ||
79 | #include <linux/smp.h> | ||
80 | #include <linux/cpufreq.h> | ||
81 | #include <linux/pci.h> | ||
82 | #include <linux/errno.h> | ||
83 | #include <linux/slab.h> | ||
84 | |||
85 | #include <asm/processor-cyrix.h> | ||
86 | |||
87 | /* PCI config registers, all at F0 */ | ||
88 | #define PCI_PMER1 0x80 /* power management enable register 1 */ | ||
89 | #define PCI_PMER2 0x81 /* power management enable register 2 */ | ||
90 | #define PCI_PMER3 0x82 /* power management enable register 3 */ | ||
91 | #define PCI_IRQTC 0x8c /* irq speedup timer counter register:typical 2 to 4ms */ | ||
92 | #define PCI_VIDTC 0x8d /* video speedup timer counter register: typical 50 to 100ms */ | ||
93 | #define PCI_MODOFF 0x94 /* suspend modulation OFF counter register, 1 = 32us */ | ||
94 | #define PCI_MODON 0x95 /* suspend modulation ON counter register */ | ||
95 | #define PCI_SUSCFG 0x96 /* suspend configuration register */ | ||
96 | |||
97 | /* PMER1 bits */ | ||
98 | #define GPM (1<<0) /* global power management */ | ||
99 | #define GIT (1<<1) /* globally enable PM device idle timers */ | ||
100 | #define GTR (1<<2) /* globally enable IO traps */ | ||
101 | #define IRQ_SPDUP (1<<3) /* disable clock throttle during interrupt handling */ | ||
102 | #define VID_SPDUP (1<<4) /* disable clock throttle during vga video handling */ | ||
103 | |||
104 | /* SUSCFG bits */ | ||
105 | #define SUSMOD (1<<0) /* enable/disable suspend modulation */ | ||
106 | /* the below is supported only with cs5530 (after rev.1.2)/cs5530A */ | ||
107 | #define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */ | ||
108 | /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */ | ||
109 | #define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */ | ||
110 | /* the below is supported only with cs5530A */ | ||
111 | #define PWRSVE_ISA (1<<3) /* stop ISA clock */ | ||
112 | #define PWRSVE (1<<4) /* active idle */ | ||
113 | |||
/*
 * Driver state: the companion chip plus cached copies of the configuration
 * registers this driver reads at init time and rewrites on speed changes.
 */
struct gxfreq_params {
	u8 on_duration;		/* cached PCI_MODON counter value */
	u8 off_duration;	/* cached PCI_MODOFF counter value */
	u8 pci_suscfg;		/* cached PCI_SUSCFG register */
	u8 pci_pmer1;		/* PCI_PMER1 as read at init time */
	u8 pci_pmer2;		/* PCI_PMER2 as read at init time */
	struct pci_dev *cs55x0;	/* companion chip returned by gx_detect_chipset() */
};
122 | |||
123 | static struct gxfreq_params *gx_params; | ||
124 | static int stock_freq; | ||
125 | |||
126 | /* PCI bus clock - defaults to 30.000 if cpu_khz is not available */ | ||
127 | static int pci_busclk; | ||
128 | module_param(pci_busclk, int, 0444); | ||
129 | |||
130 | /* maximum duration for which the cpu may be suspended | ||
131 | * (32us * MAX_DURATION). If no parameter is given, this defaults | ||
132 | * to 255. | ||
133 | * Note that this leads to a maximum of 8 ms(!) where the CPU clock | ||
134 | * is suspended -- processing power is just 0.39% of what it used to be, | ||
135 | * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */ | ||
136 | static int max_duration = 255; | ||
137 | module_param(max_duration, int, 0444); | ||
138 | |||
139 | /* For the default policy, we want at least some processing power | ||
140 | * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV) | ||
141 | */ | ||
142 | #define POLICY_MIN_DIV 20 | ||
143 | |||
144 | |||
145 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
146 | "gx-suspmod", msg) | ||
147 | |||
148 | /** | ||
149 | * we can detect a core multiplier from dir0_lsb | ||
150 | * from GX1 datasheet p.56, | ||
151 | * MULT[3:0]: | ||
152 | * 0000 = SYSCLK multiplied by 4 (test only) | ||
153 | * 0001 = SYSCLK multiplied by 10 | ||
154 | * 0010 = SYSCLK multiplied by 4 | ||
155 | * 0011 = SYSCLK multiplied by 6 | ||
156 | * 0100 = SYSCLK multiplied by 9 | ||
157 | * 0101 = SYSCLK multiplied by 5 | ||
158 | * 0110 = SYSCLK multiplied by 7 | ||
159 | * 0111 = SYSCLK multiplied by 8 | ||
160 | * of 33.3MHz | ||
161 | **/ | ||
/* SYSCLK multiplier indexed by the 4-bit MULT field; unlisted codes are 0 */
static int gx_freq_mult[16] = {
	[0] = 4,	/* test only */
	[1] = 10,
	[2] = 4,
	[3] = 6,
	[4] = 9,
	[5] = 5,
	[6] = 7,
	[7] = 8,
};
166 | |||
167 | |||
168 | /**************************************************************** | ||
169 | * Low Level chipset interface * | ||
170 | ****************************************************************/ | ||
171 | static struct pci_device_id gx_chipset_tbl[] __initdata = { | ||
172 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY), }, | ||
173 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5520), }, | ||
174 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5510), }, | ||
175 | { 0, }, | ||
176 | }; | ||
177 | |||
178 | static void gx_write_byte(int reg, int value) | ||
179 | { | ||
180 | pci_write_config_byte(gx_params->cs55x0, reg, value); | ||
181 | } | ||
182 | |||
183 | /** | ||
184 | * gx_detect_chipset: | ||
185 | * | ||
186 | **/ | ||
187 | static __init struct pci_dev *gx_detect_chipset(void) | ||
188 | { | ||
189 | struct pci_dev *gx_pci = NULL; | ||
190 | |||
191 | /* check if CPU is a MediaGX or a Geode. */ | ||
192 | if ((boot_cpu_data.x86_vendor != X86_VENDOR_NSC) && | ||
193 | (boot_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) { | ||
194 | dprintk("error: no MediaGX/Geode processor found!\n"); | ||
195 | return NULL; | ||
196 | } | ||
197 | |||
198 | /* detect which companion chip is used */ | ||
199 | for_each_pci_dev(gx_pci) { | ||
200 | if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) | ||
201 | return gx_pci; | ||
202 | } | ||
203 | |||
204 | dprintk("error: no supported chipset found!\n"); | ||
205 | return NULL; | ||
206 | } | ||
207 | |||
208 | /** | ||
209 | * gx_get_cpuspeed: | ||
210 | * | ||
211 | * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi | ||
212 | * Geode CPU runs. | ||
213 | */ | ||
214 | static unsigned int gx_get_cpuspeed(unsigned int cpu) | ||
215 | { | ||
216 | if ((gx_params->pci_suscfg & SUSMOD) == 0) | ||
217 | return stock_freq; | ||
218 | |||
219 | return (stock_freq * gx_params->off_duration) | ||
220 | / (gx_params->on_duration + gx_params->off_duration); | ||
221 | } | ||
222 | |||
/**
 * gx_validate_speed:
 * @khz: requested frequency
 * @on_duration: out - ON count of the chosen ratio
 * @off_duration: out - OFF count of the chosen ratio
 *
 * Walks every total duration from max_duration down to 1 and keeps the
 * ON/OFF split whose resulting frequency is at least as close to @khz as
 * the best one seen so far (starting from stock_freq).
 *
 * Returns the frequency of the chosen split.  When no split qualifies the
 * outputs stay at ON = 0, OFF = 1 and stock_freq is returned.
 **/

static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration,
		u8 *off_duration)
{
	unsigned int i;
	u8 tmp_on, tmp_off;
	int old_tmp_freq = stock_freq;
	int tmp_freq;

	/* defaults used when the loop below never finds a better match */
	*off_duration = 1;
	*on_duration = 0;

	for (i = max_duration; i > 0; i--) {
		/* OFF share of a period of i counts, truncated to 8 bits */
		tmp_off = ((khz * i) / stock_freq) & 0xff;
		tmp_on = i - tmp_off;
		/* frequency this split actually yields */
		tmp_freq = (stock_freq * tmp_off) / i;
		/* if this relation is closer to khz, use this. If it's equal,
		 * prefer it, too - lower latency */
		if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) {
			*on_duration = tmp_on;
			*off_duration = tmp_off;
			old_tmp_freq = tmp_freq;
		}
	}

	return old_tmp_freq;
}
255 | |||
256 | |||
257 | /** | ||
258 | * gx_set_cpuspeed: | ||
259 | * set cpu speed in khz. | ||
260 | **/ | ||
261 | |||
262 | static void gx_set_cpuspeed(unsigned int khz) | ||
263 | { | ||
264 | u8 suscfg, pmer1; | ||
265 | unsigned int new_khz; | ||
266 | unsigned long flags; | ||
267 | struct cpufreq_freqs freqs; | ||
268 | |||
269 | freqs.cpu = 0; | ||
270 | freqs.old = gx_get_cpuspeed(0); | ||
271 | |||
272 | new_khz = gx_validate_speed(khz, &gx_params->on_duration, | ||
273 | &gx_params->off_duration); | ||
274 | |||
275 | freqs.new = new_khz; | ||
276 | |||
277 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
278 | local_irq_save(flags); | ||
279 | |||
280 | |||
281 | |||
282 | if (new_khz != stock_freq) { | ||
283 | /* if new khz == 100% of CPU speed, it is special case */ | ||
284 | switch (gx_params->cs55x0->device) { | ||
285 | case PCI_DEVICE_ID_CYRIX_5530_LEGACY: | ||
286 | pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP; | ||
287 | /* FIXME: need to test other values -- Zwane,Miura */ | ||
288 | /* typical 2 to 4ms */ | ||
289 | gx_write_byte(PCI_IRQTC, 4); | ||
290 | /* typical 50 to 100ms */ | ||
291 | gx_write_byte(PCI_VIDTC, 100); | ||
292 | gx_write_byte(PCI_PMER1, pmer1); | ||
293 | |||
294 | if (gx_params->cs55x0->revision < 0x10) { | ||
295 | /* CS5530(rev 1.2, 1.3) */ | ||
296 | suscfg = gx_params->pci_suscfg|SUSMOD; | ||
297 | } else { | ||
298 | /* CS5530A,B.. */ | ||
299 | suscfg = gx_params->pci_suscfg|SUSMOD|PWRSVE; | ||
300 | } | ||
301 | break; | ||
302 | case PCI_DEVICE_ID_CYRIX_5520: | ||
303 | case PCI_DEVICE_ID_CYRIX_5510: | ||
304 | suscfg = gx_params->pci_suscfg | SUSMOD; | ||
305 | break; | ||
306 | default: | ||
307 | local_irq_restore(flags); | ||
308 | dprintk("fatal: try to set unknown chipset.\n"); | ||
309 | return; | ||
310 | } | ||
311 | } else { | ||
312 | suscfg = gx_params->pci_suscfg & ~(SUSMOD); | ||
313 | gx_params->off_duration = 0; | ||
314 | gx_params->on_duration = 0; | ||
315 | dprintk("suspend modulation disabled: cpu runs 100%% speed.\n"); | ||
316 | } | ||
317 | |||
318 | gx_write_byte(PCI_MODOFF, gx_params->off_duration); | ||
319 | gx_write_byte(PCI_MODON, gx_params->on_duration); | ||
320 | |||
321 | gx_write_byte(PCI_SUSCFG, suscfg); | ||
322 | pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg); | ||
323 | |||
324 | local_irq_restore(flags); | ||
325 | |||
326 | gx_params->pci_suscfg = suscfg; | ||
327 | |||
328 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
329 | |||
330 | dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", | ||
331 | gx_params->on_duration * 32, gx_params->off_duration * 32); | ||
332 | dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); | ||
333 | } | ||
334 | |||
335 | /**************************************************************** | ||
336 | * High level functions * | ||
337 | ****************************************************************/ | ||
338 | |||
339 | /* | ||
340 | * cpufreq_gx_verify: test if frequency range is valid | ||
341 | * | ||
342 | * This function checks if a given frequency range in kHz is valid | ||
343 | * for the hardware supported by the driver. | ||
344 | */ | ||
345 | |||
346 | static int cpufreq_gx_verify(struct cpufreq_policy *policy) | ||
347 | { | ||
348 | unsigned int tmp_freq = 0; | ||
349 | u8 tmp1, tmp2; | ||
350 | |||
351 | if (!stock_freq || !policy) | ||
352 | return -EINVAL; | ||
353 | |||
354 | policy->cpu = 0; | ||
355 | cpufreq_verify_within_limits(policy, (stock_freq / max_duration), | ||
356 | stock_freq); | ||
357 | |||
358 | /* it needs to be assured that at least one supported frequency is | ||
359 | * within policy->min and policy->max. If it is not, policy->max | ||
360 | * needs to be increased until one freuqency is supported. | ||
361 | * policy->min may not be decreased, though. This way we guarantee a | ||
362 | * specific processing capacity. | ||
363 | */ | ||
364 | tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2); | ||
365 | if (tmp_freq < policy->min) | ||
366 | tmp_freq += stock_freq / max_duration; | ||
367 | policy->min = tmp_freq; | ||
368 | if (policy->min > policy->max) | ||
369 | policy->max = tmp_freq; | ||
370 | tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2); | ||
371 | if (tmp_freq > policy->max) | ||
372 | tmp_freq -= stock_freq / max_duration; | ||
373 | policy->max = tmp_freq; | ||
374 | if (policy->max < policy->min) | ||
375 | policy->max = policy->min; | ||
376 | cpufreq_verify_within_limits(policy, (stock_freq / max_duration), | ||
377 | stock_freq); | ||
378 | |||
379 | return 0; | ||
380 | } | ||
381 | |||
382 | /* | ||
383 | * cpufreq_gx_target: | ||
384 | * | ||
385 | */ | ||
386 | static int cpufreq_gx_target(struct cpufreq_policy *policy, | ||
387 | unsigned int target_freq, | ||
388 | unsigned int relation) | ||
389 | { | ||
390 | u8 tmp1, tmp2; | ||
391 | unsigned int tmp_freq; | ||
392 | |||
393 | if (!stock_freq || !policy) | ||
394 | return -EINVAL; | ||
395 | |||
396 | policy->cpu = 0; | ||
397 | |||
398 | tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2); | ||
399 | while (tmp_freq < policy->min) { | ||
400 | tmp_freq += stock_freq / max_duration; | ||
401 | tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); | ||
402 | } | ||
403 | while (tmp_freq > policy->max) { | ||
404 | tmp_freq -= stock_freq / max_duration; | ||
405 | tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); | ||
406 | } | ||
407 | |||
408 | gx_set_cpuspeed(tmp_freq); | ||
409 | |||
410 | return 0; | ||
411 | } | ||
412 | |||
413 | static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) | ||
414 | { | ||
415 | unsigned int maxfreq, curfreq; | ||
416 | |||
417 | if (!policy || policy->cpu != 0) | ||
418 | return -ENODEV; | ||
419 | |||
420 | /* determine maximum frequency */ | ||
421 | if (pci_busclk) | ||
422 | maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; | ||
423 | else if (cpu_khz) | ||
424 | maxfreq = cpu_khz; | ||
425 | else | ||
426 | maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; | ||
427 | |||
428 | stock_freq = maxfreq; | ||
429 | curfreq = gx_get_cpuspeed(0); | ||
430 | |||
431 | dprintk("cpu max frequency is %d.\n", maxfreq); | ||
432 | dprintk("cpu current frequency is %dkHz.\n", curfreq); | ||
433 | |||
434 | /* setup basic struct for cpufreq API */ | ||
435 | policy->cpu = 0; | ||
436 | |||
437 | if (max_duration < POLICY_MIN_DIV) | ||
438 | policy->min = maxfreq / max_duration; | ||
439 | else | ||
440 | policy->min = maxfreq / POLICY_MIN_DIV; | ||
441 | policy->max = maxfreq; | ||
442 | policy->cur = curfreq; | ||
443 | policy->cpuinfo.min_freq = maxfreq / max_duration; | ||
444 | policy->cpuinfo.max_freq = maxfreq; | ||
445 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
446 | |||
447 | return 0; | ||
448 | } | ||
449 | |||
450 | /* | ||
451 | * cpufreq_gx_init: | ||
452 | * MediaGX/Geode GX initialize cpufreq driver | ||
453 | */ | ||
454 | static struct cpufreq_driver gx_suspmod_driver = { | ||
455 | .get = gx_get_cpuspeed, | ||
456 | .verify = cpufreq_gx_verify, | ||
457 | .target = cpufreq_gx_target, | ||
458 | .init = cpufreq_gx_cpu_init, | ||
459 | .name = "gx-suspmod", | ||
460 | .owner = THIS_MODULE, | ||
461 | }; | ||
462 | |||
463 | static int __init cpufreq_gx_init(void) | ||
464 | { | ||
465 | int ret; | ||
466 | struct gxfreq_params *params; | ||
467 | struct pci_dev *gx_pci; | ||
468 | |||
469 | /* Test if we have the right hardware */ | ||
470 | gx_pci = gx_detect_chipset(); | ||
471 | if (gx_pci == NULL) | ||
472 | return -ENODEV; | ||
473 | |||
474 | /* check whether module parameters are sane */ | ||
475 | if (max_duration > 0xff) | ||
476 | max_duration = 0xff; | ||
477 | |||
478 | dprintk("geode suspend modulation available.\n"); | ||
479 | |||
480 | params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL); | ||
481 | if (params == NULL) | ||
482 | return -ENOMEM; | ||
483 | |||
484 | params->cs55x0 = gx_pci; | ||
485 | gx_params = params; | ||
486 | |||
487 | /* keep cs55x0 configurations */ | ||
488 | pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg)); | ||
489 | pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1)); | ||
490 | pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); | ||
491 | pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); | ||
492 | pci_read_config_byte(params->cs55x0, PCI_MODOFF, | ||
493 | &(params->off_duration)); | ||
494 | |||
495 | ret = cpufreq_register_driver(&gx_suspmod_driver); | ||
496 | if (ret) { | ||
497 | kfree(params); | ||
498 | return ret; /* register error! */ | ||
499 | } | ||
500 | |||
501 | return 0; | ||
502 | } | ||
503 | |||
504 | static void __exit cpufreq_gx_exit(void) | ||
505 | { | ||
506 | cpufreq_unregister_driver(&gx_suspmod_driver); | ||
507 | pci_dev_put(gx_params->cs55x0); | ||
508 | kfree(gx_params); | ||
509 | } | ||
510 | |||
511 | MODULE_AUTHOR("Hiroshi Miura <miura@da-cha.org>"); | ||
512 | MODULE_DESCRIPTION("Cpufreq driver for Cyrix MediaGX and NatSemi Geode"); | ||
513 | MODULE_LICENSE("GPL"); | ||
514 | |||
515 | module_init(cpufreq_gx_init); | ||
516 | module_exit(cpufreq_gx_exit); | ||
517 | |||
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c deleted file mode 100644 index cf48cdd6907d..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ /dev/null | |||
@@ -1,1029 +0,0 @@ | |||
1 | /* | ||
2 | * (C) 2001-2004 Dave Jones. <davej@redhat.com> | ||
3 | * (C) 2002 Padraig Brady. <padraig@antefacto.com> | ||
4 | * | ||
5 | * Licensed under the terms of the GNU GPL License version 2. | ||
6 | * Based upon datasheets & sample CPUs kindly provided by VIA. | ||
7 | * | ||
8 | * VIA have currently 3 different versions of Longhaul. | ||
9 | * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147. | ||
10 | * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0. | ||
11 | * Version 2 of longhaul is backward compatible with v1, but adds | ||
12 | * LONGHAUL MSR for purpose of both frequency and voltage scaling. | ||
13 | * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C). | ||
14 | * Version 3 of longhaul got renamed to Powersaver and redesigned | ||
15 | * to use only the POWERSAVER MSR at 0x110a. | ||
16 | * It is present in Ezra-T (C5M), Nehemiah (C5X) and above. | ||
17 | * It's pretty much the same feature wise to longhaul v2, though | ||
18 | * there is provision for scaling FSB too, but this doesn't work | ||
19 | * too well in practice so we don't even try to use this. | ||
20 | * | ||
21 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
22 | */ | ||
23 | |||
24 | #include <linux/kernel.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/moduleparam.h> | ||
27 | #include <linux/init.h> | ||
28 | #include <linux/cpufreq.h> | ||
29 | #include <linux/pci.h> | ||
30 | #include <linux/slab.h> | ||
31 | #include <linux/string.h> | ||
32 | #include <linux/delay.h> | ||
33 | #include <linux/timex.h> | ||
34 | #include <linux/io.h> | ||
35 | #include <linux/acpi.h> | ||
36 | |||
37 | #include <asm/msr.h> | ||
38 | #include <acpi/processor.h> | ||
39 | |||
40 | #include "longhaul.h" | ||
41 | |||
42 | #define PFX "longhaul: " | ||
43 | |||
44 | #define TYPE_LONGHAUL_V1 1 | ||
45 | #define TYPE_LONGHAUL_V2 2 | ||
46 | #define TYPE_POWERSAVER 3 | ||
47 | |||
48 | #define CPU_SAMUEL 1 | ||
49 | #define CPU_SAMUEL2 2 | ||
50 | #define CPU_EZRA 3 | ||
51 | #define CPU_EZRA_T 4 | ||
52 | #define CPU_NEHEMIAH 5 | ||
53 | #define CPU_NEHEMIAH_C 6 | ||
54 | |||
55 | /* Flags */ | ||
56 | #define USE_ACPI_C3 (1 << 1) | ||
57 | #define USE_NORTHBRIDGE (1 << 2) | ||
58 | |||
59 | static int cpu_model; | ||
60 | static unsigned int numscales = 16; | ||
61 | static unsigned int fsb; | ||
62 | |||
63 | static const struct mV_pos *vrm_mV_table; | ||
64 | static const unsigned char *mV_vrm_table; | ||
65 | |||
66 | static unsigned int highest_speed, lowest_speed; /* kHz */ | ||
67 | static unsigned int minmult, maxmult; | ||
68 | static int can_scale_voltage; | ||
69 | static struct acpi_processor *pr; | ||
70 | static struct acpi_processor_cx *cx; | ||
71 | static u32 acpi_regs_addr; | ||
72 | static u8 longhaul_flags; | ||
73 | static unsigned int longhaul_index; | ||
74 | |||
75 | /* Module parameters */ | ||
76 | static int scale_voltage; | ||
77 | static int disable_acpi_c3; | ||
78 | static int revid_errata; | ||
79 | |||
80 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
81 | "longhaul", msg) | ||
82 | |||
83 | |||
84 | /* Clock ratios multiplied by 10 */ | ||
85 | static int mults[32]; | ||
86 | static int eblcr[32]; | ||
87 | static int longhaul_version; | ||
88 | static struct cpufreq_frequency_table *longhaul_table; | ||
89 | |||
#ifdef CONFIG_CPU_FREQ_DEBUG
static char speedbuffer[8];

/*
 * Format @speed for debug output: "<n>MHz" below 1000, "<n>GHz" for exact
 * multiples of 1000, otherwise "<n>.<d>GHz" with one decimal digit.
 * The result lives in the shared static speedbuffer and is overwritten by
 * the next call.
 */
static char *print_speed(int speed)
{
	const int whole = speed / 1000;
	const int rest = speed % 1000;

	if (speed < 1000)
		snprintf(speedbuffer, sizeof(speedbuffer), "%dMHz", speed);
	else if (rest == 0)
		snprintf(speedbuffer, sizeof(speedbuffer), "%dGHz", whole);
	else
		snprintf(speedbuffer, sizeof(speedbuffer),
			 "%d.%dGHz", whole, rest / 100);

	return speedbuffer;
}
#endif
110 | |||
111 | |||
112 | static unsigned int calc_speed(int mult) | ||
113 | { | ||
114 | int khz; | ||
115 | khz = (mult/10)*fsb; | ||
116 | if (mult%10) | ||
117 | khz += fsb/2; | ||
118 | khz *= 1000; | ||
119 | return khz; | ||
120 | } | ||
121 | |||
122 | |||
123 | static int longhaul_get_cpu_mult(void) | ||
124 | { | ||
125 | unsigned long invalue = 0, lo, hi; | ||
126 | |||
127 | rdmsr(MSR_IA32_EBL_CR_POWERON, lo, hi); | ||
128 | invalue = (lo & (1<<22|1<<23|1<<24|1<<25))>>22; | ||
129 | if (longhaul_version == TYPE_LONGHAUL_V2 || | ||
130 | longhaul_version == TYPE_POWERSAVER) { | ||
131 | if (lo & (1<<27)) | ||
132 | invalue += 16; | ||
133 | } | ||
134 | return eblcr[invalue]; | ||
135 | } | ||
136 | |||
/*
 * For processor with BCR2 MSR
 *
 * Programs the software clock multiplier @mults_index (low 8 bits) into
 * MSR_VIA_BCR2, triggers the change with a cache flush plus halt, and
 * finally switches software multiplier control off again.
 * The order of the steps below is what performs the transition; do not
 * reorder them.
 */

static void do_longhaul1(unsigned int mults_index)
{
	union msr_bcr2 bcr2;

	rdmsrl(MSR_VIA_BCR2, bcr2.val);
	/* Enable software clock multiplier */
	bcr2.bits.ESOFTBF = 1;
	bcr2.bits.CLOCKMUL = mults_index & 0xff;

	/* Sync to timer tick */
	safe_halt();
	/* Change frequency on next halt or sleep */
	wrmsrl(MSR_VIA_BCR2, bcr2.val);
	/* Invoke transition */
	ACPI_FLUSH_CPU_CACHE();
	halt();

	/* Disable software clock multiplier */
	/* NOTE(review): presumably interrupts were re-enabled by the halt
	 * sequence above and the caller expects them off - confirm */
	local_irq_disable();
	rdmsrl(MSR_VIA_BCR2, bcr2.val);
	bcr2.bits.ESOFTBF = 0;
	wrmsrl(MSR_VIA_BCR2, bcr2.val);
}
162 | |||
163 | /* For processor with Longhaul MSR */ | ||
164 | |||
165 | static void do_powersaver(int cx_address, unsigned int mults_index, | ||
166 | unsigned int dir) | ||
167 | { | ||
168 | union msr_longhaul longhaul; | ||
169 | u32 t; | ||
170 | |||
171 | rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
172 | /* Setup new frequency */ | ||
173 | if (!revid_errata) | ||
174 | longhaul.bits.RevisionKey = longhaul.bits.RevisionID; | ||
175 | else | ||
176 | longhaul.bits.RevisionKey = 0; | ||
177 | longhaul.bits.SoftBusRatio = mults_index & 0xf; | ||
178 | longhaul.bits.SoftBusRatio4 = (mults_index & 0x10) >> 4; | ||
179 | /* Setup new voltage */ | ||
180 | if (can_scale_voltage) | ||
181 | longhaul.bits.SoftVID = (mults_index >> 8) & 0x1f; | ||
182 | /* Sync to timer tick */ | ||
183 | safe_halt(); | ||
184 | /* Raise voltage if necessary */ | ||
185 | if (can_scale_voltage && dir) { | ||
186 | longhaul.bits.EnableSoftVID = 1; | ||
187 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
188 | /* Change voltage */ | ||
189 | if (!cx_address) { | ||
190 | ACPI_FLUSH_CPU_CACHE(); | ||
191 | halt(); | ||
192 | } else { | ||
193 | ACPI_FLUSH_CPU_CACHE(); | ||
194 | /* Invoke C3 */ | ||
195 | inb(cx_address); | ||
196 | /* Dummy op - must do something useless after P_LVL3 | ||
197 | * read */ | ||
198 | t = inl(acpi_gbl_FADT.xpm_timer_block.address); | ||
199 | } | ||
200 | longhaul.bits.EnableSoftVID = 0; | ||
201 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
202 | } | ||
203 | |||
204 | /* Change frequency on next halt or sleep */ | ||
205 | longhaul.bits.EnableSoftBusRatio = 1; | ||
206 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
207 | if (!cx_address) { | ||
208 | ACPI_FLUSH_CPU_CACHE(); | ||
209 | halt(); | ||
210 | } else { | ||
211 | ACPI_FLUSH_CPU_CACHE(); | ||
212 | /* Invoke C3 */ | ||
213 | inb(cx_address); | ||
214 | /* Dummy op - must do something useless after P_LVL3 read */ | ||
215 | t = inl(acpi_gbl_FADT.xpm_timer_block.address); | ||
216 | } | ||
217 | /* Disable bus ratio bit */ | ||
218 | longhaul.bits.EnableSoftBusRatio = 0; | ||
219 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
220 | |||
221 | /* Reduce voltage if necessary */ | ||
222 | if (can_scale_voltage && !dir) { | ||
223 | longhaul.bits.EnableSoftVID = 1; | ||
224 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
225 | /* Change voltage */ | ||
226 | if (!cx_address) { | ||
227 | ACPI_FLUSH_CPU_CACHE(); | ||
228 | halt(); | ||
229 | } else { | ||
230 | ACPI_FLUSH_CPU_CACHE(); | ||
231 | /* Invoke C3 */ | ||
232 | inb(cx_address); | ||
233 | /* Dummy op - must do something useless after P_LVL3 | ||
234 | * read */ | ||
235 | t = inl(acpi_gbl_FADT.xpm_timer_block.address); | ||
236 | } | ||
237 | longhaul.bits.EnableSoftVID = 0; | ||
238 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
239 | } | ||
240 | } | ||
241 | |||
242 | /** | ||
243 | * longhaul_setstate() | ||
244 | * @table_index : index into longhaul_table of the entry to switch to. | ||
245 | * | ||
246 | * Sets a new clock ratio. | ||
247 | */ | ||
248 | |||
249 | static void longhaul_setstate(unsigned int table_index) | ||
250 | { | ||
251 | unsigned int mults_index; | ||
252 | int speed, mult; | ||
253 | struct cpufreq_freqs freqs; | ||
254 | unsigned long flags; | ||
255 | unsigned int pic1_mask, pic2_mask; | ||
256 | u16 bm_status = 0; | ||
257 | u32 bm_timeout = 1000; | ||
258 | unsigned int dir = 0; | ||
259 | |||
260 | mults_index = longhaul_table[table_index].index; | ||
261 | /* Safety precautions */ | ||
262 | mult = mults[mults_index & 0x1f]; | ||
263 | if (mult == -1) | ||
264 | return; | ||
265 | speed = calc_speed(mult); | ||
266 | if ((speed > highest_speed) || (speed < lowest_speed)) | ||
267 | return; | ||
268 | /* Voltage transition before frequency transition? */ | ||
269 | if (can_scale_voltage && longhaul_index < table_index) | ||
270 | dir = 1; | ||
271 | |||
272 | freqs.old = calc_speed(longhaul_get_cpu_mult()); | ||
273 | freqs.new = speed; | ||
274 | freqs.cpu = 0; /* longhaul.c is UP only driver */ | ||
275 | |||
276 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
277 | |||
278 | dprintk("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", | ||
279 | fsb, mult/10, mult%10, print_speed(speed/1000)); | ||
280 | retry_loop: | ||
281 | preempt_disable(); | ||
282 | local_irq_save(flags); | ||
283 | |||
284 | pic2_mask = inb(0xA1); | ||
285 | pic1_mask = inb(0x21); /* works on C3. save mask. */ | ||
286 | outb(0xFF, 0xA1); /* Overkill */ | ||
287 | outb(0xFE, 0x21); /* TMR0 only */ | ||
288 | |||
289 | /* Wait while PCI bus is busy. */ | ||
290 | if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE | ||
291 | || ((pr != NULL) && pr->flags.bm_control))) { | ||
292 | bm_status = inw(acpi_regs_addr); | ||
293 | bm_status &= 1 << 4; | ||
294 | while (bm_status && bm_timeout) { | ||
295 | outw(1 << 4, acpi_regs_addr); | ||
296 | bm_timeout--; | ||
297 | bm_status = inw(acpi_regs_addr); | ||
298 | bm_status &= 1 << 4; | ||
299 | } | ||
300 | } | ||
301 | |||
302 | if (longhaul_flags & USE_NORTHBRIDGE) { | ||
303 | /* Disable AGP and PCI arbiters */ | ||
304 | outb(3, 0x22); | ||
305 | } else if ((pr != NULL) && pr->flags.bm_control) { | ||
306 | /* Disable bus master arbitration */ | ||
307 | acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1); | ||
308 | } | ||
309 | switch (longhaul_version) { | ||
310 | |||
311 | /* | ||
312 | * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B]) | ||
313 | * Software controlled multipliers only. | ||
314 | */ | ||
315 | case TYPE_LONGHAUL_V1: | ||
316 | do_longhaul1(mults_index); | ||
317 | break; | ||
318 | |||
319 | /* | ||
320 | * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C] | ||
321 | * | ||
322 | * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N]) | ||
323 | * Nehemiah can do FSB scaling too, but this has never been proven | ||
324 | * to work in practice. | ||
325 | */ | ||
326 | case TYPE_LONGHAUL_V2: | ||
327 | case TYPE_POWERSAVER: | ||
328 | if (longhaul_flags & USE_ACPI_C3) { | ||
329 | /* Don't allow wakeup */ | ||
330 | acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 0); | ||
331 | do_powersaver(cx->address, mults_index, dir); | ||
332 | } else { | ||
333 | do_powersaver(0, mults_index, dir); | ||
334 | } | ||
335 | break; | ||
336 | } | ||
337 | |||
338 | if (longhaul_flags & USE_NORTHBRIDGE) { | ||
339 | /* Enable arbiters */ | ||
340 | outb(0, 0x22); | ||
341 | } else if ((pr != NULL) && pr->flags.bm_control) { | ||
342 | /* Enable bus master arbitration */ | ||
343 | acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0); | ||
344 | } | ||
345 | outb(pic2_mask, 0xA1); /* restore mask */ | ||
346 | outb(pic1_mask, 0x21); | ||
347 | |||
348 | local_irq_restore(flags); | ||
349 | preempt_enable(); | ||
350 | |||
351 | freqs.new = calc_speed(longhaul_get_cpu_mult()); | ||
352 | /* Check if requested frequency is set. */ | ||
353 | if (unlikely(freqs.new != speed)) { | ||
354 | printk(KERN_INFO PFX "Failed to set requested frequency!\n"); | ||
355 | /* Revision ID = 1 but processor is expecting revision key | ||
356 | * equal to 0. Jumpers at the bottom of processor will change | ||
357 | * multiplier and FSB, but will not change bits in Longhaul | ||
358 | * MSR nor enable voltage scaling. */ | ||
359 | if (!revid_errata) { | ||
360 | printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" " | ||
361 | "option.\n"); | ||
362 | revid_errata = 1; | ||
363 | msleep(200); | ||
364 | goto retry_loop; | ||
365 | } | ||
366 | /* Why ACPI C3 sometimes doesn't work is a mystery for me. | ||
367 | * But it does happen. Processor is entering ACPI C3 state, | ||
368 | * but it doesn't change frequency. I tried poking various | ||
369 | * bits in northbridge registers, but without success. */ | ||
370 | if (longhaul_flags & USE_ACPI_C3) { | ||
371 | printk(KERN_INFO PFX "Disabling ACPI C3 support.\n"); | ||
372 | longhaul_flags &= ~USE_ACPI_C3; | ||
373 | if (revid_errata) { | ||
374 | printk(KERN_INFO PFX "Disabling \"Ignore " | ||
375 | "Revision ID\" option.\n"); | ||
376 | revid_errata = 0; | ||
377 | } | ||
378 | msleep(200); | ||
379 | goto retry_loop; | ||
380 | } | ||
381 | /* This shouldn't happen. Longhaul ver. 2 was reported not | ||
382 | * working on processors without voltage scaling, but with | ||
383 | * RevID = 1. RevID errata will make things right. Just | ||
384 | * to be 100% sure. */ | ||
385 | if (longhaul_version == TYPE_LONGHAUL_V2) { | ||
386 | printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n"); | ||
387 | longhaul_version = TYPE_LONGHAUL_V1; | ||
388 | msleep(200); | ||
389 | goto retry_loop; | ||
390 | } | ||
391 | } | ||
392 | /* Report true CPU frequency */ | ||
393 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
394 | |||
395 | if (!bm_timeout) | ||
396 | printk(KERN_INFO PFX "Warning: Timeout while waiting for " | ||
397 | "idle PCI bus.\n"); | ||
398 | } | ||
399 | |||
400 | /* | ||
401 | * Centaur decided to make life a little more tricky. | ||
402 | * Only longhaul v1 is allowed to read EBLCR BSEL[0:1]. | ||
403 | * Samuel2 and above have to try and guess what the FSB is. | ||
404 | * We do this by assuming we booted at maximum multiplier, and interpolate | ||
405 | * between that value multiplied by possible FSBs and cpu_mhz which | ||
406 | * was calculated at boot time. Really ugly, but no other way to do this. | ||
407 | */ | ||
408 | |||
409 | #define ROUNDING 0xf | ||
410 | |||
411 | static int guess_fsb(int mult) | ||
412 | { | ||
413 | int speed = cpu_khz / 1000; | ||
414 | int i; | ||
415 | int speeds[] = { 666, 1000, 1333, 2000 }; | ||
416 | int f_max, f_min; | ||
417 | |||
418 | for (i = 0; i < 4; i++) { | ||
419 | f_max = ((speeds[i] * mult) + 50) / 100; | ||
420 | f_max += (ROUNDING / 2); | ||
421 | f_min = f_max - ROUNDING; | ||
422 | if ((speed <= f_max) && (speed >= f_min)) | ||
423 | return speeds[i] / 10; | ||
424 | } | ||
425 | return 0; | ||
426 | } | ||
427 | |||
428 | |||
429 | static int __cpuinit longhaul_get_ranges(void) | ||
430 | { | ||
431 | unsigned int i, j, k = 0; | ||
432 | unsigned int ratio; | ||
433 | int mult; | ||
434 | |||
435 | /* Get current frequency */ | ||
436 | mult = longhaul_get_cpu_mult(); | ||
437 | if (mult == -1) { | ||
438 | printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n"); | ||
439 | return -EINVAL; | ||
440 | } | ||
441 | fsb = guess_fsb(mult); | ||
442 | if (fsb == 0) { | ||
443 | printk(KERN_INFO PFX "Invalid (reserved) FSB!\n"); | ||
444 | return -EINVAL; | ||
445 | } | ||
446 | /* Get max multiplier - as we always did. | ||
447 | * Longhaul MSR is useful only when voltage scaling is enabled. | ||
448 | * C3 is booting at max anyway. */ | ||
449 | maxmult = mult; | ||
450 | /* Get min multiplier */ | ||
451 | switch (cpu_model) { | ||
452 | case CPU_NEHEMIAH: | ||
453 | minmult = 50; | ||
454 | break; | ||
455 | case CPU_NEHEMIAH_C: | ||
456 | minmult = 40; | ||
457 | break; | ||
458 | default: | ||
459 | minmult = 30; | ||
460 | break; | ||
461 | } | ||
462 | |||
463 | dprintk("MinMult:%d.%dx MaxMult:%d.%dx\n", | ||
464 | minmult/10, minmult%10, maxmult/10, maxmult%10); | ||
465 | |||
466 | highest_speed = calc_speed(maxmult); | ||
467 | lowest_speed = calc_speed(minmult); | ||
468 | dprintk("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, | ||
469 | print_speed(lowest_speed/1000), | ||
470 | print_speed(highest_speed/1000)); | ||
471 | |||
472 | if (lowest_speed == highest_speed) { | ||
473 | printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n"); | ||
474 | return -EINVAL; | ||
475 | } | ||
476 | if (lowest_speed > highest_speed) { | ||
477 | printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n", | ||
478 | lowest_speed, highest_speed); | ||
479 | return -EINVAL; | ||
480 | } | ||
481 | |||
482 | longhaul_table = kmalloc((numscales + 1) * sizeof(*longhaul_table), | ||
483 | GFP_KERNEL); | ||
484 | if (!longhaul_table) | ||
485 | return -ENOMEM; | ||
486 | |||
487 | for (j = 0; j < numscales; j++) { | ||
488 | ratio = mults[j]; | ||
489 | if (ratio == -1) | ||
490 | continue; | ||
491 | if (ratio > maxmult || ratio < minmult) | ||
492 | continue; | ||
493 | longhaul_table[k].frequency = calc_speed(ratio); | ||
494 | longhaul_table[k].index = j; | ||
495 | k++; | ||
496 | } | ||
497 | if (k <= 1) { | ||
498 | kfree(longhaul_table); | ||
499 | return -ENODEV; | ||
500 | } | ||
501 | /* Sort */ | ||
502 | for (j = 0; j < k - 1; j++) { | ||
503 | unsigned int min_f, min_i; | ||
504 | min_f = longhaul_table[j].frequency; | ||
505 | min_i = j; | ||
506 | for (i = j + 1; i < k; i++) { | ||
507 | if (longhaul_table[i].frequency < min_f) { | ||
508 | min_f = longhaul_table[i].frequency; | ||
509 | min_i = i; | ||
510 | } | ||
511 | } | ||
512 | if (min_i != j) { | ||
513 | swap(longhaul_table[j].frequency, | ||
514 | longhaul_table[min_i].frequency); | ||
515 | swap(longhaul_table[j].index, | ||
516 | longhaul_table[min_i].index); | ||
517 | } | ||
518 | } | ||
519 | |||
520 | longhaul_table[k].frequency = CPUFREQ_TABLE_END; | ||
521 | |||
522 | /* Find index we are running on */ | ||
523 | for (j = 0; j < k; j++) { | ||
524 | if (mults[longhaul_table[j].index & 0x1f] == mult) { | ||
525 | longhaul_index = j; | ||
526 | break; | ||
527 | } | ||
528 | } | ||
529 | return 0; | ||
530 | } | ||
531 | |||
532 | |||
533 | static void __cpuinit longhaul_setup_voltagescaling(void) | ||
534 | { | ||
535 | union msr_longhaul longhaul; | ||
536 | struct mV_pos minvid, maxvid, vid; | ||
537 | unsigned int j, speed, pos, kHz_step, numvscales; | ||
538 | int min_vid_speed; | ||
539 | |||
540 | rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
541 | if (!(longhaul.bits.RevisionID & 1)) { | ||
542 | printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n"); | ||
543 | return; | ||
544 | } | ||
545 | |||
546 | if (!longhaul.bits.VRMRev) { | ||
547 | printk(KERN_INFO PFX "VRM 8.5\n"); | ||
548 | vrm_mV_table = &vrm85_mV[0]; | ||
549 | mV_vrm_table = &mV_vrm85[0]; | ||
550 | } else { | ||
551 | printk(KERN_INFO PFX "Mobile VRM\n"); | ||
552 | if (cpu_model < CPU_NEHEMIAH) | ||
553 | return; | ||
554 | vrm_mV_table = &mobilevrm_mV[0]; | ||
555 | mV_vrm_table = &mV_mobilevrm[0]; | ||
556 | } | ||
557 | |||
558 | minvid = vrm_mV_table[longhaul.bits.MinimumVID]; | ||
559 | maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; | ||
560 | |||
561 | if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { | ||
562 | printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " | ||
563 | "Voltage scaling disabled.\n", | ||
564 | minvid.mV/1000, minvid.mV%1000, | ||
565 | maxvid.mV/1000, maxvid.mV%1000); | ||
566 | return; | ||
567 | } | ||
568 | |||
569 | if (minvid.mV == maxvid.mV) { | ||
570 | printk(KERN_INFO PFX "Claims to support voltage scaling but " | ||
571 | "min & max are both %d.%03d. " | ||
572 | "Voltage scaling disabled\n", | ||
573 | maxvid.mV/1000, maxvid.mV%1000); | ||
574 | return; | ||
575 | } | ||
576 | |||
577 | /* How many voltage steps*/ | ||
578 | numvscales = maxvid.pos - minvid.pos + 1; | ||
579 | printk(KERN_INFO PFX | ||
580 | "Max VID=%d.%03d " | ||
581 | "Min VID=%d.%03d, " | ||
582 | "%d possible voltage scales\n", | ||
583 | maxvid.mV/1000, maxvid.mV%1000, | ||
584 | minvid.mV/1000, minvid.mV%1000, | ||
585 | numvscales); | ||
586 | |||
587 | /* Calculate max frequency at min voltage */ | ||
588 | j = longhaul.bits.MinMHzBR; | ||
589 | if (longhaul.bits.MinMHzBR4) | ||
590 | j += 16; | ||
591 | min_vid_speed = eblcr[j]; | ||
592 | if (min_vid_speed == -1) | ||
593 | return; | ||
594 | switch (longhaul.bits.MinMHzFSB) { | ||
595 | case 0: | ||
596 | min_vid_speed *= 13333; | ||
597 | break; | ||
598 | case 1: | ||
599 | min_vid_speed *= 10000; | ||
600 | break; | ||
601 | case 3: | ||
602 | min_vid_speed *= 6666; | ||
603 | break; | ||
604 | default: | ||
605 | return; | ||
606 | break; | ||
607 | } | ||
608 | if (min_vid_speed >= highest_speed) | ||
609 | return; | ||
610 | /* Calculate kHz for one voltage step */ | ||
611 | kHz_step = (highest_speed - min_vid_speed) / numvscales; | ||
612 | |||
613 | j = 0; | ||
614 | while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { | ||
615 | speed = longhaul_table[j].frequency; | ||
616 | if (speed > min_vid_speed) | ||
617 | pos = (speed - min_vid_speed) / kHz_step + minvid.pos; | ||
618 | else | ||
619 | pos = minvid.pos; | ||
620 | longhaul_table[j].index |= mV_vrm_table[pos] << 8; | ||
621 | vid = vrm_mV_table[mV_vrm_table[pos]]; | ||
622 | printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", | ||
623 | speed, j, vid.mV); | ||
624 | j++; | ||
625 | } | ||
626 | |||
627 | can_scale_voltage = 1; | ||
628 | printk(KERN_INFO PFX "Voltage scaling enabled.\n"); | ||
629 | } | ||
630 | |||
631 | |||
632 | static int longhaul_verify(struct cpufreq_policy *policy) | ||
633 | { | ||
634 | return cpufreq_frequency_table_verify(policy, longhaul_table); | ||
635 | } | ||
636 | |||
637 | |||
638 | static int longhaul_target(struct cpufreq_policy *policy, | ||
639 | unsigned int target_freq, unsigned int relation) | ||
640 | { | ||
641 | unsigned int table_index = 0; | ||
642 | unsigned int i; | ||
643 | unsigned int dir = 0; | ||
644 | u8 vid, current_vid; | ||
645 | |||
646 | if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, | ||
647 | relation, &table_index)) | ||
648 | return -EINVAL; | ||
649 | |||
650 | /* Don't set same frequency again */ | ||
651 | if (longhaul_index == table_index) | ||
652 | return 0; | ||
653 | |||
654 | if (!can_scale_voltage) | ||
655 | longhaul_setstate(table_index); | ||
656 | else { | ||
657 | /* On test system voltage transitions exceeding single | ||
658 | * step up or down were turning motherboard off. Both | ||
659 | * "ondemand" and "userspace" are unsafe. C7 is doing | ||
660 | * this in hardware, C3 is old and we need to do this | ||
661 | * in software. */ | ||
662 | i = longhaul_index; | ||
663 | current_vid = (longhaul_table[longhaul_index].index >> 8); | ||
664 | current_vid &= 0x1f; | ||
665 | if (table_index > longhaul_index) | ||
666 | dir = 1; | ||
667 | while (i != table_index) { | ||
668 | vid = (longhaul_table[i].index >> 8) & 0x1f; | ||
669 | if (vid != current_vid) { | ||
670 | longhaul_setstate(i); | ||
671 | current_vid = vid; | ||
672 | msleep(200); | ||
673 | } | ||
674 | if (dir) | ||
675 | i++; | ||
676 | else | ||
677 | i--; | ||
678 | } | ||
679 | longhaul_setstate(table_index); | ||
680 | } | ||
681 | longhaul_index = table_index; | ||
682 | return 0; | ||
683 | } | ||
684 | |||
685 | |||
/*
 * cpufreq ->get hook: report the current speed of CPU 0 as computed from
 * the multiplier read back from the hardware.  Any other @cpu yields 0.
 */
static unsigned int longhaul_get(unsigned int cpu)
{
	return cpu ? 0 : calc_speed(longhaul_get_cpu_mult());
}
692 | |||
693 | static acpi_status longhaul_walk_callback(acpi_handle obj_handle, | ||
694 | u32 nesting_level, | ||
695 | void *context, void **return_value) | ||
696 | { | ||
697 | struct acpi_device *d; | ||
698 | |||
699 | if (acpi_bus_get_device(obj_handle, &d)) | ||
700 | return 0; | ||
701 | |||
702 | *return_value = acpi_driver_data(d); | ||
703 | return 1; | ||
704 | } | ||
705 | |||
706 | /* VIA don't support PM2 reg, but have something similar */ | ||
707 | static int enable_arbiter_disable(void) | ||
708 | { | ||
709 | struct pci_dev *dev; | ||
710 | int status = 1; | ||
711 | int reg; | ||
712 | u8 pci_cmd; | ||
713 | |||
714 | /* Find PLE133 host bridge */ | ||
715 | reg = 0x78; | ||
716 | dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, | ||
717 | NULL); | ||
718 | /* Find PM133/VT8605 host bridge */ | ||
719 | if (dev == NULL) | ||
720 | dev = pci_get_device(PCI_VENDOR_ID_VIA, | ||
721 | PCI_DEVICE_ID_VIA_8605_0, NULL); | ||
722 | /* Find CLE266 host bridge */ | ||
723 | if (dev == NULL) { | ||
724 | reg = 0x76; | ||
725 | dev = pci_get_device(PCI_VENDOR_ID_VIA, | ||
726 | PCI_DEVICE_ID_VIA_862X_0, NULL); | ||
727 | /* Find CN400 V-Link host bridge */ | ||
728 | if (dev == NULL) | ||
729 | dev = pci_get_device(PCI_VENDOR_ID_VIA, 0x7259, NULL); | ||
730 | } | ||
731 | if (dev != NULL) { | ||
732 | /* Enable access to port 0x22 */ | ||
733 | pci_read_config_byte(dev, reg, &pci_cmd); | ||
734 | if (!(pci_cmd & 1<<7)) { | ||
735 | pci_cmd |= 1<<7; | ||
736 | pci_write_config_byte(dev, reg, pci_cmd); | ||
737 | pci_read_config_byte(dev, reg, &pci_cmd); | ||
738 | if (!(pci_cmd & 1<<7)) { | ||
739 | printk(KERN_ERR PFX | ||
740 | "Can't enable access to port 0x22.\n"); | ||
741 | status = 0; | ||
742 | } | ||
743 | } | ||
744 | pci_dev_put(dev); | ||
745 | return status; | ||
746 | } | ||
747 | return 0; | ||
748 | } | ||
749 | |||
750 | static int longhaul_setup_southbridge(void) | ||
751 | { | ||
752 | struct pci_dev *dev; | ||
753 | u8 pci_cmd; | ||
754 | |||
755 | /* Find VT8235 southbridge */ | ||
756 | dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL); | ||
757 | if (dev == NULL) | ||
758 | /* Find VT8237 southbridge */ | ||
759 | dev = pci_get_device(PCI_VENDOR_ID_VIA, | ||
760 | PCI_DEVICE_ID_VIA_8237, NULL); | ||
761 | if (dev != NULL) { | ||
762 | /* Set transition time to max */ | ||
763 | pci_read_config_byte(dev, 0xec, &pci_cmd); | ||
764 | pci_cmd &= ~(1 << 2); | ||
765 | pci_write_config_byte(dev, 0xec, pci_cmd); | ||
766 | pci_read_config_byte(dev, 0xe4, &pci_cmd); | ||
767 | pci_cmd &= ~(1 << 7); | ||
768 | pci_write_config_byte(dev, 0xe4, pci_cmd); | ||
769 | pci_read_config_byte(dev, 0xe5, &pci_cmd); | ||
770 | pci_cmd |= 1 << 7; | ||
771 | pci_write_config_byte(dev, 0xe5, pci_cmd); | ||
772 | /* Get address of ACPI registers block*/ | ||
773 | pci_read_config_byte(dev, 0x81, &pci_cmd); | ||
774 | if (pci_cmd & 1 << 7) { | ||
775 | pci_read_config_dword(dev, 0x88, &acpi_regs_addr); | ||
776 | acpi_regs_addr &= 0xff00; | ||
777 | printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", | ||
778 | acpi_regs_addr); | ||
779 | } | ||
780 | |||
781 | pci_dev_put(dev); | ||
782 | return 1; | ||
783 | } | ||
784 | return 0; | ||
785 | } | ||
786 | |||
787 | static int __cpuinit longhaul_cpu_init(struct cpufreq_policy *policy) | ||
788 | { | ||
789 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
790 | char *cpuname = NULL; | ||
791 | int ret; | ||
792 | u32 lo, hi; | ||
793 | |||
794 | /* Check what we have on this motherboard */ | ||
795 | switch (c->x86_model) { | ||
796 | case 6: | ||
797 | cpu_model = CPU_SAMUEL; | ||
798 | cpuname = "C3 'Samuel' [C5A]"; | ||
799 | longhaul_version = TYPE_LONGHAUL_V1; | ||
800 | memcpy(mults, samuel1_mults, sizeof(samuel1_mults)); | ||
801 | memcpy(eblcr, samuel1_eblcr, sizeof(samuel1_eblcr)); | ||
802 | break; | ||
803 | |||
804 | case 7: | ||
805 | switch (c->x86_mask) { | ||
806 | case 0: | ||
807 | longhaul_version = TYPE_LONGHAUL_V1; | ||
808 | cpu_model = CPU_SAMUEL2; | ||
809 | cpuname = "C3 'Samuel 2' [C5B]"; | ||
810 | /* Note, this is not a typo, early Samuel2's had | ||
811 | * Samuel1 ratios. */ | ||
812 | memcpy(mults, samuel1_mults, sizeof(samuel1_mults)); | ||
813 | memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr)); | ||
814 | break; | ||
815 | case 1 ... 15: | ||
816 | longhaul_version = TYPE_LONGHAUL_V2; | ||
817 | if (c->x86_mask < 8) { | ||
818 | cpu_model = CPU_SAMUEL2; | ||
819 | cpuname = "C3 'Samuel 2' [C5B]"; | ||
820 | } else { | ||
821 | cpu_model = CPU_EZRA; | ||
822 | cpuname = "C3 'Ezra' [C5C]"; | ||
823 | } | ||
824 | memcpy(mults, ezra_mults, sizeof(ezra_mults)); | ||
825 | memcpy(eblcr, ezra_eblcr, sizeof(ezra_eblcr)); | ||
826 | break; | ||
827 | } | ||
828 | break; | ||
829 | |||
830 | case 8: | ||
831 | cpu_model = CPU_EZRA_T; | ||
832 | cpuname = "C3 'Ezra-T' [C5M]"; | ||
833 | longhaul_version = TYPE_POWERSAVER; | ||
834 | numscales = 32; | ||
835 | memcpy(mults, ezrat_mults, sizeof(ezrat_mults)); | ||
836 | memcpy(eblcr, ezrat_eblcr, sizeof(ezrat_eblcr)); | ||
837 | break; | ||
838 | |||
839 | case 9: | ||
840 | longhaul_version = TYPE_POWERSAVER; | ||
841 | numscales = 32; | ||
842 | memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults)); | ||
843 | memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr)); | ||
844 | switch (c->x86_mask) { | ||
845 | case 0 ... 1: | ||
846 | cpu_model = CPU_NEHEMIAH; | ||
847 | cpuname = "C3 'Nehemiah A' [C5XLOE]"; | ||
848 | break; | ||
849 | case 2 ... 4: | ||
850 | cpu_model = CPU_NEHEMIAH; | ||
851 | cpuname = "C3 'Nehemiah B' [C5XLOH]"; | ||
852 | break; | ||
853 | case 5 ... 15: | ||
854 | cpu_model = CPU_NEHEMIAH_C; | ||
855 | cpuname = "C3 'Nehemiah C' [C5P]"; | ||
856 | break; | ||
857 | } | ||
858 | break; | ||
859 | |||
860 | default: | ||
861 | cpuname = "Unknown"; | ||
862 | break; | ||
863 | } | ||
864 | /* Check Longhaul ver. 2 */ | ||
865 | if (longhaul_version == TYPE_LONGHAUL_V2) { | ||
866 | rdmsr(MSR_VIA_LONGHAUL, lo, hi); | ||
867 | if (lo == 0 && hi == 0) | ||
868 | /* Looks like MSR isn't present */ | ||
869 | longhaul_version = TYPE_LONGHAUL_V1; | ||
870 | } | ||
871 | |||
872 | printk(KERN_INFO PFX "VIA %s CPU detected. ", cpuname); | ||
873 | switch (longhaul_version) { | ||
874 | case TYPE_LONGHAUL_V1: | ||
875 | case TYPE_LONGHAUL_V2: | ||
876 | printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version); | ||
877 | break; | ||
878 | case TYPE_POWERSAVER: | ||
879 | printk(KERN_CONT "Powersaver supported.\n"); | ||
880 | break; | ||
881 | }; | ||
882 | |||
883 | /* Doesn't hurt */ | ||
884 | longhaul_setup_southbridge(); | ||
885 | |||
886 | /* Find ACPI data for processor */ | ||
887 | acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, | ||
888 | ACPI_UINT32_MAX, &longhaul_walk_callback, NULL, | ||
889 | NULL, (void *)&pr); | ||
890 | |||
891 | /* Check ACPI support for C3 state */ | ||
892 | if (pr != NULL && longhaul_version == TYPE_POWERSAVER) { | ||
893 | cx = &pr->power.states[ACPI_STATE_C3]; | ||
894 | if (cx->address > 0 && cx->latency <= 1000) | ||
895 | longhaul_flags |= USE_ACPI_C3; | ||
896 | } | ||
897 | /* Disable if it isn't working */ | ||
898 | if (disable_acpi_c3) | ||
899 | longhaul_flags &= ~USE_ACPI_C3; | ||
900 | /* Check if northbridge is friendly */ | ||
901 | if (enable_arbiter_disable()) | ||
902 | longhaul_flags |= USE_NORTHBRIDGE; | ||
903 | |||
904 | /* Check ACPI support for bus master arbiter disable */ | ||
905 | if (!(longhaul_flags & USE_ACPI_C3 | ||
906 | || longhaul_flags & USE_NORTHBRIDGE) | ||
907 | && ((pr == NULL) || !(pr->flags.bm_control))) { | ||
908 | printk(KERN_ERR PFX | ||
909 | "No ACPI support. Unsupported northbridge.\n"); | ||
910 | return -ENODEV; | ||
911 | } | ||
912 | |||
913 | if (longhaul_flags & USE_NORTHBRIDGE) | ||
914 | printk(KERN_INFO PFX "Using northbridge support.\n"); | ||
915 | if (longhaul_flags & USE_ACPI_C3) | ||
916 | printk(KERN_INFO PFX "Using ACPI support.\n"); | ||
917 | |||
918 | ret = longhaul_get_ranges(); | ||
919 | if (ret != 0) | ||
920 | return ret; | ||
921 | |||
922 | if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0)) | ||
923 | longhaul_setup_voltagescaling(); | ||
924 | |||
925 | policy->cpuinfo.transition_latency = 200000; /* nsec */ | ||
926 | policy->cur = calc_speed(longhaul_get_cpu_mult()); | ||
927 | |||
928 | ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table); | ||
929 | if (ret) | ||
930 | return ret; | ||
931 | |||
932 | cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu); | ||
933 | |||
934 | return 0; | ||
935 | } | ||
936 | |||
937 | static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy) | ||
938 | { | ||
939 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
940 | return 0; | ||
941 | } | ||
942 | |||
943 | static struct freq_attr *longhaul_attr[] = { | ||
944 | &cpufreq_freq_attr_scaling_available_freqs, | ||
945 | NULL, | ||
946 | }; | ||
947 | |||
948 | static struct cpufreq_driver longhaul_driver = { | ||
949 | .verify = longhaul_verify, | ||
950 | .target = longhaul_target, | ||
951 | .get = longhaul_get, | ||
952 | .init = longhaul_cpu_init, | ||
953 | .exit = __devexit_p(longhaul_cpu_exit), | ||
954 | .name = "longhaul", | ||
955 | .owner = THIS_MODULE, | ||
956 | .attr = longhaul_attr, | ||
957 | }; | ||
958 | |||
959 | |||
960 | static int __init longhaul_init(void) | ||
961 | { | ||
962 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
963 | |||
964 | if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6) | ||
965 | return -ENODEV; | ||
966 | |||
967 | #ifdef CONFIG_SMP | ||
968 | if (num_online_cpus() > 1) { | ||
969 | printk(KERN_ERR PFX "More than 1 CPU detected, " | ||
970 | "longhaul disabled.\n"); | ||
971 | return -ENODEV; | ||
972 | } | ||
973 | #endif | ||
974 | #ifdef CONFIG_X86_IO_APIC | ||
975 | if (cpu_has_apic) { | ||
976 | printk(KERN_ERR PFX "APIC detected. Longhaul is currently " | ||
977 | "broken in this configuration.\n"); | ||
978 | return -ENODEV; | ||
979 | } | ||
980 | #endif | ||
981 | switch (c->x86_model) { | ||
982 | case 6 ... 9: | ||
983 | return cpufreq_register_driver(&longhaul_driver); | ||
984 | case 10: | ||
985 | printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n"); | ||
986 | default: | ||
987 | ; | ||
988 | } | ||
989 | |||
990 | return -ENODEV; | ||
991 | } | ||
992 | |||
993 | |||
994 | static void __exit longhaul_exit(void) | ||
995 | { | ||
996 | int i; | ||
997 | |||
998 | for (i = 0; i < numscales; i++) { | ||
999 | if (mults[i] == maxmult) { | ||
1000 | longhaul_setstate(i); | ||
1001 | break; | ||
1002 | } | ||
1003 | } | ||
1004 | |||
1005 | cpufreq_unregister_driver(&longhaul_driver); | ||
1006 | kfree(longhaul_table); | ||
1007 | } | ||
1008 | |||
1009 | /* Even if the BIOS exports the ACPI C3 state, and it is used | ||
1010 | * successfully when the CPU is idle, this state doesn't | ||
1011 | * trigger a frequency transition in some cases. */ | ||
1012 | module_param(disable_acpi_c3, int, 0644); | ||
1013 | MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); | ||
1014 | /* Change CPU voltage with frequency. Very useful to save | ||
1015 | * power, but most VIA C3 processors don't support it. */ | ||
1016 | module_param(scale_voltage, int, 0644); | ||
1017 | MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); | ||
1018 | /* Force revision key to 0 for processors which don't | ||
1019 | * support voltage scaling, but present themselves as if | ||
1020 | * they do. */ | ||
1021 | module_param(revid_errata, int, 0644); | ||
1022 | MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); | ||
1023 | |||
1024 | MODULE_AUTHOR("Dave Jones <davej@redhat.com>"); | ||
1025 | MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors."); | ||
1026 | MODULE_LICENSE("GPL"); | ||
1027 | |||
1028 | late_initcall(longhaul_init); | ||
1029 | module_exit(longhaul_exit); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h deleted file mode 100644 index cbf48fbca881..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.h +++ /dev/null | |||
@@ -1,353 +0,0 @@ | |||
1 | /* | ||
2 | * longhaul.h | ||
3 | * (C) 2003 Dave Jones. | ||
4 | * | ||
5 | * Licensed under the terms of the GNU GPL License version 2. | ||
6 | * | ||
7 | * VIA-specific information | ||
8 | */ | ||
9 | |||
/*
 * Bit-field view of a 32-bit register value; val aliases the same storage
 * as one unsigned long.
 */
union msr_bcr2 {
	struct {
		unsigned Reseved   : 19;	/* bits 18:0 */
		unsigned ESOFTBF   : 1;		/* bit  19 */
		unsigned Reserved2 : 3;		/* bits 22:20 */
		unsigned CLOCKMUL  : 4;		/* bits 26:23 */
		unsigned Reserved3 : 5;		/* bits 31:27 */
	} bits;
	unsigned long val;
};
20 | |||
/*
 * Bit-field view of the 64-bit value read from MSR_VIA_LONGHAUL; val aliases
 * the same storage as one unsigned long long.
 */
union msr_longhaul {
	struct {
		unsigned RevisionID         : 4;	/* bits  3:0 */
		unsigned RevisionKey        : 4;	/* bits  7:4 */
		unsigned EnableSoftBusRatio : 1;	/* bit   8 */
		unsigned EnableSoftVID      : 1;	/* bit   9 */
		unsigned EnableSoftBSEL     : 1;	/* bit  10 */
		unsigned Reserved           : 3;	/* bits 13:11 */
		unsigned SoftBusRatio4      : 1;	/* bit  14 */
		unsigned VRMRev             : 1;	/* bit  15 */
		unsigned SoftBusRatio       : 4;	/* bits 19:16 */
		unsigned SoftVID            : 5;	/* bits 24:20 */
		unsigned Reserved2          : 3;	/* bits 27:25 */
		unsigned SoftBSEL           : 2;	/* bits 29:28 */
		unsigned Reserved3          : 2;	/* bits 31:30 */
		unsigned MaxMHzBR           : 4;	/* bits 35:32 */
		unsigned MaximumVID         : 5;	/* bits 40:36 */
		unsigned MaxMHzFSB          : 2;	/* bits 42:41 */
		unsigned MaxMHzBR4          : 1;	/* bit  43 */
		unsigned Reserved4          : 4;	/* bits 47:44 */
		unsigned MinMHzBR           : 4;	/* bits 51:48 */
		unsigned MinimumVID         : 5;	/* bits 56:52 */
		unsigned MinMHzFSB          : 2;	/* bits 58:57 */
		unsigned MinMHzBR4          : 1;	/* bit  59 */
		unsigned Reserved5          : 4;	/* bits 63:60 */
	} bits;
	unsigned long long val;
};
49 | |||
50 | /* | ||
51 | * Clock ratio tables. Div/Mod by 10 to get ratio. | ||
52 | * The eblcr values specify the ratio read from the CPU. | ||
53 | * The mults values specify what to write to the CPU. | ||
54 | */ | ||
55 | |||
56 | /* | ||
57 | * VIA C3 Samuel 1 & Samuel 2 (stepping 0) | ||
58 | */ | ||
59 | static const int __cpuinitdata samuel1_mults[16] = { | ||
60 | -1, /* 0000 -> RESERVED */ | ||
61 | 30, /* 0001 -> 3.0x */ | ||
62 | 40, /* 0010 -> 4.0x */ | ||
63 | -1, /* 0011 -> RESERVED */ | ||
64 | -1, /* 0100 -> RESERVED */ | ||
65 | 35, /* 0101 -> 3.5x */ | ||
66 | 45, /* 0110 -> 4.5x */ | ||
67 | 55, /* 0111 -> 5.5x */ | ||
68 | 60, /* 1000 -> 6.0x */ | ||
69 | 70, /* 1001 -> 7.0x */ | ||
70 | 80, /* 1010 -> 8.0x */ | ||
71 | 50, /* 1011 -> 5.0x */ | ||
72 | 65, /* 1100 -> 6.5x */ | ||
73 | 75, /* 1101 -> 7.5x */ | ||
74 | -1, /* 1110 -> RESERVED */ | ||
75 | -1, /* 1111 -> RESERVED */ | ||
76 | }; | ||
77 | |||
78 | static const int __cpuinitdata samuel1_eblcr[16] = { | ||
79 | 50, /* 0000 -> RESERVED */ | ||
80 | 30, /* 0001 -> 3.0x */ | ||
81 | 40, /* 0010 -> 4.0x */ | ||
82 | -1, /* 0011 -> RESERVED */ | ||
83 | 55, /* 0100 -> 5.5x */ | ||
84 | 35, /* 0101 -> 3.5x */ | ||
85 | 45, /* 0110 -> 4.5x */ | ||
86 | -1, /* 0111 -> RESERVED */ | ||
87 | -1, /* 1000 -> RESERVED */ | ||
88 | 70, /* 1001 -> 7.0x */ | ||
89 | 80, /* 1010 -> 8.0x */ | ||
90 | 60, /* 1011 -> 6.0x */ | ||
91 | -1, /* 1100 -> RESERVED */ | ||
92 | 75, /* 1101 -> 7.5x */ | ||
93 | -1, /* 1110 -> RESERVED */ | ||
94 | 65, /* 1111 -> 6.5x */ | ||
95 | }; | ||
96 | |||
97 | /* | ||
98 | * VIA C3 Samuel2 Stepping 1->15 | ||
99 | */ | ||
100 | static const int __cpuinitdata samuel2_eblcr[16] = { | ||
101 | 50, /* 0000 -> 5.0x */ | ||
102 | 30, /* 0001 -> 3.0x */ | ||
103 | 40, /* 0010 -> 4.0x */ | ||
104 | 100, /* 0011 -> 10.0x */ | ||
105 | 55, /* 0100 -> 5.5x */ | ||
106 | 35, /* 0101 -> 3.5x */ | ||
107 | 45, /* 0110 -> 4.5x */ | ||
108 | 110, /* 0111 -> 11.0x */ | ||
109 | 90, /* 1000 -> 9.0x */ | ||
110 | 70, /* 1001 -> 7.0x */ | ||
111 | 80, /* 1010 -> 8.0x */ | ||
112 | 60, /* 1011 -> 6.0x */ | ||
113 | 120, /* 1100 -> 12.0x */ | ||
114 | 75, /* 1101 -> 7.5x */ | ||
115 | 130, /* 1110 -> 13.0x */ | ||
116 | 65, /* 1111 -> 6.5x */ | ||
117 | }; | ||
118 | |||
119 | /* | ||
120 | * VIA C3 Ezra | ||
121 | */ | ||
122 | static const int __cpuinitdata ezra_mults[16] = { | ||
123 | 100, /* 0000 -> 10.0x */ | ||
124 | 30, /* 0001 -> 3.0x */ | ||
125 | 40, /* 0010 -> 4.0x */ | ||
126 | 90, /* 0011 -> 9.0x */ | ||
127 | 95, /* 0100 -> 9.5x */ | ||
128 | 35, /* 0101 -> 3.5x */ | ||
129 | 45, /* 0110 -> 4.5x */ | ||
130 | 55, /* 0111 -> 5.5x */ | ||
131 | 60, /* 1000 -> 6.0x */ | ||
132 | 70, /* 1001 -> 7.0x */ | ||
133 | 80, /* 1010 -> 8.0x */ | ||
134 | 50, /* 1011 -> 5.0x */ | ||
135 | 65, /* 1100 -> 6.5x */ | ||
136 | 75, /* 1101 -> 7.5x */ | ||
137 | 85, /* 1110 -> 8.5x */ | ||
138 | 120, /* 1111 -> 12.0x */ | ||
139 | }; | ||
140 | |||
141 | static const int __cpuinitdata ezra_eblcr[16] = { | ||
142 | 50, /* 0000 -> 5.0x */ | ||
143 | 30, /* 0001 -> 3.0x */ | ||
144 | 40, /* 0010 -> 4.0x */ | ||
145 | 100, /* 0011 -> 10.0x */ | ||
146 | 55, /* 0100 -> 5.5x */ | ||
147 | 35, /* 0101 -> 3.5x */ | ||
148 | 45, /* 0110 -> 4.5x */ | ||
149 | 95, /* 0111 -> 9.5x */ | ||
150 | 90, /* 1000 -> 9.0x */ | ||
151 | 70, /* 1001 -> 7.0x */ | ||
152 | 80, /* 1010 -> 8.0x */ | ||
153 | 60, /* 1011 -> 6.0x */ | ||
154 | 120, /* 1100 -> 12.0x */ | ||
155 | 75, /* 1101 -> 7.5x */ | ||
156 | 85, /* 1110 -> 8.5x */ | ||
157 | 65, /* 1111 -> 6.5x */ | ||
158 | }; | ||
159 | |||
160 | /* | ||
161 | * VIA C3 (Ezra-T) [C5M]. | ||
162 | */ | ||
163 | static const int __cpuinitdata ezrat_mults[32] = { | ||
164 | 100, /* 0000 -> 10.0x */ | ||
165 | 30, /* 0001 -> 3.0x */ | ||
166 | 40, /* 0010 -> 4.0x */ | ||
167 | 90, /* 0011 -> 9.0x */ | ||
168 | 95, /* 0100 -> 9.5x */ | ||
169 | 35, /* 0101 -> 3.5x */ | ||
170 | 45, /* 0110 -> 4.5x */ | ||
171 | 55, /* 0111 -> 5.5x */ | ||
172 | 60, /* 1000 -> 6.0x */ | ||
173 | 70, /* 1001 -> 7.0x */ | ||
174 | 80, /* 1010 -> 8.0x */ | ||
175 | 50, /* 1011 -> 5.0x */ | ||
176 | 65, /* 1100 -> 6.5x */ | ||
177 | 75, /* 1101 -> 7.5x */ | ||
178 | 85, /* 1110 -> 8.5x */ | ||
179 | 120, /* 1111 -> 12.0x */ | ||
180 | |||
181 | -1, /* 0000 -> RESERVED (10.0x) */ | ||
182 | 110, /* 0001 -> 11.0x */ | ||
183 | -1, /* 0010 -> 12.0x */ | ||
184 | -1, /* 0011 -> RESERVED (9.0x)*/ | ||
185 | 105, /* 0100 -> 10.5x */ | ||
186 | 115, /* 0101 -> 11.5x */ | ||
187 | 125, /* 0110 -> 12.5x */ | ||
188 | 135, /* 0111 -> 13.5x */ | ||
189 | 140, /* 1000 -> 14.0x */ | ||
190 | 150, /* 1001 -> 15.0x */ | ||
191 | 160, /* 1010 -> 16.0x */ | ||
192 | 130, /* 1011 -> 13.0x */ | ||
193 | 145, /* 1100 -> 14.5x */ | ||
194 | 155, /* 1101 -> 15.5x */ | ||
195 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
196 | -1, /* 1111 -> RESERVED (12.0x) */ | ||
197 | }; | ||
198 | |||
199 | static const int __cpuinitdata ezrat_eblcr[32] = { | ||
200 | 50, /* 0000 -> 5.0x */ | ||
201 | 30, /* 0001 -> 3.0x */ | ||
202 | 40, /* 0010 -> 4.0x */ | ||
203 | 100, /* 0011 -> 10.0x */ | ||
204 | 55, /* 0100 -> 5.5x */ | ||
205 | 35, /* 0101 -> 3.5x */ | ||
206 | 45, /* 0110 -> 4.5x */ | ||
207 | 95, /* 0111 -> 9.5x */ | ||
208 | 90, /* 1000 -> 9.0x */ | ||
209 | 70, /* 1001 -> 7.0x */ | ||
210 | 80, /* 1010 -> 8.0x */ | ||
211 | 60, /* 1011 -> 6.0x */ | ||
212 | 120, /* 1100 -> 12.0x */ | ||
213 | 75, /* 1101 -> 7.5x */ | ||
214 | 85, /* 1110 -> 8.5x */ | ||
215 | 65, /* 1111 -> 6.5x */ | ||
216 | |||
217 | -1, /* 0000 -> RESERVED (9.0x) */ | ||
218 | 110, /* 0001 -> 11.0x */ | ||
219 | 120, /* 0010 -> 12.0x */ | ||
220 | -1, /* 0011 -> RESERVED (10.0x)*/ | ||
221 | 135, /* 0100 -> 13.5x */ | ||
222 | 115, /* 0101 -> 11.5x */ | ||
223 | 125, /* 0110 -> 12.5x */ | ||
224 | 105, /* 0111 -> 10.5x */ | ||
225 | 130, /* 1000 -> 13.0x */ | ||
226 | 150, /* 1001 -> 15.0x */ | ||
227 | 160, /* 1010 -> 16.0x */ | ||
228 | 140, /* 1011 -> 14.0x */ | ||
229 | -1, /* 1100 -> RESERVED (12.0x) */ | ||
230 | 155, /* 1101 -> 15.5x */ | ||
231 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
232 | 145, /* 1111 -> 14.5x */ | ||
233 | }; | ||
234 | |||
235 | /* | ||
236 | * VIA C3 Nehemiah */ | ||
237 | |||
238 | static const int __cpuinitdata nehemiah_mults[32] = { | ||
239 | 100, /* 0000 -> 10.0x */ | ||
240 | -1, /* 0001 -> 16.0x */ | ||
241 | 40, /* 0010 -> 4.0x */ | ||
242 | 90, /* 0011 -> 9.0x */ | ||
243 | 95, /* 0100 -> 9.5x */ | ||
244 | -1, /* 0101 -> RESERVED */ | ||
245 | 45, /* 0110 -> 4.5x */ | ||
246 | 55, /* 0111 -> 5.5x */ | ||
247 | 60, /* 1000 -> 6.0x */ | ||
248 | 70, /* 1001 -> 7.0x */ | ||
249 | 80, /* 1010 -> 8.0x */ | ||
250 | 50, /* 1011 -> 5.0x */ | ||
251 | 65, /* 1100 -> 6.5x */ | ||
252 | 75, /* 1101 -> 7.5x */ | ||
253 | 85, /* 1110 -> 8.5x */ | ||
254 | 120, /* 1111 -> 12.0x */ | ||
255 | -1, /* 0000 -> 10.0x */ | ||
256 | 110, /* 0001 -> 11.0x */ | ||
257 | -1, /* 0010 -> 12.0x */ | ||
258 | -1, /* 0011 -> 9.0x */ | ||
259 | 105, /* 0100 -> 10.5x */ | ||
260 | 115, /* 0101 -> 11.5x */ | ||
261 | 125, /* 0110 -> 12.5x */ | ||
262 | 135, /* 0111 -> 13.5x */ | ||
263 | 140, /* 1000 -> 14.0x */ | ||
264 | 150, /* 1001 -> 15.0x */ | ||
265 | 160, /* 1010 -> 16.0x */ | ||
266 | 130, /* 1011 -> 13.0x */ | ||
267 | 145, /* 1100 -> 14.5x */ | ||
268 | 155, /* 1101 -> 15.5x */ | ||
269 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
270 | -1, /* 1111 -> 12.0x */ | ||
271 | }; | ||
272 | |||
273 | static const int __cpuinitdata nehemiah_eblcr[32] = { | ||
274 | 50, /* 0000 -> 5.0x */ | ||
275 | 160, /* 0001 -> 16.0x */ | ||
276 | 40, /* 0010 -> 4.0x */ | ||
277 | 100, /* 0011 -> 10.0x */ | ||
278 | 55, /* 0100 -> 5.5x */ | ||
279 | -1, /* 0101 -> RESERVED */ | ||
280 | 45, /* 0110 -> 4.5x */ | ||
281 | 95, /* 0111 -> 9.5x */ | ||
282 | 90, /* 1000 -> 9.0x */ | ||
283 | 70, /* 1001 -> 7.0x */ | ||
284 | 80, /* 1010 -> 8.0x */ | ||
285 | 60, /* 1011 -> 6.0x */ | ||
286 | 120, /* 1100 -> 12.0x */ | ||
287 | 75, /* 1101 -> 7.5x */ | ||
288 | 85, /* 1110 -> 8.5x */ | ||
289 | 65, /* 1111 -> 6.5x */ | ||
290 | 90, /* 0000 -> 9.0x */ | ||
291 | 110, /* 0001 -> 11.0x */ | ||
292 | 120, /* 0010 -> 12.0x */ | ||
293 | 100, /* 0011 -> 10.0x */ | ||
294 | 135, /* 0100 -> 13.5x */ | ||
295 | 115, /* 0101 -> 11.5x */ | ||
296 | 125, /* 0110 -> 12.5x */ | ||
297 | 105, /* 0111 -> 10.5x */ | ||
298 | 130, /* 1000 -> 13.0x */ | ||
299 | 150, /* 1001 -> 15.0x */ | ||
300 | 160, /* 1010 -> 16.0x */ | ||
301 | 140, /* 1011 -> 14.0x */ | ||
302 | 120, /* 1100 -> 12.0x */ | ||
303 | 155, /* 1101 -> 15.5x */ | ||
304 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
305 | 145 /* 1111 -> 14.5x */ | ||
306 | }; | ||
307 | |||
308 | /* | ||
309 | * Voltage scales. Div/Mod by 1000 to get actual voltage. | ||
310 | * Which scale to use depends on the VRM type in use. | ||
311 | */ | ||
312 | |||
/*
 * One entry of a voltage scale: a voltage in millivolts together with a
 * position number.
 */
struct mV_pos {
	unsigned short mV, pos;
};
317 | |||
318 | static const struct mV_pos __cpuinitdata vrm85_mV[32] = { | ||
319 | {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, | ||
320 | {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, | ||
321 | {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, | ||
322 | {1450, 16}, {1400, 14}, {1350, 12}, {1300, 10}, | ||
323 | {1275, 9}, {1225, 7}, {1175, 5}, {1125, 3}, | ||
324 | {1075, 1}, {1825, 31}, {1775, 29}, {1725, 27}, | ||
325 | {1675, 25}, {1625, 23}, {1575, 21}, {1525, 19}, | ||
326 | {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} | ||
327 | }; | ||
328 | |||
329 | static const unsigned char __cpuinitdata mV_vrm85[32] = { | ||
330 | 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, | ||
331 | 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, | ||
332 | 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, | ||
333 | 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 | ||
334 | }; | ||
335 | |||
336 | static const struct mV_pos __cpuinitdata mobilevrm_mV[32] = { | ||
337 | {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, | ||
338 | {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, | ||
339 | {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, | ||
340 | {1150, 19}, {1100, 18}, {1050, 17}, {1000, 16}, | ||
341 | {975, 15}, {950, 14}, {925, 13}, {900, 12}, | ||
342 | {875, 11}, {850, 10}, {825, 9}, {800, 8}, | ||
343 | {775, 7}, {750, 6}, {725, 5}, {700, 4}, | ||
344 | {675, 3}, {650, 2}, {625, 1}, {600, 0} | ||
345 | }; | ||
346 | |||
347 | static const unsigned char __cpuinitdata mV_mobilevrm[32] = { | ||
348 | 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, | ||
349 | 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, | ||
350 | 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, | ||
351 | 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 | ||
352 | }; | ||
353 | |||
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c deleted file mode 100644 index d9f51367666b..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ /dev/null | |||
@@ -1,327 +0,0 @@ | |||
1 | /* | ||
2 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * | ||
6 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
7 | */ | ||
8 | |||
9 | #include <linux/kernel.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/cpufreq.h> | ||
13 | #include <linux/timex.h> | ||
14 | |||
15 | #include <asm/msr.h> | ||
16 | #include <asm/processor.h> | ||
17 | |||
18 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
19 | "longrun", msg) | ||
20 | |||
21 | static struct cpufreq_driver longrun_driver; | ||
22 | |||
/**
 * longrun_{low,high}_freq - bounds used to convert between kHz and per cent
 *
 * Both values are in kHz and are filled in by longrun_determine_freqs()
 * from longrun_cpu_init().  The TMTA microcode expresses performance as
 *	performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
 * so both bounds are needed whenever a cpufreq kHz value is translated
 * into a percentage or back.
 */
static unsigned int longrun_low_freq;
static unsigned int longrun_high_freq;
29 | |||
30 | |||
31 | /** | ||
32 | * longrun_get_policy - get the current LongRun policy | ||
33 | * @policy: struct cpufreq_policy where current policy is written into | ||
34 | * | ||
35 | * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS | ||
36 | * and MSR_TMTA_LONGRUN_CTRL | ||
37 | */ | ||
38 | static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy) | ||
39 | { | ||
40 | u32 msr_lo, msr_hi; | ||
41 | |||
42 | rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); | ||
43 | dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi); | ||
44 | if (msr_lo & 0x01) | ||
45 | policy->policy = CPUFREQ_POLICY_PERFORMANCE; | ||
46 | else | ||
47 | policy->policy = CPUFREQ_POLICY_POWERSAVE; | ||
48 | |||
49 | rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); | ||
50 | dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi); | ||
51 | msr_lo &= 0x0000007F; | ||
52 | msr_hi &= 0x0000007F; | ||
53 | |||
54 | if (longrun_high_freq <= longrun_low_freq) { | ||
55 | /* Assume degenerate Longrun table */ | ||
56 | policy->min = policy->max = longrun_high_freq; | ||
57 | } else { | ||
58 | policy->min = longrun_low_freq + msr_lo * | ||
59 | ((longrun_high_freq - longrun_low_freq) / 100); | ||
60 | policy->max = longrun_low_freq + msr_hi * | ||
61 | ((longrun_high_freq - longrun_low_freq) / 100); | ||
62 | } | ||
63 | policy->cpu = 0; | ||
64 | } | ||
65 | |||
66 | |||
67 | /** | ||
68 | * longrun_set_policy - sets a new CPUFreq policy | ||
69 | * @policy: new policy | ||
70 | * | ||
71 | * Sets a new CPUFreq policy on LongRun-capable processors. This function | ||
72 | * has to be called with cpufreq_driver locked. | ||
73 | */ | ||
74 | static int longrun_set_policy(struct cpufreq_policy *policy) | ||
75 | { | ||
76 | u32 msr_lo, msr_hi; | ||
77 | u32 pctg_lo, pctg_hi; | ||
78 | |||
79 | if (!policy) | ||
80 | return -EINVAL; | ||
81 | |||
82 | if (longrun_high_freq <= longrun_low_freq) { | ||
83 | /* Assume degenerate Longrun table */ | ||
84 | pctg_lo = pctg_hi = 100; | ||
85 | } else { | ||
86 | pctg_lo = (policy->min - longrun_low_freq) / | ||
87 | ((longrun_high_freq - longrun_low_freq) / 100); | ||
88 | pctg_hi = (policy->max - longrun_low_freq) / | ||
89 | ((longrun_high_freq - longrun_low_freq) / 100); | ||
90 | } | ||
91 | |||
92 | if (pctg_hi > 100) | ||
93 | pctg_hi = 100; | ||
94 | if (pctg_lo > pctg_hi) | ||
95 | pctg_lo = pctg_hi; | ||
96 | |||
97 | /* performance or economy mode */ | ||
98 | rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); | ||
99 | msr_lo &= 0xFFFFFFFE; | ||
100 | switch (policy->policy) { | ||
101 | case CPUFREQ_POLICY_PERFORMANCE: | ||
102 | msr_lo |= 0x00000001; | ||
103 | break; | ||
104 | case CPUFREQ_POLICY_POWERSAVE: | ||
105 | break; | ||
106 | } | ||
107 | wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); | ||
108 | |||
109 | /* lower and upper boundary */ | ||
110 | rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); | ||
111 | msr_lo &= 0xFFFFFF80; | ||
112 | msr_hi &= 0xFFFFFF80; | ||
113 | msr_lo |= pctg_lo; | ||
114 | msr_hi |= pctg_hi; | ||
115 | wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); | ||
116 | |||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | |||
121 | /** | ||
122 | * longrun_verify_poliy - verifies a new CPUFreq policy | ||
123 | * @policy: the policy to verify | ||
124 | * | ||
125 | * Validates a new CPUFreq policy. This function has to be called with | ||
126 | * cpufreq_driver locked. | ||
127 | */ | ||
128 | static int longrun_verify_policy(struct cpufreq_policy *policy) | ||
129 | { | ||
130 | if (!policy) | ||
131 | return -EINVAL; | ||
132 | |||
133 | policy->cpu = 0; | ||
134 | cpufreq_verify_within_limits(policy, | ||
135 | policy->cpuinfo.min_freq, | ||
136 | policy->cpuinfo.max_freq); | ||
137 | |||
138 | if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) && | ||
139 | (policy->policy != CPUFREQ_POLICY_PERFORMANCE)) | ||
140 | return -EINVAL; | ||
141 | |||
142 | return 0; | ||
143 | } | ||
144 | |||
145 | static unsigned int longrun_get(unsigned int cpu) | ||
146 | { | ||
147 | u32 eax, ebx, ecx, edx; | ||
148 | |||
149 | if (cpu) | ||
150 | return 0; | ||
151 | |||
152 | cpuid(0x80860007, &eax, &ebx, &ecx, &edx); | ||
153 | dprintk("cpuid eax is %u\n", eax); | ||
154 | |||
155 | return eax * 1000; | ||
156 | } | ||
157 | |||
158 | /** | ||
159 | * longrun_determine_freqs - determines the lowest and highest possible core frequency | ||
160 | * @low_freq: an int to put the lowest frequency into | ||
161 | * @high_freq: an int to put the highest frequency into | ||
162 | * | ||
163 | * Determines the lowest and highest possible core frequencies on this CPU. | ||
164 | * This is necessary to calculate the performance percentage according to | ||
165 | * TMTA rules: | ||
166 | * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) | ||
167 | */ | ||
168 | static int __cpuinit longrun_determine_freqs(unsigned int *low_freq, | ||
169 | unsigned int *high_freq) | ||
170 | { | ||
171 | u32 msr_lo, msr_hi; | ||
172 | u32 save_lo, save_hi; | ||
173 | u32 eax, ebx, ecx, edx; | ||
174 | u32 try_hi; | ||
175 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
176 | |||
177 | if (!low_freq || !high_freq) | ||
178 | return -EINVAL; | ||
179 | |||
180 | if (cpu_has(c, X86_FEATURE_LRTI)) { | ||
181 | /* if the LongRun Table Interface is present, the | ||
182 | * detection is a bit easier: | ||
183 | * For minimum frequency, read out the maximum | ||
184 | * level (msr_hi), write that into "currently | ||
185 | * selected level", and read out the frequency. | ||
186 | * For maximum frequency, read out level zero. | ||
187 | */ | ||
188 | /* minimum */ | ||
189 | rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi); | ||
190 | wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi); | ||
191 | rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); | ||
192 | *low_freq = msr_lo * 1000; /* to kHz */ | ||
193 | |||
194 | /* maximum */ | ||
195 | wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi); | ||
196 | rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); | ||
197 | *high_freq = msr_lo * 1000; /* to kHz */ | ||
198 | |||
199 | dprintk("longrun table interface told %u - %u kHz\n", | ||
200 | *low_freq, *high_freq); | ||
201 | |||
202 | if (*low_freq > *high_freq) | ||
203 | *low_freq = *high_freq; | ||
204 | return 0; | ||
205 | } | ||
206 | |||
207 | /* set the upper border to the value determined during TSC init */ | ||
208 | *high_freq = (cpu_khz / 1000); | ||
209 | *high_freq = *high_freq * 1000; | ||
210 | dprintk("high frequency is %u kHz\n", *high_freq); | ||
211 | |||
212 | /* get current borders */ | ||
213 | rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); | ||
214 | save_lo = msr_lo & 0x0000007F; | ||
215 | save_hi = msr_hi & 0x0000007F; | ||
216 | |||
217 | /* if current perf_pctg is larger than 90%, we need to decrease the | ||
218 | * upper limit to make the calculation more accurate. | ||
219 | */ | ||
220 | cpuid(0x80860007, &eax, &ebx, &ecx, &edx); | ||
221 | /* try decreasing in 10% steps, some processors react only | ||
222 | * on some barrier values */ | ||
223 | for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -= 10) { | ||
224 | /* set to 0 to try_hi perf_pctg */ | ||
225 | msr_lo &= 0xFFFFFF80; | ||
226 | msr_hi &= 0xFFFFFF80; | ||
227 | msr_hi |= try_hi; | ||
228 | wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); | ||
229 | |||
230 | /* read out current core MHz and current perf_pctg */ | ||
231 | cpuid(0x80860007, &eax, &ebx, &ecx, &edx); | ||
232 | |||
233 | /* restore values */ | ||
234 | wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi); | ||
235 | } | ||
236 | dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax); | ||
237 | |||
238 | /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) | ||
239 | * eqals | ||
240 | * low_freq * (1 - perf_pctg) = (cur_freq - high_freq * perf_pctg) | ||
241 | * | ||
242 | * high_freq * perf_pctg is stored tempoarily into "ebx". | ||
243 | */ | ||
244 | ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */ | ||
245 | |||
246 | if ((ecx > 95) || (ecx == 0) || (eax < ebx)) | ||
247 | return -EIO; | ||
248 | |||
249 | edx = ((eax - ebx) * 100) / (100 - ecx); | ||
250 | *low_freq = edx * 1000; /* back to kHz */ | ||
251 | |||
252 | dprintk("low frequency is %u kHz\n", *low_freq); | ||
253 | |||
254 | if (*low_freq > *high_freq) | ||
255 | *low_freq = *high_freq; | ||
256 | |||
257 | return 0; | ||
258 | } | ||
259 | |||
260 | |||
261 | static int __cpuinit longrun_cpu_init(struct cpufreq_policy *policy) | ||
262 | { | ||
263 | int result = 0; | ||
264 | |||
265 | /* capability check */ | ||
266 | if (policy->cpu != 0) | ||
267 | return -ENODEV; | ||
268 | |||
269 | /* detect low and high frequency */ | ||
270 | result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq); | ||
271 | if (result) | ||
272 | return result; | ||
273 | |||
274 | /* cpuinfo and default policy values */ | ||
275 | policy->cpuinfo.min_freq = longrun_low_freq; | ||
276 | policy->cpuinfo.max_freq = longrun_high_freq; | ||
277 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
278 | longrun_get_policy(policy); | ||
279 | |||
280 | return 0; | ||
281 | } | ||
282 | |||
283 | |||
284 | static struct cpufreq_driver longrun_driver = { | ||
285 | .flags = CPUFREQ_CONST_LOOPS, | ||
286 | .verify = longrun_verify_policy, | ||
287 | .setpolicy = longrun_set_policy, | ||
288 | .get = longrun_get, | ||
289 | .init = longrun_cpu_init, | ||
290 | .name = "longrun", | ||
291 | .owner = THIS_MODULE, | ||
292 | }; | ||
293 | |||
294 | |||
295 | /** | ||
296 | * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver | ||
297 | * | ||
298 | * Initializes the LongRun support. | ||
299 | */ | ||
300 | static int __init longrun_init(void) | ||
301 | { | ||
302 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
303 | |||
304 | if (c->x86_vendor != X86_VENDOR_TRANSMETA || | ||
305 | !cpu_has(c, X86_FEATURE_LONGRUN)) | ||
306 | return -ENODEV; | ||
307 | |||
308 | return cpufreq_register_driver(&longrun_driver); | ||
309 | } | ||
310 | |||
311 | |||
312 | /** | ||
313 | * longrun_exit - unregisters LongRun support | ||
314 | */ | ||
315 | static void __exit longrun_exit(void) | ||
316 | { | ||
317 | cpufreq_unregister_driver(&longrun_driver); | ||
318 | } | ||
319 | |||
320 | |||
321 | MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); | ||
322 | MODULE_DESCRIPTION("LongRun driver for Transmeta Crusoe and " | ||
323 | "Efficeon processors."); | ||
324 | MODULE_LICENSE("GPL"); | ||
325 | |||
326 | module_init(longrun_init); | ||
327 | module_exit(longrun_exit); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.c b/arch/x86/kernel/cpu/cpufreq/mperf.c deleted file mode 100644 index 911e193018ae..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/mperf.c +++ /dev/null | |||
@@ -1,51 +0,0 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/smp.h> | ||
3 | #include <linux/module.h> | ||
4 | #include <linux/init.h> | ||
5 | #include <linux/cpufreq.h> | ||
6 | #include <linux/slab.h> | ||
7 | |||
8 | #include "mperf.h" | ||
9 | |||
10 | static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); | ||
11 | |||
/*
 * Callback for smp_call_function_single(): runs on the target CPU and
 * stores the counters obtained from get_aperfmperf() in the
 * struct aperfmperf that @_cur points to.
 */
static void read_measured_perf_ctrs(void *_cur)
{
	get_aperfmperf((struct aperfmperf *)_cur);
}
19 | |||
20 | /* | ||
21 | * Return the measured active (C0) frequency on this CPU since last call | ||
22 | * to this function. | ||
23 | * Input: cpu number | ||
24 | * Return: Average CPU frequency in terms of max frequency (zero on error) | ||
25 | * | ||
26 | * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance | ||
27 | * over a period of time, while CPU is in C0 state. | ||
28 | * IA32_MPERF counts at the rate of max advertised frequency | ||
29 | * IA32_APERF counts at the rate of actual CPU frequency | ||
30 | * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and | ||
31 | * no meaning should be associated with absolute values of these MSRs. | ||
32 | */ | ||
33 | unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy, | ||
34 | unsigned int cpu) | ||
35 | { | ||
36 | struct aperfmperf perf; | ||
37 | unsigned long ratio; | ||
38 | unsigned int retval; | ||
39 | |||
40 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) | ||
41 | return 0; | ||
42 | |||
43 | ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); | ||
44 | per_cpu(acfreq_old_perf, cpu) = perf; | ||
45 | |||
46 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; | ||
47 | |||
48 | return retval; | ||
49 | } | ||
50 | EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf); | ||
51 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.h b/arch/x86/kernel/cpu/cpufreq/mperf.h deleted file mode 100644 index 5dbf2950dc22..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/mperf.h +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | /* | ||
2 | * (c) 2010 Advanced Micro Devices, Inc. | ||
3 | * Your use of this code is subject to the terms and conditions of the | ||
4 | * GNU general public license version 2. See "COPYING" or | ||
5 | * http://www.gnu.org/licenses/gpl.html | ||
6 | */ | ||
7 | |||
/*
 * Measured average frequency of @cpu since the previous call, scaled
 * against policy->cpuinfo.max_freq; zero on error.
 */
unsigned int
cpufreq_get_measured_perf(struct cpufreq_policy *policy, unsigned int cpu);
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c deleted file mode 100644 index 52c93648e492..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ /dev/null | |||
@@ -1,331 +0,0 @@ | |||
1 | /* | ||
2 | * Pentium 4/Xeon CPU on demand clock modulation/speed scaling | ||
3 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> | ||
4 | * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com> | ||
5 | * (C) 2002 Arjan van de Ven <arjanv@redhat.com> | ||
6 | * (C) 2002 Tora T. Engstad | ||
7 | * All Rights Reserved | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | * | ||
14 | * The author(s) of this software shall not be held liable for damages | ||
15 | * of any nature resulting due to the use of this software. This | ||
16 | * software is provided AS-IS with no warranties. | ||
17 | * | ||
18 | * Date Errata Description | ||
19 | * 20020525 N44, O17 12.5% or 25% DC causes lockup | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/module.h> | ||
25 | #include <linux/init.h> | ||
26 | #include <linux/smp.h> | ||
27 | #include <linux/cpufreq.h> | ||
28 | #include <linux/cpumask.h> | ||
29 | #include <linux/timex.h> | ||
30 | |||
31 | #include <asm/processor.h> | ||
32 | #include <asm/msr.h> | ||
33 | #include <asm/timer.h> | ||
34 | |||
35 | #include "speedstep-lib.h" | ||
36 | |||
37 | #define PFX "p4-clockmod: " | ||
38 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
39 | "p4-clockmod", msg) | ||
40 | |||
/*
 * Duty cycle selector (3 bits).  Values 1..7 are eighths of the full
 * clock; the PT names give the rounded percentage.  DC_DISABLE is not
 * specified in the Intel docs, it is only used here to mean "clock
 * modulation disabled".
 */
enum {
	DC_RESV		= 0,
	DC_DFLT		= 1,
	DC_25PT		= 2,
	DC_38PT		= 3,
	DC_50PT		= 4,
	DC_64PT		= 5,
	DC_75PT		= 6,
	DC_88PT		= 7,
	DC_DISABLE	= 8
};
49 | |||
50 | #define DC_ENTRIES 8 | ||
51 | |||
52 | |||
53 | static int has_N44_O17_errata[NR_CPUS]; | ||
54 | static unsigned int stock_freq; | ||
55 | static struct cpufreq_driver p4clockmod_driver; | ||
56 | static unsigned int cpufreq_p4_get(unsigned int cpu); | ||
57 | |||
58 | static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) | ||
59 | { | ||
60 | u32 l, h; | ||
61 | |||
62 | if (!cpu_online(cpu) || | ||
63 | (newstate > DC_DISABLE) || (newstate == DC_RESV)) | ||
64 | return -EINVAL; | ||
65 | |||
66 | rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h); | ||
67 | |||
68 | if (l & 0x01) | ||
69 | dprintk("CPU#%d currently thermal throttled\n", cpu); | ||
70 | |||
71 | if (has_N44_O17_errata[cpu] && | ||
72 | (newstate == DC_25PT || newstate == DC_DFLT)) | ||
73 | newstate = DC_38PT; | ||
74 | |||
75 | rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); | ||
76 | if (newstate == DC_DISABLE) { | ||
77 | dprintk("CPU#%d disabling modulation\n", cpu); | ||
78 | wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h); | ||
79 | } else { | ||
80 | dprintk("CPU#%d setting duty cycle to %d%%\n", | ||
81 | cpu, ((125 * newstate) / 10)); | ||
82 | /* bits 63 - 5 : reserved | ||
83 | * bit 4 : enable/disable | ||
84 | * bits 3-1 : duty cycle | ||
85 | * bit 0 : reserved | ||
86 | */ | ||
87 | l = (l & ~14); | ||
88 | l = l | (1<<4) | ((newstate & 0x7)<<1); | ||
89 | wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l, h); | ||
90 | } | ||
91 | |||
92 | return 0; | ||
93 | } | ||
94 | |||
95 | |||
96 | static struct cpufreq_frequency_table p4clockmod_table[] = { | ||
97 | {DC_RESV, CPUFREQ_ENTRY_INVALID}, | ||
98 | {DC_DFLT, 0}, | ||
99 | {DC_25PT, 0}, | ||
100 | {DC_38PT, 0}, | ||
101 | {DC_50PT, 0}, | ||
102 | {DC_64PT, 0}, | ||
103 | {DC_75PT, 0}, | ||
104 | {DC_88PT, 0}, | ||
105 | {DC_DISABLE, 0}, | ||
106 | {DC_RESV, CPUFREQ_TABLE_END}, | ||
107 | }; | ||
108 | |||
109 | |||
110 | static int cpufreq_p4_target(struct cpufreq_policy *policy, | ||
111 | unsigned int target_freq, | ||
112 | unsigned int relation) | ||
113 | { | ||
114 | unsigned int newstate = DC_RESV; | ||
115 | struct cpufreq_freqs freqs; | ||
116 | int i; | ||
117 | |||
118 | if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], | ||
119 | target_freq, relation, &newstate)) | ||
120 | return -EINVAL; | ||
121 | |||
122 | freqs.old = cpufreq_p4_get(policy->cpu); | ||
123 | freqs.new = stock_freq * p4clockmod_table[newstate].index / 8; | ||
124 | |||
125 | if (freqs.new == freqs.old) | ||
126 | return 0; | ||
127 | |||
128 | /* notifiers */ | ||
129 | for_each_cpu(i, policy->cpus) { | ||
130 | freqs.cpu = i; | ||
131 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
132 | } | ||
133 | |||
134 | /* run on each logical CPU, | ||
135 | * see section 13.15.3 of IA32 Intel Architecture Software | ||
136 | * Developer's Manual, Volume 3 | ||
137 | */ | ||
138 | for_each_cpu(i, policy->cpus) | ||
139 | cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); | ||
140 | |||
141 | /* notifiers */ | ||
142 | for_each_cpu(i, policy->cpus) { | ||
143 | freqs.cpu = i; | ||
144 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
145 | } | ||
146 | |||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | |||
151 | static int cpufreq_p4_verify(struct cpufreq_policy *policy) | ||
152 | { | ||
153 | return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]); | ||
154 | } | ||
155 | |||
156 | |||
157 | static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) | ||
158 | { | ||
159 | if (c->x86 == 0x06) { | ||
160 | if (cpu_has(c, X86_FEATURE_EST)) | ||
161 | printk_once(KERN_WARNING PFX "Warning: EST-capable " | ||
162 | "CPU detected. The acpi-cpufreq module offers " | ||
163 | "voltage scaling in addition to frequency " | ||
164 | "scaling. You should use that instead of " | ||
165 | "p4-clockmod, if possible.\n"); | ||
166 | switch (c->x86_model) { | ||
167 | case 0x0E: /* Core */ | ||
168 | case 0x0F: /* Core Duo */ | ||
169 | case 0x16: /* Celeron Core */ | ||
170 | case 0x1C: /* Atom */ | ||
171 | p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; | ||
172 | return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE); | ||
173 | case 0x0D: /* Pentium M (Dothan) */ | ||
174 | p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; | ||
175 | /* fall through */ | ||
176 | case 0x09: /* Pentium M (Banias) */ | ||
177 | return speedstep_get_frequency(SPEEDSTEP_CPU_PM); | ||
178 | } | ||
179 | } | ||
180 | |||
181 | if (c->x86 != 0xF) | ||
182 | return 0; | ||
183 | |||
184 | /* on P-4s, the TSC runs with constant frequency independent whether | ||
185 | * throttling is active or not. */ | ||
186 | p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; | ||
187 | |||
188 | if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) { | ||
189 | printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. " | ||
190 | "The speedstep-ich or acpi cpufreq modules offer " | ||
191 | "voltage scaling in addition of frequency scaling. " | ||
192 | "You should use either one instead of p4-clockmod, " | ||
193 | "if possible.\n"); | ||
194 | return speedstep_get_frequency(SPEEDSTEP_CPU_P4M); | ||
195 | } | ||
196 | |||
197 | return speedstep_get_frequency(SPEEDSTEP_CPU_P4D); | ||
198 | } | ||
199 | |||
200 | |||
201 | |||
202 | static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) | ||
203 | { | ||
204 | struct cpuinfo_x86 *c = &cpu_data(policy->cpu); | ||
205 | int cpuid = 0; | ||
206 | unsigned int i; | ||
207 | |||
208 | #ifdef CONFIG_SMP | ||
209 | cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); | ||
210 | #endif | ||
211 | |||
212 | /* Errata workaround */ | ||
213 | cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask; | ||
214 | switch (cpuid) { | ||
215 | case 0x0f07: | ||
216 | case 0x0f0a: | ||
217 | case 0x0f11: | ||
218 | case 0x0f12: | ||
219 | has_N44_O17_errata[policy->cpu] = 1; | ||
220 | dprintk("has errata -- disabling low frequencies\n"); | ||
221 | } | ||
222 | |||
223 | if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D && | ||
224 | c->x86_model < 2) { | ||
225 | /* switch to maximum frequency and measure result */ | ||
226 | cpufreq_p4_setdc(policy->cpu, DC_DISABLE); | ||
227 | recalibrate_cpu_khz(); | ||
228 | } | ||
229 | /* get max frequency */ | ||
230 | stock_freq = cpufreq_p4_get_frequency(c); | ||
231 | if (!stock_freq) | ||
232 | return -EINVAL; | ||
233 | |||
234 | /* table init */ | ||
235 | for (i = 1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { | ||
236 | if ((i < 2) && (has_N44_O17_errata[policy->cpu])) | ||
237 | p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; | ||
238 | else | ||
239 | p4clockmod_table[i].frequency = (stock_freq * i)/8; | ||
240 | } | ||
241 | cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu); | ||
242 | |||
243 | /* cpuinfo and default policy values */ | ||
244 | |||
245 | /* the transition latency is set to be 1 higher than the maximum | ||
246 | * transition latency of the ondemand governor */ | ||
247 | policy->cpuinfo.transition_latency = 10000001; | ||
248 | policy->cur = stock_freq; | ||
249 | |||
250 | return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]); | ||
251 | } | ||
252 | |||
253 | |||
254 | static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy) | ||
255 | { | ||
256 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
257 | return 0; | ||
258 | } | ||
259 | |||
260 | static unsigned int cpufreq_p4_get(unsigned int cpu) | ||
261 | { | ||
262 | u32 l, h; | ||
263 | |||
264 | rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); | ||
265 | |||
266 | if (l & 0x10) { | ||
267 | l = l >> 1; | ||
268 | l &= 0x7; | ||
269 | } else | ||
270 | l = DC_DISABLE; | ||
271 | |||
272 | if (l != DC_DISABLE) | ||
273 | return stock_freq * l / 8; | ||
274 | |||
275 | return stock_freq; | ||
276 | } | ||
277 | |||
278 | static struct freq_attr *p4clockmod_attr[] = { | ||
279 | &cpufreq_freq_attr_scaling_available_freqs, | ||
280 | NULL, | ||
281 | }; | ||
282 | |||
283 | static struct cpufreq_driver p4clockmod_driver = { | ||
284 | .verify = cpufreq_p4_verify, | ||
285 | .target = cpufreq_p4_target, | ||
286 | .init = cpufreq_p4_cpu_init, | ||
287 | .exit = cpufreq_p4_cpu_exit, | ||
288 | .get = cpufreq_p4_get, | ||
289 | .name = "p4-clockmod", | ||
290 | .owner = THIS_MODULE, | ||
291 | .attr = p4clockmod_attr, | ||
292 | }; | ||
293 | |||
294 | |||
295 | static int __init cpufreq_p4_init(void) | ||
296 | { | ||
297 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
298 | int ret; | ||
299 | |||
300 | /* | ||
301 | * THERM_CONTROL is architectural for IA32 now, so | ||
302 | * we can rely on the capability checks | ||
303 | */ | ||
304 | if (c->x86_vendor != X86_VENDOR_INTEL) | ||
305 | return -ENODEV; | ||
306 | |||
307 | if (!test_cpu_cap(c, X86_FEATURE_ACPI) || | ||
308 | !test_cpu_cap(c, X86_FEATURE_ACC)) | ||
309 | return -ENODEV; | ||
310 | |||
311 | ret = cpufreq_register_driver(&p4clockmod_driver); | ||
312 | if (!ret) | ||
313 | printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock " | ||
314 | "Modulation available\n"); | ||
315 | |||
316 | return ret; | ||
317 | } | ||
318 | |||
319 | |||
320 | static void __exit cpufreq_p4_exit(void) | ||
321 | { | ||
322 | cpufreq_unregister_driver(&p4clockmod_driver); | ||
323 | } | ||
324 | |||
325 | |||
326 | MODULE_AUTHOR("Zwane Mwaikambo <zwane@commfireservices.com>"); | ||
327 | MODULE_DESCRIPTION("cpufreq driver for Pentium(TM) 4/Xeon(TM)"); | ||
328 | MODULE_LICENSE("GPL"); | ||
329 | |||
330 | late_initcall(cpufreq_p4_init); | ||
331 | module_exit(cpufreq_p4_exit); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c deleted file mode 100644 index 755a31e0f5b0..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ /dev/null | |||
@@ -1,624 +0,0 @@ | |||
1 | /* | ||
2 | * pcc-cpufreq.c - Processor Clocking Control firmware cpufreq interface | ||
3 | * | ||
4 | * Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com> | ||
5 | * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. | ||
6 | * Nagananda Chumbalkar <nagananda.chumbalkar@hp.com> | ||
7 | * | ||
8 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; version 2 of the License. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, but | ||
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON | ||
17 | * INFRINGEMENT. See the GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write to the Free Software Foundation, Inc., | ||
21 | * 675 Mass Ave, Cambridge, MA 02139, USA. | ||
22 | * | ||
23 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
24 | */ | ||
25 | |||
26 | #include <linux/kernel.h> | ||
27 | #include <linux/module.h> | ||
28 | #include <linux/init.h> | ||
29 | #include <linux/smp.h> | ||
30 | #include <linux/sched.h> | ||
31 | #include <linux/cpufreq.h> | ||
32 | #include <linux/compiler.h> | ||
33 | #include <linux/slab.h> | ||
34 | |||
35 | #include <linux/acpi.h> | ||
36 | #include <linux/io.h> | ||
37 | #include <linux/spinlock.h> | ||
38 | #include <linux/uaccess.h> | ||
39 | |||
40 | #include <acpi/processor.h> | ||
41 | |||
/* Driver version, reported via MODULE_VERSION() and the load message. */
42 | #define PCC_VERSION "1.00.00" | ||
/* Maximum number of status reads pcc_cmd() performs while waiting. */
43 | #define POLL_LOOPS 300 | ||
44 | |||
/* CMD_COMPLETE is tested against the header status word. */
45 | #define CMD_COMPLETE 0x1 | ||
/* Values written to the header command word. */
46 | #define CMD_GET_FREQ 0x0 | ||
47 | #define CMD_SET_FREQ 0x1 | ||
48 | |||
/* Size in bytes of each per-CPU input/output buffer that gets cleared. */
49 | #define BUF_SZ 4 | ||
50 | |||
/* Debug output tagged with this driver's name. */
51 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
52 | "pcc-cpufreq", msg) | ||
53 | |||
54 | struct pcc_register_resource { | ||
55 | u8 descriptor; | ||
56 | u16 length; | ||
57 | u8 space_id; | ||
58 | u8 bit_width; | ||
59 | u8 bit_offset; | ||
60 | u8 access_size; | ||
61 | u64 address; | ||
62 | } __attribute__ ((packed)); | ||
63 | |||
64 | struct pcc_memory_resource { | ||
65 | u8 descriptor; | ||
66 | u16 length; | ||
67 | u8 space_id; | ||
68 | u8 resource_usage; | ||
69 | u8 type_specific; | ||
70 | u64 granularity; | ||
71 | u64 minimum; | ||
72 | u64 maximum; | ||
73 | u64 translation_offset; | ||
74 | u64 address_length; | ||
75 | } __attribute__ ((packed)); | ||
76 | |||
77 | static struct cpufreq_driver pcc_cpufreq_driver; | ||
78 | |||
79 | struct pcc_header { | ||
80 | u32 signature; | ||
81 | u16 length; | ||
82 | u8 major; | ||
83 | u8 minor; | ||
84 | u32 features; | ||
85 | u16 command; | ||
86 | u16 status; | ||
87 | u32 latency; | ||
88 | u32 minimum_time; | ||
89 | u32 maximum_time; | ||
90 | u32 nominal; | ||
91 | u32 throttled_frequency; | ||
92 | u32 minimum_frequency; | ||
93 | }; | ||
94 | |||
/*
 * Mapping of the PCC shared memory region, created in pcc_cpufreq_probe()
 * and released by pcc_clear_mapping(); NULL while nothing is mapped.
 */
95 | static void __iomem *pcch_virt_addr; | ||
/* The same mapping viewed as the PCC header. */
96 | static struct pcc_header __iomem *pcch_hdr; | ||
97 | |||
/* Held by pcc_get_freq() and pcc_cpufreq_target() around a whole command. */
98 | static DEFINE_SPINLOCK(pcc_lock); | ||
99 | |||
/* Doorbell register description filled in by pcc_cpufreq_probe(). */
100 | static struct acpi_generic_address doorbell; | ||
101 | |||
/*
 * pcc_cmd() writes (current doorbell value & doorbell_preserve) |
 * doorbell_write back to the doorbell register.
 */
102 | static u64 doorbell_preserve; | ||
103 | static u64 doorbell_write; | ||
104 | |||
/* UUID passed as the first argument of _OSC in pcc_cpufreq_do_osc(). */
105 | static u8 OSC_UUID[16] = {0x63, 0x9B, 0x2C, 0x9F, 0x70, 0x91, 0x49, 0x1f, | ||
106 | 0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46}; | ||
107 | |||
108 | struct pcc_cpu { | ||
109 | u32 input_offset; | ||
110 | u32 output_offset; | ||
111 | }; | ||
112 | |||
113 | static struct pcc_cpu __percpu *pcc_cpu_info; | ||
114 | |||
115 | static int pcc_cpufreq_verify(struct cpufreq_policy *policy) | ||
116 | { | ||
117 | cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, | ||
118 | policy->cpuinfo.max_freq); | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | static inline void pcc_cmd(void) | ||
123 | { | ||
124 | u64 doorbell_value; | ||
125 | int i; | ||
126 | |||
127 | acpi_read(&doorbell_value, &doorbell); | ||
128 | acpi_write((doorbell_value & doorbell_preserve) | doorbell_write, | ||
129 | &doorbell); | ||
130 | |||
131 | for (i = 0; i < POLL_LOOPS; i++) { | ||
132 | if (ioread16(&pcch_hdr->status) & CMD_COMPLETE) | ||
133 | break; | ||
134 | } | ||
135 | } | ||
136 | |||
137 | static inline void pcc_clear_mapping(void) | ||
138 | { | ||
139 | if (pcch_virt_addr) | ||
140 | iounmap(pcch_virt_addr); | ||
141 | pcch_virt_addr = NULL; | ||
142 | } | ||
143 | |||
144 | static unsigned int pcc_get_freq(unsigned int cpu) | ||
145 | { | ||
146 | struct pcc_cpu *pcc_cpu_data; | ||
147 | unsigned int curr_freq; | ||
148 | unsigned int freq_limit; | ||
149 | u16 status; | ||
150 | u32 input_buffer; | ||
151 | u32 output_buffer; | ||
152 | |||
153 | spin_lock(&pcc_lock); | ||
154 | |||
155 | dprintk("get: get_freq for CPU %d\n", cpu); | ||
156 | pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); | ||
157 | |||
158 | input_buffer = 0x1; | ||
159 | iowrite32(input_buffer, | ||
160 | (pcch_virt_addr + pcc_cpu_data->input_offset)); | ||
161 | iowrite16(CMD_GET_FREQ, &pcch_hdr->command); | ||
162 | |||
163 | pcc_cmd(); | ||
164 | |||
165 | output_buffer = | ||
166 | ioread32(pcch_virt_addr + pcc_cpu_data->output_offset); | ||
167 | |||
168 | /* Clear the input buffer - we are done with the current command */ | ||
169 | memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); | ||
170 | |||
171 | status = ioread16(&pcch_hdr->status); | ||
172 | if (status != CMD_COMPLETE) { | ||
173 | dprintk("get: FAILED: for CPU %d, status is %d\n", | ||
174 | cpu, status); | ||
175 | goto cmd_incomplete; | ||
176 | } | ||
177 | iowrite16(0, &pcch_hdr->status); | ||
178 | curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff)) | ||
179 | / 100) * 1000); | ||
180 | |||
181 | dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is " | ||
182 | "0x%x, contains a value of: 0x%x. Speed is: %d MHz\n", | ||
183 | cpu, (pcch_virt_addr + pcc_cpu_data->output_offset), | ||
184 | output_buffer, curr_freq); | ||
185 | |||
186 | freq_limit = (output_buffer >> 8) & 0xff; | ||
187 | if (freq_limit != 0xff) { | ||
188 | dprintk("get: frequency for cpu %d is being temporarily" | ||
189 | " capped at %d\n", cpu, curr_freq); | ||
190 | } | ||
191 | |||
192 | spin_unlock(&pcc_lock); | ||
193 | return curr_freq; | ||
194 | |||
195 | cmd_incomplete: | ||
196 | iowrite16(0, &pcch_hdr->status); | ||
197 | spin_unlock(&pcc_lock); | ||
198 | return 0; | ||
199 | } | ||
200 | |||
201 | static int pcc_cpufreq_target(struct cpufreq_policy *policy, | ||
202 | unsigned int target_freq, | ||
203 | unsigned int relation) | ||
204 | { | ||
205 | struct pcc_cpu *pcc_cpu_data; | ||
206 | struct cpufreq_freqs freqs; | ||
207 | u16 status; | ||
208 | u32 input_buffer; | ||
209 | int cpu; | ||
210 | |||
211 | spin_lock(&pcc_lock); | ||
212 | cpu = policy->cpu; | ||
213 | pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); | ||
214 | |||
215 | dprintk("target: CPU %d should go to target freq: %d " | ||
216 | "(virtual) input_offset is 0x%x\n", | ||
217 | cpu, target_freq, | ||
218 | (pcch_virt_addr + pcc_cpu_data->input_offset)); | ||
219 | |||
220 | freqs.new = target_freq; | ||
221 | freqs.cpu = cpu; | ||
222 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
223 | |||
224 | input_buffer = 0x1 | (((target_freq * 100) | ||
225 | / (ioread32(&pcch_hdr->nominal) * 1000)) << 8); | ||
226 | iowrite32(input_buffer, | ||
227 | (pcch_virt_addr + pcc_cpu_data->input_offset)); | ||
228 | iowrite16(CMD_SET_FREQ, &pcch_hdr->command); | ||
229 | |||
230 | pcc_cmd(); | ||
231 | |||
232 | /* Clear the input buffer - we are done with the current command */ | ||
233 | memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); | ||
234 | |||
235 | status = ioread16(&pcch_hdr->status); | ||
236 | if (status != CMD_COMPLETE) { | ||
237 | dprintk("target: FAILED for cpu %d, with status: 0x%x\n", | ||
238 | cpu, status); | ||
239 | goto cmd_incomplete; | ||
240 | } | ||
241 | iowrite16(0, &pcch_hdr->status); | ||
242 | |||
243 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
244 | dprintk("target: was SUCCESSFUL for cpu %d\n", cpu); | ||
245 | spin_unlock(&pcc_lock); | ||
246 | |||
247 | return 0; | ||
248 | |||
249 | cmd_incomplete: | ||
250 | iowrite16(0, &pcch_hdr->status); | ||
251 | spin_unlock(&pcc_lock); | ||
252 | return -EINVAL; | ||
253 | } | ||
254 | |||
255 | static int pcc_get_offset(int cpu) | ||
256 | { | ||
257 | acpi_status status; | ||
258 | struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; | ||
259 | union acpi_object *pccp, *offset; | ||
260 | struct pcc_cpu *pcc_cpu_data; | ||
261 | struct acpi_processor *pr; | ||
262 | int ret = 0; | ||
263 | |||
264 | pr = per_cpu(processors, cpu); | ||
265 | pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); | ||
266 | |||
267 | status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer); | ||
268 | if (ACPI_FAILURE(status)) | ||
269 | return -ENODEV; | ||
270 | |||
271 | pccp = buffer.pointer; | ||
272 | if (!pccp || pccp->type != ACPI_TYPE_PACKAGE) { | ||
273 | ret = -ENODEV; | ||
274 | goto out_free; | ||
275 | }; | ||
276 | |||
277 | offset = &(pccp->package.elements[0]); | ||
278 | if (!offset || offset->type != ACPI_TYPE_INTEGER) { | ||
279 | ret = -ENODEV; | ||
280 | goto out_free; | ||
281 | } | ||
282 | |||
283 | pcc_cpu_data->input_offset = offset->integer.value; | ||
284 | |||
285 | offset = &(pccp->package.elements[1]); | ||
286 | if (!offset || offset->type != ACPI_TYPE_INTEGER) { | ||
287 | ret = -ENODEV; | ||
288 | goto out_free; | ||
289 | } | ||
290 | |||
291 | pcc_cpu_data->output_offset = offset->integer.value; | ||
292 | |||
293 | memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); | ||
294 | memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ); | ||
295 | |||
296 | dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data " | ||
297 | "input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n", | ||
298 | cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset); | ||
299 | out_free: | ||
300 | kfree(buffer.pointer); | ||
301 | return ret; | ||
302 | } | ||
303 | |||
304 | static int __init pcc_cpufreq_do_osc(acpi_handle *handle) | ||
305 | { | ||
306 | acpi_status status; | ||
307 | struct acpi_object_list input; | ||
308 | struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; | ||
309 | union acpi_object in_params[4]; | ||
310 | union acpi_object *out_obj; | ||
311 | u32 capabilities[2]; | ||
312 | u32 errors; | ||
313 | u32 supported; | ||
314 | int ret = 0; | ||
315 | |||
316 | input.count = 4; | ||
317 | input.pointer = in_params; | ||
318 | in_params[0].type = ACPI_TYPE_BUFFER; | ||
319 | in_params[0].buffer.length = 16; | ||
320 | in_params[0].buffer.pointer = OSC_UUID; | ||
321 | in_params[1].type = ACPI_TYPE_INTEGER; | ||
322 | in_params[1].integer.value = 1; | ||
323 | in_params[2].type = ACPI_TYPE_INTEGER; | ||
324 | in_params[2].integer.value = 2; | ||
325 | in_params[3].type = ACPI_TYPE_BUFFER; | ||
326 | in_params[3].buffer.length = 8; | ||
327 | in_params[3].buffer.pointer = (u8 *)&capabilities; | ||
328 | |||
329 | capabilities[0] = OSC_QUERY_ENABLE; | ||
330 | capabilities[1] = 0x1; | ||
331 | |||
332 | status = acpi_evaluate_object(*handle, "_OSC", &input, &output); | ||
333 | if (ACPI_FAILURE(status)) | ||
334 | return -ENODEV; | ||
335 | |||
336 | if (!output.length) | ||
337 | return -ENODEV; | ||
338 | |||
339 | out_obj = output.pointer; | ||
340 | if (out_obj->type != ACPI_TYPE_BUFFER) { | ||
341 | ret = -ENODEV; | ||
342 | goto out_free; | ||
343 | } | ||
344 | |||
345 | errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); | ||
346 | if (errors) { | ||
347 | ret = -ENODEV; | ||
348 | goto out_free; | ||
349 | } | ||
350 | |||
351 | supported = *((u32 *)(out_obj->buffer.pointer + 4)); | ||
352 | if (!(supported & 0x1)) { | ||
353 | ret = -ENODEV; | ||
354 | goto out_free; | ||
355 | } | ||
356 | |||
357 | kfree(output.pointer); | ||
358 | capabilities[0] = 0x0; | ||
359 | capabilities[1] = 0x1; | ||
360 | |||
361 | status = acpi_evaluate_object(*handle, "_OSC", &input, &output); | ||
362 | if (ACPI_FAILURE(status)) | ||
363 | return -ENODEV; | ||
364 | |||
365 | if (!output.length) | ||
366 | return -ENODEV; | ||
367 | |||
368 | out_obj = output.pointer; | ||
369 | if (out_obj->type != ACPI_TYPE_BUFFER) { | ||
370 | ret = -ENODEV; | ||
371 | goto out_free; | ||
372 | } | ||
373 | |||
374 | errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); | ||
375 | if (errors) { | ||
376 | ret = -ENODEV; | ||
377 | goto out_free; | ||
378 | } | ||
379 | |||
380 | supported = *((u32 *)(out_obj->buffer.pointer + 4)); | ||
381 | if (!(supported & 0x1)) { | ||
382 | ret = -ENODEV; | ||
383 | goto out_free; | ||
384 | } | ||
385 | |||
386 | out_free: | ||
387 | kfree(output.pointer); | ||
388 | return ret; | ||
389 | } | ||
390 | |||
391 | static int __init pcc_cpufreq_probe(void) | ||
392 | { | ||
393 | acpi_status status; | ||
394 | struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; | ||
395 | struct pcc_memory_resource *mem_resource; | ||
396 | struct pcc_register_resource *reg_resource; | ||
397 | union acpi_object *out_obj, *member; | ||
398 | acpi_handle handle, osc_handle, pcch_handle; | ||
399 | int ret = 0; | ||
400 | |||
401 | status = acpi_get_handle(NULL, "\\_SB", &handle); | ||
402 | if (ACPI_FAILURE(status)) | ||
403 | return -ENODEV; | ||
404 | |||
405 | status = acpi_get_handle(handle, "PCCH", &pcch_handle); | ||
406 | if (ACPI_FAILURE(status)) | ||
407 | return -ENODEV; | ||
408 | |||
409 | status = acpi_get_handle(handle, "_OSC", &osc_handle); | ||
410 | if (ACPI_SUCCESS(status)) { | ||
411 | ret = pcc_cpufreq_do_osc(&osc_handle); | ||
412 | if (ret) | ||
413 | dprintk("probe: _OSC evaluation did not succeed\n"); | ||
414 | /* Firmware's use of _OSC is optional */ | ||
415 | ret = 0; | ||
416 | } | ||
417 | |||
418 | status = acpi_evaluate_object(handle, "PCCH", NULL, &output); | ||
419 | if (ACPI_FAILURE(status)) | ||
420 | return -ENODEV; | ||
421 | |||
422 | out_obj = output.pointer; | ||
423 | if (out_obj->type != ACPI_TYPE_PACKAGE) { | ||
424 | ret = -ENODEV; | ||
425 | goto out_free; | ||
426 | } | ||
427 | |||
428 | member = &out_obj->package.elements[0]; | ||
429 | if (member->type != ACPI_TYPE_BUFFER) { | ||
430 | ret = -ENODEV; | ||
431 | goto out_free; | ||
432 | } | ||
433 | |||
434 | mem_resource = (struct pcc_memory_resource *)member->buffer.pointer; | ||
435 | |||
436 | dprintk("probe: mem_resource descriptor: 0x%x," | ||
437 | " length: %d, space_id: %d, resource_usage: %d," | ||
438 | " type_specific: %d, granularity: 0x%llx," | ||
439 | " minimum: 0x%llx, maximum: 0x%llx," | ||
440 | " translation_offset: 0x%llx, address_length: 0x%llx\n", | ||
441 | mem_resource->descriptor, mem_resource->length, | ||
442 | mem_resource->space_id, mem_resource->resource_usage, | ||
443 | mem_resource->type_specific, mem_resource->granularity, | ||
444 | mem_resource->minimum, mem_resource->maximum, | ||
445 | mem_resource->translation_offset, | ||
446 | mem_resource->address_length); | ||
447 | |||
448 | if (mem_resource->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) { | ||
449 | ret = -ENODEV; | ||
450 | goto out_free; | ||
451 | } | ||
452 | |||
453 | pcch_virt_addr = ioremap_nocache(mem_resource->minimum, | ||
454 | mem_resource->address_length); | ||
455 | if (pcch_virt_addr == NULL) { | ||
456 | dprintk("probe: could not map shared mem region\n"); | ||
457 | goto out_free; | ||
458 | } | ||
459 | pcch_hdr = pcch_virt_addr; | ||
460 | |||
461 | dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr); | ||
462 | dprintk("probe: PCCH header is at physical address: 0x%llx," | ||
463 | " signature: 0x%x, length: %d bytes, major: %d, minor: %d," | ||
464 | " supported features: 0x%x, command field: 0x%x," | ||
465 | " status field: 0x%x, nominal latency: %d us\n", | ||
466 | mem_resource->minimum, ioread32(&pcch_hdr->signature), | ||
467 | ioread16(&pcch_hdr->length), ioread8(&pcch_hdr->major), | ||
468 | ioread8(&pcch_hdr->minor), ioread32(&pcch_hdr->features), | ||
469 | ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status), | ||
470 | ioread32(&pcch_hdr->latency)); | ||
471 | |||
472 | dprintk("probe: min time between commands: %d us," | ||
473 | " max time between commands: %d us," | ||
474 | " nominal CPU frequency: %d MHz," | ||
475 | " minimum CPU frequency: %d MHz," | ||
476 | " minimum CPU frequency without throttling: %d MHz\n", | ||
477 | ioread32(&pcch_hdr->minimum_time), | ||
478 | ioread32(&pcch_hdr->maximum_time), | ||
479 | ioread32(&pcch_hdr->nominal), | ||
480 | ioread32(&pcch_hdr->throttled_frequency), | ||
481 | ioread32(&pcch_hdr->minimum_frequency)); | ||
482 | |||
483 | member = &out_obj->package.elements[1]; | ||
484 | if (member->type != ACPI_TYPE_BUFFER) { | ||
485 | ret = -ENODEV; | ||
486 | goto pcch_free; | ||
487 | } | ||
488 | |||
489 | reg_resource = (struct pcc_register_resource *)member->buffer.pointer; | ||
490 | |||
491 | doorbell.space_id = reg_resource->space_id; | ||
492 | doorbell.bit_width = reg_resource->bit_width; | ||
493 | doorbell.bit_offset = reg_resource->bit_offset; | ||
494 | doorbell.access_width = 64; | ||
495 | doorbell.address = reg_resource->address; | ||
496 | |||
497 | dprintk("probe: doorbell: space_id is %d, bit_width is %d, " | ||
498 | "bit_offset is %d, access_width is %d, address is 0x%llx\n", | ||
499 | doorbell.space_id, doorbell.bit_width, doorbell.bit_offset, | ||
500 | doorbell.access_width, reg_resource->address); | ||
501 | |||
502 | member = &out_obj->package.elements[2]; | ||
503 | if (member->type != ACPI_TYPE_INTEGER) { | ||
504 | ret = -ENODEV; | ||
505 | goto pcch_free; | ||
506 | } | ||
507 | |||
508 | doorbell_preserve = member->integer.value; | ||
509 | |||
510 | member = &out_obj->package.elements[3]; | ||
511 | if (member->type != ACPI_TYPE_INTEGER) { | ||
512 | ret = -ENODEV; | ||
513 | goto pcch_free; | ||
514 | } | ||
515 | |||
516 | doorbell_write = member->integer.value; | ||
517 | |||
518 | dprintk("probe: doorbell_preserve: 0x%llx," | ||
519 | " doorbell_write: 0x%llx\n", | ||
520 | doorbell_preserve, doorbell_write); | ||
521 | |||
522 | pcc_cpu_info = alloc_percpu(struct pcc_cpu); | ||
523 | if (!pcc_cpu_info) { | ||
524 | ret = -ENOMEM; | ||
525 | goto pcch_free; | ||
526 | } | ||
527 | |||
528 | printk(KERN_DEBUG "pcc-cpufreq: (v%s) driver loaded with frequency" | ||
529 | " limits: %d MHz, %d MHz\n", PCC_VERSION, | ||
530 | ioread32(&pcch_hdr->minimum_frequency), | ||
531 | ioread32(&pcch_hdr->nominal)); | ||
532 | kfree(output.pointer); | ||
533 | return ret; | ||
534 | pcch_free: | ||
535 | pcc_clear_mapping(); | ||
536 | out_free: | ||
537 | kfree(output.pointer); | ||
538 | return ret; | ||
539 | } | ||
540 | |||
541 | static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) | ||
542 | { | ||
543 | unsigned int cpu = policy->cpu; | ||
544 | unsigned int result = 0; | ||
545 | |||
546 | if (!pcch_virt_addr) { | ||
547 | result = -1; | ||
548 | goto out; | ||
549 | } | ||
550 | |||
551 | result = pcc_get_offset(cpu); | ||
552 | if (result) { | ||
553 | dprintk("init: PCCP evaluation failed\n"); | ||
554 | goto out; | ||
555 | } | ||
556 | |||
557 | policy->max = policy->cpuinfo.max_freq = | ||
558 | ioread32(&pcch_hdr->nominal) * 1000; | ||
559 | policy->min = policy->cpuinfo.min_freq = | ||
560 | ioread32(&pcch_hdr->minimum_frequency) * 1000; | ||
561 | policy->cur = pcc_get_freq(cpu); | ||
562 | |||
563 | if (!policy->cur) { | ||
564 | dprintk("init: Unable to get current CPU frequency\n"); | ||
565 | result = -EINVAL; | ||
566 | goto out; | ||
567 | } | ||
568 | |||
569 | dprintk("init: policy->max is %d, policy->min is %d\n", | ||
570 | policy->max, policy->min); | ||
571 | out: | ||
572 | return result; | ||
573 | } | ||
574 | |||
/* cpufreq ->exit callback: there is no per-policy state to release. */
static int pcc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
	return 0;
}
579 | |||
580 | static struct cpufreq_driver pcc_cpufreq_driver = { | ||
581 | .flags = CPUFREQ_CONST_LOOPS, | ||
582 | .get = pcc_get_freq, | ||
583 | .verify = pcc_cpufreq_verify, | ||
584 | .target = pcc_cpufreq_target, | ||
585 | .init = pcc_cpufreq_cpu_init, | ||
586 | .exit = pcc_cpufreq_cpu_exit, | ||
587 | .name = "pcc-cpufreq", | ||
588 | .owner = THIS_MODULE, | ||
589 | }; | ||
590 | |||
591 | static int __init pcc_cpufreq_init(void) | ||
592 | { | ||
593 | int ret; | ||
594 | |||
595 | if (acpi_disabled) | ||
596 | return 0; | ||
597 | |||
598 | ret = pcc_cpufreq_probe(); | ||
599 | if (ret) { | ||
600 | dprintk("pcc_cpufreq_init: PCCH evaluation failed\n"); | ||
601 | return ret; | ||
602 | } | ||
603 | |||
604 | ret = cpufreq_register_driver(&pcc_cpufreq_driver); | ||
605 | |||
606 | return ret; | ||
607 | } | ||
608 | |||
609 | static void __exit pcc_cpufreq_exit(void) | ||
610 | { | ||
611 | cpufreq_unregister_driver(&pcc_cpufreq_driver); | ||
612 | |||
613 | pcc_clear_mapping(); | ||
614 | |||
615 | free_percpu(pcc_cpu_info); | ||
616 | } | ||
617 | |||
618 | MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar"); | ||
619 | MODULE_VERSION(PCC_VERSION); | ||
620 | MODULE_DESCRIPTION("Processor Clocking Control interface driver"); | ||
621 | MODULE_LICENSE("GPL"); | ||
622 | |||
623 | late_initcall(pcc_cpufreq_init); | ||
624 | module_exit(pcc_cpufreq_exit); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c deleted file mode 100644 index b3379d6a5c57..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ /dev/null | |||
@@ -1,261 +0,0 @@ | |||
1 | /* | ||
2 | * This file was based upon code in Powertweak Linux (http://powertweak.sf.net) | ||
3 | * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, | ||
4 | * Dominik Brodowski. | ||
5 | * | ||
6 | * Licensed under the terms of the GNU GPL License version 2. | ||
7 | * | ||
8 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
9 | */ | ||
10 | |||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/cpufreq.h> | ||
15 | #include <linux/ioport.h> | ||
16 | #include <linux/timex.h> | ||
17 | #include <linux/io.h> | ||
18 | |||
19 | #include <asm/msr.h> | ||
20 | |||
21 | #define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long | ||
22 | as it is unused */ | ||
23 | |||
24 | #define PFX "powernow-k6: " | ||
25 | static unsigned int busfreq; /* FSB, in 10 kHz */ | ||
26 | static unsigned int max_multiplier; | ||
27 | |||
28 | |||
29 | /* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */ | ||
30 | static struct cpufreq_frequency_table clock_ratio[] = { | ||
31 | {45, /* 000 -> 4.5x */ 0}, | ||
32 | {50, /* 001 -> 5.0x */ 0}, | ||
33 | {40, /* 010 -> 4.0x */ 0}, | ||
34 | {55, /* 011 -> 5.5x */ 0}, | ||
35 | {20, /* 100 -> 2.0x */ 0}, | ||
36 | {30, /* 101 -> 3.0x */ 0}, | ||
37 | {60, /* 110 -> 6.0x */ 0}, | ||
38 | {35, /* 111 -> 3.5x */ 0}, | ||
39 | {0, CPUFREQ_TABLE_END} | ||
40 | }; | ||
41 | |||
42 | |||
43 | /** | ||
44 | * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier | ||
45 | * | ||
46 | * Returns the current setting of the frequency multiplier. Core clock | ||
47 | * speed is frequency of the Front-Side Bus multiplied with this value. | ||
48 | */ | ||
49 | static int powernow_k6_get_cpu_multiplier(void) | ||
50 | { | ||
51 | u64 invalue = 0; | ||
52 | u32 msrval; | ||
53 | |||
54 | msrval = POWERNOW_IOPORT + 0x1; | ||
55 | wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ | ||
56 | invalue = inl(POWERNOW_IOPORT + 0x8); | ||
57 | msrval = POWERNOW_IOPORT + 0x0; | ||
58 | wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ | ||
59 | |||
60 | return clock_ratio[(invalue >> 5)&7].index; | ||
61 | } | ||
62 | |||
63 | |||
64 | /** | ||
65 | * powernow_k6_set_state - set the PowerNow! multiplier | ||
66 | * @best_i: clock_ratio[best_i] is the target multiplier | ||
67 | * | ||
68 | * Tries to change the PowerNow! multiplier | ||
69 | */ | ||
70 | static void powernow_k6_set_state(unsigned int best_i) | ||
71 | { | ||
72 | unsigned long outvalue = 0, invalue = 0; | ||
73 | unsigned long msrval; | ||
74 | struct cpufreq_freqs freqs; | ||
75 | |||
76 | if (clock_ratio[best_i].index > max_multiplier) { | ||
77 | printk(KERN_ERR PFX "invalid target frequency\n"); | ||
78 | return; | ||
79 | } | ||
80 | |||
81 | freqs.old = busfreq * powernow_k6_get_cpu_multiplier(); | ||
82 | freqs.new = busfreq * clock_ratio[best_i].index; | ||
83 | freqs.cpu = 0; /* powernow-k6.c is UP only driver */ | ||
84 | |||
85 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
86 | |||
87 | /* we now need to transform best_i to the BVC format, see AMD#23446 */ | ||
88 | |||
89 | outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5); | ||
90 | |||
91 | msrval = POWERNOW_IOPORT + 0x1; | ||
92 | wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ | ||
93 | invalue = inl(POWERNOW_IOPORT + 0x8); | ||
94 | invalue = invalue & 0xf; | ||
95 | outvalue = outvalue | invalue; | ||
96 | outl(outvalue , (POWERNOW_IOPORT + 0x8)); | ||
97 | msrval = POWERNOW_IOPORT + 0x0; | ||
98 | wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ | ||
99 | |||
100 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
101 | |||
102 | return; | ||
103 | } | ||
104 | |||
105 | |||
106 | /** | ||
107 | * powernow_k6_verify - verifies a new CPUfreq policy | ||
108 | * @policy: new policy | ||
109 | * | ||
110 | * Policy must be within lowest and highest possible CPU Frequency, | ||
111 | * and at least one possible state must be within min and max. | ||
112 | */ | ||
113 | static int powernow_k6_verify(struct cpufreq_policy *policy) | ||
114 | { | ||
115 | return cpufreq_frequency_table_verify(policy, &clock_ratio[0]); | ||
116 | } | ||
117 | |||
118 | |||
119 | /** | ||
120 | * powernow_k6_setpolicy - sets a new CPUFreq policy | ||
121 | * @policy: new policy | ||
122 | * @target_freq: the target frequency | ||
123 | * @relation: how that frequency relates to achieved frequency | ||
124 | * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | ||
125 | * | ||
126 | * sets a new CPUFreq policy | ||
127 | */ | ||
128 | static int powernow_k6_target(struct cpufreq_policy *policy, | ||
129 | unsigned int target_freq, | ||
130 | unsigned int relation) | ||
131 | { | ||
132 | unsigned int newstate = 0; | ||
133 | |||
134 | if (cpufreq_frequency_table_target(policy, &clock_ratio[0], | ||
135 | target_freq, relation, &newstate)) | ||
136 | return -EINVAL; | ||
137 | |||
138 | powernow_k6_set_state(newstate); | ||
139 | |||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | |||
144 | static int powernow_k6_cpu_init(struct cpufreq_policy *policy) | ||
145 | { | ||
146 | unsigned int i, f; | ||
147 | int result; | ||
148 | |||
149 | if (policy->cpu != 0) | ||
150 | return -ENODEV; | ||
151 | |||
152 | /* get frequencies */ | ||
153 | max_multiplier = powernow_k6_get_cpu_multiplier(); | ||
154 | busfreq = cpu_khz / max_multiplier; | ||
155 | |||
156 | /* table init */ | ||
157 | for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { | ||
158 | f = clock_ratio[i].index; | ||
159 | if (f > max_multiplier) | ||
160 | clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; | ||
161 | else | ||
162 | clock_ratio[i].frequency = busfreq * f; | ||
163 | } | ||
164 | |||
165 | /* cpuinfo and default policy values */ | ||
166 | policy->cpuinfo.transition_latency = 200000; | ||
167 | policy->cur = busfreq * max_multiplier; | ||
168 | |||
169 | result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); | ||
170 | if (result) | ||
171 | return result; | ||
172 | |||
173 | cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu); | ||
174 | |||
175 | return 0; | ||
176 | } | ||
177 | |||
178 | |||
179 | static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) | ||
180 | { | ||
181 | unsigned int i; | ||
182 | for (i = 0; i < 8; i++) { | ||
183 | if (i == max_multiplier) | ||
184 | powernow_k6_set_state(i); | ||
185 | } | ||
186 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | static unsigned int powernow_k6_get(unsigned int cpu) | ||
191 | { | ||
192 | unsigned int ret; | ||
193 | ret = (busfreq * powernow_k6_get_cpu_multiplier()); | ||
194 | return ret; | ||
195 | } | ||
196 | |||
197 | static struct freq_attr *powernow_k6_attr[] = { | ||
198 | &cpufreq_freq_attr_scaling_available_freqs, | ||
199 | NULL, | ||
200 | }; | ||
201 | |||
202 | static struct cpufreq_driver powernow_k6_driver = { | ||
203 | .verify = powernow_k6_verify, | ||
204 | .target = powernow_k6_target, | ||
205 | .init = powernow_k6_cpu_init, | ||
206 | .exit = powernow_k6_cpu_exit, | ||
207 | .get = powernow_k6_get, | ||
208 | .name = "powernow-k6", | ||
209 | .owner = THIS_MODULE, | ||
210 | .attr = powernow_k6_attr, | ||
211 | }; | ||
212 | |||
213 | |||
214 | /** | ||
215 | * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver | ||
216 | * | ||
217 | * Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported | ||
218 | * devices, -EINVAL or -ENOMEM on problems during initiatization, and zero | ||
219 | * on success. | ||
220 | */ | ||
221 | static int __init powernow_k6_init(void) | ||
222 | { | ||
223 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
224 | |||
225 | if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) || | ||
226 | ((c->x86_model != 12) && (c->x86_model != 13))) | ||
227 | return -ENODEV; | ||
228 | |||
229 | if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) { | ||
230 | printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n"); | ||
231 | return -EIO; | ||
232 | } | ||
233 | |||
234 | if (cpufreq_register_driver(&powernow_k6_driver)) { | ||
235 | release_region(POWERNOW_IOPORT, 16); | ||
236 | return -EINVAL; | ||
237 | } | ||
238 | |||
239 | return 0; | ||
240 | } | ||
241 | |||
242 | |||
243 | /** | ||
244 | * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support | ||
245 | * | ||
246 | * Unregisters AMD K6-2+ / K6-3+ PowerNow! support. | ||
247 | */ | ||
248 | static void __exit powernow_k6_exit(void) | ||
249 | { | ||
250 | cpufreq_unregister_driver(&powernow_k6_driver); | ||
251 | release_region(POWERNOW_IOPORT, 16); | ||
252 | } | ||
253 | |||
254 | |||
255 | MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, " | ||
256 | "Dominik Brodowski <linux@brodo.de>"); | ||
257 | MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); | ||
258 | MODULE_LICENSE("GPL"); | ||
259 | |||
260 | module_init(powernow_k6_init); | ||
261 | module_exit(powernow_k6_exit); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c deleted file mode 100644 index 4a45fd6e41ba..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ /dev/null | |||
@@ -1,752 +0,0 @@ | |||
1 | /* | ||
2 | * AMD K7 Powernow driver. | ||
3 | * (C) 2003 Dave Jones on behalf of SuSE Labs. | ||
4 | * (C) 2003-2004 Dave Jones <davej@redhat.com> | ||
5 | * | ||
6 | * Licensed under the terms of the GNU GPL License version 2. | ||
7 | * Based upon datasheets & sample CPUs kindly provided by AMD. | ||
8 | * | ||
9 | * Errata 5: | ||
10 | * CPU may fail to execute a FID/VID change in presence of interrupt. | ||
11 | * - We cli/sti on stepping A0 CPUs around the FID/VID transition. | ||
12 | * Errata 15: | ||
13 | * CPU with half frequency multipliers may hang upon wakeup from disconnect. | ||
14 | * - We disable half multipliers if ACPI is used on A0 stepping CPUs. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/moduleparam.h> | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/cpufreq.h> | ||
22 | #include <linux/slab.h> | ||
23 | #include <linux/string.h> | ||
24 | #include <linux/dmi.h> | ||
25 | #include <linux/timex.h> | ||
26 | #include <linux/io.h> | ||
27 | |||
28 | #include <asm/timer.h> /* Needed for recalibrate_cpu_khz() */ | ||
29 | #include <asm/msr.h> | ||
30 | #include <asm/system.h> | ||
31 | |||
32 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
33 | #include <linux/acpi.h> | ||
34 | #include <acpi/processor.h> | ||
35 | #endif | ||
36 | |||
37 | #include "powernow-k7.h" | ||
38 | |||
39 | #define PFX "powernow: " | ||
40 | |||
41 | |||
42 | struct psb_s { | ||
43 | u8 signature[10]; | ||
44 | u8 tableversion; | ||
45 | u8 flags; | ||
46 | u16 settlingtime; | ||
47 | u8 reserved1; | ||
48 | u8 numpst; | ||
49 | }; | ||
50 | |||
51 | struct pst_s { | ||
52 | u32 cpuid; | ||
53 | u8 fsbspeed; | ||
54 | u8 maxfid; | ||
55 | u8 startvid; | ||
56 | u8 numpstates; | ||
57 | }; | ||
58 | |||
59 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
/*
 * View of an ACPI performance state's control value as the FID, VID and
 * SGTC fields packed into it.
 */
union powernow_acpi_control_t {
	struct {
		unsigned long fid:5;	/* frequency ID code */
		unsigned long vid:5;	/* voltage ID code */
		unsigned long sgtc:20;	/* used as the transition latency */
		unsigned long res1:2;
	} bits;
	unsigned long val;
};
69 | #endif | ||
70 | |||
71 | #ifdef CONFIG_CPU_FREQ_DEBUG | ||
/*
 * VCore voltage for each 5-bit VID code, in thousandths of a volt
 * (divide by 1000 to get volts).  Only used by debug output.
 */
static const int mobile_vid_table[32] = {
	2000, 1950, 1900, 1850,
	1800, 1750, 1700, 1650,
	1600, 1550, 1500, 1450,
	1400, 1350, 1300, 0,
	1275, 1250, 1225, 1200,
	1175, 1150, 1125, 1100,
	1075, 1050, 1025, 1000,
	975, 950, 925, 0,
};
79 | #endif | ||
80 | |||
/*
 * Clock multiplier for each 5-bit FID code, scaled by ten (divide by 10 to
 * get the multiplier, e.g. 115 is 11.5x).  The last two codes hold -1.
 */
static const int fid_codes[32] = {
	110, 115, 120, 125,
	50, 55, 60, 65,
	70, 75, 80, 85,
	90, 95, 100, 105,
	30, 190, 40, 200,
	130, 135, 140, 210,
	150, 225, 160, 165,
	170, 180, -1, -1,
};
88 | |||
/*
 * Module parameter: when non-zero the driver goes straight to the ACPI
 * method instead of the legacy BIOS tables.
 */
static int acpi_force;

/* Frequency table given to the cpufreq core; built by either init path. */
static struct cpufreq_frequency_table *powernow_table;

/* Set by check_powernow() from CPUID 0x80000007 EDX bits 1 and 2. */
static unsigned int can_scale_bus;
static unsigned int can_scale_vid;

/* Range of the table frequencies; minimum starts at the largest value. */
static unsigned int minimum_speed = -1;
static unsigned int maximum_speed;

/* Number of entries in the current performance-state table. */
static unsigned int number_scales;

/* Bus frequency, derived from cpu_khz in powernow_cpu_init(). */
static unsigned int fsb;

/* Written to the SGTC field on every FID/VID change. */
static unsigned int latency;

/* Non-zero on the model 6, stepping 0 core that needs errata workarounds. */
static char have_a0;

#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
		"powernow-k7", msg)
108 | |||
109 | static int check_fsb(unsigned int fsbspeed) | ||
110 | { | ||
111 | int delta; | ||
112 | unsigned int f = fsb / 1000; | ||
113 | |||
114 | delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed; | ||
115 | return delta < 5; | ||
116 | } | ||
117 | |||
118 | static int check_powernow(void) | ||
119 | { | ||
120 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
121 | unsigned int maxei, eax, ebx, ecx, edx; | ||
122 | |||
123 | if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 6)) { | ||
124 | #ifdef MODULE | ||
125 | printk(KERN_INFO PFX "This module only works with " | ||
126 | "AMD K7 CPUs\n"); | ||
127 | #endif | ||
128 | return 0; | ||
129 | } | ||
130 | |||
131 | /* Get maximum capabilities */ | ||
132 | maxei = cpuid_eax(0x80000000); | ||
133 | if (maxei < 0x80000007) { /* Any powernow info ? */ | ||
134 | #ifdef MODULE | ||
135 | printk(KERN_INFO PFX "No powernow capabilities detected\n"); | ||
136 | #endif | ||
137 | return 0; | ||
138 | } | ||
139 | |||
140 | if ((c->x86_model == 6) && (c->x86_mask == 0)) { | ||
141 | printk(KERN_INFO PFX "K7 660[A0] core detected, " | ||
142 | "enabling errata workarounds\n"); | ||
143 | have_a0 = 1; | ||
144 | } | ||
145 | |||
146 | cpuid(0x80000007, &eax, &ebx, &ecx, &edx); | ||
147 | |||
148 | /* Check we can actually do something before we say anything.*/ | ||
149 | if (!(edx & (1 << 1 | 1 << 2))) | ||
150 | return 0; | ||
151 | |||
152 | printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: "); | ||
153 | |||
154 | if (edx & 1 << 1) { | ||
155 | printk("frequency"); | ||
156 | can_scale_bus = 1; | ||
157 | } | ||
158 | |||
159 | if ((edx & (1 << 1 | 1 << 2)) == 0x6) | ||
160 | printk(" and "); | ||
161 | |||
162 | if (edx & 1 << 2) { | ||
163 | printk("voltage"); | ||
164 | can_scale_vid = 1; | ||
165 | } | ||
166 | |||
167 | printk(".\n"); | ||
168 | return 1; | ||
169 | } | ||
170 | |||
171 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
172 | static void invalidate_entry(unsigned int entry) | ||
173 | { | ||
174 | powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; | ||
175 | } | ||
176 | #endif | ||
177 | |||
178 | static int get_ranges(unsigned char *pst) | ||
179 | { | ||
180 | unsigned int j; | ||
181 | unsigned int speed; | ||
182 | u8 fid, vid; | ||
183 | |||
184 | powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * | ||
185 | (number_scales + 1)), GFP_KERNEL); | ||
186 | if (!powernow_table) | ||
187 | return -ENOMEM; | ||
188 | |||
189 | for (j = 0 ; j < number_scales; j++) { | ||
190 | fid = *pst++; | ||
191 | |||
192 | powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10; | ||
193 | powernow_table[j].index = fid; /* lower 8 bits */ | ||
194 | |||
195 | speed = powernow_table[j].frequency; | ||
196 | |||
197 | if ((fid_codes[fid] % 10) == 5) { | ||
198 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
199 | if (have_a0 == 1) | ||
200 | invalidate_entry(j); | ||
201 | #endif | ||
202 | } | ||
203 | |||
204 | if (speed < minimum_speed) | ||
205 | minimum_speed = speed; | ||
206 | if (speed > maximum_speed) | ||
207 | maximum_speed = speed; | ||
208 | |||
209 | vid = *pst++; | ||
210 | powernow_table[j].index |= (vid << 8); /* upper 8 bits */ | ||
211 | |||
212 | dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " | ||
213 | "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, | ||
214 | fid_codes[fid] % 10, speed/1000, vid, | ||
215 | mobile_vid_table[vid]/1000, | ||
216 | mobile_vid_table[vid]%1000); | ||
217 | } | ||
218 | powernow_table[number_scales].frequency = CPUFREQ_TABLE_END; | ||
219 | powernow_table[number_scales].index = 0; | ||
220 | |||
221 | return 0; | ||
222 | } | ||
223 | |||
224 | |||
225 | static void change_FID(int fid) | ||
226 | { | ||
227 | union msr_fidvidctl fidvidctl; | ||
228 | |||
229 | rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); | ||
230 | if (fidvidctl.bits.FID != fid) { | ||
231 | fidvidctl.bits.SGTC = latency; | ||
232 | fidvidctl.bits.FID = fid; | ||
233 | fidvidctl.bits.VIDC = 0; | ||
234 | fidvidctl.bits.FIDC = 1; | ||
235 | wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); | ||
236 | } | ||
237 | } | ||
238 | |||
239 | |||
240 | static void change_VID(int vid) | ||
241 | { | ||
242 | union msr_fidvidctl fidvidctl; | ||
243 | |||
244 | rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); | ||
245 | if (fidvidctl.bits.VID != vid) { | ||
246 | fidvidctl.bits.SGTC = latency; | ||
247 | fidvidctl.bits.VID = vid; | ||
248 | fidvidctl.bits.FIDC = 0; | ||
249 | fidvidctl.bits.VIDC = 1; | ||
250 | wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); | ||
251 | } | ||
252 | } | ||
253 | |||
254 | |||
255 | static void change_speed(unsigned int index) | ||
256 | { | ||
257 | u8 fid, vid; | ||
258 | struct cpufreq_freqs freqs; | ||
259 | union msr_fidvidstatus fidvidstatus; | ||
260 | int cfid; | ||
261 | |||
262 | /* fid are the lower 8 bits of the index we stored into | ||
263 | * the cpufreq frequency table in powernow_decode_bios, | ||
264 | * vid are the upper 8 bits. | ||
265 | */ | ||
266 | |||
267 | fid = powernow_table[index].index & 0xFF; | ||
268 | vid = (powernow_table[index].index & 0xFF00) >> 8; | ||
269 | |||
270 | freqs.cpu = 0; | ||
271 | |||
272 | rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); | ||
273 | cfid = fidvidstatus.bits.CFID; | ||
274 | freqs.old = fsb * fid_codes[cfid] / 10; | ||
275 | |||
276 | freqs.new = powernow_table[index].frequency; | ||
277 | |||
278 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
279 | |||
280 | /* Now do the magic poking into the MSRs. */ | ||
281 | |||
282 | if (have_a0 == 1) /* A0 errata 5 */ | ||
283 | local_irq_disable(); | ||
284 | |||
285 | if (freqs.old > freqs.new) { | ||
286 | /* Going down, so change FID first */ | ||
287 | change_FID(fid); | ||
288 | change_VID(vid); | ||
289 | } else { | ||
290 | /* Going up, so change VID first */ | ||
291 | change_VID(vid); | ||
292 | change_FID(fid); | ||
293 | } | ||
294 | |||
295 | |||
296 | if (have_a0 == 1) | ||
297 | local_irq_enable(); | ||
298 | |||
299 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
300 | } | ||
301 | |||
302 | |||
303 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
304 | |||
305 | static struct acpi_processor_performance *acpi_processor_perf; | ||
306 | |||
307 | static int powernow_acpi_init(void) | ||
308 | { | ||
309 | int i; | ||
310 | int retval = 0; | ||
311 | union powernow_acpi_control_t pc; | ||
312 | |||
313 | if (acpi_processor_perf != NULL && powernow_table != NULL) { | ||
314 | retval = -EINVAL; | ||
315 | goto err0; | ||
316 | } | ||
317 | |||
318 | acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance), | ||
319 | GFP_KERNEL); | ||
320 | if (!acpi_processor_perf) { | ||
321 | retval = -ENOMEM; | ||
322 | goto err0; | ||
323 | } | ||
324 | |||
325 | if (!zalloc_cpumask_var(&acpi_processor_perf->shared_cpu_map, | ||
326 | GFP_KERNEL)) { | ||
327 | retval = -ENOMEM; | ||
328 | goto err05; | ||
329 | } | ||
330 | |||
331 | if (acpi_processor_register_performance(acpi_processor_perf, 0)) { | ||
332 | retval = -EIO; | ||
333 | goto err1; | ||
334 | } | ||
335 | |||
336 | if (acpi_processor_perf->control_register.space_id != | ||
337 | ACPI_ADR_SPACE_FIXED_HARDWARE) { | ||
338 | retval = -ENODEV; | ||
339 | goto err2; | ||
340 | } | ||
341 | |||
342 | if (acpi_processor_perf->status_register.space_id != | ||
343 | ACPI_ADR_SPACE_FIXED_HARDWARE) { | ||
344 | retval = -ENODEV; | ||
345 | goto err2; | ||
346 | } | ||
347 | |||
348 | number_scales = acpi_processor_perf->state_count; | ||
349 | |||
350 | if (number_scales < 2) { | ||
351 | retval = -ENODEV; | ||
352 | goto err2; | ||
353 | } | ||
354 | |||
355 | powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * | ||
356 | (number_scales + 1)), GFP_KERNEL); | ||
357 | if (!powernow_table) { | ||
358 | retval = -ENOMEM; | ||
359 | goto err2; | ||
360 | } | ||
361 | |||
362 | pc.val = (unsigned long) acpi_processor_perf->states[0].control; | ||
363 | for (i = 0; i < number_scales; i++) { | ||
364 | u8 fid, vid; | ||
365 | struct acpi_processor_px *state = | ||
366 | &acpi_processor_perf->states[i]; | ||
367 | unsigned int speed, speed_mhz; | ||
368 | |||
369 | pc.val = (unsigned long) state->control; | ||
370 | dprintk("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", | ||
371 | i, | ||
372 | (u32) state->core_frequency, | ||
373 | (u32) state->power, | ||
374 | (u32) state->transition_latency, | ||
375 | (u32) state->control, | ||
376 | pc.bits.sgtc); | ||
377 | |||
378 | vid = pc.bits.vid; | ||
379 | fid = pc.bits.fid; | ||
380 | |||
381 | powernow_table[i].frequency = fsb * fid_codes[fid] / 10; | ||
382 | powernow_table[i].index = fid; /* lower 8 bits */ | ||
383 | powernow_table[i].index |= (vid << 8); /* upper 8 bits */ | ||
384 | |||
385 | speed = powernow_table[i].frequency; | ||
386 | speed_mhz = speed / 1000; | ||
387 | |||
388 | /* processor_perflib will multiply the MHz value by 1000 to | ||
389 | * get a KHz value (e.g. 1266000). However, powernow-k7 works | ||
390 | * with true KHz values (e.g. 1266768). To ensure that all | ||
391 | * powernow frequencies are available, we must ensure that | ||
392 | * ACPI doesn't restrict them, so we round up the MHz value | ||
393 | * to ensure that perflib's computed KHz value is greater than | ||
394 | * or equal to powernow's KHz value. | ||
395 | */ | ||
396 | if (speed % 1000 > 0) | ||
397 | speed_mhz++; | ||
398 | |||
399 | if ((fid_codes[fid] % 10) == 5) { | ||
400 | if (have_a0 == 1) | ||
401 | invalidate_entry(i); | ||
402 | } | ||
403 | |||
404 | dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " | ||
405 | "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, | ||
406 | fid_codes[fid] % 10, speed_mhz, vid, | ||
407 | mobile_vid_table[vid]/1000, | ||
408 | mobile_vid_table[vid]%1000); | ||
409 | |||
410 | if (state->core_frequency != speed_mhz) { | ||
411 | state->core_frequency = speed_mhz; | ||
412 | dprintk(" Corrected ACPI frequency to %d\n", | ||
413 | speed_mhz); | ||
414 | } | ||
415 | |||
416 | if (latency < pc.bits.sgtc) | ||
417 | latency = pc.bits.sgtc; | ||
418 | |||
419 | if (speed < minimum_speed) | ||
420 | minimum_speed = speed; | ||
421 | if (speed > maximum_speed) | ||
422 | maximum_speed = speed; | ||
423 | } | ||
424 | |||
425 | powernow_table[i].frequency = CPUFREQ_TABLE_END; | ||
426 | powernow_table[i].index = 0; | ||
427 | |||
428 | /* notify BIOS that we exist */ | ||
429 | acpi_processor_notify_smm(THIS_MODULE); | ||
430 | |||
431 | return 0; | ||
432 | |||
433 | err2: | ||
434 | acpi_processor_unregister_performance(acpi_processor_perf, 0); | ||
435 | err1: | ||
436 | free_cpumask_var(acpi_processor_perf->shared_cpu_map); | ||
437 | err05: | ||
438 | kfree(acpi_processor_perf); | ||
439 | err0: | ||
440 | printk(KERN_WARNING PFX "ACPI perflib can not be used on " | ||
441 | "this platform\n"); | ||
442 | acpi_processor_perf = NULL; | ||
443 | return retval; | ||
444 | } | ||
445 | #else | ||
446 | static int powernow_acpi_init(void) | ||
447 | { | ||
448 | printk(KERN_INFO PFX "no support for ACPI processor found." | ||
449 | " Please recompile your kernel with ACPI processor\n"); | ||
450 | return -EINVAL; | ||
451 | } | ||
452 | #endif | ||
453 | |||
454 | static void print_pst_entry(struct pst_s *pst, unsigned int j) | ||
455 | { | ||
456 | dprintk("PST:%d (@%p)\n", j, pst); | ||
457 | dprintk(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", | ||
458 | pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); | ||
459 | } | ||
460 | |||
461 | static int powernow_decode_bios(int maxfid, int startvid) | ||
462 | { | ||
463 | struct psb_s *psb; | ||
464 | struct pst_s *pst; | ||
465 | unsigned int i, j; | ||
466 | unsigned char *p; | ||
467 | unsigned int etuple; | ||
468 | unsigned int ret; | ||
469 | |||
470 | etuple = cpuid_eax(0x80000001); | ||
471 | |||
472 | for (i = 0xC0000; i < 0xffff0 ; i += 16) { | ||
473 | |||
474 | p = phys_to_virt(i); | ||
475 | |||
476 | if (memcmp(p, "AMDK7PNOW!", 10) == 0) { | ||
477 | dprintk("Found PSB header at %p\n", p); | ||
478 | psb = (struct psb_s *) p; | ||
479 | dprintk("Table version: 0x%x\n", psb->tableversion); | ||
480 | if (psb->tableversion != 0x12) { | ||
481 | printk(KERN_INFO PFX "Sorry, only v1.2 tables" | ||
482 | " supported right now\n"); | ||
483 | return -ENODEV; | ||
484 | } | ||
485 | |||
486 | dprintk("Flags: 0x%x\n", psb->flags); | ||
487 | if ((psb->flags & 1) == 0) | ||
488 | dprintk("Mobile voltage regulator\n"); | ||
489 | else | ||
490 | dprintk("Desktop voltage regulator\n"); | ||
491 | |||
492 | latency = psb->settlingtime; | ||
493 | if (latency < 100) { | ||
494 | printk(KERN_INFO PFX "BIOS set settling time " | ||
495 | "to %d microseconds. " | ||
496 | "Should be at least 100. " | ||
497 | "Correcting.\n", latency); | ||
498 | latency = 100; | ||
499 | } | ||
500 | dprintk("Settling Time: %d microseconds.\n", | ||
501 | psb->settlingtime); | ||
502 | dprintk("Has %d PST tables. (Only dumping ones " | ||
503 | "relevant to this CPU).\n", | ||
504 | psb->numpst); | ||
505 | |||
506 | p += sizeof(struct psb_s); | ||
507 | |||
508 | pst = (struct pst_s *) p; | ||
509 | |||
510 | for (j = 0; j < psb->numpst; j++) { | ||
511 | pst = (struct pst_s *) p; | ||
512 | number_scales = pst->numpstates; | ||
513 | |||
514 | if ((etuple == pst->cpuid) && | ||
515 | check_fsb(pst->fsbspeed) && | ||
516 | (maxfid == pst->maxfid) && | ||
517 | (startvid == pst->startvid)) { | ||
518 | print_pst_entry(pst, j); | ||
519 | p = (char *)pst + sizeof(struct pst_s); | ||
520 | ret = get_ranges(p); | ||
521 | return ret; | ||
522 | } else { | ||
523 | unsigned int k; | ||
524 | p = (char *)pst + sizeof(struct pst_s); | ||
525 | for (k = 0; k < number_scales; k++) | ||
526 | p += 2; | ||
527 | } | ||
528 | } | ||
529 | printk(KERN_INFO PFX "No PST tables match this cpuid " | ||
530 | "(0x%x)\n", etuple); | ||
531 | printk(KERN_INFO PFX "This is indicative of a broken " | ||
532 | "BIOS.\n"); | ||
533 | |||
534 | return -EINVAL; | ||
535 | } | ||
536 | p++; | ||
537 | } | ||
538 | |||
539 | return -ENODEV; | ||
540 | } | ||
541 | |||
542 | |||
543 | static int powernow_target(struct cpufreq_policy *policy, | ||
544 | unsigned int target_freq, | ||
545 | unsigned int relation) | ||
546 | { | ||
547 | unsigned int newstate; | ||
548 | |||
549 | if (cpufreq_frequency_table_target(policy, powernow_table, target_freq, | ||
550 | relation, &newstate)) | ||
551 | return -EINVAL; | ||
552 | |||
553 | change_speed(newstate); | ||
554 | |||
555 | return 0; | ||
556 | } | ||
557 | |||
558 | |||
559 | static int powernow_verify(struct cpufreq_policy *policy) | ||
560 | { | ||
561 | return cpufreq_frequency_table_verify(policy, powernow_table); | ||
562 | } | ||
563 | |||
564 | /* | ||
565 | * We use the fact that the bus frequency is somehow | ||
566 | * a multiple of 100000/3 khz, then we compute sgtc according | ||
567 | * to this multiple. | ||
568 | * That way, we match more how AMD thinks all of that work. | ||
569 | * We will then get the same kind of behaviour already tested under | ||
570 | * the "well-known" other OS. | ||
571 | */ | ||
572 | static int __cpuinit fixup_sgtc(void) | ||
573 | { | ||
574 | unsigned int sgtc; | ||
575 | unsigned int m; | ||
576 | |||
577 | m = fsb / 3333; | ||
578 | if ((m % 10) >= 5) | ||
579 | m += 5; | ||
580 | |||
581 | m /= 10; | ||
582 | |||
583 | sgtc = 100 * m * latency; | ||
584 | sgtc = sgtc / 3; | ||
585 | if (sgtc > 0xfffff) { | ||
586 | printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc); | ||
587 | sgtc = 0xfffff; | ||
588 | } | ||
589 | return sgtc; | ||
590 | } | ||
591 | |||
592 | static unsigned int powernow_get(unsigned int cpu) | ||
593 | { | ||
594 | union msr_fidvidstatus fidvidstatus; | ||
595 | unsigned int cfid; | ||
596 | |||
597 | if (cpu) | ||
598 | return 0; | ||
599 | rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); | ||
600 | cfid = fidvidstatus.bits.CFID; | ||
601 | |||
602 | return fsb * fid_codes[cfid] / 10; | ||
603 | } | ||
604 | |||
605 | |||
606 | static int __cpuinit acer_cpufreq_pst(const struct dmi_system_id *d) | ||
607 | { | ||
608 | printk(KERN_WARNING PFX | ||
609 | "%s laptop with broken PST tables in BIOS detected.\n", | ||
610 | d->ident); | ||
611 | printk(KERN_WARNING PFX | ||
612 | "You need to downgrade to 3A21 (09/09/2002), or try a newer " | ||
613 | "BIOS than 3A71 (01/20/2003)\n"); | ||
614 | printk(KERN_WARNING PFX | ||
615 | "cpufreq scaling has been disabled as a result of this.\n"); | ||
616 | return 0; | ||
617 | } | ||
618 | |||
619 | /* | ||
620 | * Some Athlon laptops have really fucked PST tables. | ||
621 | * A BIOS update is all that can save them. | ||
622 | * Mention this, and disable cpufreq. | ||
623 | */ | ||
624 | static struct dmi_system_id __cpuinitdata powernow_dmi_table[] = { | ||
625 | { | ||
626 | .callback = acer_cpufreq_pst, | ||
627 | .ident = "Acer Aspire", | ||
628 | .matches = { | ||
629 | DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"), | ||
630 | DMI_MATCH(DMI_BIOS_VERSION, "3A71"), | ||
631 | }, | ||
632 | }, | ||
633 | { } | ||
634 | }; | ||
635 | |||
636 | static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy) | ||
637 | { | ||
638 | union msr_fidvidstatus fidvidstatus; | ||
639 | int result; | ||
640 | |||
641 | if (policy->cpu != 0) | ||
642 | return -ENODEV; | ||
643 | |||
644 | rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); | ||
645 | |||
646 | recalibrate_cpu_khz(); | ||
647 | |||
648 | fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID]; | ||
649 | if (!fsb) { | ||
650 | printk(KERN_WARNING PFX "can not determine bus frequency\n"); | ||
651 | return -EINVAL; | ||
652 | } | ||
653 | dprintk("FSB: %3dMHz\n", fsb/1000); | ||
654 | |||
655 | if (dmi_check_system(powernow_dmi_table) || acpi_force) { | ||
656 | printk(KERN_INFO PFX "PSB/PST known to be broken. " | ||
657 | "Trying ACPI instead\n"); | ||
658 | result = powernow_acpi_init(); | ||
659 | } else { | ||
660 | result = powernow_decode_bios(fidvidstatus.bits.MFID, | ||
661 | fidvidstatus.bits.SVID); | ||
662 | if (result) { | ||
663 | printk(KERN_INFO PFX "Trying ACPI perflib\n"); | ||
664 | maximum_speed = 0; | ||
665 | minimum_speed = -1; | ||
666 | latency = 0; | ||
667 | result = powernow_acpi_init(); | ||
668 | if (result) { | ||
669 | printk(KERN_INFO PFX | ||
670 | "ACPI and legacy methods failed\n"); | ||
671 | } | ||
672 | } else { | ||
673 | /* SGTC use the bus clock as timer */ | ||
674 | latency = fixup_sgtc(); | ||
675 | printk(KERN_INFO PFX "SGTC: %d\n", latency); | ||
676 | } | ||
677 | } | ||
678 | |||
679 | if (result) | ||
680 | return result; | ||
681 | |||
682 | printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", | ||
683 | minimum_speed/1000, maximum_speed/1000); | ||
684 | |||
685 | policy->cpuinfo.transition_latency = | ||
686 | cpufreq_scale(2000000UL, fsb, latency); | ||
687 | |||
688 | policy->cur = powernow_get(0); | ||
689 | |||
690 | cpufreq_frequency_table_get_attr(powernow_table, policy->cpu); | ||
691 | |||
692 | return cpufreq_frequency_table_cpuinfo(policy, powernow_table); | ||
693 | } | ||
694 | |||
695 | static int powernow_cpu_exit(struct cpufreq_policy *policy) | ||
696 | { | ||
697 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
698 | |||
699 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
700 | if (acpi_processor_perf) { | ||
701 | acpi_processor_unregister_performance(acpi_processor_perf, 0); | ||
702 | free_cpumask_var(acpi_processor_perf->shared_cpu_map); | ||
703 | kfree(acpi_processor_perf); | ||
704 | } | ||
705 | #endif | ||
706 | |||
707 | kfree(powernow_table); | ||
708 | return 0; | ||
709 | } | ||
710 | |||
711 | static struct freq_attr *powernow_table_attr[] = { | ||
712 | &cpufreq_freq_attr_scaling_available_freqs, | ||
713 | NULL, | ||
714 | }; | ||
715 | |||
716 | static struct cpufreq_driver powernow_driver = { | ||
717 | .verify = powernow_verify, | ||
718 | .target = powernow_target, | ||
719 | .get = powernow_get, | ||
720 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
721 | .bios_limit = acpi_processor_get_bios_limit, | ||
722 | #endif | ||
723 | .init = powernow_cpu_init, | ||
724 | .exit = powernow_cpu_exit, | ||
725 | .name = "powernow-k7", | ||
726 | .owner = THIS_MODULE, | ||
727 | .attr = powernow_table_attr, | ||
728 | }; | ||
729 | |||
730 | static int __init powernow_init(void) | ||
731 | { | ||
732 | if (check_powernow() == 0) | ||
733 | return -ENODEV; | ||
734 | return cpufreq_register_driver(&powernow_driver); | ||
735 | } | ||
736 | |||
737 | |||
738 | static void __exit powernow_exit(void) | ||
739 | { | ||
740 | cpufreq_unregister_driver(&powernow_driver); | ||
741 | } | ||
742 | |||
743 | module_param(acpi_force, int, 0444); | ||
744 | MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); | ||
745 | |||
746 | MODULE_AUTHOR("Dave Jones <davej@redhat.com>"); | ||
747 | MODULE_DESCRIPTION("Powernow driver for AMD K7 processors."); | ||
748 | MODULE_LICENSE("GPL"); | ||
749 | |||
750 | late_initcall(powernow_init); | ||
751 | module_exit(powernow_exit); | ||
752 | |||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h deleted file mode 100644 index 35fb4eaf6e1c..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h +++ /dev/null | |||
@@ -1,43 +0,0 @@ | |||
1 | /* | ||
2 | * (C) 2003 Dave Jones. | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * | ||
6 | * AMD-specific information | ||
7 | * | ||
8 | */ | ||
9 | |||
/* Bit layout of the FID/VID control MSR (bit ranges given as high:low). */
union msr_fidvidctl {
	struct {
		unsigned FID:5;			/* 4:0 */
		unsigned reserved1:3;		/* 7:5 */
		unsigned VID:5;			/* 12:8 */
		unsigned reserved2:3;		/* 15:13 */
		unsigned FIDC:1;		/* 16 */
		unsigned VIDC:1;		/* 17 */
		unsigned reserved3:2;		/* 19:18 */
		unsigned FIDCHGRATIO:1;		/* 20 */
		unsigned reserved4:11;		/* 31:21 */
		unsigned SGTC:20;		/* 51:32 */
		unsigned reserved5:12;		/* 63:52 */
	} bits;
	unsigned long long val;
};
26 | |||
/* Bit layout of the FID/VID status MSR (bit ranges given as high:low). */
union msr_fidvidstatus {
	struct {
		unsigned CFID:5;		/* 4:0 */
		unsigned reserved1:3;		/* 7:5 */
		unsigned SFID:5;		/* 12:8 */
		unsigned reserved2:3;		/* 15:13 */
		unsigned MFID:5;		/* 20:16 */
		unsigned reserved3:11;		/* 31:21 */
		unsigned CVID:5;		/* 36:32 */
		unsigned reserved4:3;		/* 39:37 */
		unsigned SVID:5;		/* 44:40 */
		unsigned reserved5:3;		/* 47:45 */
		unsigned MVID:5;		/* 52:48 */
		unsigned reserved6:11;		/* 63:53 */
	} bits;
	unsigned long long val;
};
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c deleted file mode 100644 index 2368e38327b3..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ /dev/null | |||
@@ -1,1607 +0,0 @@ | |||
1 | /* | ||
2 | * (c) 2003-2010 Advanced Micro Devices, Inc. | ||
3 | * Your use of this code is subject to the terms and conditions of the | ||
4 | * GNU general public license version 2. See "COPYING" or | ||
5 | * http://www.gnu.org/licenses/gpl.html | ||
6 | * | ||
7 | * Support : mark.langsdorf@amd.com | ||
8 | * | ||
9 | * Based on the powernow-k7.c module written by Dave Jones. | ||
10 | * (C) 2003 Dave Jones on behalf of SuSE Labs | ||
11 | * (C) 2004 Dominik Brodowski <linux@brodo.de> | ||
12 | * (C) 2004 Pavel Machek <pavel@ucw.cz> | ||
13 | * Licensed under the terms of the GNU GPL License version 2. | ||
14 | * Based upon datasheets & sample CPUs kindly provided by AMD. | ||
15 | * | ||
16 | * Valuable input gratefully received from Dave Jones, Pavel Machek, | ||
17 | * Dominik Brodowski, Jacob Shin, and others. | ||
18 | * Originally developed by Paul Devriendt. | ||
19 | * Processor information obtained from Chapter 9 (Power and Thermal Management) | ||
20 | * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD | ||
21 | * Opteron Processors" available for download from www.amd.com | ||
22 | * | ||
23 | * Tables for specific CPUs can be inferred from | ||
24 | * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf | ||
25 | */ | ||
26 | |||
27 | #include <linux/kernel.h> | ||
28 | #include <linux/smp.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/init.h> | ||
31 | #include <linux/cpufreq.h> | ||
32 | #include <linux/slab.h> | ||
33 | #include <linux/string.h> | ||
34 | #include <linux/cpumask.h> | ||
35 | #include <linux/sched.h> /* for current / set_cpus_allowed() */ | ||
36 | #include <linux/io.h> | ||
37 | #include <linux/delay.h> | ||
38 | |||
39 | #include <asm/msr.h> | ||
40 | |||
41 | #include <linux/acpi.h> | ||
42 | #include <linux/mutex.h> | ||
43 | #include <acpi/processor.h> | ||
44 | |||
45 | #define PFX "powernow-k8: " | ||
46 | #define VERSION "version 2.20.00" | ||
47 | #include "powernow-k8.h" | ||
48 | #include "mperf.h" | ||
49 | |||
50 | /* serialize freq changes */ | ||
51 | static DEFINE_MUTEX(fidvid_mutex); | ||
52 | |||
53 | static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); | ||
54 | |||
55 | static int cpu_family = CPU_OPTERON; | ||
56 | |||
57 | /* core performance boost */ | ||
58 | static bool cpb_capable, cpb_enabled; | ||
59 | static struct msr __percpu *msrs; | ||
60 | |||
61 | static struct cpufreq_driver cpufreq_amd64_driver; | ||
62 | |||
63 | #ifndef CONFIG_SMP | ||
/* Non-SMP fallback: whatever @cpu is, the core mask is that of CPU 0. */
static inline const struct cpumask *cpu_core_mask(int cpu)
{
	const struct cpumask *only_cpu = cpumask_of(0);

	return only_cpu;
}
68 | #endif | ||
69 | |||
70 | /* Return a frequency in MHz, given an input fid */ | ||
71 | static u32 find_freq_from_fid(u32 fid) | ||
72 | { | ||
73 | return 800 + (fid * 100); | ||
74 | } | ||
75 | |||
76 | /* Return a frequency in KHz, given an input fid */ | ||
77 | static u32 find_khz_freq_from_fid(u32 fid) | ||
78 | { | ||
79 | return 1000 * find_freq_from_fid(fid); | ||
80 | } | ||
81 | |||
82 | static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, | ||
83 | u32 pstate) | ||
84 | { | ||
85 | return data[pstate].frequency; | ||
86 | } | ||
87 | |||
88 | /* Return the vco fid for an input fid | ||
89 | * | ||
90 | * Each "low" fid has corresponding "high" fid, and you can get to "low" fids | ||
91 | * only from corresponding high fids. This returns "high" fid corresponding to | ||
92 | * "low" one. | ||
93 | */ | ||
94 | static u32 convert_fid_to_vco_fid(u32 fid) | ||
95 | { | ||
96 | if (fid < HI_FID_TABLE_BOTTOM) | ||
97 | return 8 + (2 * fid); | ||
98 | else | ||
99 | return fid; | ||
100 | } | ||
101 | |||
102 | /* | ||
103 | * Return 1 if the pending bit is set. Unless we just instructed the processor | ||
104 | * to transition to a new state, seeing this bit set is really bad news. | ||
105 | */ | ||
106 | static int pending_bit_stuck(void) | ||
107 | { | ||
108 | u32 lo, hi; | ||
109 | |||
110 | if (cpu_family == CPU_HW_PSTATE) | ||
111 | return 0; | ||
112 | |||
113 | rdmsr(MSR_FIDVID_STATUS, lo, hi); | ||
114 | return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; | ||
115 | } | ||
116 | |||
117 | /* | ||
118 | * Update the global current fid / vid values from the status msr. | ||
119 | * Returns 1 on error. | ||
120 | */ | ||
121 | static int query_current_values_with_pending_wait(struct powernow_k8_data *data) | ||
122 | { | ||
123 | u32 lo, hi; | ||
124 | u32 i = 0; | ||
125 | |||
126 | if (cpu_family == CPU_HW_PSTATE) { | ||
127 | rdmsr(MSR_PSTATE_STATUS, lo, hi); | ||
128 | i = lo & HW_PSTATE_MASK; | ||
129 | data->currpstate = i; | ||
130 | |||
131 | /* | ||
132 | * a workaround for family 11h erratum 311 might cause | ||
133 | * an "out-of-range Pstate if the core is in Pstate-0 | ||
134 | */ | ||
135 | if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps)) | ||
136 | data->currpstate = HW_PSTATE_0; | ||
137 | |||
138 | return 0; | ||
139 | } | ||
140 | do { | ||
141 | if (i++ > 10000) { | ||
142 | dprintk("detected change pending stuck\n"); | ||
143 | return 1; | ||
144 | } | ||
145 | rdmsr(MSR_FIDVID_STATUS, lo, hi); | ||
146 | } while (lo & MSR_S_LO_CHANGE_PENDING); | ||
147 | |||
148 | data->currvid = hi & MSR_S_HI_CURRENT_VID; | ||
149 | data->currfid = lo & MSR_S_LO_CURRENT_FID; | ||
150 | |||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | /* the isochronous relief time */ | ||
155 | static void count_off_irt(struct powernow_k8_data *data) | ||
156 | { | ||
157 | udelay((1 << data->irt) * 10); | ||
158 | return; | ||
159 | } | ||
160 | |||
161 | /* the voltage stabilization time */ | ||
162 | static void count_off_vst(struct powernow_k8_data *data) | ||
163 | { | ||
164 | udelay(data->vstable * VST_UNITS_20US); | ||
165 | return; | ||
166 | } | ||
167 | |||
168 | /* need to init the control msr to a safe value (for each cpu) */ | ||
169 | static void fidvid_msr_init(void) | ||
170 | { | ||
171 | u32 lo, hi; | ||
172 | u8 fid, vid; | ||
173 | |||
174 | rdmsr(MSR_FIDVID_STATUS, lo, hi); | ||
175 | vid = hi & MSR_S_HI_CURRENT_VID; | ||
176 | fid = lo & MSR_S_LO_CURRENT_FID; | ||
177 | lo = fid | (vid << MSR_C_LO_VID_SHIFT); | ||
178 | hi = MSR_C_HI_STP_GNT_BENIGN; | ||
179 | dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi); | ||
180 | wrmsr(MSR_FIDVID_CTL, lo, hi); | ||
181 | } | ||
182 | |||
183 | /* write the new fid value along with the other control fields to the msr */ | ||
184 | static int write_new_fid(struct powernow_k8_data *data, u32 fid) | ||
185 | { | ||
186 | u32 lo; | ||
187 | u32 savevid = data->currvid; | ||
188 | u32 i = 0; | ||
189 | |||
190 | if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) { | ||
191 | printk(KERN_ERR PFX "internal error - overflow on fid write\n"); | ||
192 | return 1; | ||
193 | } | ||
194 | |||
195 | lo = fid; | ||
196 | lo |= (data->currvid << MSR_C_LO_VID_SHIFT); | ||
197 | lo |= MSR_C_LO_INIT_FID_VID; | ||
198 | |||
199 | dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", | ||
200 | fid, lo, data->plllock * PLL_LOCK_CONVERSION); | ||
201 | |||
202 | do { | ||
203 | wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); | ||
204 | if (i++ > 100) { | ||
205 | printk(KERN_ERR PFX | ||
206 | "Hardware error - pending bit very stuck - " | ||
207 | "no further pstate changes possible\n"); | ||
208 | return 1; | ||
209 | } | ||
210 | } while (query_current_values_with_pending_wait(data)); | ||
211 | |||
212 | count_off_irt(data); | ||
213 | |||
214 | if (savevid != data->currvid) { | ||
215 | printk(KERN_ERR PFX | ||
216 | "vid change on fid trans, old 0x%x, new 0x%x\n", | ||
217 | savevid, data->currvid); | ||
218 | return 1; | ||
219 | } | ||
220 | |||
221 | if (fid != data->currfid) { | ||
222 | printk(KERN_ERR PFX | ||
223 | "fid trans failed, fid 0x%x, curr 0x%x\n", fid, | ||
224 | data->currfid); | ||
225 | return 1; | ||
226 | } | ||
227 | |||
228 | return 0; | ||
229 | } | ||
230 | |||
231 | /* Write a new vid to the hardware */ | ||
232 | static int write_new_vid(struct powernow_k8_data *data, u32 vid) | ||
233 | { | ||
234 | u32 lo; | ||
235 | u32 savefid = data->currfid; | ||
236 | int i = 0; | ||
237 | |||
238 | if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) { | ||
239 | printk(KERN_ERR PFX "internal error - overflow on vid write\n"); | ||
240 | return 1; | ||
241 | } | ||
242 | |||
243 | lo = data->currfid; | ||
244 | lo |= (vid << MSR_C_LO_VID_SHIFT); | ||
245 | lo |= MSR_C_LO_INIT_FID_VID; | ||
246 | |||
247 | dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", | ||
248 | vid, lo, STOP_GRANT_5NS); | ||
249 | |||
250 | do { | ||
251 | wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); | ||
252 | if (i++ > 100) { | ||
253 | printk(KERN_ERR PFX "internal error - pending bit " | ||
254 | "very stuck - no further pstate " | ||
255 | "changes possible\n"); | ||
256 | return 1; | ||
257 | } | ||
258 | } while (query_current_values_with_pending_wait(data)); | ||
259 | |||
260 | if (savefid != data->currfid) { | ||
261 | printk(KERN_ERR PFX "fid changed on vid trans, old " | ||
262 | "0x%x new 0x%x\n", | ||
263 | savefid, data->currfid); | ||
264 | return 1; | ||
265 | } | ||
266 | |||
267 | if (vid != data->currvid) { | ||
268 | printk(KERN_ERR PFX "vid trans failed, vid 0x%x, " | ||
269 | "curr 0x%x\n", | ||
270 | vid, data->currvid); | ||
271 | return 1; | ||
272 | } | ||
273 | |||
274 | return 0; | ||
275 | } | ||
276 | |||
277 | /* | ||
278 | * Reduce the vid by the max of step or reqvid. | ||
279 | * Decreasing vid codes represent increasing voltages: | ||
280 | * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off. | ||
281 | */ | ||
282 | static int decrease_vid_code_by_step(struct powernow_k8_data *data, | ||
283 | u32 reqvid, u32 step) | ||
284 | { | ||
285 | if ((data->currvid - reqvid) > step) | ||
286 | reqvid = data->currvid - step; | ||
287 | |||
288 | if (write_new_vid(data, reqvid)) | ||
289 | return 1; | ||
290 | |||
291 | count_off_vst(data); | ||
292 | |||
293 | return 0; | ||
294 | } | ||
295 | |||
296 | /* Change hardware pstate by single MSR write */ | ||
297 | static int transition_pstate(struct powernow_k8_data *data, u32 pstate) | ||
298 | { | ||
299 | wrmsr(MSR_PSTATE_CTRL, pstate, 0); | ||
300 | data->currpstate = pstate; | ||
301 | return 0; | ||
302 | } | ||
303 | |||
304 | /* Change Opteron/Athlon64 fid and vid, by the 3 phases. */ | ||
305 | static int transition_fid_vid(struct powernow_k8_data *data, | ||
306 | u32 reqfid, u32 reqvid) | ||
307 | { | ||
308 | if (core_voltage_pre_transition(data, reqvid, reqfid)) | ||
309 | return 1; | ||
310 | |||
311 | if (core_frequency_transition(data, reqfid)) | ||
312 | return 1; | ||
313 | |||
314 | if (core_voltage_post_transition(data, reqvid)) | ||
315 | return 1; | ||
316 | |||
317 | if (query_current_values_with_pending_wait(data)) | ||
318 | return 1; | ||
319 | |||
320 | if ((reqfid != data->currfid) || (reqvid != data->currvid)) { | ||
321 | printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, " | ||
322 | "curr 0x%x 0x%x\n", | ||
323 | smp_processor_id(), | ||
324 | reqfid, reqvid, data->currfid, data->currvid); | ||
325 | return 1; | ||
326 | } | ||
327 | |||
328 | dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n", | ||
329 | smp_processor_id(), data->currfid, data->currvid); | ||
330 | |||
331 | return 0; | ||
332 | } | ||
333 | |||
334 | /* Phase 1 - core voltage transition ... setup voltage */ | ||
335 | static int core_voltage_pre_transition(struct powernow_k8_data *data, | ||
336 | u32 reqvid, u32 reqfid) | ||
337 | { | ||
338 | u32 rvosteps = data->rvo; | ||
339 | u32 savefid = data->currfid; | ||
340 | u32 maxvid, lo, rvomult = 1; | ||
341 | |||
342 | dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " | ||
343 | "reqvid 0x%x, rvo 0x%x\n", | ||
344 | smp_processor_id(), | ||
345 | data->currfid, data->currvid, reqvid, data->rvo); | ||
346 | |||
347 | if ((savefid < LO_FID_TABLE_TOP) && (reqfid < LO_FID_TABLE_TOP)) | ||
348 | rvomult = 2; | ||
349 | rvosteps *= rvomult; | ||
350 | rdmsr(MSR_FIDVID_STATUS, lo, maxvid); | ||
351 | maxvid = 0x1f & (maxvid >> 16); | ||
352 | dprintk("ph1 maxvid=0x%x\n", maxvid); | ||
353 | if (reqvid < maxvid) /* lower numbers are higher voltages */ | ||
354 | reqvid = maxvid; | ||
355 | |||
356 | while (data->currvid > reqvid) { | ||
357 | dprintk("ph1: curr 0x%x, req vid 0x%x\n", | ||
358 | data->currvid, reqvid); | ||
359 | if (decrease_vid_code_by_step(data, reqvid, data->vidmvs)) | ||
360 | return 1; | ||
361 | } | ||
362 | |||
363 | while ((rvosteps > 0) && | ||
364 | ((rvomult * data->rvo + data->currvid) > reqvid)) { | ||
365 | if (data->currvid == maxvid) { | ||
366 | rvosteps = 0; | ||
367 | } else { | ||
368 | dprintk("ph1: changing vid for rvo, req 0x%x\n", | ||
369 | data->currvid - 1); | ||
370 | if (decrease_vid_code_by_step(data, data->currvid-1, 1)) | ||
371 | return 1; | ||
372 | rvosteps--; | ||
373 | } | ||
374 | } | ||
375 | |||
376 | if (query_current_values_with_pending_wait(data)) | ||
377 | return 1; | ||
378 | |||
379 | if (savefid != data->currfid) { | ||
380 | printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", | ||
381 | data->currfid); | ||
382 | return 1; | ||
383 | } | ||
384 | |||
385 | dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n", | ||
386 | data->currfid, data->currvid); | ||
387 | |||
388 | return 0; | ||
389 | } | ||
390 | |||
391 | /* Phase 2 - core frequency transition */ | ||
392 | static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) | ||
393 | { | ||
394 | u32 vcoreqfid, vcocurrfid, vcofiddiff; | ||
395 | u32 fid_interval, savevid = data->currvid; | ||
396 | |||
397 | if (data->currfid == reqfid) { | ||
398 | printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", | ||
399 | data->currfid); | ||
400 | return 0; | ||
401 | } | ||
402 | |||
403 | dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, " | ||
404 | "reqfid 0x%x\n", | ||
405 | smp_processor_id(), | ||
406 | data->currfid, data->currvid, reqfid); | ||
407 | |||
408 | vcoreqfid = convert_fid_to_vco_fid(reqfid); | ||
409 | vcocurrfid = convert_fid_to_vco_fid(data->currfid); | ||
410 | vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid | ||
411 | : vcoreqfid - vcocurrfid; | ||
412 | |||
413 | if ((reqfid <= LO_FID_TABLE_TOP) && (data->currfid <= LO_FID_TABLE_TOP)) | ||
414 | vcofiddiff = 0; | ||
415 | |||
416 | while (vcofiddiff > 2) { | ||
417 | (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2); | ||
418 | |||
419 | if (reqfid > data->currfid) { | ||
420 | if (data->currfid > LO_FID_TABLE_TOP) { | ||
421 | if (write_new_fid(data, | ||
422 | data->currfid + fid_interval)) | ||
423 | return 1; | ||
424 | } else { | ||
425 | if (write_new_fid | ||
426 | (data, | ||
427 | 2 + convert_fid_to_vco_fid(data->currfid))) | ||
428 | return 1; | ||
429 | } | ||
430 | } else { | ||
431 | if (write_new_fid(data, data->currfid - fid_interval)) | ||
432 | return 1; | ||
433 | } | ||
434 | |||
435 | vcocurrfid = convert_fid_to_vco_fid(data->currfid); | ||
436 | vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid | ||
437 | : vcoreqfid - vcocurrfid; | ||
438 | } | ||
439 | |||
440 | if (write_new_fid(data, reqfid)) | ||
441 | return 1; | ||
442 | |||
443 | if (query_current_values_with_pending_wait(data)) | ||
444 | return 1; | ||
445 | |||
446 | if (data->currfid != reqfid) { | ||
447 | printk(KERN_ERR PFX | ||
448 | "ph2: mismatch, failed fid transition, " | ||
449 | "curr 0x%x, req 0x%x\n", | ||
450 | data->currfid, reqfid); | ||
451 | return 1; | ||
452 | } | ||
453 | |||
454 | if (savevid != data->currvid) { | ||
455 | printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n", | ||
456 | savevid, data->currvid); | ||
457 | return 1; | ||
458 | } | ||
459 | |||
460 | dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n", | ||
461 | data->currfid, data->currvid); | ||
462 | |||
463 | return 0; | ||
464 | } | ||
465 | |||
466 | /* Phase 3 - core voltage transition flow ... jump to the final vid. */ | ||
467 | static int core_voltage_post_transition(struct powernow_k8_data *data, | ||
468 | u32 reqvid) | ||
469 | { | ||
470 | u32 savefid = data->currfid; | ||
471 | u32 savereqvid = reqvid; | ||
472 | |||
473 | dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n", | ||
474 | smp_processor_id(), | ||
475 | data->currfid, data->currvid); | ||
476 | |||
477 | if (reqvid != data->currvid) { | ||
478 | if (write_new_vid(data, reqvid)) | ||
479 | return 1; | ||
480 | |||
481 | if (savefid != data->currfid) { | ||
482 | printk(KERN_ERR PFX | ||
483 | "ph3: bad fid change, save 0x%x, curr 0x%x\n", | ||
484 | savefid, data->currfid); | ||
485 | return 1; | ||
486 | } | ||
487 | |||
488 | if (data->currvid != reqvid) { | ||
489 | printk(KERN_ERR PFX | ||
490 | "ph3: failed vid transition\n, " | ||
491 | "req 0x%x, curr 0x%x", | ||
492 | reqvid, data->currvid); | ||
493 | return 1; | ||
494 | } | ||
495 | } | ||
496 | |||
497 | if (query_current_values_with_pending_wait(data)) | ||
498 | return 1; | ||
499 | |||
500 | if (savereqvid != data->currvid) { | ||
501 | dprintk("ph3 failed, currvid 0x%x\n", data->currvid); | ||
502 | return 1; | ||
503 | } | ||
504 | |||
505 | if (savefid != data->currfid) { | ||
506 | dprintk("ph3 failed, currfid changed 0x%x\n", | ||
507 | data->currfid); | ||
508 | return 1; | ||
509 | } | ||
510 | |||
511 | dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n", | ||
512 | data->currfid, data->currvid); | ||
513 | |||
514 | return 0; | ||
515 | } | ||
516 | |||
517 | static void check_supported_cpu(void *_rc) | ||
518 | { | ||
519 | u32 eax, ebx, ecx, edx; | ||
520 | int *rc = _rc; | ||
521 | |||
522 | *rc = -ENODEV; | ||
523 | |||
524 | if (__this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_AMD) | ||
525 | return; | ||
526 | |||
527 | eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); | ||
528 | if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && | ||
529 | ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) | ||
530 | return; | ||
531 | |||
532 | if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { | ||
533 | if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || | ||
534 | ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { | ||
535 | printk(KERN_INFO PFX | ||
536 | "Processor cpuid %x not supported\n", eax); | ||
537 | return; | ||
538 | } | ||
539 | |||
540 | eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); | ||
541 | if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { | ||
542 | printk(KERN_INFO PFX | ||
543 | "No frequency change capabilities detected\n"); | ||
544 | return; | ||
545 | } | ||
546 | |||
547 | cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); | ||
548 | if ((edx & P_STATE_TRANSITION_CAPABLE) | ||
549 | != P_STATE_TRANSITION_CAPABLE) { | ||
550 | printk(KERN_INFO PFX | ||
551 | "Power state transitions not supported\n"); | ||
552 | return; | ||
553 | } | ||
554 | } else { /* must be a HW Pstate capable processor */ | ||
555 | cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); | ||
556 | if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) | ||
557 | cpu_family = CPU_HW_PSTATE; | ||
558 | else | ||
559 | return; | ||
560 | } | ||
561 | |||
562 | *rc = 0; | ||
563 | } | ||
564 | |||
565 | static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, | ||
566 | u8 maxvid) | ||
567 | { | ||
568 | unsigned int j; | ||
569 | u8 lastfid = 0xff; | ||
570 | |||
571 | for (j = 0; j < data->numps; j++) { | ||
572 | if (pst[j].vid > LEAST_VID) { | ||
573 | printk(KERN_ERR FW_BUG PFX "vid %d invalid : 0x%x\n", | ||
574 | j, pst[j].vid); | ||
575 | return -EINVAL; | ||
576 | } | ||
577 | if (pst[j].vid < data->rvo) { | ||
578 | /* vid + rvo >= 0 */ | ||
579 | printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate" | ||
580 | " %d\n", j); | ||
581 | return -ENODEV; | ||
582 | } | ||
583 | if (pst[j].vid < maxvid + data->rvo) { | ||
584 | /* vid + rvo >= maxvid */ | ||
585 | printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate" | ||
586 | " %d\n", j); | ||
587 | return -ENODEV; | ||
588 | } | ||
589 | if (pst[j].fid > MAX_FID) { | ||
590 | printk(KERN_ERR FW_BUG PFX "maxfid exceeded with pstate" | ||
591 | " %d\n", j); | ||
592 | return -ENODEV; | ||
593 | } | ||
594 | if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { | ||
595 | /* Only first fid is allowed to be in "low" range */ | ||
596 | printk(KERN_ERR FW_BUG PFX "two low fids - %d : " | ||
597 | "0x%x\n", j, pst[j].fid); | ||
598 | return -EINVAL; | ||
599 | } | ||
600 | if (pst[j].fid < lastfid) | ||
601 | lastfid = pst[j].fid; | ||
602 | } | ||
603 | if (lastfid & 1) { | ||
604 | printk(KERN_ERR FW_BUG PFX "lastfid invalid\n"); | ||
605 | return -EINVAL; | ||
606 | } | ||
607 | if (lastfid > LO_FID_TABLE_TOP) | ||
608 | printk(KERN_INFO FW_BUG PFX | ||
609 | "first fid not from lo freq table\n"); | ||
610 | |||
611 | return 0; | ||
612 | } | ||
613 | |||
614 | static void invalidate_entry(struct cpufreq_frequency_table *powernow_table, | ||
615 | unsigned int entry) | ||
616 | { | ||
617 | powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; | ||
618 | } | ||
619 | |||
620 | static void print_basics(struct powernow_k8_data *data) | ||
621 | { | ||
622 | int j; | ||
623 | for (j = 0; j < data->numps; j++) { | ||
624 | if (data->powernow_table[j].frequency != | ||
625 | CPUFREQ_ENTRY_INVALID) { | ||
626 | if (cpu_family == CPU_HW_PSTATE) { | ||
627 | printk(KERN_INFO PFX | ||
628 | " %d : pstate %d (%d MHz)\n", j, | ||
629 | data->powernow_table[j].index, | ||
630 | data->powernow_table[j].frequency/1000); | ||
631 | } else { | ||
632 | printk(KERN_INFO PFX | ||
633 | "fid 0x%x (%d MHz), vid 0x%x\n", | ||
634 | data->powernow_table[j].index & 0xff, | ||
635 | data->powernow_table[j].frequency/1000, | ||
636 | data->powernow_table[j].index >> 8); | ||
637 | } | ||
638 | } | ||
639 | } | ||
640 | if (data->batps) | ||
641 | printk(KERN_INFO PFX "Only %d pstates on battery\n", | ||
642 | data->batps); | ||
643 | } | ||
644 | |||
645 | static u32 freq_from_fid_did(u32 fid, u32 did) | ||
646 | { | ||
647 | u32 mhz = 0; | ||
648 | |||
649 | if (boot_cpu_data.x86 == 0x10) | ||
650 | mhz = (100 * (fid + 0x10)) >> did; | ||
651 | else if (boot_cpu_data.x86 == 0x11) | ||
652 | mhz = (100 * (fid + 8)) >> did; | ||
653 | else | ||
654 | BUG(); | ||
655 | |||
656 | return mhz * 1000; | ||
657 | } | ||
658 | |||
659 | static int fill_powernow_table(struct powernow_k8_data *data, | ||
660 | struct pst_s *pst, u8 maxvid) | ||
661 | { | ||
662 | struct cpufreq_frequency_table *powernow_table; | ||
663 | unsigned int j; | ||
664 | |||
665 | if (data->batps) { | ||
666 | /* use ACPI support to get full speed on mains power */ | ||
667 | printk(KERN_WARNING PFX | ||
668 | "Only %d pstates usable (use ACPI driver for full " | ||
669 | "range\n", data->batps); | ||
670 | data->numps = data->batps; | ||
671 | } | ||
672 | |||
673 | for (j = 1; j < data->numps; j++) { | ||
674 | if (pst[j-1].fid >= pst[j].fid) { | ||
675 | printk(KERN_ERR PFX "PST out of sequence\n"); | ||
676 | return -EINVAL; | ||
677 | } | ||
678 | } | ||
679 | |||
680 | if (data->numps < 2) { | ||
681 | printk(KERN_ERR PFX "no p states to transition\n"); | ||
682 | return -ENODEV; | ||
683 | } | ||
684 | |||
685 | if (check_pst_table(data, pst, maxvid)) | ||
686 | return -EINVAL; | ||
687 | |||
688 | powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) | ||
689 | * (data->numps + 1)), GFP_KERNEL); | ||
690 | if (!powernow_table) { | ||
691 | printk(KERN_ERR PFX "powernow_table memory alloc failure\n"); | ||
692 | return -ENOMEM; | ||
693 | } | ||
694 | |||
695 | for (j = 0; j < data->numps; j++) { | ||
696 | int freq; | ||
697 | powernow_table[j].index = pst[j].fid; /* lower 8 bits */ | ||
698 | powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */ | ||
699 | freq = find_khz_freq_from_fid(pst[j].fid); | ||
700 | powernow_table[j].frequency = freq; | ||
701 | } | ||
702 | powernow_table[data->numps].frequency = CPUFREQ_TABLE_END; | ||
703 | powernow_table[data->numps].index = 0; | ||
704 | |||
705 | if (query_current_values_with_pending_wait(data)) { | ||
706 | kfree(powernow_table); | ||
707 | return -EIO; | ||
708 | } | ||
709 | |||
710 | dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); | ||
711 | data->powernow_table = powernow_table; | ||
712 | if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) | ||
713 | print_basics(data); | ||
714 | |||
715 | for (j = 0; j < data->numps; j++) | ||
716 | if ((pst[j].fid == data->currfid) && | ||
717 | (pst[j].vid == data->currvid)) | ||
718 | return 0; | ||
719 | |||
720 | dprintk("currfid/vid do not match PST, ignoring\n"); | ||
721 | return 0; | ||
722 | } | ||
723 | |||
724 | /* Find and validate the PSB/PST table in BIOS. */ | ||
725 | static int find_psb_table(struct powernow_k8_data *data) | ||
726 | { | ||
727 | struct psb_s *psb; | ||
728 | unsigned int i; | ||
729 | u32 mvs; | ||
730 | u8 maxvid; | ||
731 | u32 cpst = 0; | ||
732 | u32 thiscpuid; | ||
733 | |||
734 | for (i = 0xc0000; i < 0xffff0; i += 0x10) { | ||
735 | /* Scan BIOS looking for the signature. */ | ||
736 | /* It can not be at ffff0 - it is too big. */ | ||
737 | |||
738 | psb = phys_to_virt(i); | ||
739 | if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0) | ||
740 | continue; | ||
741 | |||
742 | dprintk("found PSB header at 0x%p\n", psb); | ||
743 | |||
744 | dprintk("table vers: 0x%x\n", psb->tableversion); | ||
745 | if (psb->tableversion != PSB_VERSION_1_4) { | ||
746 | printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n"); | ||
747 | return -ENODEV; | ||
748 | } | ||
749 | |||
750 | dprintk("flags: 0x%x\n", psb->flags1); | ||
751 | if (psb->flags1) { | ||
752 | printk(KERN_ERR FW_BUG PFX "unknown flags\n"); | ||
753 | return -ENODEV; | ||
754 | } | ||
755 | |||
756 | data->vstable = psb->vstable; | ||
757 | dprintk("voltage stabilization time: %d(*20us)\n", | ||
758 | data->vstable); | ||
759 | |||
760 | dprintk("flags2: 0x%x\n", psb->flags2); | ||
761 | data->rvo = psb->flags2 & 3; | ||
762 | data->irt = ((psb->flags2) >> 2) & 3; | ||
763 | mvs = ((psb->flags2) >> 4) & 3; | ||
764 | data->vidmvs = 1 << mvs; | ||
765 | data->batps = ((psb->flags2) >> 6) & 3; | ||
766 | |||
767 | dprintk("ramp voltage offset: %d\n", data->rvo); | ||
768 | dprintk("isochronous relief time: %d\n", data->irt); | ||
769 | dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs); | ||
770 | |||
771 | dprintk("numpst: 0x%x\n", psb->num_tables); | ||
772 | cpst = psb->num_tables; | ||
773 | if ((psb->cpuid == 0x00000fc0) || | ||
774 | (psb->cpuid == 0x00000fe0)) { | ||
775 | thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); | ||
776 | if ((thiscpuid == 0x00000fc0) || | ||
777 | (thiscpuid == 0x00000fe0)) | ||
778 | cpst = 1; | ||
779 | } | ||
780 | if (cpst != 1) { | ||
781 | printk(KERN_ERR FW_BUG PFX "numpst must be 1\n"); | ||
782 | return -ENODEV; | ||
783 | } | ||
784 | |||
785 | data->plllock = psb->plllocktime; | ||
786 | dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime); | ||
787 | dprintk("maxfid: 0x%x\n", psb->maxfid); | ||
788 | dprintk("maxvid: 0x%x\n", psb->maxvid); | ||
789 | maxvid = psb->maxvid; | ||
790 | |||
791 | data->numps = psb->numps; | ||
792 | dprintk("numpstates: 0x%x\n", data->numps); | ||
793 | return fill_powernow_table(data, | ||
794 | (struct pst_s *)(psb+1), maxvid); | ||
795 | } | ||
796 | /* | ||
797 | * If you see this message, complain to BIOS manufacturer. If | ||
798 | * he tells you "we do not support Linux" or some similar | ||
799 | * nonsense, remember that Windows 2000 uses the same legacy | ||
800 | * mechanism that the old Linux PSB driver uses. Tell them it | ||
801 | * is broken with Windows 2000. | ||
802 | * | ||
803 | * The reference to the AMD documentation is chapter 9 in the | ||
804 | * BIOS and Kernel Developer's Guide, which is available on | ||
805 | * www.amd.com | ||
806 | */ | ||
807 | printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); | ||
808 | printk(KERN_ERR PFX "Make sure that your BIOS is up to date" | ||
809 | " and Cool'N'Quiet support is enabled in BIOS setup\n"); | ||
810 | return -ENODEV; | ||
811 | } | ||
812 | |||
813 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, | ||
814 | unsigned int index) | ||
815 | { | ||
816 | u64 control; | ||
817 | |||
818 | if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) | ||
819 | return; | ||
820 | |||
821 | control = data->acpi_data.states[index].control; | ||
822 | data->irt = (control >> IRT_SHIFT) & IRT_MASK; | ||
823 | data->rvo = (control >> RVO_SHIFT) & RVO_MASK; | ||
824 | data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; | ||
825 | data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; | ||
826 | data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK); | ||
827 | data->vstable = (control >> VST_SHIFT) & VST_MASK; | ||
828 | } | ||
829 | |||
830 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | ||
831 | { | ||
832 | struct cpufreq_frequency_table *powernow_table; | ||
833 | int ret_val = -ENODEV; | ||
834 | u64 control, status; | ||
835 | |||
836 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { | ||
837 | dprintk("register performance failed: bad ACPI data\n"); | ||
838 | return -EIO; | ||
839 | } | ||
840 | |||
841 | /* verify the data contained in the ACPI structures */ | ||
842 | if (data->acpi_data.state_count <= 1) { | ||
843 | dprintk("No ACPI P-States\n"); | ||
844 | goto err_out; | ||
845 | } | ||
846 | |||
847 | control = data->acpi_data.control_register.space_id; | ||
848 | status = data->acpi_data.status_register.space_id; | ||
849 | |||
850 | if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) || | ||
851 | (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) { | ||
852 | dprintk("Invalid control/status registers (%x - %x)\n", | ||
853 | control, status); | ||
854 | goto err_out; | ||
855 | } | ||
856 | |||
857 | /* fill in data->powernow_table */ | ||
858 | powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) | ||
859 | * (data->acpi_data.state_count + 1)), GFP_KERNEL); | ||
860 | if (!powernow_table) { | ||
861 | dprintk("powernow_table memory alloc failure\n"); | ||
862 | goto err_out; | ||
863 | } | ||
864 | |||
865 | /* fill in data */ | ||
866 | data->numps = data->acpi_data.state_count; | ||
867 | powernow_k8_acpi_pst_values(data, 0); | ||
868 | |||
869 | if (cpu_family == CPU_HW_PSTATE) | ||
870 | ret_val = fill_powernow_table_pstate(data, powernow_table); | ||
871 | else | ||
872 | ret_val = fill_powernow_table_fidvid(data, powernow_table); | ||
873 | if (ret_val) | ||
874 | goto err_out_mem; | ||
875 | |||
876 | powernow_table[data->acpi_data.state_count].frequency = | ||
877 | CPUFREQ_TABLE_END; | ||
878 | powernow_table[data->acpi_data.state_count].index = 0; | ||
879 | data->powernow_table = powernow_table; | ||
880 | |||
881 | if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) | ||
882 | print_basics(data); | ||
883 | |||
884 | /* notify BIOS that we exist */ | ||
885 | acpi_processor_notify_smm(THIS_MODULE); | ||
886 | |||
887 | if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) { | ||
888 | printk(KERN_ERR PFX | ||
889 | "unable to alloc powernow_k8_data cpumask\n"); | ||
890 | ret_val = -ENOMEM; | ||
891 | goto err_out_mem; | ||
892 | } | ||
893 | |||
894 | return 0; | ||
895 | |||
896 | err_out_mem: | ||
897 | kfree(powernow_table); | ||
898 | |||
899 | err_out: | ||
900 | acpi_processor_unregister_performance(&data->acpi_data, data->cpu); | ||
901 | |||
902 | /* data->acpi_data.state_count informs us at ->exit() | ||
903 | * whether ACPI was used */ | ||
904 | data->acpi_data.state_count = 0; | ||
905 | |||
906 | return ret_val; | ||
907 | } | ||
908 | |||
909 | static int fill_powernow_table_pstate(struct powernow_k8_data *data, | ||
910 | struct cpufreq_frequency_table *powernow_table) | ||
911 | { | ||
912 | int i; | ||
913 | u32 hi = 0, lo = 0; | ||
914 | rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi); | ||
915 | data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; | ||
916 | |||
917 | for (i = 0; i < data->acpi_data.state_count; i++) { | ||
918 | u32 index; | ||
919 | |||
920 | index = data->acpi_data.states[i].control & HW_PSTATE_MASK; | ||
921 | if (index > data->max_hw_pstate) { | ||
922 | printk(KERN_ERR PFX "invalid pstate %d - " | ||
923 | "bad value %d.\n", i, index); | ||
924 | printk(KERN_ERR PFX "Please report to BIOS " | ||
925 | "manufacturer\n"); | ||
926 | invalidate_entry(powernow_table, i); | ||
927 | continue; | ||
928 | } | ||
929 | rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); | ||
930 | if (!(hi & HW_PSTATE_VALID_MASK)) { | ||
931 | dprintk("invalid pstate %d, ignoring\n", index); | ||
932 | invalidate_entry(powernow_table, i); | ||
933 | continue; | ||
934 | } | ||
935 | |||
936 | powernow_table[i].index = index; | ||
937 | |||
938 | /* Frequency may be rounded for these */ | ||
939 | if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10) | ||
940 | || boot_cpu_data.x86 == 0x11) { | ||
941 | powernow_table[i].frequency = | ||
942 | freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); | ||
943 | } else | ||
944 | powernow_table[i].frequency = | ||
945 | data->acpi_data.states[i].core_frequency * 1000; | ||
946 | } | ||
947 | return 0; | ||
948 | } | ||
949 | |||
950 | static int fill_powernow_table_fidvid(struct powernow_k8_data *data, | ||
951 | struct cpufreq_frequency_table *powernow_table) | ||
952 | { | ||
953 | int i; | ||
954 | |||
955 | for (i = 0; i < data->acpi_data.state_count; i++) { | ||
956 | u32 fid; | ||
957 | u32 vid; | ||
958 | u32 freq, index; | ||
959 | u64 status, control; | ||
960 | |||
961 | if (data->exttype) { | ||
962 | status = data->acpi_data.states[i].status; | ||
963 | fid = status & EXT_FID_MASK; | ||
964 | vid = (status >> VID_SHIFT) & EXT_VID_MASK; | ||
965 | } else { | ||
966 | control = data->acpi_data.states[i].control; | ||
967 | fid = control & FID_MASK; | ||
968 | vid = (control >> VID_SHIFT) & VID_MASK; | ||
969 | } | ||
970 | |||
971 | dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); | ||
972 | |||
973 | index = fid | (vid<<8); | ||
974 | powernow_table[i].index = index; | ||
975 | |||
976 | freq = find_khz_freq_from_fid(fid); | ||
977 | powernow_table[i].frequency = freq; | ||
978 | |||
979 | /* verify frequency is OK */ | ||
980 | if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) { | ||
981 | dprintk("invalid freq %u kHz, ignoring\n", freq); | ||
982 | invalidate_entry(powernow_table, i); | ||
983 | continue; | ||
984 | } | ||
985 | |||
986 | /* verify voltage is OK - | ||
987 | * BIOSs are using "off" to indicate invalid */ | ||
988 | if (vid == VID_OFF) { | ||
989 | dprintk("invalid vid %u, ignoring\n", vid); | ||
990 | invalidate_entry(powernow_table, i); | ||
991 | continue; | ||
992 | } | ||
993 | |||
994 | if (freq != (data->acpi_data.states[i].core_frequency * 1000)) { | ||
995 | printk(KERN_INFO PFX "invalid freq entries " | ||
996 | "%u kHz vs. %u kHz\n", freq, | ||
997 | (unsigned int) | ||
998 | (data->acpi_data.states[i].core_frequency | ||
999 | * 1000)); | ||
1000 | invalidate_entry(powernow_table, i); | ||
1001 | continue; | ||
1002 | } | ||
1003 | } | ||
1004 | return 0; | ||
1005 | } | ||
1006 | |||
1007 | static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) | ||
1008 | { | ||
1009 | if (data->acpi_data.state_count) | ||
1010 | acpi_processor_unregister_performance(&data->acpi_data, | ||
1011 | data->cpu); | ||
1012 | free_cpumask_var(data->acpi_data.shared_cpu_map); | ||
1013 | } | ||
1014 | |||
1015 | static int get_transition_latency(struct powernow_k8_data *data) | ||
1016 | { | ||
1017 | int max_latency = 0; | ||
1018 | int i; | ||
1019 | for (i = 0; i < data->acpi_data.state_count; i++) { | ||
1020 | int cur_latency = data->acpi_data.states[i].transition_latency | ||
1021 | + data->acpi_data.states[i].bus_master_latency; | ||
1022 | if (cur_latency > max_latency) | ||
1023 | max_latency = cur_latency; | ||
1024 | } | ||
1025 | if (max_latency == 0) { | ||
1026 | /* | ||
1027 | * Fam 11h and later may return 0 as transition latency. This | ||
1028 | * is intended and means "very fast". While cpufreq core and | ||
1029 | * governors currently can handle that gracefully, better set it | ||
1030 | * to 1 to avoid problems in the future. | ||
1031 | */ | ||
1032 | if (boot_cpu_data.x86 < 0x11) | ||
1033 | printk(KERN_ERR FW_WARN PFX "Invalid zero transition " | ||
1034 | "latency\n"); | ||
1035 | max_latency = 1; | ||
1036 | } | ||
1037 | /* value in usecs, needs to be in nanoseconds */ | ||
1038 | return 1000 * max_latency; | ||
1039 | } | ||
1040 | |||
1041 | /* Take a frequency, and issue the fid/vid transition command */ | ||
1042 | static int transition_frequency_fidvid(struct powernow_k8_data *data, | ||
1043 | unsigned int index) | ||
1044 | { | ||
1045 | u32 fid = 0; | ||
1046 | u32 vid = 0; | ||
1047 | int res, i; | ||
1048 | struct cpufreq_freqs freqs; | ||
1049 | |||
1050 | dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); | ||
1051 | |||
1052 | /* fid/vid correctness check for k8 */ | ||
1053 | /* fid are the lower 8 bits of the index we stored into | ||
1054 | * the cpufreq frequency table in find_psb_table, vid | ||
1055 | * are the upper 8 bits. | ||
1056 | */ | ||
1057 | fid = data->powernow_table[index].index & 0xFF; | ||
1058 | vid = (data->powernow_table[index].index & 0xFF00) >> 8; | ||
1059 | |||
1060 | dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid); | ||
1061 | |||
1062 | if (query_current_values_with_pending_wait(data)) | ||
1063 | return 1; | ||
1064 | |||
1065 | if ((data->currvid == vid) && (data->currfid == fid)) { | ||
1066 | dprintk("target matches current values (fid 0x%x, vid 0x%x)\n", | ||
1067 | fid, vid); | ||
1068 | return 0; | ||
1069 | } | ||
1070 | |||
1071 | dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", | ||
1072 | smp_processor_id(), fid, vid); | ||
1073 | freqs.old = find_khz_freq_from_fid(data->currfid); | ||
1074 | freqs.new = find_khz_freq_from_fid(fid); | ||
1075 | |||
1076 | for_each_cpu(i, data->available_cores) { | ||
1077 | freqs.cpu = i; | ||
1078 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
1079 | } | ||
1080 | |||
1081 | res = transition_fid_vid(data, fid, vid); | ||
1082 | freqs.new = find_khz_freq_from_fid(data->currfid); | ||
1083 | |||
1084 | for_each_cpu(i, data->available_cores) { | ||
1085 | freqs.cpu = i; | ||
1086 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
1087 | } | ||
1088 | return res; | ||
1089 | } | ||
1090 | |||
1091 | /* Take a frequency, and issue the hardware pstate transition command */ | ||
1092 | static int transition_frequency_pstate(struct powernow_k8_data *data, | ||
1093 | unsigned int index) | ||
1094 | { | ||
1095 | u32 pstate = 0; | ||
1096 | int res, i; | ||
1097 | struct cpufreq_freqs freqs; | ||
1098 | |||
1099 | dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); | ||
1100 | |||
1101 | /* get MSR index for hardware pstate transition */ | ||
1102 | pstate = index & HW_PSTATE_MASK; | ||
1103 | if (pstate > data->max_hw_pstate) | ||
1104 | return 0; | ||
1105 | freqs.old = find_khz_freq_from_pstate(data->powernow_table, | ||
1106 | data->currpstate); | ||
1107 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); | ||
1108 | |||
1109 | for_each_cpu(i, data->available_cores) { | ||
1110 | freqs.cpu = i; | ||
1111 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
1112 | } | ||
1113 | |||
1114 | res = transition_pstate(data, pstate); | ||
1115 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); | ||
1116 | |||
1117 | for_each_cpu(i, data->available_cores) { | ||
1118 | freqs.cpu = i; | ||
1119 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
1120 | } | ||
1121 | return res; | ||
1122 | } | ||
1123 | |||
1124 | /* Driver entry point to switch to the target frequency */ | ||
1125 | static int powernowk8_target(struct cpufreq_policy *pol, | ||
1126 | unsigned targfreq, unsigned relation) | ||
1127 | { | ||
1128 | cpumask_var_t oldmask; | ||
1129 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); | ||
1130 | u32 checkfid; | ||
1131 | u32 checkvid; | ||
1132 | unsigned int newstate; | ||
1133 | int ret = -EIO; | ||
1134 | |||
1135 | if (!data) | ||
1136 | return -EINVAL; | ||
1137 | |||
1138 | checkfid = data->currfid; | ||
1139 | checkvid = data->currvid; | ||
1140 | |||
1141 | /* only run on specific CPU from here on. */ | ||
1142 | /* This is poor form: use a workqueue or smp_call_function_single */ | ||
1143 | if (!alloc_cpumask_var(&oldmask, GFP_KERNEL)) | ||
1144 | return -ENOMEM; | ||
1145 | |||
1146 | cpumask_copy(oldmask, tsk_cpus_allowed(current)); | ||
1147 | set_cpus_allowed_ptr(current, cpumask_of(pol->cpu)); | ||
1148 | |||
1149 | if (smp_processor_id() != pol->cpu) { | ||
1150 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); | ||
1151 | goto err_out; | ||
1152 | } | ||
1153 | |||
1154 | if (pending_bit_stuck()) { | ||
1155 | printk(KERN_ERR PFX "failing targ, change pending bit set\n"); | ||
1156 | goto err_out; | ||
1157 | } | ||
1158 | |||
1159 | dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", | ||
1160 | pol->cpu, targfreq, pol->min, pol->max, relation); | ||
1161 | |||
1162 | if (query_current_values_with_pending_wait(data)) | ||
1163 | goto err_out; | ||
1164 | |||
1165 | if (cpu_family != CPU_HW_PSTATE) { | ||
1166 | dprintk("targ: curr fid 0x%x, vid 0x%x\n", | ||
1167 | data->currfid, data->currvid); | ||
1168 | |||
1169 | if ((checkvid != data->currvid) || | ||
1170 | (checkfid != data->currfid)) { | ||
1171 | printk(KERN_INFO PFX | ||
1172 | "error - out of sync, fix 0x%x 0x%x, " | ||
1173 | "vid 0x%x 0x%x\n", | ||
1174 | checkfid, data->currfid, | ||
1175 | checkvid, data->currvid); | ||
1176 | } | ||
1177 | } | ||
1178 | |||
1179 | if (cpufreq_frequency_table_target(pol, data->powernow_table, | ||
1180 | targfreq, relation, &newstate)) | ||
1181 | goto err_out; | ||
1182 | |||
1183 | mutex_lock(&fidvid_mutex); | ||
1184 | |||
1185 | powernow_k8_acpi_pst_values(data, newstate); | ||
1186 | |||
1187 | if (cpu_family == CPU_HW_PSTATE) | ||
1188 | ret = transition_frequency_pstate(data, newstate); | ||
1189 | else | ||
1190 | ret = transition_frequency_fidvid(data, newstate); | ||
1191 | if (ret) { | ||
1192 | printk(KERN_ERR PFX "transition frequency failed\n"); | ||
1193 | ret = 1; | ||
1194 | mutex_unlock(&fidvid_mutex); | ||
1195 | goto err_out; | ||
1196 | } | ||
1197 | mutex_unlock(&fidvid_mutex); | ||
1198 | |||
1199 | if (cpu_family == CPU_HW_PSTATE) | ||
1200 | pol->cur = find_khz_freq_from_pstate(data->powernow_table, | ||
1201 | newstate); | ||
1202 | else | ||
1203 | pol->cur = find_khz_freq_from_fid(data->currfid); | ||
1204 | ret = 0; | ||
1205 | |||
1206 | err_out: | ||
1207 | set_cpus_allowed_ptr(current, oldmask); | ||
1208 | free_cpumask_var(oldmask); | ||
1209 | return ret; | ||
1210 | } | ||
1211 | |||
1212 | /* Driver entry point to verify the policy and range of frequencies */ | ||
1213 | static int powernowk8_verify(struct cpufreq_policy *pol) | ||
1214 | { | ||
1215 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); | ||
1216 | |||
1217 | if (!data) | ||
1218 | return -EINVAL; | ||
1219 | |||
1220 | return cpufreq_frequency_table_verify(pol, data->powernow_table); | ||
1221 | } | ||
1222 | |||
/*
 * Argument block handed to powernowk8_cpu_init_on_cpu() through
 * smp_call_function_single().
 */
struct init_on_cpu {
	struct powernow_k8_data *data;	/* in: driver state of the CPU being set up */
	int rc;				/* out: 0 on success, -ENODEV on failure */
};
1227 | |||
1228 | static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu) | ||
1229 | { | ||
1230 | struct init_on_cpu *init_on_cpu = _init_on_cpu; | ||
1231 | |||
1232 | if (pending_bit_stuck()) { | ||
1233 | printk(KERN_ERR PFX "failing init, change pending bit set\n"); | ||
1234 | init_on_cpu->rc = -ENODEV; | ||
1235 | return; | ||
1236 | } | ||
1237 | |||
1238 | if (query_current_values_with_pending_wait(init_on_cpu->data)) { | ||
1239 | init_on_cpu->rc = -ENODEV; | ||
1240 | return; | ||
1241 | } | ||
1242 | |||
1243 | if (cpu_family == CPU_OPTERON) | ||
1244 | fidvid_msr_init(); | ||
1245 | |||
1246 | init_on_cpu->rc = 0; | ||
1247 | } | ||
1248 | |||
1249 | /* per CPU init entry point to the driver */ | ||
1250 | static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | ||
1251 | { | ||
1252 | static const char ACPI_PSS_BIOS_BUG_MSG[] = | ||
1253 | KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" | ||
1254 | FW_BUG PFX "Try again with latest BIOS.\n"; | ||
1255 | struct powernow_k8_data *data; | ||
1256 | struct init_on_cpu init_on_cpu; | ||
1257 | int rc; | ||
1258 | struct cpuinfo_x86 *c = &cpu_data(pol->cpu); | ||
1259 | |||
1260 | if (!cpu_online(pol->cpu)) | ||
1261 | return -ENODEV; | ||
1262 | |||
1263 | smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1); | ||
1264 | if (rc) | ||
1265 | return -ENODEV; | ||
1266 | |||
1267 | data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); | ||
1268 | if (!data) { | ||
1269 | printk(KERN_ERR PFX "unable to alloc powernow_k8_data"); | ||
1270 | return -ENOMEM; | ||
1271 | } | ||
1272 | |||
1273 | data->cpu = pol->cpu; | ||
1274 | data->currpstate = HW_PSTATE_INVALID; | ||
1275 | |||
1276 | if (powernow_k8_cpu_init_acpi(data)) { | ||
1277 | /* | ||
1278 | * Use the PSB BIOS structure. This is only available on | ||
1279 | * an UP version, and is deprecated by AMD. | ||
1280 | */ | ||
1281 | if (num_online_cpus() != 1) { | ||
1282 | printk_once(ACPI_PSS_BIOS_BUG_MSG); | ||
1283 | goto err_out; | ||
1284 | } | ||
1285 | if (pol->cpu != 0) { | ||
1286 | printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " | ||
1287 | "CPU other than CPU0. Complain to your BIOS " | ||
1288 | "vendor.\n"); | ||
1289 | goto err_out; | ||
1290 | } | ||
1291 | rc = find_psb_table(data); | ||
1292 | if (rc) | ||
1293 | goto err_out; | ||
1294 | |||
1295 | /* Take a crude guess here. | ||
1296 | * That guess was in microseconds, so multiply with 1000 */ | ||
1297 | pol->cpuinfo.transition_latency = ( | ||
1298 | ((data->rvo + 8) * data->vstable * VST_UNITS_20US) + | ||
1299 | ((1 << data->irt) * 30)) * 1000; | ||
1300 | } else /* ACPI _PSS objects available */ | ||
1301 | pol->cpuinfo.transition_latency = get_transition_latency(data); | ||
1302 | |||
1303 | /* only run on specific CPU from here on */ | ||
1304 | init_on_cpu.data = data; | ||
1305 | smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu, | ||
1306 | &init_on_cpu, 1); | ||
1307 | rc = init_on_cpu.rc; | ||
1308 | if (rc != 0) | ||
1309 | goto err_out_exit_acpi; | ||
1310 | |||
1311 | if (cpu_family == CPU_HW_PSTATE) | ||
1312 | cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); | ||
1313 | else | ||
1314 | cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu)); | ||
1315 | data->available_cores = pol->cpus; | ||
1316 | |||
1317 | if (cpu_family == CPU_HW_PSTATE) | ||
1318 | pol->cur = find_khz_freq_from_pstate(data->powernow_table, | ||
1319 | data->currpstate); | ||
1320 | else | ||
1321 | pol->cur = find_khz_freq_from_fid(data->currfid); | ||
1322 | dprintk("policy current frequency %d kHz\n", pol->cur); | ||
1323 | |||
1324 | /* min/max the cpu is capable of */ | ||
1325 | if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { | ||
1326 | printk(KERN_ERR FW_BUG PFX "invalid powernow_table\n"); | ||
1327 | powernow_k8_cpu_exit_acpi(data); | ||
1328 | kfree(data->powernow_table); | ||
1329 | kfree(data); | ||
1330 | return -EINVAL; | ||
1331 | } | ||
1332 | |||
1333 | /* Check for APERF/MPERF support in hardware */ | ||
1334 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) | ||
1335 | cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf; | ||
1336 | |||
1337 | cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); | ||
1338 | |||
1339 | if (cpu_family == CPU_HW_PSTATE) | ||
1340 | dprintk("cpu_init done, current pstate 0x%x\n", | ||
1341 | data->currpstate); | ||
1342 | else | ||
1343 | dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", | ||
1344 | data->currfid, data->currvid); | ||
1345 | |||
1346 | per_cpu(powernow_data, pol->cpu) = data; | ||
1347 | |||
1348 | return 0; | ||
1349 | |||
1350 | err_out_exit_acpi: | ||
1351 | powernow_k8_cpu_exit_acpi(data); | ||
1352 | |||
1353 | err_out: | ||
1354 | kfree(data); | ||
1355 | return -ENODEV; | ||
1356 | } | ||
1357 | |||
1358 | static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) | ||
1359 | { | ||
1360 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); | ||
1361 | |||
1362 | if (!data) | ||
1363 | return -EINVAL; | ||
1364 | |||
1365 | powernow_k8_cpu_exit_acpi(data); | ||
1366 | |||
1367 | cpufreq_frequency_table_put_attr(pol->cpu); | ||
1368 | |||
1369 | kfree(data->powernow_table); | ||
1370 | kfree(data); | ||
1371 | per_cpu(powernow_data, pol->cpu) = NULL; | ||
1372 | |||
1373 | return 0; | ||
1374 | } | ||
1375 | |||
1376 | static void query_values_on_cpu(void *_err) | ||
1377 | { | ||
1378 | int *err = _err; | ||
1379 | struct powernow_k8_data *data = __this_cpu_read(powernow_data); | ||
1380 | |||
1381 | *err = query_current_values_with_pending_wait(data); | ||
1382 | } | ||
1383 | |||
1384 | static unsigned int powernowk8_get(unsigned int cpu) | ||
1385 | { | ||
1386 | struct powernow_k8_data *data = per_cpu(powernow_data, cpu); | ||
1387 | unsigned int khz = 0; | ||
1388 | int err; | ||
1389 | |||
1390 | if (!data) | ||
1391 | return 0; | ||
1392 | |||
1393 | smp_call_function_single(cpu, query_values_on_cpu, &err, true); | ||
1394 | if (err) | ||
1395 | goto out; | ||
1396 | |||
1397 | if (cpu_family == CPU_HW_PSTATE) | ||
1398 | khz = find_khz_freq_from_pstate(data->powernow_table, | ||
1399 | data->currpstate); | ||
1400 | else | ||
1401 | khz = find_khz_freq_from_fid(data->currfid); | ||
1402 | |||
1403 | |||
1404 | out: | ||
1405 | return khz; | ||
1406 | } | ||
1407 | |||
1408 | static void _cpb_toggle_msrs(bool t) | ||
1409 | { | ||
1410 | int cpu; | ||
1411 | |||
1412 | get_online_cpus(); | ||
1413 | |||
1414 | rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1415 | |||
1416 | for_each_cpu(cpu, cpu_online_mask) { | ||
1417 | struct msr *reg = per_cpu_ptr(msrs, cpu); | ||
1418 | if (t) | ||
1419 | reg->l &= ~BIT(25); | ||
1420 | else | ||
1421 | reg->l |= BIT(25); | ||
1422 | } | ||
1423 | wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1424 | |||
1425 | put_online_cpus(); | ||
1426 | } | ||
1427 | |||
1428 | /* | ||
1429 | * Switch on/off core performance boosting. | ||
1430 | * | ||
1431 | * 0=disable | ||
1432 | * 1=enable. | ||
1433 | */ | ||
1434 | static void cpb_toggle(bool t) | ||
1435 | { | ||
1436 | if (!cpb_capable) | ||
1437 | return; | ||
1438 | |||
1439 | if (t && !cpb_enabled) { | ||
1440 | cpb_enabled = true; | ||
1441 | _cpb_toggle_msrs(t); | ||
1442 | printk(KERN_INFO PFX "Core Boosting enabled.\n"); | ||
1443 | } else if (!t && cpb_enabled) { | ||
1444 | cpb_enabled = false; | ||
1445 | _cpb_toggle_msrs(t); | ||
1446 | printk(KERN_INFO PFX "Core Boosting disabled.\n"); | ||
1447 | } | ||
1448 | } | ||
1449 | |||
1450 | static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf, | ||
1451 | size_t count) | ||
1452 | { | ||
1453 | int ret = -EINVAL; | ||
1454 | unsigned long val = 0; | ||
1455 | |||
1456 | ret = strict_strtoul(buf, 10, &val); | ||
1457 | if (!ret && (val == 0 || val == 1) && cpb_capable) | ||
1458 | cpb_toggle(val); | ||
1459 | else | ||
1460 | return -EINVAL; | ||
1461 | |||
1462 | return count; | ||
1463 | } | ||
1464 | |||
1465 | static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf) | ||
1466 | { | ||
1467 | return sprintf(buf, "%u\n", cpb_enabled); | ||
1468 | } | ||
1469 | |||
/*
 * Declare a mode-0644 cpufreq attribute named _name, wired to the
 * show__name() and store__name() handlers.
 */
#define define_one_rw(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

/* The "cpb" attribute, backed by show_cpb() and store_cpb(). */
define_one_rw(cpb);
1475 | |||
/* NULL-terminated attribute list referenced by the driver's .attr field. */
static struct freq_attr *powernow_k8_attr[] = {
	&cpufreq_freq_attr_scaling_available_freqs,
	&cpb,
	NULL,
};
1481 | |||
/*
 * cpufreq driver description registered by powernowk8_init().  The .getavg
 * hook is not set here; powernowk8_cpu_init() fills it in when the CPU
 * reports X86_FEATURE_APERFMPERF.
 */
static struct cpufreq_driver cpufreq_amd64_driver = {
	.verify = powernowk8_verify,
	.target = powernowk8_target,
	.bios_limit = acpi_processor_get_bios_limit,
	.init = powernowk8_cpu_init,
	.exit = __devexit_p(powernowk8_cpu_exit),
	.get = powernowk8_get,
	.name = "powernow-k8",
	.owner = THIS_MODULE,
	.attr = powernow_k8_attr,
};
1493 | |||
1494 | /* | ||
1495 | * Clear the boost-disable flag on the CPU_DOWN path so that this cpu | ||
1496 | * cannot block the remaining ones from boosting. On the CPU_UP path we | ||
1497 | * simply keep the boost-disable flag in sync with the current global | ||
1498 | * state. | ||
1499 | */ | ||
1500 | static int cpb_notify(struct notifier_block *nb, unsigned long action, | ||
1501 | void *hcpu) | ||
1502 | { | ||
1503 | unsigned cpu = (long)hcpu; | ||
1504 | u32 lo, hi; | ||
1505 | |||
1506 | switch (action) { | ||
1507 | case CPU_UP_PREPARE: | ||
1508 | case CPU_UP_PREPARE_FROZEN: | ||
1509 | |||
1510 | if (!cpb_enabled) { | ||
1511 | rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); | ||
1512 | lo |= BIT(25); | ||
1513 | wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); | ||
1514 | } | ||
1515 | break; | ||
1516 | |||
1517 | case CPU_DOWN_PREPARE: | ||
1518 | case CPU_DOWN_PREPARE_FROZEN: | ||
1519 | rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); | ||
1520 | lo &= ~BIT(25); | ||
1521 | wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); | ||
1522 | break; | ||
1523 | |||
1524 | default: | ||
1525 | break; | ||
1526 | } | ||
1527 | |||
1528 | return NOTIFY_OK; | ||
1529 | } | ||
1530 | |||
/* Hotplug notifier block; registered in powernowk8_init() on CPB-capable CPUs. */
static struct notifier_block cpb_nb = {
	.notifier_call = cpb_notify,
};
1534 | |||
1535 | /* driver entry point for init */ | ||
1536 | static int __cpuinit powernowk8_init(void) | ||
1537 | { | ||
1538 | unsigned int i, supported_cpus = 0, cpu; | ||
1539 | int rv; | ||
1540 | |||
1541 | for_each_online_cpu(i) { | ||
1542 | int rc; | ||
1543 | smp_call_function_single(i, check_supported_cpu, &rc, 1); | ||
1544 | if (rc == 0) | ||
1545 | supported_cpus++; | ||
1546 | } | ||
1547 | |||
1548 | if (supported_cpus != num_online_cpus()) | ||
1549 | return -ENODEV; | ||
1550 | |||
1551 | printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n", | ||
1552 | num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus); | ||
1553 | |||
1554 | if (boot_cpu_has(X86_FEATURE_CPB)) { | ||
1555 | |||
1556 | cpb_capable = true; | ||
1557 | |||
1558 | msrs = msrs_alloc(); | ||
1559 | if (!msrs) { | ||
1560 | printk(KERN_ERR "%s: Error allocating msrs!\n", __func__); | ||
1561 | return -ENOMEM; | ||
1562 | } | ||
1563 | |||
1564 | register_cpu_notifier(&cpb_nb); | ||
1565 | |||
1566 | rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1567 | |||
1568 | for_each_cpu(cpu, cpu_online_mask) { | ||
1569 | struct msr *reg = per_cpu_ptr(msrs, cpu); | ||
1570 | cpb_enabled |= !(!!(reg->l & BIT(25))); | ||
1571 | } | ||
1572 | |||
1573 | printk(KERN_INFO PFX "Core Performance Boosting: %s.\n", | ||
1574 | (cpb_enabled ? "on" : "off")); | ||
1575 | } | ||
1576 | |||
1577 | rv = cpufreq_register_driver(&cpufreq_amd64_driver); | ||
1578 | if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) { | ||
1579 | unregister_cpu_notifier(&cpb_nb); | ||
1580 | msrs_free(msrs); | ||
1581 | msrs = NULL; | ||
1582 | } | ||
1583 | return rv; | ||
1584 | } | ||
1585 | |||
1586 | /* driver entry point for term */ | ||
1587 | static void __exit powernowk8_exit(void) | ||
1588 | { | ||
1589 | dprintk("exit\n"); | ||
1590 | |||
1591 | if (boot_cpu_has(X86_FEATURE_CPB)) { | ||
1592 | msrs_free(msrs); | ||
1593 | msrs = NULL; | ||
1594 | |||
1595 | unregister_cpu_notifier(&cpb_nb); | ||
1596 | } | ||
1597 | |||
1598 | cpufreq_unregister_driver(&cpufreq_amd64_driver); | ||
1599 | } | ||
1600 | |||
1601 | MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and " | ||
1602 | "Mark Langsdorf <mark.langsdorf@amd.com>"); | ||
1603 | MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); | ||
1604 | MODULE_LICENSE("GPL"); | ||
1605 | |||
1606 | late_initcall(powernowk8_init); | ||
1607 | module_exit(powernowk8_exit); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h deleted file mode 100644 index df3529b1c02d..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ /dev/null | |||
@@ -1,224 +0,0 @@ | |||
1 | /* | ||
2 | * (c) 2003-2006 Advanced Micro Devices, Inc. | ||
3 | * Your use of this code is subject to the terms and conditions of the | ||
4 | * GNU general public license version 2. See "COPYING" or | ||
5 | * http://www.gnu.org/licenses/gpl.html | ||
6 | */ | ||
7 | |||
/*
 * Hardware P-state numbers.  HW_PSTATE_INVALID is the value currpstate is
 * given when the per-CPU data is first set up.
 */
enum pstate {
	HW_PSTATE_INVALID = 0xff,
	HW_PSTATE_0 = 0,
	HW_PSTATE_1 = 1,
	HW_PSTATE_2 = 2,
	HW_PSTATE_3 = 3,
	HW_PSTATE_4 = 4,
	HW_PSTATE_5 = 5,
	HW_PSTATE_6 = 6,
	HW_PSTATE_7 = 7,
};
19 | |||
/* Driver state kept per CPU. */
struct powernow_k8_data {
	unsigned int cpu;	/* CPU number this state was created for */

	u32 numps; /* number of p-states */
	u32 batps; /* number of p-states supported on battery */
	u32 max_hw_pstate; /* maximum legal hardware pstate */

	/* these values are constant when the PSB is used to determine
	 * vid/fid pairings, but are modified during the ->target() call
	 * when ACPI is used */
	u32 rvo; /* ramp voltage offset */
	u32 irt; /* isochronous relief time */
	u32 vidmvs; /* usable value calculated from mvs */
	u32 vstable; /* voltage stabilization time, units 20 us */
	u32 plllock; /* pll lock time, units 1 us */
	u32 exttype; /* extended interface = 1 */

	/* keep track of the current fid / vid or pstate */
	u32 currvid;
	u32 currfid;
	enum pstate currpstate;

	/* the powernow_table includes all frequency and vid/fid pairings:
	 * fid are the lower 8 bits of the index, vid are the upper 8 bits.
	 * frequency is in kHz */
	struct cpufreq_frequency_table *powernow_table;

	/* the acpi table needs to be kept. it's only available if ACPI was
	 * used to determine valid frequency/vid/fid states */
	struct acpi_processor_performance acpi_data;

	/* we need to keep track of associated cores, but let cpufreq
	 * handle hotplug events - so just point at cpufreq pol->cpus
	 * structure */
	struct cpumask *available_cores;
};
56 | |||
/*
 * processor's cpuid instruction support: function numbers and the masks
 * applied to the values they return.
 */
#define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */
#define CPUID_XFAM 0x0ff00000 /* extended family */
#define CPUID_XFAM_K8 0
#define CPUID_XMOD 0x000f0000 /* extended model */
#define CPUID_XMOD_REV_MASK 0x000c0000
#define CPUID_XFAM_10H 0x00100000 /* family 0x10 */
#define CPUID_USE_XFAM_XMOD 0x00000f00
#define CPUID_GET_MAX_CAPABILITIES 0x80000000
#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007
#define P_STATE_TRANSITION_CAPABLE 6
68 | |||
/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For */
/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and */
/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */
/* the register number is placed in ecx, and the data is returned in edx:eax. */

#define MSR_FIDVID_CTL 0xc0010041
#define MSR_FIDVID_STATUS 0xc0010042

/* Field definitions within the FID VID Low Control MSR : */
#define MSR_C_LO_INIT_FID_VID 0x00010000
#define MSR_C_LO_NEW_VID 0x00003f00
#define MSR_C_LO_NEW_FID 0x0000003f
#define MSR_C_LO_VID_SHIFT 8

/* Field definitions within the FID VID High Control MSR : */
#define MSR_C_HI_STP_GNT_TO 0x000fffff

/* Field definitions within the FID VID Low Status MSR : */
#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */
#define MSR_S_LO_MAX_RAMP_VID 0x3f000000
#define MSR_S_LO_MAX_FID 0x003f0000
#define MSR_S_LO_START_FID 0x00003f00
#define MSR_S_LO_CURRENT_FID 0x0000003f

/* Field definitions within the FID VID High Status MSR : */
#define MSR_S_HI_MIN_WORKING_VID 0x3f000000
#define MSR_S_HI_MAX_WORKING_VID 0x003f0000
#define MSR_S_HI_START_VID 0x00003f00
#define MSR_S_HI_CURRENT_VID 0x0000003f
/* NOTE(review): MSR_C_ prefix suggests a control-MSR field listed under the status group - confirm */
#define MSR_C_HI_STP_GNT_BENIGN 0x00000001
99 | |||
100 | |||
/* Hardware Pstate _PSS and MSR definitions */
#define USE_HW_PSTATE 0x00000080
#define HW_PSTATE_MASK 0x00000007 /* extracts the P-state number from a table index */
#define HW_PSTATE_VALID_MASK 0x80000000
#define HW_PSTATE_MAX_MASK 0x000000f0
#define HW_PSTATE_MAX_SHIFT 4
#define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */
#define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */
#define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */
#define MSR_PSTATE_CUR_LIMIT 0xc0010061 /* pstate current limit MSR */

/* define the two driver architectures */
#define CPU_OPTERON 0 /* fid/vid based transitions */
#define CPU_HW_PSTATE 1 /* hardware P-state based transitions */
115 | |||
116 | |||
/*
 * There are restrictions frequencies have to follow:
 * - only 1 entry in the low fid table ( <=1.4GHz )
 * - lowest entry in the high fid table must be >= 2 * the entry in the
 * low fid table
 * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry
 * in the low fid table
 * - the parts can only step at <= 200 MHz intervals, odd fid values are
 * supported in revision G and later revisions.
 * - lowest frequency must be >= interprocessor hypertransport link speed
 * (only applies to MP systems obviously)
 */

/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */
#define LO_FID_TABLE_TOP 7 /* fid values marking the boundary */
#define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */

#define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */
#define HI_VCOFREQ_TABLE_BOTTOM 1600

#define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */

#define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */
#define LEAST_VID 0x3e /* Lowest (numerically highest) useful vid value */

#define MIN_FREQ 800 /* Min and max freqs, per spec */
#define MAX_FREQ 5000

#define INVALID_FID_MASK 0xffffffc0 /* not a valid fid if these bits are set */
#define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */

#define VID_OFF 0x3f

#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */

/*
 * NOTE(review): the comment below says "ms to ns", but plllock is documented
 * as being in units of 1 us, which matches the factor of 1000 - confirm.
 */
#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */

#define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */
#define VST_UNITS_20US 20 /* Voltage Stabilization Time is in units of 20us */
156 | |||
/*
 * Most values of interest are encoded in a single field of the _PSS
 * entries: the "control" value.
 *
 * Each *_SHIFT gives the bit position of a field and the matching *_MASK
 * its width once shifted down.
 * NOTE(review): no FID shift is defined, so the fid presumably sits at
 * bit 0 - confirm against the code that decodes the control value.
 */

#define IRT_SHIFT 30
#define RVO_SHIFT 28
#define EXT_TYPE_SHIFT 27
#define PLL_L_SHIFT 20
#define MVS_SHIFT 18
#define VST_SHIFT 11
#define VID_SHIFT 6
#define IRT_MASK 3
#define RVO_MASK 3
#define EXT_TYPE_MASK 1
#define PLL_L_MASK 0x7f
#define MVS_MASK 3
#define VST_MASK 0x7f
#define VID_MASK 0x1f
#define FID_MASK 0x1f
#define EXT_VID_MASK 0x3f
#define EXT_FID_MASK 0x3f
179 | |||
180 | |||
/*
 * Version 1.4 of the PSB table. This table is constructed by BIOS and is
 * to tell the OS's power management driver which VIDs and FIDs are
 * supported by this particular processor.
 * If the data in the PSB / PST is wrong, then this driver will program the
 * wrong values into hardware, which is very likely to lead to a crash.
 */

#define PSB_ID_STRING "AMDK7PNOW!"
#define PSB_ID_STRING_LEN 10

#define PSB_VERSION_1_4 0x14

/* Header of the PSB table as laid out by the BIOS. */
struct psb_s {
	u8 signature[10];	/* same length as PSB_ID_STRING_LEN */
	u8 tableversion;
	u8 flags1;
	u16 vstable;
	u8 flags2;
	u8 num_tables;
	u32 cpuid;
	u8 plllocktime;
	u8 maxfid;
	u8 maxvid;
	u8 numps;
};

/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */
struct pst_s {
	u8 fid;
	u8 vid;
};
213 | |||
/* Driver debug output, routed through the cpufreq debug facility under the "powernow-k8" prefix. */
#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg)
215 | |||
/* Forward declarations for static helpers defined in powernow-k8.c. */

/* The three phases of a fid/vid transition. */
static int core_voltage_pre_transition(struct powernow_k8_data *data,
	u32 reqvid, u32 regfid);
static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid);
static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);

/* Called from the ->target() path before each transition with the chosen table index. */
static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);

/* Frequency-table builders, one per driver architecture. */
static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c deleted file mode 100644 index 435a996a613a..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c +++ /dev/null | |||
@@ -1,194 +0,0 @@ | |||
1 | /* | ||
2 | * sc520_freq.c: cpufreq driver for the AMD Elan sc520 | ||
3 | * | ||
4 | * Copyright (C) 2005 Sean Young <sean@mess.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * Based on elanfreq.c | ||
12 | * | ||
13 | * 2005-03-30: - initial revision | ||
14 | */ | ||
15 | |||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/init.h> | ||
19 | |||
20 | #include <linux/delay.h> | ||
21 | #include <linux/cpufreq.h> | ||
22 | #include <linux/timex.h> | ||
23 | #include <linux/io.h> | ||
24 | |||
25 | #include <asm/msr.h> | ||
26 | |||
#define MMCR_BASE 0xfffef000 /* The default base address */
#define OFFS_CPUCTL 0x2 /* CPU Control Register */

/* Mapping of the CPU control register; set up in sc520_freq_init(). */
static __u8 __iomem *cpuctl;

/* Driver debug output via the cpufreq debug facility. */
#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
	"sc520_freq", msg)
/* Prefix for this driver's printk messages. */
#define PFX "sc520_freq: "
35 | |||
/*
 * Supported speeds.  The first member is the value written into the low two
 * bits of the CPU control register, the second the frequency in kHz.
 */
static struct cpufreq_frequency_table sc520_freq_table[] = {
	{0x01, 100000},
	{0x02, 133000},
	{0, CPUFREQ_TABLE_END},
};
41 | |||
42 | static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) | ||
43 | { | ||
44 | u8 clockspeed_reg = *cpuctl; | ||
45 | |||
46 | switch (clockspeed_reg & 0x03) { | ||
47 | default: | ||
48 | printk(KERN_ERR PFX "error: cpuctl register has unexpected " | ||
49 | "value %02x\n", clockspeed_reg); | ||
50 | case 0x01: | ||
51 | return 100000; | ||
52 | case 0x02: | ||
53 | return 133000; | ||
54 | } | ||
55 | } | ||
56 | |||
57 | static void sc520_freq_set_cpu_state(unsigned int state) | ||
58 | { | ||
59 | |||
60 | struct cpufreq_freqs freqs; | ||
61 | u8 clockspeed_reg; | ||
62 | |||
63 | freqs.old = sc520_freq_get_cpu_frequency(0); | ||
64 | freqs.new = sc520_freq_table[state].frequency; | ||
65 | freqs.cpu = 0; /* AMD Elan is UP */ | ||
66 | |||
67 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
68 | |||
69 | dprintk("attempting to set frequency to %i kHz\n", | ||
70 | sc520_freq_table[state].frequency); | ||
71 | |||
72 | local_irq_disable(); | ||
73 | |||
74 | clockspeed_reg = *cpuctl & ~0x03; | ||
75 | *cpuctl = clockspeed_reg | sc520_freq_table[state].index; | ||
76 | |||
77 | local_irq_enable(); | ||
78 | |||
79 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
80 | }; | ||
81 | |||
82 | static int sc520_freq_verify(struct cpufreq_policy *policy) | ||
83 | { | ||
84 | return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]); | ||
85 | } | ||
86 | |||
87 | static int sc520_freq_target(struct cpufreq_policy *policy, | ||
88 | unsigned int target_freq, | ||
89 | unsigned int relation) | ||
90 | { | ||
91 | unsigned int newstate = 0; | ||
92 | |||
93 | if (cpufreq_frequency_table_target(policy, sc520_freq_table, | ||
94 | target_freq, relation, &newstate)) | ||
95 | return -EINVAL; | ||
96 | |||
97 | sc520_freq_set_cpu_state(newstate); | ||
98 | |||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | |||
103 | /* | ||
104 | * Module init and exit code | ||
105 | */ | ||
106 | |||
107 | static int sc520_freq_cpu_init(struct cpufreq_policy *policy) | ||
108 | { | ||
109 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
110 | int result; | ||
111 | |||
112 | /* capability check */ | ||
113 | if (c->x86_vendor != X86_VENDOR_AMD || | ||
114 | c->x86 != 4 || c->x86_model != 9) | ||
115 | return -ENODEV; | ||
116 | |||
117 | /* cpuinfo and default policy values */ | ||
118 | policy->cpuinfo.transition_latency = 1000000; /* 1ms */ | ||
119 | policy->cur = sc520_freq_get_cpu_frequency(0); | ||
120 | |||
121 | result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table); | ||
122 | if (result) | ||
123 | return result; | ||
124 | |||
125 | cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu); | ||
126 | |||
127 | return 0; | ||
128 | } | ||
129 | |||
130 | |||
131 | static int sc520_freq_cpu_exit(struct cpufreq_policy *policy) | ||
132 | { | ||
133 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
134 | return 0; | ||
135 | } | ||
136 | |||
137 | |||
/* NULL-terminated attribute list referenced by the driver's .attr field. */
static struct freq_attr *sc520_freq_attr[] = {
	&cpufreq_freq_attr_scaling_available_freqs,
	NULL,
};
142 | |||
143 | |||
/* cpufreq driver description registered by sc520_freq_init(). */
static struct cpufreq_driver sc520_freq_driver = {
	.get = sc520_freq_get_cpu_frequency,
	.verify = sc520_freq_verify,
	.target = sc520_freq_target,
	.init = sc520_freq_cpu_init,
	.exit = sc520_freq_cpu_exit,
	.name = "sc520_freq",
	.owner = THIS_MODULE,
	.attr = sc520_freq_attr,
};
154 | |||
155 | |||
156 | static int __init sc520_freq_init(void) | ||
157 | { | ||
158 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
159 | int err; | ||
160 | |||
161 | /* Test if we have the right hardware */ | ||
162 | if (c->x86_vendor != X86_VENDOR_AMD || | ||
163 | c->x86 != 4 || c->x86_model != 9) { | ||
164 | dprintk("no Elan SC520 processor found!\n"); | ||
165 | return -ENODEV; | ||
166 | } | ||
167 | cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); | ||
168 | if (!cpuctl) { | ||
169 | printk(KERN_ERR "sc520_freq: error: failed to remap memory\n"); | ||
170 | return -ENOMEM; | ||
171 | } | ||
172 | |||
173 | err = cpufreq_register_driver(&sc520_freq_driver); | ||
174 | if (err) | ||
175 | iounmap(cpuctl); | ||
176 | |||
177 | return err; | ||
178 | } | ||
179 | |||
180 | |||
181 | static void __exit sc520_freq_exit(void) | ||
182 | { | ||
183 | cpufreq_unregister_driver(&sc520_freq_driver); | ||
184 | iounmap(cpuctl); | ||
185 | } | ||
186 | |||
187 | |||
188 | MODULE_LICENSE("GPL"); | ||
189 | MODULE_AUTHOR("Sean Young <sean@mess.org>"); | ||
190 | MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU"); | ||
191 | |||
192 | module_init(sc520_freq_init); | ||
193 | module_exit(sc520_freq_exit); | ||
194 | |||
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c deleted file mode 100644 index 9b1ff37de46a..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ /dev/null | |||
@@ -1,636 +0,0 @@ | |||
1 | /* | ||
2 | * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium | ||
3 | * M (part of the Centrino chipset). | ||
4 | * | ||
5 | * Since the original Pentium M, most new Intel CPUs support Enhanced | ||
6 | * SpeedStep. | ||
7 | * | ||
8 | * Despite the "SpeedStep" in the name, this is almost entirely unlike | ||
9 | * traditional SpeedStep. | ||
10 | * | ||
11 | * Modelled on speedstep.c | ||
12 | * | ||
13 | * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org> | ||
14 | */ | ||
15 | |||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/cpufreq.h> | ||
20 | #include <linux/sched.h> /* current */ | ||
21 | #include <linux/delay.h> | ||
22 | #include <linux/compiler.h> | ||
23 | #include <linux/gfp.h> | ||
24 | |||
25 | #include <asm/msr.h> | ||
26 | #include <asm/processor.h> | ||
27 | #include <asm/cpufeature.h> | ||
28 | |||
/* Prefix used on this driver's printk() messages. */
#define PFX		"speedstep-centrino: "
/* Address quoted in the "send /proc/cpuinfo" debug messages. */
#define MAINTAINER	"cpufreq@vger.kernel.org"

/* Debug output, emitted through cpufreq_debug_printk() at driver level. */
#define dprintk(msg...)							\
	cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)

#define INTEL_MSR_RANGE	(0xffff)
36 | |||
37 | struct cpu_id | ||
38 | { | ||
39 | __u8 x86; /* CPU family */ | ||
40 | __u8 x86_model; /* model */ | ||
41 | __u8 x86_mask; /* stepping */ | ||
42 | }; | ||
43 | |||
/* Indices into cpu_ids[], one per recognised CPU stepping. */
enum {
	CPU_BANIAS	= 0,
	CPU_DOTHAN_A1	= 1,
	CPU_DOTHAN_A2	= 2,
	CPU_DOTHAN_B0	= 3,
	CPU_MP4HT_D0	= 4,
	CPU_MP4HT_E0	= 5,
};
52 | |||
53 | static const struct cpu_id cpu_ids[] = { | ||
54 | [CPU_BANIAS] = { 6, 9, 5 }, | ||
55 | [CPU_DOTHAN_A1] = { 6, 13, 1 }, | ||
56 | [CPU_DOTHAN_A2] = { 6, 13, 2 }, | ||
57 | [CPU_DOTHAN_B0] = { 6, 13, 6 }, | ||
58 | [CPU_MP4HT_D0] = {15, 3, 4 }, | ||
59 | [CPU_MP4HT_E0] = {15, 4, 1 }, | ||
60 | }; | ||
61 | #define N_IDS ARRAY_SIZE(cpu_ids) | ||
62 | |||
/* One supported CPU model together with its operating-point table. */
struct cpu_model {
	const struct cpu_id	*cpu_id;
	const char		*model_name;
	unsigned		max_freq;	/* max clock in kHz */

	/* clock/voltage pairs */
	struct cpufreq_frequency_table	*op_points;
};

/* Defined further down; needed by the table lookup code. */
static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
				  const struct cpu_id *x);
73 | |||
74 | /* Operating points for current CPU */ | ||
75 | static DEFINE_PER_CPU(struct cpu_model *, centrino_model); | ||
76 | static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu); | ||
77 | |||
78 | static struct cpufreq_driver centrino_driver; | ||
79 | |||
80 | #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE | ||
81 | |||
/*
 * Computes the correct form for IA32_PERF_CTL MSR for a particular
 * frequency/voltage operating point; frequency in MHz, volts in mV.
 * This is stored as "index" in the structure.
 *
 * Every use of an argument is parenthesised so that an expression passed
 * as mhz or mv expands with the intended precedence.
 */
#define OP(mhz, mv)							\
	{								\
		.frequency = (mhz) * 1000,				\
		.index = (((mhz) / 100) << 8) | (((mv) - 700) / 16)	\
	}
90 | |||
91 | /* | ||
92 | * These voltage tables were derived from the Intel Pentium M | ||
93 | * datasheet, document 25261202.pdf, Table 5. I have verified they | ||
94 | * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium | ||
95 | * M. | ||
96 | */ | ||
97 | |||
98 | /* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */ | ||
99 | static struct cpufreq_frequency_table banias_900[] = | ||
100 | { | ||
101 | OP(600, 844), | ||
102 | OP(800, 988), | ||
103 | OP(900, 1004), | ||
104 | { .frequency = CPUFREQ_TABLE_END } | ||
105 | }; | ||
106 | |||
107 | /* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */ | ||
108 | static struct cpufreq_frequency_table banias_1000[] = | ||
109 | { | ||
110 | OP(600, 844), | ||
111 | OP(800, 972), | ||
112 | OP(900, 988), | ||
113 | OP(1000, 1004), | ||
114 | { .frequency = CPUFREQ_TABLE_END } | ||
115 | }; | ||
116 | |||
117 | /* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */ | ||
118 | static struct cpufreq_frequency_table banias_1100[] = | ||
119 | { | ||
120 | OP( 600, 956), | ||
121 | OP( 800, 1020), | ||
122 | OP( 900, 1100), | ||
123 | OP(1000, 1164), | ||
124 | OP(1100, 1180), | ||
125 | { .frequency = CPUFREQ_TABLE_END } | ||
126 | }; | ||
127 | |||
128 | |||
129 | /* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */ | ||
130 | static struct cpufreq_frequency_table banias_1200[] = | ||
131 | { | ||
132 | OP( 600, 956), | ||
133 | OP( 800, 1004), | ||
134 | OP( 900, 1020), | ||
135 | OP(1000, 1100), | ||
136 | OP(1100, 1164), | ||
137 | OP(1200, 1180), | ||
138 | { .frequency = CPUFREQ_TABLE_END } | ||
139 | }; | ||
140 | |||
141 | /* Intel Pentium M processor 1.30GHz (Banias) */ | ||
142 | static struct cpufreq_frequency_table banias_1300[] = | ||
143 | { | ||
144 | OP( 600, 956), | ||
145 | OP( 800, 1260), | ||
146 | OP(1000, 1292), | ||
147 | OP(1200, 1356), | ||
148 | OP(1300, 1388), | ||
149 | { .frequency = CPUFREQ_TABLE_END } | ||
150 | }; | ||
151 | |||
152 | /* Intel Pentium M processor 1.40GHz (Banias) */ | ||
153 | static struct cpufreq_frequency_table banias_1400[] = | ||
154 | { | ||
155 | OP( 600, 956), | ||
156 | OP( 800, 1180), | ||
157 | OP(1000, 1308), | ||
158 | OP(1200, 1436), | ||
159 | OP(1400, 1484), | ||
160 | { .frequency = CPUFREQ_TABLE_END } | ||
161 | }; | ||
162 | |||
163 | /* Intel Pentium M processor 1.50GHz (Banias) */ | ||
164 | static struct cpufreq_frequency_table banias_1500[] = | ||
165 | { | ||
166 | OP( 600, 956), | ||
167 | OP( 800, 1116), | ||
168 | OP(1000, 1228), | ||
169 | OP(1200, 1356), | ||
170 | OP(1400, 1452), | ||
171 | OP(1500, 1484), | ||
172 | { .frequency = CPUFREQ_TABLE_END } | ||
173 | }; | ||
174 | |||
175 | /* Intel Pentium M processor 1.60GHz (Banias) */ | ||
176 | static struct cpufreq_frequency_table banias_1600[] = | ||
177 | { | ||
178 | OP( 600, 956), | ||
179 | OP( 800, 1036), | ||
180 | OP(1000, 1164), | ||
181 | OP(1200, 1276), | ||
182 | OP(1400, 1420), | ||
183 | OP(1600, 1484), | ||
184 | { .frequency = CPUFREQ_TABLE_END } | ||
185 | }; | ||
186 | |||
187 | /* Intel Pentium M processor 1.70GHz (Banias) */ | ||
188 | static struct cpufreq_frequency_table banias_1700[] = | ||
189 | { | ||
190 | OP( 600, 956), | ||
191 | OP( 800, 1004), | ||
192 | OP(1000, 1116), | ||
193 | OP(1200, 1228), | ||
194 | OP(1400, 1308), | ||
195 | OP(1700, 1484), | ||
196 | { .frequency = CPUFREQ_TABLE_END } | ||
197 | }; | ||
198 | #undef OP | ||
199 | |||
/*
 * Builds one models[] entry for a Banias part: "max" is the top clock in
 * MHz and selects the banias_<max> table, "name" is the clock as it is
 * spelled inside the CPU's model string.
 */
#define _BANIAS(cpuid, max, name)					\
{									\
	.cpu_id		= cpuid,					\
	.model_name	= "Intel(R) Pentium(R) M processor " name "MHz", \
	.max_freq	= (max)*1000,					\
	.op_points	= banias_##max,					\
}
/* Shorthand for the common case where the name is just the number. */
#define BANIAS(max)	_BANIAS(&cpu_ids[CPU_BANIAS], max, #max)
207 | |||
208 | /* CPU models, their operating frequency range, and freq/voltage | ||
209 | operating points */ | ||
210 | static struct cpu_model models[] = | ||
211 | { | ||
212 | _BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"), | ||
213 | BANIAS(1000), | ||
214 | BANIAS(1100), | ||
215 | BANIAS(1200), | ||
216 | BANIAS(1300), | ||
217 | BANIAS(1400), | ||
218 | BANIAS(1500), | ||
219 | BANIAS(1600), | ||
220 | BANIAS(1700), | ||
221 | |||
222 | /* NULL model_name is a wildcard */ | ||
223 | { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL }, | ||
224 | { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL }, | ||
225 | { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL }, | ||
226 | { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL }, | ||
227 | { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL }, | ||
228 | |||
229 | { NULL, } | ||
230 | }; | ||
231 | #undef _BANIAS | ||
232 | #undef BANIAS | ||
233 | |||
234 | static int centrino_cpu_init_table(struct cpufreq_policy *policy) | ||
235 | { | ||
236 | struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); | ||
237 | struct cpu_model *model; | ||
238 | |||
239 | for(model = models; model->cpu_id != NULL; model++) | ||
240 | if (centrino_verify_cpu_id(cpu, model->cpu_id) && | ||
241 | (model->model_name == NULL || | ||
242 | strcmp(cpu->x86_model_id, model->model_name) == 0)) | ||
243 | break; | ||
244 | |||
245 | if (model->cpu_id == NULL) { | ||
246 | /* No match at all */ | ||
247 | dprintk("no support for CPU model \"%s\": " | ||
248 | "send /proc/cpuinfo to " MAINTAINER "\n", | ||
249 | cpu->x86_model_id); | ||
250 | return -ENOENT; | ||
251 | } | ||
252 | |||
253 | if (model->op_points == NULL) { | ||
254 | /* Matched a non-match */ | ||
255 | dprintk("no table support for CPU model \"%s\"\n", | ||
256 | cpu->x86_model_id); | ||
257 | dprintk("try using the acpi-cpufreq driver\n"); | ||
258 | return -ENOENT; | ||
259 | } | ||
260 | |||
261 | per_cpu(centrino_model, policy->cpu) = model; | ||
262 | |||
263 | dprintk("found \"%s\": max frequency: %dkHz\n", | ||
264 | model->model_name, model->max_freq); | ||
265 | |||
266 | return 0; | ||
267 | } | ||
268 | |||
269 | #else | ||
270 | static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) | ||
271 | { | ||
272 | return -ENODEV; | ||
273 | } | ||
274 | #endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ | ||
275 | |||
276 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, | ||
277 | const struct cpu_id *x) | ||
278 | { | ||
279 | if ((c->x86 == x->x86) && | ||
280 | (c->x86_model == x->x86_model) && | ||
281 | (c->x86_mask == x->x86_mask)) | ||
282 | return 1; | ||
283 | return 0; | ||
284 | } | ||
285 | |||
286 | /* To be called only after centrino_model is initialized */ | ||
287 | static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe) | ||
288 | { | ||
289 | int i; | ||
290 | |||
291 | /* | ||
292 | * Extract clock in kHz from PERF_CTL value | ||
293 | * for centrino, as some DSDTs are buggy. | ||
294 | * Ideally, this can be done using the acpi_data structure. | ||
295 | */ | ||
296 | if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) || | ||
297 | (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) || | ||
298 | (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) { | ||
299 | msr = (msr >> 8) & 0xff; | ||
300 | return msr * 100000; | ||
301 | } | ||
302 | |||
303 | if ((!per_cpu(centrino_model, cpu)) || | ||
304 | (!per_cpu(centrino_model, cpu)->op_points)) | ||
305 | return 0; | ||
306 | |||
307 | msr &= 0xffff; | ||
308 | for (i = 0; | ||
309 | per_cpu(centrino_model, cpu)->op_points[i].frequency | ||
310 | != CPUFREQ_TABLE_END; | ||
311 | i++) { | ||
312 | if (msr == per_cpu(centrino_model, cpu)->op_points[i].index) | ||
313 | return per_cpu(centrino_model, cpu)-> | ||
314 | op_points[i].frequency; | ||
315 | } | ||
316 | if (failsafe) | ||
317 | return per_cpu(centrino_model, cpu)->op_points[i-1].frequency; | ||
318 | else | ||
319 | return 0; | ||
320 | } | ||
321 | |||
322 | /* Return the current CPU frequency in kHz */ | ||
323 | static unsigned int get_cur_freq(unsigned int cpu) | ||
324 | { | ||
325 | unsigned l, h; | ||
326 | unsigned clock_freq; | ||
327 | |||
328 | rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h); | ||
329 | clock_freq = extract_clock(l, cpu, 0); | ||
330 | |||
331 | if (unlikely(clock_freq == 0)) { | ||
332 | /* | ||
333 | * On some CPUs, we can see transient MSR values (which are | ||
334 | * not present in _PSS), while CPU is doing some automatic | ||
335 | * P-state transition (like TM2). Get the last freq set | ||
336 | * in PERF_CTL. | ||
337 | */ | ||
338 | rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h); | ||
339 | clock_freq = extract_clock(l, cpu, 1); | ||
340 | } | ||
341 | return clock_freq; | ||
342 | } | ||
343 | |||
344 | |||
345 | static int centrino_cpu_init(struct cpufreq_policy *policy) | ||
346 | { | ||
347 | struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); | ||
348 | unsigned freq; | ||
349 | unsigned l, h; | ||
350 | int ret; | ||
351 | int i; | ||
352 | |||
353 | /* Only Intel makes Enhanced Speedstep-capable CPUs */ | ||
354 | if (cpu->x86_vendor != X86_VENDOR_INTEL || | ||
355 | !cpu_has(cpu, X86_FEATURE_EST)) | ||
356 | return -ENODEV; | ||
357 | |||
358 | if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) | ||
359 | centrino_driver.flags |= CPUFREQ_CONST_LOOPS; | ||
360 | |||
361 | if (policy->cpu != 0) | ||
362 | return -ENODEV; | ||
363 | |||
364 | for (i = 0; i < N_IDS; i++) | ||
365 | if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) | ||
366 | break; | ||
367 | |||
368 | if (i != N_IDS) | ||
369 | per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i]; | ||
370 | |||
371 | if (!per_cpu(centrino_cpu, policy->cpu)) { | ||
372 | dprintk("found unsupported CPU with " | ||
373 | "Enhanced SpeedStep: send /proc/cpuinfo to " | ||
374 | MAINTAINER "\n"); | ||
375 | return -ENODEV; | ||
376 | } | ||
377 | |||
378 | if (centrino_cpu_init_table(policy)) { | ||
379 | return -ENODEV; | ||
380 | } | ||
381 | |||
382 | /* Check to see if Enhanced SpeedStep is enabled, and try to | ||
383 | enable it if not. */ | ||
384 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
385 | |||
386 | if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { | ||
387 | l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; | ||
388 | dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); | ||
389 | wrmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
390 | |||
391 | /* check to see if it stuck */ | ||
392 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
393 | if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { | ||
394 | printk(KERN_INFO PFX | ||
395 | "couldn't enable Enhanced SpeedStep\n"); | ||
396 | return -ENODEV; | ||
397 | } | ||
398 | } | ||
399 | |||
400 | freq = get_cur_freq(policy->cpu); | ||
401 | policy->cpuinfo.transition_latency = 10000; | ||
402 | /* 10uS transition latency */ | ||
403 | policy->cur = freq; | ||
404 | |||
405 | dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); | ||
406 | |||
407 | ret = cpufreq_frequency_table_cpuinfo(policy, | ||
408 | per_cpu(centrino_model, policy->cpu)->op_points); | ||
409 | if (ret) | ||
410 | return (ret); | ||
411 | |||
412 | cpufreq_frequency_table_get_attr( | ||
413 | per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu); | ||
414 | |||
415 | return 0; | ||
416 | } | ||
417 | |||
418 | static int centrino_cpu_exit(struct cpufreq_policy *policy) | ||
419 | { | ||
420 | unsigned int cpu = policy->cpu; | ||
421 | |||
422 | if (!per_cpu(centrino_model, cpu)) | ||
423 | return -ENODEV; | ||
424 | |||
425 | cpufreq_frequency_table_put_attr(cpu); | ||
426 | |||
427 | per_cpu(centrino_model, cpu) = NULL; | ||
428 | |||
429 | return 0; | ||
430 | } | ||
431 | |||
432 | /** | ||
433 | * centrino_verify - verifies a new CPUFreq policy | ||
434 | * @policy: new policy | ||
435 | * | ||
436 | * Limit must be within this model's frequency range at least one | ||
437 | * border included. | ||
438 | */ | ||
439 | static int centrino_verify (struct cpufreq_policy *policy) | ||
440 | { | ||
441 | return cpufreq_frequency_table_verify(policy, | ||
442 | per_cpu(centrino_model, policy->cpu)->op_points); | ||
443 | } | ||
444 | |||
445 | /** | ||
446 | * centrino_setpolicy - set a new CPUFreq policy | ||
447 | * @policy: new policy | ||
448 | * @target_freq: the target frequency | ||
449 | * @relation: how that frequency relates to achieved frequency | ||
450 | * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | ||
451 | * | ||
452 | * Sets a new CPUFreq policy. | ||
453 | */ | ||
454 | static int centrino_target (struct cpufreq_policy *policy, | ||
455 | unsigned int target_freq, | ||
456 | unsigned int relation) | ||
457 | { | ||
458 | unsigned int newstate = 0; | ||
459 | unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; | ||
460 | struct cpufreq_freqs freqs; | ||
461 | int retval = 0; | ||
462 | unsigned int j, k, first_cpu, tmp; | ||
463 | cpumask_var_t covered_cpus; | ||
464 | |||
465 | if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) | ||
466 | return -ENOMEM; | ||
467 | |||
468 | if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { | ||
469 | retval = -ENODEV; | ||
470 | goto out; | ||
471 | } | ||
472 | |||
473 | if (unlikely(cpufreq_frequency_table_target(policy, | ||
474 | per_cpu(centrino_model, cpu)->op_points, | ||
475 | target_freq, | ||
476 | relation, | ||
477 | &newstate))) { | ||
478 | retval = -EINVAL; | ||
479 | goto out; | ||
480 | } | ||
481 | |||
482 | first_cpu = 1; | ||
483 | for_each_cpu(j, policy->cpus) { | ||
484 | int good_cpu; | ||
485 | |||
486 | /* cpufreq holds the hotplug lock, so we are safe here */ | ||
487 | if (!cpu_online(j)) | ||
488 | continue; | ||
489 | |||
490 | /* | ||
491 | * Support for SMP systems. | ||
492 | * Make sure we are running on CPU that wants to change freq | ||
493 | */ | ||
494 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) | ||
495 | good_cpu = cpumask_any_and(policy->cpus, | ||
496 | cpu_online_mask); | ||
497 | else | ||
498 | good_cpu = j; | ||
499 | |||
500 | if (good_cpu >= nr_cpu_ids) { | ||
501 | dprintk("couldn't limit to CPUs in this domain\n"); | ||
502 | retval = -EAGAIN; | ||
503 | if (first_cpu) { | ||
504 | /* We haven't started the transition yet. */ | ||
505 | goto out; | ||
506 | } | ||
507 | break; | ||
508 | } | ||
509 | |||
510 | msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; | ||
511 | |||
512 | if (first_cpu) { | ||
513 | rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h); | ||
514 | if (msr == (oldmsr & 0xffff)) { | ||
515 | dprintk("no change needed - msr was and needs " | ||
516 | "to be %x\n", oldmsr); | ||
517 | retval = 0; | ||
518 | goto out; | ||
519 | } | ||
520 | |||
521 | freqs.old = extract_clock(oldmsr, cpu, 0); | ||
522 | freqs.new = extract_clock(msr, cpu, 0); | ||
523 | |||
524 | dprintk("target=%dkHz old=%d new=%d msr=%04x\n", | ||
525 | target_freq, freqs.old, freqs.new, msr); | ||
526 | |||
527 | for_each_cpu(k, policy->cpus) { | ||
528 | if (!cpu_online(k)) | ||
529 | continue; | ||
530 | freqs.cpu = k; | ||
531 | cpufreq_notify_transition(&freqs, | ||
532 | CPUFREQ_PRECHANGE); | ||
533 | } | ||
534 | |||
535 | first_cpu = 0; | ||
536 | /* all but 16 LSB are reserved, treat them with care */ | ||
537 | oldmsr &= ~0xffff; | ||
538 | msr &= 0xffff; | ||
539 | oldmsr |= msr; | ||
540 | } | ||
541 | |||
542 | wrmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, oldmsr, h); | ||
543 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) | ||
544 | break; | ||
545 | |||
546 | cpumask_set_cpu(j, covered_cpus); | ||
547 | } | ||
548 | |||
549 | for_each_cpu(k, policy->cpus) { | ||
550 | if (!cpu_online(k)) | ||
551 | continue; | ||
552 | freqs.cpu = k; | ||
553 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
554 | } | ||
555 | |||
556 | if (unlikely(retval)) { | ||
557 | /* | ||
558 | * We have failed halfway through the frequency change. | ||
559 | * We have sent callbacks to policy->cpus and | ||
560 | * MSRs have already been written on coverd_cpus. | ||
561 | * Best effort undo.. | ||
562 | */ | ||
563 | |||
564 | for_each_cpu(j, covered_cpus) | ||
565 | wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h); | ||
566 | |||
567 | tmp = freqs.new; | ||
568 | freqs.new = freqs.old; | ||
569 | freqs.old = tmp; | ||
570 | for_each_cpu(j, policy->cpus) { | ||
571 | if (!cpu_online(j)) | ||
572 | continue; | ||
573 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
574 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
575 | } | ||
576 | } | ||
577 | retval = 0; | ||
578 | |||
579 | out: | ||
580 | free_cpumask_var(covered_cpus); | ||
581 | return retval; | ||
582 | } | ||
583 | |||
584 | static struct freq_attr* centrino_attr[] = { | ||
585 | &cpufreq_freq_attr_scaling_available_freqs, | ||
586 | NULL, | ||
587 | }; | ||
588 | |||
589 | static struct cpufreq_driver centrino_driver = { | ||
590 | .name = "centrino", /* should be speedstep-centrino, | ||
591 | but there's a 16 char limit */ | ||
592 | .init = centrino_cpu_init, | ||
593 | .exit = centrino_cpu_exit, | ||
594 | .verify = centrino_verify, | ||
595 | .target = centrino_target, | ||
596 | .get = get_cur_freq, | ||
597 | .attr = centrino_attr, | ||
598 | .owner = THIS_MODULE, | ||
599 | }; | ||
600 | |||
601 | |||
602 | /** | ||
603 | * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver | ||
604 | * | ||
605 | * Initializes the Enhanced SpeedStep support. Returns -ENODEV on | ||
606 | * unsupported devices, -ENOENT if there's no voltage table for this | ||
607 | * particular CPU model, -EINVAL on problems during initiatization, | ||
608 | * and zero on success. | ||
609 | * | ||
610 | * This is quite picky. Not only does the CPU have to advertise the | ||
611 | * "est" flag in the cpuid capability flags, we look for a specific | ||
612 | * CPU model and stepping, and we need to have the exact model name in | ||
613 | * our voltage tables. That is, be paranoid about not releasing | ||
614 | * someone's valuable magic smoke. | ||
615 | */ | ||
616 | static int __init centrino_init(void) | ||
617 | { | ||
618 | struct cpuinfo_x86 *cpu = &cpu_data(0); | ||
619 | |||
620 | if (!cpu_has(cpu, X86_FEATURE_EST)) | ||
621 | return -ENODEV; | ||
622 | |||
623 | return cpufreq_register_driver(¢rino_driver); | ||
624 | } | ||
625 | |||
626 | static void __exit centrino_exit(void) | ||
627 | { | ||
628 | cpufreq_unregister_driver(¢rino_driver); | ||
629 | } | ||
630 | |||
631 | MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>"); | ||
632 | MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors."); | ||
633 | MODULE_LICENSE ("GPL"); | ||
634 | |||
635 | late_initcall(centrino_init); | ||
636 | module_exit(centrino_exit); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c deleted file mode 100644 index 561758e95180..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ /dev/null | |||
@@ -1,452 +0,0 @@ | |||
1 | /* | ||
2 | * (C) 2001 Dave Jones, Arjan van de ven. | ||
3 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> | ||
4 | * | ||
5 | * Licensed under the terms of the GNU GPL License version 2. | ||
6 | * Based upon reverse engineered information, and on Intel documentation | ||
7 | * for chipsets ICH2-M and ICH3-M. | ||
8 | * | ||
9 | * Many thanks to Ducrot Bruno for finding and fixing the last | ||
10 | * "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler | ||
11 | * for extensive testing. | ||
12 | * | ||
13 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
14 | */ | ||
15 | |||
16 | |||
17 | /********************************************************************* | ||
18 | * SPEEDSTEP - DEFINITIONS * | ||
19 | *********************************************************************/ | ||
20 | |||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/init.h> | ||
24 | #include <linux/cpufreq.h> | ||
25 | #include <linux/pci.h> | ||
26 | #include <linux/sched.h> | ||
27 | |||
28 | #include "speedstep-lib.h" | ||
29 | |||
30 | |||
31 | /* speedstep_chipset: | ||
32 | * It is necessary to know which chipset is used. As accesses to | ||
33 | * this device occur at various places in this module, we need a | ||
34 | * static struct pci_dev * pointing to that device. | ||
35 | */ | ||
36 | static struct pci_dev *speedstep_chipset_dev; | ||
37 | |||
38 | |||
39 | /* speedstep_processor | ||
40 | */ | ||
41 | static enum speedstep_processor speedstep_processor; | ||
42 | |||
43 | static u32 pmbase; | ||
44 | |||
45 | /* | ||
46 | * There are only two frequency states for each processor. Values | ||
47 | * are in kHz for the time being. | ||
48 | */ | ||
49 | static struct cpufreq_frequency_table speedstep_freqs[] = { | ||
50 | {SPEEDSTEP_HIGH, 0}, | ||
51 | {SPEEDSTEP_LOW, 0}, | ||
52 | {0, CPUFREQ_TABLE_END}, | ||
53 | }; | ||
54 | |||
55 | |||
/* Debug output, emitted through cpufreq_debug_printk() at driver level. */
#define dprintk(msg...)							\
	cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg)
58 | |||
59 | |||
60 | /** | ||
61 | * speedstep_find_register - read the PMBASE address | ||
62 | * | ||
63 | * Returns: -ENODEV if no register could be found | ||
64 | */ | ||
65 | static int speedstep_find_register(void) | ||
66 | { | ||
67 | if (!speedstep_chipset_dev) | ||
68 | return -ENODEV; | ||
69 | |||
70 | /* get PMBASE */ | ||
71 | pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase); | ||
72 | if (!(pmbase & 0x01)) { | ||
73 | printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); | ||
74 | return -ENODEV; | ||
75 | } | ||
76 | |||
77 | pmbase &= 0xFFFFFFFE; | ||
78 | if (!pmbase) { | ||
79 | printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); | ||
80 | return -ENODEV; | ||
81 | } | ||
82 | |||
83 | dprintk("pmbase is 0x%x\n", pmbase); | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | /** | ||
88 | * speedstep_set_state - set the SpeedStep state | ||
89 | * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) | ||
90 | * | ||
91 | * Tries to change the SpeedStep state. Can be called from | ||
92 | * smp_call_function_single. | ||
93 | */ | ||
94 | static void speedstep_set_state(unsigned int state) | ||
95 | { | ||
96 | u8 pm2_blk; | ||
97 | u8 value; | ||
98 | unsigned long flags; | ||
99 | |||
100 | if (state > 0x1) | ||
101 | return; | ||
102 | |||
103 | /* Disable IRQs */ | ||
104 | local_irq_save(flags); | ||
105 | |||
106 | /* read state */ | ||
107 | value = inb(pmbase + 0x50); | ||
108 | |||
109 | dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); | ||
110 | |||
111 | /* write new state */ | ||
112 | value &= 0xFE; | ||
113 | value |= state; | ||
114 | |||
115 | dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase); | ||
116 | |||
117 | /* Disable bus master arbitration */ | ||
118 | pm2_blk = inb(pmbase + 0x20); | ||
119 | pm2_blk |= 0x01; | ||
120 | outb(pm2_blk, (pmbase + 0x20)); | ||
121 | |||
122 | /* Actual transition */ | ||
123 | outb(value, (pmbase + 0x50)); | ||
124 | |||
125 | /* Restore bus master arbitration */ | ||
126 | pm2_blk &= 0xfe; | ||
127 | outb(pm2_blk, (pmbase + 0x20)); | ||
128 | |||
129 | /* check if transition was successful */ | ||
130 | value = inb(pmbase + 0x50); | ||
131 | |||
132 | /* Enable IRQs */ | ||
133 | local_irq_restore(flags); | ||
134 | |||
135 | dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); | ||
136 | |||
137 | if (state == (value & 0x1)) | ||
138 | dprintk("change to %u MHz succeeded\n", | ||
139 | speedstep_get_frequency(speedstep_processor) / 1000); | ||
140 | else | ||
141 | printk(KERN_ERR "cpufreq: change failed - I/O error\n"); | ||
142 | |||
143 | return; | ||
144 | } | ||
145 | |||
/*
 * Wrapper for smp_call_function_single: @_state points at the
 * unsigned int state to hand to speedstep_set_state().
 */
static void _speedstep_set_state(void *_state)
{
	unsigned int *state = _state;

	speedstep_set_state(*state);
}
151 | |||
152 | /** | ||
153 | * speedstep_activate - activate SpeedStep control in the chipset | ||
154 | * | ||
155 | * Tries to activate the SpeedStep status and control registers. | ||
156 | * Returns -EINVAL on an unsupported chipset, and zero on success. | ||
157 | */ | ||
158 | static int speedstep_activate(void) | ||
159 | { | ||
160 | u16 value = 0; | ||
161 | |||
162 | if (!speedstep_chipset_dev) | ||
163 | return -EINVAL; | ||
164 | |||
165 | pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value); | ||
166 | if (!(value & 0x08)) { | ||
167 | value |= 0x08; | ||
168 | dprintk("activating SpeedStep (TM) registers\n"); | ||
169 | pci_write_config_word(speedstep_chipset_dev, 0x00A0, value); | ||
170 | } | ||
171 | |||
172 | return 0; | ||
173 | } | ||
174 | |||
175 | |||
176 | /** | ||
177 | * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic | ||
178 | * | ||
179 | * Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to | ||
180 | * the LPC bridge / PM module which contains all power-management | ||
181 | * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected | ||
182 | * chipset, or zero on failure. | ||
183 | */ | ||
184 | static unsigned int speedstep_detect_chipset(void) | ||
185 | { | ||
186 | speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, | ||
187 | PCI_DEVICE_ID_INTEL_82801DB_12, | ||
188 | PCI_ANY_ID, PCI_ANY_ID, | ||
189 | NULL); | ||
190 | if (speedstep_chipset_dev) | ||
191 | return 4; /* 4-M */ | ||
192 | |||
193 | speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, | ||
194 | PCI_DEVICE_ID_INTEL_82801CA_12, | ||
195 | PCI_ANY_ID, PCI_ANY_ID, | ||
196 | NULL); | ||
197 | if (speedstep_chipset_dev) | ||
198 | return 3; /* 3-M */ | ||
199 | |||
200 | |||
201 | speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, | ||
202 | PCI_DEVICE_ID_INTEL_82801BA_10, | ||
203 | PCI_ANY_ID, PCI_ANY_ID, | ||
204 | NULL); | ||
205 | if (speedstep_chipset_dev) { | ||
206 | /* speedstep.c causes lockups on Dell Inspirons 8000 and | ||
207 | * 8100 which use a pretty old revision of the 82815 | ||
208 | * host brige. Abort on these systems. | ||
209 | */ | ||
210 | static struct pci_dev *hostbridge; | ||
211 | |||
212 | hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL, | ||
213 | PCI_DEVICE_ID_INTEL_82815_MC, | ||
214 | PCI_ANY_ID, PCI_ANY_ID, | ||
215 | NULL); | ||
216 | |||
217 | if (!hostbridge) | ||
218 | return 2; /* 2-M */ | ||
219 | |||
220 | if (hostbridge->revision < 5) { | ||
221 | dprintk("hostbridge does not support speedstep\n"); | ||
222 | speedstep_chipset_dev = NULL; | ||
223 | pci_dev_put(hostbridge); | ||
224 | return 0; | ||
225 | } | ||
226 | |||
227 | pci_dev_put(hostbridge); | ||
228 | return 2; /* 2-M */ | ||
229 | } | ||
230 | |||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | static void get_freq_data(void *_speed) | ||
235 | { | ||
236 | unsigned int *speed = _speed; | ||
237 | |||
238 | *speed = speedstep_get_frequency(speedstep_processor); | ||
239 | } | ||
240 | |||
241 | static unsigned int speedstep_get(unsigned int cpu) | ||
242 | { | ||
243 | unsigned int speed; | ||
244 | |||
245 | /* You're supposed to ensure CPU is online. */ | ||
246 | if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0) | ||
247 | BUG(); | ||
248 | |||
249 | dprintk("detected %u kHz as current frequency\n", speed); | ||
250 | return speed; | ||
251 | } | ||
252 | |||
253 | /** | ||
254 | * speedstep_target - set a new CPUFreq policy | ||
255 | * @policy: new policy | ||
256 | * @target_freq: the target frequency | ||
257 | * @relation: how that frequency relates to achieved frequency | ||
258 | * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | ||
259 | * | ||
260 | * Sets a new CPUFreq policy. | ||
261 | */ | ||
262 | static int speedstep_target(struct cpufreq_policy *policy, | ||
263 | unsigned int target_freq, | ||
264 | unsigned int relation) | ||
265 | { | ||
266 | unsigned int newstate = 0, policy_cpu; | ||
267 | struct cpufreq_freqs freqs; | ||
268 | int i; | ||
269 | |||
270 | if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], | ||
271 | target_freq, relation, &newstate)) | ||
272 | return -EINVAL; | ||
273 | |||
274 | policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); | ||
275 | freqs.old = speedstep_get(policy_cpu); | ||
276 | freqs.new = speedstep_freqs[newstate].frequency; | ||
277 | freqs.cpu = policy->cpu; | ||
278 | |||
279 | dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new); | ||
280 | |||
281 | /* no transition necessary */ | ||
282 | if (freqs.old == freqs.new) | ||
283 | return 0; | ||
284 | |||
285 | for_each_cpu(i, policy->cpus) { | ||
286 | freqs.cpu = i; | ||
287 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
288 | } | ||
289 | |||
290 | smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate, | ||
291 | true); | ||
292 | |||
293 | for_each_cpu(i, policy->cpus) { | ||
294 | freqs.cpu = i; | ||
295 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
296 | } | ||
297 | |||
298 | return 0; | ||
299 | } | ||
300 | |||
301 | |||
302 | /** | ||
303 | * speedstep_verify - verifies a new CPUFreq policy | ||
304 | * @policy: new policy | ||
305 | * | ||
306 | * Limit must be within speedstep_low_freq and speedstep_high_freq, with | ||
307 | * at least one border included. | ||
308 | */ | ||
309 | static int speedstep_verify(struct cpufreq_policy *policy) | ||
310 | { | ||
311 | return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); | ||
312 | } | ||
313 | |||
/*
 * Argument/result bundle handed to get_freqs_on_cpu() through
 * smp_call_function_single().
 */
struct get_freqs {
	struct cpufreq_policy *policy;	/* in: policy whose transition latency is filled in */
	int ret;			/* out: return value of speedstep_get_freqs() */
};
318 | |||
319 | static void get_freqs_on_cpu(void *_get_freqs) | ||
320 | { | ||
321 | struct get_freqs *get_freqs = _get_freqs; | ||
322 | |||
323 | get_freqs->ret = | ||
324 | speedstep_get_freqs(speedstep_processor, | ||
325 | &speedstep_freqs[SPEEDSTEP_LOW].frequency, | ||
326 | &speedstep_freqs[SPEEDSTEP_HIGH].frequency, | ||
327 | &get_freqs->policy->cpuinfo.transition_latency, | ||
328 | &speedstep_set_state); | ||
329 | } | ||
330 | |||
331 | static int speedstep_cpu_init(struct cpufreq_policy *policy) | ||
332 | { | ||
333 | int result; | ||
334 | unsigned int policy_cpu, speed; | ||
335 | struct get_freqs gf; | ||
336 | |||
337 | /* only run on CPU to be set, or on its sibling */ | ||
338 | #ifdef CONFIG_SMP | ||
339 | cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); | ||
340 | #endif | ||
341 | policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); | ||
342 | |||
343 | /* detect low and high frequency and transition latency */ | ||
344 | gf.policy = policy; | ||
345 | smp_call_function_single(policy_cpu, get_freqs_on_cpu, &gf, 1); | ||
346 | if (gf.ret) | ||
347 | return gf.ret; | ||
348 | |||
349 | /* get current speed setting */ | ||
350 | speed = speedstep_get(policy_cpu); | ||
351 | if (!speed) | ||
352 | return -EIO; | ||
353 | |||
354 | dprintk("currently at %s speed setting - %i MHz\n", | ||
355 | (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) | ||
356 | ? "low" : "high", | ||
357 | (speed / 1000)); | ||
358 | |||
359 | /* cpuinfo and default policy values */ | ||
360 | policy->cur = speed; | ||
361 | |||
362 | result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); | ||
363 | if (result) | ||
364 | return result; | ||
365 | |||
366 | cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); | ||
367 | |||
368 | return 0; | ||
369 | } | ||
370 | |||
371 | |||
372 | static int speedstep_cpu_exit(struct cpufreq_policy *policy) | ||
373 | { | ||
374 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
375 | return 0; | ||
376 | } | ||
377 | |||
/* NULL-terminated attribute list referenced by speedstep_driver.attr */
static struct freq_attr *speedstep_attr[] = {
	&cpufreq_freq_attr_scaling_available_freqs,
	NULL,
};
382 | |||
383 | |||
384 | static struct cpufreq_driver speedstep_driver = { | ||
385 | .name = "speedstep-ich", | ||
386 | .verify = speedstep_verify, | ||
387 | .target = speedstep_target, | ||
388 | .init = speedstep_cpu_init, | ||
389 | .exit = speedstep_cpu_exit, | ||
390 | .get = speedstep_get, | ||
391 | .owner = THIS_MODULE, | ||
392 | .attr = speedstep_attr, | ||
393 | }; | ||
394 | |||
395 | |||
396 | /** | ||
397 | * speedstep_init - initializes the SpeedStep CPUFreq driver | ||
398 | * | ||
399 | * Initializes the SpeedStep support. Returns -ENODEV on unsupported | ||
400 | * devices, -EINVAL on problems during initiatization, and zero on | ||
401 | * success. | ||
402 | */ | ||
403 | static int __init speedstep_init(void) | ||
404 | { | ||
405 | /* detect processor */ | ||
406 | speedstep_processor = speedstep_detect_processor(); | ||
407 | if (!speedstep_processor) { | ||
408 | dprintk("Intel(R) SpeedStep(TM) capable processor " | ||
409 | "not found\n"); | ||
410 | return -ENODEV; | ||
411 | } | ||
412 | |||
413 | /* detect chipset */ | ||
414 | if (!speedstep_detect_chipset()) { | ||
415 | dprintk("Intel(R) SpeedStep(TM) for this chipset not " | ||
416 | "(yet) available.\n"); | ||
417 | return -ENODEV; | ||
418 | } | ||
419 | |||
420 | /* activate speedstep support */ | ||
421 | if (speedstep_activate()) { | ||
422 | pci_dev_put(speedstep_chipset_dev); | ||
423 | return -EINVAL; | ||
424 | } | ||
425 | |||
426 | if (speedstep_find_register()) | ||
427 | return -ENODEV; | ||
428 | |||
429 | return cpufreq_register_driver(&speedstep_driver); | ||
430 | } | ||
431 | |||
432 | |||
433 | /** | ||
434 | * speedstep_exit - unregisters SpeedStep support | ||
435 | * | ||
436 | * Unregisters SpeedStep support. | ||
437 | */ | ||
438 | static void __exit speedstep_exit(void) | ||
439 | { | ||
440 | pci_dev_put(speedstep_chipset_dev); | ||
441 | cpufreq_unregister_driver(&speedstep_driver); | ||
442 | } | ||
443 | |||
444 | |||
445 | MODULE_AUTHOR("Dave Jones <davej@redhat.com>, " | ||
446 | "Dominik Brodowski <linux@brodo.de>"); | ||
447 | MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets " | ||
448 | "with ICH-M southbridges."); | ||
449 | MODULE_LICENSE("GPL"); | ||
450 | |||
451 | module_init(speedstep_init); | ||
452 | module_exit(speedstep_exit); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c deleted file mode 100644 index a94ec6be69fa..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ /dev/null | |||
@@ -1,481 +0,0 @@ | |||
1 | /* | ||
2 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * | ||
6 | * Library for common functions for Intel SpeedStep v.1 and v.2 support | ||
7 | * | ||
8 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
9 | */ | ||
10 | |||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/moduleparam.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/cpufreq.h> | ||
16 | |||
17 | #include <asm/msr.h> | ||
18 | #include <asm/tsc.h> | ||
19 | #include "speedstep-lib.h" | ||
20 | |||
21 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
22 | "speedstep-lib", msg) | ||
23 | |||
24 | #define PFX "speedstep-lib: " | ||
25 | |||
26 | #ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK | ||
27 | static int relaxed_check; | ||
28 | #else | ||
29 | #define relaxed_check 0 | ||
30 | #endif | ||
31 | |||
32 | /********************************************************************* | ||
33 | * GET PROCESSOR CORE SPEED IN KHZ * | ||
34 | *********************************************************************/ | ||
35 | |||
36 | static unsigned int pentium3_get_frequency(enum speedstep_processor processor) | ||
37 | { | ||
38 | /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ | ||
39 | struct { | ||
40 | unsigned int ratio; /* Frequency Multiplier (x10) */ | ||
41 | u8 bitmap; /* power on configuration bits | ||
42 | [27, 25:22] (in MSR 0x2a) */ | ||
43 | } msr_decode_mult[] = { | ||
44 | { 30, 0x01 }, | ||
45 | { 35, 0x05 }, | ||
46 | { 40, 0x02 }, | ||
47 | { 45, 0x06 }, | ||
48 | { 50, 0x00 }, | ||
49 | { 55, 0x04 }, | ||
50 | { 60, 0x0b }, | ||
51 | { 65, 0x0f }, | ||
52 | { 70, 0x09 }, | ||
53 | { 75, 0x0d }, | ||
54 | { 80, 0x0a }, | ||
55 | { 85, 0x26 }, | ||
56 | { 90, 0x20 }, | ||
57 | { 100, 0x2b }, | ||
58 | { 0, 0xff } /* error or unknown value */ | ||
59 | }; | ||
60 | |||
61 | /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */ | ||
62 | struct { | ||
63 | unsigned int value; /* Front Side Bus speed in MHz */ | ||
64 | u8 bitmap; /* power on configuration bits [18: 19] | ||
65 | (in MSR 0x2a) */ | ||
66 | } msr_decode_fsb[] = { | ||
67 | { 66, 0x0 }, | ||
68 | { 100, 0x2 }, | ||
69 | { 133, 0x1 }, | ||
70 | { 0, 0xff} | ||
71 | }; | ||
72 | |||
73 | u32 msr_lo, msr_tmp; | ||
74 | int i = 0, j = 0; | ||
75 | |||
76 | /* read MSR 0x2a - we only need the low 32 bits */ | ||
77 | rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); | ||
78 | dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); | ||
79 | msr_tmp = msr_lo; | ||
80 | |||
81 | /* decode the FSB */ | ||
82 | msr_tmp &= 0x00c0000; | ||
83 | msr_tmp >>= 18; | ||
84 | while (msr_tmp != msr_decode_fsb[i].bitmap) { | ||
85 | if (msr_decode_fsb[i].bitmap == 0xff) | ||
86 | return 0; | ||
87 | i++; | ||
88 | } | ||
89 | |||
90 | /* decode the multiplier */ | ||
91 | if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) { | ||
92 | dprintk("workaround for early PIIIs\n"); | ||
93 | msr_lo &= 0x03c00000; | ||
94 | } else | ||
95 | msr_lo &= 0x0bc00000; | ||
96 | msr_lo >>= 22; | ||
97 | while (msr_lo != msr_decode_mult[j].bitmap) { | ||
98 | if (msr_decode_mult[j].bitmap == 0xff) | ||
99 | return 0; | ||
100 | j++; | ||
101 | } | ||
102 | |||
103 | dprintk("speed is %u\n", | ||
104 | (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100)); | ||
105 | |||
106 | return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100; | ||
107 | } | ||
108 | |||
109 | |||
110 | static unsigned int pentiumM_get_frequency(void) | ||
111 | { | ||
112 | u32 msr_lo, msr_tmp; | ||
113 | |||
114 | rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); | ||
115 | dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); | ||
116 | |||
117 | /* see table B-2 of 24547212.pdf */ | ||
118 | if (msr_lo & 0x00040000) { | ||
119 | printk(KERN_DEBUG PFX "PM - invalid FSB: 0x%x 0x%x\n", | ||
120 | msr_lo, msr_tmp); | ||
121 | return 0; | ||
122 | } | ||
123 | |||
124 | msr_tmp = (msr_lo >> 22) & 0x1f; | ||
125 | dprintk("bits 22-26 are 0x%x, speed is %u\n", | ||
126 | msr_tmp, (msr_tmp * 100 * 1000)); | ||
127 | |||
128 | return msr_tmp * 100 * 1000; | ||
129 | } | ||
130 | |||
131 | static unsigned int pentium_core_get_frequency(void) | ||
132 | { | ||
133 | u32 fsb = 0; | ||
134 | u32 msr_lo, msr_tmp; | ||
135 | int ret; | ||
136 | |||
137 | rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp); | ||
138 | /* see table B-2 of 25366920.pdf */ | ||
139 | switch (msr_lo & 0x07) { | ||
140 | case 5: | ||
141 | fsb = 100000; | ||
142 | break; | ||
143 | case 1: | ||
144 | fsb = 133333; | ||
145 | break; | ||
146 | case 3: | ||
147 | fsb = 166667; | ||
148 | break; | ||
149 | case 2: | ||
150 | fsb = 200000; | ||
151 | break; | ||
152 | case 0: | ||
153 | fsb = 266667; | ||
154 | break; | ||
155 | case 4: | ||
156 | fsb = 333333; | ||
157 | break; | ||
158 | default: | ||
159 | printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value"); | ||
160 | } | ||
161 | |||
162 | rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); | ||
163 | dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", | ||
164 | msr_lo, msr_tmp); | ||
165 | |||
166 | msr_tmp = (msr_lo >> 22) & 0x1f; | ||
167 | dprintk("bits 22-26 are 0x%x, speed is %u\n", | ||
168 | msr_tmp, (msr_tmp * fsb)); | ||
169 | |||
170 | ret = (msr_tmp * fsb); | ||
171 | return ret; | ||
172 | } | ||
173 | |||
174 | |||
175 | static unsigned int pentium4_get_frequency(void) | ||
176 | { | ||
177 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
178 | u32 msr_lo, msr_hi, mult; | ||
179 | unsigned int fsb = 0; | ||
180 | unsigned int ret; | ||
181 | u8 fsb_code; | ||
182 | |||
183 | /* Pentium 4 Model 0 and 1 do not have the Core Clock Frequency | ||
184 | * to System Bus Frequency Ratio Field in the Processor Frequency | ||
185 | * Configuration Register of the MSR. Therefore the current | ||
186 | * frequency cannot be calculated and has to be measured. | ||
187 | */ | ||
188 | if (c->x86_model < 2) | ||
189 | return cpu_khz; | ||
190 | |||
191 | rdmsr(0x2c, msr_lo, msr_hi); | ||
192 | |||
193 | dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi); | ||
194 | |||
195 | /* decode the FSB: see IA-32 Intel (C) Architecture Software | ||
196 | * Developer's Manual, Volume 3: System Prgramming Guide, | ||
197 | * revision #12 in Table B-1: MSRs in the Pentium 4 and | ||
198 | * Intel Xeon Processors, on page B-4 and B-5. | ||
199 | */ | ||
200 | fsb_code = (msr_lo >> 16) & 0x7; | ||
201 | switch (fsb_code) { | ||
202 | case 0: | ||
203 | fsb = 100 * 1000; | ||
204 | break; | ||
205 | case 1: | ||
206 | fsb = 13333 * 10; | ||
207 | break; | ||
208 | case 2: | ||
209 | fsb = 200 * 1000; | ||
210 | break; | ||
211 | } | ||
212 | |||
213 | if (!fsb) | ||
214 | printk(KERN_DEBUG PFX "couldn't detect FSB speed. " | ||
215 | "Please send an e-mail to <linux@brodo.de>\n"); | ||
216 | |||
217 | /* Multiplier. */ | ||
218 | mult = msr_lo >> 24; | ||
219 | |||
220 | dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", | ||
221 | fsb, mult, (fsb * mult)); | ||
222 | |||
223 | ret = (fsb * mult); | ||
224 | return ret; | ||
225 | } | ||
226 | |||
227 | |||
228 | /* Warning: may get called from smp_call_function_single. */ | ||
229 | unsigned int speedstep_get_frequency(enum speedstep_processor processor) | ||
230 | { | ||
231 | switch (processor) { | ||
232 | case SPEEDSTEP_CPU_PCORE: | ||
233 | return pentium_core_get_frequency(); | ||
234 | case SPEEDSTEP_CPU_PM: | ||
235 | return pentiumM_get_frequency(); | ||
236 | case SPEEDSTEP_CPU_P4D: | ||
237 | case SPEEDSTEP_CPU_P4M: | ||
238 | return pentium4_get_frequency(); | ||
239 | case SPEEDSTEP_CPU_PIII_T: | ||
240 | case SPEEDSTEP_CPU_PIII_C: | ||
241 | case SPEEDSTEP_CPU_PIII_C_EARLY: | ||
242 | return pentium3_get_frequency(processor); | ||
243 | default: | ||
244 | return 0; | ||
245 | }; | ||
246 | return 0; | ||
247 | } | ||
248 | EXPORT_SYMBOL_GPL(speedstep_get_frequency); | ||
249 | |||
250 | |||
251 | /********************************************************************* | ||
252 | * DETECT SPEEDSTEP-CAPABLE PROCESSOR * | ||
253 | *********************************************************************/ | ||
254 | |||
255 | unsigned int speedstep_detect_processor(void) | ||
256 | { | ||
257 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
258 | u32 ebx, msr_lo, msr_hi; | ||
259 | |||
260 | dprintk("x86: %x, model: %x\n", c->x86, c->x86_model); | ||
261 | |||
262 | if ((c->x86_vendor != X86_VENDOR_INTEL) || | ||
263 | ((c->x86 != 6) && (c->x86 != 0xF))) | ||
264 | return 0; | ||
265 | |||
266 | if (c->x86 == 0xF) { | ||
267 | /* Intel Mobile Pentium 4-M | ||
268 | * or Intel Mobile Pentium 4 with 533 MHz FSB */ | ||
269 | if (c->x86_model != 2) | ||
270 | return 0; | ||
271 | |||
272 | ebx = cpuid_ebx(0x00000001); | ||
273 | ebx &= 0x000000FF; | ||
274 | |||
275 | dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); | ||
276 | |||
277 | switch (c->x86_mask) { | ||
278 | case 4: | ||
279 | /* | ||
280 | * B-stepping [M-P4-M] | ||
281 | * sample has ebx = 0x0f, production has 0x0e. | ||
282 | */ | ||
283 | if ((ebx == 0x0e) || (ebx == 0x0f)) | ||
284 | return SPEEDSTEP_CPU_P4M; | ||
285 | break; | ||
286 | case 7: | ||
287 | /* | ||
288 | * C-stepping [M-P4-M] | ||
289 | * needs to have ebx=0x0e, else it's a celeron: | ||
290 | * cf. 25130917.pdf / page 7, footnote 5 even | ||
291 | * though 25072120.pdf / page 7 doesn't say | ||
292 | * samples are only of B-stepping... | ||
293 | */ | ||
294 | if (ebx == 0x0e) | ||
295 | return SPEEDSTEP_CPU_P4M; | ||
296 | break; | ||
297 | case 9: | ||
298 | /* | ||
299 | * D-stepping [M-P4-M or M-P4/533] | ||
300 | * | ||
301 | * this is totally strange: CPUID 0x0F29 is | ||
302 | * used by M-P4-M, M-P4/533 and(!) Celeron CPUs. | ||
303 | * The latter need to be sorted out as they don't | ||
304 | * support speedstep. | ||
305 | * Celerons with CPUID 0x0F29 may have either | ||
306 | * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything | ||
307 | * specific. | ||
308 | * M-P4-Ms may have either ebx=0xe or 0xf [see above] | ||
309 | * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf] | ||
310 | * also, M-P4M HTs have ebx=0x8, too | ||
311 | * For now, they are distinguished by the model_id | ||
312 | * string | ||
313 | */ | ||
314 | if ((ebx == 0x0e) || | ||
315 | (strstr(c->x86_model_id, | ||
316 | "Mobile Intel(R) Pentium(R) 4") != NULL)) | ||
317 | return SPEEDSTEP_CPU_P4M; | ||
318 | break; | ||
319 | default: | ||
320 | break; | ||
321 | } | ||
322 | return 0; | ||
323 | } | ||
324 | |||
325 | switch (c->x86_model) { | ||
326 | case 0x0B: /* Intel PIII [Tualatin] */ | ||
327 | /* cpuid_ebx(1) is 0x04 for desktop PIII, | ||
328 | * 0x06 for mobile PIII-M */ | ||
329 | ebx = cpuid_ebx(0x00000001); | ||
330 | dprintk("ebx is %x\n", ebx); | ||
331 | |||
332 | ebx &= 0x000000FF; | ||
333 | |||
334 | if (ebx != 0x06) | ||
335 | return 0; | ||
336 | |||
337 | /* So far all PIII-M processors support SpeedStep. See | ||
338 | * Intel's 24540640.pdf of June 2003 | ||
339 | */ | ||
340 | return SPEEDSTEP_CPU_PIII_T; | ||
341 | |||
342 | case 0x08: /* Intel PIII [Coppermine] */ | ||
343 | |||
344 | /* all mobile PIII Coppermines have FSB 100 MHz | ||
345 | * ==> sort out a few desktop PIIIs. */ | ||
346 | rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi); | ||
347 | dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", | ||
348 | msr_lo, msr_hi); | ||
349 | msr_lo &= 0x00c0000; | ||
350 | if (msr_lo != 0x0080000) | ||
351 | return 0; | ||
352 | |||
353 | /* | ||
354 | * If the processor is a mobile version, | ||
355 | * platform ID has bit 50 set | ||
356 | * it has SpeedStep technology if either | ||
357 | * bit 56 or 57 is set | ||
358 | */ | ||
359 | rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi); | ||
360 | dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", | ||
361 | msr_lo, msr_hi); | ||
362 | if ((msr_hi & (1<<18)) && | ||
363 | (relaxed_check ? 1 : (msr_hi & (3<<24)))) { | ||
364 | if (c->x86_mask == 0x01) { | ||
365 | dprintk("early PIII version\n"); | ||
366 | return SPEEDSTEP_CPU_PIII_C_EARLY; | ||
367 | } else | ||
368 | return SPEEDSTEP_CPU_PIII_C; | ||
369 | } | ||
370 | |||
371 | default: | ||
372 | return 0; | ||
373 | } | ||
374 | } | ||
375 | EXPORT_SYMBOL_GPL(speedstep_detect_processor); | ||
376 | |||
377 | |||
378 | /********************************************************************* | ||
379 | * DETECT SPEEDSTEP SPEEDS * | ||
380 | *********************************************************************/ | ||
381 | |||
382 | unsigned int speedstep_get_freqs(enum speedstep_processor processor, | ||
383 | unsigned int *low_speed, | ||
384 | unsigned int *high_speed, | ||
385 | unsigned int *transition_latency, | ||
386 | void (*set_state) (unsigned int state)) | ||
387 | { | ||
388 | unsigned int prev_speed; | ||
389 | unsigned int ret = 0; | ||
390 | unsigned long flags; | ||
391 | struct timeval tv1, tv2; | ||
392 | |||
393 | if ((!processor) || (!low_speed) || (!high_speed) || (!set_state)) | ||
394 | return -EINVAL; | ||
395 | |||
396 | dprintk("trying to determine both speeds\n"); | ||
397 | |||
398 | /* get current speed */ | ||
399 | prev_speed = speedstep_get_frequency(processor); | ||
400 | if (!prev_speed) | ||
401 | return -EIO; | ||
402 | |||
403 | dprintk("previous speed is %u\n", prev_speed); | ||
404 | |||
405 | local_irq_save(flags); | ||
406 | |||
407 | /* switch to low state */ | ||
408 | set_state(SPEEDSTEP_LOW); | ||
409 | *low_speed = speedstep_get_frequency(processor); | ||
410 | if (!*low_speed) { | ||
411 | ret = -EIO; | ||
412 | goto out; | ||
413 | } | ||
414 | |||
415 | dprintk("low speed is %u\n", *low_speed); | ||
416 | |||
417 | /* start latency measurement */ | ||
418 | if (transition_latency) | ||
419 | do_gettimeofday(&tv1); | ||
420 | |||
421 | /* switch to high state */ | ||
422 | set_state(SPEEDSTEP_HIGH); | ||
423 | |||
424 | /* end latency measurement */ | ||
425 | if (transition_latency) | ||
426 | do_gettimeofday(&tv2); | ||
427 | |||
428 | *high_speed = speedstep_get_frequency(processor); | ||
429 | if (!*high_speed) { | ||
430 | ret = -EIO; | ||
431 | goto out; | ||
432 | } | ||
433 | |||
434 | dprintk("high speed is %u\n", *high_speed); | ||
435 | |||
436 | if (*low_speed == *high_speed) { | ||
437 | ret = -ENODEV; | ||
438 | goto out; | ||
439 | } | ||
440 | |||
441 | /* switch to previous state, if necessary */ | ||
442 | if (*high_speed != prev_speed) | ||
443 | set_state(SPEEDSTEP_LOW); | ||
444 | |||
445 | if (transition_latency) { | ||
446 | *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC + | ||
447 | tv2.tv_usec - tv1.tv_usec; | ||
448 | dprintk("transition latency is %u uSec\n", *transition_latency); | ||
449 | |||
450 | /* convert uSec to nSec and add 20% for safety reasons */ | ||
451 | *transition_latency *= 1200; | ||
452 | |||
453 | /* check if the latency measurement is too high or too low | ||
454 | * and set it to a safe value (500uSec) in that case | ||
455 | */ | ||
456 | if (*transition_latency > 10000000 || | ||
457 | *transition_latency < 50000) { | ||
458 | printk(KERN_WARNING PFX "frequency transition " | ||
459 | "measured seems out of range (%u " | ||
460 | "nSec), falling back to a safe one of" | ||
461 | "%u nSec.\n", | ||
462 | *transition_latency, 500000); | ||
463 | *transition_latency = 500000; | ||
464 | } | ||
465 | } | ||
466 | |||
467 | out: | ||
468 | local_irq_restore(flags); | ||
469 | return ret; | ||
470 | } | ||
471 | EXPORT_SYMBOL_GPL(speedstep_get_freqs); | ||
472 | |||
473 | #ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK | ||
474 | module_param(relaxed_check, int, 0444); | ||
475 | MODULE_PARM_DESC(relaxed_check, | ||
476 | "Don't do all checks for speedstep capability."); | ||
477 | #endif | ||
478 | |||
479 | MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); | ||
480 | MODULE_DESCRIPTION("Library for Intel SpeedStep 1 or 2 cpufreq drivers."); | ||
481 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h deleted file mode 100644 index 70d9cea1219d..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h +++ /dev/null | |||
@@ -1,49 +0,0 @@ | |||
1 | /* | ||
2 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * | ||
6 | * Library for common functions for Intel SpeedStep v.1 and v.2 support | ||
7 | * | ||
8 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
9 | */ | ||
10 | |||
11 | |||
12 | |||
/*
 * processors
 *
 * Identifies the CPU family for speedstep_get_frequency() and
 * speedstep_get_freqs(). The value 0 is not a member: it is what
 * speedstep_detect_processor() returns when no supported CPU is found.
 */
enum speedstep_processor {
	SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001, /* Coppermine core */
	SPEEDSTEP_CPU_PIII_C = 0x00000002, /* Coppermine core */
	SPEEDSTEP_CPU_PIII_T = 0x00000003, /* Tualatin core */
	SPEEDSTEP_CPU_P4M = 0x00000004, /* P4-M */
/* the following processors are not speedstep-capable and are not auto-detected
 * in speedstep_detect_processor(). However, their speed can be detected using
 * the speedstep_get_frequency() call. */
	SPEEDSTEP_CPU_PM = 0xFFFFFF03, /* Pentium M */
	SPEEDSTEP_CPU_P4D = 0xFFFFFF04, /* desktop P4 */
	SPEEDSTEP_CPU_PCORE = 0xFFFFFF05, /* Core */
};
26 | |||
27 | /* speedstep states -- only two of them */ | ||
28 | |||
29 | #define SPEEDSTEP_HIGH 0x00000000 | ||
30 | #define SPEEDSTEP_LOW 0x00000001 | ||
31 | |||
32 | |||
33 | /* detect a speedstep-capable processor */ | ||
34 | extern enum speedstep_processor speedstep_detect_processor(void); | ||
35 | |||
36 | /* detect the current speed (in khz) of the processor */ | ||
37 | extern unsigned int speedstep_get_frequency(enum speedstep_processor processor); | ||
38 | |||
39 | |||
/* detect the low and high speeds of the processor. The callback
 * set_state takes a single argument, which is either SPEEDSTEP_HIGH
 * or SPEEDSTEP_LOW. If transition_latency is non-NULL, the measured
 * switching latency (in nSec) is stored there.
 */
extern unsigned int speedstep_get_freqs(enum speedstep_processor processor,
	unsigned int *low_speed,
	unsigned int *high_speed,
	unsigned int *transition_latency,
	void (*set_state) (unsigned int state));
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c deleted file mode 100644 index 91bc25b67bc1..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ /dev/null | |||
@@ -1,467 +0,0 @@ | |||
1 | /* | ||
2 | * Intel SpeedStep SMI driver. | ||
3 | * | ||
4 | * (C) 2003 Hiroshi Miura <miura@da-cha.org> | ||
5 | * | ||
6 | * Licensed under the terms of the GNU GPL License version 2. | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | |||
11 | /********************************************************************* | ||
12 | * SPEEDSTEP - DEFINITIONS * | ||
13 | *********************************************************************/ | ||
14 | |||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/moduleparam.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/cpufreq.h> | ||
20 | #include <linux/delay.h> | ||
21 | #include <linux/io.h> | ||
22 | #include <asm/ist.h> | ||
23 | |||
24 | #include "speedstep-lib.h" | ||
25 | |||
26 | /* speedstep system management interface port/command. | ||
27 | * | ||
28 | * These parameters are got from IST-SMI BIOS call. | ||
29 | * If user gives it, these are used. | ||
30 | * | ||
31 | */ | ||
32 | static int smi_port; | ||
33 | static int smi_cmd; | ||
34 | static unsigned int smi_sig; | ||
35 | |||
36 | /* info about the processor */ | ||
37 | static enum speedstep_processor speedstep_processor; | ||
38 | |||
/*
 * There are only two frequency states for each processor. Values
 * are in kHz for the time being.
 *
 * The table is indexed by state: entry 0 is SPEEDSTEP_HIGH and entry 1 is
 * SPEEDSTEP_LOW. The frequencies start at 0 and are filled in during
 * per-CPU initialisation.
 */
static struct cpufreq_frequency_table speedstep_freqs[] = {
	{SPEEDSTEP_HIGH, 0},
	{SPEEDSTEP_LOW, 0},
	{0, CPUFREQ_TABLE_END},
};
48 | |||
49 | #define GET_SPEEDSTEP_OWNER 0 | ||
50 | #define GET_SPEEDSTEP_STATE 1 | ||
51 | #define SET_SPEEDSTEP_STATE 2 | ||
52 | #define GET_SPEEDSTEP_FREQS 4 | ||
53 | |||
54 | /* how often shall the SMI call be tried if it failed, e.g. because | ||
55 | * of DMA activity going on? */ | ||
56 | #define SMI_TRIES 5 | ||
57 | |||
58 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
59 | "speedstep-smi", msg) | ||
60 | |||
/**
 * speedstep_smi_ownership - issue the GET_SPEEDSTEP_OWNER SMI call
 *
 * Builds the SMI command from the upper 24 bits of smi_sig and the low
 * byte of smi_cmd, then writes AL to smi_port with the function code in
 * EBX and the physical address of an Intel copyright string in ESI.
 *
 * Returns the value left in EDI after the call.
 */
static int speedstep_smi_ownership(void)
{
	u32 command, result, magic, dummy;
	u32 function = GET_SPEEDSTEP_OWNER;
	unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation";

	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
	/* the string's physical address is passed to the call in ESI */
	magic = virt_to_phys(magic_data);

	dprintk("trying to obtain ownership with command %x at port %x\n",
		command, smi_port);

	/*
	 * EBP is saved and restored around the port write, presumably
	 * because the SMI handler may clobber it -- TODO confirm.
	 */
	__asm__ __volatile__(
		"push %%ebp\n"
		"out %%al, (%%dx)\n"
		"pop %%ebp\n"
		: "=D" (result),
		  "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
		  "=S" (dummy)
		: "a" (command), "b" (function), "c" (0), "d" (smi_port),
		  "D" (0), "S" (magic)
		: "memory"
	);

	dprintk("result is %x\n", result);

	return result;
}
92 | |||
/**
 * speedstep_smi_get_freqs - get SpeedStep preferred & current freq.
 * @low: the low frequency value is placed here
 * @high: the high frequency value is placed here
 *
 * Only available on later SpeedStep-enabled systems, returns false results or
 * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing
 * shows that the latter occurs if !(ist_info.event & 0xFFFF).
 *
 * Returns -ENODEV without issuing the call if (ist_info.event & 0xFFFF) is
 * zero, -EINVAL if the two reported values sum to less than 600, and
 * otherwise the value left in EAX by the call. @low and @high are only
 * written in the last case, as the reported values multiplied by 1000.
 */
static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high)
{
	u32 command, result = 0, edi, high_mhz, low_mhz, dummy;
	u32 state = 0;
	u32 function = GET_SPEEDSTEP_FREQS;

	if (!(ist_info.event & 0xFFFF)) {
		dprintk("bug #1422 -- can't read freqs from BIOS\n");
		return -ENODEV;
	}

	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);

	dprintk("trying to determine frequencies with command %x at port %x\n",
		command, smi_port);

	/* outputs: EAX = result, EBX = high value, ECX = low value */
	__asm__ __volatile__(
		"push %%ebp\n"
		"out %%al, (%%dx)\n"
		"pop %%ebp"
		: "=a" (result),
		  "=b" (high_mhz),
		  "=c" (low_mhz),
		  "=d" (state), "=D" (edi), "=S" (dummy)
		: "a" (command),
		  "b" (function),
		  "c" (state),
		  "d" (smi_port), "S" (0), "D" (0)
	);

	dprintk("result %x, low_freq %u, high_freq %u\n",
		result, low_mhz, high_mhz);

	/* abort if results are obviously incorrect... */
	if ((high_mhz + low_mhz) < 600)
		return -EINVAL;

	*high = high_mhz * 1000;
	*low = low_mhz * 1000;

	return result;
}
144 | |||
/**
 * speedstep_get_state - get the current SpeedStep state
 *
 * Issues the GET_SPEEDSTEP_STATE SMI call and returns bit 0 of the value
 * left in EBX, i.e. 0 (SPEEDSTEP_HIGH) or 1 (SPEEDSTEP_LOW). The value
 * left in EAX is only logged.
 */
static int speedstep_get_state(void)
{
	u32 function = GET_SPEEDSTEP_STATE;
	u32 result, state, edi, command, dummy;

	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);

	dprintk("trying to determine current setting with command %x "
		"at port %x\n", command, smi_port);

	/* outputs: EAX = result, EBX = state */
	__asm__ __volatile__(
		"push %%ebp\n"
		"out %%al, (%%dx)\n"
		"pop %%ebp\n"
		: "=a" (result),
		  "=b" (state), "=D" (edi),
		  "=c" (dummy), "=d" (dummy), "=S" (dummy)
		: "a" (command), "b" (function), "c" (0),
		  "d" (smi_port), "S" (0), "D" (0)
	);

	dprintk("state is %x, result is %x\n", state, result);

	return state & 1;
}
175 | |||
176 | |||
/**
 * speedstep_set_state - set the SpeedStep state
 * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
 *
 * Values of @state above 1 are ignored. The SET_SPEEDSTEP_STATE SMI call is
 * issued with local interrupts disabled and repeated, with a growing delay,
 * until the state reported back in EBX equals @state or the retry budget
 * (SMI_TRIES) is used up. The outcome is only logged; nothing is returned.
 */
static void speedstep_set_state(unsigned int state)
{
	unsigned int result = 0, command, new_state, dummy;
	unsigned long flags;
	unsigned int function = SET_SPEEDSTEP_STATE;
	unsigned int retry = 0;

	if (state > 0x1)
		return;

	/* Disable IRQs */
	local_irq_save(flags);

	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);

	dprintk("trying to set frequency to state %u "
		"with command %x at port %x\n",
		state, command, smi_port);

	do {
		if (retry) {
			dprintk("retry %u, previous result %u, waiting...\n",
					retry, result);
			/* busy-wait 50 ms, 100 ms, ... before each retry */
			mdelay(retry * 50);
		}
		retry++;
		/* outputs: EBX = state now in effect, EDI = result */
		__asm__ __volatile__(
			"push %%ebp\n"
			"out %%al, (%%dx)\n"
			"pop %%ebp"
			: "=b" (new_state), "=D" (result),
			  "=c" (dummy), "=a" (dummy),
			  "=d" (dummy), "=S" (dummy)
			: "a" (command), "b" (function), "c" (state),
			  "d" (smi_port), "S" (0), "D" (0)
			);
	} while ((new_state != state) && (retry <= SMI_TRIES));

	/* enable IRQs */
	local_irq_restore(flags);

	if (new_state == state)
		dprintk("change to %u MHz succeeded after %u tries "
			"with result %u\n",
			(speedstep_freqs[new_state].frequency / 1000),
			retry, result);
	else
		printk(KERN_ERR "cpufreq: change to state %u "
			"failed with new_state %u and result %u\n",
			state, new_state, result);

	return;
}
235 | |||
236 | |||
237 | /** | ||
238 | * speedstep_target - set a new CPUFreq policy | ||
239 | * @policy: new policy | ||
240 | * @target_freq: new freq | ||
241 | * @relation: | ||
242 | * | ||
243 | * Sets a new CPUFreq policy/freq. | ||
244 | */ | ||
245 | static int speedstep_target(struct cpufreq_policy *policy, | ||
246 | unsigned int target_freq, unsigned int relation) | ||
247 | { | ||
248 | unsigned int newstate = 0; | ||
249 | struct cpufreq_freqs freqs; | ||
250 | |||
251 | if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], | ||
252 | target_freq, relation, &newstate)) | ||
253 | return -EINVAL; | ||
254 | |||
255 | freqs.old = speedstep_freqs[speedstep_get_state()].frequency; | ||
256 | freqs.new = speedstep_freqs[newstate].frequency; | ||
257 | freqs.cpu = 0; /* speedstep.c is UP only driver */ | ||
258 | |||
259 | if (freqs.old == freqs.new) | ||
260 | return 0; | ||
261 | |||
262 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
263 | speedstep_set_state(newstate); | ||
264 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
265 | |||
266 | return 0; | ||
267 | } | ||
268 | |||
269 | |||
270 | /** | ||
271 | * speedstep_verify - verifies a new CPUFreq policy | ||
272 | * @policy: new policy | ||
273 | * | ||
274 | * Limit must be within speedstep_low_freq and speedstep_high_freq, with | ||
275 | * at least one border included. | ||
276 | */ | ||
277 | static int speedstep_verify(struct cpufreq_policy *policy) | ||
278 | { | ||
279 | return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); | ||
280 | } | ||
281 | |||
282 | |||
283 | static int speedstep_cpu_init(struct cpufreq_policy *policy) | ||
284 | { | ||
285 | int result; | ||
286 | unsigned int speed, state; | ||
287 | unsigned int *low, *high; | ||
288 | |||
289 | /* capability check */ | ||
290 | if (policy->cpu != 0) | ||
291 | return -ENODEV; | ||
292 | |||
293 | result = speedstep_smi_ownership(); | ||
294 | if (result) { | ||
295 | dprintk("fails in acquiring ownership of a SMI interface.\n"); | ||
296 | return -EINVAL; | ||
297 | } | ||
298 | |||
299 | /* detect low and high frequency */ | ||
300 | low = &speedstep_freqs[SPEEDSTEP_LOW].frequency; | ||
301 | high = &speedstep_freqs[SPEEDSTEP_HIGH].frequency; | ||
302 | |||
303 | result = speedstep_smi_get_freqs(low, high); | ||
304 | if (result) { | ||
305 | /* fall back to speedstep_lib.c detection mechanism: | ||
306 | * try both states out */ | ||
307 | dprintk("could not detect low and high frequencies " | ||
308 | "by SMI call.\n"); | ||
309 | result = speedstep_get_freqs(speedstep_processor, | ||
310 | low, high, | ||
311 | NULL, | ||
312 | &speedstep_set_state); | ||
313 | |||
314 | if (result) { | ||
315 | dprintk("could not detect two different speeds" | ||
316 | " -- aborting.\n"); | ||
317 | return result; | ||
318 | } else | ||
319 | dprintk("workaround worked.\n"); | ||
320 | } | ||
321 | |||
322 | /* get current speed setting */ | ||
323 | state = speedstep_get_state(); | ||
324 | speed = speedstep_freqs[state].frequency; | ||
325 | |||
326 | dprintk("currently at %s speed setting - %i MHz\n", | ||
327 | (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) | ||
328 | ? "low" : "high", | ||
329 | (speed / 1000)); | ||
330 | |||
331 | /* cpuinfo and default policy values */ | ||
332 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
333 | policy->cur = speed; | ||
334 | |||
335 | result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); | ||
336 | if (result) | ||
337 | return result; | ||
338 | |||
339 | cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); | ||
340 | |||
341 | return 0; | ||
342 | } | ||
343 | |||
344 | static int speedstep_cpu_exit(struct cpufreq_policy *policy) | ||
345 | { | ||
346 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
347 | return 0; | ||
348 | } | ||
349 | |||
350 | static unsigned int speedstep_get(unsigned int cpu) | ||
351 | { | ||
352 | if (cpu) | ||
353 | return -ENODEV; | ||
354 | return speedstep_get_frequency(speedstep_processor); | ||
355 | } | ||
356 | |||
357 | |||
358 | static int speedstep_resume(struct cpufreq_policy *policy) | ||
359 | { | ||
360 | int result = speedstep_smi_ownership(); | ||
361 | |||
362 | if (result) | ||
363 | dprintk("fails in re-acquiring ownership of a SMI interface.\n"); | ||
364 | |||
365 | return result; | ||
366 | } | ||
367 | |||
368 | static struct freq_attr *speedstep_attr[] = { | ||
369 | &cpufreq_freq_attr_scaling_available_freqs, | ||
370 | NULL, | ||
371 | }; | ||
372 | |||
373 | static struct cpufreq_driver speedstep_driver = { | ||
374 | .name = "speedstep-smi", | ||
375 | .verify = speedstep_verify, | ||
376 | .target = speedstep_target, | ||
377 | .init = speedstep_cpu_init, | ||
378 | .exit = speedstep_cpu_exit, | ||
379 | .get = speedstep_get, | ||
380 | .resume = speedstep_resume, | ||
381 | .owner = THIS_MODULE, | ||
382 | .attr = speedstep_attr, | ||
383 | }; | ||
384 | |||
385 | /** | ||
386 | * speedstep_init - initializes the SpeedStep CPUFreq driver | ||
387 | * | ||
388 | * Initializes the SpeedStep support. Returns -ENODEV on unsupported | ||
389 | * BIOS, -EINVAL on problems during initialization, and zero on | ||
390 | * success. | ||
391 | */ | ||
392 | static int __init speedstep_init(void) | ||
393 | { | ||
394 | speedstep_processor = speedstep_detect_processor(); | ||
395 | |||
396 | switch (speedstep_processor) { | ||
397 | case SPEEDSTEP_CPU_PIII_T: | ||
398 | case SPEEDSTEP_CPU_PIII_C: | ||
399 | case SPEEDSTEP_CPU_PIII_C_EARLY: | ||
400 | break; | ||
401 | default: | ||
402 | speedstep_processor = 0; | ||
403 | } | ||
404 | |||
405 | if (!speedstep_processor) { | ||
406 | dprintk("No supported Intel CPU detected.\n"); | ||
407 | return -ENODEV; | ||
408 | } | ||
409 | |||
410 | dprintk("signature:0x%.8lx, command:0x%.8lx, " | ||
411 | "event:0x%.8lx, perf_level:0x%.8lx.\n", | ||
412 | ist_info.signature, ist_info.command, | ||
413 | ist_info.event, ist_info.perf_level); | ||
414 | |||
415 | /* Error if no IST-SMI BIOS or no PARM | ||
416 | sig= 'ISGE' aka 'Intel Speedstep Gate E' */ | ||
417 | if ((ist_info.signature != 0x47534943) && ( | ||
418 | (smi_port == 0) || (smi_cmd == 0))) | ||
419 | return -ENODEV; | ||
420 | |||
421 | if (smi_sig == 1) | ||
422 | smi_sig = 0x47534943; | ||
423 | else | ||
424 | smi_sig = ist_info.signature; | ||
425 | |||
426 | /* setup smi_port from MODULE_PARM or BIOS */ | ||
427 | if ((smi_port > 0xff) || (smi_port < 0)) | ||
428 | return -EINVAL; | ||
429 | else if (smi_port == 0) | ||
430 | smi_port = ist_info.command & 0xff; | ||
431 | |||
432 | if ((smi_cmd > 0xff) || (smi_cmd < 0)) | ||
433 | return -EINVAL; | ||
434 | else if (smi_cmd == 0) | ||
435 | smi_cmd = (ist_info.command >> 16) & 0xff; | ||
436 | |||
437 | return cpufreq_register_driver(&speedstep_driver); | ||
438 | } | ||
439 | |||
440 | |||
441 | /** | ||
442 | * speedstep_exit - unregisters SpeedStep support | ||
443 | * | ||
444 | * Unregisters SpeedStep support. | ||
445 | */ | ||
446 | static void __exit speedstep_exit(void) | ||
447 | { | ||
448 | cpufreq_unregister_driver(&speedstep_driver); | ||
449 | } | ||
450 | |||
451 | module_param(smi_port, int, 0444); | ||
452 | module_param(smi_cmd, int, 0444); | ||
453 | module_param(smi_sig, uint, 0444); | ||
454 | |||
455 | MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value " | ||
456 | "-- Intel's default setting is 0xb2"); | ||
457 | MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value " | ||
458 | "-- Intel's default setting is 0x82"); | ||
459 | MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the " | ||
460 | "SMI interface."); | ||
461 | |||
462 | MODULE_AUTHOR("Hiroshi Miura"); | ||
463 | MODULE_DESCRIPTION("Speedstep driver for IST applet SMI interface."); | ||
464 | MODULE_LICENSE("GPL"); | ||
465 | |||
466 | module_init(speedstep_init); | ||
467 | module_exit(speedstep_exit); | ||
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index df86bc8c859d..1edf5ba4fb2b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -29,10 +29,10 @@ | |||
29 | 29 | ||
30 | static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | 30 | static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) |
31 | { | 31 | { |
32 | u64 misc_enable; | ||
33 | |||
32 | /* Unmask CPUID levels if masked: */ | 34 | /* Unmask CPUID levels if masked: */ |
33 | if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { | 35 | if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { |
34 | u64 misc_enable; | ||
35 | |||
36 | rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | 36 | rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); |
37 | 37 | ||
38 | if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) { | 38 | if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) { |
@@ -118,8 +118,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
118 | * (model 2) with the same problem. | 118 | * (model 2) with the same problem. |
119 | */ | 119 | */ |
120 | if (c->x86 == 15) { | 120 | if (c->x86 == 15) { |
121 | u64 misc_enable; | ||
122 | |||
123 | rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | 121 | rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); |
124 | 122 | ||
125 | if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { | 123 | if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { |
@@ -130,6 +128,19 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
130 | } | 128 | } |
131 | } | 129 | } |
132 | #endif | 130 | #endif |
131 | |||
132 | /* | ||
133 | * If fast string is not enabled in IA32_MISC_ENABLE for any reason, | ||
134 | * clear the fast string and enhanced fast string CPU capabilities. | ||
135 | */ | ||
136 | if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { | ||
137 | rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | ||
138 | if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) { | ||
139 | printk(KERN_INFO "Disabled fast string operations\n"); | ||
140 | setup_clear_cpu_cap(X86_FEATURE_REP_GOOD); | ||
141 | setup_clear_cpu_cap(X86_FEATURE_ERMS); | ||
142 | } | ||
143 | } | ||
133 | } | 144 | } |
134 | 145 | ||
135 | #ifdef CONFIG_X86_32 | 146 | #ifdef CONFIG_X86_32 |
@@ -400,12 +411,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
400 | 411 | ||
401 | switch (c->x86_model) { | 412 | switch (c->x86_model) { |
402 | case 5: | 413 | case 5: |
403 | if (c->x86_mask == 0) { | 414 | if (l2 == 0) |
404 | if (l2 == 0) | 415 | p = "Celeron (Covington)"; |
405 | p = "Celeron (Covington)"; | 416 | else if (l2 == 256) |
406 | else if (l2 == 256) | 417 | p = "Mobile Pentium II (Dixon)"; |
407 | p = "Mobile Pentium II (Dixon)"; | ||
408 | } | ||
409 | break; | 418 | break; |
410 | 419 | ||
411 | case 6: | 420 | case 6: |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 1ce1af2899df..c105c533ed94 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -327,7 +327,6 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) | |||
327 | l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); | 327 | l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); |
328 | l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); | 328 | l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); |
329 | 329 | ||
330 | l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; | ||
331 | l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; | 330 | l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; |
332 | } | 331 | } |
333 | 332 | ||
@@ -454,27 +453,16 @@ int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot, | |||
454 | { | 453 | { |
455 | int ret = 0; | 454 | int ret = 0; |
456 | 455 | ||
457 | #define SUBCACHE_MASK (3UL << 20) | 456 | /* check if @slot is already used or the index is already disabled */ |
458 | #define SUBCACHE_INDEX 0xfff | ||
459 | |||
460 | /* | ||
461 | * check whether this slot is already used or | ||
462 | * the index is already disabled | ||
463 | */ | ||
464 | ret = amd_get_l3_disable_slot(l3, slot); | 457 | ret = amd_get_l3_disable_slot(l3, slot); |
465 | if (ret >= 0) | 458 | if (ret >= 0) |
466 | return -EINVAL; | 459 | return -EINVAL; |
467 | 460 | ||
468 | /* | 461 | if (index > l3->indices) |
469 | * check whether the other slot has disabled the | ||
470 | * same index already | ||
471 | */ | ||
472 | if (index == amd_get_l3_disable_slot(l3, !slot)) | ||
473 | return -EINVAL; | 462 | return -EINVAL; |
474 | 463 | ||
475 | /* do not allow writes outside of allowed bits */ | 464 | /* check whether the other slot has disabled the same index already */ |
476 | if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || | 465 | if (index == amd_get_l3_disable_slot(l3, !slot)) |
477 | ((index & SUBCACHE_INDEX) > l3->indices)) | ||
478 | return -EINVAL; | 466 | return -EINVAL; |
479 | 467 | ||
480 | amd_l3_disable_index(l3, cpu, slot, index); | 468 | amd_l3_disable_index(l3, cpu, slot, index); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3385ea26f684..ff1ae9b6464d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -105,20 +105,6 @@ static int cpu_missing; | |||
105 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | 105 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); |
106 | EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); | 106 | EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); |
107 | 107 | ||
108 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, | ||
109 | void *data) | ||
110 | { | ||
111 | pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n"); | ||
112 | pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n"); | ||
113 | |||
114 | return NOTIFY_STOP; | ||
115 | } | ||
116 | |||
117 | static struct notifier_block mce_dec_nb = { | ||
118 | .notifier_call = default_decode_mce, | ||
119 | .priority = -1, | ||
120 | }; | ||
121 | |||
122 | /* MCA banks polled by the period polling timer for corrected events */ | 108 | /* MCA banks polled by the period polling timer for corrected events */ |
123 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | 109 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { |
124 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | 110 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL |
@@ -212,6 +198,8 @@ void mce_log(struct mce *mce) | |||
212 | 198 | ||
213 | static void print_mce(struct mce *m) | 199 | static void print_mce(struct mce *m) |
214 | { | 200 | { |
201 | int ret = 0; | ||
202 | |||
215 | pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", | 203 | pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", |
216 | m->extcpu, m->mcgstatus, m->bank, m->status); | 204 | m->extcpu, m->mcgstatus, m->bank, m->status); |
217 | 205 | ||
@@ -239,7 +227,11 @@ static void print_mce(struct mce *m) | |||
239 | * Print out human-readable details about the MCE error, | 227 | * Print out human-readable details about the MCE error, |
240 | * (if the CPU has an implementation for that) | 228 | * (if the CPU has an implementation for that) |
241 | */ | 229 | */ |
242 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); | 230 | ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); |
231 | if (ret == NOTIFY_STOP) | ||
232 | return; | ||
233 | |||
234 | pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n"); | ||
243 | } | 235 | } |
244 | 236 | ||
245 | #define PANIC_TIMEOUT 5 /* 5 seconds */ | 237 | #define PANIC_TIMEOUT 5 /* 5 seconds */ |
@@ -590,7 +582,6 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
590 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { | 582 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { |
591 | mce_log(&m); | 583 | mce_log(&m); |
592 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); | 584 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); |
593 | add_taint(TAINT_MACHINE_CHECK); | ||
594 | } | 585 | } |
595 | 586 | ||
596 | /* | 587 | /* |
@@ -1722,8 +1713,6 @@ __setup("mce", mcheck_enable); | |||
1722 | 1713 | ||
1723 | int __init mcheck_init(void) | 1714 | int __init mcheck_init(void) |
1724 | { | 1715 | { |
1725 | atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); | ||
1726 | |||
1727 | mcheck_intel_therm_init(); | 1716 | mcheck_intel_therm_init(); |
1728 | 1717 | ||
1729 | return 0; | 1718 | return 0; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 167f97b5596e..bb0adad35143 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -509,6 +509,7 @@ recurse: | |||
509 | out_free: | 509 | out_free: |
510 | if (b) { | 510 | if (b) { |
511 | kobject_put(&b->kobj); | 511 | kobject_put(&b->kobj); |
512 | list_del(&b->miscj); | ||
512 | kfree(b); | 513 | kfree(b); |
513 | } | 514 | } |
514 | return err; | 515 | return err; |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 6f8c5e9da97f..27c625178bf1 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -187,8 +187,6 @@ static int therm_throt_process(bool new_event, int event, int level) | |||
187 | this_cpu, | 187 | this_cpu, |
188 | level == CORE_LEVEL ? "Core" : "Package", | 188 | level == CORE_LEVEL ? "Core" : "Package", |
189 | state->count); | 189 | state->count); |
190 | |||
191 | add_taint(TAINT_MACHINE_CHECK); | ||
192 | return 1; | 190 | return 1; |
193 | } | 191 | } |
194 | if (old_event) { | 192 | if (old_event) { |
@@ -355,7 +353,6 @@ static void notify_thresholds(__u64 msr_val) | |||
355 | static void intel_thermal_interrupt(void) | 353 | static void intel_thermal_interrupt(void) |
356 | { | 354 | { |
357 | __u64 msr_val; | 355 | __u64 msr_val; |
358 | struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); | ||
359 | 356 | ||
360 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | 357 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); |
361 | 358 | ||
@@ -367,19 +364,19 @@ static void intel_thermal_interrupt(void) | |||
367 | CORE_LEVEL) != 0) | 364 | CORE_LEVEL) != 0) |
368 | mce_log_therm_throt_event(CORE_THROTTLED | msr_val); | 365 | mce_log_therm_throt_event(CORE_THROTTLED | msr_val); |
369 | 366 | ||
370 | if (cpu_has(c, X86_FEATURE_PLN)) | 367 | if (this_cpu_has(X86_FEATURE_PLN)) |
371 | if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, | 368 | if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, |
372 | POWER_LIMIT_EVENT, | 369 | POWER_LIMIT_EVENT, |
373 | CORE_LEVEL) != 0) | 370 | CORE_LEVEL) != 0) |
374 | mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); | 371 | mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); |
375 | 372 | ||
376 | if (cpu_has(c, X86_FEATURE_PTS)) { | 373 | if (this_cpu_has(X86_FEATURE_PTS)) { |
377 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); | 374 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); |
378 | if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, | 375 | if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, |
379 | THERMAL_THROTTLING_EVENT, | 376 | THERMAL_THROTTLING_EVENT, |
380 | PACKAGE_LEVEL) != 0) | 377 | PACKAGE_LEVEL) != 0) |
381 | mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); | 378 | mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); |
382 | if (cpu_has(c, X86_FEATURE_PLN)) | 379 | if (this_cpu_has(X86_FEATURE_PLN)) |
383 | if (therm_throt_process(msr_val & | 380 | if (therm_throt_process(msr_val & |
384 | PACKAGE_THERM_STATUS_POWER_LIMIT, | 381 | PACKAGE_THERM_STATUS_POWER_LIMIT, |
385 | POWER_LIMIT_EVENT, | 382 | POWER_LIMIT_EVENT, |
@@ -393,7 +390,6 @@ static void unexpected_thermal_interrupt(void) | |||
393 | { | 390 | { |
394 | printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n", | 391 | printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n", |
395 | smp_processor_id()); | 392 | smp_processor_id()); |
396 | add_taint(TAINT_MACHINE_CHECK); | ||
397 | } | 393 | } |
398 | 394 | ||
399 | static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; | 395 | static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; |
@@ -446,18 +442,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
446 | */ | 442 | */ |
447 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 443 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
448 | 444 | ||
445 | h = lvtthmr_init; | ||
449 | /* | 446 | /* |
450 | * The initial value of thermal LVT entries on all APs always reads | 447 | * The initial value of thermal LVT entries on all APs always reads |
451 | * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI | 448 | * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI |
452 | * sequence to them and LVT registers are reset to 0s except for | 449 | * sequence to them and LVT registers are reset to 0s except for |
453 | * the mask bits which are set to 1s when APs receive INIT IPI. | 450 | * the mask bits which are set to 1s when APs receive INIT IPI. |
454 | * Always restore the value that BIOS has programmed on AP based on | 451 | * If BIOS takes over the thermal interrupt and sets its interrupt |
455 | * BSP's info we saved since BIOS is always setting the same value | 452 | * delivery mode to SMI (not fixed), it restores the value that the |
456 | * for all threads/cores | 453 | * BIOS has programmed on AP based on BSP's info we saved since BIOS |
454 | * is always setting the same value for all threads/cores. | ||
457 | */ | 455 | */ |
458 | apic_write(APIC_LVTTHMR, lvtthmr_init); | 456 | if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED) |
457 | apic_write(APIC_LVTTHMR, lvtthmr_init); | ||
459 | 458 | ||
460 | h = lvtthmr_init; | ||
461 | 459 | ||
462 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | 460 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { |
463 | printk(KERN_DEBUG | 461 | printk(KERN_DEBUG |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e638689279d3..3a0338b4b179 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
32 | #include <asm/compat.h> | 32 | #include <asm/compat.h> |
33 | #include <asm/smp.h> | 33 | #include <asm/smp.h> |
34 | #include <asm/alternative.h> | ||
34 | 35 | ||
35 | #if 0 | 36 | #if 0 |
36 | #undef wrmsrl | 37 | #undef wrmsrl |
@@ -363,12 +364,18 @@ again: | |||
363 | return new_raw_count; | 364 | return new_raw_count; |
364 | } | 365 | } |
365 | 366 | ||
366 | /* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */ | ||
367 | static inline int x86_pmu_addr_offset(int index) | 367 | static inline int x86_pmu_addr_offset(int index) |
368 | { | 368 | { |
369 | if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) | 369 | int offset; |
370 | return index << 1; | 370 | |
371 | return index; | 371 | /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */ |
372 | alternative_io(ASM_NOP2, | ||
373 | "shll $1, %%eax", | ||
374 | X86_FEATURE_PERFCTR_CORE, | ||
375 | "=a" (offset), | ||
376 | "a" (index)); | ||
377 | |||
378 | return offset; | ||
372 | } | 379 | } |
373 | 380 | ||
374 | static inline unsigned int x86_pmu_config_addr(int index) | 381 | static inline unsigned int x86_pmu_config_addr(int index) |
@@ -1766,17 +1773,6 @@ static struct pmu pmu = { | |||
1766 | * callchain support | 1773 | * callchain support |
1767 | */ | 1774 | */ |
1768 | 1775 | ||
1769 | static void | ||
1770 | backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
1771 | { | ||
1772 | /* Ignore warnings */ | ||
1773 | } | ||
1774 | |||
1775 | static void backtrace_warning(void *data, char *msg) | ||
1776 | { | ||
1777 | /* Ignore warnings */ | ||
1778 | } | ||
1779 | |||
1780 | static int backtrace_stack(void *data, char *name) | 1776 | static int backtrace_stack(void *data, char *name) |
1781 | { | 1777 | { |
1782 | return 0; | 1778 | return 0; |
@@ -1790,8 +1786,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
1790 | } | 1786 | } |
1791 | 1787 | ||
1792 | static const struct stacktrace_ops backtrace_ops = { | 1788 | static const struct stacktrace_ops backtrace_ops = { |
1793 | .warning = backtrace_warning, | ||
1794 | .warning_symbol = backtrace_warning_symbol, | ||
1795 | .stack = backtrace_stack, | 1789 | .stack = backtrace_stack, |
1796 | .address = backtrace_address, | 1790 | .address = backtrace_address, |
1797 | .walk_stack = print_context_stack_bp, | 1791 | .walk_stack = print_context_stack_bp, |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index cf4e369cea67..fe29c1d2219e 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -96,12 +96,14 @@ static __initconst const u64 amd_hw_cache_event_ids | |||
96 | */ | 96 | */ |
97 | static const u64 amd_perfmon_event_map[] = | 97 | static const u64 amd_perfmon_event_map[] = |
98 | { | 98 | { |
99 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, | 99 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, |
100 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | 100 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, |
101 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, | 101 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, |
102 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, | 102 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, |
103 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, | 103 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, |
104 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, | 104 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, |
105 | [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ | ||
106 | [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ | ||
105 | }; | 107 | }; |
106 | 108 | ||
107 | static u64 amd_pmu_event_map(int hw_event) | 109 | static u64 amd_pmu_event_map(int hw_event) |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 447a28de6f09..41178c826c48 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -36,7 +36,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = | |||
36 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | 36 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, |
37 | }; | 37 | }; |
38 | 38 | ||
39 | static struct event_constraint intel_core_event_constraints[] = | 39 | static struct event_constraint intel_core_event_constraints[] __read_mostly = |
40 | { | 40 | { |
41 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | 41 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ |
42 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | 42 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ |
@@ -47,7 +47,7 @@ static struct event_constraint intel_core_event_constraints[] = | |||
47 | EVENT_CONSTRAINT_END | 47 | EVENT_CONSTRAINT_END |
48 | }; | 48 | }; |
49 | 49 | ||
50 | static struct event_constraint intel_core2_event_constraints[] = | 50 | static struct event_constraint intel_core2_event_constraints[] __read_mostly = |
51 | { | 51 | { |
52 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 52 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
53 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 53 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
@@ -70,7 +70,7 @@ static struct event_constraint intel_core2_event_constraints[] = | |||
70 | EVENT_CONSTRAINT_END | 70 | EVENT_CONSTRAINT_END |
71 | }; | 71 | }; |
72 | 72 | ||
73 | static struct event_constraint intel_nehalem_event_constraints[] = | 73 | static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = |
74 | { | 74 | { |
75 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 75 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
76 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 76 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
@@ -86,19 +86,19 @@ static struct event_constraint intel_nehalem_event_constraints[] = | |||
86 | EVENT_CONSTRAINT_END | 86 | EVENT_CONSTRAINT_END |
87 | }; | 87 | }; |
88 | 88 | ||
89 | static struct extra_reg intel_nehalem_extra_regs[] = | 89 | static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = |
90 | { | 90 | { |
91 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), | 91 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), |
92 | EVENT_EXTRA_END | 92 | EVENT_EXTRA_END |
93 | }; | 93 | }; |
94 | 94 | ||
95 | static struct event_constraint intel_nehalem_percore_constraints[] = | 95 | static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly = |
96 | { | 96 | { |
97 | INTEL_EVENT_CONSTRAINT(0xb7, 0), | 97 | INTEL_EVENT_CONSTRAINT(0xb7, 0), |
98 | EVENT_CONSTRAINT_END | 98 | EVENT_CONSTRAINT_END |
99 | }; | 99 | }; |
100 | 100 | ||
101 | static struct event_constraint intel_westmere_event_constraints[] = | 101 | static struct event_constraint intel_westmere_event_constraints[] __read_mostly = |
102 | { | 102 | { |
103 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 103 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
104 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 104 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
@@ -110,7 +110,7 @@ static struct event_constraint intel_westmere_event_constraints[] = | |||
110 | EVENT_CONSTRAINT_END | 110 | EVENT_CONSTRAINT_END |
111 | }; | 111 | }; |
112 | 112 | ||
113 | static struct event_constraint intel_snb_event_constraints[] = | 113 | static struct event_constraint intel_snb_event_constraints[] __read_mostly = |
114 | { | 114 | { |
115 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 115 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
116 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 116 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
@@ -123,21 +123,21 @@ static struct event_constraint intel_snb_event_constraints[] = | |||
123 | EVENT_CONSTRAINT_END | 123 | EVENT_CONSTRAINT_END |
124 | }; | 124 | }; |
125 | 125 | ||
126 | static struct extra_reg intel_westmere_extra_regs[] = | 126 | static struct extra_reg intel_westmere_extra_regs[] __read_mostly = |
127 | { | 127 | { |
128 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), | 128 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), |
129 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), | 129 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), |
130 | EVENT_EXTRA_END | 130 | EVENT_EXTRA_END |
131 | }; | 131 | }; |
132 | 132 | ||
133 | static struct event_constraint intel_westmere_percore_constraints[] = | 133 | static struct event_constraint intel_westmere_percore_constraints[] __read_mostly = |
134 | { | 134 | { |
135 | INTEL_EVENT_CONSTRAINT(0xb7, 0), | 135 | INTEL_EVENT_CONSTRAINT(0xb7, 0), |
136 | INTEL_EVENT_CONSTRAINT(0xbb, 0), | 136 | INTEL_EVENT_CONSTRAINT(0xbb, 0), |
137 | EVENT_CONSTRAINT_END | 137 | EVENT_CONSTRAINT_END |
138 | }; | 138 | }; |
139 | 139 | ||
140 | static struct event_constraint intel_gen_event_constraints[] = | 140 | static struct event_constraint intel_gen_event_constraints[] __read_mostly = |
141 | { | 141 | { |
142 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 142 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
143 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 143 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
@@ -1440,6 +1440,11 @@ static __init int intel_pmu_init(void) | |||
1440 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | 1440 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1441 | x86_pmu.extra_regs = intel_nehalem_extra_regs; | 1441 | x86_pmu.extra_regs = intel_nehalem_extra_regs; |
1442 | 1442 | ||
1443 | /* UOPS_ISSUED.STALLED_CYCLES */ | ||
1444 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | ||
1445 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ | ||
1446 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; | ||
1447 | |||
1443 | if (ebx & 0x40) { | 1448 | if (ebx & 0x40) { |
1444 | /* | 1449 | /* |
1445 | * Erratum AAJ80 detected, we work it around by using | 1450 | * Erratum AAJ80 detected, we work it around by using |
@@ -1480,6 +1485,12 @@ static __init int intel_pmu_init(void) | |||
1480 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | 1485 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1481 | x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; | 1486 | x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; |
1482 | x86_pmu.extra_regs = intel_westmere_extra_regs; | 1487 | x86_pmu.extra_regs = intel_westmere_extra_regs; |
1488 | |||
1489 | /* UOPS_ISSUED.STALLED_CYCLES */ | ||
1490 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | ||
1491 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ | ||
1492 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; | ||
1493 | |||
1483 | pr_cont("Westmere events, "); | 1494 | pr_cont("Westmere events, "); |
1484 | break; | 1495 | break; |
1485 | 1496 | ||
@@ -1491,6 +1502,12 @@ static __init int intel_pmu_init(void) | |||
1491 | 1502 | ||
1492 | x86_pmu.event_constraints = intel_snb_event_constraints; | 1503 | x86_pmu.event_constraints = intel_snb_event_constraints; |
1493 | x86_pmu.pebs_constraints = intel_snb_pebs_events; | 1504 | x86_pmu.pebs_constraints = intel_snb_pebs_events; |
1505 | |||
1506 | /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ | ||
1507 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | ||
1508 | /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ | ||
1509 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1; | ||
1510 | |||
1494 | pr_cont("SandyBridge events, "); | 1511 | pr_cont("SandyBridge events, "); |
1495 | break; | 1512 | break; |
1496 | 1513 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index e93fcd55fae1..ead584fb6a7d 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -468,7 +468,7 @@ static struct p4_event_bind p4_event_bind_map[] = { | |||
468 | .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), | 468 | .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), |
469 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | 469 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, |
470 | .escr_emask = | 470 | .escr_emask = |
471 | P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), | 471 | P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), |
472 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 472 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
473 | }, | 473 | }, |
474 | [P4_EVENT_X87_ASSIST] = { | 474 | [P4_EVENT_X87_ASSIST] = { |
@@ -912,8 +912,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) | |||
912 | int idx, handled = 0; | 912 | int idx, handled = 0; |
913 | u64 val; | 913 | u64 val; |
914 | 914 | ||
915 | data.addr = 0; | 915 | perf_sample_data_init(&data, 0); |
916 | data.raw = NULL; | ||
917 | 916 | ||
918 | cpuc = &__get_cpu_var(cpu_hw_events); | 917 | cpuc = &__get_cpu_var(cpu_hw_events); |
919 | 918 | ||
@@ -1197,7 +1196,7 @@ static __init int p4_pmu_init(void) | |||
1197 | { | 1196 | { |
1198 | unsigned int low, high; | 1197 | unsigned int low, high; |
1199 | 1198 | ||
1200 | /* If we get stripped -- indexig fails */ | 1199 | /* If we get stripped -- indexing fails */ |
1201 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); | 1200 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); |
1202 | 1201 | ||
1203 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); | 1202 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index e2a3f0606da4..1aae78f775fc 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -135,20 +135,6 @@ print_context_stack_bp(struct thread_info *tinfo, | |||
135 | } | 135 | } |
136 | EXPORT_SYMBOL_GPL(print_context_stack_bp); | 136 | EXPORT_SYMBOL_GPL(print_context_stack_bp); |
137 | 137 | ||
138 | |||
139 | static void | ||
140 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
141 | { | ||
142 | printk(data); | ||
143 | print_symbol(msg, symbol); | ||
144 | printk("\n"); | ||
145 | } | ||
146 | |||
147 | static void print_trace_warning(void *data, char *msg) | ||
148 | { | ||
149 | printk("%s%s\n", (char *)data, msg); | ||
150 | } | ||
151 | |||
152 | static int print_trace_stack(void *data, char *name) | 138 | static int print_trace_stack(void *data, char *name) |
153 | { | 139 | { |
154 | printk("%s <%s> ", (char *)data, name); | 140 | printk("%s <%s> ", (char *)data, name); |
@@ -166,8 +152,6 @@ static void print_trace_address(void *data, unsigned long addr, int reliable) | |||
166 | } | 152 | } |
167 | 153 | ||
168 | static const struct stacktrace_ops print_trace_ops = { | 154 | static const struct stacktrace_ops print_trace_ops = { |
169 | .warning = print_trace_warning, | ||
170 | .warning_symbol = print_trace_warning_symbol, | ||
171 | .stack = print_trace_stack, | 155 | .stack = print_trace_stack, |
172 | .address = print_trace_address, | 156 | .address = print_trace_address, |
173 | .walk_stack = print_context_stack, | 157 | .walk_stack = print_context_stack, |
@@ -279,7 +263,6 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) | |||
279 | printk("DEBUG_PAGEALLOC"); | 263 | printk("DEBUG_PAGEALLOC"); |
280 | #endif | 264 | #endif |
281 | printk("\n"); | 265 | printk("\n"); |
282 | sysfs_printk_last_file(); | ||
283 | if (notify_die(DIE_OOPS, str, regs, err, | 266 | if (notify_die(DIE_OOPS, str, regs, err, |
284 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | 267 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) |
285 | return 1; | 268 | return 1; |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index a93742a57468..0ba15a6cc57e 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -260,9 +260,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code) | |||
260 | return mod_code_status; | 260 | return mod_code_status; |
261 | } | 261 | } |
262 | 262 | ||
263 | static unsigned char *ftrace_nop_replace(void) | 263 | static const unsigned char *ftrace_nop_replace(void) |
264 | { | 264 | { |
265 | return ideal_nop5; | 265 | return ideal_nops[NOP_ATOMIC5]; |
266 | } | 266 | } |
267 | 267 | ||
268 | static int | 268 | static int |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index d6d6bb361931..3bb08509a7a1 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -23,7 +23,6 @@ | |||
23 | static void __init i386_default_early_setup(void) | 23 | static void __init i386_default_early_setup(void) |
24 | { | 24 | { |
25 | /* Initialize 32bit specific setup functions */ | 25 | /* Initialize 32bit specific setup functions */ |
26 | x86_init.resources.probe_roms = probe_roms; | ||
27 | x86_init.resources.reserve_resources = i386_reserve_resources; | 26 | x86_init.resources.reserve_resources = i386_reserve_resources; |
28 | x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; | 27 | x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; |
29 | 28 | ||
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index bfe8f729e086..6781765b3a0d 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -217,7 +217,7 @@ static void hpet_reserve_platform_timers(unsigned int id) { } | |||
217 | /* | 217 | /* |
218 | * Common hpet info | 218 | * Common hpet info |
219 | */ | 219 | */ |
220 | static unsigned long hpet_period; | 220 | static unsigned long hpet_freq; |
221 | 221 | ||
222 | static void hpet_legacy_set_mode(enum clock_event_mode mode, | 222 | static void hpet_legacy_set_mode(enum clock_event_mode mode, |
223 | struct clock_event_device *evt); | 223 | struct clock_event_device *evt); |
@@ -232,7 +232,6 @@ static struct clock_event_device hpet_clockevent = { | |||
232 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | 232 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, |
233 | .set_mode = hpet_legacy_set_mode, | 233 | .set_mode = hpet_legacy_set_mode, |
234 | .set_next_event = hpet_legacy_next_event, | 234 | .set_next_event = hpet_legacy_next_event, |
235 | .shift = 32, | ||
236 | .irq = 0, | 235 | .irq = 0, |
237 | .rating = 50, | 236 | .rating = 50, |
238 | }; | 237 | }; |
@@ -290,28 +289,12 @@ static void hpet_legacy_clockevent_register(void) | |||
290 | hpet_enable_legacy_int(); | 289 | hpet_enable_legacy_int(); |
291 | 290 | ||
292 | /* | 291 | /* |
293 | * The mult factor is defined as (include/linux/clockchips.h) | ||
294 | * mult/2^shift = cyc/ns (in contrast to ns/cyc in clocksource.h) | ||
295 | * hpet_period is in units of femtoseconds (per cycle), so | ||
296 | * mult/2^shift = cyc/ns = 10^6/hpet_period | ||
297 | * mult = (10^6 * 2^shift)/hpet_period | ||
298 | * mult = (FSEC_PER_NSEC << hpet_clockevent.shift)/hpet_period | ||
299 | */ | ||
300 | hpet_clockevent.mult = div_sc((unsigned long) FSEC_PER_NSEC, | ||
301 | hpet_period, hpet_clockevent.shift); | ||
302 | /* Calculate the min / max delta */ | ||
303 | hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, | ||
304 | &hpet_clockevent); | ||
305 | /* Setup minimum reprogramming delta. */ | ||
306 | hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA, | ||
307 | &hpet_clockevent); | ||
308 | |||
309 | /* | ||
310 | * Start hpet with the boot cpu mask and make it | 292 | * Start hpet with the boot cpu mask and make it |
311 | * global after the IO_APIC has been initialized. | 293 | * global after the IO_APIC has been initialized. |
312 | */ | 294 | */ |
313 | hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); | 295 | hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); |
314 | clockevents_register_device(&hpet_clockevent); | 296 | clockevents_config_and_register(&hpet_clockevent, hpet_freq, |
297 | HPET_MIN_PROG_DELTA, 0x7FFFFFFF); | ||
315 | global_clock_event = &hpet_clockevent; | 298 | global_clock_event = &hpet_clockevent; |
316 | printk(KERN_DEBUG "hpet clockevent registered\n"); | 299 | printk(KERN_DEBUG "hpet clockevent registered\n"); |
317 | } | 300 | } |
@@ -549,7 +532,6 @@ static int hpet_setup_irq(struct hpet_dev *dev) | |||
549 | static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) | 532 | static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) |
550 | { | 533 | { |
551 | struct clock_event_device *evt = &hdev->evt; | 534 | struct clock_event_device *evt = &hdev->evt; |
552 | uint64_t hpet_freq; | ||
553 | 535 | ||
554 | WARN_ON(cpu != smp_processor_id()); | 536 | WARN_ON(cpu != smp_processor_id()); |
555 | if (!(hdev->flags & HPET_DEV_VALID)) | 537 | if (!(hdev->flags & HPET_DEV_VALID)) |
@@ -571,24 +553,10 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) | |||
571 | 553 | ||
572 | evt->set_mode = hpet_msi_set_mode; | 554 | evt->set_mode = hpet_msi_set_mode; |
573 | evt->set_next_event = hpet_msi_next_event; | 555 | evt->set_next_event = hpet_msi_next_event; |
574 | evt->shift = 32; | ||
575 | |||
576 | /* | ||
577 | * The period is a femto seconds value. We need to calculate the | ||
578 | * scaled math multiplication factor for nanosecond to hpet tick | ||
579 | * conversion. | ||
580 | */ | ||
581 | hpet_freq = FSEC_PER_SEC; | ||
582 | do_div(hpet_freq, hpet_period); | ||
583 | evt->mult = div_sc((unsigned long) hpet_freq, | ||
584 | NSEC_PER_SEC, evt->shift); | ||
585 | /* Calculate the max delta */ | ||
586 | evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt); | ||
587 | /* 5 usec minimum reprogramming delta. */ | ||
588 | evt->min_delta_ns = 5000; | ||
589 | |||
590 | evt->cpumask = cpumask_of(hdev->cpu); | 556 | evt->cpumask = cpumask_of(hdev->cpu); |
591 | clockevents_register_device(evt); | 557 | |
558 | clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA, | ||
559 | 0x7FFFFFFF); | ||
592 | } | 560 | } |
593 | 561 | ||
594 | #ifdef CONFIG_HPET | 562 | #ifdef CONFIG_HPET |
@@ -792,7 +760,6 @@ static struct clocksource clocksource_hpet = { | |||
792 | static int hpet_clocksource_register(void) | 760 | static int hpet_clocksource_register(void) |
793 | { | 761 | { |
794 | u64 start, now; | 762 | u64 start, now; |
795 | u64 hpet_freq; | ||
796 | cycle_t t1; | 763 | cycle_t t1; |
797 | 764 | ||
798 | /* Start the counter */ | 765 | /* Start the counter */ |
@@ -819,24 +786,7 @@ static int hpet_clocksource_register(void) | |||
819 | return -ENODEV; | 786 | return -ENODEV; |
820 | } | 787 | } |
821 | 788 | ||
822 | /* | ||
823 | * The definition of mult is (include/linux/clocksource.h) | ||
824 | * mult/2^shift = ns/cyc and hpet_period is in units of fsec/cyc | ||
825 | * so we first need to convert hpet_period to ns/cyc units: | ||
826 | * mult/2^shift = ns/cyc = hpet_period/10^6 | ||
827 | * mult = (hpet_period * 2^shift)/10^6 | ||
828 | * mult = (hpet_period << shift)/FSEC_PER_NSEC | ||
829 | */ | ||
830 | |||
831 | /* Need to convert hpet_period (fsec/cyc) to cyc/sec: | ||
832 | * | ||
833 | * cyc/sec = FSEC_PER_SEC/hpet_period(fsec/cyc) | ||
834 | * cyc/sec = (FSEC_PER_NSEC * NSEC_PER_SEC)/hpet_period | ||
835 | */ | ||
836 | hpet_freq = FSEC_PER_SEC; | ||
837 | do_div(hpet_freq, hpet_period); | ||
838 | clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq); | 789 | clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq); |
839 | |||
840 | return 0; | 790 | return 0; |
841 | } | 791 | } |
842 | 792 | ||
@@ -845,7 +795,9 @@ static int hpet_clocksource_register(void) | |||
845 | */ | 795 | */ |
846 | int __init hpet_enable(void) | 796 | int __init hpet_enable(void) |
847 | { | 797 | { |
798 | unsigned long hpet_period; | ||
848 | unsigned int id; | 799 | unsigned int id; |
800 | u64 freq; | ||
849 | int i; | 801 | int i; |
850 | 802 | ||
851 | if (!is_hpet_capable()) | 803 | if (!is_hpet_capable()) |
@@ -884,6 +836,14 @@ int __init hpet_enable(void) | |||
884 | goto out_nohpet; | 836 | goto out_nohpet; |
885 | 837 | ||
886 | /* | 838 | /* |
839 | * The period is a femto seconds value. Convert it to a | ||
840 | * frequency. | ||
841 | */ | ||
842 | freq = FSEC_PER_SEC; | ||
843 | do_div(freq, hpet_period); | ||
844 | hpet_freq = freq; | ||
845 | |||
846 | /* | ||
887 | * Read the HPET ID register to retrieve the IRQ routing | 847 | * Read the HPET ID register to retrieve the IRQ routing |
888 | * information and the number of channels | 848 | * information and the number of channels |
889 | */ | 849 | */ |
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 2dfd31597443..fb66dc9e36cb 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c | |||
@@ -93,7 +93,6 @@ static struct clock_event_device pit_ce = { | |||
93 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | 93 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, |
94 | .set_mode = init_pit_timer, | 94 | .set_mode = init_pit_timer, |
95 | .set_next_event = pit_next_event, | 95 | .set_next_event = pit_next_event, |
96 | .shift = 32, | ||
97 | .irq = 0, | 96 | .irq = 0, |
98 | }; | 97 | }; |
99 | 98 | ||
@@ -108,90 +107,12 @@ void __init setup_pit_timer(void) | |||
108 | * IO_APIC has been initialized. | 107 | * IO_APIC has been initialized. |
109 | */ | 108 | */ |
110 | pit_ce.cpumask = cpumask_of(smp_processor_id()); | 109 | pit_ce.cpumask = cpumask_of(smp_processor_id()); |
111 | pit_ce.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, pit_ce.shift); | ||
112 | pit_ce.max_delta_ns = clockevent_delta2ns(0x7FFF, &pit_ce); | ||
113 | pit_ce.min_delta_ns = clockevent_delta2ns(0xF, &pit_ce); | ||
114 | 110 | ||
115 | clockevents_register_device(&pit_ce); | 111 | clockevents_config_and_register(&pit_ce, CLOCK_TICK_RATE, 0xF, 0x7FFF); |
116 | global_clock_event = &pit_ce; | 112 | global_clock_event = &pit_ce; |
117 | } | 113 | } |
118 | 114 | ||
119 | #ifndef CONFIG_X86_64 | 115 | #ifndef CONFIG_X86_64 |
120 | /* | ||
121 | * Since the PIT overflows every tick, its not very useful | ||
122 | * to just read by itself. So use jiffies to emulate a free | ||
123 | * running counter: | ||
124 | */ | ||
125 | static cycle_t pit_read(struct clocksource *cs) | ||
126 | { | ||
127 | static int old_count; | ||
128 | static u32 old_jifs; | ||
129 | unsigned long flags; | ||
130 | int count; | ||
131 | u32 jifs; | ||
132 | |||
133 | raw_spin_lock_irqsave(&i8253_lock, flags); | ||
134 | /* | ||
135 | * Although our caller may have the read side of xtime_lock, | ||
136 | * this is now a seqlock, and we are cheating in this routine | ||
137 | * by having side effects on state that we cannot undo if | ||
138 | * there is a collision on the seqlock and our caller has to | ||
139 | * retry. (Namely, old_jifs and old_count.) So we must treat | ||
140 | * jiffies as volatile despite the lock. We read jiffies | ||
141 | * before latching the timer count to guarantee that although | ||
142 | * the jiffies value might be older than the count (that is, | ||
143 | * the counter may underflow between the last point where | ||
144 | * jiffies was incremented and the point where we latch the | ||
145 | * count), it cannot be newer. | ||
146 | */ | ||
147 | jifs = jiffies; | ||
148 | outb_pit(0x00, PIT_MODE); /* latch the count ASAP */ | ||
149 | count = inb_pit(PIT_CH0); /* read the latched count */ | ||
150 | count |= inb_pit(PIT_CH0) << 8; | ||
151 | |||
152 | /* VIA686a test code... reset the latch if count > max + 1 */ | ||
153 | if (count > LATCH) { | ||
154 | outb_pit(0x34, PIT_MODE); | ||
155 | outb_pit(LATCH & 0xff, PIT_CH0); | ||
156 | outb_pit(LATCH >> 8, PIT_CH0); | ||
157 | count = LATCH - 1; | ||
158 | } | ||
159 | |||
160 | /* | ||
161 | * It's possible for count to appear to go the wrong way for a | ||
162 | * couple of reasons: | ||
163 | * | ||
164 | * 1. The timer counter underflows, but we haven't handled the | ||
165 | * resulting interrupt and incremented jiffies yet. | ||
166 | * 2. Hardware problem with the timer, not giving us continuous time, | ||
167 | * the counter does small "jumps" upwards on some Pentium systems, | ||
168 | * (see c't 95/10 page 335 for Neptun bug.) | ||
169 | * | ||
170 | * Previous attempts to handle these cases intelligently were | ||
171 | * buggy, so we just do the simple thing now. | ||
172 | */ | ||
173 | if (count > old_count && jifs == old_jifs) | ||
174 | count = old_count; | ||
175 | |||
176 | old_count = count; | ||
177 | old_jifs = jifs; | ||
178 | |||
179 | raw_spin_unlock_irqrestore(&i8253_lock, flags); | ||
180 | |||
181 | count = (LATCH - 1) - count; | ||
182 | |||
183 | return (cycle_t)(jifs * LATCH) + count; | ||
184 | } | ||
185 | |||
186 | static struct clocksource pit_cs = { | ||
187 | .name = "pit", | ||
188 | .rating = 110, | ||
189 | .read = pit_read, | ||
190 | .mask = CLOCKSOURCE_MASK(32), | ||
191 | .mult = 0, | ||
192 | .shift = 20, | ||
193 | }; | ||
194 | |||
195 | static int __init init_pit_clocksource(void) | 116 | static int __init init_pit_clocksource(void) |
196 | { | 117 | { |
197 | /* | 118 | /* |
@@ -205,10 +126,7 @@ static int __init init_pit_clocksource(void) | |||
205 | pit_ce.mode != CLOCK_EVT_MODE_PERIODIC) | 126 | pit_ce.mode != CLOCK_EVT_MODE_PERIODIC) |
206 | return 0; | 127 | return 0; |
207 | 128 | ||
208 | pit_cs.mult = clocksource_hz2mult(CLOCK_TICK_RATE, pit_cs.shift); | 129 | return clocksource_i8253_init(); |
209 | |||
210 | return clocksource_register(&pit_cs); | ||
211 | } | 130 | } |
212 | arch_initcall(init_pit_clocksource); | 131 | arch_initcall(init_pit_clocksource); |
213 | |||
214 | #endif /* !CONFIG_X86_64 */ | 132 | #endif /* !CONFIG_X86_64 */ |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 1cb0b9fc78dc..6c0802eb2f7f 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -249,7 +249,7 @@ void fixup_irqs(void) | |||
249 | 249 | ||
250 | data = irq_desc_get_irq_data(desc); | 250 | data = irq_desc_get_irq_data(desc); |
251 | affinity = data->affinity; | 251 | affinity = data->affinity; |
252 | if (!irq_has_action(irq) || | 252 | if (!irq_has_action(irq) || irqd_is_per_cpu(data) || |
253 | cpumask_subset(affinity, cpu_online_mask)) { | 253 | cpumask_subset(affinity, cpu_online_mask)) { |
254 | raw_spin_unlock(&desc->lock); | 254 | raw_spin_unlock(&desc->lock); |
255 | continue; | 255 | continue; |
@@ -276,7 +276,8 @@ void fixup_irqs(void) | |||
276 | else if (!(warned++)) | 276 | else if (!(warned++)) |
277 | set_affinity = 0; | 277 | set_affinity = 0; |
278 | 278 | ||
279 | if (!irqd_can_move_in_process_context(data) && chip->irq_unmask) | 279 | if (!irqd_can_move_in_process_context(data) && |
280 | !irqd_irq_disabled(data) && chip->irq_unmask) | ||
280 | chip->irq_unmask(data); | 281 | chip->irq_unmask(data); |
281 | 282 | ||
282 | raw_spin_unlock(&desc->lock); | 283 | raw_spin_unlock(&desc->lock); |
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 961b6b30ba90..3fee346ef545 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c | |||
@@ -34,7 +34,7 @@ void arch_jump_label_transform(struct jump_entry *entry, | |||
34 | code.offset = entry->target - | 34 | code.offset = entry->target - |
35 | (entry->code + JUMP_LABEL_NOP_SIZE); | 35 | (entry->code + JUMP_LABEL_NOP_SIZE); |
36 | } else | 36 | } else |
37 | memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE); | 37 | memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE); |
38 | get_online_cpus(); | 38 | get_online_cpus(); |
39 | mutex_lock(&text_mutex); | 39 | mutex_lock(&text_mutex); |
40 | text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); | 40 | text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); |
@@ -44,7 +44,8 @@ void arch_jump_label_transform(struct jump_entry *entry, | |||
44 | 44 | ||
45 | void arch_jump_label_text_poke_early(jump_label_t addr) | 45 | void arch_jump_label_text_poke_early(jump_label_t addr) |
46 | { | 46 | { |
47 | text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE); | 47 | text_poke_early((void *)addr, ideal_nops[NOP_ATOMIC5], |
48 | JUMP_LABEL_NOP_SIZE); | ||
48 | } | 49 | } |
49 | 50 | ||
50 | #endif | 51 | #endif |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index c969fd9d1566..f1a6244d7d93 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -1183,12 +1183,13 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, | |||
1183 | struct pt_regs *regs) | 1183 | struct pt_regs *regs) |
1184 | { | 1184 | { |
1185 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | 1185 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); |
1186 | unsigned long flags; | ||
1186 | 1187 | ||
1187 | /* This is possible if op is under delayed unoptimizing */ | 1188 | /* This is possible if op is under delayed unoptimizing */ |
1188 | if (kprobe_disabled(&op->kp)) | 1189 | if (kprobe_disabled(&op->kp)) |
1189 | return; | 1190 | return; |
1190 | 1191 | ||
1191 | preempt_disable(); | 1192 | local_irq_save(flags); |
1192 | if (kprobe_running()) { | 1193 | if (kprobe_running()) { |
1193 | kprobes_inc_nmissed_count(&op->kp); | 1194 | kprobes_inc_nmissed_count(&op->kp); |
1194 | } else { | 1195 | } else { |
@@ -1207,7 +1208,7 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, | |||
1207 | opt_pre_handler(&op->kp, regs); | 1208 | opt_pre_handler(&op->kp, regs); |
1208 | __this_cpu_write(current_kprobe, NULL); | 1209 | __this_cpu_write(current_kprobe, NULL); |
1209 | } | 1210 | } |
1210 | preempt_enable_no_resched(); | 1211 | local_irq_restore(flags); |
1211 | } | 1212 | } |
1212 | 1213 | ||
1213 | static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) | 1214 | static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index f98d3eafe07a..6389a6bca11b 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -26,8 +26,6 @@ | |||
26 | #include <asm/x86_init.h> | 26 | #include <asm/x86_init.h> |
27 | #include <asm/reboot.h> | 27 | #include <asm/reboot.h> |
28 | 28 | ||
29 | #define KVM_SCALE 22 | ||
30 | |||
31 | static int kvmclock = 1; | 29 | static int kvmclock = 1; |
32 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; | 30 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; |
33 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; | 31 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; |
@@ -120,8 +118,6 @@ static struct clocksource kvm_clock = { | |||
120 | .read = kvm_clock_get_cycles, | 118 | .read = kvm_clock_get_cycles, |
121 | .rating = 400, | 119 | .rating = 400, |
122 | .mask = CLOCKSOURCE_MASK(64), | 120 | .mask = CLOCKSOURCE_MASK(64), |
123 | .mult = 1 << KVM_SCALE, | ||
124 | .shift = KVM_SCALE, | ||
125 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 121 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
126 | }; | 122 | }; |
127 | 123 | ||
@@ -203,7 +199,7 @@ void __init kvmclock_init(void) | |||
203 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 199 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
204 | #endif | 200 | #endif |
205 | kvm_get_preset_lpj(); | 201 | kvm_get_preset_lpj(); |
206 | clocksource_register(&kvm_clock); | 202 | clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); |
207 | pv_info.paravirt_enabled = 1; | 203 | pv_info.paravirt_enabled = 1; |
208 | pv_info.name = "KVM"; | 204 | pv_info.name = "KVM"; |
209 | 205 | ||
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index ab23f1ad4bf1..52f256f2cc81 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/bug.h> | 24 | #include <linux/bug.h> |
25 | #include <linux/mm.h> | 25 | #include <linux/mm.h> |
26 | #include <linux/gfp.h> | 26 | #include <linux/gfp.h> |
27 | #include <linux/jump_label.h> | ||
27 | 28 | ||
28 | #include <asm/system.h> | 29 | #include <asm/system.h> |
29 | #include <asm/page.h> | 30 | #include <asm/page.h> |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 5a532ce646bf..6f9bfffb2720 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -715,17 +715,15 @@ static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) | |||
715 | } | 715 | } |
716 | } | 716 | } |
717 | 717 | ||
718 | static int | 718 | static int __init |
719 | check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count) | 719 | check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count) |
720 | { | 720 | { |
721 | int ret = 0; | ||
722 | |||
723 | if (!mpc_new_phys || count <= mpc_new_length) { | 721 | if (!mpc_new_phys || count <= mpc_new_length) { |
724 | WARN(1, "update_mptable: No spare slots (length: %x)\n", count); | 722 | WARN(1, "update_mptable: No spare slots (length: %x)\n", count); |
725 | return -1; | 723 | return -1; |
726 | } | 724 | } |
727 | 725 | ||
728 | return ret; | 726 | return 0; |
729 | } | 727 | } |
730 | #else /* CONFIG_X86_IO_APIC */ | 728 | #else /* CONFIG_X86_IO_APIC */ |
731 | static | 729 | static |
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c index 55d745ec1181..35ccf75696eb 100644 --- a/arch/x86/kernel/pci-iommu_table.c +++ b/arch/x86/kernel/pci-iommu_table.c | |||
@@ -50,20 +50,14 @@ void __init check_iommu_entries(struct iommu_table_entry *start, | |||
50 | struct iommu_table_entry *finish) | 50 | struct iommu_table_entry *finish) |
51 | { | 51 | { |
52 | struct iommu_table_entry *p, *q, *x; | 52 | struct iommu_table_entry *p, *q, *x; |
53 | char sym_p[KSYM_SYMBOL_LEN]; | ||
54 | char sym_q[KSYM_SYMBOL_LEN]; | ||
55 | 53 | ||
56 | /* Simple cyclic dependency checker. */ | 54 | /* Simple cyclic dependency checker. */ |
57 | for (p = start; p < finish; p++) { | 55 | for (p = start; p < finish; p++) { |
58 | q = find_dependents_of(start, finish, p); | 56 | q = find_dependents_of(start, finish, p); |
59 | x = find_dependents_of(start, finish, q); | 57 | x = find_dependents_of(start, finish, q); |
60 | if (p == x) { | 58 | if (p == x) { |
61 | sprint_symbol(sym_p, (unsigned long)p->detect); | 59 | printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %pS depends on %pS and vice-versa. BREAKING IT.\n", |
62 | sprint_symbol(sym_q, (unsigned long)q->detect); | 60 | p->detect, q->detect); |
63 | |||
64 | printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %s depends" \ | ||
65 | " on %s and vice-versa. BREAKING IT.\n", | ||
66 | sym_p, sym_q); | ||
67 | /* Heavy handed way..*/ | 61 | /* Heavy handed way..*/ |
68 | x->depend = 0; | 62 | x->depend = 0; |
69 | } | 63 | } |
@@ -72,12 +66,8 @@ void __init check_iommu_entries(struct iommu_table_entry *start, | |||
72 | for (p = start; p < finish; p++) { | 66 | for (p = start; p < finish; p++) { |
73 | q = find_dependents_of(p, finish, p); | 67 | q = find_dependents_of(p, finish, p); |
74 | if (q && q > p) { | 68 | if (q && q > p) { |
75 | sprint_symbol(sym_p, (unsigned long)p->detect); | 69 | printk(KERN_ERR "EXECUTION ORDER INVALID! %pS should be called before %pS!\n", |
76 | sprint_symbol(sym_q, (unsigned long)q->detect); | 70 | p->detect, q->detect); |
77 | |||
78 | printk(KERN_ERR "EXECUTION ORDER INVALID! %s "\ | ||
79 | "should be called before %s!\n", | ||
80 | sym_p, sym_q); | ||
81 | } | 71 | } |
82 | } | 72 | } |
83 | } | 73 | } |
diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms.c index 071e7fea42e5..ba0a4cce53be 100644 --- a/arch/x86/kernel/probe_roms_32.c +++ b/arch/x86/kernel/probe_roms.c | |||
@@ -73,6 +73,107 @@ static struct resource video_rom_resource = { | |||
73 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | 73 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
74 | }; | 74 | }; |
75 | 75 | ||
76 | /* does this oprom support the given pci device, or any of the devices | ||
77 | * that the driver supports? | ||
78 | */ | ||
79 | static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short device) | ||
80 | { | ||
81 | struct pci_driver *drv = pdev->driver; | ||
82 | const struct pci_device_id *id; | ||
83 | |||
84 | if (pdev->vendor == vendor && pdev->device == device) | ||
85 | return true; | ||
86 | |||
87 | for (id = drv ? drv->id_table : NULL; id && id->vendor; id++) | ||
88 | if (id->vendor == vendor && id->device == device) | ||
89 | break; | ||
90 | |||
91 | return id && id->vendor; | ||
92 | } | ||
93 | |||
94 | static bool probe_list(struct pci_dev *pdev, unsigned short vendor, | ||
95 | const unsigned char *rom_list) | ||
96 | { | ||
97 | unsigned short device; | ||
98 | |||
99 | do { | ||
100 | if (probe_kernel_address(rom_list, device) != 0) | ||
101 | device = 0; | ||
102 | |||
103 | if (device && match_id(pdev, vendor, device)) | ||
104 | break; | ||
105 | |||
106 | rom_list += 2; | ||
107 | } while (device); | ||
108 | |||
109 | return !!device; | ||
110 | } | ||
111 | |||
112 | static struct resource *find_oprom(struct pci_dev *pdev) | ||
113 | { | ||
114 | struct resource *oprom = NULL; | ||
115 | int i; | ||
116 | |||
117 | for (i = 0; i < ARRAY_SIZE(adapter_rom_resources); i++) { | ||
118 | struct resource *res = &adapter_rom_resources[i]; | ||
119 | unsigned short offset, vendor, device, list, rev; | ||
120 | const unsigned char *rom; | ||
121 | |||
122 | if (res->end == 0) | ||
123 | break; | ||
124 | |||
125 | rom = isa_bus_to_virt(res->start); | ||
126 | if (probe_kernel_address(rom + 0x18, offset) != 0) | ||
127 | continue; | ||
128 | |||
129 | if (probe_kernel_address(rom + offset + 0x4, vendor) != 0) | ||
130 | continue; | ||
131 | |||
132 | if (probe_kernel_address(rom + offset + 0x6, device) != 0) | ||
133 | continue; | ||
134 | |||
135 | if (match_id(pdev, vendor, device)) { | ||
136 | oprom = res; | ||
137 | break; | ||
138 | } | ||
139 | |||
140 | if (probe_kernel_address(rom + offset + 0x8, list) == 0 && | ||
141 | probe_kernel_address(rom + offset + 0xc, rev) == 0 && | ||
142 | rev >= 3 && list && | ||
143 | probe_list(pdev, vendor, rom + offset + list)) { | ||
144 | oprom = res; | ||
145 | break; | ||
146 | } | ||
147 | } | ||
148 | |||
149 | return oprom; | ||
150 | } | ||
151 | |||
152 | void *pci_map_biosrom(struct pci_dev *pdev) | ||
153 | { | ||
154 | struct resource *oprom = find_oprom(pdev); | ||
155 | |||
156 | if (!oprom) | ||
157 | return NULL; | ||
158 | |||
159 | return ioremap(oprom->start, resource_size(oprom)); | ||
160 | } | ||
161 | EXPORT_SYMBOL(pci_map_biosrom); | ||
162 | |||
163 | void pci_unmap_biosrom(void __iomem *image) | ||
164 | { | ||
165 | iounmap(image); | ||
166 | } | ||
167 | EXPORT_SYMBOL(pci_unmap_biosrom); | ||
168 | |||
169 | size_t pci_biosrom_size(struct pci_dev *pdev) | ||
170 | { | ||
171 | struct resource *oprom = find_oprom(pdev); | ||
172 | |||
173 | return oprom ? resource_size(oprom) : 0; | ||
174 | } | ||
175 | EXPORT_SYMBOL(pci_biosrom_size); | ||
176 | |||
76 | #define ROMSIGNATURE 0xaa55 | 177 | #define ROMSIGNATURE 0xaa55 |
77 | 178 | ||
78 | static int __init romsignature(const unsigned char *rom) | 179 | static int __init romsignature(const unsigned char *rom) |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d46cbe46b7ab..88a90a977f8e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -449,7 +449,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); | |||
449 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | 449 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) |
450 | { | 450 | { |
451 | if (!need_resched()) { | 451 | if (!need_resched()) { |
452 | if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) | 452 | if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) |
453 | clflush((void *)&current_thread_info()->flags); | 453 | clflush((void *)&current_thread_info()->flags); |
454 | 454 | ||
455 | __monitor((void *)&current_thread_info()->flags, 0, 0); | 455 | __monitor((void *)&current_thread_info()->flags, 0, 0); |
@@ -465,7 +465,7 @@ static void mwait_idle(void) | |||
465 | if (!need_resched()) { | 465 | if (!need_resched()) { |
466 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); | 466 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); |
467 | trace_cpu_idle(1, smp_processor_id()); | 467 | trace_cpu_idle(1, smp_processor_id()); |
468 | if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) | 468 | if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) |
469 | clflush((void *)&current_thread_info()->flags); | 469 | clflush((void *)&current_thread_info()->flags); |
470 | 470 | ||
471 | __monitor((void *)&current_thread_info()->flags, 0, 0); | 471 | __monitor((void *)&current_thread_info()->flags, 0, 0); |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 08c44b08bf5b..0c016f727695 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -36,7 +36,7 @@ EXPORT_SYMBOL(pm_power_off); | |||
36 | 36 | ||
37 | static const struct desc_ptr no_idt = {}; | 37 | static const struct desc_ptr no_idt = {}; |
38 | static int reboot_mode; | 38 | static int reboot_mode; |
39 | enum reboot_type reboot_type = BOOT_KBD; | 39 | enum reboot_type reboot_type = BOOT_ACPI; |
40 | int reboot_force; | 40 | int reboot_force; |
41 | 41 | ||
42 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) | 42 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) |
@@ -478,9 +478,24 @@ void __attribute__((weak)) mach_reboot_fixups(void) | |||
478 | { | 478 | { |
479 | } | 479 | } |
480 | 480 | ||
481 | /* | ||
482 | * Windows compatible x86 hardware expects the following on reboot: | ||
483 | * | ||
484 | * 1) If the FADT has the ACPI reboot register flag set, try it | ||
485 | * 2) If still alive, write to the keyboard controller | ||
486 | * 3) If still alive, write to the ACPI reboot register again | ||
487 | * 4) If still alive, write to the keyboard controller again | ||
488 | * | ||
489 | * If the machine is still alive at this stage, it gives up. We default to | ||
490 | * following the same pattern, except that if we're still alive after (4) we'll | ||
491 | * try to force a triple fault and then cycle between hitting the keyboard | ||
492 | * controller and doing that | ||
493 | */ | ||
481 | static void native_machine_emergency_restart(void) | 494 | static void native_machine_emergency_restart(void) |
482 | { | 495 | { |
483 | int i; | 496 | int i; |
497 | int attempt = 0; | ||
498 | int orig_reboot_type = reboot_type; | ||
484 | 499 | ||
485 | if (reboot_emergency) | 500 | if (reboot_emergency) |
486 | emergency_vmx_disable_all(); | 501 | emergency_vmx_disable_all(); |
@@ -502,6 +517,13 @@ static void native_machine_emergency_restart(void) | |||
502 | outb(0xfe, 0x64); /* pulse reset low */ | 517 | outb(0xfe, 0x64); /* pulse reset low */ |
503 | udelay(50); | 518 | udelay(50); |
504 | } | 519 | } |
520 | if (attempt == 0 && orig_reboot_type == BOOT_ACPI) { | ||
521 | attempt = 1; | ||
522 | reboot_type = BOOT_ACPI; | ||
523 | } else { | ||
524 | reboot_type = BOOT_TRIPLE; | ||
525 | } | ||
526 | break; | ||
505 | 527 | ||
506 | case BOOT_TRIPLE: | 528 | case BOOT_TRIPLE: |
507 | load_idt(&no_idt); | 529 | load_idt(&no_idt); |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4be9b398470e..c3050af9306d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -691,8 +691,6 @@ early_param("reservelow", parse_reservelow); | |||
691 | 691 | ||
692 | void __init setup_arch(char **cmdline_p) | 692 | void __init setup_arch(char **cmdline_p) |
693 | { | 693 | { |
694 | unsigned long flags; | ||
695 | |||
696 | #ifdef CONFIG_X86_32 | 694 | #ifdef CONFIG_X86_32 |
697 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 695 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
698 | visws_early_detect(); | 696 | visws_early_detect(); |
@@ -1041,9 +1039,7 @@ void __init setup_arch(char **cmdline_p) | |||
1041 | 1039 | ||
1042 | mcheck_init(); | 1040 | mcheck_init(); |
1043 | 1041 | ||
1044 | local_irq_save(flags); | 1042 | arch_init_ideal_nops(); |
1045 | arch_init_ideal_nop5(); | ||
1046 | local_irq_restore(flags); | ||
1047 | } | 1043 | } |
1048 | 1044 | ||
1049 | #ifdef CONFIG_X86_32 | 1045 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 4fd173cd8e57..40a24932a8a1 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -601,10 +601,7 @@ long sys_rt_sigreturn(struct pt_regs *regs) | |||
601 | goto badframe; | 601 | goto badframe; |
602 | 602 | ||
603 | sigdelsetmask(&set, ~_BLOCKABLE); | 603 | sigdelsetmask(&set, ~_BLOCKABLE); |
604 | spin_lock_irq(&current->sighand->siglock); | 604 | set_current_blocked(&set); |
605 | current->blocked = set; | ||
606 | recalc_sigpending(); | ||
607 | spin_unlock_irq(&current->sighand->siglock); | ||
608 | 605 | ||
609 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | 606 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) |
610 | goto badframe; | 607 | goto badframe; |
@@ -682,6 +679,7 @@ static int | |||
682 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | 679 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, |
683 | sigset_t *oldset, struct pt_regs *regs) | 680 | sigset_t *oldset, struct pt_regs *regs) |
684 | { | 681 | { |
682 | sigset_t blocked; | ||
685 | int ret; | 683 | int ret; |
686 | 684 | ||
687 | /* Are we from a system call? */ | 685 | /* Are we from a system call? */ |
@@ -741,12 +739,10 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
741 | */ | 739 | */ |
742 | regs->flags &= ~X86_EFLAGS_TF; | 740 | regs->flags &= ~X86_EFLAGS_TF; |
743 | 741 | ||
744 | spin_lock_irq(&current->sighand->siglock); | 742 | sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask); |
745 | sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); | ||
746 | if (!(ka->sa.sa_flags & SA_NODEFER)) | 743 | if (!(ka->sa.sa_flags & SA_NODEFER)) |
747 | sigaddset(&current->blocked, sig); | 744 | sigaddset(&blocked, sig); |
748 | recalc_sigpending(); | 745 | set_current_blocked(&blocked); |
749 | spin_unlock_irq(&current->sighand->siglock); | ||
750 | 746 | ||
751 | tracehook_signal_handler(sig, info, ka, regs, | 747 | tracehook_signal_handler(sig, info, ka, regs, |
752 | test_thread_flag(TIF_SINGLESTEP)); | 748 | test_thread_flag(TIF_SINGLESTEP)); |
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 513deac7228d..013e7eba83bb 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -194,14 +194,13 @@ static void native_stop_other_cpus(int wait) | |||
194 | } | 194 | } |
195 | 195 | ||
196 | /* | 196 | /* |
197 | * Reschedule call back. Nothing to do, | 197 | * Reschedule call back. |
198 | * all the work is done automatically when | ||
199 | * we return from the interrupt. | ||
200 | */ | 198 | */ |
201 | void smp_reschedule_interrupt(struct pt_regs *regs) | 199 | void smp_reschedule_interrupt(struct pt_regs *regs) |
202 | { | 200 | { |
203 | ack_APIC_irq(); | 201 | ack_APIC_irq(); |
204 | inc_irq_stat(irq_resched_count); | 202 | inc_irq_stat(irq_resched_count); |
203 | scheduler_ipi(); | ||
205 | /* | 204 | /* |
206 | * KVM uses this interrupt to force a cpu out of guest mode | 205 | * KVM uses this interrupt to force a cpu out of guest mode |
207 | */ | 206 | */ |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c2871d3c71b6..a3c430bdfb60 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -1332,9 +1332,9 @@ static inline void mwait_play_dead(void) | |||
1332 | void *mwait_ptr; | 1332 | void *mwait_ptr; |
1333 | struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); | 1333 | struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); |
1334 | 1334 | ||
1335 | if (!(cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c))) | 1335 | if (!this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)) |
1336 | return; | 1336 | return; |
1337 | if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH)) | 1337 | if (!this_cpu_has(X86_FEATURE_CLFLSH)) |
1338 | return; | 1338 | return; |
1339 | if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF) | 1339 | if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF) |
1340 | return; | 1340 | return; |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 6515733a289d..55d9bc03f696 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -9,15 +9,6 @@ | |||
9 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
10 | #include <asm/stacktrace.h> | 10 | #include <asm/stacktrace.h> |
11 | 11 | ||
12 | static void save_stack_warning(void *data, char *msg) | ||
13 | { | ||
14 | } | ||
15 | |||
16 | static void | ||
17 | save_stack_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
18 | { | ||
19 | } | ||
20 | |||
21 | static int save_stack_stack(void *data, char *name) | 12 | static int save_stack_stack(void *data, char *name) |
22 | { | 13 | { |
23 | return 0; | 14 | return 0; |
@@ -53,16 +44,12 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable) | |||
53 | } | 44 | } |
54 | 45 | ||
55 | static const struct stacktrace_ops save_stack_ops = { | 46 | static const struct stacktrace_ops save_stack_ops = { |
56 | .warning = save_stack_warning, | ||
57 | .warning_symbol = save_stack_warning_symbol, | ||
58 | .stack = save_stack_stack, | 47 | .stack = save_stack_stack, |
59 | .address = save_stack_address, | 48 | .address = save_stack_address, |
60 | .walk_stack = print_context_stack, | 49 | .walk_stack = print_context_stack, |
61 | }; | 50 | }; |
62 | 51 | ||
63 | static const struct stacktrace_ops save_stack_ops_nosched = { | 52 | static const struct stacktrace_ops save_stack_ops_nosched = { |
64 | .warning = save_stack_warning, | ||
65 | .warning_symbol = save_stack_warning_symbol, | ||
66 | .stack = save_stack_stack, | 53 | .stack = save_stack_stack, |
67 | .address = save_stack_address_nosched, | 54 | .address = save_stack_address_nosched, |
68 | .walk_stack = print_context_stack, | 55 | .walk_stack = print_context_stack, |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index abce34d5c79d..32cbffb0c494 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -344,3 +344,4 @@ ENTRY(sys_call_table) | |||
344 | .long sys_open_by_handle_at | 344 | .long sys_open_by_handle_at |
345 | .long sys_clock_adjtime | 345 | .long sys_clock_adjtime |
346 | .long sys_syncfs | 346 | .long sys_syncfs |
347 | .long sys_sendmmsg /* 345 */ | ||
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 75ef4b18e9b7..6f164bd5e14d 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -35,7 +35,7 @@ void iommu_shutdown_noop(void) { } | |||
35 | struct x86_init_ops x86_init __initdata = { | 35 | struct x86_init_ops x86_init __initdata = { |
36 | 36 | ||
37 | .resources = { | 37 | .resources = { |
38 | .probe_roms = x86_init_noop, | 38 | .probe_roms = probe_roms, |
39 | .reserve_resources = reserve_standard_io_resources, | 39 | .reserve_resources = reserve_standard_io_resources, |
40 | .memory_setup = default_machine_specific_memory_setup, | 40 | .memory_setup = default_machine_specific_memory_setup, |
41 | }, | 41 | }, |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 1cd608973ce5..e191c096ab90 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -7,7 +7,7 @@ | |||
7 | * kernel and insert a module (lg.ko) which allows us to run other Linux | 7 | * kernel and insert a module (lg.ko) which allows us to run other Linux |
8 | * kernels the same way we'd run processes. We call the first kernel the Host, | 8 | * kernels the same way we'd run processes. We call the first kernel the Host, |
9 | * and the others the Guests. The program which sets up and configures Guests | 9 | * and the others the Guests. The program which sets up and configures Guests |
10 | * (such as the example in Documentation/lguest/lguest.c) is called the | 10 | * (such as the example in Documentation/virtual/lguest/lguest.c) is called the |
11 | * Launcher. | 11 | * Launcher. |
12 | * | 12 | * |
13 | * Secondly, we only run specially modified Guests, not normal kernels: setting | 13 | * Secondly, we only run specially modified Guests, not normal kernels: setting |
@@ -913,8 +913,6 @@ static struct clocksource lguest_clock = { | |||
913 | .rating = 200, | 913 | .rating = 200, |
914 | .read = lguest_clock_read, | 914 | .read = lguest_clock_read, |
915 | .mask = CLOCKSOURCE_MASK(64), | 915 | .mask = CLOCKSOURCE_MASK(64), |
916 | .mult = 1 << 22, | ||
917 | .shift = 22, | ||
918 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 916 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
919 | }; | 917 | }; |
920 | 918 | ||
@@ -997,7 +995,7 @@ static void lguest_time_init(void) | |||
997 | /* Set up the timer interrupt (0) to go to our simple timer routine */ | 995 | /* Set up the timer interrupt (0) to go to our simple timer routine */ |
998 | irq_set_handler(0, lguest_time_irq); | 996 | irq_set_handler(0, lguest_time_irq); |
999 | 997 | ||
1000 | clocksource_register(&lguest_clock); | 998 | clocksource_register_hz(&lguest_clock, NSEC_PER_SEC); |
1001 | 999 | ||
1002 | /* We can't set cpumask in the initializer: damn C limitations! Set it | 1000 | /* We can't set cpumask in the initializer: damn C limitations! Set it |
1003 | * here and register our timer device. */ | 1001 | * here and register our timer device. */ |
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index aa4326bfb24a..f2145cfa12a6 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S | |||
@@ -1,5 +1,6 @@ | |||
1 | #include <linux/linkage.h> | 1 | #include <linux/linkage.h> |
2 | #include <asm/dwarf2.h> | 2 | #include <asm/dwarf2.h> |
3 | #include <asm/alternative-asm.h> | ||
3 | 4 | ||
4 | /* | 5 | /* |
5 | * Zero a page. | 6 | * Zero a page. |
@@ -14,6 +15,15 @@ ENTRY(clear_page_c) | |||
14 | CFI_ENDPROC | 15 | CFI_ENDPROC |
15 | ENDPROC(clear_page_c) | 16 | ENDPROC(clear_page_c) |
16 | 17 | ||
18 | ENTRY(clear_page_c_e) | ||
19 | CFI_STARTPROC | ||
20 | movl $4096,%ecx | ||
21 | xorl %eax,%eax | ||
22 | rep stosb | ||
23 | ret | ||
24 | CFI_ENDPROC | ||
25 | ENDPROC(clear_page_c_e) | ||
26 | |||
17 | ENTRY(clear_page) | 27 | ENTRY(clear_page) |
18 | CFI_STARTPROC | 28 | CFI_STARTPROC |
19 | xorl %eax,%eax | 29 | xorl %eax,%eax |
@@ -38,21 +48,26 @@ ENTRY(clear_page) | |||
38 | .Lclear_page_end: | 48 | .Lclear_page_end: |
39 | ENDPROC(clear_page) | 49 | ENDPROC(clear_page) |
40 | 50 | ||
41 | /* Some CPUs run faster using the string instructions. | 51 | /* |
42 | It is also a lot simpler. Use this when possible */ | 52 | * Some CPUs support enhanced REP MOVSB/STOSB instructions. |
53 | * It is recommended to use this when possible. | ||
54 | * If enhanced REP MOVSB/STOSB is not available, try to use fast string. | ||
55 | * Otherwise, use original function. | ||
56 | * | ||
57 | */ | ||
43 | 58 | ||
44 | #include <asm/cpufeature.h> | 59 | #include <asm/cpufeature.h> |
45 | 60 | ||
46 | .section .altinstr_replacement,"ax" | 61 | .section .altinstr_replacement,"ax" |
47 | 1: .byte 0xeb /* jmp <disp8> */ | 62 | 1: .byte 0xeb /* jmp <disp8> */ |
48 | .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ | 63 | .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ |
49 | 2: | 64 | 2: .byte 0xeb /* jmp <disp8> */ |
65 | .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */ | ||
66 | 3: | ||
50 | .previous | 67 | .previous |
51 | .section .altinstructions,"a" | 68 | .section .altinstructions,"a" |
52 | .align 8 | 69 | altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\ |
53 | .quad clear_page | 70 | .Lclear_page_end-clear_page, 2b-1b |
54 | .quad 1b | 71 | altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \ |
55 | .word X86_FEATURE_REP_GOOD | 72 | .Lclear_page_end-clear_page,3b-2b |
56 | .byte .Lclear_page_end - clear_page | ||
57 | .byte 2b - 1b | ||
58 | .previous | 73 | .previous |
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 99e482615195..024840266ba0 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -15,23 +15,30 @@ | |||
15 | #include <asm/asm-offsets.h> | 15 | #include <asm/asm-offsets.h> |
16 | #include <asm/thread_info.h> | 16 | #include <asm/thread_info.h> |
17 | #include <asm/cpufeature.h> | 17 | #include <asm/cpufeature.h> |
18 | #include <asm/alternative-asm.h> | ||
18 | 19 | ||
19 | .macro ALTERNATIVE_JUMP feature,orig,alt | 20 | /* |
21 | * By placing feature2 after feature1 in altinstructions section, we logically | ||
22 | * implement: | ||
23 | * If CPU has feature2, jmp to alt2 is used | ||
24 | * else if CPU has feature1, jmp to alt1 is used | ||
25 | * else jmp to orig is used. | ||
26 | */ | ||
27 | .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2 | ||
20 | 0: | 28 | 0: |
21 | .byte 0xe9 /* 32bit jump */ | 29 | .byte 0xe9 /* 32bit jump */ |
22 | .long \orig-1f /* by default jump to orig */ | 30 | .long \orig-1f /* by default jump to orig */ |
23 | 1: | 31 | 1: |
24 | .section .altinstr_replacement,"ax" | 32 | .section .altinstr_replacement,"ax" |
25 | 2: .byte 0xe9 /* near jump with 32bit immediate */ | 33 | 2: .byte 0xe9 /* near jump with 32bit immediate */ |
26 | .long \alt-1b /* offset */ /* or alternatively to alt */ | 34 | .long \alt1-1b /* offset */ /* or alternatively to alt1 */ |
35 | 3: .byte 0xe9 /* near jump with 32bit immediate */ | ||
36 | .long \alt2-1b /* offset */ /* or alternatively to alt2 */ | ||
27 | .previous | 37 | .previous |
38 | |||
28 | .section .altinstructions,"a" | 39 | .section .altinstructions,"a" |
29 | .align 8 | 40 | altinstruction_entry 0b,2b,\feature1,5,5 |
30 | .quad 0b | 41 | altinstruction_entry 0b,3b,\feature2,5,5 |
31 | .quad 2b | ||
32 | .word \feature /* when feature is set */ | ||
33 | .byte 5 | ||
34 | .byte 5 | ||
35 | .previous | 42 | .previous |
36 | .endm | 43 | .endm |
37 | 44 | ||
@@ -72,8 +79,10 @@ ENTRY(_copy_to_user) | |||
72 | addq %rdx,%rcx | 79 | addq %rdx,%rcx |
73 | jc bad_to_user | 80 | jc bad_to_user |
74 | cmpq TI_addr_limit(%rax),%rcx | 81 | cmpq TI_addr_limit(%rax),%rcx |
75 | jae bad_to_user | 82 | ja bad_to_user |
76 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | 83 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ |
84 | copy_user_generic_unrolled,copy_user_generic_string, \ | ||
85 | copy_user_enhanced_fast_string | ||
77 | CFI_ENDPROC | 86 | CFI_ENDPROC |
78 | ENDPROC(_copy_to_user) | 87 | ENDPROC(_copy_to_user) |
79 | 88 | ||
@@ -85,8 +94,10 @@ ENTRY(_copy_from_user) | |||
85 | addq %rdx,%rcx | 94 | addq %rdx,%rcx |
86 | jc bad_from_user | 95 | jc bad_from_user |
87 | cmpq TI_addr_limit(%rax),%rcx | 96 | cmpq TI_addr_limit(%rax),%rcx |
88 | jae bad_from_user | 97 | ja bad_from_user |
89 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | 98 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ |
99 | copy_user_generic_unrolled,copy_user_generic_string, \ | ||
100 | copy_user_enhanced_fast_string | ||
90 | CFI_ENDPROC | 101 | CFI_ENDPROC |
91 | ENDPROC(_copy_from_user) | 102 | ENDPROC(_copy_from_user) |
92 | 103 | ||
@@ -255,3 +266,37 @@ ENTRY(copy_user_generic_string) | |||
255 | .previous | 266 | .previous |
256 | CFI_ENDPROC | 267 | CFI_ENDPROC |
257 | ENDPROC(copy_user_generic_string) | 268 | ENDPROC(copy_user_generic_string) |
269 | |||
270 | /* | ||
271 | * Some CPUs are adding enhanced REP MOVSB/STOSB instructions. | ||
272 | * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled. | ||
273 | * | ||
274 | * Input: | ||
275 | * rdi destination | ||
276 | * rsi source | ||
277 | * rdx count | ||
278 | * | ||
279 | * Output: | ||
280 | * eax uncopied bytes or 0 if successful. | ||
281 | */ | ||
282 | ENTRY(copy_user_enhanced_fast_string) | ||
283 | CFI_STARTPROC | ||
284 | andl %edx,%edx | ||
285 | jz 2f | ||
286 | movl %edx,%ecx | ||
287 | 1: rep | ||
288 | movsb | ||
289 | 2: xorl %eax,%eax | ||
290 | ret | ||
291 | |||
292 | .section .fixup,"ax" | ||
293 | 12: movl %ecx,%edx /* ecx is zerorest also */ | ||
294 | jmp copy_user_handle_tail | ||
295 | .previous | ||
296 | |||
297 | .section __ex_table,"a" | ||
298 | .align 8 | ||
299 | .quad 1b,12b | ||
300 | .previous | ||
301 | CFI_ENDPROC | ||
302 | ENDPROC(copy_user_enhanced_fast_string) | ||
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 75ef61e35e38..efbf2a0ecdea 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S | |||
@@ -4,6 +4,7 @@ | |||
4 | 4 | ||
5 | #include <asm/cpufeature.h> | 5 | #include <asm/cpufeature.h> |
6 | #include <asm/dwarf2.h> | 6 | #include <asm/dwarf2.h> |
7 | #include <asm/alternative-asm.h> | ||
7 | 8 | ||
8 | /* | 9 | /* |
9 | * memcpy - Copy a memory block. | 10 | * memcpy - Copy a memory block. |
@@ -37,6 +38,23 @@ | |||
37 | .Lmemcpy_e: | 38 | .Lmemcpy_e: |
38 | .previous | 39 | .previous |
39 | 40 | ||
41 | /* | ||
42 | * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than | ||
43 | * memcpy_c. Use memcpy_c_e when possible. | ||
44 | * | ||
45 | * This gets patched over the unrolled variant (below) via the | ||
46 | * alternative instructions framework: | ||
47 | */ | ||
48 | .section .altinstr_replacement, "ax", @progbits | ||
49 | .Lmemcpy_c_e: | ||
50 | movq %rdi, %rax | ||
51 | |||
52 | movl %edx, %ecx | ||
53 | rep movsb | ||
54 | ret | ||
55 | .Lmemcpy_e_e: | ||
56 | .previous | ||
57 | |||
40 | ENTRY(__memcpy) | 58 | ENTRY(__memcpy) |
41 | ENTRY(memcpy) | 59 | ENTRY(memcpy) |
42 | CFI_STARTPROC | 60 | CFI_STARTPROC |
@@ -49,7 +67,7 @@ ENTRY(memcpy) | |||
49 | jb .Lhandle_tail | 67 | jb .Lhandle_tail |
50 | 68 | ||
51 | /* | 69 | /* |
52 | * We check whether memory false dependece could occur, | 70 | * We check whether memory false dependence could occur, |
53 | * then jump to corresponding copy mode. | 71 | * then jump to corresponding copy mode. |
54 | */ | 72 | */ |
55 | cmp %dil, %sil | 73 | cmp %dil, %sil |
@@ -171,21 +189,22 @@ ENDPROC(memcpy) | |||
171 | ENDPROC(__memcpy) | 189 | ENDPROC(__memcpy) |
172 | 190 | ||
173 | /* | 191 | /* |
174 | * Some CPUs run faster using the string copy instructions. | 192 | * Some CPUs are adding enhanced REP MOVSB/STOSB feature |
175 | * It is also a lot simpler. Use this when possible: | 193 | * If the feature is supported, memcpy_c_e() is the first choice. |
176 | */ | 194 | * If enhanced rep movsb copy is not available, use fast string copy |
177 | 195 | * memcpy_c() when possible. This is faster and code is simpler than | |
178 | .section .altinstructions, "a" | 196 | * original memcpy(). |
179 | .align 8 | 197 | * Otherwise, original memcpy() is used. |
180 | .quad memcpy | 198 | * In .altinstructions section, ERMS feature is placed after REG_GOOD |
181 | .quad .Lmemcpy_c | 199 | * feature to implement the right patch order. |
182 | .word X86_FEATURE_REP_GOOD | 200 | * |
183 | |||
184 | /* | ||
185 | * Replace only beginning, memcpy is used to apply alternatives, | 201 | * Replace only beginning, memcpy is used to apply alternatives, |
186 | * so it is silly to overwrite itself with nops - reboot is the | 202 | * so it is silly to overwrite itself with nops - reboot is the |
187 | * only outcome... | 203 | * only outcome... |
188 | */ | 204 | */ |
189 | .byte .Lmemcpy_e - .Lmemcpy_c | 205 | .section .altinstructions, "a" |
190 | .byte .Lmemcpy_e - .Lmemcpy_c | 206 | altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\ |
207 | .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c | ||
208 | altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \ | ||
209 | .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e | ||
191 | .previous | 210 | .previous |
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 0ecb8433e5a8..d0ec9c2936d7 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S | |||
@@ -8,6 +8,7 @@ | |||
8 | #define _STRING_C | 8 | #define _STRING_C |
9 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
10 | #include <asm/dwarf2.h> | 10 | #include <asm/dwarf2.h> |
11 | #include <asm/cpufeature.h> | ||
11 | 12 | ||
12 | #undef memmove | 13 | #undef memmove |
13 | 14 | ||
@@ -24,6 +25,7 @@ | |||
24 | */ | 25 | */ |
25 | ENTRY(memmove) | 26 | ENTRY(memmove) |
26 | CFI_STARTPROC | 27 | CFI_STARTPROC |
28 | |||
27 | /* Handle more 32bytes in loop */ | 29 | /* Handle more 32bytes in loop */ |
28 | mov %rdi, %rax | 30 | mov %rdi, %rax |
29 | cmp $0x20, %rdx | 31 | cmp $0x20, %rdx |
@@ -31,8 +33,13 @@ ENTRY(memmove) | |||
31 | 33 | ||
32 | /* Decide forward/backward copy mode */ | 34 | /* Decide forward/backward copy mode */ |
33 | cmp %rdi, %rsi | 35 | cmp %rdi, %rsi |
34 | jb 2f | 36 | jge .Lmemmove_begin_forward |
37 | mov %rsi, %r8 | ||
38 | add %rdx, %r8 | ||
39 | cmp %rdi, %r8 | ||
40 | jg 2f | ||
35 | 41 | ||
42 | .Lmemmove_begin_forward: | ||
36 | /* | 43 | /* |
37 | * movsq instruction have many startup latency | 44 | * movsq instruction have many startup latency |
38 | * so we handle small size by general register. | 45 | * so we handle small size by general register. |
@@ -78,6 +85,8 @@ ENTRY(memmove) | |||
78 | rep movsq | 85 | rep movsq |
79 | movq %r11, (%r10) | 86 | movq %r11, (%r10) |
80 | jmp 13f | 87 | jmp 13f |
88 | .Lmemmove_end_forward: | ||
89 | |||
81 | /* | 90 | /* |
82 | * Handle data backward by movsq. | 91 | * Handle data backward by movsq. |
83 | */ | 92 | */ |
@@ -194,4 +203,22 @@ ENTRY(memmove) | |||
194 | 13: | 203 | 13: |
195 | retq | 204 | retq |
196 | CFI_ENDPROC | 205 | CFI_ENDPROC |
206 | |||
207 | .section .altinstr_replacement,"ax" | ||
208 | .Lmemmove_begin_forward_efs: | ||
209 | /* Forward moving data. */ | ||
210 | movq %rdx, %rcx | ||
211 | rep movsb | ||
212 | retq | ||
213 | .Lmemmove_end_forward_efs: | ||
214 | .previous | ||
215 | |||
216 | .section .altinstructions,"a" | ||
217 | .align 8 | ||
218 | .quad .Lmemmove_begin_forward | ||
219 | .quad .Lmemmove_begin_forward_efs | ||
220 | .word X86_FEATURE_ERMS | ||
221 | .byte .Lmemmove_end_forward-.Lmemmove_begin_forward | ||
222 | .byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs | ||
223 | .previous | ||
197 | ENDPROC(memmove) | 224 | ENDPROC(memmove) |
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 09d344269652..79bd454b78a3 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S | |||
@@ -2,9 +2,13 @@ | |||
2 | 2 | ||
3 | #include <linux/linkage.h> | 3 | #include <linux/linkage.h> |
4 | #include <asm/dwarf2.h> | 4 | #include <asm/dwarf2.h> |
5 | #include <asm/cpufeature.h> | ||
6 | #include <asm/alternative-asm.h> | ||
5 | 7 | ||
6 | /* | 8 | /* |
7 | * ISO C memset - set a memory block to a byte value. | 9 | * ISO C memset - set a memory block to a byte value. This function uses fast |
10 | * string to get better performance than the original function. The code is | ||
11 | * simpler and shorter than the orignal function as well. | ||
8 | * | 12 | * |
9 | * rdi destination | 13 | * rdi destination |
10 | * rsi value (char) | 14 | * rsi value (char) |
@@ -31,6 +35,28 @@ | |||
31 | .Lmemset_e: | 35 | .Lmemset_e: |
32 | .previous | 36 | .previous |
33 | 37 | ||
38 | /* | ||
39 | * ISO C memset - set a memory block to a byte value. This function uses | ||
40 | * enhanced rep stosb to override the fast string function. | ||
41 | * The code is simpler and shorter than the fast string function as well. | ||
42 | * | ||
43 | * rdi destination | ||
44 | * rsi value (char) | ||
45 | * rdx count (bytes) | ||
46 | * | ||
47 | * rax original destination | ||
48 | */ | ||
49 | .section .altinstr_replacement, "ax", @progbits | ||
50 | .Lmemset_c_e: | ||
51 | movq %rdi,%r9 | ||
52 | movb %sil,%al | ||
53 | movl %edx,%ecx | ||
54 | rep stosb | ||
55 | movq %r9,%rax | ||
56 | ret | ||
57 | .Lmemset_e_e: | ||
58 | .previous | ||
59 | |||
34 | ENTRY(memset) | 60 | ENTRY(memset) |
35 | ENTRY(__memset) | 61 | ENTRY(__memset) |
36 | CFI_STARTPROC | 62 | CFI_STARTPROC |
@@ -112,16 +138,20 @@ ENTRY(__memset) | |||
112 | ENDPROC(memset) | 138 | ENDPROC(memset) |
113 | ENDPROC(__memset) | 139 | ENDPROC(__memset) |
114 | 140 | ||
115 | /* Some CPUs run faster using the string instructions. | 141 | /* Some CPUs support enhanced REP MOVSB/STOSB feature. |
116 | It is also a lot simpler. Use this when possible */ | 142 | * It is recommended to use this when possible. |
117 | 143 | * | |
118 | #include <asm/cpufeature.h> | 144 | * If enhanced REP MOVSB/STOSB feature is not available, use fast string |
119 | 145 | * instructions. | |
146 | * | ||
147 | * Otherwise, use original memset function. | ||
148 | * | ||
149 | * In .altinstructions section, ERMS feature is placed after REG_GOOD | ||
150 | * feature to implement the right patch order. | ||
151 | */ | ||
120 | .section .altinstructions,"a" | 152 | .section .altinstructions,"a" |
121 | .align 8 | 153 | altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\ |
122 | .quad memset | 154 | .Lfinal-memset,.Lmemset_e-.Lmemset_c |
123 | .quad .Lmemset_c | 155 | altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \ |
124 | .word X86_FEATURE_REP_GOOD | 156 | .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e |
125 | .byte .Lfinal - memset | ||
126 | .byte .Lmemset_e - .Lmemset_c | ||
127 | .previous | 157 | .previous |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 3e608edf9958..3d11327c9ab4 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -23,8 +23,8 @@ mmiotrace-y := kmmio.o pf_in.o mmio-mod.o | |||
23 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o | 23 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o |
24 | 24 | ||
25 | obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o | 25 | obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o |
26 | obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o | 26 | obj-$(CONFIG_AMD_NUMA) += amdtopology.o |
27 | obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o | 27 | obj-$(CONFIG_ACPI_NUMA) += srat.o |
28 | obj-$(CONFIG_NUMA_EMU) += numa_emulation.o | 28 | obj-$(CONFIG_NUMA_EMU) += numa_emulation.o |
29 | 29 | ||
30 | obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o | 30 | obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o |
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology.c index 0919c26820d4..5247d01329ca 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | #include <linux/nodemask.h> | 13 | #include <linux/nodemask.h> |
14 | #include <linux/memblock.h> | 14 | #include <linux/memblock.h> |
15 | #include <linux/bootmem.h> | ||
15 | 16 | ||
16 | #include <asm/io.h> | 17 | #include <asm/io.h> |
17 | #include <linux/pci_ids.h> | 18 | #include <linux/pci_ids.h> |
@@ -69,10 +70,10 @@ static __init void early_get_boot_cpu_id(void) | |||
69 | 70 | ||
70 | int __init amd_numa_init(void) | 71 | int __init amd_numa_init(void) |
71 | { | 72 | { |
72 | unsigned long start = PFN_PHYS(0); | 73 | u64 start = PFN_PHYS(0); |
73 | unsigned long end = PFN_PHYS(max_pfn); | 74 | u64 end = PFN_PHYS(max_pfn); |
74 | unsigned numnodes; | 75 | unsigned numnodes; |
75 | unsigned long prevbase; | 76 | u64 prevbase; |
76 | int i, j, nb; | 77 | int i, j, nb; |
77 | u32 nodeid, reg; | 78 | u32 nodeid, reg; |
78 | unsigned int bits, cores, apicid_base; | 79 | unsigned int bits, cores, apicid_base; |
@@ -95,7 +96,7 @@ int __init amd_numa_init(void) | |||
95 | 96 | ||
96 | prevbase = 0; | 97 | prevbase = 0; |
97 | for (i = 0; i < 8; i++) { | 98 | for (i = 0; i < 8; i++) { |
98 | unsigned long base, limit; | 99 | u64 base, limit; |
99 | 100 | ||
100 | base = read_pci_config(0, nb, 1, 0x40 + i*8); | 101 | base = read_pci_config(0, nb, 1, 0x40 + i*8); |
101 | limit = read_pci_config(0, nb, 1, 0x44 + i*8); | 102 | limit = read_pci_config(0, nb, 1, 0x44 + i*8); |
@@ -107,18 +108,18 @@ int __init amd_numa_init(void) | |||
107 | continue; | 108 | continue; |
108 | } | 109 | } |
109 | if (nodeid >= numnodes) { | 110 | if (nodeid >= numnodes) { |
110 | pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid, | 111 | pr_info("Ignoring excess node %d (%Lx:%Lx)\n", nodeid, |
111 | base, limit); | 112 | base, limit); |
112 | continue; | 113 | continue; |
113 | } | 114 | } |
114 | 115 | ||
115 | if (!limit) { | 116 | if (!limit) { |
116 | pr_info("Skipping node entry %d (base %lx)\n", | 117 | pr_info("Skipping node entry %d (base %Lx)\n", |
117 | i, base); | 118 | i, base); |
118 | continue; | 119 | continue; |
119 | } | 120 | } |
120 | if ((base >> 8) & 3 || (limit >> 8) & 3) { | 121 | if ((base >> 8) & 3 || (limit >> 8) & 3) { |
121 | pr_err("Node %d using interleaving mode %lx/%lx\n", | 122 | pr_err("Node %d using interleaving mode %Lx/%Lx\n", |
122 | nodeid, (base >> 8) & 3, (limit >> 8) & 3); | 123 | nodeid, (base >> 8) & 3, (limit >> 8) & 3); |
123 | return -EINVAL; | 124 | return -EINVAL; |
124 | } | 125 | } |
@@ -150,19 +151,19 @@ int __init amd_numa_init(void) | |||
150 | continue; | 151 | continue; |
151 | } | 152 | } |
152 | if (limit < base) { | 153 | if (limit < base) { |
153 | pr_err("Node %d bogus settings %lx-%lx.\n", | 154 | pr_err("Node %d bogus settings %Lx-%Lx.\n", |
154 | nodeid, base, limit); | 155 | nodeid, base, limit); |
155 | continue; | 156 | continue; |
156 | } | 157 | } |
157 | 158 | ||
158 | /* Could sort here, but punt for now. Should not happen anyroads. */ | 159 | /* Could sort here, but punt for now. Should not happen anyroads. */ |
159 | if (prevbase > base) { | 160 | if (prevbase > base) { |
160 | pr_err("Node map not sorted %lx,%lx\n", | 161 | pr_err("Node map not sorted %Lx,%Lx\n", |
161 | prevbase, base); | 162 | prevbase, base); |
162 | return -EINVAL; | 163 | return -EINVAL; |
163 | } | 164 | } |
164 | 165 | ||
165 | pr_info("Node %d MemBase %016lx Limit %016lx\n", | 166 | pr_info("Node %d MemBase %016Lx Limit %016Lx\n", |
166 | nodeid, base, limit); | 167 | nodeid, base, limit); |
167 | 168 | ||
168 | prevbase = base; | 169 | prevbase = base; |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 20e3f8702d1e..bcb394dfbb35 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/mmiotrace.h> /* kmmio_handler, ... */ | 12 | #include <linux/mmiotrace.h> /* kmmio_handler, ... */ |
13 | #include <linux/perf_event.h> /* perf_sw_event */ | 13 | #include <linux/perf_event.h> /* perf_sw_event */ |
14 | #include <linux/hugetlb.h> /* hstate_index_to_shift */ | 14 | #include <linux/hugetlb.h> /* hstate_index_to_shift */ |
15 | #include <linux/prefetch.h> /* prefetchw */ | ||
15 | 16 | ||
16 | #include <asm/traps.h> /* dotraplinkage, ... */ | 17 | #include <asm/traps.h> /* dotraplinkage, ... */ |
17 | #include <asm/pgalloc.h> /* pgd_*(), ... */ | 18 | #include <asm/pgalloc.h> /* pgd_*(), ... */ |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 80088f994193..29f7c6d98179 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -678,8 +678,10 @@ static void __init zone_sizes_init(void) | |||
678 | { | 678 | { |
679 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | 679 | unsigned long max_zone_pfns[MAX_NR_ZONES]; |
680 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | 680 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); |
681 | #ifdef CONFIG_ZONE_DMA | ||
681 | max_zone_pfns[ZONE_DMA] = | 682 | max_zone_pfns[ZONE_DMA] = |
682 | virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | 683 | virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; |
684 | #endif | ||
683 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; | 685 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; |
684 | #ifdef CONFIG_HIGHMEM | 686 | #ifdef CONFIG_HIGHMEM |
685 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; | 687 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; |
@@ -716,6 +718,7 @@ void __init paging_init(void) | |||
716 | * NOTE: at this point the bootmem allocator is fully available. | 718 | * NOTE: at this point the bootmem allocator is fully available. |
717 | */ | 719 | */ |
718 | olpc_dt_build_devicetree(); | 720 | olpc_dt_build_devicetree(); |
721 | sparse_memory_present_with_active_regions(MAX_NUMNODES); | ||
719 | sparse_init(); | 722 | sparse_init(); |
720 | zone_sizes_init(); | 723 | zone_sizes_init(); |
721 | } | 724 | } |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 794233587287..d865c4aeec55 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -616,7 +616,9 @@ void __init paging_init(void) | |||
616 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | 616 | unsigned long max_zone_pfns[MAX_NR_ZONES]; |
617 | 617 | ||
618 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | 618 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); |
619 | #ifdef CONFIG_ZONE_DMA | ||
619 | max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; | 620 | max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; |
621 | #endif | ||
620 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | 622 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; |
621 | max_zone_pfns[ZONE_NORMAL] = max_pfn; | 623 | max_zone_pfns[ZONE_NORMAL] = max_pfn; |
622 | 624 | ||
@@ -679,14 +681,6 @@ int arch_add_memory(int nid, u64 start, u64 size) | |||
679 | } | 681 | } |
680 | EXPORT_SYMBOL_GPL(arch_add_memory); | 682 | EXPORT_SYMBOL_GPL(arch_add_memory); |
681 | 683 | ||
682 | #if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA) | ||
683 | int memory_add_physaddr_to_nid(u64 start) | ||
684 | { | ||
685 | return 0; | ||
686 | } | ||
687 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | ||
688 | #endif | ||
689 | |||
690 | #endif /* CONFIG_MEMORY_HOTPLUG */ | 684 | #endif /* CONFIG_MEMORY_HOTPLUG */ |
691 | 685 | ||
692 | static struct kcore_list kcore_vsyscall; | 686 | static struct kcore_list kcore_vsyscall; |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 0369843511dc..be1ef574ce9a 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -91,13 +91,6 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
91 | return (__force void __iomem *)phys_to_virt(phys_addr); | 91 | return (__force void __iomem *)phys_to_virt(phys_addr); |
92 | 92 | ||
93 | /* | 93 | /* |
94 | * Check if the request spans more than any BAR in the iomem resource | ||
95 | * tree. | ||
96 | */ | ||
97 | WARN_ONCE(iomem_map_sanity_check(phys_addr, size), | ||
98 | KERN_INFO "Info: mapping multiple BARs. Your kernel is fine."); | ||
99 | |||
100 | /* | ||
101 | * Don't allow anybody to remap normal RAM that we're using.. | 94 | * Don't allow anybody to remap normal RAM that we're using.. |
102 | */ | 95 | */ |
103 | last_pfn = last_addr >> PAGE_SHIFT; | 96 | last_pfn = last_addr >> PAGE_SHIFT; |
@@ -170,6 +163,13 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
170 | ret_addr = (void __iomem *) (vaddr + offset); | 163 | ret_addr = (void __iomem *) (vaddr + offset); |
171 | mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); | 164 | mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); |
172 | 165 | ||
166 | /* | ||
167 | * Check if the request spans more than any BAR in the iomem resource | ||
168 | * tree. | ||
169 | */ | ||
170 | WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size), | ||
171 | KERN_INFO "Info: mapping multiple BARs. Your kernel is fine."); | ||
172 | |||
173 | return ret_addr; | 173 | return ret_addr; |
174 | err_free_area: | 174 | err_free_area: |
175 | free_vm_area(area); | 175 | free_vm_area(area); |
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 745258dfc4dc..f5510d889a22 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -1,11 +1,39 @@ | |||
1 | /* Common code for 32 and 64-bit NUMA */ | 1 | /* Common code for 32 and 64-bit NUMA */ |
2 | #include <linux/topology.h> | 2 | #include <linux/kernel.h> |
3 | #include <linux/module.h> | 3 | #include <linux/mm.h> |
4 | #include <linux/string.h> | ||
5 | #include <linux/init.h> | ||
4 | #include <linux/bootmem.h> | 6 | #include <linux/bootmem.h> |
5 | #include <asm/numa.h> | 7 | #include <linux/memblock.h> |
8 | #include <linux/mmzone.h> | ||
9 | #include <linux/ctype.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/nodemask.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/topology.h> | ||
14 | |||
15 | #include <asm/e820.h> | ||
16 | #include <asm/proto.h> | ||
17 | #include <asm/dma.h> | ||
6 | #include <asm/acpi.h> | 18 | #include <asm/acpi.h> |
19 | #include <asm/amd_nb.h> | ||
20 | |||
21 | #include "numa_internal.h" | ||
7 | 22 | ||
8 | int __initdata numa_off; | 23 | int __initdata numa_off; |
24 | nodemask_t numa_nodes_parsed __initdata; | ||
25 | |||
26 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | ||
27 | EXPORT_SYMBOL(node_data); | ||
28 | |||
29 | static struct numa_meminfo numa_meminfo | ||
30 | #ifndef CONFIG_MEMORY_HOTPLUG | ||
31 | __initdata | ||
32 | #endif | ||
33 | ; | ||
34 | |||
35 | static int numa_distance_cnt; | ||
36 | static u8 *numa_distance; | ||
9 | 37 | ||
10 | static __init int numa_setup(char *opt) | 38 | static __init int numa_setup(char *opt) |
11 | { | 39 | { |
@@ -32,6 +60,15 @@ s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | |||
32 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE | 60 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE |
33 | }; | 61 | }; |
34 | 62 | ||
63 | int __cpuinit numa_cpu_node(int cpu) | ||
64 | { | ||
65 | int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); | ||
66 | |||
67 | if (apicid != BAD_APICID) | ||
68 | return __apicid_to_node[apicid]; | ||
69 | return NUMA_NO_NODE; | ||
70 | } | ||
71 | |||
35 | cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; | 72 | cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; |
36 | EXPORT_SYMBOL(node_to_cpumask_map); | 73 | EXPORT_SYMBOL(node_to_cpumask_map); |
37 | 74 | ||
@@ -95,6 +132,407 @@ void __init setup_node_to_cpumask_map(void) | |||
95 | pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); | 132 | pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); |
96 | } | 133 | } |
97 | 134 | ||
135 | static int __init numa_add_memblk_to(int nid, u64 start, u64 end, | ||
136 | struct numa_meminfo *mi) | ||
137 | { | ||
138 | /* ignore zero length blks */ | ||
139 | if (start == end) | ||
140 | return 0; | ||
141 | |||
142 | /* whine about and ignore invalid blks */ | ||
143 | if (start > end || nid < 0 || nid >= MAX_NUMNODES) { | ||
144 | pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n", | ||
145 | nid, start, end); | ||
146 | return 0; | ||
147 | } | ||
148 | |||
149 | if (mi->nr_blks >= NR_NODE_MEMBLKS) { | ||
150 | pr_err("NUMA: too many memblk ranges\n"); | ||
151 | return -EINVAL; | ||
152 | } | ||
153 | |||
154 | mi->blk[mi->nr_blks].start = start; | ||
155 | mi->blk[mi->nr_blks].end = end; | ||
156 | mi->blk[mi->nr_blks].nid = nid; | ||
157 | mi->nr_blks++; | ||
158 | return 0; | ||
159 | } | ||
160 | |||
161 | /** | ||
162 | * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo | ||
163 | * @idx: Index of memblk to remove | ||
164 | * @mi: numa_meminfo to remove memblk from | ||
165 | * | ||
166 | * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and | ||
167 | * decrementing @mi->nr_blks. | ||
168 | */ | ||
169 | void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi) | ||
170 | { | ||
171 | mi->nr_blks--; | ||
172 | memmove(&mi->blk[idx], &mi->blk[idx + 1], | ||
173 | (mi->nr_blks - idx) * sizeof(mi->blk[0])); | ||
174 | } | ||
175 | |||
176 | /** | ||
177 | * numa_add_memblk - Add one numa_memblk to numa_meminfo | ||
178 | * @nid: NUMA node ID of the new memblk | ||
179 | * @start: Start address of the new memblk | ||
180 | * @end: End address of the new memblk | ||
181 | * | ||
182 | * Add a new memblk to the default numa_meminfo. | ||
183 | * | ||
184 | * RETURNS: | ||
185 | * 0 on success, -errno on failure. | ||
186 | */ | ||
187 | int __init numa_add_memblk(int nid, u64 start, u64 end) | ||
188 | { | ||
189 | return numa_add_memblk_to(nid, start, end, &numa_meminfo); | ||
190 | } | ||
191 | |||
192 | /* Initialize NODE_DATA for a node on the local memory */ | ||
193 | static void __init setup_node_data(int nid, u64 start, u64 end) | ||
194 | { | ||
195 | const u64 nd_low = PFN_PHYS(MAX_DMA_PFN); | ||
196 | const u64 nd_high = PFN_PHYS(max_pfn_mapped); | ||
197 | const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | ||
198 | bool remapped = false; | ||
199 | u64 nd_pa; | ||
200 | void *nd; | ||
201 | int tnid; | ||
202 | |||
203 | /* | ||
204 | * Don't confuse VM with a node that doesn't have the | ||
205 | * minimum amount of memory: | ||
206 | */ | ||
207 | if (end && (end - start) < NODE_MIN_SIZE) | ||
208 | return; | ||
209 | |||
210 | /* initialize remap allocator before aligning to ZONE_ALIGN */ | ||
211 | init_alloc_remap(nid, start, end); | ||
212 | |||
213 | start = roundup(start, ZONE_ALIGN); | ||
214 | |||
215 | printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n", | ||
216 | nid, start, end); | ||
217 | |||
218 | /* | ||
219 | * Allocate node data. Try remap allocator first, node-local | ||
220 | * memory and then any node. Never allocate in DMA zone. | ||
221 | */ | ||
222 | nd = alloc_remap(nid, nd_size); | ||
223 | if (nd) { | ||
224 | nd_pa = __pa(nd); | ||
225 | remapped = true; | ||
226 | } else { | ||
227 | nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high, | ||
228 | nd_size, SMP_CACHE_BYTES); | ||
229 | if (nd_pa == MEMBLOCK_ERROR) | ||
230 | nd_pa = memblock_find_in_range(nd_low, nd_high, | ||
231 | nd_size, SMP_CACHE_BYTES); | ||
232 | if (nd_pa == MEMBLOCK_ERROR) { | ||
233 | pr_err("Cannot find %zu bytes in node %d\n", | ||
234 | nd_size, nid); | ||
235 | return; | ||
236 | } | ||
237 | memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA"); | ||
238 | nd = __va(nd_pa); | ||
239 | } | ||
240 | |||
241 | /* report and initialize */ | ||
242 | printk(KERN_INFO " NODE_DATA [%016Lx - %016Lx]%s\n", | ||
243 | nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : ""); | ||
244 | tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); | ||
245 | if (!remapped && tnid != nid) | ||
246 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid); | ||
247 | |||
248 | node_data[nid] = nd; | ||
249 | memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); | ||
250 | NODE_DATA(nid)->node_id = nid; | ||
251 | NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT; | ||
252 | NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT; | ||
253 | |||
254 | node_set_online(nid); | ||
255 | } | ||
256 | |||
257 | /** | ||
258 | * numa_cleanup_meminfo - Cleanup a numa_meminfo | ||
259 | * @mi: numa_meminfo to clean up | ||
260 | * | ||
261 | * Sanitize @mi by merging and removing unnecessary memblks. Also check for | ||
262 | * conflicts and clear unused memblks. | ||
263 | * | ||
264 | * RETURNS: | ||
265 | * 0 on success, -errno on failure. | ||
266 | */ | ||
267 | int __init numa_cleanup_meminfo(struct numa_meminfo *mi) | ||
268 | { | ||
269 | const u64 low = 0; | ||
270 | const u64 high = PFN_PHYS(max_pfn); | ||
271 | int i, j, k; | ||
272 | |||
273 | /* first, trim all entries */ | ||
274 | for (i = 0; i < mi->nr_blks; i++) { | ||
275 | struct numa_memblk *bi = &mi->blk[i]; | ||
276 | |||
277 | /* make sure all blocks are inside the limits */ | ||
278 | bi->start = max(bi->start, low); | ||
279 | bi->end = min(bi->end, high); | ||
280 | |||
281 | /* and there's no empty block */ | ||
282 | if (bi->start >= bi->end) | ||
283 | numa_remove_memblk_from(i--, mi); | ||
284 | } | ||
285 | |||
286 | /* merge neighboring / overlapping entries */ | ||
287 | for (i = 0; i < mi->nr_blks; i++) { | ||
288 | struct numa_memblk *bi = &mi->blk[i]; | ||
289 | |||
290 | for (j = i + 1; j < mi->nr_blks; j++) { | ||
291 | struct numa_memblk *bj = &mi->blk[j]; | ||
292 | u64 start, end; | ||
293 | |||
294 | /* | ||
295 | * See whether there are overlapping blocks. Whine | ||
296 | * about but allow overlaps of the same nid. They | ||
297 | * will be merged below. | ||
298 | */ | ||
299 | if (bi->end > bj->start && bi->start < bj->end) { | ||
300 | if (bi->nid != bj->nid) { | ||
301 | pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n", | ||
302 | bi->nid, bi->start, bi->end, | ||
303 | bj->nid, bj->start, bj->end); | ||
304 | return -EINVAL; | ||
305 | } | ||
306 | pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n", | ||
307 | bi->nid, bi->start, bi->end, | ||
308 | bj->start, bj->end); | ||
309 | } | ||
310 | |||
311 | /* | ||
312 | * Join together blocks on the same node, holes | ||
313 | * between which don't overlap with memory on other | ||
314 | * nodes. | ||
315 | */ | ||
316 | if (bi->nid != bj->nid) | ||
317 | continue; | ||
318 | start = min(bi->start, bj->start); | ||
319 | end = max(bi->end, bj->end); | ||
320 | for (k = 0; k < mi->nr_blks; k++) { | ||
321 | struct numa_memblk *bk = &mi->blk[k]; | ||
322 | |||
323 | if (bi->nid == bk->nid) | ||
324 | continue; | ||
325 | if (start < bk->end && end > bk->start) | ||
326 | break; | ||
327 | } | ||
328 | if (k < mi->nr_blks) | ||
329 | continue; | ||
330 | printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%Lx,%Lx)\n", | ||
331 | bi->nid, bi->start, bi->end, bj->start, bj->end, | ||
332 | start, end); | ||
333 | bi->start = start; | ||
334 | bi->end = end; | ||
335 | numa_remove_memblk_from(j--, mi); | ||
336 | } | ||
337 | } | ||
338 | |||
339 | /* clear unused ones */ | ||
340 | for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) { | ||
341 | mi->blk[i].start = mi->blk[i].end = 0; | ||
342 | mi->blk[i].nid = NUMA_NO_NODE; | ||
343 | } | ||
344 | |||
345 | return 0; | ||
346 | } | ||
347 | |||
348 | /* | ||
349 | * Set nodes, which have memory in @mi, in *@nodemask. | ||
350 | */ | ||
351 | static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, | ||
352 | const struct numa_meminfo *mi) | ||
353 | { | ||
354 | int i; | ||
355 | |||
356 | for (i = 0; i < ARRAY_SIZE(mi->blk); i++) | ||
357 | if (mi->blk[i].start != mi->blk[i].end && | ||
358 | mi->blk[i].nid != NUMA_NO_NODE) | ||
359 | node_set(mi->blk[i].nid, *nodemask); | ||
360 | } | ||
361 | |||
362 | /** | ||
363 | * numa_reset_distance - Reset NUMA distance table | ||
364 | * | ||
365 | * The current table is freed. The next numa_set_distance() call will | ||
366 | * create a new one. | ||
367 | */ | ||
368 | void __init numa_reset_distance(void) | ||
369 | { | ||
370 | size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]); | ||
371 | |||
372 | /* numa_distance could be 1LU marking allocation failure, test cnt */ | ||
373 | if (numa_distance_cnt) | ||
374 | memblock_x86_free_range(__pa(numa_distance), | ||
375 | __pa(numa_distance) + size); | ||
376 | numa_distance_cnt = 0; | ||
377 | numa_distance = NULL; /* enable table creation */ | ||
378 | } | ||
379 | |||
380 | static int __init numa_alloc_distance(void) | ||
381 | { | ||
382 | nodemask_t nodes_parsed; | ||
383 | size_t size; | ||
384 | int i, j, cnt = 0; | ||
385 | u64 phys; | ||
386 | |||
387 | /* size the new table and allocate it */ | ||
388 | nodes_parsed = numa_nodes_parsed; | ||
389 | numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo); | ||
390 | |||
391 | for_each_node_mask(i, nodes_parsed) | ||
392 | cnt = i; | ||
393 | cnt++; | ||
394 | size = cnt * cnt * sizeof(numa_distance[0]); | ||
395 | |||
396 | phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), | ||
397 | size, PAGE_SIZE); | ||
398 | if (phys == MEMBLOCK_ERROR) { | ||
399 | pr_warning("NUMA: Warning: can't allocate distance table!\n"); | ||
400 | /* don't retry until explicitly reset */ | ||
401 | numa_distance = (void *)1LU; | ||
402 | return -ENOMEM; | ||
403 | } | ||
404 | memblock_x86_reserve_range(phys, phys + size, "NUMA DIST"); | ||
405 | |||
406 | numa_distance = __va(phys); | ||
407 | numa_distance_cnt = cnt; | ||
408 | |||
409 | /* fill with the default distances */ | ||
410 | for (i = 0; i < cnt; i++) | ||
411 | for (j = 0; j < cnt; j++) | ||
412 | numa_distance[i * cnt + j] = i == j ? | ||
413 | LOCAL_DISTANCE : REMOTE_DISTANCE; | ||
414 | printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt); | ||
415 | |||
416 | return 0; | ||
417 | } | ||
418 | |||
419 | /** | ||
420 | * numa_set_distance - Set NUMA distance from one NUMA to another | ||
421 | * @from: the 'from' node to set distance | ||
422 | * @to: the 'to' node to set distance | ||
423 | * @distance: NUMA distance | ||
424 | * | ||
425 | * Set the distance from node @from to @to to @distance. If distance table | ||
426 | * doesn't exist, one which is large enough to accommodate all the currently | ||
427 | * known nodes will be created. | ||
428 | * | ||
429 | * If such table cannot be allocated, a warning is printed and further | ||
430 | * calls are ignored until the distance table is reset with | ||
431 | * numa_reset_distance(). | ||
432 | * | ||
433 | * If @from or @to is higher than the highest known node at the time of | ||
434 | * table creation or @distance doesn't make sense, the call is ignored. | ||
435 | * This is to allow simplification of specific NUMA config implementations. | ||
436 | */ | ||
437 | void __init numa_set_distance(int from, int to, int distance) | ||
438 | { | ||
439 | if (!numa_distance && numa_alloc_distance() < 0) | ||
440 | return; | ||
441 | |||
442 | if (from >= numa_distance_cnt || to >= numa_distance_cnt) { | ||
443 | printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n", | ||
444 | from, to, distance); | ||
445 | return; | ||
446 | } | ||
447 | |||
448 | if ((u8)distance != distance || | ||
449 | (from == to && distance != LOCAL_DISTANCE)) { | ||
450 | pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n", | ||
451 | from, to, distance); | ||
452 | return; | ||
453 | } | ||
454 | |||
455 | numa_distance[from * numa_distance_cnt + to] = distance; | ||
456 | } | ||
457 | |||
458 | int __node_distance(int from, int to) | ||
459 | { | ||
460 | if (from >= numa_distance_cnt || to >= numa_distance_cnt) | ||
461 | return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; | ||
462 | return numa_distance[from * numa_distance_cnt + to]; | ||
463 | } | ||
464 | EXPORT_SYMBOL(__node_distance); | ||
465 | |||
466 | /* | ||
467 | * Sanity check to catch more bad NUMA configurations (they are amazingly | ||
468 | * common). Make sure the nodes cover all memory. | ||
469 | */ | ||
470 | static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) | ||
471 | { | ||
472 | u64 numaram, e820ram; | ||
473 | int i; | ||
474 | |||
475 | numaram = 0; | ||
476 | for (i = 0; i < mi->nr_blks; i++) { | ||
477 | u64 s = mi->blk[i].start >> PAGE_SHIFT; | ||
478 | u64 e = mi->blk[i].end >> PAGE_SHIFT; | ||
479 | numaram += e - s; | ||
480 | numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); | ||
481 | if ((s64)numaram < 0) | ||
482 | numaram = 0; | ||
483 | } | ||
484 | |||
485 | e820ram = max_pfn - (memblock_x86_hole_size(0, | ||
486 | PFN_PHYS(max_pfn)) >> PAGE_SHIFT); | ||
487 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ | ||
488 | if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { | ||
489 | printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n", | ||
490 | (numaram << PAGE_SHIFT) >> 20, | ||
491 | (e820ram << PAGE_SHIFT) >> 20); | ||
492 | return false; | ||
493 | } | ||
494 | return true; | ||
495 | } | ||
496 | |||
497 | static int __init numa_register_memblks(struct numa_meminfo *mi) | ||
498 | { | ||
499 | int i, nid; | ||
500 | |||
501 | /* Account for nodes with cpus and no memory */ | ||
502 | node_possible_map = numa_nodes_parsed; | ||
503 | numa_nodemask_from_meminfo(&node_possible_map, mi); | ||
504 | if (WARN_ON(nodes_empty(node_possible_map))) | ||
505 | return -EINVAL; | ||
506 | |||
507 | for (i = 0; i < mi->nr_blks; i++) | ||
508 | memblock_x86_register_active_regions(mi->blk[i].nid, | ||
509 | mi->blk[i].start >> PAGE_SHIFT, | ||
510 | mi->blk[i].end >> PAGE_SHIFT); | ||
511 | |||
512 | /* for out of order entries */ | ||
513 | sort_node_map(); | ||
514 | if (!numa_meminfo_cover_memory(mi)) | ||
515 | return -EINVAL; | ||
516 | |||
517 | /* Finally register nodes. */ | ||
518 | for_each_node_mask(nid, node_possible_map) { | ||
519 | u64 start = PFN_PHYS(max_pfn); | ||
520 | u64 end = 0; | ||
521 | |||
522 | for (i = 0; i < mi->nr_blks; i++) { | ||
523 | if (nid != mi->blk[i].nid) | ||
524 | continue; | ||
525 | start = min(mi->blk[i].start, start); | ||
526 | end = max(mi->blk[i].end, end); | ||
527 | } | ||
528 | |||
529 | if (start < end) | ||
530 | setup_node_data(nid, start, end); | ||
531 | } | ||
532 | |||
533 | return 0; | ||
534 | } | ||
535 | |||
98 | /* | 536 | /* |
99 | * There are unfortunately some poorly designed mainboards around that | 537 | * There are unfortunately some poorly designed mainboards around that |
100 | * only connect memory to a single CPU. This breaks the 1:1 cpu->node | 538 | * only connect memory to a single CPU. This breaks the 1:1 cpu->node |
@@ -102,7 +540,7 @@ void __init setup_node_to_cpumask_map(void) | |||
102 | * as the number of CPUs is not known yet. We round robin the existing | 540 | * as the number of CPUs is not known yet. We round robin the existing |
103 | * nodes. | 541 | * nodes. |
104 | */ | 542 | */ |
105 | void __init numa_init_array(void) | 543 | static void __init numa_init_array(void) |
106 | { | 544 | { |
107 | int rr, i; | 545 | int rr, i; |
108 | 546 | ||
@@ -117,6 +555,95 @@ void __init numa_init_array(void) | |||
117 | } | 555 | } |
118 | } | 556 | } |
119 | 557 | ||
558 | static int __init numa_init(int (*init_func)(void)) | ||
559 | { | ||
560 | int i; | ||
561 | int ret; | ||
562 | |||
563 | for (i = 0; i < MAX_LOCAL_APIC; i++) | ||
564 | set_apicid_to_node(i, NUMA_NO_NODE); | ||
565 | |||
566 | nodes_clear(numa_nodes_parsed); | ||
567 | nodes_clear(node_possible_map); | ||
568 | nodes_clear(node_online_map); | ||
569 | memset(&numa_meminfo, 0, sizeof(numa_meminfo)); | ||
570 | remove_all_active_ranges(); | ||
571 | numa_reset_distance(); | ||
572 | |||
573 | ret = init_func(); | ||
574 | if (ret < 0) | ||
575 | return ret; | ||
576 | ret = numa_cleanup_meminfo(&numa_meminfo); | ||
577 | if (ret < 0) | ||
578 | return ret; | ||
579 | |||
580 | numa_emulation(&numa_meminfo, numa_distance_cnt); | ||
581 | |||
582 | ret = numa_register_memblks(&numa_meminfo); | ||
583 | if (ret < 0) | ||
584 | return ret; | ||
585 | |||
586 | for (i = 0; i < nr_cpu_ids; i++) { | ||
587 | int nid = early_cpu_to_node(i); | ||
588 | |||
589 | if (nid == NUMA_NO_NODE) | ||
590 | continue; | ||
591 | if (!node_online(nid)) | ||
592 | numa_clear_node(i); | ||
593 | } | ||
594 | numa_init_array(); | ||
595 | return 0; | ||
596 | } | ||
597 | |||
598 | /** | ||
599 | * dummy_numa_init - Fallback dummy NUMA init | ||
600 | * | ||
601 | * Used if there's no underlying NUMA architecture, NUMA initialization | ||
602 | * fails, or NUMA is disabled on the command line. | ||
603 | * | ||
604 | * Must online at least one node and add memory blocks that cover all | ||
605 | * allowed memory. This function must not fail. | ||
606 | */ | ||
607 | static int __init dummy_numa_init(void) | ||
608 | { | ||
609 | printk(KERN_INFO "%s\n", | ||
610 | numa_off ? "NUMA turned off" : "No NUMA configuration found"); | ||
611 | printk(KERN_INFO "Faking a node at %016Lx-%016Lx\n", | ||
612 | 0LLU, PFN_PHYS(max_pfn)); | ||
613 | |||
614 | node_set(0, numa_nodes_parsed); | ||
615 | numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); | ||
616 | |||
617 | return 0; | ||
618 | } | ||
619 | |||
620 | /** | ||
621 | * x86_numa_init - Initialize NUMA | ||
622 | * | ||
623 | * Try each configured NUMA initialization method until one succeeds. The | ||
624 | * last fallback is dummy single node config encompassing whole memory and | ||
625 | * never fails. | ||
626 | */ | ||
627 | void __init x86_numa_init(void) | ||
628 | { | ||
629 | if (!numa_off) { | ||
630 | #ifdef CONFIG_X86_NUMAQ | ||
631 | if (!numa_init(numaq_numa_init)) | ||
632 | return; | ||
633 | #endif | ||
634 | #ifdef CONFIG_ACPI_NUMA | ||
635 | if (!numa_init(x86_acpi_numa_init)) | ||
636 | return; | ||
637 | #endif | ||
638 | #ifdef CONFIG_AMD_NUMA | ||
639 | if (!numa_init(amd_numa_init)) | ||
640 | return; | ||
641 | #endif | ||
642 | } | ||
643 | |||
644 | numa_init(dummy_numa_init); | ||
645 | } | ||
646 | |||
120 | static __init int find_near_online_node(int node) | 647 | static __init int find_near_online_node(int node) |
121 | { | 648 | { |
122 | int n, val; | 649 | int n, val; |
@@ -282,3 +809,18 @@ const struct cpumask *cpumask_of_node(int node) | |||
282 | EXPORT_SYMBOL(cpumask_of_node); | 809 | EXPORT_SYMBOL(cpumask_of_node); |
283 | 810 | ||
284 | #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ | 811 | #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ |
812 | |||
813 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
814 | int memory_add_physaddr_to_nid(u64 start) | ||
815 | { | ||
816 | struct numa_meminfo *mi = &numa_meminfo; | ||
817 | int nid = mi->blk[0].nid; | ||
818 | int i; | ||
819 | |||
820 | for (i = 0; i < mi->nr_blks; i++) | ||
821 | if (mi->blk[i].start <= start && mi->blk[i].end > start) | ||
822 | nid = mi->blk[i].nid; | ||
823 | return nid; | ||
824 | } | ||
825 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | ||
826 | #endif | ||
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index bde3906420df..849a975d3fa0 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -22,39 +22,11 @@ | |||
22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/mm.h> | ||
26 | #include <linux/bootmem.h> | 25 | #include <linux/bootmem.h> |
27 | #include <linux/memblock.h> | 26 | #include <linux/memblock.h> |
28 | #include <linux/mmzone.h> | ||
29 | #include <linux/highmem.h> | ||
30 | #include <linux/initrd.h> | ||
31 | #include <linux/nodemask.h> | ||
32 | #include <linux/module.h> | 27 | #include <linux/module.h> |
33 | #include <linux/kexec.h> | ||
34 | #include <linux/pfn.h> | ||
35 | #include <linux/swap.h> | ||
36 | #include <linux/acpi.h> | ||
37 | |||
38 | #include <asm/e820.h> | ||
39 | #include <asm/setup.h> | ||
40 | #include <asm/mmzone.h> | ||
41 | #include <asm/bios_ebda.h> | ||
42 | #include <asm/proto.h> | ||
43 | |||
44 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | ||
45 | EXPORT_SYMBOL(node_data); | ||
46 | |||
47 | /* | ||
48 | * numa interface - we expect the numa architecture specific code to have | ||
49 | * populated the following initialisation. | ||
50 | * | ||
51 | * 1) node_online_map - the map of all nodes configured (online) in the system | ||
52 | * 2) node_start_pfn - the starting page frame number for a node | ||
54 | * 3) node_end_pfn - the ending page frame number for a node | ||
54 | */ | ||
55 | unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly; | ||
56 | unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; | ||
57 | 28 | ||
29 | #include "numa_internal.h" | ||
58 | 30 | ||
59 | #ifdef CONFIG_DISCONTIGMEM | 31 | #ifdef CONFIG_DISCONTIGMEM |
60 | /* | 32 | /* |
@@ -99,108 +71,46 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, | |||
99 | } | 71 | } |
100 | #endif | 72 | #endif |
101 | 73 | ||
102 | extern unsigned long find_max_low_pfn(void); | ||
103 | extern unsigned long highend_pfn, highstart_pfn; | 74 | extern unsigned long highend_pfn, highstart_pfn; |
104 | 75 | ||
105 | #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) | 76 | #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) |
106 | 77 | ||
107 | unsigned long node_remap_size[MAX_NUMNODES]; | ||
108 | static void *node_remap_start_vaddr[MAX_NUMNODES]; | 78 | static void *node_remap_start_vaddr[MAX_NUMNODES]; |
109 | void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | 79 | void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); |
110 | 80 | ||
111 | static unsigned long kva_start_pfn; | ||
112 | static unsigned long kva_pages; | ||
113 | |||
114 | int __cpuinit numa_cpu_node(int cpu) | ||
115 | { | ||
116 | return apic->x86_32_numa_cpu_node(cpu); | ||
117 | } | ||
118 | |||
119 | /* | ||
120 | * FLAT - support for basic PC memory model with discontig enabled, essentially | ||
121 | * a single node with all available processors in it with a flat | ||
122 | * memory map. | ||
123 | */ | ||
124 | int __init get_memcfg_numa_flat(void) | ||
125 | { | ||
126 | printk(KERN_DEBUG "NUMA - single node, flat memory mode\n"); | ||
127 | |||
128 | node_start_pfn[0] = 0; | ||
129 | node_end_pfn[0] = max_pfn; | ||
130 | memblock_x86_register_active_regions(0, 0, max_pfn); | ||
131 | memory_present(0, 0, max_pfn); | ||
132 | node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn); | ||
133 | |||
134 | /* Indicate there is one node available. */ | ||
135 | nodes_clear(node_online_map); | ||
136 | node_set_online(0); | ||
137 | return 1; | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * Find the highest page frame number we have available for the node | ||
142 | */ | ||
143 | static void __init propagate_e820_map_node(int nid) | ||
144 | { | ||
145 | if (node_end_pfn[nid] > max_pfn) | ||
146 | node_end_pfn[nid] = max_pfn; | ||
147 | /* | ||
148 | * if a user has given mem=XXXX, then we need to make sure | ||
149 | * that the node _starts_ before that, too, not just ends | ||
150 | */ | ||
151 | if (node_start_pfn[nid] > max_pfn) | ||
152 | node_start_pfn[nid] = max_pfn; | ||
153 | BUG_ON(node_start_pfn[nid] > node_end_pfn[nid]); | ||
154 | } | ||
155 | |||
156 | /* | ||
157 | * Allocate memory for the pg_data_t for this node via a crude pre-bootmem | ||
158 | * method. For node zero take this from the bottom of memory, for | ||
159 | * subsequent nodes place them at node_remap_start_vaddr which contains | ||
160 | * node local data in physically node local memory. See setup_memory() | ||
161 | * for details. | ||
162 | */ | ||
163 | static void __init allocate_pgdat(int nid) | ||
164 | { | ||
165 | char buf[16]; | ||
166 | |||
167 | if (node_has_online_mem(nid) && node_remap_start_vaddr[nid]) | ||
168 | NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; | ||
169 | else { | ||
170 | unsigned long pgdat_phys; | ||
171 | pgdat_phys = memblock_find_in_range(min_low_pfn<<PAGE_SHIFT, | ||
172 | max_pfn_mapped<<PAGE_SHIFT, | ||
173 | sizeof(pg_data_t), | ||
174 | PAGE_SIZE); | ||
175 | NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT)); | ||
176 | memset(buf, 0, sizeof(buf)); | ||
177 | sprintf(buf, "NODE_DATA %d", nid); | ||
178 | memblock_x86_reserve_range(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf); | ||
179 | } | ||
180 | printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", | ||
181 | nid, (unsigned long)NODE_DATA(nid)); | ||
182 | } | ||
183 | |||
184 | /* | 81 | /* |
185 | * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel | 82 | * Remap memory allocator |
186 | * virtual address space (KVA) is reserved and portions of nodes are mapped | ||
187 | * using it. This is to allow node-local memory to be allocated for | ||
188 | * structures that would normally require ZONE_NORMAL. The memory is | ||
189 | * allocated with alloc_remap() and callers should be prepared to allocate | ||
190 | * from the bootmem allocator instead. | ||
191 | */ | 83 | */ |
192 | static unsigned long node_remap_start_pfn[MAX_NUMNODES]; | 84 | static unsigned long node_remap_start_pfn[MAX_NUMNODES]; |
193 | static void *node_remap_end_vaddr[MAX_NUMNODES]; | 85 | static void *node_remap_end_vaddr[MAX_NUMNODES]; |
194 | static void *node_remap_alloc_vaddr[MAX_NUMNODES]; | 86 | static void *node_remap_alloc_vaddr[MAX_NUMNODES]; |
195 | static unsigned long node_remap_offset[MAX_NUMNODES]; | ||
196 | 87 | ||
88 | /** | ||
89 | * alloc_remap - Allocate remapped memory | ||
90 | * @nid: NUMA node to allocate memory from | ||
91 | * @size: The size of allocation | ||
92 | * | ||
93 | * Allocate @size bytes from the remap area of NUMA node @nid. The | ||
94 | * size of the remap area is predetermined by init_alloc_remap() and | ||
95 | * only the callers considered there should call this function. For | ||
96 | * more info, please read the comment on top of init_alloc_remap(). | ||
97 | * | ||
98 | * The caller must be ready to handle allocation failure from this | ||
99 | * function and fall back to regular memory allocator in such cases. | ||
100 | * | ||
101 | * CONTEXT: | ||
102 | * Single CPU early boot context. | ||
103 | * | ||
104 | * RETURNS: | ||
105 | * Pointer to the allocated memory on success, %NULL on failure. | ||
106 | */ | ||
197 | void *alloc_remap(int nid, unsigned long size) | 107 | void *alloc_remap(int nid, unsigned long size) |
198 | { | 108 | { |
199 | void *allocation = node_remap_alloc_vaddr[nid]; | 109 | void *allocation = node_remap_alloc_vaddr[nid]; |
200 | 110 | ||
201 | size = ALIGN(size, L1_CACHE_BYTES); | 111 | size = ALIGN(size, L1_CACHE_BYTES); |
202 | 112 | ||
203 | if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) | 113 | if (!allocation || (allocation + size) > node_remap_end_vaddr[nid]) |
204 | return NULL; | 114 | return NULL; |
205 | 115 | ||
206 | node_remap_alloc_vaddr[nid] += size; | 116 | node_remap_alloc_vaddr[nid] += size; |
@@ -209,26 +119,6 @@ void *alloc_remap(int nid, unsigned long size) | |||
209 | return allocation; | 119 | return allocation; |
210 | } | 120 | } |
211 | 121 | ||
212 | static void __init remap_numa_kva(void) | ||
213 | { | ||
214 | void *vaddr; | ||
215 | unsigned long pfn; | ||
216 | int node; | ||
217 | |||
218 | for_each_online_node(node) { | ||
219 | printk(KERN_DEBUG "remap_numa_kva: node %d\n", node); | ||
220 | for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { | ||
221 | vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); | ||
222 | printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n", | ||
223 | (unsigned long)vaddr, | ||
224 | node_remap_start_pfn[node] + pfn); | ||
225 | set_pmd_pfn((ulong) vaddr, | ||
226 | node_remap_start_pfn[node] + pfn, | ||
227 | PAGE_KERNEL_LARGE); | ||
228 | } | ||
229 | } | ||
230 | } | ||
231 | |||
232 | #ifdef CONFIG_HIBERNATION | 122 | #ifdef CONFIG_HIBERNATION |
233 | /** | 123 | /** |
234 | * resume_map_numa_kva - add KVA mapping to the temporary page tables created | 124 | * resume_map_numa_kva - add KVA mapping to the temporary page tables created |
@@ -240,15 +130,16 @@ void resume_map_numa_kva(pgd_t *pgd_base) | |||
240 | int node; | 130 | int node; |
241 | 131 | ||
242 | for_each_online_node(node) { | 132 | for_each_online_node(node) { |
243 | unsigned long start_va, start_pfn, size, pfn; | 133 | unsigned long start_va, start_pfn, nr_pages, pfn; |
244 | 134 | ||
245 | start_va = (unsigned long)node_remap_start_vaddr[node]; | 135 | start_va = (unsigned long)node_remap_start_vaddr[node]; |
246 | start_pfn = node_remap_start_pfn[node]; | 136 | start_pfn = node_remap_start_pfn[node]; |
247 | size = node_remap_size[node]; | 137 | nr_pages = (node_remap_end_vaddr[node] - |
138 | node_remap_start_vaddr[node]) >> PAGE_SHIFT; | ||
248 | 139 | ||
249 | printk(KERN_DEBUG "%s: node %d\n", __func__, node); | 140 | printk(KERN_DEBUG "%s: node %d\n", __func__, node); |
250 | 141 | ||
251 | for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) { | 142 | for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) { |
252 | unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); | 143 | unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); |
253 | pgd_t *pgd = pgd_base + pgd_index(vaddr); | 144 | pgd_t *pgd = pgd_base + pgd_index(vaddr); |
254 | pud_t *pud = pud_offset(pgd, vaddr); | 145 | pud_t *pud = pud_offset(pgd, vaddr); |
@@ -264,132 +155,89 @@ void resume_map_numa_kva(pgd_t *pgd_base) | |||
264 | } | 155 | } |
265 | #endif | 156 | #endif |
266 | 157 | ||
267 | static __init unsigned long calculate_numa_remap_pages(void) | 158 | /** |
159 | * init_alloc_remap - Initialize remap allocator for a NUMA node | ||
160 | * @nid: NUMA node to initialize remap allocator for | ||
161 | * | ||
162 | * NUMA nodes may end up without any lowmem. As allocating pgdat and | ||
163 | * memmap on a different node with lowmem is inefficient, a special | ||
164 | * remap allocator is implemented which can be used by alloc_remap(). | ||
165 | * | ||
166 | * For each node, the amount of memory which will be necessary for | ||
167 | * pgdat and memmap is calculated and two memory areas of the size are | ||
168 | * allocated - one in the node and the other in lowmem; then, the area | ||
169 | * in the node is remapped to the lowmem area. | ||
170 | * | ||
171 | * As pgdat and memmap must be allocated in lowmem anyway, this | ||
172 | * doesn't waste lowmem address space; however, the actual lowmem | ||
173 | * which gets remapped over is wasted. The amount shouldn't be | ||
174 | * problematic on machines where this feature will be used. | ||
175 | * | ||
176 | * Initialization failure isn't fatal. alloc_remap() is used | ||
177 | * opportunistically and the callers will fall back to other memory | ||
178 | * allocation mechanisms on failure. | ||
179 | */ | ||
180 | void __init init_alloc_remap(int nid, u64 start, u64 end) | ||
268 | { | 181 | { |
269 | int nid; | 182 | unsigned long start_pfn = start >> PAGE_SHIFT; |
270 | unsigned long size, reserve_pages = 0; | 183 | unsigned long end_pfn = end >> PAGE_SHIFT; |
271 | 184 | unsigned long size, pfn; | |
272 | for_each_online_node(nid) { | 185 | u64 node_pa, remap_pa; |
273 | u64 node_kva_target; | 186 | void *remap_va; |
274 | u64 node_kva_final; | ||
275 | |||
276 | /* | ||
277 | * The acpi/srat node info can show hot-add memroy zones | ||
278 | * where memory could be added but not currently present. | ||
279 | */ | ||
280 | printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n", | ||
281 | nid, node_start_pfn[nid], node_end_pfn[nid]); | ||
282 | if (node_start_pfn[nid] > max_pfn) | ||
283 | continue; | ||
284 | if (!node_end_pfn[nid]) | ||
285 | continue; | ||
286 | if (node_end_pfn[nid] > max_pfn) | ||
287 | node_end_pfn[nid] = max_pfn; | ||
288 | |||
289 | /* ensure the remap includes space for the pgdat. */ | ||
290 | size = node_remap_size[nid] + sizeof(pg_data_t); | ||
291 | |||
292 | /* convert size to large (pmd size) pages, rounding up */ | ||
293 | size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; | ||
294 | /* now the roundup is correct, convert to PAGE_SIZE pages */ | ||
295 | size = size * PTRS_PER_PTE; | ||
296 | |||
297 | node_kva_target = round_down(node_end_pfn[nid] - size, | ||
298 | PTRS_PER_PTE); | ||
299 | node_kva_target <<= PAGE_SHIFT; | ||
300 | do { | ||
301 | node_kva_final = memblock_find_in_range(node_kva_target, | ||
302 | ((u64)node_end_pfn[nid])<<PAGE_SHIFT, | ||
303 | ((u64)size)<<PAGE_SHIFT, | ||
304 | LARGE_PAGE_BYTES); | ||
305 | node_kva_target -= LARGE_PAGE_BYTES; | ||
306 | } while (node_kva_final == MEMBLOCK_ERROR && | ||
307 | (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid])); | ||
308 | |||
309 | if (node_kva_final == MEMBLOCK_ERROR) | ||
310 | panic("Can not get kva ram\n"); | ||
311 | |||
312 | node_remap_size[nid] = size; | ||
313 | node_remap_offset[nid] = reserve_pages; | ||
314 | reserve_pages += size; | ||
315 | printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of" | ||
316 | " node %d at %llx\n", | ||
317 | size, nid, node_kva_final>>PAGE_SHIFT); | ||
318 | |||
319 | /* | ||
320 | * prevent kva address below max_low_pfn want it on system | ||
321 | * with less memory later. | ||
322 | * layout will be: KVA address , KVA RAM | ||
323 | * | ||
324 | * we are supposed to only record the one less then max_low_pfn | ||
325 | * but we could have some hole in high memory, and it will only | ||
326 | * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide | ||
327 | * to use it as free. | ||
328 | * So memblock_x86_reserve_range here, hope we don't run out of that array | ||
329 | */ | ||
330 | memblock_x86_reserve_range(node_kva_final, | ||
331 | node_kva_final+(((u64)size)<<PAGE_SHIFT), | ||
332 | "KVA RAM"); | ||
333 | |||
334 | node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT; | ||
335 | } | ||
336 | printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n", | ||
337 | reserve_pages); | ||
338 | return reserve_pages; | ||
339 | } | ||
340 | 187 | ||
341 | static void init_remap_allocator(int nid) | 188 | /* |
342 | { | 189 | * The acpi/srat node info can show hot-add memory zones where |
343 | node_remap_start_vaddr[nid] = pfn_to_kaddr( | 190 | * memory could be added but not currently present. |
344 | kva_start_pfn + node_remap_offset[nid]); | 191 | */ |
345 | node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + | 192 | printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n", |
346 | (node_remap_size[nid] * PAGE_SIZE); | 193 | nid, start_pfn, end_pfn); |
347 | node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + | 194 | |
348 | ALIGN(sizeof(pg_data_t), PAGE_SIZE); | 195 | /* calculate the necessary space aligned to large page size */ |
349 | 196 | size = node_memmap_size_bytes(nid, start_pfn, end_pfn); | |
350 | printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid, | 197 | size += ALIGN(sizeof(pg_data_t), PAGE_SIZE); |
351 | (ulong) node_remap_start_vaddr[nid], | 198 | size = ALIGN(size, LARGE_PAGE_BYTES); |
352 | (ulong) node_remap_end_vaddr[nid]); | 199 | |
200 | /* allocate node memory and the lowmem remap area */ | ||
201 | node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES); | ||
202 | if (node_pa == MEMBLOCK_ERROR) { | ||
203 | pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", | ||
204 | size, nid); | ||
205 | return; | ||
206 | } | ||
207 | memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM"); | ||
208 | |||
209 | remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, | ||
210 | max_low_pfn << PAGE_SHIFT, | ||
211 | size, LARGE_PAGE_BYTES); | ||
212 | if (remap_pa == MEMBLOCK_ERROR) { | ||
213 | pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", | ||
214 | size, nid); | ||
215 | memblock_x86_free_range(node_pa, node_pa + size); | ||
216 | return; | ||
217 | } | ||
218 | memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG"); | ||
219 | remap_va = phys_to_virt(remap_pa); | ||
220 | |||
221 | /* perform actual remap */ | ||
222 | for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE) | ||
223 | set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT), | ||
224 | (node_pa >> PAGE_SHIFT) + pfn, | ||
225 | PAGE_KERNEL_LARGE); | ||
226 | |||
227 | /* initialize remap allocator parameters */ | ||
228 | node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT; | ||
229 | node_remap_start_vaddr[nid] = remap_va; | ||
230 | node_remap_end_vaddr[nid] = remap_va + size; | ||
231 | node_remap_alloc_vaddr[nid] = remap_va; | ||
232 | |||
233 | printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n", | ||
234 | nid, node_pa, node_pa + size, remap_va, remap_va + size); | ||
353 | } | 235 | } |
354 | 236 | ||
355 | void __init initmem_init(void) | 237 | void __init initmem_init(void) |
356 | { | 238 | { |
357 | int nid; | 239 | x86_numa_init(); |
358 | long kva_target_pfn; | ||
359 | |||
360 | /* | ||
361 | * When mapping a NUMA machine we allocate the node_mem_map arrays | ||
362 | * from node local memory. They are then mapped directly into KVA | ||
363 | * between zone normal and vmalloc space. Calculate the size of | ||
364 | * this space and use it to adjust the boundary between ZONE_NORMAL | ||
365 | * and ZONE_HIGHMEM. | ||
366 | */ | ||
367 | |||
368 | get_memcfg_numa(); | ||
369 | numa_init_array(); | ||
370 | |||
371 | kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE); | ||
372 | 240 | ||
373 | kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); | ||
374 | do { | ||
375 | kva_start_pfn = memblock_find_in_range(kva_target_pfn<<PAGE_SHIFT, | ||
376 | max_low_pfn<<PAGE_SHIFT, | ||
377 | kva_pages<<PAGE_SHIFT, | ||
378 | PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT; | ||
379 | kva_target_pfn -= PTRS_PER_PTE; | ||
380 | } while (kva_start_pfn == MEMBLOCK_ERROR && kva_target_pfn > min_low_pfn); | ||
381 | |||
382 | if (kva_start_pfn == MEMBLOCK_ERROR) | ||
383 | panic("Can not get kva space\n"); | ||
384 | |||
385 | printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n", | ||
386 | kva_start_pfn, max_low_pfn); | ||
387 | printk(KERN_INFO "max_pfn = %lx\n", max_pfn); | ||
388 | |||
389 | /* avoid clash with initrd */ | ||
390 | memblock_x86_reserve_range(kva_start_pfn<<PAGE_SHIFT, | ||
391 | (kva_start_pfn + kva_pages)<<PAGE_SHIFT, | ||
392 | "KVA PG"); | ||
393 | #ifdef CONFIG_HIGHMEM | 241 | #ifdef CONFIG_HIGHMEM |
394 | highstart_pfn = highend_pfn = max_pfn; | 242 | highstart_pfn = highend_pfn = max_pfn; |
395 | if (max_pfn > max_low_pfn) | 243 | if (max_pfn > max_low_pfn) |
@@ -409,51 +257,9 @@ void __init initmem_init(void) | |||
409 | 257 | ||
410 | printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", | 258 | printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", |
411 | (ulong) pfn_to_kaddr(max_low_pfn)); | 259 | (ulong) pfn_to_kaddr(max_low_pfn)); |
412 | for_each_online_node(nid) { | ||
413 | init_remap_allocator(nid); | ||
414 | |||
415 | allocate_pgdat(nid); | ||
416 | } | ||
417 | remap_numa_kva(); | ||
418 | 260 | ||
419 | printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", | 261 | printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", |
420 | (ulong) pfn_to_kaddr(highstart_pfn)); | 262 | (ulong) pfn_to_kaddr(highstart_pfn)); |
421 | for_each_online_node(nid) | ||
422 | propagate_e820_map_node(nid); | ||
423 | |||
424 | for_each_online_node(nid) { | ||
425 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); | ||
426 | NODE_DATA(nid)->node_id = nid; | ||
427 | } | ||
428 | 263 | ||
429 | setup_bootmem_allocator(); | 264 | setup_bootmem_allocator(); |
430 | } | 265 | } |
431 | |||
432 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
433 | static int paddr_to_nid(u64 addr) | ||
434 | { | ||
435 | int nid; | ||
436 | unsigned long pfn = PFN_DOWN(addr); | ||
437 | |||
438 | for_each_node(nid) | ||
439 | if (node_start_pfn[nid] <= pfn && | ||
440 | pfn < node_end_pfn[nid]) | ||
441 | return nid; | ||
442 | |||
443 | return -1; | ||
444 | } | ||
445 | |||
446 | /* | ||
447 | * This function is used to ask node id BEFORE memmap and mem_section's | ||
448 | * initialization (pfn_to_nid() can't be used yet). | ||
449 | * If _PXM is not defined on ACPI's DSDT, node id must be found by this. | ||
450 | */ | ||
451 | int memory_add_physaddr_to_nid(u64 addr) | ||
452 | { | ||
453 | int nid = paddr_to_nid(addr); | ||
454 | return (nid >= 0) ? nid : 0; | ||
455 | } | ||
456 | |||
457 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | ||
458 | #endif | ||
459 | |||
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 85b52fc03084..dd27f401f0a0 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -2,646 +2,13 @@ | |||
2 | * Generic VM initialization for x86-64 NUMA setups. | 2 | * Generic VM initialization for x86-64 NUMA setups. |
3 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. | 3 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. |
4 | */ | 4 | */ |
5 | #include <linux/kernel.h> | ||
6 | #include <linux/mm.h> | ||
7 | #include <linux/string.h> | ||
8 | #include <linux/init.h> | ||
9 | #include <linux/bootmem.h> | 5 | #include <linux/bootmem.h> |
10 | #include <linux/memblock.h> | ||
11 | #include <linux/mmzone.h> | ||
12 | #include <linux/ctype.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/nodemask.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/acpi.h> | ||
17 | |||
18 | #include <asm/e820.h> | ||
19 | #include <asm/proto.h> | ||
20 | #include <asm/dma.h> | ||
21 | #include <asm/acpi.h> | ||
22 | #include <asm/amd_nb.h> | ||
23 | 6 | ||
24 | #include "numa_internal.h" | 7 | #include "numa_internal.h" |
25 | 8 | ||
26 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | ||
27 | EXPORT_SYMBOL(node_data); | ||
28 | |||
29 | nodemask_t numa_nodes_parsed __initdata; | ||
30 | |||
31 | struct memnode memnode; | ||
32 | |||
33 | static unsigned long __initdata nodemap_addr; | ||
34 | static unsigned long __initdata nodemap_size; | ||
35 | |||
36 | static struct numa_meminfo numa_meminfo __initdata; | ||
37 | |||
38 | static int numa_distance_cnt; | ||
39 | static u8 *numa_distance; | ||
40 | |||
41 | /* | ||
42 | * Given a shift value, try to populate memnodemap[] | ||
43 | * Returns : | ||
44 | * 1 if OK | ||
45 | * 0 if memnodmap[] too small (of shift too small) | ||
46 | * -1 if node overlap or lost ram (shift too big) | ||
47 | */ | ||
48 | static int __init populate_memnodemap(const struct numa_meminfo *mi, int shift) | ||
49 | { | ||
50 | unsigned long addr, end; | ||
51 | int i, res = -1; | ||
52 | |||
53 | memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize); | ||
54 | for (i = 0; i < mi->nr_blks; i++) { | ||
55 | addr = mi->blk[i].start; | ||
56 | end = mi->blk[i].end; | ||
57 | if (addr >= end) | ||
58 | continue; | ||
59 | if ((end >> shift) >= memnodemapsize) | ||
60 | return 0; | ||
61 | do { | ||
62 | if (memnodemap[addr >> shift] != NUMA_NO_NODE) | ||
63 | return -1; | ||
64 | memnodemap[addr >> shift] = mi->blk[i].nid; | ||
65 | addr += (1UL << shift); | ||
66 | } while (addr < end); | ||
67 | res = 1; | ||
68 | } | ||
69 | return res; | ||
70 | } | ||
71 | |||
72 | static int __init allocate_cachealigned_memnodemap(void) | ||
73 | { | ||
74 | unsigned long addr; | ||
75 | |||
76 | memnodemap = memnode.embedded_map; | ||
77 | if (memnodemapsize <= ARRAY_SIZE(memnode.embedded_map)) | ||
78 | return 0; | ||
79 | |||
80 | addr = 0x8000; | ||
81 | nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); | ||
82 | nodemap_addr = memblock_find_in_range(addr, get_max_mapped(), | ||
83 | nodemap_size, L1_CACHE_BYTES); | ||
84 | if (nodemap_addr == MEMBLOCK_ERROR) { | ||
85 | printk(KERN_ERR | ||
86 | "NUMA: Unable to allocate Memory to Node hash map\n"); | ||
87 | nodemap_addr = nodemap_size = 0; | ||
88 | return -1; | ||
89 | } | ||
90 | memnodemap = phys_to_virt(nodemap_addr); | ||
91 | memblock_x86_reserve_range(nodemap_addr, nodemap_addr + nodemap_size, "MEMNODEMAP"); | ||
92 | |||
93 | printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n", | ||
94 | nodemap_addr, nodemap_addr + nodemap_size); | ||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | /* | ||
99 | * The LSB of all start and end addresses in the node map is the value of the | ||
100 | * maximum possible shift. | ||
101 | */ | ||
102 | static int __init extract_lsb_from_nodes(const struct numa_meminfo *mi) | ||
103 | { | ||
104 | int i, nodes_used = 0; | ||
105 | unsigned long start, end; | ||
106 | unsigned long bitfield = 0, memtop = 0; | ||
107 | |||
108 | for (i = 0; i < mi->nr_blks; i++) { | ||
109 | start = mi->blk[i].start; | ||
110 | end = mi->blk[i].end; | ||
111 | if (start >= end) | ||
112 | continue; | ||
113 | bitfield |= start; | ||
114 | nodes_used++; | ||
115 | if (end > memtop) | ||
116 | memtop = end; | ||
117 | } | ||
118 | if (nodes_used <= 1) | ||
119 | i = 63; | ||
120 | else | ||
121 | i = find_first_bit(&bitfield, sizeof(unsigned long)*8); | ||
122 | memnodemapsize = (memtop >> i)+1; | ||
123 | return i; | ||
124 | } | ||
125 | |||
126 | static int __init compute_hash_shift(const struct numa_meminfo *mi) | ||
127 | { | ||
128 | int shift; | ||
129 | |||
130 | shift = extract_lsb_from_nodes(mi); | ||
131 | if (allocate_cachealigned_memnodemap()) | ||
132 | return -1; | ||
133 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", | ||
134 | shift); | ||
135 | |||
136 | if (populate_memnodemap(mi, shift) != 1) { | ||
137 | printk(KERN_INFO "Your memory is not aligned you need to " | ||
138 | "rebuild your kernel with a bigger NODEMAPSIZE " | ||
139 | "shift=%d\n", shift); | ||
140 | return -1; | ||
141 | } | ||
142 | return shift; | ||
143 | } | ||
144 | |||
145 | int __meminit __early_pfn_to_nid(unsigned long pfn) | ||
146 | { | ||
147 | return phys_to_nid(pfn << PAGE_SHIFT); | ||
148 | } | ||
149 | |||
150 | static void * __init early_node_mem(int nodeid, unsigned long start, | ||
151 | unsigned long end, unsigned long size, | ||
152 | unsigned long align) | ||
153 | { | ||
154 | unsigned long mem; | ||
155 | |||
156 | /* | ||
157 | * put it on high as possible | ||
158 | * something will go with NODE_DATA | ||
159 | */ | ||
160 | if (start < (MAX_DMA_PFN<<PAGE_SHIFT)) | ||
161 | start = MAX_DMA_PFN<<PAGE_SHIFT; | ||
162 | if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) && | ||
163 | end > (MAX_DMA32_PFN<<PAGE_SHIFT)) | ||
164 | start = MAX_DMA32_PFN<<PAGE_SHIFT; | ||
165 | mem = memblock_x86_find_in_range_node(nodeid, start, end, size, align); | ||
166 | if (mem != MEMBLOCK_ERROR) | ||
167 | return __va(mem); | ||
168 | |||
169 | /* extend the search scope */ | ||
170 | end = max_pfn_mapped << PAGE_SHIFT; | ||
171 | start = MAX_DMA_PFN << PAGE_SHIFT; | ||
172 | mem = memblock_find_in_range(start, end, size, align); | ||
173 | if (mem != MEMBLOCK_ERROR) | ||
174 | return __va(mem); | ||
175 | |||
176 | printk(KERN_ERR "Cannot find %lu bytes in node %d\n", | ||
177 | size, nodeid); | ||
178 | |||
179 | return NULL; | ||
180 | } | ||
181 | |||
182 | static int __init numa_add_memblk_to(int nid, u64 start, u64 end, | ||
183 | struct numa_meminfo *mi) | ||
184 | { | ||
185 | /* ignore zero length blks */ | ||
186 | if (start == end) | ||
187 | return 0; | ||
188 | |||
189 | /* whine about and ignore invalid blks */ | ||
190 | if (start > end || nid < 0 || nid >= MAX_NUMNODES) { | ||
191 | pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n", | ||
192 | nid, start, end); | ||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | if (mi->nr_blks >= NR_NODE_MEMBLKS) { | ||
197 | pr_err("NUMA: too many memblk ranges\n"); | ||
198 | return -EINVAL; | ||
199 | } | ||
200 | |||
201 | mi->blk[mi->nr_blks].start = start; | ||
202 | mi->blk[mi->nr_blks].end = end; | ||
203 | mi->blk[mi->nr_blks].nid = nid; | ||
204 | mi->nr_blks++; | ||
205 | return 0; | ||
206 | } | ||
207 | |||
208 | /** | ||
209 | * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo | ||
210 | * @idx: Index of memblk to remove | ||
211 | * @mi: numa_meminfo to remove memblk from | ||
212 | * | ||
213 | * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and | ||
214 | * decrementing @mi->nr_blks. | ||
215 | */ | ||
216 | void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi) | ||
217 | { | ||
218 | mi->nr_blks--; | ||
219 | memmove(&mi->blk[idx], &mi->blk[idx + 1], | ||
220 | (mi->nr_blks - idx) * sizeof(mi->blk[0])); | ||
221 | } | ||
222 | |||
223 | /** | ||
224 | * numa_add_memblk - Add one numa_memblk to numa_meminfo | ||
225 | * @nid: NUMA node ID of the new memblk | ||
226 | * @start: Start address of the new memblk | ||
227 | * @end: End address of the new memblk | ||
228 | * | ||
229 | * Add a new memblk to the default numa_meminfo. | ||
230 | * | ||
231 | * RETURNS: | ||
232 | * 0 on success, -errno on failure. | ||
233 | */ | ||
234 | int __init numa_add_memblk(int nid, u64 start, u64 end) | ||
235 | { | ||
236 | return numa_add_memblk_to(nid, start, end, &numa_meminfo); | ||
237 | } | ||
238 | |||
239 | /* Initialize bootmem allocator for a node */ | ||
240 | void __init | ||
241 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | ||
242 | { | ||
243 | unsigned long start_pfn, last_pfn, nodedata_phys; | ||
244 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | ||
245 | int nid; | ||
246 | |||
247 | if (!end) | ||
248 | return; | ||
249 | |||
250 | /* | ||
251 | * Don't confuse VM with a node that doesn't have the | ||
252 | * minimum amount of memory: | ||
253 | */ | ||
254 | if (end && (end - start) < NODE_MIN_SIZE) | ||
255 | return; | ||
256 | |||
257 | start = roundup(start, ZONE_ALIGN); | ||
258 | |||
259 | printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid, | ||
260 | start, end); | ||
261 | |||
262 | start_pfn = start >> PAGE_SHIFT; | ||
263 | last_pfn = end >> PAGE_SHIFT; | ||
264 | |||
265 | node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size, | ||
266 | SMP_CACHE_BYTES); | ||
267 | if (node_data[nodeid] == NULL) | ||
268 | return; | ||
269 | nodedata_phys = __pa(node_data[nodeid]); | ||
270 | memblock_x86_reserve_range(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA"); | ||
271 | printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, | ||
272 | nodedata_phys + pgdat_size - 1); | ||
273 | nid = phys_to_nid(nodedata_phys); | ||
274 | if (nid != nodeid) | ||
275 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); | ||
276 | |||
277 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); | ||
278 | NODE_DATA(nodeid)->node_id = nodeid; | ||
279 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | ||
280 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; | ||
281 | |||
282 | node_set_online(nodeid); | ||
283 | } | ||
284 | |||
285 | /** | ||
286 | * numa_cleanup_meminfo - Cleanup a numa_meminfo | ||
287 | * @mi: numa_meminfo to clean up | ||
288 | * | ||
289 | * Sanitize @mi by merging and removing unncessary memblks. Also check for | ||
290 | * conflicts and clear unused memblks. | ||
291 | * | ||
292 | * RETURNS: | ||
293 | * 0 on success, -errno on failure. | ||
294 | */ | ||
295 | int __init numa_cleanup_meminfo(struct numa_meminfo *mi) | ||
296 | { | ||
297 | const u64 low = 0; | ||
298 | const u64 high = (u64)max_pfn << PAGE_SHIFT; | ||
299 | int i, j, k; | ||
300 | |||
301 | for (i = 0; i < mi->nr_blks; i++) { | ||
302 | struct numa_memblk *bi = &mi->blk[i]; | ||
303 | |||
304 | /* make sure all blocks are inside the limits */ | ||
305 | bi->start = max(bi->start, low); | ||
306 | bi->end = min(bi->end, high); | ||
307 | |||
308 | /* and there's no empty block */ | ||
309 | if (bi->start >= bi->end) { | ||
310 | numa_remove_memblk_from(i--, mi); | ||
311 | continue; | ||
312 | } | ||
313 | |||
314 | for (j = i + 1; j < mi->nr_blks; j++) { | ||
315 | struct numa_memblk *bj = &mi->blk[j]; | ||
316 | unsigned long start, end; | ||
317 | |||
318 | /* | ||
319 | * See whether there are overlapping blocks. Whine | ||
320 | * about but allow overlaps of the same nid. They | ||
321 | * will be merged below. | ||
322 | */ | ||
323 | if (bi->end > bj->start && bi->start < bj->end) { | ||
324 | if (bi->nid != bj->nid) { | ||
325 | pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n", | ||
326 | bi->nid, bi->start, bi->end, | ||
327 | bj->nid, bj->start, bj->end); | ||
328 | return -EINVAL; | ||
329 | } | ||
330 | pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n", | ||
331 | bi->nid, bi->start, bi->end, | ||
332 | bj->start, bj->end); | ||
333 | } | ||
334 | |||
335 | /* | ||
336 | * Join together blocks on the same node, holes | ||
337 | * between which don't overlap with memory on other | ||
338 | * nodes. | ||
339 | */ | ||
340 | if (bi->nid != bj->nid) | ||
341 | continue; | ||
342 | start = max(min(bi->start, bj->start), low); | ||
343 | end = min(max(bi->end, bj->end), high); | ||
344 | for (k = 0; k < mi->nr_blks; k++) { | ||
345 | struct numa_memblk *bk = &mi->blk[k]; | ||
346 | |||
347 | if (bi->nid == bk->nid) | ||
348 | continue; | ||
349 | if (start < bk->end && end > bk->start) | ||
350 | break; | ||
351 | } | ||
352 | if (k < mi->nr_blks) | ||
353 | continue; | ||
354 | printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n", | ||
355 | bi->nid, bi->start, bi->end, bj->start, bj->end, | ||
356 | start, end); | ||
357 | bi->start = start; | ||
358 | bi->end = end; | ||
359 | numa_remove_memblk_from(j--, mi); | ||
360 | } | ||
361 | } | ||
362 | |||
363 | for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) { | ||
364 | mi->blk[i].start = mi->blk[i].end = 0; | ||
365 | mi->blk[i].nid = NUMA_NO_NODE; | ||
366 | } | ||
367 | |||
368 | return 0; | ||
369 | } | ||
370 | |||
371 | /* | ||
372 | * Set nodes, which have memory in @mi, in *@nodemask. | ||
373 | */ | ||
374 | static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, | ||
375 | const struct numa_meminfo *mi) | ||
376 | { | ||
377 | int i; | ||
378 | |||
379 | for (i = 0; i < ARRAY_SIZE(mi->blk); i++) | ||
380 | if (mi->blk[i].start != mi->blk[i].end && | ||
381 | mi->blk[i].nid != NUMA_NO_NODE) | ||
382 | node_set(mi->blk[i].nid, *nodemask); | ||
383 | } | ||
384 | |||
385 | /** | ||
386 | * numa_reset_distance - Reset NUMA distance table | ||
387 | * | ||
388 | * The current table is freed. The next numa_set_distance() call will | ||
389 | * create a new one. | ||
390 | */ | ||
391 | void __init numa_reset_distance(void) | ||
392 | { | ||
393 | size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]); | ||
394 | |||
395 | /* numa_distance could be 1LU marking allocation failure, test cnt */ | ||
396 | if (numa_distance_cnt) | ||
397 | memblock_x86_free_range(__pa(numa_distance), | ||
398 | __pa(numa_distance) + size); | ||
399 | numa_distance_cnt = 0; | ||
400 | numa_distance = NULL; /* enable table creation */ | ||
401 | } | ||
402 | |||
403 | static int __init numa_alloc_distance(void) | ||
404 | { | ||
405 | nodemask_t nodes_parsed; | ||
406 | size_t size; | ||
407 | int i, j, cnt = 0; | ||
408 | u64 phys; | ||
409 | |||
410 | /* size the new table and allocate it */ | ||
411 | nodes_parsed = numa_nodes_parsed; | ||
412 | numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo); | ||
413 | |||
414 | for_each_node_mask(i, nodes_parsed) | ||
415 | cnt = i; | ||
416 | cnt++; | ||
417 | size = cnt * cnt * sizeof(numa_distance[0]); | ||
418 | |||
419 | phys = memblock_find_in_range(0, (u64)max_pfn_mapped << PAGE_SHIFT, | ||
420 | size, PAGE_SIZE); | ||
421 | if (phys == MEMBLOCK_ERROR) { | ||
422 | pr_warning("NUMA: Warning: can't allocate distance table!\n"); | ||
423 | /* don't retry until explicitly reset */ | ||
424 | numa_distance = (void *)1LU; | ||
425 | return -ENOMEM; | ||
426 | } | ||
427 | memblock_x86_reserve_range(phys, phys + size, "NUMA DIST"); | ||
428 | |||
429 | numa_distance = __va(phys); | ||
430 | numa_distance_cnt = cnt; | ||
431 | |||
432 | /* fill with the default distances */ | ||
433 | for (i = 0; i < cnt; i++) | ||
434 | for (j = 0; j < cnt; j++) | ||
435 | numa_distance[i * cnt + j] = i == j ? | ||
436 | LOCAL_DISTANCE : REMOTE_DISTANCE; | ||
437 | printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt); | ||
438 | |||
439 | return 0; | ||
440 | } | ||
441 | |||
442 | /** | ||
443 | * numa_set_distance - Set NUMA distance from one NUMA to another | ||
444 | * @from: the 'from' node to set distance | ||
445 | * @to: the 'to' node to set distance | ||
446 | * @distance: NUMA distance | ||
447 | * | ||
448 | * Set the distance from node @from to @to to @distance. If distance table | ||
449 | * doesn't exist, one which is large enough to accommodate all the currently | ||
450 | * known nodes will be created. | ||
451 | * | ||
452 | * If such table cannot be allocated, a warning is printed and further | ||
453 | * calls are ignored until the distance table is reset with | ||
454 | * numa_reset_distance(). | ||
455 | * | ||
456 | * If @from or @to is higher than the highest known node at the time of | ||
457 | * table creation or @distance doesn't make sense, the call is ignored. | ||
458 | * This is to allow simplification of specific NUMA config implementations. | ||
459 | */ | ||
460 | void __init numa_set_distance(int from, int to, int distance) | ||
461 | { | ||
462 | if (!numa_distance && numa_alloc_distance() < 0) | ||
463 | return; | ||
464 | |||
465 | if (from >= numa_distance_cnt || to >= numa_distance_cnt) { | ||
466 | printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n", | ||
467 | from, to, distance); | ||
468 | return; | ||
469 | } | ||
470 | |||
471 | if ((u8)distance != distance || | ||
472 | (from == to && distance != LOCAL_DISTANCE)) { | ||
473 | pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n", | ||
474 | from, to, distance); | ||
475 | return; | ||
476 | } | ||
477 | |||
478 | numa_distance[from * numa_distance_cnt + to] = distance; | ||
479 | } | ||
480 | |||
481 | int __node_distance(int from, int to) | ||
482 | { | ||
483 | if (from >= numa_distance_cnt || to >= numa_distance_cnt) | ||
484 | return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; | ||
485 | return numa_distance[from * numa_distance_cnt + to]; | ||
486 | } | ||
487 | EXPORT_SYMBOL(__node_distance); | ||
488 | |||
489 | /* | ||
490 | * Sanity check to catch more bad NUMA configurations (they are amazingly | ||
491 | * common). Make sure the nodes cover all memory. | ||
492 | */ | ||
493 | static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) | ||
494 | { | ||
495 | unsigned long numaram, e820ram; | ||
496 | int i; | ||
497 | |||
498 | numaram = 0; | ||
499 | for (i = 0; i < mi->nr_blks; i++) { | ||
500 | unsigned long s = mi->blk[i].start >> PAGE_SHIFT; | ||
501 | unsigned long e = mi->blk[i].end >> PAGE_SHIFT; | ||
502 | numaram += e - s; | ||
503 | numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); | ||
504 | if ((long)numaram < 0) | ||
505 | numaram = 0; | ||
506 | } | ||
507 | |||
508 | e820ram = max_pfn - (memblock_x86_hole_size(0, | ||
509 | max_pfn << PAGE_SHIFT) >> PAGE_SHIFT); | ||
510 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ | ||
511 | if ((long)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { | ||
512 | printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n", | ||
513 | (numaram << PAGE_SHIFT) >> 20, | ||
514 | (e820ram << PAGE_SHIFT) >> 20); | ||
515 | return false; | ||
516 | } | ||
517 | return true; | ||
518 | } | ||
519 | |||
520 | static int __init numa_register_memblks(struct numa_meminfo *mi) | ||
521 | { | ||
522 | int i, nid; | ||
523 | |||
524 | /* Account for nodes with cpus and no memory */ | ||
525 | node_possible_map = numa_nodes_parsed; | ||
526 | numa_nodemask_from_meminfo(&node_possible_map, mi); | ||
527 | if (WARN_ON(nodes_empty(node_possible_map))) | ||
528 | return -EINVAL; | ||
529 | |||
530 | memnode_shift = compute_hash_shift(mi); | ||
531 | if (memnode_shift < 0) { | ||
532 | printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n"); | ||
533 | return -EINVAL; | ||
534 | } | ||
535 | |||
536 | for (i = 0; i < mi->nr_blks; i++) | ||
537 | memblock_x86_register_active_regions(mi->blk[i].nid, | ||
538 | mi->blk[i].start >> PAGE_SHIFT, | ||
539 | mi->blk[i].end >> PAGE_SHIFT); | ||
540 | |||
541 | /* for out of order entries */ | ||
542 | sort_node_map(); | ||
543 | if (!numa_meminfo_cover_memory(mi)) | ||
544 | return -EINVAL; | ||
545 | |||
546 | /* Finally register nodes. */ | ||
547 | for_each_node_mask(nid, node_possible_map) { | ||
548 | u64 start = (u64)max_pfn << PAGE_SHIFT; | ||
549 | u64 end = 0; | ||
550 | |||
551 | for (i = 0; i < mi->nr_blks; i++) { | ||
552 | if (nid != mi->blk[i].nid) | ||
553 | continue; | ||
554 | start = min(mi->blk[i].start, start); | ||
555 | end = max(mi->blk[i].end, end); | ||
556 | } | ||
557 | |||
558 | if (start < end) | ||
559 | setup_node_bootmem(nid, start, end); | ||
560 | } | ||
561 | |||
562 | return 0; | ||
563 | } | ||
564 | |||
565 | /** | ||
566 | * dummy_numma_init - Fallback dummy NUMA init | ||
567 | * | ||
568 | * Used if there's no underlying NUMA architecture, NUMA initialization | ||
569 | * fails, or NUMA is disabled on the command line. | ||
570 | * | ||
571 | * Must online at least one node and add memory blocks that cover all | ||
572 | * allowed memory. This function must not fail. | ||
573 | */ | ||
574 | static int __init dummy_numa_init(void) | ||
575 | { | ||
576 | printk(KERN_INFO "%s\n", | ||
577 | numa_off ? "NUMA turned off" : "No NUMA configuration found"); | ||
578 | printk(KERN_INFO "Faking a node at %016lx-%016lx\n", | ||
579 | 0LU, max_pfn << PAGE_SHIFT); | ||
580 | |||
581 | node_set(0, numa_nodes_parsed); | ||
582 | numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT); | ||
583 | |||
584 | return 0; | ||
585 | } | ||
586 | |||
587 | static int __init numa_init(int (*init_func)(void)) | ||
588 | { | ||
589 | int i; | ||
590 | int ret; | ||
591 | |||
592 | for (i = 0; i < MAX_LOCAL_APIC; i++) | ||
593 | set_apicid_to_node(i, NUMA_NO_NODE); | ||
594 | |||
595 | nodes_clear(numa_nodes_parsed); | ||
596 | nodes_clear(node_possible_map); | ||
597 | nodes_clear(node_online_map); | ||
598 | memset(&numa_meminfo, 0, sizeof(numa_meminfo)); | ||
599 | remove_all_active_ranges(); | ||
600 | numa_reset_distance(); | ||
601 | |||
602 | ret = init_func(); | ||
603 | if (ret < 0) | ||
604 | return ret; | ||
605 | ret = numa_cleanup_meminfo(&numa_meminfo); | ||
606 | if (ret < 0) | ||
607 | return ret; | ||
608 | |||
609 | numa_emulation(&numa_meminfo, numa_distance_cnt); | ||
610 | |||
611 | ret = numa_register_memblks(&numa_meminfo); | ||
612 | if (ret < 0) | ||
613 | return ret; | ||
614 | |||
615 | for (i = 0; i < nr_cpu_ids; i++) { | ||
616 | int nid = early_cpu_to_node(i); | ||
617 | |||
618 | if (nid == NUMA_NO_NODE) | ||
619 | continue; | ||
620 | if (!node_online(nid)) | ||
621 | numa_clear_node(i); | ||
622 | } | ||
623 | numa_init_array(); | ||
624 | return 0; | ||
625 | } | ||
626 | |||
627 | void __init initmem_init(void) | 9 | void __init initmem_init(void) |
628 | { | 10 | { |
629 | int ret; | 11 | x86_numa_init(); |
630 | |||
631 | if (!numa_off) { | ||
632 | #ifdef CONFIG_ACPI_NUMA | ||
633 | ret = numa_init(x86_acpi_numa_init); | ||
634 | if (!ret) | ||
635 | return; | ||
636 | #endif | ||
637 | #ifdef CONFIG_AMD_NUMA | ||
638 | ret = numa_init(amd_numa_init); | ||
639 | if (!ret) | ||
640 | return; | ||
641 | #endif | ||
642 | } | ||
643 | |||
644 | numa_init(dummy_numa_init); | ||
645 | } | 12 | } |
646 | 13 | ||
647 | unsigned long __init numa_free_all_bootmem(void) | 14 | unsigned long __init numa_free_all_bootmem(void) |
@@ -656,12 +23,3 @@ unsigned long __init numa_free_all_bootmem(void) | |||
656 | 23 | ||
657 | return pages; | 24 | return pages; |
658 | } | 25 | } |
659 | |||
660 | int __cpuinit numa_cpu_node(int cpu) | ||
661 | { | ||
662 | int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); | ||
663 | |||
664 | if (apicid != BAD_APICID) | ||
665 | return __apicid_to_node[apicid]; | ||
666 | return NUMA_NO_NODE; | ||
667 | } | ||
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index de84cc140379..d0ed086b6247 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/errno.h> | 5 | #include <linux/errno.h> |
6 | #include <linux/topology.h> | 6 | #include <linux/topology.h> |
7 | #include <linux/memblock.h> | 7 | #include <linux/memblock.h> |
8 | #include <linux/bootmem.h> | ||
8 | #include <asm/dma.h> | 9 | #include <asm/dma.h> |
9 | 10 | ||
10 | #include "numa_internal.h" | 11 | #include "numa_internal.h" |
@@ -84,7 +85,13 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei, | |||
84 | nr_nodes = MAX_NUMNODES; | 85 | nr_nodes = MAX_NUMNODES; |
85 | } | 86 | } |
86 | 87 | ||
87 | size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes; | 88 | /* |
89 | * Calculate target node size. x86_32 freaks on __udivdi3() so do | ||
90 | * the division in ulong number of pages and convert back. | ||
91 | */ | ||
92 | size = max_addr - addr - memblock_x86_hole_size(addr, max_addr); | ||
93 | size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes); | ||
94 | |||
88 | /* | 95 | /* |
89 | * Calculate the number of big nodes that can be allocated as a result | 96 | * Calculate the number of big nodes that can be allocated as a result |
90 | * of consolidating the remainder. | 97 | * of consolidating the remainder. |
@@ -226,7 +233,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei, | |||
226 | */ | 233 | */ |
227 | while (nodes_weight(physnode_mask)) { | 234 | while (nodes_weight(physnode_mask)) { |
228 | for_each_node_mask(i, physnode_mask) { | 235 | for_each_node_mask(i, physnode_mask) { |
229 | u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT; | 236 | u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN); |
230 | u64 start, limit, end; | 237 | u64 start, limit, end; |
231 | int phys_blk; | 238 | int phys_blk; |
232 | 239 | ||
@@ -298,7 +305,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) | |||
298 | { | 305 | { |
299 | static struct numa_meminfo ei __initdata; | 306 | static struct numa_meminfo ei __initdata; |
300 | static struct numa_meminfo pi __initdata; | 307 | static struct numa_meminfo pi __initdata; |
301 | const u64 max_addr = max_pfn << PAGE_SHIFT; | 308 | const u64 max_addr = PFN_PHYS(max_pfn); |
302 | u8 *phys_dist = NULL; | 309 | u8 *phys_dist = NULL; |
303 | size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]); | 310 | size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]); |
304 | int max_emu_nid, dfl_phys_nid; | 311 | int max_emu_nid, dfl_phys_nid; |
@@ -342,8 +349,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) | |||
342 | if (numa_dist_cnt) { | 349 | if (numa_dist_cnt) { |
343 | u64 phys; | 350 | u64 phys; |
344 | 351 | ||
345 | phys = memblock_find_in_range(0, | 352 | phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), |
346 | (u64)max_pfn_mapped << PAGE_SHIFT, | ||
347 | phys_size, PAGE_SIZE); | 353 | phys_size, PAGE_SIZE); |
348 | if (phys == MEMBLOCK_ERROR) { | 354 | if (phys == MEMBLOCK_ERROR) { |
349 | pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); | 355 | pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); |
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h index ef2d97377d7c..7178c3afe05e 100644 --- a/arch/x86/mm/numa_internal.h +++ b/arch/x86/mm/numa_internal.h | |||
@@ -19,6 +19,14 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi); | |||
19 | int __init numa_cleanup_meminfo(struct numa_meminfo *mi); | 19 | int __init numa_cleanup_meminfo(struct numa_meminfo *mi); |
20 | void __init numa_reset_distance(void); | 20 | void __init numa_reset_distance(void); |
21 | 21 | ||
22 | void __init x86_numa_init(void); | ||
23 | |||
24 | #ifdef CONFIG_X86_64 | ||
25 | static inline void init_alloc_remap(int nid, u64 start, u64 end) { } | ||
26 | #else | ||
27 | void __init init_alloc_remap(int nid, u64 start, u64 end); | ||
28 | #endif | ||
29 | |||
22 | #ifdef CONFIG_NUMA_EMU | 30 | #ifdef CONFIG_NUMA_EMU |
23 | void __init numa_emulation(struct numa_meminfo *numa_meminfo, | 31 | void __init numa_emulation(struct numa_meminfo *numa_meminfo, |
24 | int numa_dist_cnt); | 32 | int numa_dist_cnt); |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat.c index 8e9d3394f6d4..81dbfdeb080d 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat.c | |||
@@ -26,8 +26,6 @@ | |||
26 | 26 | ||
27 | int acpi_numa __initdata; | 27 | int acpi_numa __initdata; |
28 | 28 | ||
29 | static struct bootnode nodes_add[MAX_NUMNODES]; | ||
30 | |||
31 | static __init int setup_node(int pxm) | 29 | static __init int setup_node(int pxm) |
32 | { | 30 | { |
33 | return acpi_map_pxm_to_node(pxm); | 31 | return acpi_map_pxm_to_node(pxm); |
@@ -37,7 +35,6 @@ static __init void bad_srat(void) | |||
37 | { | 35 | { |
38 | printk(KERN_ERR "SRAT: SRAT not used.\n"); | 36 | printk(KERN_ERR "SRAT: SRAT not used.\n"); |
39 | acpi_numa = -1; | 37 | acpi_numa = -1; |
40 | memset(nodes_add, 0, sizeof(nodes_add)); | ||
41 | } | 38 | } |
42 | 39 | ||
43 | static __init inline int srat_disabled(void) | 40 | static __init inline int srat_disabled(void) |
@@ -131,73 +128,17 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
131 | pxm, apic_id, node); | 128 | pxm, apic_id, node); |
132 | } | 129 | } |
133 | 130 | ||
134 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE | 131 | #ifdef CONFIG_MEMORY_HOTPLUG |
135 | static inline int save_add_info(void) {return 1;} | 132 | static inline int save_add_info(void) {return 1;} |
136 | #else | 133 | #else |
137 | static inline int save_add_info(void) {return 0;} | 134 | static inline int save_add_info(void) {return 0;} |
138 | #endif | 135 | #endif |
139 | /* | ||
140 | * Update nodes_add[] | ||
141 | * This code supports one contiguous hot add area per node | ||
142 | */ | ||
143 | static void __init | ||
144 | update_nodes_add(int node, unsigned long start, unsigned long end) | ||
145 | { | ||
146 | unsigned long s_pfn = start >> PAGE_SHIFT; | ||
147 | unsigned long e_pfn = end >> PAGE_SHIFT; | ||
148 | int changed = 0; | ||
149 | struct bootnode *nd = &nodes_add[node]; | ||
150 | |||
151 | /* I had some trouble with strange memory hotadd regions breaking | ||
152 | the boot. Be very strict here and reject anything unexpected. | ||
153 | If you want working memory hotadd write correct SRATs. | ||
154 | |||
155 | The node size check is a basic sanity check to guard against | ||
156 | mistakes */ | ||
157 | if ((signed long)(end - start) < NODE_MIN_SIZE) { | ||
158 | printk(KERN_ERR "SRAT: Hotplug area too small\n"); | ||
159 | return; | ||
160 | } | ||
161 | |||
162 | /* This check might be a bit too strict, but I'm keeping it for now. */ | ||
163 | if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) { | ||
164 | printk(KERN_ERR | ||
165 | "SRAT: Hotplug area %lu -> %lu has existing memory\n", | ||
166 | s_pfn, e_pfn); | ||
167 | return; | ||
168 | } | ||
169 | |||
170 | /* Looks good */ | ||
171 | |||
172 | if (nd->start == nd->end) { | ||
173 | nd->start = start; | ||
174 | nd->end = end; | ||
175 | changed = 1; | ||
176 | } else { | ||
177 | if (nd->start == end) { | ||
178 | nd->start = start; | ||
179 | changed = 1; | ||
180 | } | ||
181 | if (nd->end == start) { | ||
182 | nd->end = end; | ||
183 | changed = 1; | ||
184 | } | ||
185 | if (!changed) | ||
186 | printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); | ||
187 | } | ||
188 | |||
189 | if (changed) { | ||
190 | node_set(node, numa_nodes_parsed); | ||
191 | printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", | ||
192 | nd->start, nd->end); | ||
193 | } | ||
194 | } | ||
195 | 136 | ||
196 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ | 137 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ |
197 | void __init | 138 | void __init |
198 | acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | 139 | acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) |
199 | { | 140 | { |
200 | unsigned long start, end; | 141 | u64 start, end; |
201 | int node, pxm; | 142 | int node, pxm; |
202 | 143 | ||
203 | if (srat_disabled()) | 144 | if (srat_disabled()) |
@@ -226,11 +167,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
226 | return; | 167 | return; |
227 | } | 168 | } |
228 | 169 | ||
229 | printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, | 170 | printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, |
230 | start, end); | 171 | start, end); |
231 | |||
232 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) | ||
233 | update_nodes_add(node, start, end); | ||
234 | } | 172 | } |
235 | 173 | ||
236 | void __init acpi_numa_arch_fixup(void) {} | 174 | void __init acpi_numa_arch_fixup(void) {} |
@@ -244,17 +182,3 @@ int __init x86_acpi_numa_init(void) | |||
244 | return ret; | 182 | return ret; |
245 | return srat_disabled() ? -EINVAL : 0; | 183 | return srat_disabled() ? -EINVAL : 0; |
246 | } | 184 | } |
247 | |||
248 | #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) | ||
249 | int memory_add_physaddr_to_nid(u64 start) | ||
250 | { | ||
251 | int i, ret = 0; | ||
252 | |||
253 | for_each_node(i) | ||
254 | if (nodes_add[i].start <= start && nodes_add[i].end > start) | ||
255 | ret = i; | ||
256 | |||
257 | return ret; | ||
258 | } | ||
259 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | ||
260 | #endif | ||
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c deleted file mode 100644 index 364f36bdfad8..000000000000 --- a/arch/x86/mm/srat_32.c +++ /dev/null | |||
@@ -1,288 +0,0 @@ | |||
1 | /* | ||
2 | * Some of the code in this file has been gleaned from the 64 bit | ||
3 | * discontigmem support code base. | ||
4 | * | ||
5 | * Copyright (C) 2002, IBM Corp. | ||
6 | * | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, but | ||
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
17 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
18 | * details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
23 | * | ||
24 | * Send feedback to Pat Gaughen <gone@us.ibm.com> | ||
25 | */ | ||
26 | #include <linux/mm.h> | ||
27 | #include <linux/bootmem.h> | ||
28 | #include <linux/memblock.h> | ||
29 | #include <linux/mmzone.h> | ||
30 | #include <linux/acpi.h> | ||
31 | #include <linux/nodemask.h> | ||
32 | #include <asm/srat.h> | ||
33 | #include <asm/topology.h> | ||
34 | #include <asm/smp.h> | ||
35 | #include <asm/e820.h> | ||
36 | |||
37 | /* | ||
38 | * proximity macros and definitions | ||
39 | */ | ||
40 | #define NODE_ARRAY_INDEX(x) ((x) / 8) /* 8 bits/char */ | ||
41 | #define NODE_ARRAY_OFFSET(x) ((x) % 8) /* 8 bits/char */ | ||
42 | #define BMAP_SET(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit)) | ||
43 | #define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit))) | ||
44 | /* bitmap length; _PXM is at most 255 */ | ||
45 | #define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) | ||
46 | static u8 __initdata pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ | ||
47 | |||
48 | #define MAX_CHUNKS_PER_NODE 3 | ||
49 | #define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES) | ||
50 | struct node_memory_chunk_s { | ||
51 | unsigned long start_pfn; | ||
52 | unsigned long end_pfn; | ||
53 | u8 pxm; // proximity domain of node | ||
54 | u8 nid; // which cnode contains this chunk? | ||
55 | u8 bank; // which mem bank on this node | ||
56 | }; | ||
57 | static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS]; | ||
58 | |||
59 | static int __initdata num_memory_chunks; /* total number of memory chunks */ | ||
60 | static u8 __initdata apicid_to_pxm[MAX_LOCAL_APIC]; | ||
61 | |||
62 | int acpi_numa __initdata; | ||
63 | |||
64 | static __init void bad_srat(void) | ||
65 | { | ||
66 | printk(KERN_ERR "SRAT: SRAT not used.\n"); | ||
67 | acpi_numa = -1; | ||
68 | num_memory_chunks = 0; | ||
69 | } | ||
70 | |||
71 | static __init inline int srat_disabled(void) | ||
72 | { | ||
73 | return numa_off || acpi_numa < 0; | ||
74 | } | ||
75 | |||
76 | /* Identify CPU proximity domains */ | ||
77 | void __init | ||
78 | acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity) | ||
79 | { | ||
80 | if (srat_disabled()) | ||
81 | return; | ||
82 | if (cpu_affinity->header.length != | ||
83 | sizeof(struct acpi_srat_cpu_affinity)) { | ||
84 | bad_srat(); | ||
85 | return; | ||
86 | } | ||
87 | |||
88 | if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0) | ||
89 | return; /* empty entry */ | ||
90 | |||
91 | /* mark this node as "seen" in node bitmap */ | ||
92 | BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo); | ||
93 | |||
94 | /* don't need to check apic_id here, because it is always 8 bits */ | ||
95 | apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo; | ||
96 | |||
97 | printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n", | ||
98 | cpu_affinity->apic_id, cpu_affinity->proximity_domain_lo); | ||
99 | } | ||
100 | |||
101 | /* | ||
102 | * Identify memory proximity domains and hot-remove capabilities. | ||
103 | * Fill node memory chunk list structure. | ||
104 | */ | ||
105 | void __init | ||
106 | acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity) | ||
107 | { | ||
108 | unsigned long long paddr, size; | ||
109 | unsigned long start_pfn, end_pfn; | ||
110 | u8 pxm; | ||
111 | struct node_memory_chunk_s *p, *q, *pend; | ||
112 | |||
113 | if (srat_disabled()) | ||
114 | return; | ||
115 | if (memory_affinity->header.length != | ||
116 | sizeof(struct acpi_srat_mem_affinity)) { | ||
117 | bad_srat(); | ||
118 | return; | ||
119 | } | ||
120 | |||
121 | if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0) | ||
122 | return; /* empty entry */ | ||
123 | |||
124 | pxm = memory_affinity->proximity_domain & 0xff; | ||
125 | |||
126 | /* mark this node as "seen" in node bitmap */ | ||
127 | BMAP_SET(pxm_bitmap, pxm); | ||
128 | |||
129 | /* calculate info for memory chunk structure */ | ||
130 | paddr = memory_affinity->base_address; | ||
131 | size = memory_affinity->length; | ||
132 | |||
133 | start_pfn = paddr >> PAGE_SHIFT; | ||
134 | end_pfn = (paddr + size) >> PAGE_SHIFT; | ||
135 | |||
136 | |||
137 | if (num_memory_chunks >= MAXCHUNKS) { | ||
138 | printk(KERN_WARNING "Too many mem chunks in SRAT." | ||
139 | " Ignoring %lld MBytes at %llx\n", | ||
140 | size/(1024*1024), paddr); | ||
141 | return; | ||
142 | } | ||
143 | |||
144 | /* Insertion sort based on base address */ | ||
145 | pend = &node_memory_chunk[num_memory_chunks]; | ||
146 | for (p = &node_memory_chunk[0]; p < pend; p++) { | ||
147 | if (start_pfn < p->start_pfn) | ||
148 | break; | ||
149 | } | ||
150 | if (p < pend) { | ||
151 | for (q = pend; q >= p; q--) | ||
152 | *(q + 1) = *q; | ||
153 | } | ||
154 | p->start_pfn = start_pfn; | ||
155 | p->end_pfn = end_pfn; | ||
156 | p->pxm = pxm; | ||
157 | |||
158 | num_memory_chunks++; | ||
159 | |||
160 | printk(KERN_DEBUG "Memory range %08lx to %08lx" | ||
161 | " in proximity domain %02x %s\n", | ||
162 | start_pfn, end_pfn, | ||
163 | pxm, | ||
164 | ((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ? | ||
165 | "enabled and removable" : "enabled" ) ); | ||
166 | } | ||
167 | |||
168 | /* Callback for SLIT parsing */ | ||
169 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) | ||
170 | { | ||
171 | } | ||
172 | |||
173 | void acpi_numa_arch_fixup(void) | ||
174 | { | ||
175 | } | ||
176 | /* | ||
177 | * The SRAT table always lists ascending addresses, so can always | ||
178 | * assume that the first "start" address that you see is the real | ||
179 | * start of the node, and that the current "end" address is after | ||
180 | * the previous one. | ||
181 | */ | ||
182 | static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk) | ||
183 | { | ||
184 | /* | ||
185 | * Only add present memory as told by the e820. | ||
186 | * There is no guarantee from the SRAT that the memory it | ||
187 | * enumerates is present at boot time because it represents | ||
188 | * *possible* memory hotplug areas the same as normal RAM. | ||
189 | */ | ||
190 | if (memory_chunk->start_pfn >= max_pfn) { | ||
191 | printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n", | ||
192 | memory_chunk->start_pfn, memory_chunk->end_pfn); | ||
193 | return -1; | ||
194 | } | ||
195 | if (memory_chunk->nid != nid) | ||
196 | return -1; | ||
197 | |||
198 | if (!node_has_online_mem(nid)) | ||
199 | node_start_pfn[nid] = memory_chunk->start_pfn; | ||
200 | |||
201 | if (node_start_pfn[nid] > memory_chunk->start_pfn) | ||
202 | node_start_pfn[nid] = memory_chunk->start_pfn; | ||
203 | |||
204 | if (node_end_pfn[nid] < memory_chunk->end_pfn) | ||
205 | node_end_pfn[nid] = memory_chunk->end_pfn; | ||
206 | |||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | int __init get_memcfg_from_srat(void) | ||
211 | { | ||
212 | int i, j, nid; | ||
213 | |||
214 | if (srat_disabled()) | ||
215 | goto out_fail; | ||
216 | |||
217 | if (acpi_numa_init() < 0) | ||
218 | goto out_fail; | ||
219 | |||
220 | if (num_memory_chunks == 0) { | ||
221 | printk(KERN_DEBUG | ||
222 | "could not find any ACPI SRAT memory areas.\n"); | ||
223 | goto out_fail; | ||
224 | } | ||
225 | |||
226 | /* Calculate total number of nodes in system from PXM bitmap and create | ||
227 | * a set of sequential node IDs starting at zero. (ACPI doesn't seem | ||
228 | * to specify the range of _PXM values.) | ||
229 | */ | ||
230 | /* | ||
231 | * MCD - we no longer HAVE to number nodes sequentially. PXM domain | ||
232 | * numbers could go as high as 256, and MAX_NUMNODES for i386 is typically | ||
233 | * 32, so we will continue numbering them in this manner until MAX_NUMNODES | ||
234 | * approaches MAX_PXM_DOMAINS for i386. | ||
235 | */ | ||
236 | nodes_clear(node_online_map); | ||
237 | for (i = 0; i < MAX_PXM_DOMAINS; i++) { | ||
238 | if (BMAP_TEST(pxm_bitmap, i)) { | ||
239 | int nid = acpi_map_pxm_to_node(i); | ||
240 | node_set_online(nid); | ||
241 | } | ||
242 | } | ||
243 | BUG_ON(num_online_nodes() == 0); | ||
244 | |||
245 | /* set cnode id in memory chunk structure */ | ||
246 | for (i = 0; i < num_memory_chunks; i++) | ||
247 | node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm); | ||
248 | |||
249 | printk(KERN_DEBUG "pxm bitmap: "); | ||
250 | for (i = 0; i < sizeof(pxm_bitmap); i++) { | ||
251 | printk(KERN_CONT "%02x ", pxm_bitmap[i]); | ||
252 | } | ||
253 | printk(KERN_CONT "\n"); | ||
254 | printk(KERN_DEBUG "Number of logical nodes in system = %d\n", | ||
255 | num_online_nodes()); | ||
256 | printk(KERN_DEBUG "Number of memory chunks in system = %d\n", | ||
257 | num_memory_chunks); | ||
258 | |||
259 | for (i = 0; i < MAX_LOCAL_APIC; i++) | ||
260 | set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i])); | ||
261 | |||
262 | for (j = 0; j < num_memory_chunks; j++){ | ||
263 | struct node_memory_chunk_s * chunk = &node_memory_chunk[j]; | ||
264 | printk(KERN_DEBUG | ||
265 | "chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", | ||
266 | j, chunk->nid, chunk->start_pfn, chunk->end_pfn); | ||
267 | if (node_read_chunk(chunk->nid, chunk)) | ||
268 | continue; | ||
269 | |||
270 | memblock_x86_register_active_regions(chunk->nid, chunk->start_pfn, | ||
271 | min(chunk->end_pfn, max_pfn)); | ||
272 | } | ||
273 | /* for out of order entries in SRAT */ | ||
274 | sort_node_map(); | ||
275 | |||
276 | for_each_online_node(nid) { | ||
277 | unsigned long start = node_start_pfn[nid]; | ||
278 | unsigned long end = min(node_end_pfn[nid], max_pfn); | ||
279 | |||
280 | memory_present(nid, start, end); | ||
281 | node_remap_size[nid] = node_memmap_size_bytes(nid, start, end); | ||
282 | } | ||
283 | return 1; | ||
284 | out_fail: | ||
285 | printk(KERN_DEBUG "failed to get NUMA memory information from SRAT" | ||
286 | " table\n"); | ||
287 | return 0; | ||
288 | } | ||
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile new file mode 100644 index 000000000000..90568c33ddb0 --- /dev/null +++ b/arch/x86/net/Makefile | |||
@@ -0,0 +1,4 @@ | |||
1 | # | ||
2 | # Arch-specific network modules | ||
3 | # | ||
4 | obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o | ||
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S new file mode 100644 index 000000000000..66870223f8c5 --- /dev/null +++ b/arch/x86/net/bpf_jit.S | |||
@@ -0,0 +1,140 @@ | |||
1 | /* bpf_jit.S : BPF JIT helper functions | ||
2 | * | ||
3 | * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; version 2 | ||
8 | * of the License. | ||
9 | */ | ||
10 | #include <linux/linkage.h> | ||
11 | #include <asm/dwarf2.h> | ||
12 | |||
13 | /* | ||
14 | * Calling convention : | ||
15 | * rdi : skb pointer | ||
16 | * esi : offset of byte(s) to fetch in skb (can be scratched) | ||
17 | * r8 : copy of skb->data | ||
18 | * r9d : hlen = skb->len - skb->data_len | ||
19 | */ | ||
20 | #define SKBDATA %r8 | ||
21 | |||
/*
 * sk_load_word_ind / sk_load_word : A = ntohl(*(u32 *)(skb->data + offset))
 *
 * sk_load_word_ind first adds X (%ebx) to the offset in %esi, bails out
 * through bpf_error if the sum is negative, then falls through into
 * sk_load_word.
 * sk_load_word reads directly from SKBDATA when at least 4 bytes are
 * available below hlen (%r9d), otherwise it takes bpf_slow_path_word.
 */
	.globl	sk_load_word_ind
sk_load_word_ind:
	add	%ebx,%esi		/* offset += X */
	js	bpf_error		/* sign flag from the add : offset < 0 */
	/* fall through */

	.globl	sk_load_word
sk_load_word:
	mov	%r9d,%eax		/* eax = hlen */
	sub	%esi,%eax		/* eax = hlen - offset */
	cmp	$3,%eax
	jle	bpf_slow_path_word	/* fewer than 4 bytes left in the head */
	mov	(SKBDATA,%rsi),%eax
	bswap	%eax			/* ntohl() */
	ret
39 | |||
40 | |||
/*
 * sk_load_half_ind / sk_load_half : A = ntohs(*(u16 *)(skb->data + offset))
 *
 * Same structure as the word loaders: the _ind entry adds X to the offset
 * and rejects a negative result, then falls through. The fast path needs
 * at least 2 bytes below hlen, otherwise bpf_slow_path_half is used.
 */
	.globl	sk_load_half_ind
sk_load_half_ind:
	add	%ebx,%esi		/* offset += X */
	js	bpf_error		/* offset < 0 */
	/* fall through */

	.globl	sk_load_half
sk_load_half:
	mov	%r9d,%eax		/* eax = hlen */
	sub	%esi,%eax		/* eax = hlen - offset */
	cmp	$1,%eax
	jle	bpf_slow_path_half	/* fewer than 2 bytes left in the head */
	movzwl	(SKBDATA,%rsi),%eax	/* zero-extended 16-bit load */
	rol	$8,%ax			/* ntohs() */
	ret
57 | |||
/*
 * sk_load_byte_ind / sk_load_byte : A = *(u8 *)(skb->data + offset)
 *
 * The _ind entry adds X to the offset, rejects a negative result and
 * falls through. The fast path is taken only when offset < hlen.
 */
	.globl	sk_load_byte_ind
sk_load_byte_ind:
	add	%ebx,%esi		/* offset += X */
	js	bpf_error		/* offset < 0 */
	/* fall through */

	.globl	sk_load_byte
sk_load_byte:
	cmp	%esi,%r9d		/* if (offset >= hlen) goto bpf_slow_path_byte */
	jle	bpf_slow_path_byte
	movzbl	(SKBDATA,%rsi),%eax
	ret
70 | |||
/**
 * sk_load_byte_msh - BPF_S_LDX_B_MSH helper
 *
 * Implements BPF_S_LDX_B_MSH : ldxb  4*([offset]&0xf)
 * The result is left in X (%ebx); the A accumulator (%eax) is preserved.
 * Inputs : %esi is the offset value, already known positive
 *          %r9d is hlen, SKBDATA is skb->data
 */
ENTRY(sk_load_byte_msh)
	CFI_STARTPROC
	cmp	%esi,%r9d		/* if (offset >= hlen) goto bpf_slow_path_byte_msh */
	jle	bpf_slow_path_byte_msh
	movzbl	(SKBDATA,%rsi),%ebx	/* X = byte at offset, zero-extended */
	and	$15,%bl			/* keep the low nibble */
	shl	$2,%bl			/* multiply it by 4 */
	ret
	CFI_ENDPROC
ENDPROC(sk_load_byte_msh)
88 | |||
/*
 * bpf_error : common failure exit of the load helpers.
 *
 * Forces a "return 0" from the JIT handler: A is cleared, %rbx is
 * reloaded from its save slot at -8(%rbp), and leaveq tears down the
 * JIT frame so that ret leaves the JIT-generated function itself.
 */
bpf_error:
	xor	%eax,%eax		/* return value 0 */
	mov	-8(%rbp),%rbx		/* restore saved %rbx */
	leaveq
	ret
95 | |||
/*
 * bpf_slow_path_common(LEN) : call skb_copy_bits() to copy LEN bytes at
 * the offset held in %rsi into the 4-byte scratch slot at -12(%rbp).
 *
 * %rdi (skb), %r9 (hlen) and SKBDATA are saved around the call and
 * restored afterwards. The flags set by "test %eax,%eax" are left for
 * the caller, which checks them with js right after the macro.
 * rsi contains offset and can be scratched.
 */
#define bpf_slow_path_common(LEN)		\
	push	%rdi;    /* save skb */		\
	push	%r9;     /* save hlen */	\
	push	SKBDATA; /* save skb->data */	\
/* rsi already has offset */			\
	mov	$LEN,%ecx;	/* len */	\
	lea	-12(%rbp),%rdx; /* destination buffer */ \
	call	skb_copy_bits;			\
	test    %eax,%eax;			\
	pop	SKBDATA;			\
	pop	%r9;				\
	pop	%rdi
109 | |||
110 | |||
/*
 * Slow path of sk_load_word : fetch 4 bytes through skb_copy_bits(),
 * give up via bpf_error if it reported a negative result, otherwise
 * return the byte-swapped word in A.
 */
bpf_slow_path_word:
	bpf_slow_path_common(4)
	js	bpf_error		/* skb_copy_bits() returned < 0 */
	mov	-12(%rbp),%eax		/* word copied into the scratch slot */
	bswap	%eax			/* ntohl() */
	ret
117 | |||
/*
 * Slow path of sk_load_half : fetch 2 bytes through skb_copy_bits(),
 * swap them to host order and zero-extend the result into A.
 */
bpf_slow_path_half:
	bpf_slow_path_common(2)
	js	bpf_error		/* skb_copy_bits() returned < 0 */
	mov	-12(%rbp),%ax		/* half word copied into the scratch slot */
	rol	$8,%ax			/* ntohs() */
	movzwl	%ax,%eax		/* clear the upper 16 bits of A */
	ret
125 | |||
/*
 * Slow path of sk_load_byte : fetch 1 byte through skb_copy_bits() and
 * return it zero-extended in A.
 */
bpf_slow_path_byte:
	bpf_slow_path_common(1)
	js	bpf_error		/* skb_copy_bits() returned < 0 */
	movzbl	-12(%rbp),%eax
	ret
131 | |||
/*
 * Slow path of sk_load_byte_msh : A is parked in %ebx across the call
 * to skb_copy_bits(), the result 4*(byte & 0xf) is computed in %eax,
 * and a final exchange puts the result in X and A back in %eax.
 */
bpf_slow_path_byte_msh:
	xchg	%eax,%ebx		/* dont lose A , X is about to be scratched */
	bpf_slow_path_common(1)
	js	bpf_error		/* skb_copy_bits() returned < 0 */
	movzbl	-12(%rbp),%eax		/* byte copied into the scratch slot */
	and	$15,%al			/* keep the low nibble */
	shl	$2,%al			/* multiply it by 4 */
	xchg	%eax,%ebx		/* X = result, A restored */
	ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c new file mode 100644 index 000000000000..bfab3fa10edc --- /dev/null +++ b/arch/x86/net/bpf_jit_comp.c | |||
@@ -0,0 +1,654 @@ | |||
1 | /* bpf_jit_comp.c : BPF JIT compiler | ||
2 | * | ||
3 | * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; version 2 | ||
8 | * of the License. | ||
9 | */ | ||
10 | #include <linux/moduleloader.h> | ||
11 | #include <asm/cacheflush.h> | ||
12 | #include <linux/netdevice.h> | ||
13 | #include <linux/filter.h> | ||
14 | |||
15 | /* | ||
16 | * Conventions : | ||
17 | * EAX : BPF A accumulator | ||
18 | * EBX : BPF X accumulator | ||
19 | * RDI : pointer to skb (first argument given to JIT function) | ||
20 | * RBP : frame pointer (even if CONFIG_FRAME_POINTER=n) | ||
21 | * ECX,EDX,ESI : scratch registers | ||
22 | * r9d : skb->len - skb->data_len (headlen) | ||
23 | * r8 : skb->data | ||
24 | * -8(RBP) : saved RBX value | ||
25 | * -16(RBP)..-80(RBP) : BPF_MEMWORDS values | ||
26 | */ | ||
27 | int bpf_jit_enable __read_mostly; | ||
28 | |||
29 | /* | ||
30 | * assembly code in arch/x86/net/bpf_jit.S | ||
31 | */ | ||
32 | extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; | ||
33 | extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[]; | ||
34 | |||
35 | static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) | ||
36 | { | ||
37 | if (len == 1) | ||
38 | *ptr = bytes; | ||
39 | else if (len == 2) | ||
40 | *(u16 *)ptr = bytes; | ||
41 | else { | ||
42 | *(u32 *)ptr = bytes; | ||
43 | barrier(); | ||
44 | } | ||
45 | return ptr + len; | ||
46 | } | ||
47 | |||
/*
 * Code emission helpers. They all append to the local variable 'prog'
 * of the function that uses them.
 */
#define EMIT(bytes, len) \
	do { prog = emit_code(prog, bytes, len); } while (0)

/* EMITn(b1..bn): emit n bytes, b1 first (packed into the low bits). */
#define EMIT1(b1)		EMIT(b1, 1)
#define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4)   EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)

/* One opcode byte followed by a 32-bit operand. */
#define EMIT1_off32(b1, off) \
	do { EMIT1(b1); EMIT(off, 4); } while (0)

#define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */
#define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */
58 | |||
/* True when @value can be encoded as a sign-extended 8-bit immediate. */
static inline bool is_imm8(int value)
{
	if (value < -128)
		return false;
	return value <= 127;
}
63 | |||
/* True when a jump displacement of @offset fits in a signed 8-bit field. */
static inline bool is_near(int offset)
{
	if (offset < -128)
		return false;
	return offset <= 127;
}
68 | |||
/*
 * Emit an unconditional jump of @offset bytes: nothing at all for a zero
 * offset, the 2-byte short form when the offset fits in 8 bits, and the
 * 5-byte form (opcode + 32-bit displacement) otherwise.
 */
#define EMIT_JMP(offset)						\
do {									\
	if (offset) {							\
		if (is_near(offset))					\
			EMIT2(0xeb, offset); /* jmp .+off8 */		\
		else							\
			EMIT1_off32(0xe9, offset); /* jmp .+off32 */	\
	}								\
} while (0)
78 | |||
/*
 * x86 conditional jump opcodes, short form (. + s8).
 * The far form (. + s32) is the same opcode plus 0x10, preceded by an
 * extra 0x0f byte.
 */
#define X86_JB  0x72
#define X86_JAE 0x73
#define X86_JE  0x74
#define X86_JNE 0x75
#define X86_JBE 0x76
#define X86_JA  0x77

/*
 * Emit conditional jump @op of @offset bytes: 2 bytes when the offset
 * fits in 8 bits, 6 bytes (two opcode bytes + 32-bit displacement)
 * otherwise. Unlike EMIT_JMP, a zero offset still emits the jump.
 */
#define EMIT_COND_JMP(op, offset)				\
do {								\
	if (is_near(offset))					\
		EMIT2(op, offset); /* jxx .+off8 */		\
	else {							\
		EMIT2(0x0f, op + 0x10);				\
		EMIT(offset, 4); /* jxx .+off32 */		\
	}							\
} while (0)
98 | |||
/*
 * Switch-case generator for BPF conditional jumps: records the x86
 * opcode to use when the condition is true (TOP) and when it is false
 * (FOP) in the enclosing function's t_op/f_op, then jumps to its shared
 * 'cond_branch' label.
 */
#define COND_SEL(CODE, TOP, FOP)	\
	case CODE:			\
		t_op = TOP;		\
		f_op = FOP;		\
		goto cond_branch
104 | |||
105 | |||
/* Bits of the 'seen' mask describing what the filter being compiled uses. */
#define SEEN_DATAREF 1 /* might call external helpers */
#define SEEN_XREG    2 /* ebx is used */
#define SEEN_MEM     4 /* use mem[] for temporary storage */
109 | |||
110 | static inline void bpf_flush_icache(void *start, void *end) | ||
111 | { | ||
112 | mm_segment_t old_fs = get_fs(); | ||
113 | |||
114 | set_fs(KERNEL_DS); | ||
115 | smp_wmb(); | ||
116 | flush_icache_range((unsigned long)start, (unsigned long)end); | ||
117 | set_fs(old_fs); | ||
118 | } | ||
119 | |||
120 | |||
121 | void bpf_jit_compile(struct sk_filter *fp) | ||
122 | { | ||
123 | u8 temp[64]; | ||
124 | u8 *prog; | ||
125 | unsigned int proglen, oldproglen = 0; | ||
126 | int ilen, i; | ||
127 | int t_offset, f_offset; | ||
128 | u8 t_op, f_op, seen = 0, pass; | ||
129 | u8 *image = NULL; | ||
130 | u8 *func; | ||
131 | int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */ | ||
132 | unsigned int cleanup_addr; /* epilogue code offset */ | ||
133 | unsigned int *addrs; | ||
134 | const struct sock_filter *filter = fp->insns; | ||
135 | int flen = fp->len; | ||
136 | |||
137 | if (!bpf_jit_enable) | ||
138 | return; | ||
139 | |||
140 | addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL); | ||
141 | if (addrs == NULL) | ||
142 | return; | ||
143 | |||
144 | /* Before first pass, make a rough estimation of addrs[] | ||
145 | * each bpf instruction is translated to less than 64 bytes | ||
146 | */ | ||
147 | for (proglen = 0, i = 0; i < flen; i++) { | ||
148 | proglen += 64; | ||
149 | addrs[i] = proglen; | ||
150 | } | ||
151 | cleanup_addr = proglen; /* epilogue address */ | ||
152 | |||
153 | for (pass = 0; pass < 10; pass++) { | ||
154 | /* no prologue/epilogue for trivial filters (RET something) */ | ||
155 | proglen = 0; | ||
156 | prog = temp; | ||
157 | |||
158 | if (seen) { | ||
159 | EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */ | ||
160 | EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */ | ||
161 | /* note : must save %rbx in case bpf_error is hit */ | ||
162 | if (seen & (SEEN_XREG | SEEN_DATAREF)) | ||
163 | EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */ | ||
164 | if (seen & SEEN_XREG) | ||
165 | CLEAR_X(); /* make sure we dont leek kernel memory */ | ||
166 | |||
167 | /* | ||
168 | * If this filter needs to access skb data, | ||
169 | * loads r9 and r8 with : | ||
170 | * r9 = skb->len - skb->data_len | ||
171 | * r8 = skb->data | ||
172 | */ | ||
173 | if (seen & SEEN_DATAREF) { | ||
174 | if (offsetof(struct sk_buff, len) <= 127) | ||
175 | /* mov off8(%rdi),%r9d */ | ||
176 | EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len)); | ||
177 | else { | ||
178 | /* mov off32(%rdi),%r9d */ | ||
179 | EMIT3(0x44, 0x8b, 0x8f); | ||
180 | EMIT(offsetof(struct sk_buff, len), 4); | ||
181 | } | ||
182 | if (is_imm8(offsetof(struct sk_buff, data_len))) | ||
183 | /* sub off8(%rdi),%r9d */ | ||
184 | EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len)); | ||
185 | else { | ||
186 | EMIT3(0x44, 0x2b, 0x8f); | ||
187 | EMIT(offsetof(struct sk_buff, data_len), 4); | ||
188 | } | ||
189 | |||
190 | if (is_imm8(offsetof(struct sk_buff, data))) | ||
191 | /* mov off8(%rdi),%r8 */ | ||
192 | EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data)); | ||
193 | else { | ||
194 | /* mov off32(%rdi),%r8 */ | ||
195 | EMIT3(0x4c, 0x8b, 0x87); | ||
196 | EMIT(offsetof(struct sk_buff, data), 4); | ||
197 | } | ||
198 | } | ||
199 | } | ||
200 | |||
201 | switch (filter[0].code) { | ||
202 | case BPF_S_RET_K: | ||
203 | case BPF_S_LD_W_LEN: | ||
204 | case BPF_S_ANC_PROTOCOL: | ||
205 | case BPF_S_ANC_IFINDEX: | ||
206 | case BPF_S_ANC_MARK: | ||
207 | case BPF_S_ANC_RXHASH: | ||
208 | case BPF_S_ANC_CPU: | ||
209 | case BPF_S_ANC_QUEUE: | ||
210 | case BPF_S_LD_W_ABS: | ||
211 | case BPF_S_LD_H_ABS: | ||
212 | case BPF_S_LD_B_ABS: | ||
213 | /* first instruction sets A register (or is RET 'constant') */ | ||
214 | break; | ||
215 | default: | ||
216 | /* make sure we dont leak kernel information to user */ | ||
217 | CLEAR_A(); /* A = 0 */ | ||
218 | } | ||
219 | |||
220 | for (i = 0; i < flen; i++) { | ||
221 | unsigned int K = filter[i].k; | ||
222 | |||
223 | switch (filter[i].code) { | ||
224 | case BPF_S_ALU_ADD_X: /* A += X; */ | ||
225 | seen |= SEEN_XREG; | ||
226 | EMIT2(0x01, 0xd8); /* add %ebx,%eax */ | ||
227 | break; | ||
228 | case BPF_S_ALU_ADD_K: /* A += K; */ | ||
229 | if (!K) | ||
230 | break; | ||
231 | if (is_imm8(K)) | ||
232 | EMIT3(0x83, 0xc0, K); /* add imm8,%eax */ | ||
233 | else | ||
234 | EMIT1_off32(0x05, K); /* add imm32,%eax */ | ||
235 | break; | ||
236 | case BPF_S_ALU_SUB_X: /* A -= X; */ | ||
237 | seen |= SEEN_XREG; | ||
238 | EMIT2(0x29, 0xd8); /* sub %ebx,%eax */ | ||
239 | break; | ||
240 | case BPF_S_ALU_SUB_K: /* A -= K */ | ||
241 | if (!K) | ||
242 | break; | ||
243 | if (is_imm8(K)) | ||
244 | EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */ | ||
245 | else | ||
246 | EMIT1_off32(0x2d, K); /* sub imm32,%eax */ | ||
247 | break; | ||
248 | case BPF_S_ALU_MUL_X: /* A *= X; */ | ||
249 | seen |= SEEN_XREG; | ||
250 | EMIT3(0x0f, 0xaf, 0xc3); /* imul %ebx,%eax */ | ||
251 | break; | ||
252 | case BPF_S_ALU_MUL_K: /* A *= K */ | ||
253 | if (is_imm8(K)) | ||
254 | EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */ | ||
255 | else { | ||
256 | EMIT2(0x69, 0xc0); /* imul imm32,%eax */ | ||
257 | EMIT(K, 4); | ||
258 | } | ||
259 | break; | ||
260 | case BPF_S_ALU_DIV_X: /* A /= X; */ | ||
261 | seen |= SEEN_XREG; | ||
262 | EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ | ||
263 | if (pc_ret0 != -1) | ||
264 | EMIT_COND_JMP(X86_JE, addrs[pc_ret0] - (addrs[i] - 4)); | ||
265 | else { | ||
266 | EMIT_COND_JMP(X86_JNE, 2 + 5); | ||
267 | CLEAR_A(); | ||
268 | EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */ | ||
269 | } | ||
270 | EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */ | ||
271 | break; | ||
272 | case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */ | ||
273 | EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */ | ||
274 | EMIT(K, 4); | ||
275 | EMIT4(0x48, 0xc1, 0xe8, 0x20); /* shr $0x20,%rax */ | ||
276 | break; | ||
277 | case BPF_S_ALU_AND_X: | ||
278 | seen |= SEEN_XREG; | ||
279 | EMIT2(0x21, 0xd8); /* and %ebx,%eax */ | ||
280 | break; | ||
281 | case BPF_S_ALU_AND_K: | ||
282 | if (K >= 0xFFFFFF00) { | ||
283 | EMIT2(0x24, K & 0xFF); /* and imm8,%al */ | ||
284 | } else if (K >= 0xFFFF0000) { | ||
285 | EMIT2(0x66, 0x25); /* and imm16,%ax */ | ||
286 | EMIT2(K, 2); | ||
287 | } else { | ||
288 | EMIT1_off32(0x25, K); /* and imm32,%eax */ | ||
289 | } | ||
290 | break; | ||
291 | case BPF_S_ALU_OR_X: | ||
292 | seen |= SEEN_XREG; | ||
293 | EMIT2(0x09, 0xd8); /* or %ebx,%eax */ | ||
294 | break; | ||
295 | case BPF_S_ALU_OR_K: | ||
296 | if (is_imm8(K)) | ||
297 | EMIT3(0x83, 0xc8, K); /* or imm8,%eax */ | ||
298 | else | ||
299 | EMIT1_off32(0x0d, K); /* or imm32,%eax */ | ||
300 | break; | ||
301 | case BPF_S_ALU_LSH_X: /* A <<= X; */ | ||
302 | seen |= SEEN_XREG; | ||
303 | EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */ | ||
304 | break; | ||
305 | case BPF_S_ALU_LSH_K: | ||
306 | if (K == 0) | ||
307 | break; | ||
308 | else if (K == 1) | ||
309 | EMIT2(0xd1, 0xe0); /* shl %eax */ | ||
310 | else | ||
311 | EMIT3(0xc1, 0xe0, K); | ||
312 | break; | ||
313 | case BPF_S_ALU_RSH_X: /* A >>= X; */ | ||
314 | seen |= SEEN_XREG; | ||
315 | EMIT4(0x89, 0xd9, 0xd3, 0xe8); /* mov %ebx,%ecx; shr %cl,%eax */ | ||
316 | break; | ||
317 | case BPF_S_ALU_RSH_K: /* A >>= K; */ | ||
318 | if (K == 0) | ||
319 | break; | ||
320 | else if (K == 1) | ||
321 | EMIT2(0xd1, 0xe8); /* shr %eax */ | ||
322 | else | ||
323 | EMIT3(0xc1, 0xe8, K); | ||
324 | break; | ||
325 | case BPF_S_ALU_NEG: | ||
326 | EMIT2(0xf7, 0xd8); /* neg %eax */ | ||
327 | break; | ||
328 | case BPF_S_RET_K: | ||
329 | if (!K) { | ||
330 | if (pc_ret0 == -1) | ||
331 | pc_ret0 = i; | ||
332 | CLEAR_A(); | ||
333 | } else { | ||
334 | EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ | ||
335 | } | ||
336 | /* fallinto */ | ||
337 | case BPF_S_RET_A: | ||
338 | if (seen) { | ||
339 | if (i != flen - 1) { | ||
340 | EMIT_JMP(cleanup_addr - addrs[i]); | ||
341 | break; | ||
342 | } | ||
343 | if (seen & SEEN_XREG) | ||
344 | EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */ | ||
345 | EMIT1(0xc9); /* leaveq */ | ||
346 | } | ||
347 | EMIT1(0xc3); /* ret */ | ||
348 | break; | ||
349 | case BPF_S_MISC_TAX: /* X = A */ | ||
350 | seen |= SEEN_XREG; | ||
351 | EMIT2(0x89, 0xc3); /* mov %eax,%ebx */ | ||
352 | break; | ||
353 | case BPF_S_MISC_TXA: /* A = X */ | ||
354 | seen |= SEEN_XREG; | ||
355 | EMIT2(0x89, 0xd8); /* mov %ebx,%eax */ | ||
356 | break; | ||
357 | case BPF_S_LD_IMM: /* A = K */ | ||
358 | if (!K) | ||
359 | CLEAR_A(); | ||
360 | else | ||
361 | EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ | ||
362 | break; | ||
363 | case BPF_S_LDX_IMM: /* X = K */ | ||
364 | seen |= SEEN_XREG; | ||
365 | if (!K) | ||
366 | CLEAR_X(); | ||
367 | else | ||
368 | EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */ | ||
369 | break; | ||
370 | case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */ | ||
371 | seen |= SEEN_MEM; | ||
372 | EMIT3(0x8b, 0x45, 0xf0 - K*4); | ||
373 | break; | ||
374 | case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */ | ||
375 | seen |= SEEN_XREG | SEEN_MEM; | ||
376 | EMIT3(0x8b, 0x5d, 0xf0 - K*4); | ||
377 | break; | ||
378 | case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */ | ||
379 | seen |= SEEN_MEM; | ||
380 | EMIT3(0x89, 0x45, 0xf0 - K*4); | ||
381 | break; | ||
382 | case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ | ||
383 | seen |= SEEN_XREG | SEEN_MEM; | ||
384 | EMIT3(0x89, 0x5d, 0xf0 - K*4); | ||
385 | break; | ||
386 | case BPF_S_LD_W_LEN: /* A = skb->len; */ | ||
387 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); | ||
388 | if (is_imm8(offsetof(struct sk_buff, len))) | ||
389 | /* mov off8(%rdi),%eax */ | ||
390 | EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len)); | ||
391 | else { | ||
392 | EMIT2(0x8b, 0x87); | ||
393 | EMIT(offsetof(struct sk_buff, len), 4); | ||
394 | } | ||
395 | break; | ||
396 | case BPF_S_LDX_W_LEN: /* X = skb->len; */ | ||
397 | seen |= SEEN_XREG; | ||
398 | if (is_imm8(offsetof(struct sk_buff, len))) | ||
399 | /* mov off8(%rdi),%ebx */ | ||
400 | EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len)); | ||
401 | else { | ||
402 | EMIT2(0x8b, 0x9f); | ||
403 | EMIT(offsetof(struct sk_buff, len), 4); | ||
404 | } | ||
405 | break; | ||
406 | case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ | ||
407 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); | ||
408 | if (is_imm8(offsetof(struct sk_buff, protocol))) { | ||
409 | /* movzwl off8(%rdi),%eax */ | ||
410 | EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol)); | ||
411 | } else { | ||
412 | EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ | ||
413 | EMIT(offsetof(struct sk_buff, protocol), 4); | ||
414 | } | ||
415 | EMIT2(0x86, 0xc4); /* ntohs() : xchg %al,%ah */ | ||
416 | break; | ||
417 | case BPF_S_ANC_IFINDEX: | ||
418 | if (is_imm8(offsetof(struct sk_buff, dev))) { | ||
419 | /* movq off8(%rdi),%rax */ | ||
420 | EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev)); | ||
421 | } else { | ||
422 | EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */ | ||
423 | EMIT(offsetof(struct sk_buff, dev), 4); | ||
424 | } | ||
425 | EMIT3(0x48, 0x85, 0xc0); /* test %rax,%rax */ | ||
426 | EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6)); | ||
427 | BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); | ||
428 | EMIT2(0x8b, 0x80); /* mov off32(%rax),%eax */ | ||
429 | EMIT(offsetof(struct net_device, ifindex), 4); | ||
430 | break; | ||
431 | case BPF_S_ANC_MARK: | ||
432 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); | ||
433 | if (is_imm8(offsetof(struct sk_buff, mark))) { | ||
434 | /* mov off8(%rdi),%eax */ | ||
435 | EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark)); | ||
436 | } else { | ||
437 | EMIT2(0x8b, 0x87); | ||
438 | EMIT(offsetof(struct sk_buff, mark), 4); | ||
439 | } | ||
440 | break; | ||
441 | case BPF_S_ANC_RXHASH: | ||
442 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); | ||
443 | if (is_imm8(offsetof(struct sk_buff, rxhash))) { | ||
444 | /* mov off8(%rdi),%eax */ | ||
445 | EMIT3(0x8b, 0x47, offsetof(struct sk_buff, rxhash)); | ||
446 | } else { | ||
447 | EMIT2(0x8b, 0x87); | ||
448 | EMIT(offsetof(struct sk_buff, rxhash), 4); | ||
449 | } | ||
450 | break; | ||
451 | case BPF_S_ANC_QUEUE: | ||
452 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); | ||
453 | if (is_imm8(offsetof(struct sk_buff, queue_mapping))) { | ||
454 | /* movzwl off8(%rdi),%eax */ | ||
455 | EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping)); | ||
456 | } else { | ||
457 | EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ | ||
458 | EMIT(offsetof(struct sk_buff, queue_mapping), 4); | ||
459 | } | ||
460 | break; | ||
461 | case BPF_S_ANC_CPU: | ||
462 | #ifdef CONFIG_SMP | ||
463 | EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */ | ||
464 | EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */ | ||
465 | #else | ||
466 | CLEAR_A(); | ||
467 | #endif | ||
468 | break; | ||
469 | case BPF_S_LD_W_ABS: | ||
470 | func = sk_load_word; | ||
471 | common_load: seen |= SEEN_DATAREF; | ||
472 | if ((int)K < 0) | ||
473 | goto out; | ||
474 | t_offset = func - (image + addrs[i]); | ||
475 | EMIT1_off32(0xbe, K); /* mov imm32,%esi */ | ||
476 | EMIT1_off32(0xe8, t_offset); /* call */ | ||
477 | break; | ||
478 | case BPF_S_LD_H_ABS: | ||
479 | func = sk_load_half; | ||
480 | goto common_load; | ||
481 | case BPF_S_LD_B_ABS: | ||
482 | func = sk_load_byte; | ||
483 | goto common_load; | ||
484 | case BPF_S_LDX_B_MSH: | ||
485 | if ((int)K < 0) { | ||
486 | if (pc_ret0 != -1) { | ||
487 | EMIT_JMP(addrs[pc_ret0] - addrs[i]); | ||
488 | break; | ||
489 | } | ||
490 | CLEAR_A(); | ||
491 | EMIT_JMP(cleanup_addr - addrs[i]); | ||
492 | break; | ||
493 | } | ||
494 | seen |= SEEN_DATAREF | SEEN_XREG; | ||
495 | t_offset = sk_load_byte_msh - (image + addrs[i]); | ||
496 | EMIT1_off32(0xbe, K); /* mov imm32,%esi */ | ||
497 | EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */ | ||
498 | break; | ||
499 | case BPF_S_LD_W_IND: | ||
500 | func = sk_load_word_ind; | ||
501 | common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; | ||
502 | t_offset = func - (image + addrs[i]); | ||
503 | EMIT1_off32(0xbe, K); /* mov imm32,%esi */ | ||
504 | EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */ | ||
505 | break; | ||
506 | case BPF_S_LD_H_IND: | ||
507 | func = sk_load_half_ind; | ||
508 | goto common_load_ind; | ||
509 | case BPF_S_LD_B_IND: | ||
510 | func = sk_load_byte_ind; | ||
511 | goto common_load_ind; | ||
512 | case BPF_S_JMP_JA: | ||
513 | t_offset = addrs[i + K] - addrs[i]; | ||
514 | EMIT_JMP(t_offset); | ||
515 | break; | ||
516 | COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE); | ||
517 | COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB); | ||
518 | COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE); | ||
519 | COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE); | ||
520 | COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE); | ||
521 | COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB); | ||
522 | COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE); | ||
523 | COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE); | ||
524 | |||
525 | cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; | ||
526 | t_offset = addrs[i + filter[i].jt] - addrs[i]; | ||
527 | |||
528 | /* same targets, can avoid doing the test :) */ | ||
529 | if (filter[i].jt == filter[i].jf) { | ||
530 | EMIT_JMP(t_offset); | ||
531 | break; | ||
532 | } | ||
533 | |||
534 | switch (filter[i].code) { | ||
535 | case BPF_S_JMP_JGT_X: | ||
536 | case BPF_S_JMP_JGE_X: | ||
537 | case BPF_S_JMP_JEQ_X: | ||
538 | seen |= SEEN_XREG; | ||
539 | EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */ | ||
540 | break; | ||
541 | case BPF_S_JMP_JSET_X: | ||
542 | seen |= SEEN_XREG; | ||
543 | EMIT2(0x85, 0xd8); /* test %ebx,%eax */ | ||
544 | break; | ||
545 | case BPF_S_JMP_JEQ_K: | ||
546 | if (K == 0) { | ||
547 | EMIT2(0x85, 0xc0); /* test %eax,%eax */ | ||
548 | break; | ||
549 | } | ||
550 | case BPF_S_JMP_JGT_K: | ||
551 | case BPF_S_JMP_JGE_K: | ||
552 | if (K <= 127) | ||
553 | EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */ | ||
554 | else | ||
555 | EMIT1_off32(0x3d, K); /* cmp imm32,%eax */ | ||
556 | break; | ||
557 | case BPF_S_JMP_JSET_K: | ||
558 | if (K <= 0xFF) | ||
559 | EMIT2(0xa8, K); /* test imm8,%al */ | ||
560 | else if (!(K & 0xFFFF00FF)) | ||
561 | EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */ | ||
562 | else if (K <= 0xFFFF) { | ||
563 | EMIT2(0x66, 0xa9); /* test imm16,%ax */ | ||
564 | EMIT(K, 2); | ||
565 | } else { | ||
566 | EMIT1_off32(0xa9, K); /* test imm32,%eax */ | ||
567 | } | ||
568 | break; | ||
569 | } | ||
570 | if (filter[i].jt != 0) { | ||
571 | if (filter[i].jf) | ||
572 | t_offset += is_near(f_offset) ? 2 : 6; | ||
573 | EMIT_COND_JMP(t_op, t_offset); | ||
574 | if (filter[i].jf) | ||
575 | EMIT_JMP(f_offset); | ||
576 | break; | ||
577 | } | ||
578 | EMIT_COND_JMP(f_op, f_offset); | ||
579 | break; | ||
580 | default: | ||
581 | /* hmm, too complex filter, give up with jit compiler */ | ||
582 | goto out; | ||
583 | } | ||
584 | ilen = prog - temp; | ||
585 | if (image) { | ||
586 | if (unlikely(proglen + ilen > oldproglen)) { | ||
587 | pr_err("bpb_jit_compile fatal error\n"); | ||
588 | kfree(addrs); | ||
589 | module_free(NULL, image); | ||
590 | return; | ||
591 | } | ||
592 | memcpy(image + proglen, temp, ilen); | ||
593 | } | ||
594 | proglen += ilen; | ||
595 | addrs[i] = proglen; | ||
596 | prog = temp; | ||
597 | } | ||
598 | /* last bpf instruction is always a RET : | ||
599 | * use it to give the cleanup instruction(s) addr | ||
600 | */ | ||
601 | cleanup_addr = proglen - 1; /* ret */ | ||
602 | if (seen) | ||
603 | cleanup_addr -= 1; /* leaveq */ | ||
604 | if (seen & SEEN_XREG) | ||
605 | cleanup_addr -= 4; /* mov -8(%rbp),%rbx */ | ||
606 | |||
607 | if (image) { | ||
608 | WARN_ON(proglen != oldproglen); | ||
609 | break; | ||
610 | } | ||
611 | if (proglen == oldproglen) { | ||
612 | image = module_alloc(max_t(unsigned int, | ||
613 | proglen, | ||
614 | sizeof(struct work_struct))); | ||
615 | if (!image) | ||
616 | goto out; | ||
617 | } | ||
618 | oldproglen = proglen; | ||
619 | } | ||
620 | if (bpf_jit_enable > 1) | ||
621 | pr_err("flen=%d proglen=%u pass=%d image=%p\n", | ||
622 | flen, proglen, pass, image); | ||
623 | |||
624 | if (image) { | ||
625 | if (bpf_jit_enable > 1) | ||
626 | print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS, | ||
627 | 16, 1, image, proglen, false); | ||
628 | |||
629 | bpf_flush_icache(image, image + proglen); | ||
630 | |||
631 | fp->bpf_func = (void *)image; | ||
632 | } | ||
633 | out: | ||
634 | kfree(addrs); | ||
635 | return; | ||
636 | } | ||
637 | |||
638 | static void jit_free_defer(struct work_struct *arg) | ||
639 | { | ||
640 | module_free(NULL, arg); | ||
641 | } | ||
642 | |||
643 | /* run from softirq, we must use a work_struct to call | ||
644 | * module_free() from process context | ||
645 | */ | ||
646 | void bpf_jit_free(struct sk_filter *fp) | ||
647 | { | ||
648 | if (fp->bpf_func != sk_run_filter) { | ||
649 | struct work_struct *work = (struct work_struct *)fp->bpf_func; | ||
650 | |||
651 | INIT_WORK(work, jit_free_defer); | ||
652 | schedule_work(work); | ||
653 | } | ||
654 | } | ||
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 2d49d4e19a36..a5b64ab4cd6e 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c | |||
@@ -16,17 +16,6 @@ | |||
16 | #include <asm/stacktrace.h> | 16 | #include <asm/stacktrace.h> |
17 | #include <linux/compat.h> | 17 | #include <linux/compat.h> |
18 | 18 | ||
19 | static void backtrace_warning_symbol(void *data, char *msg, | ||
20 | unsigned long symbol) | ||
21 | { | ||
22 | /* Ignore warnings */ | ||
23 | } | ||
24 | |||
25 | static void backtrace_warning(void *data, char *msg) | ||
26 | { | ||
27 | /* Ignore warnings */ | ||
28 | } | ||
29 | |||
30 | static int backtrace_stack(void *data, char *name) | 19 | static int backtrace_stack(void *data, char *name) |
31 | { | 20 | { |
32 | /* Yes, we want all stacks */ | 21 | /* Yes, we want all stacks */ |
@@ -42,8 +31,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
42 | } | 31 | } |
43 | 32 | ||
44 | static struct stacktrace_ops backtrace_ops = { | 33 | static struct stacktrace_ops backtrace_ops = { |
45 | .warning = backtrace_warning, | ||
46 | .warning_symbol = backtrace_warning_symbol, | ||
47 | .stack = backtrace_stack, | 34 | .stack = backtrace_stack, |
48 | .address = backtrace_address, | 35 | .address = backtrace_address, |
49 | .walk_stack = print_context_stack, | 36 | .walk_stack = print_context_stack, |
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index e37b407a0ee8..8214724ce54d 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c | |||
@@ -108,7 +108,8 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
108 | } | 108 | } |
109 | irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0, | 109 | irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0, |
110 | (type == PCI_CAP_ID_MSIX) ? | 110 | (type == PCI_CAP_ID_MSIX) ? |
111 | "msi-x" : "msi"); | 111 | "msi-x" : "msi", |
112 | DOMID_SELF); | ||
112 | if (irq < 0) | 113 | if (irq < 0) |
113 | goto error; | 114 | goto error; |
114 | dev_dbg(&dev->dev, | 115 | dev_dbg(&dev->dev, |
@@ -148,7 +149,8 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
148 | irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0, | 149 | irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0, |
149 | (type == PCI_CAP_ID_MSIX) ? | 150 | (type == PCI_CAP_ID_MSIX) ? |
150 | "pcifront-msi-x" : | 151 | "pcifront-msi-x" : |
151 | "pcifront-msi"); | 152 | "pcifront-msi", |
153 | DOMID_SELF); | ||
152 | if (irq < 0) | 154 | if (irq < 0) |
153 | goto free; | 155 | goto free; |
154 | i++; | 156 | i++; |
@@ -190,9 +192,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
190 | 192 | ||
191 | list_for_each_entry(msidesc, &dev->msi_list, list) { | 193 | list_for_each_entry(msidesc, &dev->msi_list, list) { |
192 | struct physdev_map_pirq map_irq; | 194 | struct physdev_map_pirq map_irq; |
195 | domid_t domid; | ||
196 | |||
197 | domid = ret = xen_find_device_domain_owner(dev); | ||
198 | /* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED, | ||
199 | * hence check ret value for < 0. */ | ||
200 | if (ret < 0) | ||
201 | domid = DOMID_SELF; | ||
193 | 202 | ||
194 | memset(&map_irq, 0, sizeof(map_irq)); | 203 | memset(&map_irq, 0, sizeof(map_irq)); |
195 | map_irq.domid = DOMID_SELF; | 204 | map_irq.domid = domid; |
196 | map_irq.type = MAP_PIRQ_TYPE_MSI; | 205 | map_irq.type = MAP_PIRQ_TYPE_MSI; |
197 | map_irq.index = -1; | 206 | map_irq.index = -1; |
198 | map_irq.pirq = -1; | 207 | map_irq.pirq = -1; |
@@ -215,14 +224,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
215 | 224 | ||
216 | ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); | 225 | ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); |
217 | if (ret) { | 226 | if (ret) { |
218 | dev_warn(&dev->dev, "xen map irq failed %d\n", ret); | 227 | dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n", |
228 | ret, domid); | ||
219 | goto out; | 229 | goto out; |
220 | } | 230 | } |
221 | 231 | ||
222 | ret = xen_bind_pirq_msi_to_irq(dev, msidesc, | 232 | ret = xen_bind_pirq_msi_to_irq(dev, msidesc, |
223 | map_irq.pirq, map_irq.index, | 233 | map_irq.pirq, map_irq.index, |
224 | (type == PCI_CAP_ID_MSIX) ? | 234 | (type == PCI_CAP_ID_MSIX) ? |
225 | "msi-x" : "msi"); | 235 | "msi-x" : "msi", |
236 | domid); | ||
226 | if (ret < 0) | 237 | if (ret < 0) |
227 | goto out; | 238 | goto out; |
228 | } | 239 | } |
@@ -461,3 +472,78 @@ void __init xen_setup_pirqs(void) | |||
461 | } | 472 | } |
462 | } | 473 | } |
463 | #endif | 474 | #endif |
475 | |||
476 | #ifdef CONFIG_XEN_DOM0 | ||
477 | struct xen_device_domain_owner { | ||
478 | domid_t domain; | ||
479 | struct pci_dev *dev; | ||
480 | struct list_head list; | ||
481 | }; | ||
482 | |||
483 | static DEFINE_SPINLOCK(dev_domain_list_spinlock); | ||
484 | static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list); | ||
485 | |||
486 | static struct xen_device_domain_owner *find_device(struct pci_dev *dev) | ||
487 | { | ||
488 | struct xen_device_domain_owner *owner; | ||
489 | |||
490 | list_for_each_entry(owner, &dev_domain_list, list) { | ||
491 | if (owner->dev == dev) | ||
492 | return owner; | ||
493 | } | ||
494 | return NULL; | ||
495 | } | ||
496 | |||
497 | int xen_find_device_domain_owner(struct pci_dev *dev) | ||
498 | { | ||
499 | struct xen_device_domain_owner *owner; | ||
500 | int domain = -ENODEV; | ||
501 | |||
502 | spin_lock(&dev_domain_list_spinlock); | ||
503 | owner = find_device(dev); | ||
504 | if (owner) | ||
505 | domain = owner->domain; | ||
506 | spin_unlock(&dev_domain_list_spinlock); | ||
507 | return domain; | ||
508 | } | ||
509 | EXPORT_SYMBOL_GPL(xen_find_device_domain_owner); | ||
510 | |||
511 | int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain) | ||
512 | { | ||
513 | struct xen_device_domain_owner *owner; | ||
514 | |||
515 | owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL); | ||
516 | if (!owner) | ||
517 | return -ENODEV; | ||
518 | |||
519 | spin_lock(&dev_domain_list_spinlock); | ||
520 | if (find_device(dev)) { | ||
521 | spin_unlock(&dev_domain_list_spinlock); | ||
522 | kfree(owner); | ||
523 | return -EEXIST; | ||
524 | } | ||
525 | owner->domain = domain; | ||
526 | owner->dev = dev; | ||
527 | list_add_tail(&owner->list, &dev_domain_list); | ||
528 | spin_unlock(&dev_domain_list_spinlock); | ||
529 | return 0; | ||
530 | } | ||
531 | EXPORT_SYMBOL_GPL(xen_register_device_domain_owner); | ||
532 | |||
533 | int xen_unregister_device_domain_owner(struct pci_dev *dev) | ||
534 | { | ||
535 | struct xen_device_domain_owner *owner; | ||
536 | |||
537 | spin_lock(&dev_domain_list_spinlock); | ||
538 | owner = find_device(dev); | ||
539 | if (!owner) { | ||
540 | spin_unlock(&dev_domain_list_spinlock); | ||
541 | return -ENODEV; | ||
542 | } | ||
543 | list_del(&owner->list); | ||
544 | spin_unlock(&dev_domain_list_spinlock); | ||
545 | kfree(owner); | ||
546 | return 0; | ||
547 | } | ||
548 | EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner); | ||
549 | #endif | ||
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 0fe27d7c6258..b30aa26a8df2 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -145,17 +145,6 @@ static void virt_efi_reset_system(int reset_type, | |||
145 | data_size, data); | 145 | data_size, data); |
146 | } | 146 | } |
147 | 147 | ||
148 | static efi_status_t virt_efi_set_virtual_address_map( | ||
149 | unsigned long memory_map_size, | ||
150 | unsigned long descriptor_size, | ||
151 | u32 descriptor_version, | ||
152 | efi_memory_desc_t *virtual_map) | ||
153 | { | ||
154 | return efi_call_virt4(set_virtual_address_map, | ||
155 | memory_map_size, descriptor_size, | ||
156 | descriptor_version, virtual_map); | ||
157 | } | ||
158 | |||
159 | static efi_status_t __init phys_efi_set_virtual_address_map( | 148 | static efi_status_t __init phys_efi_set_virtual_address_map( |
160 | unsigned long memory_map_size, | 149 | unsigned long memory_map_size, |
161 | unsigned long descriptor_size, | 150 | unsigned long descriptor_size, |
@@ -468,11 +457,25 @@ void __init efi_init(void) | |||
468 | #endif | 457 | #endif |
469 | } | 458 | } |
470 | 459 | ||
460 | void __init efi_set_executable(efi_memory_desc_t *md, bool executable) | ||
461 | { | ||
462 | u64 addr, npages; | ||
463 | |||
464 | addr = md->virt_addr; | ||
465 | npages = md->num_pages; | ||
466 | |||
467 | memrange_efi_to_native(&addr, &npages); | ||
468 | |||
469 | if (executable) | ||
470 | set_memory_x(addr, npages); | ||
471 | else | ||
472 | set_memory_nx(addr, npages); | ||
473 | } | ||
474 | |||
471 | static void __init runtime_code_page_mkexec(void) | 475 | static void __init runtime_code_page_mkexec(void) |
472 | { | 476 | { |
473 | efi_memory_desc_t *md; | 477 | efi_memory_desc_t *md; |
474 | void *p; | 478 | void *p; |
475 | u64 addr, npages; | ||
476 | 479 | ||
477 | /* Make EFI runtime service code area executable */ | 480 | /* Make EFI runtime service code area executable */ |
478 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | 481 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { |
@@ -481,10 +484,7 @@ static void __init runtime_code_page_mkexec(void) | |||
481 | if (md->type != EFI_RUNTIME_SERVICES_CODE) | 484 | if (md->type != EFI_RUNTIME_SERVICES_CODE) |
482 | continue; | 485 | continue; |
483 | 486 | ||
484 | addr = md->virt_addr; | 487 | efi_set_executable(md, true); |
485 | npages = md->num_pages; | ||
486 | memrange_efi_to_native(&addr, &npages); | ||
487 | set_memory_x(addr, npages); | ||
488 | } | 488 | } |
489 | } | 489 | } |
490 | 490 | ||
@@ -498,13 +498,42 @@ static void __init runtime_code_page_mkexec(void) | |||
498 | */ | 498 | */ |
499 | void __init efi_enter_virtual_mode(void) | 499 | void __init efi_enter_virtual_mode(void) |
500 | { | 500 | { |
501 | efi_memory_desc_t *md; | 501 | efi_memory_desc_t *md, *prev_md = NULL; |
502 | efi_status_t status; | 502 | efi_status_t status; |
503 | unsigned long size; | 503 | unsigned long size; |
504 | u64 end, systab, addr, npages, end_pfn; | 504 | u64 end, systab, addr, npages, end_pfn; |
505 | void *p, *va; | 505 | void *p, *va, *new_memmap = NULL; |
506 | int count = 0; | ||
506 | 507 | ||
507 | efi.systab = NULL; | 508 | efi.systab = NULL; |
509 | |||
510 | /* Merge contiguous regions of the same type and attribute */ | ||
511 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
512 | u64 prev_size; | ||
513 | md = p; | ||
514 | |||
515 | if (!prev_md) { | ||
516 | prev_md = md; | ||
517 | continue; | ||
518 | } | ||
519 | |||
520 | if (prev_md->type != md->type || | ||
521 | prev_md->attribute != md->attribute) { | ||
522 | prev_md = md; | ||
523 | continue; | ||
524 | } | ||
525 | |||
526 | prev_size = prev_md->num_pages << EFI_PAGE_SHIFT; | ||
527 | |||
528 | if (md->phys_addr == (prev_md->phys_addr + prev_size)) { | ||
529 | prev_md->num_pages += md->num_pages; | ||
530 | md->type = EFI_RESERVED_TYPE; | ||
531 | md->attribute = 0; | ||
532 | continue; | ||
533 | } | ||
534 | prev_md = md; | ||
535 | } | ||
536 | |||
508 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | 537 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { |
509 | md = p; | 538 | md = p; |
510 | if (!(md->attribute & EFI_MEMORY_RUNTIME)) | 539 | if (!(md->attribute & EFI_MEMORY_RUNTIME)) |
@@ -541,15 +570,21 @@ void __init efi_enter_virtual_mode(void) | |||
541 | systab += md->virt_addr - md->phys_addr; | 570 | systab += md->virt_addr - md->phys_addr; |
542 | efi.systab = (efi_system_table_t *) (unsigned long) systab; | 571 | efi.systab = (efi_system_table_t *) (unsigned long) systab; |
543 | } | 572 | } |
573 | new_memmap = krealloc(new_memmap, | ||
574 | (count + 1) * memmap.desc_size, | ||
575 | GFP_KERNEL); | ||
576 | memcpy(new_memmap + (count * memmap.desc_size), md, | ||
577 | memmap.desc_size); | ||
578 | count++; | ||
544 | } | 579 | } |
545 | 580 | ||
546 | BUG_ON(!efi.systab); | 581 | BUG_ON(!efi.systab); |
547 | 582 | ||
548 | status = phys_efi_set_virtual_address_map( | 583 | status = phys_efi_set_virtual_address_map( |
549 | memmap.desc_size * memmap.nr_map, | 584 | memmap.desc_size * count, |
550 | memmap.desc_size, | 585 | memmap.desc_size, |
551 | memmap.desc_version, | 586 | memmap.desc_version, |
552 | memmap.phys_map); | 587 | (efi_memory_desc_t *)__pa(new_memmap)); |
553 | 588 | ||
554 | if (status != EFI_SUCCESS) { | 589 | if (status != EFI_SUCCESS) { |
555 | printk(KERN_ALERT "Unable to switch EFI into virtual mode " | 590 | printk(KERN_ALERT "Unable to switch EFI into virtual mode " |
@@ -572,11 +607,12 @@ void __init efi_enter_virtual_mode(void) | |||
572 | efi.set_variable = virt_efi_set_variable; | 607 | efi.set_variable = virt_efi_set_variable; |
573 | efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; | 608 | efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; |
574 | efi.reset_system = virt_efi_reset_system; | 609 | efi.reset_system = virt_efi_reset_system; |
575 | efi.set_virtual_address_map = virt_efi_set_virtual_address_map; | 610 | efi.set_virtual_address_map = NULL; |
576 | if (__supported_pte_mask & _PAGE_NX) | 611 | if (__supported_pte_mask & _PAGE_NX) |
577 | runtime_code_page_mkexec(); | 612 | runtime_code_page_mkexec(); |
578 | early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); | 613 | early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); |
579 | memmap.map = NULL; | 614 | memmap.map = NULL; |
615 | kfree(new_memmap); | ||
580 | } | 616 | } |
581 | 617 | ||
582 | /* | 618 | /* |
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index ac0621a7ac3d..2649426a7905 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c | |||
@@ -41,22 +41,7 @@ | |||
41 | static pgd_t save_pgd __initdata; | 41 | static pgd_t save_pgd __initdata; |
42 | static unsigned long efi_flags __initdata; | 42 | static unsigned long efi_flags __initdata; |
43 | 43 | ||
44 | static void __init early_mapping_set_exec(unsigned long start, | 44 | static void __init early_code_mapping_set_exec(int executable) |
45 | unsigned long end, | ||
46 | int executable) | ||
47 | { | ||
48 | unsigned long num_pages; | ||
49 | |||
50 | start &= PMD_MASK; | ||
51 | end = (end + PMD_SIZE - 1) & PMD_MASK; | ||
52 | num_pages = (end - start) >> PAGE_SHIFT; | ||
53 | if (executable) | ||
54 | set_memory_x((unsigned long)__va(start), num_pages); | ||
55 | else | ||
56 | set_memory_nx((unsigned long)__va(start), num_pages); | ||
57 | } | ||
58 | |||
59 | static void __init early_runtime_code_mapping_set_exec(int executable) | ||
60 | { | 45 | { |
61 | efi_memory_desc_t *md; | 46 | efi_memory_desc_t *md; |
62 | void *p; | 47 | void *p; |
@@ -67,11 +52,8 @@ static void __init early_runtime_code_mapping_set_exec(int executable) | |||
67 | /* Make EFI runtime service code area executable */ | 52 | /* Make EFI runtime service code area executable */ |
68 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | 53 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { |
69 | md = p; | 54 | md = p; |
70 | if (md->type == EFI_RUNTIME_SERVICES_CODE) { | 55 | if (md->type == EFI_RUNTIME_SERVICES_CODE) |
71 | unsigned long end; | 56 | efi_set_executable(md, executable); |
72 | end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); | ||
73 | early_mapping_set_exec(md->phys_addr, end, executable); | ||
74 | } | ||
75 | } | 57 | } |
76 | } | 58 | } |
77 | 59 | ||
@@ -79,7 +61,7 @@ void __init efi_call_phys_prelog(void) | |||
79 | { | 61 | { |
80 | unsigned long vaddress; | 62 | unsigned long vaddress; |
81 | 63 | ||
82 | early_runtime_code_mapping_set_exec(1); | 64 | early_code_mapping_set_exec(1); |
83 | local_irq_save(efi_flags); | 65 | local_irq_save(efi_flags); |
84 | vaddress = (unsigned long)__va(0x0UL); | 66 | vaddress = (unsigned long)__va(0x0UL); |
85 | save_pgd = *pgd_offset_k(0x0UL); | 67 | save_pgd = *pgd_offset_k(0x0UL); |
@@ -95,7 +77,7 @@ void __init efi_call_phys_epilog(void) | |||
95 | set_pgd(pgd_offset_k(0x0UL), save_pgd); | 77 | set_pgd(pgd_offset_k(0x0UL), save_pgd); |
96 | __flush_tlb_all(); | 78 | __flush_tlb_all(); |
97 | local_irq_restore(efi_flags); | 79 | local_irq_restore(efi_flags); |
98 | early_runtime_code_mapping_set_exec(0); | 80 | early_code_mapping_set_exec(0); |
99 | } | 81 | } |
100 | 82 | ||
101 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, | 83 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, |
@@ -107,8 +89,10 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, | |||
107 | return ioremap(phys_addr, size); | 89 | return ioremap(phys_addr, size); |
108 | 90 | ||
109 | last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); | 91 | last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); |
110 | if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) | 92 | if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { |
111 | return NULL; | 93 | unsigned long top = last_map_pfn << PAGE_SHIFT; |
94 | efi_ioremap(top, size - (top - phys_addr), type); | ||
95 | } | ||
112 | 96 | ||
113 | return (void __iomem *)__va(phys_addr); | 97 | return (void __iomem *)__va(phys_addr); |
114 | } | 98 | } |
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index 275dbc19e2cf..7000e74b3087 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c | |||
@@ -194,7 +194,7 @@ static unsigned long __init mrst_calibrate_tsc(void) | |||
194 | return 0; | 194 | return 0; |
195 | } | 195 | } |
196 | 196 | ||
197 | void __init mrst_time_init(void) | 197 | static void __init mrst_time_init(void) |
198 | { | 198 | { |
199 | sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); | 199 | sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); |
200 | switch (mrst_timer_options) { | 200 | switch (mrst_timer_options) { |
@@ -216,7 +216,7 @@ void __init mrst_time_init(void) | |||
216 | apbt_time_init(); | 216 | apbt_time_init(); |
217 | } | 217 | } |
218 | 218 | ||
219 | void __cpuinit mrst_arch_setup(void) | 219 | static void __cpuinit mrst_arch_setup(void) |
220 | { | 220 | { |
221 | if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) | 221 | if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) |
222 | __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; | 222 | __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; |
diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile index c2a8cab65e5d..81c5e2165c24 100644 --- a/arch/x86/platform/olpc/Makefile +++ b/arch/x86/platform/olpc/Makefile | |||
@@ -1,4 +1,2 @@ | |||
1 | obj-$(CONFIG_OLPC) += olpc.o | 1 | obj-$(CONFIG_OLPC) += olpc.o olpc_ofw.o olpc_dt.o |
2 | obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o | 2 | obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o |
3 | obj-$(CONFIG_OLPC) += olpc_ofw.o | ||
4 | obj-$(CONFIG_OF_PROMTREE) += olpc_dt.o | ||
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c index edaf3fe8dc5e..0060fd59ea00 100644 --- a/arch/x86/platform/olpc/olpc.c +++ b/arch/x86/platform/olpc/olpc.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/io.h> | 18 | #include <linux/io.h> |
19 | #include <linux/string.h> | 19 | #include <linux/string.h> |
20 | #include <linux/platform_device.h> | 20 | #include <linux/platform_device.h> |
21 | #include <linux/of.h> | ||
21 | 22 | ||
22 | #include <asm/geode.h> | 23 | #include <asm/geode.h> |
23 | #include <asm/setup.h> | 24 | #include <asm/setup.h> |
@@ -187,41 +188,43 @@ err: | |||
187 | } | 188 | } |
188 | EXPORT_SYMBOL_GPL(olpc_ec_cmd); | 189 | EXPORT_SYMBOL_GPL(olpc_ec_cmd); |
189 | 190 | ||
190 | static bool __init check_ofw_architecture(void) | 191 | static bool __init check_ofw_architecture(struct device_node *root) |
191 | { | 192 | { |
192 | size_t propsize; | 193 | const char *olpc_arch; |
193 | char olpc_arch[5]; | 194 | int propsize; |
194 | const void *args[] = { NULL, "architecture", olpc_arch, (void *)5 }; | ||
195 | void *res[] = { &propsize }; | ||
196 | 195 | ||
197 | if (olpc_ofw("getprop", args, res)) { | 196 | olpc_arch = of_get_property(root, "architecture", &propsize); |
198 | printk(KERN_ERR "ofw: getprop call failed!\n"); | ||
199 | return false; | ||
200 | } | ||
201 | return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0; | 197 | return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0; |
202 | } | 198 | } |
203 | 199 | ||
204 | static u32 __init get_board_revision(void) | 200 | static u32 __init get_board_revision(struct device_node *root) |
205 | { | 201 | { |
206 | size_t propsize; | 202 | int propsize; |
207 | __be32 rev; | 203 | const __be32 *rev; |
208 | const void *args[] = { NULL, "board-revision-int", &rev, (void *)4 }; | 204 | |
209 | void *res[] = { &propsize }; | 205 | rev = of_get_property(root, "board-revision-int", &propsize); |
210 | 206 | if (propsize != 4) | |
211 | if (olpc_ofw("getprop", args, res) || propsize != 4) { | 207 | return 0; |
212 | printk(KERN_ERR "ofw: getprop call failed!\n"); | 208 | |
213 | return cpu_to_be32(0); | 209 | return be32_to_cpu(*rev); |
214 | } | ||
215 | return be32_to_cpu(rev); | ||
216 | } | 210 | } |
217 | 211 | ||
218 | static bool __init platform_detect(void) | 212 | static bool __init platform_detect(void) |
219 | { | 213 | { |
220 | if (!check_ofw_architecture()) | 214 | struct device_node *root = of_find_node_by_path("/"); |
215 | bool success; | ||
216 | |||
217 | if (!root) | ||
221 | return false; | 218 | return false; |
222 | olpc_platform_info.flags |= OLPC_F_PRESENT; | 219 | |
223 | olpc_platform_info.boardrev = get_board_revision(); | 220 | success = check_ofw_architecture(root); |
224 | return true; | 221 | if (success) { |
222 | olpc_platform_info.boardrev = get_board_revision(root); | ||
223 | olpc_platform_info.flags |= OLPC_F_PRESENT; | ||
224 | } | ||
225 | |||
226 | of_node_put(root); | ||
227 | return success; | ||
225 | } | 228 | } |
226 | 229 | ||
227 | static int __init add_xo1_platform_devices(void) | 230 | static int __init add_xo1_platform_devices(void) |
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c index 044bda5b3174..d39f63d017d2 100644 --- a/arch/x86/platform/olpc/olpc_dt.c +++ b/arch/x86/platform/olpc/olpc_dt.c | |||
@@ -19,7 +19,9 @@ | |||
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/bootmem.h> | 20 | #include <linux/bootmem.h> |
21 | #include <linux/of.h> | 21 | #include <linux/of.h> |
22 | #include <linux/of_platform.h> | ||
22 | #include <linux/of_pdt.h> | 23 | #include <linux/of_pdt.h> |
24 | #include <asm/olpc.h> | ||
23 | #include <asm/olpc_ofw.h> | 25 | #include <asm/olpc_ofw.h> |
24 | 26 | ||
25 | static phandle __init olpc_dt_getsibling(phandle node) | 27 | static phandle __init olpc_dt_getsibling(phandle node) |
@@ -180,3 +182,20 @@ void __init olpc_dt_build_devicetree(void) | |||
180 | pr_info("PROM DT: Built device tree with %u bytes of memory.\n", | 182 | pr_info("PROM DT: Built device tree with %u bytes of memory.\n", |
181 | prom_early_allocated); | 183 | prom_early_allocated); |
182 | } | 184 | } |
185 | |||
186 | /* A list of DT node/bus matches that we want to expose as platform devices */ | ||
187 | static struct of_device_id __initdata of_ids[] = { | ||
188 | { .compatible = "olpc,xo1-battery" }, | ||
189 | { .compatible = "olpc,xo1-dcon" }, | ||
190 | { .compatible = "olpc,xo1-rtc" }, | ||
191 | {}, | ||
192 | }; | ||
193 | |||
194 | static int __init olpc_create_platform_devices(void) | ||
195 | { | ||
196 | if (machine_is_olpc()) | ||
197 | return of_platform_bus_probe(NULL, of_ids, NULL); | ||
198 | else | ||
199 | return 0; | ||
200 | } | ||
201 | device_initcall(olpc_create_platform_devices); | ||
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 7cb6424317f6..c58e0ea39ef5 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c | |||
@@ -699,16 +699,17 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
699 | struct mm_struct *mm, | 699 | struct mm_struct *mm, |
700 | unsigned long va, unsigned int cpu) | 700 | unsigned long va, unsigned int cpu) |
701 | { | 701 | { |
702 | int tcpu; | ||
703 | int uvhub; | ||
704 | int locals = 0; | 702 | int locals = 0; |
705 | int remotes = 0; | 703 | int remotes = 0; |
706 | int hubs = 0; | 704 | int hubs = 0; |
705 | int tcpu; | ||
706 | int tpnode; | ||
707 | struct bau_desc *bau_desc; | 707 | struct bau_desc *bau_desc; |
708 | struct cpumask *flush_mask; | 708 | struct cpumask *flush_mask; |
709 | struct ptc_stats *stat; | 709 | struct ptc_stats *stat; |
710 | struct bau_control *bcp; | 710 | struct bau_control *bcp; |
711 | struct bau_control *tbcp; | 711 | struct bau_control *tbcp; |
712 | struct hub_and_pnode *hpp; | ||
712 | 713 | ||
713 | /* kernel was booted 'nobau' */ | 714 | /* kernel was booted 'nobau' */ |
714 | if (nobau) | 715 | if (nobau) |
@@ -750,11 +751,18 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
750 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; | 751 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; |
751 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); | 752 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); |
752 | 753 | ||
753 | /* cpu statistics */ | ||
754 | for_each_cpu(tcpu, flush_mask) { | 754 | for_each_cpu(tcpu, flush_mask) { |
755 | uvhub = uv_cpu_to_blade_id(tcpu); | 755 | /* |
756 | bau_uvhub_set(uvhub, &bau_desc->distribution); | 756 | * The distribution vector is a bit map of pnodes, relative |
757 | if (uvhub == bcp->uvhub) | 757 | * to the partition base pnode (and the partition base nasid |
758 | * in the header). | ||
759 | * Translate cpu to pnode and hub using an array stored | ||
760 | * in local memory. | ||
761 | */ | ||
762 | hpp = &bcp->socket_master->target_hub_and_pnode[tcpu]; | ||
763 | tpnode = hpp->pnode - bcp->partition_base_pnode; | ||
764 | bau_uvhub_set(tpnode, &bau_desc->distribution); | ||
765 | if (hpp->uvhub == bcp->uvhub) | ||
758 | locals++; | 766 | locals++; |
759 | else | 767 | else |
760 | remotes++; | 768 | remotes++; |
@@ -855,7 +863,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs) | |||
855 | * an interrupt, but causes an error message to be returned to | 863 | * an interrupt, but causes an error message to be returned to |
856 | * the sender. | 864 | * the sender. |
857 | */ | 865 | */ |
858 | static void uv_enable_timeouts(void) | 866 | static void __init uv_enable_timeouts(void) |
859 | { | 867 | { |
860 | int uvhub; | 868 | int uvhub; |
861 | int nuvhubs; | 869 | int nuvhubs; |
@@ -1326,10 +1334,10 @@ static int __init uv_ptc_init(void) | |||
1326 | } | 1334 | } |
1327 | 1335 | ||
1328 | /* | 1336 | /* |
1329 | * initialize the sending side's sending buffers | 1337 | * Initialize the sending side's sending buffers. |
1330 | */ | 1338 | */ |
1331 | static void | 1339 | static void |
1332 | uv_activation_descriptor_init(int node, int pnode) | 1340 | uv_activation_descriptor_init(int node, int pnode, int base_pnode) |
1333 | { | 1341 | { |
1334 | int i; | 1342 | int i; |
1335 | int cpu; | 1343 | int cpu; |
@@ -1352,11 +1360,11 @@ uv_activation_descriptor_init(int node, int pnode) | |||
1352 | n = pa >> uv_nshift; | 1360 | n = pa >> uv_nshift; |
1353 | m = pa & uv_mmask; | 1361 | m = pa & uv_mmask; |
1354 | 1362 | ||
1363 | /* the 14-bit pnode */ | ||
1355 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, | 1364 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, |
1356 | (n << UV_DESC_BASE_PNODE_SHIFT | m)); | 1365 | (n << UV_DESC_BASE_PNODE_SHIFT | m)); |
1357 | |||
1358 | /* | 1366 | /* |
1359 | * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each | 1367 | * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each |
1360 | * cpu even though we only use the first one; one descriptor can | 1368 | * cpu even though we only use the first one; one descriptor can |
1361 | * describe a broadcast to 256 uv hubs. | 1369 | * describe a broadcast to 256 uv hubs. |
1362 | */ | 1370 | */ |
@@ -1365,12 +1373,13 @@ uv_activation_descriptor_init(int node, int pnode) | |||
1365 | memset(bd2, 0, sizeof(struct bau_desc)); | 1373 | memset(bd2, 0, sizeof(struct bau_desc)); |
1366 | bd2->header.sw_ack_flag = 1; | 1374 | bd2->header.sw_ack_flag = 1; |
1367 | /* | 1375 | /* |
1368 | * base_dest_nodeid is the nasid of the first uvhub | 1376 | * The base_dest_nasid set in the message header is the nasid |
1369 | * in the partition. The bit map will indicate uvhub numbers, | 1377 | * of the first uvhub in the partition. The bit map will |
1370 | * which are 0-N in a partition. Pnodes are unique system-wide. | 1378 | * indicate destination pnode numbers relative to that base. |
1379 | * They may not be consecutive if nasid striding is being used. | ||
1371 | */ | 1380 | */ |
1372 | bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode); | 1381 | bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode); |
1373 | bd2->header.dest_subnodeid = 0x10; /* the LB */ | 1382 | bd2->header.dest_subnodeid = UV_LB_SUBNODEID; |
1374 | bd2->header.command = UV_NET_ENDPOINT_INTD; | 1383 | bd2->header.command = UV_NET_ENDPOINT_INTD; |
1375 | bd2->header.int_both = 1; | 1384 | bd2->header.int_both = 1; |
1376 | /* | 1385 | /* |
@@ -1442,7 +1451,7 @@ uv_payload_queue_init(int node, int pnode) | |||
1442 | /* | 1451 | /* |
1443 | * Initialization of each UV hub's structures | 1452 | * Initialization of each UV hub's structures |
1444 | */ | 1453 | */ |
1445 | static void __init uv_init_uvhub(int uvhub, int vector) | 1454 | static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode) |
1446 | { | 1455 | { |
1447 | int node; | 1456 | int node; |
1448 | int pnode; | 1457 | int pnode; |
@@ -1450,11 +1459,11 @@ static void __init uv_init_uvhub(int uvhub, int vector) | |||
1450 | 1459 | ||
1451 | node = uvhub_to_first_node(uvhub); | 1460 | node = uvhub_to_first_node(uvhub); |
1452 | pnode = uv_blade_to_pnode(uvhub); | 1461 | pnode = uv_blade_to_pnode(uvhub); |
1453 | uv_activation_descriptor_init(node, pnode); | 1462 | uv_activation_descriptor_init(node, pnode, base_pnode); |
1454 | uv_payload_queue_init(node, pnode); | 1463 | uv_payload_queue_init(node, pnode); |
1455 | /* | 1464 | /* |
1456 | * the below initialization can't be in firmware because the | 1465 | * The below initialization can't be in firmware because the |
1457 | * messaging IRQ will be determined by the OS | 1466 | * messaging IRQ will be determined by the OS. |
1458 | */ | 1467 | */ |
1459 | apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; | 1468 | apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; |
1460 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, | 1469 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, |
@@ -1491,10 +1500,11 @@ calculate_destination_timeout(void) | |||
1491 | /* | 1500 | /* |
1492 | * initialize the bau_control structure for each cpu | 1501 | * initialize the bau_control structure for each cpu |
1493 | */ | 1502 | */ |
1494 | static int __init uv_init_per_cpu(int nuvhubs) | 1503 | static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode) |
1495 | { | 1504 | { |
1496 | int i; | 1505 | int i; |
1497 | int cpu; | 1506 | int cpu; |
1507 | int tcpu; | ||
1498 | int pnode; | 1508 | int pnode; |
1499 | int uvhub; | 1509 | int uvhub; |
1500 | int have_hmaster; | 1510 | int have_hmaster; |
@@ -1528,6 +1538,15 @@ static int __init uv_init_per_cpu(int nuvhubs) | |||
1528 | bcp = &per_cpu(bau_control, cpu); | 1538 | bcp = &per_cpu(bau_control, cpu); |
1529 | memset(bcp, 0, sizeof(struct bau_control)); | 1539 | memset(bcp, 0, sizeof(struct bau_control)); |
1530 | pnode = uv_cpu_hub_info(cpu)->pnode; | 1540 | pnode = uv_cpu_hub_info(cpu)->pnode; |
1541 | if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) { | ||
1542 | printk(KERN_EMERG | ||
1543 | "cpu %d pnode %d-%d beyond %d; BAU disabled\n", | ||
1544 | cpu, pnode, base_part_pnode, | ||
1545 | UV_DISTRIBUTION_SIZE); | ||
1546 | return 1; | ||
1547 | } | ||
1548 | bcp->osnode = cpu_to_node(cpu); | ||
1549 | bcp->partition_base_pnode = uv_partition_base_pnode; | ||
1531 | uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; | 1550 | uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; |
1532 | *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); | 1551 | *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); |
1533 | bdp = &uvhub_descs[uvhub]; | 1552 | bdp = &uvhub_descs[uvhub]; |
@@ -1536,7 +1555,7 @@ static int __init uv_init_per_cpu(int nuvhubs) | |||
1536 | bdp->pnode = pnode; | 1555 | bdp->pnode = pnode; |
1537 | /* kludge: 'assuming' one node per socket, and assuming that | 1556 | /* kludge: 'assuming' one node per socket, and assuming that |
1538 | disabling a socket just leaves a gap in node numbers */ | 1557 | disabling a socket just leaves a gap in node numbers */ |
1539 | socket = (cpu_to_node(cpu) & 1); | 1558 | socket = bcp->osnode & 1; |
1540 | bdp->socket_mask |= (1 << socket); | 1559 | bdp->socket_mask |= (1 << socket); |
1541 | sdp = &bdp->socket[socket]; | 1560 | sdp = &bdp->socket[socket]; |
1542 | sdp->cpu_number[sdp->num_cpus] = cpu; | 1561 | sdp->cpu_number[sdp->num_cpus] = cpu; |
@@ -1585,6 +1604,20 @@ static int __init uv_init_per_cpu(int nuvhubs) | |||
1585 | nextsocket: | 1604 | nextsocket: |
1586 | socket++; | 1605 | socket++; |
1587 | socket_mask = (socket_mask >> 1); | 1606 | socket_mask = (socket_mask >> 1); |
1607 | /* each socket gets a local array of pnodes/hubs */ | ||
1608 | bcp = smaster; | ||
1609 | bcp->target_hub_and_pnode = kmalloc_node( | ||
1610 | sizeof(struct hub_and_pnode) * | ||
1611 | num_possible_cpus(), GFP_KERNEL, bcp->osnode); | ||
1612 | memset(bcp->target_hub_and_pnode, 0, | ||
1613 | sizeof(struct hub_and_pnode) * | ||
1614 | num_possible_cpus()); | ||
1615 | for_each_present_cpu(tcpu) { | ||
1616 | bcp->target_hub_and_pnode[tcpu].pnode = | ||
1617 | uv_cpu_hub_info(tcpu)->pnode; | ||
1618 | bcp->target_hub_and_pnode[tcpu].uvhub = | ||
1619 | uv_cpu_hub_info(tcpu)->numa_blade_id; | ||
1620 | } | ||
1588 | } | 1621 | } |
1589 | } | 1622 | } |
1590 | kfree(uvhub_descs); | 1623 | kfree(uvhub_descs); |
@@ -1637,21 +1670,22 @@ static int __init uv_bau_init(void) | |||
1637 | spin_lock_init(&disable_lock); | 1670 | spin_lock_init(&disable_lock); |
1638 | congested_cycles = microsec_2_cycles(congested_response_us); | 1671 | congested_cycles = microsec_2_cycles(congested_response_us); |
1639 | 1672 | ||
1640 | if (uv_init_per_cpu(nuvhubs)) { | ||
1641 | nobau = 1; | ||
1642 | return 0; | ||
1643 | } | ||
1644 | |||
1645 | uv_partition_base_pnode = 0x7fffffff; | 1673 | uv_partition_base_pnode = 0x7fffffff; |
1646 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) | 1674 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { |
1647 | if (uv_blade_nr_possible_cpus(uvhub) && | 1675 | if (uv_blade_nr_possible_cpus(uvhub) && |
1648 | (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) | 1676 | (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) |
1649 | uv_partition_base_pnode = uv_blade_to_pnode(uvhub); | 1677 | uv_partition_base_pnode = uv_blade_to_pnode(uvhub); |
1678 | } | ||
1679 | |||
1680 | if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) { | ||
1681 | nobau = 1; | ||
1682 | return 0; | ||
1683 | } | ||
1650 | 1684 | ||
1651 | vector = UV_BAU_MESSAGE; | 1685 | vector = UV_BAU_MESSAGE; |
1652 | for_each_possible_blade(uvhub) | 1686 | for_each_possible_blade(uvhub) |
1653 | if (uv_blade_nr_possible_cpus(uvhub)) | 1687 | if (uv_blade_nr_possible_cpus(uvhub)) |
1654 | uv_init_uvhub(uvhub, vector); | 1688 | uv_init_uvhub(uvhub, vector, uv_partition_base_pnode); |
1655 | 1689 | ||
1656 | uv_enable_timeouts(); | 1690 | uv_enable_timeouts(); |
1657 | alloc_intr_gate(vector, uv_bau_message_intr1); | 1691 | alloc_intr_gate(vector, uv_bau_message_intr1); |
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index 9daf5d1af9f1..0eb90184515f 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c | |||
@@ -40,7 +40,6 @@ static struct clocksource clocksource_uv = { | |||
40 | .rating = 400, | 40 | .rating = 400, |
41 | .read = uv_read_rtc, | 41 | .read = uv_read_rtc, |
42 | .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK, | 42 | .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK, |
43 | .shift = 10, | ||
44 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 43 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
45 | }; | 44 | }; |
46 | 45 | ||
@@ -372,14 +371,11 @@ static __init int uv_rtc_setup_clock(void) | |||
372 | if (!is_uv_system()) | 371 | if (!is_uv_system()) |
373 | return -ENODEV; | 372 | return -ENODEV; |
374 | 373 | ||
375 | clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, | ||
376 | clocksource_uv.shift); | ||
377 | |||
378 | /* If single blade, prefer tsc */ | 374 | /* If single blade, prefer tsc */ |
379 | if (uv_num_possible_blades() == 1) | 375 | if (uv_num_possible_blades() == 1) |
380 | clocksource_uv.rating = 250; | 376 | clocksource_uv.rating = 250; |
381 | 377 | ||
382 | rc = clocksource_register(&clocksource_uv); | 378 | rc = clocksource_register_hz(&clocksource_uv, sn_rtc_cycles_per_second); |
383 | if (rc) | 379 | if (rc) |
384 | printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc); | 380 | printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc); |
385 | else | 381 | else |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index e3c6a06cf725..dd7b88f2ec7a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -235,7 +235,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, | |||
235 | *dx &= maskedx; | 235 | *dx &= maskedx; |
236 | } | 236 | } |
237 | 237 | ||
238 | static __init void xen_init_cpuid_mask(void) | 238 | static void __init xen_init_cpuid_mask(void) |
239 | { | 239 | { |
240 | unsigned int ax, bx, cx, dx; | 240 | unsigned int ax, bx, cx, dx; |
241 | unsigned int xsave_mask; | 241 | unsigned int xsave_mask; |
@@ -400,7 +400,7 @@ static void xen_load_gdt(const struct desc_ptr *dtr) | |||
400 | /* | 400 | /* |
401 | * load_gdt for early boot, when the gdt is only mapped once | 401 | * load_gdt for early boot, when the gdt is only mapped once |
402 | */ | 402 | */ |
403 | static __init void xen_load_gdt_boot(const struct desc_ptr *dtr) | 403 | static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) |
404 | { | 404 | { |
405 | unsigned long va = dtr->address; | 405 | unsigned long va = dtr->address; |
406 | unsigned int size = dtr->size + 1; | 406 | unsigned int size = dtr->size + 1; |
@@ -662,7 +662,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry, | |||
662 | * Version of write_gdt_entry for use at early boot-time needed to | 662 | * Version of write_gdt_entry for use at early boot-time needed to |
663 | * update an entry as simply as possible. | 663 | * update an entry as simply as possible. |
664 | */ | 664 | */ |
665 | static __init void xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, | 665 | static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, |
666 | const void *desc, int type) | 666 | const void *desc, int type) |
667 | { | 667 | { |
668 | switch (type) { | 668 | switch (type) { |
@@ -933,18 +933,18 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, | |||
933 | return ret; | 933 | return ret; |
934 | } | 934 | } |
935 | 935 | ||
936 | static const struct pv_info xen_info __initdata = { | 936 | static const struct pv_info xen_info __initconst = { |
937 | .paravirt_enabled = 1, | 937 | .paravirt_enabled = 1, |
938 | .shared_kernel_pmd = 0, | 938 | .shared_kernel_pmd = 0, |
939 | 939 | ||
940 | .name = "Xen", | 940 | .name = "Xen", |
941 | }; | 941 | }; |
942 | 942 | ||
943 | static const struct pv_init_ops xen_init_ops __initdata = { | 943 | static const struct pv_init_ops xen_init_ops __initconst = { |
944 | .patch = xen_patch, | 944 | .patch = xen_patch, |
945 | }; | 945 | }; |
946 | 946 | ||
947 | static const struct pv_cpu_ops xen_cpu_ops __initdata = { | 947 | static const struct pv_cpu_ops xen_cpu_ops __initconst = { |
948 | .cpuid = xen_cpuid, | 948 | .cpuid = xen_cpuid, |
949 | 949 | ||
950 | .set_debugreg = xen_set_debugreg, | 950 | .set_debugreg = xen_set_debugreg, |
@@ -1004,7 +1004,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
1004 | .end_context_switch = xen_end_context_switch, | 1004 | .end_context_switch = xen_end_context_switch, |
1005 | }; | 1005 | }; |
1006 | 1006 | ||
1007 | static const struct pv_apic_ops xen_apic_ops __initdata = { | 1007 | static const struct pv_apic_ops xen_apic_ops __initconst = { |
1008 | #ifdef CONFIG_X86_LOCAL_APIC | 1008 | #ifdef CONFIG_X86_LOCAL_APIC |
1009 | .startup_ipi_hook = paravirt_nop, | 1009 | .startup_ipi_hook = paravirt_nop, |
1010 | #endif | 1010 | #endif |
@@ -1055,7 +1055,7 @@ int xen_panic_handler_init(void) | |||
1055 | return 0; | 1055 | return 0; |
1056 | } | 1056 | } |
1057 | 1057 | ||
1058 | static const struct machine_ops __initdata xen_machine_ops = { | 1058 | static const struct machine_ops xen_machine_ops __initconst = { |
1059 | .restart = xen_restart, | 1059 | .restart = xen_restart, |
1060 | .halt = xen_machine_halt, | 1060 | .halt = xen_machine_halt, |
1061 | .power_off = xen_machine_halt, | 1061 | .power_off = xen_machine_halt, |
@@ -1332,7 +1332,7 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, | |||
1332 | return NOTIFY_OK; | 1332 | return NOTIFY_OK; |
1333 | } | 1333 | } |
1334 | 1334 | ||
1335 | static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = { | 1335 | static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = { |
1336 | .notifier_call = xen_hvm_cpu_notify, | 1336 | .notifier_call = xen_hvm_cpu_notify, |
1337 | }; | 1337 | }; |
1338 | 1338 | ||
@@ -1381,7 +1381,7 @@ bool xen_hvm_need_lapic(void) | |||
1381 | } | 1381 | } |
1382 | EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); | 1382 | EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); |
1383 | 1383 | ||
1384 | const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = { | 1384 | const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { |
1385 | .name = "Xen HVM", | 1385 | .name = "Xen HVM", |
1386 | .detect = xen_hvm_platform, | 1386 | .detect = xen_hvm_platform, |
1387 | .init_platform = xen_hvm_guest_init, | 1387 | .init_platform = xen_hvm_guest_init, |
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 6a6fe8939645..8bbb465b6f0a 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c | |||
@@ -113,7 +113,7 @@ static void xen_halt(void) | |||
113 | xen_safe_halt(); | 113 | xen_safe_halt(); |
114 | } | 114 | } |
115 | 115 | ||
116 | static const struct pv_irq_ops xen_irq_ops __initdata = { | 116 | static const struct pv_irq_ops xen_irq_ops __initconst = { |
117 | .save_fl = PV_CALLEE_SAVE(xen_save_fl), | 117 | .save_fl = PV_CALLEE_SAVE(xen_save_fl), |
118 | .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), | 118 | .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), |
119 | .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), | 119 | .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 0684f3c74d53..02d752460371 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -1054,7 +1054,7 @@ void xen_mm_pin_all(void) | |||
1054 | * that's before we have page structures to store the bits. So do all | 1054 | * that's before we have page structures to store the bits. So do all |
1055 | * the book-keeping now. | 1055 | * the book-keeping now. |
1056 | */ | 1056 | */ |
1057 | static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page, | 1057 | static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, |
1058 | enum pt_level level) | 1058 | enum pt_level level) |
1059 | { | 1059 | { |
1060 | SetPagePinned(page); | 1060 | SetPagePinned(page); |
@@ -1187,7 +1187,7 @@ static void drop_other_mm_ref(void *info) | |||
1187 | 1187 | ||
1188 | active_mm = percpu_read(cpu_tlbstate.active_mm); | 1188 | active_mm = percpu_read(cpu_tlbstate.active_mm); |
1189 | 1189 | ||
1190 | if (active_mm == mm) | 1190 | if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK) |
1191 | leave_mm(smp_processor_id()); | 1191 | leave_mm(smp_processor_id()); |
1192 | 1192 | ||
1193 | /* If this cpu still has a stale cr3 reference, then make sure | 1193 | /* If this cpu still has a stale cr3 reference, then make sure |
@@ -1271,7 +1271,7 @@ void xen_exit_mmap(struct mm_struct *mm) | |||
1271 | spin_unlock(&mm->page_table_lock); | 1271 | spin_unlock(&mm->page_table_lock); |
1272 | } | 1272 | } |
1273 | 1273 | ||
1274 | static __init void xen_pagetable_setup_start(pgd_t *base) | 1274 | static void __init xen_pagetable_setup_start(pgd_t *base) |
1275 | { | 1275 | { |
1276 | } | 1276 | } |
1277 | 1277 | ||
@@ -1291,7 +1291,7 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) | |||
1291 | 1291 | ||
1292 | static void xen_post_allocator_init(void); | 1292 | static void xen_post_allocator_init(void); |
1293 | 1293 | ||
1294 | static __init void xen_pagetable_setup_done(pgd_t *base) | 1294 | static void __init xen_pagetable_setup_done(pgd_t *base) |
1295 | { | 1295 | { |
1296 | xen_setup_shared_info(); | 1296 | xen_setup_shared_info(); |
1297 | xen_post_allocator_init(); | 1297 | xen_post_allocator_init(); |
@@ -1488,7 +1488,7 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) | |||
1488 | } | 1488 | } |
1489 | 1489 | ||
1490 | #ifdef CONFIG_X86_32 | 1490 | #ifdef CONFIG_X86_32 |
1491 | static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) | 1491 | static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) |
1492 | { | 1492 | { |
1493 | /* If there's an existing pte, then don't allow _PAGE_RW to be set */ | 1493 | /* If there's an existing pte, then don't allow _PAGE_RW to be set */ |
1494 | if (pte_val_ma(*ptep) & _PAGE_PRESENT) | 1494 | if (pte_val_ma(*ptep) & _PAGE_PRESENT) |
@@ -1498,7 +1498,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) | |||
1498 | return pte; | 1498 | return pte; |
1499 | } | 1499 | } |
1500 | #else /* CONFIG_X86_64 */ | 1500 | #else /* CONFIG_X86_64 */ |
1501 | static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) | 1501 | static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) |
1502 | { | 1502 | { |
1503 | unsigned long pfn = pte_pfn(pte); | 1503 | unsigned long pfn = pte_pfn(pte); |
1504 | 1504 | ||
@@ -1519,7 +1519,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) | |||
1519 | 1519 | ||
1520 | /* Init-time set_pte while constructing initial pagetables, which | 1520 | /* Init-time set_pte while constructing initial pagetables, which |
1521 | doesn't allow RO pagetable pages to be remapped RW */ | 1521 | doesn't allow RO pagetable pages to be remapped RW */ |
1522 | static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) | 1522 | static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) |
1523 | { | 1523 | { |
1524 | pte = mask_rw_pte(ptep, pte); | 1524 | pte = mask_rw_pte(ptep, pte); |
1525 | 1525 | ||
@@ -1537,7 +1537,7 @@ static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) | |||
1537 | 1537 | ||
1538 | /* Early in boot, while setting up the initial pagetable, assume | 1538 | /* Early in boot, while setting up the initial pagetable, assume |
1539 | everything is pinned. */ | 1539 | everything is pinned. */ |
1540 | static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) | 1540 | static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) |
1541 | { | 1541 | { |
1542 | #ifdef CONFIG_FLATMEM | 1542 | #ifdef CONFIG_FLATMEM |
1543 | BUG_ON(mem_map); /* should only be used early */ | 1543 | BUG_ON(mem_map); /* should only be used early */ |
@@ -1547,7 +1547,7 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) | |||
1547 | } | 1547 | } |
1548 | 1548 | ||
1549 | /* Used for pmd and pud */ | 1549 | /* Used for pmd and pud */ |
1550 | static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) | 1550 | static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) |
1551 | { | 1551 | { |
1552 | #ifdef CONFIG_FLATMEM | 1552 | #ifdef CONFIG_FLATMEM |
1553 | BUG_ON(mem_map); /* should only be used early */ | 1553 | BUG_ON(mem_map); /* should only be used early */ |
@@ -1557,13 +1557,13 @@ static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) | |||
1557 | 1557 | ||
1558 | /* Early release_pte assumes that all pts are pinned, since there's | 1558 | /* Early release_pte assumes that all pts are pinned, since there's |
1559 | only init_mm and anything attached to that is pinned. */ | 1559 | only init_mm and anything attached to that is pinned. */ |
1560 | static __init void xen_release_pte_init(unsigned long pfn) | 1560 | static void __init xen_release_pte_init(unsigned long pfn) |
1561 | { | 1561 | { |
1562 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); | 1562 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); |
1563 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | 1563 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); |
1564 | } | 1564 | } |
1565 | 1565 | ||
1566 | static __init void xen_release_pmd_init(unsigned long pfn) | 1566 | static void __init xen_release_pmd_init(unsigned long pfn) |
1567 | { | 1567 | { |
1568 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | 1568 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); |
1569 | } | 1569 | } |
@@ -1689,7 +1689,7 @@ static void set_page_prot(void *addr, pgprot_t prot) | |||
1689 | BUG(); | 1689 | BUG(); |
1690 | } | 1690 | } |
1691 | 1691 | ||
1692 | static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | 1692 | static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) |
1693 | { | 1693 | { |
1694 | unsigned pmdidx, pteidx; | 1694 | unsigned pmdidx, pteidx; |
1695 | unsigned ident_pte; | 1695 | unsigned ident_pte; |
@@ -1772,7 +1772,7 @@ static void convert_pfn_mfn(void *v) | |||
1772 | * of the physical mapping once some sort of allocator has been set | 1772 | * of the physical mapping once some sort of allocator has been set |
1773 | * up. | 1773 | * up. |
1774 | */ | 1774 | */ |
1775 | __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | 1775 | pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, |
1776 | unsigned long max_pfn) | 1776 | unsigned long max_pfn) |
1777 | { | 1777 | { |
1778 | pud_t *l3; | 1778 | pud_t *l3; |
@@ -1843,7 +1843,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | |||
1843 | static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); | 1843 | static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); |
1844 | static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); | 1844 | static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); |
1845 | 1845 | ||
1846 | static __init void xen_write_cr3_init(unsigned long cr3) | 1846 | static void __init xen_write_cr3_init(unsigned long cr3) |
1847 | { | 1847 | { |
1848 | unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); | 1848 | unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); |
1849 | 1849 | ||
@@ -1880,7 +1880,7 @@ static __init void xen_write_cr3_init(unsigned long cr3) | |||
1880 | pv_mmu_ops.write_cr3 = &xen_write_cr3; | 1880 | pv_mmu_ops.write_cr3 = &xen_write_cr3; |
1881 | } | 1881 | } |
1882 | 1882 | ||
1883 | __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | 1883 | pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, |
1884 | unsigned long max_pfn) | 1884 | unsigned long max_pfn) |
1885 | { | 1885 | { |
1886 | pmd_t *kernel_pmd; | 1886 | pmd_t *kernel_pmd; |
@@ -1986,7 +1986,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | |||
1986 | #endif | 1986 | #endif |
1987 | } | 1987 | } |
1988 | 1988 | ||
1989 | __init void xen_ident_map_ISA(void) | 1989 | void __init xen_ident_map_ISA(void) |
1990 | { | 1990 | { |
1991 | unsigned long pa; | 1991 | unsigned long pa; |
1992 | 1992 | ||
@@ -2009,7 +2009,7 @@ __init void xen_ident_map_ISA(void) | |||
2009 | xen_flush_tlb(); | 2009 | xen_flush_tlb(); |
2010 | } | 2010 | } |
2011 | 2011 | ||
2012 | static __init void xen_post_allocator_init(void) | 2012 | static void __init xen_post_allocator_init(void) |
2013 | { | 2013 | { |
2014 | #ifdef CONFIG_XEN_DEBUG | 2014 | #ifdef CONFIG_XEN_DEBUG |
2015 | pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); | 2015 | pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); |
@@ -2046,7 +2046,7 @@ static void xen_leave_lazy_mmu(void) | |||
2046 | preempt_enable(); | 2046 | preempt_enable(); |
2047 | } | 2047 | } |
2048 | 2048 | ||
2049 | static const struct pv_mmu_ops xen_mmu_ops __initdata = { | 2049 | static const struct pv_mmu_ops xen_mmu_ops __initconst = { |
2050 | .read_cr2 = xen_read_cr2, | 2050 | .read_cr2 = xen_read_cr2, |
2051 | .write_cr2 = xen_write_cr2, | 2051 | .write_cr2 = xen_write_cr2, |
2052 | 2052 | ||
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 141eb0de8b06..58efeb9d5440 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -522,11 +522,20 @@ static bool __init __early_alloc_p2m(unsigned long pfn) | |||
522 | /* Boundary cross-over for the edges: */ | 522 | /* Boundary cross-over for the edges: */ |
523 | if (idx) { | 523 | if (idx) { |
524 | unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); | 524 | unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); |
525 | unsigned long *mid_mfn_p; | ||
525 | 526 | ||
526 | p2m_init(p2m); | 527 | p2m_init(p2m); |
527 | 528 | ||
528 | p2m_top[topidx][mididx] = p2m; | 529 | p2m_top[topidx][mididx] = p2m; |
529 | 530 | ||
531 | /* For save/restore we need to MFN of the P2M saved */ | ||
532 | |||
533 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
534 | WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing), | ||
535 | "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n", | ||
536 | topidx, mididx); | ||
537 | mid_mfn_p[mididx] = virt_to_mfn(p2m); | ||
538 | |||
530 | } | 539 | } |
531 | return idx != 0; | 540 | return idx != 0; |
532 | } | 541 | } |
@@ -549,12 +558,29 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, | |||
549 | pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) | 558 | pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) |
550 | { | 559 | { |
551 | unsigned topidx = p2m_top_index(pfn); | 560 | unsigned topidx = p2m_top_index(pfn); |
552 | if (p2m_top[topidx] == p2m_mid_missing) { | 561 | unsigned long *mid_mfn_p; |
553 | unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | 562 | unsigned long **mid; |
563 | |||
564 | mid = p2m_top[topidx]; | ||
565 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
566 | if (mid == p2m_mid_missing) { | ||
567 | mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
554 | 568 | ||
555 | p2m_mid_init(mid); | 569 | p2m_mid_init(mid); |
556 | 570 | ||
557 | p2m_top[topidx] = mid; | 571 | p2m_top[topidx] = mid; |
572 | |||
573 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | ||
574 | } | ||
575 | /* And the save/restore P2M tables.. */ | ||
576 | if (mid_mfn_p == p2m_mid_missing_mfn) { | ||
577 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
578 | p2m_mid_mfn_init(mid_mfn_p); | ||
579 | |||
580 | p2m_top_mfn_p[topidx] = mid_mfn_p; | ||
581 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | ||
582 | /* Note: we don't set mid_mfn_p[midix] here, | ||
583 | * look in __early_alloc_p2m */ | ||
558 | } | 584 | } |
559 | } | 585 | } |
560 | 586 | ||
@@ -650,7 +676,7 @@ static unsigned long mfn_hash(unsigned long mfn) | |||
650 | } | 676 | } |
651 | 677 | ||
652 | /* Add an MFN override for a particular page */ | 678 | /* Add an MFN override for a particular page */ |
653 | int m2p_add_override(unsigned long mfn, struct page *page) | 679 | int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte) |
654 | { | 680 | { |
655 | unsigned long flags; | 681 | unsigned long flags; |
656 | unsigned long pfn; | 682 | unsigned long pfn; |
@@ -662,7 +688,6 @@ int m2p_add_override(unsigned long mfn, struct page *page) | |||
662 | if (!PageHighMem(page)) { | 688 | if (!PageHighMem(page)) { |
663 | address = (unsigned long)__va(pfn << PAGE_SHIFT); | 689 | address = (unsigned long)__va(pfn << PAGE_SHIFT); |
664 | ptep = lookup_address(address, &level); | 690 | ptep = lookup_address(address, &level); |
665 | |||
666 | if (WARN(ptep == NULL || level != PG_LEVEL_4K, | 691 | if (WARN(ptep == NULL || level != PG_LEVEL_4K, |
667 | "m2p_add_override: pfn %lx not mapped", pfn)) | 692 | "m2p_add_override: pfn %lx not mapped", pfn)) |
668 | return -EINVAL; | 693 | return -EINVAL; |
@@ -674,18 +699,17 @@ int m2p_add_override(unsigned long mfn, struct page *page) | |||
674 | if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) | 699 | if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) |
675 | return -ENOMEM; | 700 | return -ENOMEM; |
676 | 701 | ||
677 | if (!PageHighMem(page)) | 702 | if (clear_pte && !PageHighMem(page)) |
678 | /* Just zap old mapping for now */ | 703 | /* Just zap old mapping for now */ |
679 | pte_clear(&init_mm, address, ptep); | 704 | pte_clear(&init_mm, address, ptep); |
680 | |||
681 | spin_lock_irqsave(&m2p_override_lock, flags); | 705 | spin_lock_irqsave(&m2p_override_lock, flags); |
682 | list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); | 706 | list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); |
683 | spin_unlock_irqrestore(&m2p_override_lock, flags); | 707 | spin_unlock_irqrestore(&m2p_override_lock, flags); |
684 | 708 | ||
685 | return 0; | 709 | return 0; |
686 | } | 710 | } |
687 | 711 | EXPORT_SYMBOL_GPL(m2p_add_override); | |
688 | int m2p_remove_override(struct page *page) | 712 | int m2p_remove_override(struct page *page, bool clear_pte) |
689 | { | 713 | { |
690 | unsigned long flags; | 714 | unsigned long flags; |
691 | unsigned long mfn; | 715 | unsigned long mfn; |
@@ -713,7 +737,7 @@ int m2p_remove_override(struct page *page) | |||
713 | spin_unlock_irqrestore(&m2p_override_lock, flags); | 737 | spin_unlock_irqrestore(&m2p_override_lock, flags); |
714 | set_phys_to_machine(pfn, page->index); | 738 | set_phys_to_machine(pfn, page->index); |
715 | 739 | ||
716 | if (!PageHighMem(page)) | 740 | if (clear_pte && !PageHighMem(page)) |
717 | set_pte_at(&init_mm, address, ptep, | 741 | set_pte_at(&init_mm, address, ptep, |
718 | pfn_pte(pfn, PAGE_KERNEL)); | 742 | pfn_pte(pfn, PAGE_KERNEL)); |
719 | /* No tlb flush necessary because the caller already | 743 | /* No tlb flush necessary because the caller already |
@@ -721,6 +745,7 @@ int m2p_remove_override(struct page *page) | |||
721 | 745 | ||
722 | return 0; | 746 | return 0; |
723 | } | 747 | } |
748 | EXPORT_SYMBOL_GPL(m2p_remove_override); | ||
724 | 749 | ||
725 | struct page *m2p_find_override(unsigned long mfn) | 750 | struct page *m2p_find_override(unsigned long mfn) |
726 | { | 751 | { |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 90bac0aac3a5..be1a464f6d66 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -50,7 +50,7 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size; | |||
50 | */ | 50 | */ |
51 | #define EXTRA_MEM_RATIO (10) | 51 | #define EXTRA_MEM_RATIO (10) |
52 | 52 | ||
53 | static __init void xen_add_extra_mem(unsigned long pages) | 53 | static void __init xen_add_extra_mem(unsigned long pages) |
54 | { | 54 | { |
55 | unsigned long pfn; | 55 | unsigned long pfn; |
56 | 56 | ||
@@ -166,7 +166,7 @@ static unsigned long __init xen_set_identity(const struct e820entry *list, | |||
166 | if (last > end) | 166 | if (last > end) |
167 | continue; | 167 | continue; |
168 | 168 | ||
169 | if (entry->type == E820_RAM) { | 169 | if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) { |
170 | if (start > start_pci) | 170 | if (start > start_pci) |
171 | identity += set_phys_range_identity( | 171 | identity += set_phys_range_identity( |
172 | PFN_UP(start_pci), PFN_DOWN(start)); | 172 | PFN_UP(start_pci), PFN_DOWN(start)); |
@@ -227,7 +227,11 @@ char * __init xen_memory_setup(void) | |||
227 | 227 | ||
228 | memcpy(map_raw, map, sizeof(map)); | 228 | memcpy(map_raw, map, sizeof(map)); |
229 | e820.nr_map = 0; | 229 | e820.nr_map = 0; |
230 | #ifdef CONFIG_X86_32 | ||
231 | xen_extra_mem_start = mem_end; | ||
232 | #else | ||
230 | xen_extra_mem_start = max((1ULL << 32), mem_end); | 233 | xen_extra_mem_start = max((1ULL << 32), mem_end); |
234 | #endif | ||
231 | for (i = 0; i < memmap.nr_entries; i++) { | 235 | for (i = 0; i < memmap.nr_entries; i++) { |
232 | unsigned long long end; | 236 | unsigned long long end; |
233 | 237 | ||
@@ -336,7 +340,7 @@ static void __init fiddle_vdso(void) | |||
336 | #endif | 340 | #endif |
337 | } | 341 | } |
338 | 342 | ||
339 | static __cpuinit int register_callback(unsigned type, const void *func) | 343 | static int __cpuinit register_callback(unsigned type, const void *func) |
340 | { | 344 | { |
341 | struct callback_register callback = { | 345 | struct callback_register callback = { |
342 | .type = type, | 346 | .type = type, |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 30612441ed99..41038c01de40 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -46,18 +46,17 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); | |||
46 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); | 46 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); |
47 | 47 | ||
48 | /* | 48 | /* |
49 | * Reschedule call back. Nothing to do, | 49 | * Reschedule call back. |
50 | * all the work is done automatically when | ||
51 | * we return from the interrupt. | ||
52 | */ | 50 | */ |
53 | static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) | 51 | static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) |
54 | { | 52 | { |
55 | inc_irq_stat(irq_resched_count); | 53 | inc_irq_stat(irq_resched_count); |
54 | scheduler_ipi(); | ||
56 | 55 | ||
57 | return IRQ_HANDLED; | 56 | return IRQ_HANDLED; |
58 | } | 57 | } |
59 | 58 | ||
60 | static __cpuinit void cpu_bringup(void) | 59 | static void __cpuinit cpu_bringup(void) |
61 | { | 60 | { |
62 | int cpu = smp_processor_id(); | 61 | int cpu = smp_processor_id(); |
63 | 62 | ||
@@ -85,7 +84,7 @@ static __cpuinit void cpu_bringup(void) | |||
85 | wmb(); /* make sure everything is out */ | 84 | wmb(); /* make sure everything is out */ |
86 | } | 85 | } |
87 | 86 | ||
88 | static __cpuinit void cpu_bringup_and_idle(void) | 87 | static void __cpuinit cpu_bringup_and_idle(void) |
89 | { | 88 | { |
90 | cpu_bringup(); | 89 | cpu_bringup(); |
91 | cpu_idle(); | 90 | cpu_idle(); |
@@ -242,7 +241,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) | |||
242 | } | 241 | } |
243 | } | 242 | } |
244 | 243 | ||
245 | static __cpuinit int | 244 | static int __cpuinit |
246 | cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | 245 | cpu_initialize_context(unsigned int cpu, struct task_struct *idle) |
247 | { | 246 | { |
248 | struct vcpu_guest_context *ctxt; | 247 | struct vcpu_guest_context *ctxt; |
@@ -486,7 +485,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) | |||
486 | return IRQ_HANDLED; | 485 | return IRQ_HANDLED; |
487 | } | 486 | } |
488 | 487 | ||
489 | static const struct smp_ops xen_smp_ops __initdata = { | 488 | static const struct smp_ops xen_smp_ops __initconst = { |
490 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, | 489 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, |
491 | .smp_prepare_cpus = xen_smp_prepare_cpus, | 490 | .smp_prepare_cpus = xen_smp_prepare_cpus, |
492 | .smp_cpus_done = xen_smp_cpus_done, | 491 | .smp_cpus_done = xen_smp_cpus_done, |
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 2e2d370a47b1..5158c505bef9 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -26,8 +26,6 @@ | |||
26 | 26 | ||
27 | #include "xen-ops.h" | 27 | #include "xen-ops.h" |
28 | 28 | ||
29 | #define XEN_SHIFT 22 | ||
30 | |||
31 | /* Xen may fire a timer up to this many ns early */ | 29 | /* Xen may fire a timer up to this many ns early */ |
32 | #define TIMER_SLOP 100000 | 30 | #define TIMER_SLOP 100000 |
33 | #define NS_PER_TICK (1000000000LL / HZ) | 31 | #define NS_PER_TICK (1000000000LL / HZ) |
@@ -211,8 +209,6 @@ static struct clocksource xen_clocksource __read_mostly = { | |||
211 | .rating = 400, | 209 | .rating = 400, |
212 | .read = xen_clocksource_get_cycles, | 210 | .read = xen_clocksource_get_cycles, |
213 | .mask = ~0, | 211 | .mask = ~0, |
214 | .mult = 1<<XEN_SHIFT, /* time directly in nanoseconds */ | ||
215 | .shift = XEN_SHIFT, | ||
216 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 212 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
217 | }; | 213 | }; |
218 | 214 | ||
@@ -439,16 +435,16 @@ void xen_timer_resume(void) | |||
439 | } | 435 | } |
440 | } | 436 | } |
441 | 437 | ||
442 | static const struct pv_time_ops xen_time_ops __initdata = { | 438 | static const struct pv_time_ops xen_time_ops __initconst = { |
443 | .sched_clock = xen_clocksource_read, | 439 | .sched_clock = xen_clocksource_read, |
444 | }; | 440 | }; |
445 | 441 | ||
446 | static __init void xen_time_init(void) | 442 | static void __init xen_time_init(void) |
447 | { | 443 | { |
448 | int cpu = smp_processor_id(); | 444 | int cpu = smp_processor_id(); |
449 | struct timespec tp; | 445 | struct timespec tp; |
450 | 446 | ||
451 | clocksource_register(&xen_clocksource); | 447 | clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC); |
452 | 448 | ||
453 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { | 449 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { |
454 | /* Successfully turned off 100Hz tick, so we have the | 450 | /* Successfully turned off 100Hz tick, so we have the |
@@ -468,7 +464,7 @@ static __init void xen_time_init(void) | |||
468 | xen_setup_cpu_clockevents(); | 464 | xen_setup_cpu_clockevents(); |
469 | } | 465 | } |
470 | 466 | ||
471 | __init void xen_init_time_ops(void) | 467 | void __init xen_init_time_ops(void) |
472 | { | 468 | { |
473 | pv_time_ops = xen_time_ops; | 469 | pv_time_ops = xen_time_ops; |
474 | 470 | ||
@@ -490,7 +486,7 @@ static void xen_hvm_setup_cpu_clockevents(void) | |||
490 | xen_setup_cpu_clockevents(); | 486 | xen_setup_cpu_clockevents(); |
491 | } | 487 | } |
492 | 488 | ||
493 | __init void xen_hvm_init_time_ops(void) | 489 | void __init xen_hvm_init_time_ops(void) |
494 | { | 490 | { |
495 | /* vector callback is needed otherwise we cannot receive interrupts | 491 | /* vector callback is needed otherwise we cannot receive interrupts |
496 | * on cpu > 0 and at this point we don't know how many cpus are | 492 | * on cpu > 0 and at this point we don't know how many cpus are |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 3112f55638c4..97dfdc8757b3 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -74,7 +74,7 @@ static inline void xen_hvm_smp_init(void) {} | |||
74 | 74 | ||
75 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | 75 | #ifdef CONFIG_PARAVIRT_SPINLOCKS |
76 | void __init xen_init_spinlocks(void); | 76 | void __init xen_init_spinlocks(void); |
77 | __cpuinit void xen_init_lock_cpu(int cpu); | 77 | void __cpuinit xen_init_lock_cpu(int cpu); |
78 | void xen_uninit_lock_cpu(int cpu); | 78 | void xen_uninit_lock_cpu(int cpu); |
79 | #else | 79 | #else |
80 | static inline void xen_init_spinlocks(void) | 80 | static inline void xen_init_spinlocks(void) |