aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kbuild1
-rw-r--r--arch/x86/Kconfig62
-rw-r--r--arch/x86/Kconfig.cpu16
-rw-r--r--arch/x86/Makefile_32.cpu2
-rw-r--r--arch/x86/crypto/Makefile4
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c9
-rw-r--r--arch/x86/crypto/fpu.c10
-rw-r--r--arch/x86/ia32/ia32entry.S1
-rw-r--r--arch/x86/include/asm/acpi.h2
-rw-r--r--arch/x86/include/asm/alternative-asm.h9
-rw-r--r--arch/x86/include/asm/alternative.h9
-rw-r--r--arch/x86/include/asm/amd_iommu_proto.h13
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h28
-rw-r--r--arch/x86/include/asm/amd_nb.h1
-rw-r--r--arch/x86/include/asm/apic.h9
-rw-r--r--arch/x86/include/asm/apicdef.h1
-rw-r--r--arch/x86/include/asm/bios_ebda.h28
-rw-r--r--arch/x86/include/asm/cpufeature.h15
-rw-r--r--arch/x86/include/asm/dma.h12
-rw-r--r--arch/x86/include/asm/efi.h1
-rw-r--r--arch/x86/include/asm/ftrace.h7
-rw-r--r--arch/x86/include/asm/i8253.h2
-rw-r--r--arch/x86/include/asm/jump_label.h27
-rw-r--r--arch/x86/include/asm/mce.h2
-rw-r--r--arch/x86/include/asm/mmzone_32.h20
-rw-r--r--arch/x86/include/asm/mmzone_64.h23
-rw-r--r--arch/x86/include/asm/module.h2
-rw-r--r--arch/x86/include/asm/nops.h146
-rw-r--r--arch/x86/include/asm/numa.h32
-rw-r--r--arch/x86/include/asm/numa_32.h10
-rw-r--r--arch/x86/include/asm/numa_64.h36
-rw-r--r--arch/x86/include/asm/numaq.h7
-rw-r--r--arch/x86/include/asm/olpc_ofw.h9
-rw-r--r--arch/x86/include/asm/percpu.h29
-rw-r--r--arch/x86/include/asm/probe_roms.h8
-rw-r--r--arch/x86/include/asm/processor-flags.h1
-rw-r--r--arch/x86/include/asm/setup.h4
-rw-r--r--arch/x86/include/asm/srat.h39
-rw-r--r--arch/x86/include/asm/stacktrace.h3
-rw-r--r--arch/x86/include/asm/system.h85
-rw-r--r--arch/x86/include/asm/topology.h8
-rw-r--r--arch/x86/include/asm/uaccess.h3
-rw-r--r--arch/x86/include/asm/uaccess_32.h1
-rw-r--r--arch/x86/include/asm/uaccess_64.h1
-rw-r--r--arch/x86/include/asm/unistd_32.h3
-rw-r--r--arch/x86/include/asm/unistd_64.h2
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h17
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h2
-rw-r--r--arch/x86/include/asm/uv/uv_mmrs.h16
-rw-r--r--arch/x86/include/asm/xen/page.h5
-rw-r--r--arch/x86/include/asm/xen/pci.h16
-rw-r--r--arch/x86/kernel/Makefile4
-rw-r--r--arch/x86/kernel/acpi/sleep.c5
-rw-r--r--arch/x86/kernel/alternative.c203
-rw-r--r--arch/x86/kernel/amd_gart_64.c (renamed from arch/x86/kernel/pci-gart_64.c)0
-rw-r--r--arch/x86/kernel/amd_iommu.c526
-rw-r--r--arch/x86/kernel/amd_iommu_init.c48
-rw-r--r--arch/x86/kernel/apb_timer.c10
-rw-r--r--arch/x86/kernel/aperture_64.c34
-rw-r--r--arch/x86/kernel/apic/apic.c69
-rw-r--r--arch/x86/kernel/apic/apic_noop.c9
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c1
-rw-r--r--arch/x86/kernel/apic/es7000_32.c7
-rw-r--r--arch/x86/kernel/apic/numaq_32.c34
-rw-r--r--arch/x86/kernel/apic/probe_32.c1
-rw-r--r--arch/x86/kernel/apic/summit_32.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c48
-rw-r--r--arch/x86/kernel/apm_32.c4
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/amd.c4
-rw-r--r--arch/x86/kernel/cpu/common.c26
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Kconfig266
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Makefile21
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c776
-rw-r--r--arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c446
-rw-r--r--arch/x86/kernel/cpu/cpufreq/e_powersaver.c367
-rw-r--r--arch/x86/kernel/cpu/cpufreq/elanfreq.c309
-rw-r--r--arch/x86/kernel/cpu/cpufreq/gx-suspmod.c517
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longhaul.c1029
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longhaul.h353
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longrun.c327
-rw-r--r--arch/x86/kernel/cpu/cpufreq/mperf.c51
-rw-r--r--arch/x86/kernel/cpu/cpufreq/mperf.h9
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c331
-rw-r--r--arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c624
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k6.c261
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k7.c752
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k7.h43
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c1607
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.h224
-rw-r--r--arch/x86/kernel/cpu/cpufreq/sc520_freq.c194
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c636
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-ich.c452
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-lib.c481
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-lib.h49
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-smi.c467
-rw-r--r--arch/x86/kernel/cpu/intel.c29
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c20
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c25
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c22
-rw-r--r--arch/x86/kernel/cpu/perf_event.c28
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c14
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c37
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c7
-rw-r--r--arch/x86/kernel/dumpstack.c17
-rw-r--r--arch/x86/kernel/ftrace.c4
-rw-r--r--arch/x86/kernel/head32.c1
-rw-r--r--arch/x86/kernel/hpet.c72
-rw-r--r--arch/x86/kernel/i8253.c86
-rw-r--r--arch/x86/kernel/irq.c5
-rw-r--r--arch/x86/kernel/jump_label.c5
-rw-r--r--arch/x86/kernel/kprobes.c5
-rw-r--r--arch/x86/kernel/kvmclock.c6
-rw-r--r--arch/x86/kernel/module.c1
-rw-r--r--arch/x86/kernel/mpparse.c6
-rw-r--r--arch/x86/kernel/pci-iommu_table.c18
-rw-r--r--arch/x86/kernel/probe_roms.c (renamed from arch/x86/kernel/probe_roms_32.c)101
-rw-r--r--arch/x86/kernel/process.c4
-rw-r--r--arch/x86/kernel/reboot.c24
-rw-r--r--arch/x86/kernel/setup.c6
-rw-r--r--arch/x86/kernel/signal.c14
-rw-r--r--arch/x86/kernel/smp.c5
-rw-r--r--arch/x86/kernel/smpboot.c4
-rw-r--r--arch/x86/kernel/stacktrace.c13
-rw-r--r--arch/x86/kernel/syscall_table_32.S1
-rw-r--r--arch/x86/kernel/x86_init.c2
-rw-r--r--arch/x86/lguest/boot.c6
-rw-r--r--arch/x86/lib/clear_page_64.S33
-rw-r--r--arch/x86/lib/copy_user_64.S69
-rw-r--r--arch/x86/lib/memcpy_64.S47
-rw-r--r--arch/x86/lib/memmove_64.S29
-rw-r--r--arch/x86/lib/memset_64.S54
-rw-r--r--arch/x86/mm/Makefile4
-rw-r--r--arch/x86/mm/amdtopology.c (renamed from arch/x86/mm/amdtopology_64.c)21
-rw-r--r--arch/x86/mm/fault.c1
-rw-r--r--arch/x86/mm/init_32.c3
-rw-r--r--arch/x86/mm/init_64.c10
-rw-r--r--arch/x86/mm/ioremap.c14
-rw-r--r--arch/x86/mm/numa.c550
-rw-r--r--arch/x86/mm/numa_32.c398
-rw-r--r--arch/x86/mm/numa_64.c644
-rw-r--r--arch/x86/mm/numa_emulation.c16
-rw-r--r--arch/x86/mm/numa_internal.h8
-rw-r--r--arch/x86/mm/srat.c (renamed from arch/x86/mm/srat_64.c)82
-rw-r--r--arch/x86/mm/srat_32.c288
-rw-r--r--arch/x86/net/Makefile4
-rw-r--r--arch/x86/net/bpf_jit.S140
-rw-r--r--arch/x86/net/bpf_jit_comp.c654
-rw-r--r--arch/x86/oprofile/backtrace.c13
-rw-r--r--arch/x86/pci/xen.c96
-rw-r--r--arch/x86/platform/efi/efi.c78
-rw-r--r--arch/x86/platform/efi/efi_64.c34
-rw-r--r--arch/x86/platform/mrst/mrst.c4
-rw-r--r--arch/x86/platform/olpc/Makefile4
-rw-r--r--arch/x86/platform/olpc/olpc.c51
-rw-r--r--arch/x86/platform/olpc/olpc_dt.c19
-rw-r--r--arch/x86/platform/uv/tlb_uv.c92
-rw-r--r--arch/x86/platform/uv/uv_time.c6
-rw-r--r--arch/x86/xen/enlighten.c20
-rw-r--r--arch/x86/xen/irq.c2
-rw-r--r--arch/x86/xen/mmu.c36
-rw-r--r--arch/x86/xen/p2m.c43
-rw-r--r--arch/x86/xen/setup.c10
-rw-r--r--arch/x86/xen/smp.c13
-rw-r--r--arch/x86/xen/time.c14
-rw-r--r--arch/x86/xen/xen-ops.h2
167 files changed, 3355 insertions, 13266 deletions
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 0e103236b754..0e9dec6cadd1 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -15,3 +15,4 @@ obj-y += vdso/
15obj-$(CONFIG_IA32_EMULATION) += ia32/ 15obj-$(CONFIG_IA32_EMULATION) += ia32/
16 16
17obj-y += platform/ 17obj-y += platform/
18obj-y += net/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc6c53a95bfd..880fcb6c86f4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -8,6 +8,7 @@ config 64BIT
8 8
9config X86_32 9config X86_32
10 def_bool !64BIT 10 def_bool !64BIT
11 select CLKSRC_I8253
11 12
12config X86_64 13config X86_64
13 def_bool 64BIT 14 def_bool 64BIT
@@ -71,7 +72,7 @@ config X86
71 select GENERIC_IRQ_SHOW 72 select GENERIC_IRQ_SHOW
72 select IRQ_FORCED_THREADING 73 select IRQ_FORCED_THREADING
73 select USE_GENERIC_SMP_HELPERS if SMP 74 select USE_GENERIC_SMP_HELPERS if SMP
74 select ARCH_NO_SYSDEV_OPS 75 select HAVE_BPF_JIT if (X86_64 && NET)
75 76
76config INSTRUCTION_DECODER 77config INSTRUCTION_DECODER
77 def_bool (KPROBES || PERF_EVENTS) 78 def_bool (KPROBES || PERF_EVENTS)
@@ -112,7 +113,14 @@ config MMU
112 def_bool y 113 def_bool y
113 114
114config ZONE_DMA 115config ZONE_DMA
115 def_bool y 116 bool "DMA memory allocation support" if EXPERT
117 default y
118 help
119 DMA memory allocation support allows devices with less than 32-bit
120 addressing to allocate within the first 16MB of address space.
121 Disable if no such devices will be used.
122
123 If unsure, say Y.
116 124
117config SBUS 125config SBUS
118 bool 126 bool
@@ -365,17 +373,6 @@ config X86_UV
365# Following is an alphabetically sorted list of 32 bit extended platforms 373# Following is an alphabetically sorted list of 32 bit extended platforms
366# Please maintain the alphabetic order if and when there are additions 374# Please maintain the alphabetic order if and when there are additions
367 375
368config X86_ELAN
369 bool "AMD Elan"
370 depends on X86_32
371 depends on X86_EXTENDED_PLATFORM
372 ---help---
373 Select this for an AMD Elan processor.
374
375 Do not use this option for K6/Athlon/Opteron processors!
376
377 If unsure, choose "PC-compatible" instead.
378
379config X86_INTEL_CE 376config X86_INTEL_CE
380 bool "CE4100 TV platform" 377 bool "CE4100 TV platform"
381 depends on PCI 378 depends on PCI
@@ -690,6 +687,7 @@ config AMD_IOMMU
690 bool "AMD IOMMU support" 687 bool "AMD IOMMU support"
691 select SWIOTLB 688 select SWIOTLB
692 select PCI_MSI 689 select PCI_MSI
690 select PCI_IOV
693 depends on X86_64 && PCI && ACPI 691 depends on X86_64 && PCI && ACPI
694 ---help--- 692 ---help---
695 With this option you can enable support for AMD IOMMU hardware in 693 With this option you can enable support for AMD IOMMU hardware in
@@ -1174,7 +1172,7 @@ comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
1174config AMD_NUMA 1172config AMD_NUMA
1175 def_bool y 1173 def_bool y
1176 prompt "Old style AMD Opteron NUMA detection" 1174 prompt "Old style AMD Opteron NUMA detection"
1177 depends on X86_64 && NUMA && PCI 1175 depends on NUMA && PCI
1178 ---help--- 1176 ---help---
1179 Enable AMD NUMA node topology detection. You should say Y here if 1177 Enable AMD NUMA node topology detection. You should say Y here if
1180 you have a multi processor AMD system. This uses an old method to 1178 you have a multi processor AMD system. This uses an old method to
@@ -1201,7 +1199,7 @@ config NODES_SPAN_OTHER_NODES
1201 1199
1202config NUMA_EMU 1200config NUMA_EMU
1203 bool "NUMA emulation" 1201 bool "NUMA emulation"
1204 depends on X86_64 && NUMA 1202 depends on NUMA
1205 ---help--- 1203 ---help---
1206 Enable NUMA emulation. A flat machine will be split 1204 Enable NUMA emulation. A flat machine will be split
1207 into virtual nodes when booted with "numa=fake=N", where N is the 1205 into virtual nodes when booted with "numa=fake=N", where N is the
@@ -1223,6 +1221,10 @@ config HAVE_ARCH_BOOTMEM
1223 def_bool y 1221 def_bool y
1224 depends on X86_32 && NUMA 1222 depends on X86_32 && NUMA
1225 1223
1224config HAVE_ARCH_ALLOC_REMAP
1225 def_bool y
1226 depends on X86_32 && NUMA
1227
1226config ARCH_HAVE_MEMORY_PRESENT 1228config ARCH_HAVE_MEMORY_PRESENT
1227 def_bool y 1229 def_bool y
1228 depends on X86_32 && DISCONTIGMEM 1230 depends on X86_32 && DISCONTIGMEM
@@ -1231,13 +1233,9 @@ config NEED_NODE_MEMMAP_SIZE
1231 def_bool y 1233 def_bool y
1232 depends on X86_32 && (DISCONTIGMEM || SPARSEMEM) 1234 depends on X86_32 && (DISCONTIGMEM || SPARSEMEM)
1233 1235
1234config HAVE_ARCH_ALLOC_REMAP
1235 def_bool y
1236 depends on X86_32 && NUMA
1237
1238config ARCH_FLATMEM_ENABLE 1236config ARCH_FLATMEM_ENABLE
1239 def_bool y 1237 def_bool y
1240 depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA 1238 depends on X86_32 && !NUMA
1241 1239
1242config ARCH_DISCONTIGMEM_ENABLE 1240config ARCH_DISCONTIGMEM_ENABLE
1243 def_bool y 1241 def_bool y
@@ -1247,20 +1245,16 @@ config ARCH_DISCONTIGMEM_DEFAULT
1247 def_bool y 1245 def_bool y
1248 depends on NUMA && X86_32 1246 depends on NUMA && X86_32
1249 1247
1250config ARCH_PROC_KCORE_TEXT
1251 def_bool y
1252 depends on X86_64 && PROC_KCORE
1253
1254config ARCH_SPARSEMEM_DEFAULT
1255 def_bool y
1256 depends on X86_64
1257
1258config ARCH_SPARSEMEM_ENABLE 1248config ARCH_SPARSEMEM_ENABLE
1259 def_bool y 1249 def_bool y
1260 depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD 1250 depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD
1261 select SPARSEMEM_STATIC if X86_32 1251 select SPARSEMEM_STATIC if X86_32
1262 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 1252 select SPARSEMEM_VMEMMAP_ENABLE if X86_64
1263 1253
1254config ARCH_SPARSEMEM_DEFAULT
1255 def_bool y
1256 depends on X86_64
1257
1264config ARCH_SELECT_MEMORY_MODEL 1258config ARCH_SELECT_MEMORY_MODEL
1265 def_bool y 1259 def_bool y
1266 depends on ARCH_SPARSEMEM_ENABLE 1260 depends on ARCH_SPARSEMEM_ENABLE
@@ -1269,6 +1263,10 @@ config ARCH_MEMORY_PROBE
1269 def_bool X86_64 1263 def_bool X86_64
1270 depends on MEMORY_HOTPLUG 1264 depends on MEMORY_HOTPLUG
1271 1265
1266config ARCH_PROC_KCORE_TEXT
1267 def_bool y
1268 depends on X86_64 && PROC_KCORE
1269
1272config ILLEGAL_POINTER_VALUE 1270config ILLEGAL_POINTER_VALUE
1273 hex 1271 hex
1274 default 0 if X86_32 1272 default 0 if X86_32
@@ -1703,10 +1701,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE
1703 def_bool y 1701 def_bool y
1704 depends on MEMORY_HOTPLUG 1702 depends on MEMORY_HOTPLUG
1705 1703
1706config HAVE_ARCH_EARLY_PFN_TO_NID
1707 def_bool X86_64
1708 depends on NUMA
1709
1710config USE_PERCPU_NUMA_NODE_ID 1704config USE_PERCPU_NUMA_NODE_ID
1711 def_bool y 1705 def_bool y
1712 depends on NUMA 1706 depends on NUMA
@@ -1848,7 +1842,7 @@ config APM_ALLOW_INTS
1848 1842
1849endif # APM 1843endif # APM
1850 1844
1851source "arch/x86/kernel/cpu/cpufreq/Kconfig" 1845source "drivers/cpufreq/Kconfig"
1852 1846
1853source "drivers/cpuidle/Kconfig" 1847source "drivers/cpuidle/Kconfig"
1854 1848
@@ -2076,7 +2070,7 @@ config OLPC
2076 depends on !X86_PAE 2070 depends on !X86_PAE
2077 select GPIOLIB 2071 select GPIOLIB
2078 select OF 2072 select OF
2079 select OF_PROMTREE if PROC_DEVICETREE 2073 select OF_PROMTREE
2080 ---help--- 2074 ---help---
2081 Add support for detecting the unique features of the OLPC 2075 Add support for detecting the unique features of the OLPC
2082 XO hardware. 2076 XO hardware.
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index d161e939df62..6a7cfdf8ff69 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -1,6 +1,4 @@
1# Put here option for CPU selection and depending optimization 1# Put here option for CPU selection and depending optimization
2if !X86_ELAN
3
4choice 2choice
5 prompt "Processor family" 3 prompt "Processor family"
6 default M686 if X86_32 4 default M686 if X86_32
@@ -203,6 +201,14 @@ config MWINCHIP3D
203 stores for this CPU, which can increase performance of some 201 stores for this CPU, which can increase performance of some
204 operations. 202 operations.
205 203
204config MELAN
205 bool "AMD Elan"
206 depends on X86_32
207 ---help---
208 Select this for an AMD Elan processor.
209
210 Do not use this option for K6/Athlon/Opteron processors!
211
206config MGEODEGX1 212config MGEODEGX1
207 bool "GeodeGX1" 213 bool "GeodeGX1"
208 depends on X86_32 214 depends on X86_32
@@ -292,8 +298,6 @@ config X86_GENERIC
292 This is really intended for distributors who need more 298 This is really intended for distributors who need more
293 generic optimizations. 299 generic optimizations.
294 300
295endif
296
297# 301#
298# Define implied options from the CPU selection here 302# Define implied options from the CPU selection here
299config X86_INTERNODE_CACHE_SHIFT 303config X86_INTERNODE_CACHE_SHIFT
@@ -312,7 +316,7 @@ config X86_L1_CACHE_SHIFT
312 int 316 int
313 default "7" if MPENTIUM4 || MPSC 317 default "7" if MPENTIUM4 || MPSC
314 default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU 318 default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
315 default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 319 default "4" if MELAN || M486 || M386 || MGEODEGX1
316 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX 320 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
317 321
318config X86_XADD 322config X86_XADD
@@ -358,7 +362,7 @@ config X86_POPAD_OK
358 362
359config X86_ALIGNMENT_16 363config X86_ALIGNMENT_16
360 def_bool y 364 def_bool y
361 depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 365 depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
362 366
363config X86_INTEL_USERCOPY 367config X86_INTEL_USERCOPY
364 def_bool y 368 def_bool y
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index f2ee1abb1df9..86cee7b749e1 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -37,7 +37,7 @@ cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=
37 $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) 37 $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
38 38
39# AMD Elan support 39# AMD Elan support
40cflags-$(CONFIG_X86_ELAN) += -march=i486 40cflags-$(CONFIG_MELAN) += -march=i486
41 41
42# Geode GX1 support 42# Geode GX1 support
43cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx 43cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 1a58ad89fdf7..c04f1b7a9139 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,8 +2,6 @@
2# Arch-specific CryptoAPI modules. 2# Arch-specific CryptoAPI modules.
3# 3#
4 4
5obj-$(CONFIG_CRYPTO_FPU) += fpu.o
6
7obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o 5obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
8obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o 6obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
9obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o 7obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
@@ -24,6 +22,6 @@ aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
24twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o 22twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
25salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o 23salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
26 24
27aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o 25aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
28 26
29ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o 27ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 2577613fb32b..feee8ff1d05e 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -94,6 +94,10 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
94 const u8 *in, unsigned int len, u8 *iv); 94 const u8 *in, unsigned int len, u8 *iv);
95asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, 95asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
96 const u8 *in, unsigned int len, u8 *iv); 96 const u8 *in, unsigned int len, u8 *iv);
97
98int crypto_fpu_init(void);
99void crypto_fpu_exit(void);
100
97#ifdef CONFIG_X86_64 101#ifdef CONFIG_X86_64
98asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, 102asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
99 const u8 *in, unsigned int len, u8 *iv); 103 const u8 *in, unsigned int len, u8 *iv);
@@ -1257,6 +1261,8 @@ static int __init aesni_init(void)
1257 return -ENODEV; 1261 return -ENODEV;
1258 } 1262 }
1259 1263
1264 if ((err = crypto_fpu_init()))
1265 goto fpu_err;
1260 if ((err = crypto_register_alg(&aesni_alg))) 1266 if ((err = crypto_register_alg(&aesni_alg)))
1261 goto aes_err; 1267 goto aes_err;
1262 if ((err = crypto_register_alg(&__aesni_alg))) 1268 if ((err = crypto_register_alg(&__aesni_alg)))
@@ -1334,6 +1340,7 @@ blk_ecb_err:
1334__aes_err: 1340__aes_err:
1335 crypto_unregister_alg(&aesni_alg); 1341 crypto_unregister_alg(&aesni_alg);
1336aes_err: 1342aes_err:
1343fpu_err:
1337 return err; 1344 return err;
1338} 1345}
1339 1346
@@ -1363,6 +1370,8 @@ static void __exit aesni_exit(void)
1363 crypto_unregister_alg(&blk_ecb_alg); 1370 crypto_unregister_alg(&blk_ecb_alg);
1364 crypto_unregister_alg(&__aesni_alg); 1371 crypto_unregister_alg(&__aesni_alg);
1365 crypto_unregister_alg(&aesni_alg); 1372 crypto_unregister_alg(&aesni_alg);
1373
1374 crypto_fpu_exit();
1366} 1375}
1367 1376
1368module_init(aesni_init); 1377module_init(aesni_init);
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
index 1a8f8649c035..98d7a188f46b 100644
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -150,18 +150,12 @@ static struct crypto_template crypto_fpu_tmpl = {
150 .module = THIS_MODULE, 150 .module = THIS_MODULE,
151}; 151};
152 152
153static int __init crypto_fpu_module_init(void) 153int __init crypto_fpu_init(void)
154{ 154{
155 return crypto_register_template(&crypto_fpu_tmpl); 155 return crypto_register_template(&crypto_fpu_tmpl);
156} 156}
157 157
158static void __exit crypto_fpu_module_exit(void) 158void __exit crypto_fpu_exit(void)
159{ 159{
160 crypto_unregister_template(&crypto_fpu_tmpl); 160 crypto_unregister_template(&crypto_fpu_tmpl);
161} 161}
162
163module_init(crypto_fpu_module_init);
164module_exit(crypto_fpu_module_exit);
165
166MODULE_LICENSE("GPL");
167MODULE_DESCRIPTION("FPU block cipher wrapper");
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 849a9d23c71d..95f5826be458 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -848,4 +848,5 @@ ia32_sys_call_table:
848 .quad compat_sys_open_by_handle_at 848 .quad compat_sys_open_by_handle_at
849 .quad compat_sys_clock_adjtime 849 .quad compat_sys_clock_adjtime
850 .quad sys_syncfs 850 .quad sys_syncfs
851 .quad compat_sys_sendmmsg /* 345 */
851ia32_syscall_end: 852ia32_syscall_end:
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 12e0e7dd869c..416d865eae39 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -183,8 +183,6 @@ static inline void disable_acpi(void) { }
183 183
184#define ARCH_HAS_POWER_INIT 1 184#define ARCH_HAS_POWER_INIT 1
185 185
186struct bootnode;
187
188#ifdef CONFIG_ACPI_NUMA 186#ifdef CONFIG_ACPI_NUMA
189extern int acpi_numa; 187extern int acpi_numa;
190extern int x86_acpi_numa_init(void); 188extern int x86_acpi_numa_init(void);
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index a63a68be1cce..94d420b360d1 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -15,4 +15,13 @@
15 .endm 15 .endm
16#endif 16#endif
17 17
18.macro altinstruction_entry orig alt feature orig_len alt_len
19 .align 8
20 .quad \orig
21 .quad \alt
22 .word \feature
23 .byte \orig_len
24 .byte \alt_len
25.endm
26
18#endif /* __ASSEMBLY__ */ 27#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 13009d1af99a..bf535f947e8c 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -4,7 +4,6 @@
4#include <linux/types.h> 4#include <linux/types.h>
5#include <linux/stddef.h> 5#include <linux/stddef.h>
6#include <linux/stringify.h> 6#include <linux/stringify.h>
7#include <linux/jump_label.h>
8#include <asm/asm.h> 7#include <asm/asm.h>
9 8
10/* 9/*
@@ -191,12 +190,4 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
191extern void *text_poke_smp(void *addr, const void *opcode, size_t len); 190extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
192extern void text_poke_smp_batch(struct text_poke_param *params, int n); 191extern void text_poke_smp_batch(struct text_poke_param *params, int n);
193 192
194#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
195#define IDEAL_NOP_SIZE_5 5
196extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
197extern void arch_init_ideal_nop5(void);
198#else
199static inline void arch_init_ideal_nop5(void) {}
200#endif
201
202#endif /* _ASM_X86_ALTERNATIVE_H */ 193#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index 916bc8111a01..55d95eb789b3 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -19,13 +19,12 @@
19#ifndef _ASM_X86_AMD_IOMMU_PROTO_H 19#ifndef _ASM_X86_AMD_IOMMU_PROTO_H
20#define _ASM_X86_AMD_IOMMU_PROTO_H 20#define _ASM_X86_AMD_IOMMU_PROTO_H
21 21
22struct amd_iommu; 22#include <asm/amd_iommu_types.h>
23 23
24extern int amd_iommu_init_dma_ops(void); 24extern int amd_iommu_init_dma_ops(void);
25extern int amd_iommu_init_passthrough(void); 25extern int amd_iommu_init_passthrough(void);
26extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
26extern irqreturn_t amd_iommu_int_handler(int irq, void *data); 27extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
27extern void amd_iommu_flush_all_domains(void);
28extern void amd_iommu_flush_all_devices(void);
29extern void amd_iommu_apply_erratum_63(u16 devid); 28extern void amd_iommu_apply_erratum_63(u16 devid);
30extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); 29extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
31extern int amd_iommu_init_devices(void); 30extern int amd_iommu_init_devices(void);
@@ -44,4 +43,12 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev)
44 (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); 43 (pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
45} 44}
46 45
46static inline bool iommu_feature(struct amd_iommu *iommu, u64 f)
47{
48 if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
49 return false;
50
51 return !!(iommu->features & f);
52}
53
47#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ 54#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index e3509fc303bf..4c9982995414 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -68,12 +68,25 @@
68#define MMIO_CONTROL_OFFSET 0x0018 68#define MMIO_CONTROL_OFFSET 0x0018
69#define MMIO_EXCL_BASE_OFFSET 0x0020 69#define MMIO_EXCL_BASE_OFFSET 0x0020
70#define MMIO_EXCL_LIMIT_OFFSET 0x0028 70#define MMIO_EXCL_LIMIT_OFFSET 0x0028
71#define MMIO_EXT_FEATURES 0x0030
71#define MMIO_CMD_HEAD_OFFSET 0x2000 72#define MMIO_CMD_HEAD_OFFSET 0x2000
72#define MMIO_CMD_TAIL_OFFSET 0x2008 73#define MMIO_CMD_TAIL_OFFSET 0x2008
73#define MMIO_EVT_HEAD_OFFSET 0x2010 74#define MMIO_EVT_HEAD_OFFSET 0x2010
74#define MMIO_EVT_TAIL_OFFSET 0x2018 75#define MMIO_EVT_TAIL_OFFSET 0x2018
75#define MMIO_STATUS_OFFSET 0x2020 76#define MMIO_STATUS_OFFSET 0x2020
76 77
78
79/* Extended Feature Bits */
80#define FEATURE_PREFETCH (1ULL<<0)
81#define FEATURE_PPR (1ULL<<1)
82#define FEATURE_X2APIC (1ULL<<2)
83#define FEATURE_NX (1ULL<<3)
84#define FEATURE_GT (1ULL<<4)
85#define FEATURE_IA (1ULL<<6)
86#define FEATURE_GA (1ULL<<7)
87#define FEATURE_HE (1ULL<<8)
88#define FEATURE_PC (1ULL<<9)
89
77/* MMIO status bits */ 90/* MMIO status bits */
78#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 91#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04
79 92
@@ -113,7 +126,9 @@
113/* command specific defines */ 126/* command specific defines */
114#define CMD_COMPL_WAIT 0x01 127#define CMD_COMPL_WAIT 0x01
115#define CMD_INV_DEV_ENTRY 0x02 128#define CMD_INV_DEV_ENTRY 0x02
116#define CMD_INV_IOMMU_PAGES 0x03 129#define CMD_INV_IOMMU_PAGES 0x03
130#define CMD_INV_IOTLB_PAGES 0x04
131#define CMD_INV_ALL 0x08
117 132
118#define CMD_COMPL_WAIT_STORE_MASK 0x01 133#define CMD_COMPL_WAIT_STORE_MASK 0x01
119#define CMD_COMPL_WAIT_INT_MASK 0x02 134#define CMD_COMPL_WAIT_INT_MASK 0x02
@@ -215,6 +230,8 @@
215#define IOMMU_PTE_IR (1ULL << 61) 230#define IOMMU_PTE_IR (1ULL << 61)
216#define IOMMU_PTE_IW (1ULL << 62) 231#define IOMMU_PTE_IW (1ULL << 62)
217 232
233#define DTE_FLAG_IOTLB 0x01
234
218#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) 235#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
219#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) 236#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
220#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) 237#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
@@ -227,6 +244,7 @@
227/* IOMMU capabilities */ 244/* IOMMU capabilities */
228#define IOMMU_CAP_IOTLB 24 245#define IOMMU_CAP_IOTLB 24
229#define IOMMU_CAP_NPCACHE 26 246#define IOMMU_CAP_NPCACHE 26
247#define IOMMU_CAP_EFR 27
230 248
231#define MAX_DOMAIN_ID 65536 249#define MAX_DOMAIN_ID 65536
232 250
@@ -249,6 +267,8 @@ extern bool amd_iommu_dump;
249 267
250/* global flag if IOMMUs cache non-present entries */ 268/* global flag if IOMMUs cache non-present entries */
251extern bool amd_iommu_np_cache; 269extern bool amd_iommu_np_cache;
270/* Only true if all IOMMUs support device IOTLBs */
271extern bool amd_iommu_iotlb_sup;
252 272
253/* 273/*
254 * Make iterating over all IOMMUs easier 274 * Make iterating over all IOMMUs easier
@@ -371,6 +391,9 @@ struct amd_iommu {
371 /* flags read from acpi table */ 391 /* flags read from acpi table */
372 u8 acpi_flags; 392 u8 acpi_flags;
373 393
394 /* Extended features */
395 u64 features;
396
374 /* 397 /*
375 * Capability pointer. There could be more than one IOMMU per PCI 398 * Capability pointer. There could be more than one IOMMU per PCI
376 * device function if there are more than one AMD IOMMU capability 399 * device function if there are more than one AMD IOMMU capability
@@ -409,9 +432,6 @@ struct amd_iommu {
409 /* if one, we need to send a completion wait command */ 432 /* if one, we need to send a completion wait command */
410 bool need_sync; 433 bool need_sync;
411 434
412 /* becomes true if a command buffer reset is running */
413 bool reset_in_progress;
414
415 /* default dma_ops domain for that IOMMU */ 435 /* default dma_ops domain for that IOMMU */
416 struct dma_ops_domain *default_dom; 436 struct dma_ops_domain *default_dom;
417 437
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 331682231bb4..67f87f257611 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -11,7 +11,6 @@ struct amd_nb_bus_dev_range {
11 11
12extern const struct pci_device_id amd_nb_misc_ids[]; 12extern const struct pci_device_id amd_nb_misc_ids[];
13extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; 13extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
14struct bootnode;
15 14
16extern bool early_is_amd_nb(u32 value); 15extern bool early_is_amd_nb(u32 value);
17extern int amd_cache_northbridges(void); 16extern int amd_cache_northbridges(void);
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 2b7d573be549..a0c46f061210 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -363,7 +363,12 @@ struct apic {
363 */ 363 */
364 int (*x86_32_early_logical_apicid)(int cpu); 364 int (*x86_32_early_logical_apicid)(int cpu);
365 365
366 /* determine CPU -> NUMA node mapping */ 366 /*
367 * Optional method called from setup_local_APIC() after logical
368 * apicid is guaranteed to be known to initialize apicid -> node
369 * mapping if NUMA initialization hasn't done so already. Don't
370 * add new users.
371 */
367 int (*x86_32_numa_cpu_node)(int cpu); 372 int (*x86_32_numa_cpu_node)(int cpu);
368#endif 373#endif
369}; 374};
@@ -537,8 +542,6 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
537 return cpuid_apic >> index_msb; 542 return cpuid_apic >> index_msb;
538} 543}
539 544
540extern int default_x86_32_numa_cpu_node(int cpu);
541
542#endif 545#endif
543 546
544static inline unsigned int 547static inline unsigned int
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index d87988bacf3e..34595d5e1038 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -78,6 +78,7 @@
78#define APIC_DEST_LOGICAL 0x00800 78#define APIC_DEST_LOGICAL 0x00800
79#define APIC_DEST_PHYSICAL 0x00000 79#define APIC_DEST_PHYSICAL 0x00000
80#define APIC_DM_FIXED 0x00000 80#define APIC_DM_FIXED 0x00000
81#define APIC_DM_FIXED_MASK 0x00700
81#define APIC_DM_LOWEST 0x00100 82#define APIC_DM_LOWEST 0x00100
82#define APIC_DM_SMI 0x00200 83#define APIC_DM_SMI 0x00200
83#define APIC_DM_REMRD 0x00300 84#define APIC_DM_REMRD 0x00300
diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h
index 3c7521063d3f..aa6a3170ab5a 100644
--- a/arch/x86/include/asm/bios_ebda.h
+++ b/arch/x86/include/asm/bios_ebda.h
@@ -4,16 +4,40 @@
4#include <asm/io.h> 4#include <asm/io.h>
5 5
6/* 6/*
7 * there is a real-mode segmented pointer pointing to the 7 * Returns physical address of EBDA. Returns 0 if there is no EBDA.
8 * 4K EBDA area at 0x40E.
9 */ 8 */
10static inline unsigned int get_bios_ebda(void) 9static inline unsigned int get_bios_ebda(void)
11{ 10{
11 /*
12 * There is a real-mode segmented pointer pointing to the
13 * 4K EBDA area at 0x40E.
14 */
12 unsigned int address = *(unsigned short *)phys_to_virt(0x40E); 15 unsigned int address = *(unsigned short *)phys_to_virt(0x40E);
13 address <<= 4; 16 address <<= 4;
14 return address; /* 0 means none */ 17 return address; /* 0 means none */
15} 18}
16 19
20/*
21 * Return the sanitized length of the EBDA in bytes, if it exists.
22 */
23static inline unsigned int get_bios_ebda_length(void)
24{
25 unsigned int address;
26 unsigned int length;
27
28 address = get_bios_ebda();
29 if (!address)
30 return 0;
31
32 /* EBDA length is byte 0 of the EBDA (stored in KiB) */
33 length = *(unsigned char *)phys_to_virt(address);
34 length <<= 10;
35
36 /* Trim the length if it extends beyond 640KiB */
37 length = min_t(unsigned int, (640 * 1024) - address, length);
38 return length;
39}
40
17void reserve_ebda_region(void); 41void reserve_ebda_region(void);
18 42
19#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION 43#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 91f3e087cf21..5dc6acc98dbd 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -195,6 +195,8 @@
195 195
196/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ 196/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
197#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ 197#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
198#define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */
199#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */
198 200
199#if defined(__KERNEL__) && !defined(__ASSEMBLY__) 201#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
200 202
@@ -207,8 +209,7 @@ extern const char * const x86_power_flags[32];
207#define test_cpu_cap(c, bit) \ 209#define test_cpu_cap(c, bit) \
208 test_bit(bit, (unsigned long *)((c)->x86_capability)) 210 test_bit(bit, (unsigned long *)((c)->x86_capability))
209 211
210#define cpu_has(c, bit) \ 212#define REQUIRED_MASK_BIT_SET(bit) \
211 (__builtin_constant_p(bit) && \
212 ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \ 213 ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \
213 (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \ 214 (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \
214 (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \ 215 (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \
@@ -218,10 +219,16 @@ extern const char * const x86_power_flags[32];
218 (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \ 219 (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \
219 (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) || \ 220 (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) || \
220 (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \ 221 (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \
221 (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) ) \ 222 (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )
222 ? 1 : \ 223
224#define cpu_has(c, bit) \
225 (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
223 test_cpu_cap(c, bit)) 226 test_cpu_cap(c, bit))
224 227
228#define this_cpu_has(bit) \
229 (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
230 x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
231
225#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) 232#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
226 233
227#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) 234#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
index 057099e5faba..0bdb0c54d9a1 100644
--- a/arch/x86/include/asm/dma.h
+++ b/arch/x86/include/asm/dma.h
@@ -69,22 +69,18 @@
69 69
70#define MAX_DMA_CHANNELS 8 70#define MAX_DMA_CHANNELS 8
71 71
72#ifdef CONFIG_X86_32
73
74/* The maximum address that we can perform a DMA transfer to on this platform */
75#define MAX_DMA_ADDRESS (PAGE_OFFSET + 0x1000000)
76
77#else
78
79/* 16MB ISA DMA zone */ 72/* 16MB ISA DMA zone */
80#define MAX_DMA_PFN ((16 * 1024 * 1024) >> PAGE_SHIFT) 73#define MAX_DMA_PFN ((16 * 1024 * 1024) >> PAGE_SHIFT)
81 74
82/* 4GB broken PCI/AGP hardware bus master zone */ 75/* 4GB broken PCI/AGP hardware bus master zone */
83#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) 76#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
84 77
78#ifdef CONFIG_X86_32
79/* The maximum address that we can perform a DMA transfer to on this platform */
80#define MAX_DMA_ADDRESS (PAGE_OFFSET + 0x1000000)
81#else
85/* Compat define for old dma zone */ 82/* Compat define for old dma zone */
86#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT)) 83#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT))
87
88#endif 84#endif
89 85
90/* 8237 DMA controllers */ 86/* 8237 DMA controllers */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 8e4a16508d4e..7093e4a6a0bc 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
90#endif /* CONFIG_X86_32 */ 90#endif /* CONFIG_X86_32 */
91 91
92extern int add_efi_memmap; 92extern int add_efi_memmap;
93extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
93extern void efi_memblock_x86_reserve_range(void); 94extern void efi_memblock_x86_reserve_range(void);
94extern void efi_call_phys_prelog(void); 95extern void efi_call_phys_prelog(void);
95extern void efi_call_phys_epilog(void); 96extern void efi_call_phys_epilog(void);
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index db24c2278be0..268c783ab1c0 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -38,11 +38,10 @@ extern void mcount(void);
38static inline unsigned long ftrace_call_adjust(unsigned long addr) 38static inline unsigned long ftrace_call_adjust(unsigned long addr)
39{ 39{
40 /* 40 /*
41 * call mcount is "e8 <4 byte offset>" 41 * addr is the address of the mcount call instruction.
42 * The addr points to the 4 byte offset and the caller of this 42 * recordmcount does the necessary offset calculation.
43 * function wants the pointer to e8. Simply subtract one.
44 */ 43 */
45 return addr - 1; 44 return addr;
46} 45}
47 46
48#ifdef CONFIG_DYNAMIC_FTRACE 47#ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/x86/include/asm/i8253.h b/arch/x86/include/asm/i8253.h
index fc1f579fb965..65aaa91d5850 100644
--- a/arch/x86/include/asm/i8253.h
+++ b/arch/x86/include/asm/i8253.h
@@ -6,6 +6,8 @@
6#define PIT_CH0 0x40 6#define PIT_CH0 0x40
7#define PIT_CH2 0x42 7#define PIT_CH2 0x42
8 8
9#define PIT_LATCH LATCH
10
9extern raw_spinlock_t i8253_lock; 11extern raw_spinlock_t i8253_lock;
10 12
11extern struct clock_event_device *global_clock_event; 13extern struct clock_event_device *global_clock_event;
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 574dbc22893a..a32b18ce6ead 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -5,20 +5,25 @@
5 5
6#include <linux/types.h> 6#include <linux/types.h>
7#include <asm/nops.h> 7#include <asm/nops.h>
8#include <asm/asm.h>
8 9
9#define JUMP_LABEL_NOP_SIZE 5 10#define JUMP_LABEL_NOP_SIZE 5
10 11
11# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" 12#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
12 13
13# define JUMP_LABEL(key, label) \ 14static __always_inline bool arch_static_branch(struct jump_label_key *key)
14 do { \ 15{
15 asm goto("1:" \ 16 asm goto("1:"
16 JUMP_LABEL_INITIAL_NOP \ 17 JUMP_LABEL_INITIAL_NOP
17 ".pushsection __jump_table, \"aw\" \n\t"\ 18 ".pushsection __jump_table, \"aw\" \n\t"
18 _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \ 19 _ASM_ALIGN "\n\t"
19 ".popsection \n\t" \ 20 _ASM_PTR "1b, %l[l_yes], %c0 \n\t"
20 : : "i" (key) : : label); \ 21 ".popsection \n\t"
21 } while (0) 22 : : "i" (key) : : l_yes);
23 return false;
24l_yes:
25 return true;
26}
22 27
23#endif /* __KERNEL__ */ 28#endif /* __KERNEL__ */
24 29
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index eb16e94ae04f..021979a6e23f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -142,8 +142,6 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
142static inline void enable_p5_mce(void) {} 142static inline void enable_p5_mce(void) {}
143#endif 143#endif
144 144
145extern void (*x86_mce_decode_callback)(struct mce *m);
146
147void mce_setup(struct mce *m); 145void mce_setup(struct mce *m);
148void mce_log(struct mce *m); 146void mce_log(struct mce *m);
149DECLARE_PER_CPU(struct sys_device, mce_dev); 147DECLARE_PER_CPU(struct sys_device, mce_dev);
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 91df7c51806c..5e83a416eca8 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -13,31 +13,11 @@ extern struct pglist_data *node_data[];
13#define NODE_DATA(nid) (node_data[nid]) 13#define NODE_DATA(nid) (node_data[nid])
14 14
15#include <asm/numaq.h> 15#include <asm/numaq.h>
16/* summit or generic arch */
17#include <asm/srat.h>
18
19extern int get_memcfg_numa_flat(void);
20/*
21 * This allows any one NUMA architecture to be compiled
22 * for, and still fall back to the flat function if it
23 * fails.
24 */
25static inline void get_memcfg_numa(void)
26{
27
28 if (get_memcfg_numaq())
29 return;
30 if (get_memcfg_from_srat())
31 return;
32 get_memcfg_numa_flat();
33}
34 16
35extern void resume_map_numa_kva(pgd_t *pgd); 17extern void resume_map_numa_kva(pgd_t *pgd);
36 18
37#else /* !CONFIG_NUMA */ 19#else /* !CONFIG_NUMA */
38 20
39#define get_memcfg_numa get_memcfg_numa_flat
40
41static inline void resume_map_numa_kva(pgd_t *pgd) {} 21static inline void resume_map_numa_kva(pgd_t *pgd) {}
42 22
43#endif /* CONFIG_NUMA */ 23#endif /* CONFIG_NUMA */
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
index 288b96f815a6..b3f88d7867c7 100644
--- a/arch/x86/include/asm/mmzone_64.h
+++ b/arch/x86/include/asm/mmzone_64.h
@@ -4,36 +4,13 @@
4#ifndef _ASM_X86_MMZONE_64_H 4#ifndef _ASM_X86_MMZONE_64_H
5#define _ASM_X86_MMZONE_64_H 5#define _ASM_X86_MMZONE_64_H
6 6
7
8#ifdef CONFIG_NUMA 7#ifdef CONFIG_NUMA
9 8
10#include <linux/mmdebug.h> 9#include <linux/mmdebug.h>
11
12#include <asm/smp.h> 10#include <asm/smp.h>
13 11
14/* Simple perfect hash to map physical addresses to node numbers */
15struct memnode {
16 int shift;
17 unsigned int mapsize;
18 s16 *map;
19 s16 embedded_map[64 - 8];
20} ____cacheline_aligned; /* total size = 128 bytes */
21extern struct memnode memnode;
22#define memnode_shift memnode.shift
23#define memnodemap memnode.map
24#define memnodemapsize memnode.mapsize
25
26extern struct pglist_data *node_data[]; 12extern struct pglist_data *node_data[];
27 13
28static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
29{
30 unsigned nid;
31 VIRTUAL_BUG_ON(!memnodemap);
32 nid = memnodemap[addr >> memnode_shift];
33 VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
34 return nid;
35}
36
37#define NODE_DATA(nid) (node_data[nid]) 14#define NODE_DATA(nid) (node_data[nid])
38 15
39#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) 16#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 67763c5d8b4e..9eae7752ae9b 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -35,7 +35,7 @@
35#define MODULE_PROC_FAMILY "K7 " 35#define MODULE_PROC_FAMILY "K7 "
36#elif defined CONFIG_MK8 36#elif defined CONFIG_MK8
37#define MODULE_PROC_FAMILY "K8 " 37#define MODULE_PROC_FAMILY "K8 "
38#elif defined CONFIG_X86_ELAN 38#elif defined CONFIG_MELAN
39#define MODULE_PROC_FAMILY "ELAN " 39#define MODULE_PROC_FAMILY "ELAN "
40#elif defined CONFIG_MCRUSOE 40#elif defined CONFIG_MCRUSOE
41#define MODULE_PROC_FAMILY "CRUSOE " 41#define MODULE_PROC_FAMILY "CRUSOE "
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index af788496020b..405b4032a60b 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -1,7 +1,13 @@
1#ifndef _ASM_X86_NOPS_H 1#ifndef _ASM_X86_NOPS_H
2#define _ASM_X86_NOPS_H 2#define _ASM_X86_NOPS_H
3 3
4/* Define nops for use with alternative() */ 4/*
5 * Define nops for use with alternative() and for tracing.
6 *
7 * *_NOP5_ATOMIC must be a single instruction.
8 */
9
10#define NOP_DS_PREFIX 0x3e
5 11
6/* generic versions from gas 12/* generic versions from gas
7 1: nop 13 1: nop
@@ -13,14 +19,15 @@
13 6: leal 0x00000000(%esi),%esi 19 6: leal 0x00000000(%esi),%esi
14 7: leal 0x00000000(,%esi,1),%esi 20 7: leal 0x00000000(,%esi,1),%esi
15*/ 21*/
16#define GENERIC_NOP1 ".byte 0x90\n" 22#define GENERIC_NOP1 0x90
17#define GENERIC_NOP2 ".byte 0x89,0xf6\n" 23#define GENERIC_NOP2 0x89,0xf6
18#define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n" 24#define GENERIC_NOP3 0x8d,0x76,0x00
19#define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n" 25#define GENERIC_NOP4 0x8d,0x74,0x26,0x00
20#define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4 26#define GENERIC_NOP5 GENERIC_NOP1,GENERIC_NOP4
21#define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n" 27#define GENERIC_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00
22#define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n" 28#define GENERIC_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00
23#define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7 29#define GENERIC_NOP8 GENERIC_NOP1,GENERIC_NOP7
30#define GENERIC_NOP5_ATOMIC NOP_DS_PREFIX,GENERIC_NOP4
24 31
25/* Opteron 64bit nops 32/* Opteron 64bit nops
26 1: nop 33 1: nop
@@ -29,13 +36,14 @@
29 4: osp osp osp nop 36 4: osp osp osp nop
30*/ 37*/
31#define K8_NOP1 GENERIC_NOP1 38#define K8_NOP1 GENERIC_NOP1
32#define K8_NOP2 ".byte 0x66,0x90\n" 39#define K8_NOP2 0x66,K8_NOP1
33#define K8_NOP3 ".byte 0x66,0x66,0x90\n" 40#define K8_NOP3 0x66,K8_NOP2
34#define K8_NOP4 ".byte 0x66,0x66,0x66,0x90\n" 41#define K8_NOP4 0x66,K8_NOP3
35#define K8_NOP5 K8_NOP3 K8_NOP2 42#define K8_NOP5 K8_NOP3,K8_NOP2
36#define K8_NOP6 K8_NOP3 K8_NOP3 43#define K8_NOP6 K8_NOP3,K8_NOP3
37#define K8_NOP7 K8_NOP4 K8_NOP3 44#define K8_NOP7 K8_NOP4,K8_NOP3
38#define K8_NOP8 K8_NOP4 K8_NOP4 45#define K8_NOP8 K8_NOP4,K8_NOP4
46#define K8_NOP5_ATOMIC 0x66,K8_NOP4
39 47
40/* K7 nops 48/* K7 nops
41 uses eax dependencies (arbitrary choice) 49 uses eax dependencies (arbitrary choice)
@@ -47,13 +55,14 @@
47 7: leal 0x00000000(,%eax,1),%eax 55 7: leal 0x00000000(,%eax,1),%eax
48*/ 56*/
49#define K7_NOP1 GENERIC_NOP1 57#define K7_NOP1 GENERIC_NOP1
50#define K7_NOP2 ".byte 0x8b,0xc0\n" 58#define K7_NOP2 0x8b,0xc0
51#define K7_NOP3 ".byte 0x8d,0x04,0x20\n" 59#define K7_NOP3 0x8d,0x04,0x20
52#define K7_NOP4 ".byte 0x8d,0x44,0x20,0x00\n" 60#define K7_NOP4 0x8d,0x44,0x20,0x00
53#define K7_NOP5 K7_NOP4 ASM_NOP1 61#define K7_NOP5 K7_NOP4,K7_NOP1
54#define K7_NOP6 ".byte 0x8d,0x80,0,0,0,0\n" 62#define K7_NOP6 0x8d,0x80,0,0,0,0
55#define K7_NOP7 ".byte 0x8D,0x04,0x05,0,0,0,0\n" 63#define K7_NOP7 0x8D,0x04,0x05,0,0,0,0
56#define K7_NOP8 K7_NOP7 ASM_NOP1 64#define K7_NOP8 K7_NOP7,K7_NOP1
65#define K7_NOP5_ATOMIC NOP_DS_PREFIX,K7_NOP4
57 66
58/* P6 nops 67/* P6 nops
59 uses eax dependencies (Intel-recommended choice) 68 uses eax dependencies (Intel-recommended choice)
@@ -69,52 +78,65 @@
69 There is kernel code that depends on this. 78 There is kernel code that depends on this.
70*/ 79*/
71#define P6_NOP1 GENERIC_NOP1 80#define P6_NOP1 GENERIC_NOP1
72#define P6_NOP2 ".byte 0x66,0x90\n" 81#define P6_NOP2 0x66,0x90
73#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n" 82#define P6_NOP3 0x0f,0x1f,0x00
74#define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n" 83#define P6_NOP4 0x0f,0x1f,0x40,0
75#define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n" 84#define P6_NOP5 0x0f,0x1f,0x44,0x00,0
76#define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n" 85#define P6_NOP6 0x66,0x0f,0x1f,0x44,0x00,0
77#define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n" 86#define P6_NOP7 0x0f,0x1f,0x80,0,0,0,0
78#define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n" 87#define P6_NOP8 0x0f,0x1f,0x84,0x00,0,0,0,0
88#define P6_NOP5_ATOMIC P6_NOP5
89
90#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
79 91
80#if defined(CONFIG_MK7) 92#if defined(CONFIG_MK7)
81#define ASM_NOP1 K7_NOP1 93#define ASM_NOP1 _ASM_MK_NOP(K7_NOP1)
82#define ASM_NOP2 K7_NOP2 94#define ASM_NOP2 _ASM_MK_NOP(K7_NOP2)
83#define ASM_NOP3 K7_NOP3 95#define ASM_NOP3 _ASM_MK_NOP(K7_NOP3)
84#define ASM_NOP4 K7_NOP4 96#define ASM_NOP4 _ASM_MK_NOP(K7_NOP4)
85#define ASM_NOP5 K7_NOP5 97#define ASM_NOP5 _ASM_MK_NOP(K7_NOP5)
86#define ASM_NOP6 K7_NOP6 98#define ASM_NOP6 _ASM_MK_NOP(K7_NOP6)
87#define ASM_NOP7 K7_NOP7 99#define ASM_NOP7 _ASM_MK_NOP(K7_NOP7)
88#define ASM_NOP8 K7_NOP8 100#define ASM_NOP8 _ASM_MK_NOP(K7_NOP8)
101#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K7_NOP5_ATOMIC)
89#elif defined(CONFIG_X86_P6_NOP) 102#elif defined(CONFIG_X86_P6_NOP)
90#define ASM_NOP1 P6_NOP1 103#define ASM_NOP1 _ASM_MK_NOP(P6_NOP1)
91#define ASM_NOP2 P6_NOP2 104#define ASM_NOP2 _ASM_MK_NOP(P6_NOP2)
92#define ASM_NOP3 P6_NOP3 105#define ASM_NOP3 _ASM_MK_NOP(P6_NOP3)
93#define ASM_NOP4 P6_NOP4 106#define ASM_NOP4 _ASM_MK_NOP(P6_NOP4)
94#define ASM_NOP5 P6_NOP5 107#define ASM_NOP5 _ASM_MK_NOP(P6_NOP5)
95#define ASM_NOP6 P6_NOP6 108#define ASM_NOP6 _ASM_MK_NOP(P6_NOP6)
96#define ASM_NOP7 P6_NOP7 109#define ASM_NOP7 _ASM_MK_NOP(P6_NOP7)
97#define ASM_NOP8 P6_NOP8 110#define ASM_NOP8 _ASM_MK_NOP(P6_NOP8)
111#define ASM_NOP5_ATOMIC _ASM_MK_NOP(P6_NOP5_ATOMIC)
98#elif defined(CONFIG_X86_64) 112#elif defined(CONFIG_X86_64)
99#define ASM_NOP1 K8_NOP1 113#define ASM_NOP1 _ASM_MK_NOP(K8_NOP1)
100#define ASM_NOP2 K8_NOP2 114#define ASM_NOP2 _ASM_MK_NOP(K8_NOP2)
101#define ASM_NOP3 K8_NOP3 115#define ASM_NOP3 _ASM_MK_NOP(K8_NOP3)
102#define ASM_NOP4 K8_NOP4 116#define ASM_NOP4 _ASM_MK_NOP(K8_NOP4)
103#define ASM_NOP5 K8_NOP5 117#define ASM_NOP5 _ASM_MK_NOP(K8_NOP5)
104#define ASM_NOP6 K8_NOP6 118#define ASM_NOP6 _ASM_MK_NOP(K8_NOP6)
105#define ASM_NOP7 K8_NOP7 119#define ASM_NOP7 _ASM_MK_NOP(K8_NOP7)
106#define ASM_NOP8 K8_NOP8 120#define ASM_NOP8 _ASM_MK_NOP(K8_NOP8)
121#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K8_NOP5_ATOMIC)
107#else 122#else
108#define ASM_NOP1 GENERIC_NOP1 123#define ASM_NOP1 _ASM_MK_NOP(GENERIC_NOP1)
109#define ASM_NOP2 GENERIC_NOP2 124#define ASM_NOP2 _ASM_MK_NOP(GENERIC_NOP2)
110#define ASM_NOP3 GENERIC_NOP3 125#define ASM_NOP3 _ASM_MK_NOP(GENERIC_NOP3)
111#define ASM_NOP4 GENERIC_NOP4 126#define ASM_NOP4 _ASM_MK_NOP(GENERIC_NOP4)
112#define ASM_NOP5 GENERIC_NOP5 127#define ASM_NOP5 _ASM_MK_NOP(GENERIC_NOP5)
113#define ASM_NOP6 GENERIC_NOP6 128#define ASM_NOP6 _ASM_MK_NOP(GENERIC_NOP6)
114#define ASM_NOP7 GENERIC_NOP7 129#define ASM_NOP7 _ASM_MK_NOP(GENERIC_NOP7)
115#define ASM_NOP8 GENERIC_NOP8 130#define ASM_NOP8 _ASM_MK_NOP(GENERIC_NOP8)
131#define ASM_NOP5_ATOMIC _ASM_MK_NOP(GENERIC_NOP5_ATOMIC)
116#endif 132#endif
117 133
118#define ASM_NOP_MAX 8 134#define ASM_NOP_MAX 8
135#define NOP_ATOMIC5 (ASM_NOP_MAX+1) /* Entry for the 5-byte atomic NOP */
136
137#ifndef __ASSEMBLY__
138extern const unsigned char * const *ideal_nops;
139extern void arch_init_ideal_nops(void);
140#endif
119 141
120#endif /* _ASM_X86_NOPS_H */ 142#endif /* _ASM_X86_NOPS_H */
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index a50fc9f493b3..bfacd2ccf651 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,12 +1,24 @@
1#ifndef _ASM_X86_NUMA_H 1#ifndef _ASM_X86_NUMA_H
2#define _ASM_X86_NUMA_H 2#define _ASM_X86_NUMA_H
3 3
4#include <linux/nodemask.h>
5
4#include <asm/topology.h> 6#include <asm/topology.h>
5#include <asm/apicdef.h> 7#include <asm/apicdef.h>
6 8
7#ifdef CONFIG_NUMA 9#ifdef CONFIG_NUMA
8 10
9#define NR_NODE_MEMBLKS (MAX_NUMNODES*2) 11#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
12#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
13
14/*
15 * Too small node sizes may confuse the VM badly. Usually they
16 * result from BIOS bugs. So dont recognize nodes as standalone
17 * NUMA entities that have less than this amount of RAM listed:
18 */
19#define NODE_MIN_SIZE (4*1024*1024)
20
21extern int numa_off;
10 22
11/* 23/*
12 * __apicid_to_node[] stores the raw mapping between physical apicid and 24 * __apicid_to_node[] stores the raw mapping between physical apicid and
@@ -17,15 +29,27 @@
17 * numa_cpu_node(). 29 * numa_cpu_node().
18 */ 30 */
19extern s16 __apicid_to_node[MAX_LOCAL_APIC]; 31extern s16 __apicid_to_node[MAX_LOCAL_APIC];
32extern nodemask_t numa_nodes_parsed __initdata;
33
34extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
35extern void __init numa_set_distance(int from, int to, int distance);
20 36
21static inline void set_apicid_to_node(int apicid, s16 node) 37static inline void set_apicid_to_node(int apicid, s16 node)
22{ 38{
23 __apicid_to_node[apicid] = node; 39 __apicid_to_node[apicid] = node;
24} 40}
41
42extern int __cpuinit numa_cpu_node(int cpu);
43
25#else /* CONFIG_NUMA */ 44#else /* CONFIG_NUMA */
26static inline void set_apicid_to_node(int apicid, s16 node) 45static inline void set_apicid_to_node(int apicid, s16 node)
27{ 46{
28} 47}
48
49static inline int numa_cpu_node(int cpu)
50{
51 return NUMA_NO_NODE;
52}
29#endif /* CONFIG_NUMA */ 53#endif /* CONFIG_NUMA */
30 54
31#ifdef CONFIG_X86_32 55#ifdef CONFIG_X86_32
@@ -37,14 +61,12 @@ static inline void set_apicid_to_node(int apicid, s16 node)
37#ifdef CONFIG_NUMA 61#ifdef CONFIG_NUMA
38extern void __cpuinit numa_set_node(int cpu, int node); 62extern void __cpuinit numa_set_node(int cpu, int node);
39extern void __cpuinit numa_clear_node(int cpu); 63extern void __cpuinit numa_clear_node(int cpu);
40extern void __init numa_init_array(void);
41extern void __init init_cpu_to_node(void); 64extern void __init init_cpu_to_node(void);
42extern void __cpuinit numa_add_cpu(int cpu); 65extern void __cpuinit numa_add_cpu(int cpu);
43extern void __cpuinit numa_remove_cpu(int cpu); 66extern void __cpuinit numa_remove_cpu(int cpu);
44#else /* CONFIG_NUMA */ 67#else /* CONFIG_NUMA */
45static inline void numa_set_node(int cpu, int node) { } 68static inline void numa_set_node(int cpu, int node) { }
46static inline void numa_clear_node(int cpu) { } 69static inline void numa_clear_node(int cpu) { }
47static inline void numa_init_array(void) { }
48static inline void init_cpu_to_node(void) { } 70static inline void init_cpu_to_node(void) { }
49static inline void numa_add_cpu(int cpu) { } 71static inline void numa_add_cpu(int cpu) { }
50static inline void numa_remove_cpu(int cpu) { } 72static inline void numa_remove_cpu(int cpu) { }
@@ -54,4 +76,10 @@ static inline void numa_remove_cpu(int cpu) { }
54void debug_cpumask_set_cpu(int cpu, int node, bool enable); 76void debug_cpumask_set_cpu(int cpu, int node, bool enable);
55#endif 77#endif
56 78
79#ifdef CONFIG_NUMA_EMU
80#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
81#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
82void numa_emu_cmdline(char *);
83#endif /* CONFIG_NUMA_EMU */
84
57#endif /* _ASM_X86_NUMA_H */ 85#endif /* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index c6beed1ef103..e7d6b8254742 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -1,16 +1,6 @@
1#ifndef _ASM_X86_NUMA_32_H 1#ifndef _ASM_X86_NUMA_32_H
2#define _ASM_X86_NUMA_32_H 2#define _ASM_X86_NUMA_32_H
3 3
4extern int numa_off;
5
6extern int pxm_to_nid(int pxm);
7
8#ifdef CONFIG_NUMA
9extern int __cpuinit numa_cpu_node(int cpu);
10#else /* CONFIG_NUMA */
11static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
12#endif /* CONFIG_NUMA */
13
14#ifdef CONFIG_HIGHMEM 4#ifdef CONFIG_HIGHMEM
15extern void set_highmem_pages_init(void); 5extern void set_highmem_pages_init(void);
16#else 6#else
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 344eb1790b46..0c05f7ae46e8 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -1,42 +1,6 @@
1#ifndef _ASM_X86_NUMA_64_H 1#ifndef _ASM_X86_NUMA_64_H
2#define _ASM_X86_NUMA_64_H 2#define _ASM_X86_NUMA_64_H
3 3
4#include <linux/nodemask.h>
5
6struct bootnode {
7 u64 start;
8 u64 end;
9};
10
11#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
12
13extern int numa_off;
14
15extern unsigned long numa_free_all_bootmem(void); 4extern unsigned long numa_free_all_bootmem(void);
16extern void setup_node_bootmem(int nodeid, unsigned long start,
17 unsigned long end);
18
19#ifdef CONFIG_NUMA
20/*
21 * Too small node sizes may confuse the VM badly. Usually they
22 * result from BIOS bugs. So dont recognize nodes as standalone
23 * NUMA entities that have less than this amount of RAM listed:
24 */
25#define NODE_MIN_SIZE (4*1024*1024)
26
27extern nodemask_t numa_nodes_parsed __initdata;
28
29extern int __cpuinit numa_cpu_node(int cpu);
30extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
31extern void __init numa_set_distance(int from, int to, int distance);
32
33#ifdef CONFIG_NUMA_EMU
34#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
35#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
36void numa_emu_cmdline(char *);
37#endif /* CONFIG_NUMA_EMU */
38#else
39static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
40#endif
41 5
42#endif /* _ASM_X86_NUMA_64_H */ 6#endif /* _ASM_X86_NUMA_64_H */
diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h
index 37c516545ec8..c3b3c322fd87 100644
--- a/arch/x86/include/asm/numaq.h
+++ b/arch/x86/include/asm/numaq.h
@@ -29,7 +29,7 @@
29#ifdef CONFIG_X86_NUMAQ 29#ifdef CONFIG_X86_NUMAQ
30 30
31extern int found_numaq; 31extern int found_numaq;
32extern int get_memcfg_numaq(void); 32extern int numaq_numa_init(void);
33extern int pci_numaq_init(void); 33extern int pci_numaq_init(void);
34 34
35extern void *xquad_portio; 35extern void *xquad_portio;
@@ -166,11 +166,6 @@ struct sys_cfg_data {
166 166
167void numaq_tsc_disable(void); 167void numaq_tsc_disable(void);
168 168
169#else
170static inline int get_memcfg_numaq(void)
171{
172 return 0;
173}
174#endif /* CONFIG_X86_NUMAQ */ 169#endif /* CONFIG_X86_NUMAQ */
175#endif /* _ASM_X86_NUMAQ_H */ 170#endif /* _ASM_X86_NUMAQ_H */
176 171
diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h
index c5d3a5abbb9f..24487712e0b1 100644
--- a/arch/x86/include/asm/olpc_ofw.h
+++ b/arch/x86/include/asm/olpc_ofw.h
@@ -26,15 +26,12 @@ extern void setup_olpc_ofw_pgd(void);
26/* check if OFW was detected during boot */ 26/* check if OFW was detected during boot */
27extern bool olpc_ofw_present(void); 27extern bool olpc_ofw_present(void);
28 28
29extern void olpc_dt_build_devicetree(void);
30
29#else /* !CONFIG_OLPC */ 31#else /* !CONFIG_OLPC */
30static inline void olpc_ofw_detect(void) { } 32static inline void olpc_ofw_detect(void) { }
31static inline void setup_olpc_ofw_pgd(void) { } 33static inline void setup_olpc_ofw_pgd(void) { }
32#endif /* !CONFIG_OLPC */
33
34#ifdef CONFIG_OF_PROMTREE
35extern void olpc_dt_build_devicetree(void);
36#else
37static inline void olpc_dt_build_devicetree(void) { } 34static inline void olpc_dt_build_devicetree(void) { }
38#endif 35#endif /* !CONFIG_OLPC */
39 36
40#endif /* _ASM_X86_OLPC_OFW_H */ 37#endif /* _ASM_X86_OLPC_OFW_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index d475b4398d8b..53278b0dfdf6 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -517,7 +517,7 @@ do { \
517 typeof(o2) __o2 = o2; \ 517 typeof(o2) __o2 = o2; \
518 typeof(o2) __n2 = n2; \ 518 typeof(o2) __n2 = n2; \
519 typeof(o2) __dummy; \ 519 typeof(o2) __dummy; \
520 alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \ 520 alternative_io("call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP4, \
521 "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \ 521 "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \
522 X86_FEATURE_CX16, \ 522 X86_FEATURE_CX16, \
523 ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ 523 ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \
@@ -542,6 +542,33 @@ do { \
542 old__; \ 542 old__; \
543}) 543})
544 544
545static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
546 const unsigned long __percpu *addr)
547{
548 unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
549
550 return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0;
551}
552
553static inline int x86_this_cpu_variable_test_bit(int nr,
554 const unsigned long __percpu *addr)
555{
556 int oldbit;
557
558 asm volatile("bt "__percpu_arg(2)",%1\n\t"
559 "sbb %0,%0"
560 : "=r" (oldbit)
561 : "m" (*(unsigned long *)addr), "Ir" (nr));
562
563 return oldbit;
564}
565
566#define x86_this_cpu_test_bit(nr, addr) \
567 (__builtin_constant_p((nr)) \
568 ? x86_this_cpu_constant_test_bit((nr), (addr)) \
569 : x86_this_cpu_variable_test_bit((nr), (addr)))
570
571
545#include <asm-generic/percpu.h> 572#include <asm-generic/percpu.h>
546 573
547/* We can use this directly for local CPU (faster). */ 574/* We can use this directly for local CPU (faster). */
diff --git a/arch/x86/include/asm/probe_roms.h b/arch/x86/include/asm/probe_roms.h
new file mode 100644
index 000000000000..4950a0b1d09c
--- /dev/null
+++ b/arch/x86/include/asm/probe_roms.h
@@ -0,0 +1,8 @@
1#ifndef _PROBE_ROMS_H_
2#define _PROBE_ROMS_H_
3struct pci_dev;
4
5extern void __iomem *pci_map_biosrom(struct pci_dev *pdev);
6extern void pci_unmap_biosrom(void __iomem *rom);
7extern size_t pci_biosrom_size(struct pci_dev *pdev);
8#endif
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index a898a2b6e10c..59ab4dffa377 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -60,6 +60,7 @@
60#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ 60#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
61#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ 61#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */
62#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ 62#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
63#define X86_CR4_SMEP 0x00100000 /* enable SMEP support */
63 64
64/* 65/*
65 * x86-64 Task Priority Register, CR8 66 * x86-64 Task Priority Register, CR8
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index db8aa19a08a2..9756551ec760 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -88,7 +88,7 @@ void *extend_brk(size_t size, size_t align);
88 * executable.) 88 * executable.)
89 */ 89 */
90#define RESERVE_BRK(name,sz) \ 90#define RESERVE_BRK(name,sz) \
91 static void __section(.discard.text) __used \ 91 static void __section(.discard.text) __used notrace \
92 __brk_reservation_fn_##name##__(void) { \ 92 __brk_reservation_fn_##name##__(void) { \
93 asm volatile ( \ 93 asm volatile ( \
94 ".pushsection .brk_reservation,\"aw\",@nobits;" \ 94 ".pushsection .brk_reservation,\"aw\",@nobits;" \
@@ -104,10 +104,10 @@ void *extend_brk(size_t size, size_t align);
104 type *name; \ 104 type *name; \
105 RESERVE_BRK(name, sizeof(type) * entries) 105 RESERVE_BRK(name, sizeof(type) * entries)
106 106
107extern void probe_roms(void);
107#ifdef __i386__ 108#ifdef __i386__
108 109
109void __init i386_start_kernel(void); 110void __init i386_start_kernel(void);
110extern void probe_roms(void);
111 111
112#else 112#else
113void __init x86_64_start_kernel(char *real_mode); 113void __init x86_64_start_kernel(char *real_mode);
diff --git a/arch/x86/include/asm/srat.h b/arch/x86/include/asm/srat.h
deleted file mode 100644
index b508d639d1a7..000000000000
--- a/arch/x86/include/asm/srat.h
+++ /dev/null
@@ -1,39 +0,0 @@
1/*
2 * Some of the code in this file has been gleaned from the 64 bit
3 * discontigmem support code base.
4 *
5 * Copyright (C) 2002, IBM Corp.
6 *
7 * All rights reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
17 * NON INFRINGEMENT. See the GNU General Public License for more
18 * details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 * Send feedback to Pat Gaughen <gone@us.ibm.com>
25 */
26
27#ifndef _ASM_X86_SRAT_H
28#define _ASM_X86_SRAT_H
29
30#ifdef CONFIG_ACPI_NUMA
31extern int get_memcfg_from_srat(void);
32#else
33static inline int get_memcfg_from_srat(void)
34{
35 return 0;
36}
37#endif
38
39#endif /* _ASM_X86_SRAT_H */
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index d7e89c83645d..70bbe39043a9 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -37,9 +37,6 @@ print_context_stack_bp(struct thread_info *tinfo,
37/* Generic stack tracer with callbacks */ 37/* Generic stack tracer with callbacks */
38 38
39struct stacktrace_ops { 39struct stacktrace_ops {
40 void (*warning)(void *data, char *msg);
41 /* msg must contain %s for the symbol */
42 void (*warning_symbol)(void *data, char *msg, unsigned long symbol);
43 void (*address)(void *data, unsigned long address, int reliable); 40 void (*address)(void *data, unsigned long address, int reliable);
44 /* On negative return stop dumping */ 41 /* On negative return stop dumping */
45 int (*stack)(void *data, char *name); 42 int (*stack)(void *data, char *name);
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 12569e691ce3..c2ff2a1d845e 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -303,24 +303,81 @@ static inline void native_wbinvd(void)
303#ifdef CONFIG_PARAVIRT 303#ifdef CONFIG_PARAVIRT
304#include <asm/paravirt.h> 304#include <asm/paravirt.h>
305#else 305#else
306#define read_cr0() (native_read_cr0()) 306
307#define write_cr0(x) (native_write_cr0(x)) 307static inline unsigned long read_cr0(void)
308#define read_cr2() (native_read_cr2()) 308{
309#define write_cr2(x) (native_write_cr2(x)) 309 return native_read_cr0();
310#define read_cr3() (native_read_cr3()) 310}
311#define write_cr3(x) (native_write_cr3(x)) 311
312#define read_cr4() (native_read_cr4()) 312static inline void write_cr0(unsigned long x)
313#define read_cr4_safe() (native_read_cr4_safe()) 313{
314#define write_cr4(x) (native_write_cr4(x)) 314 native_write_cr0(x);
315#define wbinvd() (native_wbinvd()) 315}
316
317static inline unsigned long read_cr2(void)
318{
319 return native_read_cr2();
320}
321
322static inline void write_cr2(unsigned long x)
323{
324 native_write_cr2(x);
325}
326
327static inline unsigned long read_cr3(void)
328{
329 return native_read_cr3();
330}
331
332static inline void write_cr3(unsigned long x)
333{
334 native_write_cr3(x);
335}
336
337static inline unsigned long read_cr4(void)
338{
339 return native_read_cr4();
340}
341
342static inline unsigned long read_cr4_safe(void)
343{
344 return native_read_cr4_safe();
345}
346
347static inline void write_cr4(unsigned long x)
348{
349 native_write_cr4(x);
350}
351
352static inline void wbinvd(void)
353{
354 native_wbinvd();
355}
356
316#ifdef CONFIG_X86_64 357#ifdef CONFIG_X86_64
317#define read_cr8() (native_read_cr8()) 358
318#define write_cr8(x) (native_write_cr8(x)) 359static inline unsigned long read_cr8(void)
319#define load_gs_index native_load_gs_index 360{
361 return native_read_cr8();
362}
363
364static inline void write_cr8(unsigned long x)
365{
366 native_write_cr8(x);
367}
368
369static inline void load_gs_index(unsigned selector)
370{
371 native_load_gs_index(selector);
372}
373
320#endif 374#endif
321 375
322/* Clear the 'TS' bit */ 376/* Clear the 'TS' bit */
323#define clts() (native_clts()) 377static inline void clts(void)
378{
379 native_clts();
380}
324 381
325#endif/* CONFIG_PARAVIRT */ 382#endif/* CONFIG_PARAVIRT */
326 383
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 910a7084f7f2..c00692476e9f 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -93,19 +93,11 @@ extern void setup_node_to_cpumask_map(void);
93#define pcibus_to_node(bus) __pcibus_to_node(bus) 93#define pcibus_to_node(bus) __pcibus_to_node(bus)
94 94
95#ifdef CONFIG_X86_32 95#ifdef CONFIG_X86_32
96extern unsigned long node_start_pfn[];
97extern unsigned long node_end_pfn[];
98extern unsigned long node_remap_size[];
99#define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid])
100
101# define SD_CACHE_NICE_TRIES 1 96# define SD_CACHE_NICE_TRIES 1
102# define SD_IDLE_IDX 1 97# define SD_IDLE_IDX 1
103
104#else 98#else
105
106# define SD_CACHE_NICE_TRIES 2 99# define SD_CACHE_NICE_TRIES 2
107# define SD_IDLE_IDX 2 100# define SD_IDLE_IDX 2
108
109#endif 101#endif
110 102
111/* sched_domains SD_NODE_INIT for NUMA machines */ 103/* sched_domains SD_NODE_INIT for NUMA machines */
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index abd3e0ea762a..99ddd148a760 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -6,7 +6,6 @@
6#include <linux/errno.h> 6#include <linux/errno.h>
7#include <linux/compiler.h> 7#include <linux/compiler.h>
8#include <linux/thread_info.h> 8#include <linux/thread_info.h>
9#include <linux/prefetch.h>
10#include <linux/string.h> 9#include <linux/string.h>
11#include <asm/asm.h> 10#include <asm/asm.h>
12#include <asm/page.h> 11#include <asm/page.h>
@@ -42,7 +41,7 @@
42 * Returns 0 if the range is valid, nonzero otherwise. 41 * Returns 0 if the range is valid, nonzero otherwise.
43 * 42 *
44 * This is equivalent to the following test: 43 * This is equivalent to the following test:
45 * (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64) 44 * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64)
46 * 45 *
47 * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... 46 * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
48 */ 47 */
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 088d09fb1615..566e803cc602 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -6,7 +6,6 @@
6 */ 6 */
7#include <linux/errno.h> 7#include <linux/errno.h>
8#include <linux/thread_info.h> 8#include <linux/thread_info.h>
9#include <linux/prefetch.h>
10#include <linux/string.h> 9#include <linux/string.h>
11#include <asm/asm.h> 10#include <asm/asm.h>
12#include <asm/page.h> 11#include <asm/page.h>
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 316708d5af92..1c66d30971ad 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -6,7 +6,6 @@
6 */ 6 */
7#include <linux/compiler.h> 7#include <linux/compiler.h>
8#include <linux/errno.h> 8#include <linux/errno.h>
9#include <linux/prefetch.h>
10#include <linux/lockdep.h> 9#include <linux/lockdep.h>
11#include <asm/alternative.h> 10#include <asm/alternative.h>
12#include <asm/cpufeature.h> 11#include <asm/cpufeature.h>
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index a755ef5e5977..fb6a625c99bf 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -350,10 +350,11 @@
350#define __NR_open_by_handle_at 342 350#define __NR_open_by_handle_at 342
351#define __NR_clock_adjtime 343 351#define __NR_clock_adjtime 343
352#define __NR_syncfs 344 352#define __NR_syncfs 344
353#define __NR_sendmmsg 345
353 354
354#ifdef __KERNEL__ 355#ifdef __KERNEL__
355 356
356#define NR_syscalls 345 357#define NR_syscalls 346
357 358
358#define __ARCH_WANT_IPC_PARSE_VERSION 359#define __ARCH_WANT_IPC_PARSE_VERSION
359#define __ARCH_WANT_OLD_READDIR 360#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 160fa76bd578..79f90eb15aad 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -677,6 +677,8 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
677__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) 677__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
678#define __NR_syncfs 306 678#define __NR_syncfs 306
679__SYSCALL(__NR_syncfs, sys_syncfs) 679__SYSCALL(__NR_syncfs, sys_syncfs)
680#define __NR_sendmmsg 307
681__SYSCALL(__NR_sendmmsg, sys_sendmmsg)
680 682
681#ifndef __NO_STUBS 683#ifndef __NO_STUBS
682#define __ARCH_WANT_OLD_READDIR 684#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 3e094af443c3..130f1eeee5fe 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -94,6 +94,8 @@
94/* after this # consecutive successes, bump up the throttle if it was lowered */ 94/* after this # consecutive successes, bump up the throttle if it was lowered */
95#define COMPLETE_THRESHOLD 5 95#define COMPLETE_THRESHOLD 5
96 96
97#define UV_LB_SUBNODEID 0x10
98
97/* 99/*
98 * number of entries in the destination side payload queue 100 * number of entries in the destination side payload queue
99 */ 101 */
@@ -124,7 +126,7 @@
124 * The distribution specification (32 bytes) is interpreted as a 256-bit 126 * The distribution specification (32 bytes) is interpreted as a 256-bit
125 * distribution vector. Adjacent bits correspond to consecutive even numbered 127 * distribution vector. Adjacent bits correspond to consecutive even numbered
126 * nodeIDs. The result of adding the index of a given bit to the 15-bit 128 * nodeIDs. The result of adding the index of a given bit to the 15-bit
127 * 'base_dest_nodeid' field of the header corresponds to the 129 * 'base_dest_nasid' field of the header corresponds to the
128 * destination nodeID associated with that specified bit. 130 * destination nodeID associated with that specified bit.
129 */ 131 */
130struct bau_target_uvhubmask { 132struct bau_target_uvhubmask {
@@ -176,7 +178,7 @@ struct bau_msg_payload {
176struct bau_msg_header { 178struct bau_msg_header {
177 unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ 179 unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
178 /* bits 5:0 */ 180 /* bits 5:0 */
179 unsigned int base_dest_nodeid:15; /* nasid of the */ 181 unsigned int base_dest_nasid:15; /* nasid of the */
180 /* bits 20:6 */ /* first bit in uvhub map */ 182 /* bits 20:6 */ /* first bit in uvhub map */
181 unsigned int command:8; /* message type */ 183 unsigned int command:8; /* message type */
182 /* bits 28:21 */ 184 /* bits 28:21 */
@@ -378,6 +380,10 @@ struct ptc_stats {
378 unsigned long d_rcanceled; /* number of messages canceled by resets */ 380 unsigned long d_rcanceled; /* number of messages canceled by resets */
379}; 381};
380 382
383struct hub_and_pnode {
384 short uvhub;
385 short pnode;
386};
381/* 387/*
382 * one per-cpu; to locate the software tables 388 * one per-cpu; to locate the software tables
383 */ 389 */
@@ -399,10 +405,12 @@ struct bau_control {
399 int baudisabled; 405 int baudisabled;
400 int set_bau_off; 406 int set_bau_off;
401 short cpu; 407 short cpu;
408 short osnode;
402 short uvhub_cpu; 409 short uvhub_cpu;
403 short uvhub; 410 short uvhub;
404 short cpus_in_socket; 411 short cpus_in_socket;
405 short cpus_in_uvhub; 412 short cpus_in_uvhub;
413 short partition_base_pnode;
406 unsigned short message_number; 414 unsigned short message_number;
407 unsigned short uvhub_quiesce; 415 unsigned short uvhub_quiesce;
408 short socket_acknowledge_count[DEST_Q_SIZE]; 416 short socket_acknowledge_count[DEST_Q_SIZE];
@@ -422,15 +430,16 @@ struct bau_control {
422 int congested_period; 430 int congested_period;
423 cycles_t period_time; 431 cycles_t period_time;
424 long period_requests; 432 long period_requests;
433 struct hub_and_pnode *target_hub_and_pnode;
425}; 434};
426 435
427static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) 436static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp)
428{ 437{
429 return constant_test_bit(uvhub, &dstp->bits[0]); 438 return constant_test_bit(uvhub, &dstp->bits[0]);
430} 439}
431static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp) 440static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp)
432{ 441{
433 __set_bit(uvhub, &dstp->bits[0]); 442 __set_bit(pnode, &dstp->bits[0]);
434} 443}
435static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, 444static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp,
436 int nbits) 445 int nbits)
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index a501741c2335..4298002d0c83 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -398,6 +398,8 @@ struct uv_blade_info {
398 unsigned short nr_online_cpus; 398 unsigned short nr_online_cpus;
399 unsigned short pnode; 399 unsigned short pnode;
400 short memory_nid; 400 short memory_nid;
401 spinlock_t nmi_lock;
402 unsigned long nmi_count;
401}; 403};
402extern struct uv_blade_info *uv_blade_info; 404extern struct uv_blade_info *uv_blade_info;
403extern short *uv_node_to_blade; 405extern short *uv_node_to_blade;
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 20cafeac7455..f5bb64a823d7 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,7 +5,7 @@
5 * 5 *
6 * SGI UV MMR definitions 6 * SGI UV MMR definitions
7 * 7 *
8 * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10 10
11#ifndef _ASM_X86_UV_UV_MMRS_H 11#ifndef _ASM_X86_UV_UV_MMRS_H
@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u {
1099 } s; 1099 } s;
1100}; 1100};
1101 1101
1102/* ========================================================================= */
1103/* UVH_SCRATCH5 */
1104/* ========================================================================= */
1105#define UVH_SCRATCH5 0x2d0200UL
1106#define UVH_SCRATCH5_32 0x00778
1107
1108#define UVH_SCRATCH5_SCRATCH5_SHFT 0
1109#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
1110union uvh_scratch5_u {
1111 unsigned long v;
1112 struct uvh_scratch5_s {
1113 unsigned long scratch5 : 64; /* RW, W1CS */
1114 } s;
1115};
1102 1116
1103#endif /* __ASM_UV_MMRS_X86_H__ */ 1117#endif /* __ASM_UV_MMRS_X86_H__ */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index c61934fbf22a..64a619d47d34 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -47,8 +47,9 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
47extern unsigned long set_phys_range_identity(unsigned long pfn_s, 47extern unsigned long set_phys_range_identity(unsigned long pfn_s,
48 unsigned long pfn_e); 48 unsigned long pfn_e);
49 49
50extern int m2p_add_override(unsigned long mfn, struct page *page); 50extern int m2p_add_override(unsigned long mfn, struct page *page,
51extern int m2p_remove_override(struct page *page); 51 bool clear_pte);
52extern int m2p_remove_override(struct page *page, bool clear_pte);
52extern struct page *m2p_find_override(unsigned long mfn); 53extern struct page *m2p_find_override(unsigned long mfn);
53extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); 54extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
54 55
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index aa8620989162..4fbda9a3f339 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -15,10 +15,26 @@ static inline int pci_xen_hvm_init(void)
15#endif 15#endif
16#if defined(CONFIG_XEN_DOM0) 16#if defined(CONFIG_XEN_DOM0)
17void __init xen_setup_pirqs(void); 17void __init xen_setup_pirqs(void);
18int xen_find_device_domain_owner(struct pci_dev *dev);
19int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
20int xen_unregister_device_domain_owner(struct pci_dev *dev);
18#else 21#else
19static inline void __init xen_setup_pirqs(void) 22static inline void __init xen_setup_pirqs(void)
20{ 23{
21} 24}
25static inline int xen_find_device_domain_owner(struct pci_dev *dev)
26{
27 return -1;
28}
29static inline int xen_register_device_domain_owner(struct pci_dev *dev,
30 uint16_t domain)
31{
32 return -1;
33}
34static inline int xen_unregister_device_domain_owner(struct pci_dev *dev)
35{
36 return -1;
37}
22#endif 38#endif
23 39
24#if defined(CONFIG_PCI_MSI) 40#if defined(CONFIG_PCI_MSI)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 7338ef2218bc..250806472a7e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -36,7 +36,7 @@ obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
36obj-y += time.o ioport.o ldt.o dumpstack.o 36obj-y += time.o ioport.o ldt.o dumpstack.o
37obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o 37obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
38obj-$(CONFIG_IRQ_WORK) += irq_work.o 38obj-$(CONFIG_IRQ_WORK) += irq_work.o
39obj-$(CONFIG_X86_32) += probe_roms_32.o 39obj-y += probe_roms.o
40obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 40obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
41obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o 41obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
42obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o 42obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
@@ -117,7 +117,7 @@ obj-$(CONFIG_OF) += devicetree.o
117ifeq ($(CONFIG_X86_64),y) 117ifeq ($(CONFIG_X86_64),y)
118 obj-$(CONFIG_AUDIT) += audit_64.o 118 obj-$(CONFIG_AUDIT) += audit_64.o
119 119
120 obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o 120 obj-$(CONFIG_GART_IOMMU) += amd_gart_64.o aperture_64.o
121 obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o 121 obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
122 obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o 122 obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
123 123
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index ff93bc1b09c3..18a857ba7a25 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -112,11 +112,6 @@ static int __init acpi_sleep_setup(char *str)
112#ifdef CONFIG_HIBERNATION 112#ifdef CONFIG_HIBERNATION
113 if (strncmp(str, "s4_nohwsig", 10) == 0) 113 if (strncmp(str, "s4_nohwsig", 10) == 0)
114 acpi_no_s4_hw_signature(); 114 acpi_no_s4_hw_signature();
115 if (strncmp(str, "s4_nonvs", 8) == 0) {
116 pr_warning("ACPI: acpi_sleep=s4_nonvs is deprecated, "
117 "please use acpi_sleep=nonvs instead");
118 acpi_nvs_nosave();
119 }
120#endif 115#endif
121 if (strncmp(str, "nonvs", 5) == 0) 116 if (strncmp(str, "nonvs", 5) == 0)
122 acpi_nvs_nosave(); 117 acpi_nvs_nosave();
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 4a234677e213..a81f2d52f869 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -67,17 +67,30 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt);
67#define DPRINTK(fmt, args...) if (debug_alternative) \ 67#define DPRINTK(fmt, args...) if (debug_alternative) \
68 printk(KERN_DEBUG fmt, args) 68 printk(KERN_DEBUG fmt, args)
69 69
70/*
71 * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
72 * that correspond to that nop. Getting from one nop to the next, we
73 * add to the array the offset that is equal to the sum of all sizes of
74 * nops preceding the one we are after.
75 *
76 * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the
77 * nice symmetry of sizes of the previous nops.
78 */
70#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64) 79#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
71/* Use inline assembly to define this because the nops are defined 80static const unsigned char intelnops[] =
72 as inline assembly strings in the include files and we cannot 81{
73 get them easily into strings. */ 82 GENERIC_NOP1,
74asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nintelnops: " 83 GENERIC_NOP2,
75 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 84 GENERIC_NOP3,
76 GENERIC_NOP7 GENERIC_NOP8 85 GENERIC_NOP4,
77 "\t.previous"); 86 GENERIC_NOP5,
78extern const unsigned char intelnops[]; 87 GENERIC_NOP6,
79static const unsigned char *const __initconst_or_module 88 GENERIC_NOP7,
80intel_nops[ASM_NOP_MAX+1] = { 89 GENERIC_NOP8,
90 GENERIC_NOP5_ATOMIC
91};
92static const unsigned char * const intel_nops[ASM_NOP_MAX+2] =
93{
81 NULL, 94 NULL,
82 intelnops, 95 intelnops,
83 intelnops + 1, 96 intelnops + 1,
@@ -87,17 +100,25 @@ intel_nops[ASM_NOP_MAX+1] = {
87 intelnops + 1 + 2 + 3 + 4 + 5, 100 intelnops + 1 + 2 + 3 + 4 + 5,
88 intelnops + 1 + 2 + 3 + 4 + 5 + 6, 101 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
89 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, 102 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
103 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
90}; 104};
91#endif 105#endif
92 106
93#ifdef K8_NOP1 107#ifdef K8_NOP1
94asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk8nops: " 108static const unsigned char k8nops[] =
95 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 109{
96 K8_NOP7 K8_NOP8 110 K8_NOP1,
97 "\t.previous"); 111 K8_NOP2,
98extern const unsigned char k8nops[]; 112 K8_NOP3,
99static const unsigned char *const __initconst_or_module 113 K8_NOP4,
100k8_nops[ASM_NOP_MAX+1] = { 114 K8_NOP5,
115 K8_NOP6,
116 K8_NOP7,
117 K8_NOP8,
118 K8_NOP5_ATOMIC
119};
120static const unsigned char * const k8_nops[ASM_NOP_MAX+2] =
121{
101 NULL, 122 NULL,
102 k8nops, 123 k8nops,
103 k8nops + 1, 124 k8nops + 1,
@@ -107,17 +128,25 @@ k8_nops[ASM_NOP_MAX+1] = {
107 k8nops + 1 + 2 + 3 + 4 + 5, 128 k8nops + 1 + 2 + 3 + 4 + 5,
108 k8nops + 1 + 2 + 3 + 4 + 5 + 6, 129 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
109 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, 130 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
131 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
110}; 132};
111#endif 133#endif
112 134
113#if defined(K7_NOP1) && !defined(CONFIG_X86_64) 135#if defined(K7_NOP1) && !defined(CONFIG_X86_64)
114asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk7nops: " 136static const unsigned char k7nops[] =
115 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 137{
116 K7_NOP7 K7_NOP8 138 K7_NOP1,
117 "\t.previous"); 139 K7_NOP2,
118extern const unsigned char k7nops[]; 140 K7_NOP3,
119static const unsigned char *const __initconst_or_module 141 K7_NOP4,
120k7_nops[ASM_NOP_MAX+1] = { 142 K7_NOP5,
143 K7_NOP6,
144 K7_NOP7,
145 K7_NOP8,
146 K7_NOP5_ATOMIC
147};
148static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
149{
121 NULL, 150 NULL,
122 k7nops, 151 k7nops,
123 k7nops + 1, 152 k7nops + 1,
@@ -127,17 +156,25 @@ k7_nops[ASM_NOP_MAX+1] = {
127 k7nops + 1 + 2 + 3 + 4 + 5, 156 k7nops + 1 + 2 + 3 + 4 + 5,
128 k7nops + 1 + 2 + 3 + 4 + 5 + 6, 157 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
129 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, 158 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
159 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
130}; 160};
131#endif 161#endif
132 162
133#ifdef P6_NOP1 163#ifdef P6_NOP1
134asm("\t" __stringify(__INITRODATA_OR_MODULE) "\np6nops: " 164static const unsigned char __initconst_or_module p6nops[] =
135 P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 165{
136 P6_NOP7 P6_NOP8 166 P6_NOP1,
137 "\t.previous"); 167 P6_NOP2,
138extern const unsigned char p6nops[]; 168 P6_NOP3,
139static const unsigned char *const __initconst_or_module 169 P6_NOP4,
140p6_nops[ASM_NOP_MAX+1] = { 170 P6_NOP5,
171 P6_NOP6,
172 P6_NOP7,
173 P6_NOP8,
174 P6_NOP5_ATOMIC
175};
176static const unsigned char * const p6_nops[ASM_NOP_MAX+2] =
177{
141 NULL, 178 NULL,
142 p6nops, 179 p6nops,
143 p6nops + 1, 180 p6nops + 1,
@@ -147,47 +184,65 @@ p6_nops[ASM_NOP_MAX+1] = {
147 p6nops + 1 + 2 + 3 + 4 + 5, 184 p6nops + 1 + 2 + 3 + 4 + 5,
148 p6nops + 1 + 2 + 3 + 4 + 5 + 6, 185 p6nops + 1 + 2 + 3 + 4 + 5 + 6,
149 p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, 186 p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
187 p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
150}; 188};
151#endif 189#endif
152 190
191/* Initialize these to a safe default */
153#ifdef CONFIG_X86_64 192#ifdef CONFIG_X86_64
193const unsigned char * const *ideal_nops = p6_nops;
194#else
195const unsigned char * const *ideal_nops = intel_nops;
196#endif
154 197
155extern char __vsyscall_0; 198void __init arch_init_ideal_nops(void)
156static const unsigned char *const *__init_or_module find_nop_table(void)
157{ 199{
158 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 200 switch (boot_cpu_data.x86_vendor) {
159 boot_cpu_has(X86_FEATURE_NOPL)) 201 case X86_VENDOR_INTEL:
160 return p6_nops; 202 /*
161 else 203 * Due to a decoder implementation quirk, some
162 return k8_nops; 204 * specific Intel CPUs actually perform better with
163} 205 * the "k8_nops" than with the SDM-recommended NOPs.
164 206 */
165#else /* CONFIG_X86_64 */ 207 if (boot_cpu_data.x86 == 6 &&
208 boot_cpu_data.x86_model >= 0x0f &&
209 boot_cpu_data.x86_model != 0x1c &&
210 boot_cpu_data.x86_model != 0x26 &&
211 boot_cpu_data.x86_model != 0x27 &&
212 boot_cpu_data.x86_model < 0x30) {
213 ideal_nops = k8_nops;
214 } else if (boot_cpu_has(X86_FEATURE_NOPL)) {
215 ideal_nops = p6_nops;
216 } else {
217#ifdef CONFIG_X86_64
218 ideal_nops = k8_nops;
219#else
220 ideal_nops = intel_nops;
221#endif
222 }
166 223
167static const unsigned char *const *__init_or_module find_nop_table(void) 224 default:
168{ 225#ifdef CONFIG_X86_64
169 if (boot_cpu_has(X86_FEATURE_K8)) 226 ideal_nops = k8_nops;
170 return k8_nops; 227#else
171 else if (boot_cpu_has(X86_FEATURE_K7)) 228 if (boot_cpu_has(X86_FEATURE_K8))
172 return k7_nops; 229 ideal_nops = k8_nops;
173 else if (boot_cpu_has(X86_FEATURE_NOPL)) 230 else if (boot_cpu_has(X86_FEATURE_K7))
174 return p6_nops; 231 ideal_nops = k7_nops;
175 else 232 else
176 return intel_nops; 233 ideal_nops = intel_nops;
234#endif
235 }
177} 236}
178 237
179#endif /* CONFIG_X86_64 */
180
181/* Use this to add nops to a buffer, then text_poke the whole buffer. */ 238/* Use this to add nops to a buffer, then text_poke the whole buffer. */
182static void __init_or_module add_nops(void *insns, unsigned int len) 239static void __init_or_module add_nops(void *insns, unsigned int len)
183{ 240{
184 const unsigned char *const *noptable = find_nop_table();
185
186 while (len > 0) { 241 while (len > 0) {
187 unsigned int noplen = len; 242 unsigned int noplen = len;
188 if (noplen > ASM_NOP_MAX) 243 if (noplen > ASM_NOP_MAX)
189 noplen = ASM_NOP_MAX; 244 noplen = ASM_NOP_MAX;
190 memcpy(insns, noptable[noplen], noplen); 245 memcpy(insns, ideal_nops[noplen], noplen);
191 insns += noplen; 246 insns += noplen;
192 len -= noplen; 247 len -= noplen;
193 } 248 }
@@ -195,6 +250,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
195 250
196extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; 251extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
197extern s32 __smp_locks[], __smp_locks_end[]; 252extern s32 __smp_locks[], __smp_locks_end[];
253extern char __vsyscall_0;
198void *text_poke_early(void *addr, const void *opcode, size_t len); 254void *text_poke_early(void *addr, const void *opcode, size_t len);
199 255
200/* Replace instructions with better alternatives for this CPU type. 256/* Replace instructions with better alternatives for this CPU type.
@@ -210,6 +266,15 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
210 u8 insnbuf[MAX_PATCH_LEN]; 266 u8 insnbuf[MAX_PATCH_LEN];
211 267
212 DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); 268 DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
269 /*
270 * The scan order should be from start to end. A later scanned
271 * alternative code can overwrite a previous scanned alternative code.
272 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
273 * patch code.
274 *
275 * So be careful if you want to change the scan order to any other
276 * order.
277 */
213 for (a = start; a < end; a++) { 278 for (a = start; a < end; a++) {
214 u8 *instr = a->instr; 279 u8 *instr = a->instr;
215 BUG_ON(a->replacementlen > a->instrlen); 280 BUG_ON(a->replacementlen > a->instrlen);
@@ -678,29 +743,3 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
678 wrote_text = 0; 743 wrote_text = 0;
679 __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); 744 __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
680} 745}
681
682#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
683
684#ifdef CONFIG_X86_64
685unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
686#else
687unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
688#endif
689
690void __init arch_init_ideal_nop5(void)
691{
692 /*
693 * There is no good nop for all x86 archs. This selection
694 * algorithm should be unified with the one in find_nop_table(),
695 * but this should be good enough for now.
696 *
697 * For cases other than the ones below, use the safe (as in
698 * always functional) defaults above.
699 */
700#ifdef CONFIG_X86_64
701 /* Don't use these on 32 bits due to broken virtualizers */
702 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
703 memcpy(ideal_nop5, p6_nops[5], 5);
704#endif
705}
706#endif
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/amd_gart_64.c
index b117efd24f71..b117efd24f71 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 57ca77787220..873e7e1ead7b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -18,6 +18,7 @@
18 */ 18 */
19 19
20#include <linux/pci.h> 20#include <linux/pci.h>
21#include <linux/pci-ats.h>
21#include <linux/bitmap.h> 22#include <linux/bitmap.h>
22#include <linux/slab.h> 23#include <linux/slab.h>
23#include <linux/debugfs.h> 24#include <linux/debugfs.h>
@@ -25,6 +26,7 @@
25#include <linux/dma-mapping.h> 26#include <linux/dma-mapping.h>
26#include <linux/iommu-helper.h> 27#include <linux/iommu-helper.h>
27#include <linux/iommu.h> 28#include <linux/iommu.h>
29#include <linux/delay.h>
28#include <asm/proto.h> 30#include <asm/proto.h>
29#include <asm/iommu.h> 31#include <asm/iommu.h>
30#include <asm/gart.h> 32#include <asm/gart.h>
@@ -34,7 +36,7 @@
34 36
35#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) 37#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
36 38
37#define EXIT_LOOP_COUNT 10000000 39#define LOOP_TIMEOUT 100000
38 40
39static DEFINE_RWLOCK(amd_iommu_devtable_lock); 41static DEFINE_RWLOCK(amd_iommu_devtable_lock);
40 42
@@ -57,7 +59,6 @@ struct iommu_cmd {
57 u32 data[4]; 59 u32 data[4];
58}; 60};
59 61
60static void reset_iommu_command_buffer(struct amd_iommu *iommu);
61static void update_domain(struct protection_domain *domain); 62static void update_domain(struct protection_domain *domain);
62 63
63/**************************************************************************** 64/****************************************************************************
@@ -322,8 +323,6 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
322 break; 323 break;
323 case EVENT_TYPE_ILL_CMD: 324 case EVENT_TYPE_ILL_CMD:
324 printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); 325 printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
325 iommu->reset_in_progress = true;
326 reset_iommu_command_buffer(iommu);
327 dump_command(address); 326 dump_command(address);
328 break; 327 break;
329 case EVENT_TYPE_CMD_HARD_ERR: 328 case EVENT_TYPE_CMD_HARD_ERR:
@@ -367,7 +366,7 @@ static void iommu_poll_events(struct amd_iommu *iommu)
367 spin_unlock_irqrestore(&iommu->lock, flags); 366 spin_unlock_irqrestore(&iommu->lock, flags);
368} 367}
369 368
370irqreturn_t amd_iommu_int_handler(int irq, void *data) 369irqreturn_t amd_iommu_int_thread(int irq, void *data)
371{ 370{
372 struct amd_iommu *iommu; 371 struct amd_iommu *iommu;
373 372
@@ -377,192 +376,300 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
377 return IRQ_HANDLED; 376 return IRQ_HANDLED;
378} 377}
379 378
379irqreturn_t amd_iommu_int_handler(int irq, void *data)
380{
381 return IRQ_WAKE_THREAD;
382}
383
380/**************************************************************************** 384/****************************************************************************
381 * 385 *
382 * IOMMU command queuing functions 386 * IOMMU command queuing functions
383 * 387 *
384 ****************************************************************************/ 388 ****************************************************************************/
385 389
386/* 390static int wait_on_sem(volatile u64 *sem)
387 * Writes the command to the IOMMUs command buffer and informs the 391{
388 * hardware about the new command. Must be called with iommu->lock held. 392 int i = 0;
389 */ 393
390static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) 394 while (*sem == 0 && i < LOOP_TIMEOUT) {
395 udelay(1);
396 i += 1;
397 }
398
399 if (i == LOOP_TIMEOUT) {
400 pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
401 return -EIO;
402 }
403
404 return 0;
405}
406
407static void copy_cmd_to_buffer(struct amd_iommu *iommu,
408 struct iommu_cmd *cmd,
409 u32 tail)
391{ 410{
392 u32 tail, head;
393 u8 *target; 411 u8 *target;
394 412
395 WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
396 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
397 target = iommu->cmd_buf + tail; 413 target = iommu->cmd_buf + tail;
398 memcpy_toio(target, cmd, sizeof(*cmd)); 414 tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
399 tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; 415
400 head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 416 /* Copy command to buffer */
401 if (tail == head) 417 memcpy(target, cmd, sizeof(*cmd));
402 return -ENOMEM; 418
419 /* Tell the IOMMU about it */
403 writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 420 writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
421}
404 422
405 return 0; 423static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
424{
425 WARN_ON(address & 0x7ULL);
426
427 memset(cmd, 0, sizeof(*cmd));
428 cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK;
429 cmd->data[1] = upper_32_bits(__pa(address));
430 cmd->data[2] = 1;
431 CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
432}
433
434static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
435{
436 memset(cmd, 0, sizeof(*cmd));
437 cmd->data[0] = devid;
438 CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
439}
440
441static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
442 size_t size, u16 domid, int pde)
443{
444 u64 pages;
445 int s;
446
447 pages = iommu_num_pages(address, size, PAGE_SIZE);
448 s = 0;
449
450 if (pages > 1) {
451 /*
452 * If we have to flush more than one page, flush all
453 * TLB entries for this domain
454 */
455 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
456 s = 1;
457 }
458
459 address &= PAGE_MASK;
460
461 memset(cmd, 0, sizeof(*cmd));
462 cmd->data[1] |= domid;
463 cmd->data[2] = lower_32_bits(address);
464 cmd->data[3] = upper_32_bits(address);
465 CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
466 if (s) /* size bit - we flush more than one 4kb page */
467 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
468 if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
469 cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
470}
471
472static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
473 u64 address, size_t size)
474{
475 u64 pages;
476 int s;
477
478 pages = iommu_num_pages(address, size, PAGE_SIZE);
479 s = 0;
480
481 if (pages > 1) {
482 /*
483 * If we have to flush more than one page, flush all
484 * TLB entries for this domain
485 */
486 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
487 s = 1;
488 }
489
490 address &= PAGE_MASK;
491
492 memset(cmd, 0, sizeof(*cmd));
493 cmd->data[0] = devid;
494 cmd->data[0] |= (qdep & 0xff) << 24;
495 cmd->data[1] = devid;
496 cmd->data[2] = lower_32_bits(address);
497 cmd->data[3] = upper_32_bits(address);
498 CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
499 if (s)
500 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
501}
502
503static void build_inv_all(struct iommu_cmd *cmd)
504{
505 memset(cmd, 0, sizeof(*cmd));
506 CMD_SET_TYPE(cmd, CMD_INV_ALL);
406} 507}
407 508
408/* 509/*
409 * General queuing function for commands. Takes iommu->lock and calls 510 * Writes the command to the IOMMUs command buffer and informs the
410 * __iommu_queue_command(). 511 * hardware about the new command.
411 */ 512 */
412static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) 513static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
413{ 514{
515 u32 left, tail, head, next_tail;
414 unsigned long flags; 516 unsigned long flags;
415 int ret;
416 517
518 WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
519
520again:
417 spin_lock_irqsave(&iommu->lock, flags); 521 spin_lock_irqsave(&iommu->lock, flags);
418 ret = __iommu_queue_command(iommu, cmd);
419 if (!ret)
420 iommu->need_sync = true;
421 spin_unlock_irqrestore(&iommu->lock, flags);
422 522
423 return ret; 523 head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
424} 524 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
525 next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
526 left = (head - next_tail) % iommu->cmd_buf_size;
425 527
426/* 528 if (left <= 2) {
427 * This function waits until an IOMMU has completed a completion 529 struct iommu_cmd sync_cmd;
428 * wait command 530 volatile u64 sem = 0;
429 */ 531 int ret;
430static void __iommu_wait_for_completion(struct amd_iommu *iommu)
431{
432 int ready = 0;
433 unsigned status = 0;
434 unsigned long i = 0;
435 532
436 INC_STATS_COUNTER(compl_wait); 533 build_completion_wait(&sync_cmd, (u64)&sem);
534 copy_cmd_to_buffer(iommu, &sync_cmd, tail);
437 535
438 while (!ready && (i < EXIT_LOOP_COUNT)) { 536 spin_unlock_irqrestore(&iommu->lock, flags);
439 ++i; 537
440 /* wait for the bit to become one */ 538 if ((ret = wait_on_sem(&sem)) != 0)
441 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 539 return ret;
442 ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; 540
541 goto again;
443 } 542 }
444 543
445 /* set bit back to zero */ 544 copy_cmd_to_buffer(iommu, cmd, tail);
446 status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; 545
447 writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); 546 /* We need to sync now to make sure all commands are processed */
547 iommu->need_sync = true;
548
549 spin_unlock_irqrestore(&iommu->lock, flags);
448 550
449 if (unlikely(i == EXIT_LOOP_COUNT)) 551 return 0;
450 iommu->reset_in_progress = true;
451} 552}
452 553
453/* 554/*
454 * This function queues a completion wait command into the command 555 * This function queues a completion wait command into the command
455 * buffer of an IOMMU 556 * buffer of an IOMMU
456 */ 557 */
457static int __iommu_completion_wait(struct amd_iommu *iommu) 558static int iommu_completion_wait(struct amd_iommu *iommu)
458{ 559{
459 struct iommu_cmd cmd; 560 struct iommu_cmd cmd;
561 volatile u64 sem = 0;
562 int ret;
460 563
461 memset(&cmd, 0, sizeof(cmd)); 564 if (!iommu->need_sync)
462 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; 565 return 0;
463 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
464 566
465 return __iommu_queue_command(iommu, &cmd); 567 build_completion_wait(&cmd, (u64)&sem);
568
569 ret = iommu_queue_command(iommu, &cmd);
570 if (ret)
571 return ret;
572
573 return wait_on_sem(&sem);
466} 574}
467 575
468/* 576static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
469 * This function is called whenever we need to ensure that the IOMMU has
470 * completed execution of all commands we sent. It sends a
471 * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
472 * us about that by writing a value to a physical address we pass with
473 * the command.
474 */
475static int iommu_completion_wait(struct amd_iommu *iommu)
476{ 577{
477 int ret = 0; 578 struct iommu_cmd cmd;
478 unsigned long flags;
479 579
480 spin_lock_irqsave(&iommu->lock, flags); 580 build_inv_dte(&cmd, devid);
481 581
482 if (!iommu->need_sync) 582 return iommu_queue_command(iommu, &cmd);
483 goto out; 583}
484 584
485 ret = __iommu_completion_wait(iommu); 585static void iommu_flush_dte_all(struct amd_iommu *iommu)
586{
587 u32 devid;
486 588
487 iommu->need_sync = false; 589 for (devid = 0; devid <= 0xffff; ++devid)
590 iommu_flush_dte(iommu, devid);
488 591
489 if (ret) 592 iommu_completion_wait(iommu);
490 goto out; 593}
491
492 __iommu_wait_for_completion(iommu);
493 594
494out: 595/*
495 spin_unlock_irqrestore(&iommu->lock, flags); 596 * This function uses heavy locking and may disable irqs for some time. But
597 * this is no issue because it is only called during resume.
598 */
599static void iommu_flush_tlb_all(struct amd_iommu *iommu)
600{
601 u32 dom_id;
496 602
497 if (iommu->reset_in_progress) 603 for (dom_id = 0; dom_id <= 0xffff; ++dom_id) {
498 reset_iommu_command_buffer(iommu); 604 struct iommu_cmd cmd;
605 build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
606 dom_id, 1);
607 iommu_queue_command(iommu, &cmd);
608 }
499 609
500 return 0; 610 iommu_completion_wait(iommu);
501} 611}
502 612
503static void iommu_flush_complete(struct protection_domain *domain) 613static void iommu_flush_all(struct amd_iommu *iommu)
504{ 614{
505 int i; 615 struct iommu_cmd cmd;
506 616
507 for (i = 0; i < amd_iommus_present; ++i) { 617 build_inv_all(&cmd);
508 if (!domain->dev_iommu[i])
509 continue;
510 618
511 /* 619 iommu_queue_command(iommu, &cmd);
512 * Devices of this domain are behind this IOMMU 620 iommu_completion_wait(iommu);
513 * We need to wait for completion of all commands. 621}
514 */ 622
515 iommu_completion_wait(amd_iommus[i]); 623void iommu_flush_all_caches(struct amd_iommu *iommu)
624{
625 if (iommu_feature(iommu, FEATURE_IA)) {
626 iommu_flush_all(iommu);
627 } else {
628 iommu_flush_dte_all(iommu);
629 iommu_flush_tlb_all(iommu);
516 } 630 }
517} 631}
518 632
519/* 633/*
520 * Command send function for invalidating a device table entry 634 * Command send function for flushing on-device TLB
521 */ 635 */
522static int iommu_flush_device(struct device *dev) 636static int device_flush_iotlb(struct device *dev, u64 address, size_t size)
523{ 637{
638 struct pci_dev *pdev = to_pci_dev(dev);
524 struct amd_iommu *iommu; 639 struct amd_iommu *iommu;
525 struct iommu_cmd cmd; 640 struct iommu_cmd cmd;
526 u16 devid; 641 u16 devid;
642 int qdep;
527 643
644 qdep = pci_ats_queue_depth(pdev);
528 devid = get_device_id(dev); 645 devid = get_device_id(dev);
529 iommu = amd_iommu_rlookup_table[devid]; 646 iommu = amd_iommu_rlookup_table[devid];
530 647
531 /* Build command */ 648 build_inv_iotlb_pages(&cmd, devid, qdep, address, size);
532 memset(&cmd, 0, sizeof(cmd));
533 CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
534 cmd.data[0] = devid;
535 649
536 return iommu_queue_command(iommu, &cmd); 650 return iommu_queue_command(iommu, &cmd);
537} 651}
538 652
539static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
540 u16 domid, int pde, int s)
541{
542 memset(cmd, 0, sizeof(*cmd));
543 address &= PAGE_MASK;
544 CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
545 cmd->data[1] |= domid;
546 cmd->data[2] = lower_32_bits(address);
547 cmd->data[3] = upper_32_bits(address);
548 if (s) /* size bit - we flush more than one 4kb page */
549 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
550 if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
551 cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
552}
553
554/* 653/*
555 * Generic command send function for invalidaing TLB entries 654 * Command send function for invalidating a device table entry
556 */ 655 */
557static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, 656static int device_flush_dte(struct device *dev)
558 u64 address, u16 domid, int pde, int s)
559{ 657{
560 struct iommu_cmd cmd; 658 struct amd_iommu *iommu;
659 struct pci_dev *pdev;
660 u16 devid;
561 int ret; 661 int ret;
562 662
563 __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s); 663 pdev = to_pci_dev(dev);
664 devid = get_device_id(dev);
665 iommu = amd_iommu_rlookup_table[devid];
564 666
565 ret = iommu_queue_command(iommu, &cmd); 667 ret = iommu_flush_dte(iommu, devid);
668 if (ret)
669 return ret;
670
671 if (pci_ats_enabled(pdev))
672 ret = device_flush_iotlb(dev, 0, ~0UL);
566 673
567 return ret; 674 return ret;
568} 675}
@@ -572,23 +679,14 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
572 * It invalidates a single PTE if the range to flush is within a single 679 * It invalidates a single PTE if the range to flush is within a single
573 * page. Otherwise it flushes the whole TLB of the IOMMU. 680 * page. Otherwise it flushes the whole TLB of the IOMMU.
574 */ 681 */
575static void __iommu_flush_pages(struct protection_domain *domain, 682static void __domain_flush_pages(struct protection_domain *domain,
576 u64 address, size_t size, int pde) 683 u64 address, size_t size, int pde)
577{ 684{
578 int s = 0, i; 685 struct iommu_dev_data *dev_data;
579 unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE); 686 struct iommu_cmd cmd;
580 687 int ret = 0, i;
581 address &= PAGE_MASK;
582
583 if (pages > 1) {
584 /*
585 * If we have to flush more than one page, flush all
586 * TLB entries for this domain
587 */
588 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
589 s = 1;
590 }
591 688
689 build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
592 690
593 for (i = 0; i < amd_iommus_present; ++i) { 691 for (i = 0; i < amd_iommus_present; ++i) {
594 if (!domain->dev_iommu[i]) 692 if (!domain->dev_iommu[i])
@@ -598,101 +696,70 @@ static void __iommu_flush_pages(struct protection_domain *domain,
598 * Devices of this domain are behind this IOMMU 696 * Devices of this domain are behind this IOMMU
599 * We need a TLB flush 697 * We need a TLB flush
600 */ 698 */
601 iommu_queue_inv_iommu_pages(amd_iommus[i], address, 699 ret |= iommu_queue_command(amd_iommus[i], &cmd);
602 domain->id, pde, s); 700 }
701
702 list_for_each_entry(dev_data, &domain->dev_list, list) {
703 struct pci_dev *pdev = to_pci_dev(dev_data->dev);
704
705 if (!pci_ats_enabled(pdev))
706 continue;
707
708 ret |= device_flush_iotlb(dev_data->dev, address, size);
603 } 709 }
604 710
605 return; 711 WARN_ON(ret);
606} 712}
607 713
608static void iommu_flush_pages(struct protection_domain *domain, 714static void domain_flush_pages(struct protection_domain *domain,
609 u64 address, size_t size) 715 u64 address, size_t size)
610{ 716{
611 __iommu_flush_pages(domain, address, size, 0); 717 __domain_flush_pages(domain, address, size, 0);
612} 718}
613 719
614/* Flush the whole IO/TLB for a given protection domain */ 720/* Flush the whole IO/TLB for a given protection domain */
615static void iommu_flush_tlb(struct protection_domain *domain) 721static void domain_flush_tlb(struct protection_domain *domain)
616{ 722{
617 __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); 723 __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
618} 724}
619 725
620/* Flush the whole IO/TLB for a given protection domain - including PDE */ 726/* Flush the whole IO/TLB for a given protection domain - including PDE */
621static void iommu_flush_tlb_pde(struct protection_domain *domain) 727static void domain_flush_tlb_pde(struct protection_domain *domain)
622{ 728{
623 __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); 729 __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
624}
625
626
627/*
628 * This function flushes the DTEs for all devices in domain
629 */
630static void iommu_flush_domain_devices(struct protection_domain *domain)
631{
632 struct iommu_dev_data *dev_data;
633 unsigned long flags;
634
635 spin_lock_irqsave(&domain->lock, flags);
636
637 list_for_each_entry(dev_data, &domain->dev_list, list)
638 iommu_flush_device(dev_data->dev);
639
640 spin_unlock_irqrestore(&domain->lock, flags);
641} 730}
642 731
643static void iommu_flush_all_domain_devices(void) 732static void domain_flush_complete(struct protection_domain *domain)
644{ 733{
645 struct protection_domain *domain; 734 int i;
646 unsigned long flags;
647 735
648 spin_lock_irqsave(&amd_iommu_pd_lock, flags); 736 for (i = 0; i < amd_iommus_present; ++i) {
737 if (!domain->dev_iommu[i])
738 continue;
649 739
650 list_for_each_entry(domain, &amd_iommu_pd_list, list) { 740 /*
651 iommu_flush_domain_devices(domain); 741 * Devices of this domain are behind this IOMMU
652 iommu_flush_complete(domain); 742 * We need to wait for completion of all commands.
743 */
744 iommu_completion_wait(amd_iommus[i]);
653 } 745 }
654
655 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
656} 746}
657 747
658void amd_iommu_flush_all_devices(void)
659{
660 iommu_flush_all_domain_devices();
661}
662 748
663/* 749/*
664 * This function uses heavy locking and may disable irqs for some time. But 750 * This function flushes the DTEs for all devices in domain
665 * this is no issue because it is only called during resume.
666 */ 751 */
667void amd_iommu_flush_all_domains(void) 752static void domain_flush_devices(struct protection_domain *domain)
668{ 753{
669 struct protection_domain *domain; 754 struct iommu_dev_data *dev_data;
670 unsigned long flags; 755 unsigned long flags;
671 756
672 spin_lock_irqsave(&amd_iommu_pd_lock, flags); 757 spin_lock_irqsave(&domain->lock, flags);
673
674 list_for_each_entry(domain, &amd_iommu_pd_list, list) {
675 spin_lock(&domain->lock);
676 iommu_flush_tlb_pde(domain);
677 iommu_flush_complete(domain);
678 spin_unlock(&domain->lock);
679 }
680
681 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
682}
683
684static void reset_iommu_command_buffer(struct amd_iommu *iommu)
685{
686 pr_err("AMD-Vi: Resetting IOMMU command buffer\n");
687
688 if (iommu->reset_in_progress)
689 panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");
690 758
691 amd_iommu_reset_cmd_buffer(iommu); 759 list_for_each_entry(dev_data, &domain->dev_list, list)
692 amd_iommu_flush_all_devices(); 760 device_flush_dte(dev_data->dev);
693 amd_iommu_flush_all_domains();
694 761
695 iommu->reset_in_progress = false; 762 spin_unlock_irqrestore(&domain->lock, flags);
696} 763}
697 764
698/**************************************************************************** 765/****************************************************************************
@@ -1410,17 +1477,22 @@ static bool dma_ops_domain(struct protection_domain *domain)
1410 return domain->flags & PD_DMA_OPS_MASK; 1477 return domain->flags & PD_DMA_OPS_MASK;
1411} 1478}
1412 1479
1413static void set_dte_entry(u16 devid, struct protection_domain *domain) 1480static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
1414{ 1481{
1415 u64 pte_root = virt_to_phys(domain->pt_root); 1482 u64 pte_root = virt_to_phys(domain->pt_root);
1483 u32 flags = 0;
1416 1484
1417 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) 1485 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
1418 << DEV_ENTRY_MODE_SHIFT; 1486 << DEV_ENTRY_MODE_SHIFT;
1419 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; 1487 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
1420 1488
1421 amd_iommu_dev_table[devid].data[2] = domain->id; 1489 if (ats)
1422 amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); 1490 flags |= DTE_FLAG_IOTLB;
1423 amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); 1491
1492 amd_iommu_dev_table[devid].data[3] |= flags;
1493 amd_iommu_dev_table[devid].data[2] = domain->id;
1494 amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
1495 amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
1424} 1496}
1425 1497
1426static void clear_dte_entry(u16 devid) 1498static void clear_dte_entry(u16 devid)
@@ -1437,34 +1509,42 @@ static void do_attach(struct device *dev, struct protection_domain *domain)
1437{ 1509{
1438 struct iommu_dev_data *dev_data; 1510 struct iommu_dev_data *dev_data;
1439 struct amd_iommu *iommu; 1511 struct amd_iommu *iommu;
1512 struct pci_dev *pdev;
1513 bool ats = false;
1440 u16 devid; 1514 u16 devid;
1441 1515
1442 devid = get_device_id(dev); 1516 devid = get_device_id(dev);
1443 iommu = amd_iommu_rlookup_table[devid]; 1517 iommu = amd_iommu_rlookup_table[devid];
1444 dev_data = get_dev_data(dev); 1518 dev_data = get_dev_data(dev);
1519 pdev = to_pci_dev(dev);
1520
1521 if (amd_iommu_iotlb_sup)
1522 ats = pci_ats_enabled(pdev);
1445 1523
1446 /* Update data structures */ 1524 /* Update data structures */
1447 dev_data->domain = domain; 1525 dev_data->domain = domain;
1448 list_add(&dev_data->list, &domain->dev_list); 1526 list_add(&dev_data->list, &domain->dev_list);
1449 set_dte_entry(devid, domain); 1527 set_dte_entry(devid, domain, ats);
1450 1528
1451 /* Do reference counting */ 1529 /* Do reference counting */
1452 domain->dev_iommu[iommu->index] += 1; 1530 domain->dev_iommu[iommu->index] += 1;
1453 domain->dev_cnt += 1; 1531 domain->dev_cnt += 1;
1454 1532
1455 /* Flush the DTE entry */ 1533 /* Flush the DTE entry */
1456 iommu_flush_device(dev); 1534 device_flush_dte(dev);
1457} 1535}
1458 1536
1459static void do_detach(struct device *dev) 1537static void do_detach(struct device *dev)
1460{ 1538{
1461 struct iommu_dev_data *dev_data; 1539 struct iommu_dev_data *dev_data;
1462 struct amd_iommu *iommu; 1540 struct amd_iommu *iommu;
1541 struct pci_dev *pdev;
1463 u16 devid; 1542 u16 devid;
1464 1543
1465 devid = get_device_id(dev); 1544 devid = get_device_id(dev);
1466 iommu = amd_iommu_rlookup_table[devid]; 1545 iommu = amd_iommu_rlookup_table[devid];
1467 dev_data = get_dev_data(dev); 1546 dev_data = get_dev_data(dev);
1547 pdev = to_pci_dev(dev);
1468 1548
1469 /* decrease reference counters */ 1549 /* decrease reference counters */
1470 dev_data->domain->dev_iommu[iommu->index] -= 1; 1550 dev_data->domain->dev_iommu[iommu->index] -= 1;
@@ -1476,7 +1556,7 @@ static void do_detach(struct device *dev)
1476 clear_dte_entry(devid); 1556 clear_dte_entry(devid);
1477 1557
1478 /* Flush the DTE entry */ 1558 /* Flush the DTE entry */
1479 iommu_flush_device(dev); 1559 device_flush_dte(dev);
1480} 1560}
1481 1561
1482/* 1562/*
@@ -1539,9 +1619,13 @@ out_unlock:
1539static int attach_device(struct device *dev, 1619static int attach_device(struct device *dev,
1540 struct protection_domain *domain) 1620 struct protection_domain *domain)
1541{ 1621{
1622 struct pci_dev *pdev = to_pci_dev(dev);
1542 unsigned long flags; 1623 unsigned long flags;
1543 int ret; 1624 int ret;
1544 1625
1626 if (amd_iommu_iotlb_sup)
1627 pci_enable_ats(pdev, PAGE_SHIFT);
1628
1545 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 1629 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1546 ret = __attach_device(dev, domain); 1630 ret = __attach_device(dev, domain);
1547 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 1631 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
@@ -1551,7 +1635,7 @@ static int attach_device(struct device *dev,
1551 * left the caches in the IOMMU dirty. So we have to flush 1635 * left the caches in the IOMMU dirty. So we have to flush
1552 * here to evict all dirty stuff. 1636 * here to evict all dirty stuff.
1553 */ 1637 */
1554 iommu_flush_tlb_pde(domain); 1638 domain_flush_tlb_pde(domain);
1555 1639
1556 return ret; 1640 return ret;
1557} 1641}
@@ -1598,12 +1682,16 @@ static void __detach_device(struct device *dev)
1598 */ 1682 */
1599static void detach_device(struct device *dev) 1683static void detach_device(struct device *dev)
1600{ 1684{
1685 struct pci_dev *pdev = to_pci_dev(dev);
1601 unsigned long flags; 1686 unsigned long flags;
1602 1687
1603 /* lock device table */ 1688 /* lock device table */
1604 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 1689 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1605 __detach_device(dev); 1690 __detach_device(dev);
1606 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 1691 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1692
1693 if (amd_iommu_iotlb_sup && pci_ats_enabled(pdev))
1694 pci_disable_ats(pdev);
1607} 1695}
1608 1696
1609/* 1697/*
@@ -1692,7 +1780,7 @@ static int device_change_notifier(struct notifier_block *nb,
1692 goto out; 1780 goto out;
1693 } 1781 }
1694 1782
1695 iommu_flush_device(dev); 1783 device_flush_dte(dev);
1696 iommu_completion_wait(iommu); 1784 iommu_completion_wait(iommu);
1697 1785
1698out: 1786out:
@@ -1753,8 +1841,9 @@ static void update_device_table(struct protection_domain *domain)
1753 struct iommu_dev_data *dev_data; 1841 struct iommu_dev_data *dev_data;
1754 1842
1755 list_for_each_entry(dev_data, &domain->dev_list, list) { 1843 list_for_each_entry(dev_data, &domain->dev_list, list) {
1844 struct pci_dev *pdev = to_pci_dev(dev_data->dev);
1756 u16 devid = get_device_id(dev_data->dev); 1845 u16 devid = get_device_id(dev_data->dev);
1757 set_dte_entry(devid, domain); 1846 set_dte_entry(devid, domain, pci_ats_enabled(pdev));
1758 } 1847 }
1759} 1848}
1760 1849
@@ -1764,8 +1853,9 @@ static void update_domain(struct protection_domain *domain)
1764 return; 1853 return;
1765 1854
1766 update_device_table(domain); 1855 update_device_table(domain);
1767 iommu_flush_domain_devices(domain); 1856
1768 iommu_flush_tlb_pde(domain); 1857 domain_flush_devices(domain);
1858 domain_flush_tlb_pde(domain);
1769 1859
1770 domain->updated = false; 1860 domain->updated = false;
1771} 1861}
@@ -1924,10 +2014,10 @@ retry:
1924 ADD_STATS_COUNTER(alloced_io_mem, size); 2014 ADD_STATS_COUNTER(alloced_io_mem, size);
1925 2015
1926 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { 2016 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
1927 iommu_flush_tlb(&dma_dom->domain); 2017 domain_flush_tlb(&dma_dom->domain);
1928 dma_dom->need_flush = false; 2018 dma_dom->need_flush = false;
1929 } else if (unlikely(amd_iommu_np_cache)) 2019 } else if (unlikely(amd_iommu_np_cache))
1930 iommu_flush_pages(&dma_dom->domain, address, size); 2020 domain_flush_pages(&dma_dom->domain, address, size);
1931 2021
1932out: 2022out:
1933 return address; 2023 return address;
@@ -1976,7 +2066,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
1976 dma_ops_free_addresses(dma_dom, dma_addr, pages); 2066 dma_ops_free_addresses(dma_dom, dma_addr, pages);
1977 2067
1978 if (amd_iommu_unmap_flush || dma_dom->need_flush) { 2068 if (amd_iommu_unmap_flush || dma_dom->need_flush) {
1979 iommu_flush_pages(&dma_dom->domain, flush_addr, size); 2069 domain_flush_pages(&dma_dom->domain, flush_addr, size);
1980 dma_dom->need_flush = false; 2070 dma_dom->need_flush = false;
1981 } 2071 }
1982} 2072}
@@ -2012,7 +2102,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
2012 if (addr == DMA_ERROR_CODE) 2102 if (addr == DMA_ERROR_CODE)
2013 goto out; 2103 goto out;
2014 2104
2015 iommu_flush_complete(domain); 2105 domain_flush_complete(domain);
2016 2106
2017out: 2107out:
2018 spin_unlock_irqrestore(&domain->lock, flags); 2108 spin_unlock_irqrestore(&domain->lock, flags);
@@ -2039,7 +2129,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
2039 2129
2040 __unmap_single(domain->priv, dma_addr, size, dir); 2130 __unmap_single(domain->priv, dma_addr, size, dir);
2041 2131
2042 iommu_flush_complete(domain); 2132 domain_flush_complete(domain);
2043 2133
2044 spin_unlock_irqrestore(&domain->lock, flags); 2134 spin_unlock_irqrestore(&domain->lock, flags);
2045} 2135}
@@ -2104,7 +2194,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
2104 goto unmap; 2194 goto unmap;
2105 } 2195 }
2106 2196
2107 iommu_flush_complete(domain); 2197 domain_flush_complete(domain);
2108 2198
2109out: 2199out:
2110 spin_unlock_irqrestore(&domain->lock, flags); 2200 spin_unlock_irqrestore(&domain->lock, flags);
@@ -2150,7 +2240,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
2150 s->dma_address = s->dma_length = 0; 2240 s->dma_address = s->dma_length = 0;
2151 } 2241 }
2152 2242
2153 iommu_flush_complete(domain); 2243 domain_flush_complete(domain);
2154 2244
2155 spin_unlock_irqrestore(&domain->lock, flags); 2245 spin_unlock_irqrestore(&domain->lock, flags);
2156} 2246}
@@ -2200,7 +2290,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
2200 goto out_free; 2290 goto out_free;
2201 } 2291 }
2202 2292
2203 iommu_flush_complete(domain); 2293 domain_flush_complete(domain);
2204 2294
2205 spin_unlock_irqrestore(&domain->lock, flags); 2295 spin_unlock_irqrestore(&domain->lock, flags);
2206 2296
@@ -2232,7 +2322,7 @@ static void free_coherent(struct device *dev, size_t size,
2232 2322
2233 __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); 2323 __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
2234 2324
2235 iommu_flush_complete(domain); 2325 domain_flush_complete(domain);
2236 2326
2237 spin_unlock_irqrestore(&domain->lock, flags); 2327 spin_unlock_irqrestore(&domain->lock, flags);
2238 2328
@@ -2476,7 +2566,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom,
2476 if (!iommu) 2566 if (!iommu)
2477 return; 2567 return;
2478 2568
2479 iommu_flush_device(dev); 2569 device_flush_dte(dev);
2480 iommu_completion_wait(iommu); 2570 iommu_completion_wait(iommu);
2481} 2571}
2482 2572
@@ -2542,7 +2632,7 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
2542 unmap_size = iommu_unmap_page(domain, iova, page_size); 2632 unmap_size = iommu_unmap_page(domain, iova, page_size);
2543 mutex_unlock(&domain->api_lock); 2633 mutex_unlock(&domain->api_lock);
2544 2634
2545 iommu_flush_tlb_pde(domain); 2635 domain_flush_tlb_pde(domain);
2546 2636
2547 return get_order(unmap_size); 2637 return get_order(unmap_size);
2548} 2638}
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 246d727b65b7..9179c21120a8 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -137,6 +137,7 @@ int amd_iommus_present;
137 137
138/* IOMMUs have a non-present cache? */ 138/* IOMMUs have a non-present cache? */
139bool amd_iommu_np_cache __read_mostly; 139bool amd_iommu_np_cache __read_mostly;
140bool amd_iommu_iotlb_sup __read_mostly = true;
140 141
141/* 142/*
142 * The ACPI table parsing functions set this variable on an error 143 * The ACPI table parsing functions set this variable on an error
@@ -180,6 +181,12 @@ static u32 dev_table_size; /* size of the device table */
180static u32 alias_table_size; /* size of the alias table */ 181static u32 alias_table_size; /* size of the alias table */
181static u32 rlookup_table_size; /* size if the rlookup table */ 182static u32 rlookup_table_size; /* size if the rlookup table */
182 183
184/*
185 * This function flushes all internal caches of
186 * the IOMMU used by this driver.
187 */
188extern void iommu_flush_all_caches(struct amd_iommu *iommu);
189
183static inline void update_last_devid(u16 devid) 190static inline void update_last_devid(u16 devid)
184{ 191{
185 if (devid > amd_iommu_last_bdf) 192 if (devid > amd_iommu_last_bdf)
@@ -293,9 +300,23 @@ static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
293/* Function to enable the hardware */ 300/* Function to enable the hardware */
294static void iommu_enable(struct amd_iommu *iommu) 301static void iommu_enable(struct amd_iommu *iommu)
295{ 302{
296 printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n", 303 static const char * const feat_str[] = {
304 "PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
305 "IA", "GA", "HE", "PC", NULL
306 };
307 int i;
308
309 printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx",
297 dev_name(&iommu->dev->dev), iommu->cap_ptr); 310 dev_name(&iommu->dev->dev), iommu->cap_ptr);
298 311
312 if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
313 printk(KERN_CONT " extended features: ");
314 for (i = 0; feat_str[i]; ++i)
315 if (iommu_feature(iommu, (1ULL << i)))
316 printk(KERN_CONT " %s", feat_str[i]);
317 }
318 printk(KERN_CONT "\n");
319
299 iommu_feature_enable(iommu, CONTROL_IOMMU_EN); 320 iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
300} 321}
301 322
@@ -651,7 +672,7 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
651static void __init init_iommu_from_pci(struct amd_iommu *iommu) 672static void __init init_iommu_from_pci(struct amd_iommu *iommu)
652{ 673{
653 int cap_ptr = iommu->cap_ptr; 674 int cap_ptr = iommu->cap_ptr;
654 u32 range, misc; 675 u32 range, misc, low, high;
655 int i, j; 676 int i, j;
656 677
657 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, 678 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
@@ -667,6 +688,15 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
667 MMIO_GET_LD(range)); 688 MMIO_GET_LD(range));
668 iommu->evt_msi_num = MMIO_MSI_NUM(misc); 689 iommu->evt_msi_num = MMIO_MSI_NUM(misc);
669 690
691 if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
692 amd_iommu_iotlb_sup = false;
693
694 /* read extended feature bits */
695 low = readl(iommu->mmio_base + MMIO_EXT_FEATURES);
696 high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4);
697
698 iommu->features = ((u64)high << 32) | low;
699
670 if (!is_rd890_iommu(iommu->dev)) 700 if (!is_rd890_iommu(iommu->dev))
671 return; 701 return;
672 702
@@ -1004,10 +1034,11 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
1004 if (pci_enable_msi(iommu->dev)) 1034 if (pci_enable_msi(iommu->dev))
1005 return 1; 1035 return 1;
1006 1036
1007 r = request_irq(iommu->dev->irq, amd_iommu_int_handler, 1037 r = request_threaded_irq(iommu->dev->irq,
1008 IRQF_SAMPLE_RANDOM, 1038 amd_iommu_int_handler,
1009 "AMD-Vi", 1039 amd_iommu_int_thread,
1010 NULL); 1040 0, "AMD-Vi",
1041 iommu->dev);
1011 1042
1012 if (r) { 1043 if (r) {
1013 pci_disable_msi(iommu->dev); 1044 pci_disable_msi(iommu->dev);
@@ -1244,6 +1275,7 @@ static void enable_iommus(void)
1244 iommu_set_exclusion_range(iommu); 1275 iommu_set_exclusion_range(iommu);
1245 iommu_init_msi(iommu); 1276 iommu_init_msi(iommu);
1246 iommu_enable(iommu); 1277 iommu_enable(iommu);
1278 iommu_flush_all_caches(iommu);
1247 } 1279 }
1248} 1280}
1249 1281
@@ -1274,8 +1306,8 @@ static void amd_iommu_resume(void)
1274 * we have to flush after the IOMMUs are enabled because a 1306 * we have to flush after the IOMMUs are enabled because a
1275 * disabled IOMMU will never execute the commands we send 1307 * disabled IOMMU will never execute the commands we send
1276 */ 1308 */
1277 amd_iommu_flush_all_devices(); 1309 for_each_iommu(iommu)
1278 amd_iommu_flush_all_domains(); 1310 iommu_flush_all_caches(iommu);
1279} 1311}
1280 1312
1281static int amd_iommu_suspend(void) 1313static int amd_iommu_suspend(void)
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index cd1ffed4ee22..289e92862fd9 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -177,7 +177,6 @@ static struct clocksource clocksource_apbt = {
177 .rating = APBT_CLOCKSOURCE_RATING, 177 .rating = APBT_CLOCKSOURCE_RATING,
178 .read = apbt_read_clocksource, 178 .read = apbt_read_clocksource,
179 .mask = APBT_MASK, 179 .mask = APBT_MASK,
180 .shift = APBT_SHIFT,
181 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 180 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
182 .resume = apbt_restart_clocksource, 181 .resume = apbt_restart_clocksource,
183}; 182};
@@ -543,14 +542,7 @@ static int apbt_clocksource_register(void)
543 if (t1 == apbt_read_clocksource(&clocksource_apbt)) 542 if (t1 == apbt_read_clocksource(&clocksource_apbt))
544 panic("APBT counter not counting. APBT disabled\n"); 543 panic("APBT counter not counting. APBT disabled\n");
545 544
546 /* 545 clocksource_register_khz(&clocksource_apbt, (u32)apbt_freq*1000);
547 * initialize and register APBT clocksource
548 * convert that to ns/clock cycle
549 * mult = (ns/c) * 2^APBT_SHIFT
550 */
551 clocksource_apbt.mult = div_sc(MSEC_PER_SEC,
552 (unsigned long) apbt_freq, APBT_SHIFT);
553 clocksource_register(&clocksource_apbt);
554 546
555 return 0; 547 return 0;
556} 548}
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 73fb469908c6..3d2661ca6542 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -30,6 +30,22 @@
30#include <asm/amd_nb.h> 30#include <asm/amd_nb.h>
31#include <asm/x86_init.h> 31#include <asm/x86_init.h>
32 32
33/*
34 * Using 512M as goal, in case kexec will load kernel_big
35 * that will do the on-position decompress, and could overlap with
36 * with the gart aperture that is used.
37 * Sequence:
38 * kernel_small
39 * ==> kexec (with kdump trigger path or gart still enabled)
40 * ==> kernel_small (gart area become e820_reserved)
41 * ==> kexec (with kdump trigger path or gart still enabled)
42 * ==> kerne_big (uncompressed size will be big than 64M or 128M)
43 * So don't use 512M below as gart iommu, leave the space for kernel
44 * code for safe.
45 */
46#define GART_MIN_ADDR (512ULL << 20)
47#define GART_MAX_ADDR (1ULL << 32)
48
33int gart_iommu_aperture; 49int gart_iommu_aperture;
34int gart_iommu_aperture_disabled __initdata; 50int gart_iommu_aperture_disabled __initdata;
35int gart_iommu_aperture_allowed __initdata; 51int gart_iommu_aperture_allowed __initdata;
@@ -70,21 +86,9 @@ static u32 __init allocate_aperture(void)
70 * memory. Unfortunately we cannot move it up because that would 86 * memory. Unfortunately we cannot move it up because that would
71 * make the IOMMU useless. 87 * make the IOMMU useless.
72 */ 88 */
73 /* 89 addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
74 * using 512M as goal, in case kexec will load kernel_big 90 aper_size, aper_size);
75 * that will do the on position decompress, and could overlap with 91 if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) {
76 * that position with gart that is used.
77 * sequende:
78 * kernel_small
79 * ==> kexec (with kdump trigger path or previous doesn't shutdown gart)
80 * ==> kernel_small(gart area become e820_reserved)
81 * ==> kexec (with kdump trigger path or previous doesn't shutdown gart)
82 * ==> kerne_big (uncompressed size will be big than 64M or 128M)
83 * so don't use 512M below as gart iommu, leave the space for kernel
84 * code for safe
85 */
86 addr = memblock_find_in_range(0, 1ULL<<32, aper_size, 512ULL<<20);
87 if (addr == MEMBLOCK_ERROR || addr + aper_size > 0xffffffff) {
88 printk(KERN_ERR 92 printk(KERN_ERR
89 "Cannot allocate aperture memory hole (%lx,%uK)\n", 93 "Cannot allocate aperture memory hole (%lx,%uK)\n",
90 addr, aper_size>>10); 94 addr, aper_size>>10);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index fabf01eff771..f92a8e5d1e21 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -505,7 +505,7 @@ static void __cpuinit setup_APIC_timer(void)
505{ 505{
506 struct clock_event_device *levt = &__get_cpu_var(lapic_events); 506 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
507 507
508 if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) { 508 if (this_cpu_has(X86_FEATURE_ARAT)) {
509 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; 509 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
510 /* Make LAPIC timer preferrable over percpu HPET */ 510 /* Make LAPIC timer preferrable over percpu HPET */
511 lapic_clockevent.rating = 150; 511 lapic_clockevent.rating = 150;
@@ -1237,6 +1237,17 @@ void __cpuinit setup_local_APIC(void)
1237 /* always use the value from LDR */ 1237 /* always use the value from LDR */
1238 early_per_cpu(x86_cpu_to_logical_apicid, cpu) = 1238 early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
1239 logical_smp_processor_id(); 1239 logical_smp_processor_id();
1240
1241 /*
1242 * Some NUMA implementations (NUMAQ) don't initialize apicid to
1243 * node mapping during NUMA init. Now that logical apicid is
1244 * guaranteed to be known, give it another chance. This is already
1245 * a bit too late - percpu allocation has already happened without
1246 * proper NUMA affinity.
1247 */
1248 if (apic->x86_32_numa_cpu_node)
1249 set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu),
1250 apic->x86_32_numa_cpu_node(cpu));
1240#endif 1251#endif
1241 1252
1242 /* 1253 /*
@@ -1812,30 +1823,41 @@ void smp_spurious_interrupt(struct pt_regs *regs)
1812 */ 1823 */
1813void smp_error_interrupt(struct pt_regs *regs) 1824void smp_error_interrupt(struct pt_regs *regs)
1814{ 1825{
1815 u32 v, v1; 1826 u32 v0, v1;
1827 u32 i = 0;
1828 static const char * const error_interrupt_reason[] = {
1829 "Send CS error", /* APIC Error Bit 0 */
1830 "Receive CS error", /* APIC Error Bit 1 */
1831 "Send accept error", /* APIC Error Bit 2 */
1832 "Receive accept error", /* APIC Error Bit 3 */
1833 "Redirectable IPI", /* APIC Error Bit 4 */
1834 "Send illegal vector", /* APIC Error Bit 5 */
1835 "Received illegal vector", /* APIC Error Bit 6 */
1836 "Illegal register address", /* APIC Error Bit 7 */
1837 };
1816 1838
1817 exit_idle(); 1839 exit_idle();
1818 irq_enter(); 1840 irq_enter();
1819 /* First tickle the hardware, only then report what went on. -- REW */ 1841 /* First tickle the hardware, only then report what went on. -- REW */
1820 v = apic_read(APIC_ESR); 1842 v0 = apic_read(APIC_ESR);
1821 apic_write(APIC_ESR, 0); 1843 apic_write(APIC_ESR, 0);
1822 v1 = apic_read(APIC_ESR); 1844 v1 = apic_read(APIC_ESR);
1823 ack_APIC_irq(); 1845 ack_APIC_irq();
1824 atomic_inc(&irq_err_count); 1846 atomic_inc(&irq_err_count);
1825 1847
1826 /* 1848 apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)",
1827 * Here is what the APIC error bits mean: 1849 smp_processor_id(), v0 , v1);
1828 * 0: Send CS error 1850
1829 * 1: Receive CS error 1851 v1 = v1 & 0xff;
1830 * 2: Send accept error 1852 while (v1) {
1831 * 3: Receive accept error 1853 if (v1 & 0x1)
1832 * 4: Reserved 1854 apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
1833 * 5: Send illegal vector 1855 i++;
1834 * 6: Received illegal vector 1856 v1 >>= 1;
1835 * 7: Illegal register address 1857 };
1836 */ 1858
1837 pr_debug("APIC error on CPU%d: %02x(%02x)\n", 1859 apic_printk(APIC_DEBUG, KERN_CONT "\n");
1838 smp_processor_id(), v , v1); 1860
1839 irq_exit(); 1861 irq_exit();
1840} 1862}
1841 1863
@@ -2003,21 +2025,6 @@ void default_init_apic_ldr(void)
2003 apic_write(APIC_LDR, val); 2025 apic_write(APIC_LDR, val);
2004} 2026}
2005 2027
2006#ifdef CONFIG_X86_32
2007int default_x86_32_numa_cpu_node(int cpu)
2008{
2009#ifdef CONFIG_NUMA
2010 int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
2011
2012 if (apicid != BAD_APICID)
2013 return __apicid_to_node[apicid];
2014 return NUMA_NO_NODE;
2015#else
2016 return 0;
2017#endif
2018}
2019#endif
2020
2021/* 2028/*
2022 * Power management 2029 * Power management
2023 */ 2030 */
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index f1baa2dc087a..775b82bc655c 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -119,14 +119,6 @@ static void noop_apic_write(u32 reg, u32 v)
119 WARN_ON_ONCE(cpu_has_apic && !disable_apic); 119 WARN_ON_ONCE(cpu_has_apic && !disable_apic);
120} 120}
121 121
122#ifdef CONFIG_X86_32
123static int noop_x86_32_numa_cpu_node(int cpu)
124{
125 /* we're always on node 0 */
126 return 0;
127}
128#endif
129
130struct apic apic_noop = { 122struct apic apic_noop = {
131 .name = "noop", 123 .name = "noop",
132 .probe = noop_probe, 124 .probe = noop_probe,
@@ -195,6 +187,5 @@ struct apic apic_noop = {
195 187
196#ifdef CONFIG_X86_32 188#ifdef CONFIG_X86_32
197 .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid, 189 .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
198 .x86_32_numa_cpu_node = noop_x86_32_numa_cpu_node,
199#endif 190#endif
200}; 191};
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 541a2e431659..d84ac5a584b5 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -253,5 +253,4 @@ struct apic apic_bigsmp = {
253 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 253 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
254 254
255 .x86_32_early_logical_apicid = bigsmp_early_logical_apicid, 255 .x86_32_early_logical_apicid = bigsmp_early_logical_apicid,
256 .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
257}; 256};
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 3e9de4854c5b..70533de5bd29 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -510,11 +510,6 @@ static void es7000_setup_apic_routing(void)
510 nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); 510 nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
511} 511}
512 512
513static int es7000_numa_cpu_node(int cpu)
514{
515 return 0;
516}
517
518static int es7000_cpu_present_to_apicid(int mps_cpu) 513static int es7000_cpu_present_to_apicid(int mps_cpu)
519{ 514{
520 if (!mps_cpu) 515 if (!mps_cpu)
@@ -688,7 +683,6 @@ struct apic __refdata apic_es7000_cluster = {
688 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 683 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
689 684
690 .x86_32_early_logical_apicid = es7000_early_logical_apicid, 685 .x86_32_early_logical_apicid = es7000_early_logical_apicid,
691 .x86_32_numa_cpu_node = es7000_numa_cpu_node,
692}; 686};
693 687
694struct apic __refdata apic_es7000 = { 688struct apic __refdata apic_es7000 = {
@@ -752,5 +746,4 @@ struct apic __refdata apic_es7000 = {
752 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 746 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
753 747
754 .x86_32_early_logical_apicid = es7000_early_logical_apicid, 748 .x86_32_early_logical_apicid = es7000_early_logical_apicid,
755 .x86_32_numa_cpu_node = es7000_numa_cpu_node,
756}; 749};
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 6273eee5134b..30f13319e24b 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -48,8 +48,6 @@
48#include <asm/e820.h> 48#include <asm/e820.h>
49#include <asm/ipi.h> 49#include <asm/ipi.h>
50 50
51#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
52
53int found_numaq; 51int found_numaq;
54 52
55/* 53/*
@@ -79,31 +77,20 @@ int quad_local_to_mp_bus_id[NR_CPUS/4][4];
79static inline void numaq_register_node(int node, struct sys_cfg_data *scd) 77static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
80{ 78{
81 struct eachquadmem *eq = scd->eq + node; 79 struct eachquadmem *eq = scd->eq + node;
80 u64 start = (u64)(eq->hi_shrd_mem_start - eq->priv_mem_size) << 20;
81 u64 end = (u64)(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size) << 20;
82 int ret;
82 83
83 node_set_online(node); 84 node_set(node, numa_nodes_parsed);
84 85 ret = numa_add_memblk(node, start, end);
85 /* Convert to pages */ 86 BUG_ON(ret < 0);
86 node_start_pfn[node] =
87 MB_TO_PAGES(eq->hi_shrd_mem_start - eq->priv_mem_size);
88
89 node_end_pfn[node] =
90 MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
91
92 memblock_x86_register_active_regions(node, node_start_pfn[node],
93 node_end_pfn[node]);
94
95 memory_present(node, node_start_pfn[node], node_end_pfn[node]);
96
97 node_remap_size[node] = node_memmap_size_bytes(node,
98 node_start_pfn[node],
99 node_end_pfn[node]);
100} 87}
101 88
102/* 89/*
103 * Function: smp_dump_qct() 90 * Function: smp_dump_qct()
104 * 91 *
105 * Description: gets memory layout from the quad config table. This 92 * Description: gets memory layout from the quad config table. This
106 * function also updates node_online_map with the nodes (quads) present. 93 * function also updates numa_nodes_parsed with the nodes (quads) present.
107 */ 94 */
108static void __init smp_dump_qct(void) 95static void __init smp_dump_qct(void)
109{ 96{
@@ -112,7 +99,6 @@ static void __init smp_dump_qct(void)
112 99
113 scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR); 100 scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR);
114 101
115 nodes_clear(node_online_map);
116 for_each_node(node) { 102 for_each_node(node) {
117 if (scd->quads_present31_0 & (1 << node)) 103 if (scd->quads_present31_0 & (1 << node))
118 numaq_register_node(node, scd); 104 numaq_register_node(node, scd);
@@ -282,14 +268,14 @@ static __init void early_check_numaq(void)
282 } 268 }
283} 269}
284 270
285int __init get_memcfg_numaq(void) 271int __init numaq_numa_init(void)
286{ 272{
287 early_check_numaq(); 273 early_check_numaq();
288 if (!found_numaq) 274 if (!found_numaq)
289 return 0; 275 return -ENOENT;
290 smp_dump_qct(); 276 smp_dump_qct();
291 277
292 return 1; 278 return 0;
293} 279}
294 280
295#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) 281#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index fc84c7b61108..6541e471fd91 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -172,7 +172,6 @@ struct apic apic_default = {
172 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 172 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
173 173
174 .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid, 174 .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid,
175 .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
176}; 175};
177 176
178extern struct apic apic_numaq; 177extern struct apic apic_numaq;
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index e4b8059b414a..35bcd7d995a1 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -551,5 +551,4 @@ struct apic apic_summit = {
551 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 551 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
552 552
553 .x86_32_early_logical_apicid = summit_early_logical_apicid, 553 .x86_32_early_logical_apicid = summit_early_logical_apicid,
554 .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
555}; 554};
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 33b10a0fc095..7acd2d2ac965 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -37,6 +37,13 @@
37#include <asm/smp.h> 37#include <asm/smp.h>
38#include <asm/x86_init.h> 38#include <asm/x86_init.h>
39#include <asm/emergency-restart.h> 39#include <asm/emergency-restart.h>
40#include <asm/nmi.h>
41
42/* BMC sets a bit this MMR non-zero before sending an NMI */
43#define UVH_NMI_MMR UVH_SCRATCH5
44#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8)
45#define UV_NMI_PENDING_MASK (1UL << 63)
46DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
40 47
41DEFINE_PER_CPU(int, x2apic_extra_bits); 48DEFINE_PER_CPU(int, x2apic_extra_bits);
42 49
@@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void)
642 */ 649 */
643int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) 650int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
644{ 651{
652 unsigned long real_uv_nmi;
653 int bid;
654
645 if (reason != DIE_NMIUNKNOWN) 655 if (reason != DIE_NMIUNKNOWN)
646 return NOTIFY_OK; 656 return NOTIFY_OK;
647 657
648 if (in_crash_kexec) 658 if (in_crash_kexec)
649 /* do nothing if entering the crash kernel */ 659 /* do nothing if entering the crash kernel */
650 return NOTIFY_OK; 660 return NOTIFY_OK;
661
651 /* 662 /*
652 * Use a lock so only one cpu prints at a time 663 * Each blade has an MMR that indicates when an NMI has been sent
653 * to prevent intermixed output. 664 * to cpus on the blade. If an NMI is detected, atomically
665 * clear the MMR and update a per-blade NMI count used to
666 * cause each cpu on the blade to notice a new NMI.
667 */
668 bid = uv_numa_blade_id();
669 real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
670
671 if (unlikely(real_uv_nmi)) {
672 spin_lock(&uv_blade_info[bid].nmi_lock);
673 real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
674 if (real_uv_nmi) {
675 uv_blade_info[bid].nmi_count++;
676 uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
677 }
678 spin_unlock(&uv_blade_info[bid].nmi_lock);
679 }
680
681 if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
682 return NOTIFY_DONE;
683
684 __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
685
686 /*
687 * Use a lock so only one cpu prints at a time.
688 * This prevents intermixed output.
654 */ 689 */
655 spin_lock(&uv_nmi_lock); 690 spin_lock(&uv_nmi_lock);
656 pr_info("NMI stack dump cpu %u:\n", smp_processor_id()); 691 pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
657 dump_stack(); 692 dump_stack();
658 spin_unlock(&uv_nmi_lock); 693 spin_unlock(&uv_nmi_lock);
659 694
@@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
661} 696}
662 697
663static struct notifier_block uv_dump_stack_nmi_nb = { 698static struct notifier_block uv_dump_stack_nmi_nb = {
664 .notifier_call = uv_handle_nmi 699 .notifier_call = uv_handle_nmi,
700 .priority = NMI_LOCAL_LOW_PRIOR - 1,
665}; 701};
666 702
667void uv_register_nmi_notifier(void) 703void uv_register_nmi_notifier(void)
@@ -720,8 +756,9 @@ void __init uv_system_init(void)
720 printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); 756 printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
721 757
722 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); 758 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
723 uv_blade_info = kmalloc(bytes, GFP_KERNEL); 759 uv_blade_info = kzalloc(bytes, GFP_KERNEL);
724 BUG_ON(!uv_blade_info); 760 BUG_ON(!uv_blade_info);
761
725 for (blade = 0; blade < uv_num_possible_blades(); blade++) 762 for (blade = 0; blade < uv_num_possible_blades(); blade++)
726 uv_blade_info[blade].memory_nid = -1; 763 uv_blade_info[blade].memory_nid = -1;
727 764
@@ -747,6 +784,7 @@ void __init uv_system_init(void)
747 uv_blade_info[blade].pnode = pnode; 784 uv_blade_info[blade].pnode = pnode;
748 uv_blade_info[blade].nr_possible_cpus = 0; 785 uv_blade_info[blade].nr_possible_cpus = 0;
749 uv_blade_info[blade].nr_online_cpus = 0; 786 uv_blade_info[blade].nr_online_cpus = 0;
787 spin_lock_init(&uv_blade_info[blade].nmi_lock);
750 max_pnode = max(pnode, max_pnode); 788 max_pnode = max(pnode, max_pnode);
751 blade++; 789 blade++;
752 } 790 }
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index adee12e0da1f..3bfa02235965 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1238,7 +1238,6 @@ static int suspend(int vetoable)
1238 dpm_suspend_noirq(PMSG_SUSPEND); 1238 dpm_suspend_noirq(PMSG_SUSPEND);
1239 1239
1240 local_irq_disable(); 1240 local_irq_disable();
1241 sysdev_suspend(PMSG_SUSPEND);
1242 syscore_suspend(); 1241 syscore_suspend();
1243 1242
1244 local_irq_enable(); 1243 local_irq_enable();
@@ -1258,7 +1257,6 @@ static int suspend(int vetoable)
1258 err = (err == APM_SUCCESS) ? 0 : -EIO; 1257 err = (err == APM_SUCCESS) ? 0 : -EIO;
1259 1258
1260 syscore_resume(); 1259 syscore_resume();
1261 sysdev_resume();
1262 local_irq_enable(); 1260 local_irq_enable();
1263 1261
1264 dpm_resume_noirq(PMSG_RESUME); 1262 dpm_resume_noirq(PMSG_RESUME);
@@ -1282,7 +1280,6 @@ static void standby(void)
1282 dpm_suspend_noirq(PMSG_SUSPEND); 1280 dpm_suspend_noirq(PMSG_SUSPEND);
1283 1281
1284 local_irq_disable(); 1282 local_irq_disable();
1285 sysdev_suspend(PMSG_SUSPEND);
1286 syscore_suspend(); 1283 syscore_suspend();
1287 local_irq_enable(); 1284 local_irq_enable();
1288 1285
@@ -1292,7 +1289,6 @@ static void standby(void)
1292 1289
1293 local_irq_disable(); 1290 local_irq_disable();
1294 syscore_resume(); 1291 syscore_resume();
1295 sysdev_resume();
1296 local_irq_enable(); 1292 local_irq_enable();
1297 1293
1298 dpm_resume_noirq(PMSG_RESUME); 1294 dpm_resume_noirq(PMSG_RESUME);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 3f0ebe429a01..6042981d0309 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -30,7 +30,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
30 30
31obj-$(CONFIG_X86_MCE) += mcheck/ 31obj-$(CONFIG_X86_MCE) += mcheck/
32obj-$(CONFIG_MTRR) += mtrr/ 32obj-$(CONFIG_MTRR) += mtrr/
33obj-$(CONFIG_CPU_FREQ) += cpufreq/
34 33
35obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o 34obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
36 35
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bb9eb29a52dd..6f9d1f6063e9 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -613,7 +613,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
613#endif 613#endif
614 614
615 /* As a rule processors have APIC timer running in deep C states */ 615 /* As a rule processors have APIC timer running in deep C states */
616 if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400)) 616 if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400))
617 set_cpu_cap(c, X86_FEATURE_ARAT); 617 set_cpu_cap(c, X86_FEATURE_ARAT);
618 618
619 /* 619 /*
@@ -698,7 +698,7 @@ cpu_dev_register(amd_cpu_dev);
698 */ 698 */
699 699
700const int amd_erratum_400[] = 700const int amd_erratum_400[] =
701 AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf), 701 AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
702 AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); 702 AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
703EXPORT_SYMBOL_GPL(amd_erratum_400); 703EXPORT_SYMBOL_GPL(amd_erratum_400);
704 704
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e2ced0074a45..cbc70a27430c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -254,6 +254,25 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
254} 254}
255#endif 255#endif
256 256
257static int disable_smep __initdata;
258static __init int setup_disable_smep(char *arg)
259{
260 disable_smep = 1;
261 return 1;
262}
263__setup("nosmep", setup_disable_smep);
264
265static __init void setup_smep(struct cpuinfo_x86 *c)
266{
267 if (cpu_has(c, X86_FEATURE_SMEP)) {
268 if (unlikely(disable_smep)) {
269 setup_clear_cpu_cap(X86_FEATURE_SMEP);
270 clear_in_cr4(X86_CR4_SMEP);
271 } else
272 set_in_cr4(X86_CR4_SMEP);
273 }
274}
275
257/* 276/*
258 * Some CPU features depend on higher CPUID levels, which may not always 277 * Some CPU features depend on higher CPUID levels, which may not always
259 * be available due to CPUID level capping or broken virtualization 278 * be available due to CPUID level capping or broken virtualization
@@ -565,8 +584,7 @@ void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
565 584
566 cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); 585 cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
567 586
568 if (eax > 0) 587 c->x86_capability[9] = ebx;
569 c->x86_capability[9] = ebx;
570 } 588 }
571 589
572 /* AMD-defined flags: level 0x80000001 */ 590 /* AMD-defined flags: level 0x80000001 */
@@ -668,6 +686,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
668 c->cpu_index = 0; 686 c->cpu_index = 0;
669#endif 687#endif
670 filter_cpuid_features(c, false); 688 filter_cpuid_features(c, false);
689
690 setup_smep(c);
671} 691}
672 692
673void __init early_cpu_init(void) 693void __init early_cpu_init(void)
@@ -753,6 +773,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
753#endif 773#endif
754 } 774 }
755 775
776 setup_smep(c);
777
756 get_model_name(c); /* Default name */ 778 get_model_name(c); /* Default name */
757 779
758 detect_nopl(c); 780 detect_nopl(c);
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
deleted file mode 100644
index 870e6cc6ad28..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ /dev/null
@@ -1,266 +0,0 @@
1#
2# CPU Frequency scaling
3#
4
5menu "CPU Frequency scaling"
6
7source "drivers/cpufreq/Kconfig"
8
9if CPU_FREQ
10
11comment "CPUFreq processor drivers"
12
13config X86_PCC_CPUFREQ
14 tristate "Processor Clocking Control interface driver"
15 depends on ACPI && ACPI_PROCESSOR
16 help
17 This driver adds support for the PCC interface.
18
19 For details, take a look at:
20 <file:Documentation/cpu-freq/pcc-cpufreq.txt>.
21
22 To compile this driver as a module, choose M here: the
23 module will be called pcc-cpufreq.
24
25 If in doubt, say N.
26
27config X86_ACPI_CPUFREQ
28 tristate "ACPI Processor P-States driver"
29 select CPU_FREQ_TABLE
30 depends on ACPI_PROCESSOR
31 help
32 This driver adds a CPUFreq driver which utilizes the ACPI
33 Processor Performance States.
34 This driver also supports Intel Enhanced Speedstep.
35
36 To compile this driver as a module, choose M here: the
37 module will be called acpi-cpufreq.
38
39 For details, take a look at <file:Documentation/cpu-freq/>.
40
41 If in doubt, say N.
42
43config ELAN_CPUFREQ
44 tristate "AMD Elan SC400 and SC410"
45 select CPU_FREQ_TABLE
46 depends on X86_ELAN
47 ---help---
48 This adds the CPUFreq driver for AMD Elan SC400 and SC410
49 processors.
50
51 You need to specify the processor maximum speed as boot
52 parameter: elanfreq=maxspeed (in kHz) or as module
53 parameter "max_freq".
54
55 For details, take a look at <file:Documentation/cpu-freq/>.
56
57 If in doubt, say N.
58
59config SC520_CPUFREQ
60 tristate "AMD Elan SC520"
61 select CPU_FREQ_TABLE
62 depends on X86_ELAN
63 ---help---
64 This adds the CPUFreq driver for AMD Elan SC520 processor.
65
66 For details, take a look at <file:Documentation/cpu-freq/>.
67
68 If in doubt, say N.
69
70
71config X86_POWERNOW_K6
72 tristate "AMD Mobile K6-2/K6-3 PowerNow!"
73 select CPU_FREQ_TABLE
74 depends on X86_32
75 help
76 This adds the CPUFreq driver for mobile AMD K6-2+ and mobile
77 AMD K6-3+ processors.
78
79 For details, take a look at <file:Documentation/cpu-freq/>.
80
81 If in doubt, say N.
82
83config X86_POWERNOW_K7
84 tristate "AMD Mobile Athlon/Duron PowerNow!"
85 select CPU_FREQ_TABLE
86 depends on X86_32
87 help
88 This adds the CPUFreq driver for mobile AMD K7 mobile processors.
89
90 For details, take a look at <file:Documentation/cpu-freq/>.
91
92 If in doubt, say N.
93
94config X86_POWERNOW_K7_ACPI
95 bool
96 depends on X86_POWERNOW_K7 && ACPI_PROCESSOR
97 depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m)
98 depends on X86_32
99 default y
100
101config X86_POWERNOW_K8
102 tristate "AMD Opteron/Athlon64 PowerNow!"
103 select CPU_FREQ_TABLE
104 depends on ACPI && ACPI_PROCESSOR
105 help
106 This adds the CPUFreq driver for K8/K10 Opteron/Athlon64 processors.
107
108 To compile this driver as a module, choose M here: the
109 module will be called powernow-k8.
110
111 For details, take a look at <file:Documentation/cpu-freq/>.
112
113config X86_GX_SUSPMOD
114 tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
115 depends on X86_32 && PCI
116 help
117 This add the CPUFreq driver for NatSemi Geode processors which
118 support suspend modulation.
119
120 For details, take a look at <file:Documentation/cpu-freq/>.
121
122 If in doubt, say N.
123
124config X86_SPEEDSTEP_CENTRINO
125 tristate "Intel Enhanced SpeedStep (deprecated)"
126 select CPU_FREQ_TABLE
127 select X86_SPEEDSTEP_CENTRINO_TABLE if X86_32
128 depends on X86_32 || (X86_64 && ACPI_PROCESSOR)
129 help
130 This is deprecated and this functionality is now merged into
131 acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of
132 speedstep_centrino.
133 This adds the CPUFreq driver for Enhanced SpeedStep enabled
134 mobile CPUs. This means Intel Pentium M (Centrino) CPUs
135 or 64bit enabled Intel Xeons.
136
137 To compile this driver as a module, choose M here: the
138 module will be called speedstep-centrino.
139
140 For details, take a look at <file:Documentation/cpu-freq/>.
141
142 If in doubt, say N.
143
144config X86_SPEEDSTEP_CENTRINO_TABLE
145 bool "Built-in tables for Banias CPUs"
146 depends on X86_32 && X86_SPEEDSTEP_CENTRINO
147 default y
148 help
149 Use built-in tables for Banias CPUs if ACPI encoding
150 is not available.
151
152 If in doubt, say N.
153
154config X86_SPEEDSTEP_ICH
155 tristate "Intel Speedstep on ICH-M chipsets (ioport interface)"
156 select CPU_FREQ_TABLE
157 depends on X86_32
158 help
159 This adds the CPUFreq driver for certain mobile Intel Pentium III
160 (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all
161 mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2,
162 ICH3 or ICH4 southbridge.
163
164 For details, take a look at <file:Documentation/cpu-freq/>.
165
166 If in doubt, say N.
167
168config X86_SPEEDSTEP_SMI
169 tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)"
170 select CPU_FREQ_TABLE
171 depends on X86_32 && EXPERIMENTAL
172 help
173 This adds the CPUFreq driver for certain mobile Intel Pentium III
174 (Coppermine), all mobile Intel Pentium III-M (Tualatin)
175 on systems which have an Intel 440BX/ZX/MX southbridge.
176
177 For details, take a look at <file:Documentation/cpu-freq/>.
178
179 If in doubt, say N.
180
181config X86_P4_CLOCKMOD
182 tristate "Intel Pentium 4 clock modulation"
183 select CPU_FREQ_TABLE
184 help
185 This adds the CPUFreq driver for Intel Pentium 4 / XEON
186 processors. When enabled it will lower CPU temperature by skipping
187 clocks.
188
189 This driver should be only used in exceptional
190 circumstances when very low power is needed because it causes severe
191 slowdowns and noticeable latencies. Normally Speedstep should be used
192 instead.
193
194 To compile this driver as a module, choose M here: the
195 module will be called p4-clockmod.
196
197 For details, take a look at <file:Documentation/cpu-freq/>.
198
199 Unless you are absolutely sure say N.
200
201config X86_CPUFREQ_NFORCE2
202 tristate "nVidia nForce2 FSB changing"
203 depends on X86_32 && EXPERIMENTAL
204 help
205 This adds the CPUFreq driver for FSB changing on nVidia nForce2
206 platforms.
207
208 For details, take a look at <file:Documentation/cpu-freq/>.
209
210 If in doubt, say N.
211
212config X86_LONGRUN
213 tristate "Transmeta LongRun"
214 depends on X86_32
215 help
216 This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors
217 which support LongRun.
218
219 For details, take a look at <file:Documentation/cpu-freq/>.
220
221 If in doubt, say N.
222
223config X86_LONGHAUL
224 tristate "VIA Cyrix III Longhaul"
225 select CPU_FREQ_TABLE
226 depends on X86_32 && ACPI_PROCESSOR
227 help
228 This adds the CPUFreq driver for VIA Samuel/CyrixIII,
229 VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T
230 processors.
231
232 For details, take a look at <file:Documentation/cpu-freq/>.
233
234 If in doubt, say N.
235
236config X86_E_POWERSAVER
237 tristate "VIA C7 Enhanced PowerSaver (DANGEROUS)"
238 select CPU_FREQ_TABLE
239 depends on X86_32 && EXPERIMENTAL
240 help
241 This adds the CPUFreq driver for VIA C7 processors. However, this driver
242 does not have any safeguards to prevent operating the CPU out of spec
243 and is thus considered dangerous. Please use the regular ACPI cpufreq
244 driver, enabled by CONFIG_X86_ACPI_CPUFREQ.
245
246 If in doubt, say N.
247
248comment "shared options"
249
250config X86_SPEEDSTEP_LIB
251 tristate
252 default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD)
253
254config X86_SPEEDSTEP_RELAXED_CAP_CHECK
255 bool "Relaxed speedstep capability checks"
256 depends on X86_32 && (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH)
257 help
258 Don't perform all checks for a speedstep capable system which would
259 normally be done. Some ancient or strange systems, though speedstep
260 capable, don't always indicate that they are speedstep capable. This
261 option lets the probing code bypass some of those checks if the
262 parameter "relaxed_check=1" is passed to the module.
263
264endif # CPU_FREQ
265
266endmenu
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
deleted file mode 100644
index bd54bf67e6fb..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
1# Link order matters. K8 is preferred to ACPI because of firmware bugs in early
2# K8 systems. ACPI is preferred to all other hardware-specific drivers.
3# speedstep-* is preferred over p4-clockmod.
4
5obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o
6obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o
7obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o
8obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
9obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o
10obj-$(CONFIG_X86_LONGHAUL) += longhaul.o
11obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o
12obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o
13obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o
14obj-$(CONFIG_X86_LONGRUN) += longrun.o
15obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o
16obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o
17obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o
18obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o
19obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o
20obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o
21obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
deleted file mode 100644
index a2baafb2fe6d..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ /dev/null
@@ -1,776 +0,0 @@
1/*
2 * acpi-cpufreq.c - ACPI Processor P-States Driver
3 *
4 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
5 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
6 * Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
7 * Copyright (C) 2006 Denis Sadykov <denis.m.sadykov@intel.com>
8 *
9 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or (at
14 * your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License along
22 * with this program; if not, write to the Free Software Foundation, Inc.,
23 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
24 *
25 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
26 */
27
28#include <linux/kernel.h>
29#include <linux/module.h>
30#include <linux/init.h>
31#include <linux/smp.h>
32#include <linux/sched.h>
33#include <linux/cpufreq.h>
34#include <linux/compiler.h>
35#include <linux/dmi.h>
36#include <linux/slab.h>
37
38#include <linux/acpi.h>
39#include <linux/io.h>
40#include <linux/delay.h>
41#include <linux/uaccess.h>
42
43#include <acpi/processor.h>
44
45#include <asm/msr.h>
46#include <asm/processor.h>
47#include <asm/cpufeature.h>
48#include "mperf.h"
49
50#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
51 "acpi-cpufreq", msg)
52
53MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
54MODULE_DESCRIPTION("ACPI Processor P-States Driver");
55MODULE_LICENSE("GPL");
56
57enum {
58 UNDEFINED_CAPABLE = 0,
59 SYSTEM_INTEL_MSR_CAPABLE,
60 SYSTEM_IO_CAPABLE,
61};
62
63#define INTEL_MSR_RANGE (0xffff)
64
65struct acpi_cpufreq_data {
66 struct acpi_processor_performance *acpi_data;
67 struct cpufreq_frequency_table *freq_table;
68 unsigned int resume;
69 unsigned int cpu_feature;
70};
71
72static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);
73
74/* acpi_perf_data is a pointer to percpu data. */
75static struct acpi_processor_performance __percpu *acpi_perf_data;
76
77static struct cpufreq_driver acpi_cpufreq_driver;
78
79static unsigned int acpi_pstate_strict;
80
81static int check_est_cpu(unsigned int cpuid)
82{
83 struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
84
85 return cpu_has(cpu, X86_FEATURE_EST);
86}
87
88static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
89{
90 struct acpi_processor_performance *perf;
91 int i;
92
93 perf = data->acpi_data;
94
95 for (i = 0; i < perf->state_count; i++) {
96 if (value == perf->states[i].status)
97 return data->freq_table[i].frequency;
98 }
99 return 0;
100}
101
102static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
103{
104 int i;
105 struct acpi_processor_performance *perf;
106
107 msr &= INTEL_MSR_RANGE;
108 perf = data->acpi_data;
109
110 for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
111 if (msr == perf->states[data->freq_table[i].index].status)
112 return data->freq_table[i].frequency;
113 }
114 return data->freq_table[0].frequency;
115}
116
117static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
118{
119 switch (data->cpu_feature) {
120 case SYSTEM_INTEL_MSR_CAPABLE:
121 return extract_msr(val, data);
122 case SYSTEM_IO_CAPABLE:
123 return extract_io(val, data);
124 default:
125 return 0;
126 }
127}
128
129struct msr_addr {
130 u32 reg;
131};
132
133struct io_addr {
134 u16 port;
135 u8 bit_width;
136};
137
138struct drv_cmd {
139 unsigned int type;
140 const struct cpumask *mask;
141 union {
142 struct msr_addr msr;
143 struct io_addr io;
144 } addr;
145 u32 val;
146};
147
148/* Called via smp_call_function_single(), on the target CPU */
149static void do_drv_read(void *_cmd)
150{
151 struct drv_cmd *cmd = _cmd;
152 u32 h;
153
154 switch (cmd->type) {
155 case SYSTEM_INTEL_MSR_CAPABLE:
156 rdmsr(cmd->addr.msr.reg, cmd->val, h);
157 break;
158 case SYSTEM_IO_CAPABLE:
159 acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
160 &cmd->val,
161 (u32)cmd->addr.io.bit_width);
162 break;
163 default:
164 break;
165 }
166}
167
168/* Called via smp_call_function_many(), on the target CPUs */
169static void do_drv_write(void *_cmd)
170{
171 struct drv_cmd *cmd = _cmd;
172 u32 lo, hi;
173
174 switch (cmd->type) {
175 case SYSTEM_INTEL_MSR_CAPABLE:
176 rdmsr(cmd->addr.msr.reg, lo, hi);
177 lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
178 wrmsr(cmd->addr.msr.reg, lo, hi);
179 break;
180 case SYSTEM_IO_CAPABLE:
181 acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
182 cmd->val,
183 (u32)cmd->addr.io.bit_width);
184 break;
185 default:
186 break;
187 }
188}
189
190static void drv_read(struct drv_cmd *cmd)
191{
192 int err;
193 cmd->val = 0;
194
195 err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
196 WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? */
197}
198
199static void drv_write(struct drv_cmd *cmd)
200{
201 int this_cpu;
202
203 this_cpu = get_cpu();
204 if (cpumask_test_cpu(this_cpu, cmd->mask))
205 do_drv_write(cmd);
206 smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
207 put_cpu();
208}
209
210static u32 get_cur_val(const struct cpumask *mask)
211{
212 struct acpi_processor_performance *perf;
213 struct drv_cmd cmd;
214
215 if (unlikely(cpumask_empty(mask)))
216 return 0;
217
218 switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
219 case SYSTEM_INTEL_MSR_CAPABLE:
220 cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
221 cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
222 break;
223 case SYSTEM_IO_CAPABLE:
224 cmd.type = SYSTEM_IO_CAPABLE;
225 perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
226 cmd.addr.io.port = perf->control_register.address;
227 cmd.addr.io.bit_width = perf->control_register.bit_width;
228 break;
229 default:
230 return 0;
231 }
232
233 cmd.mask = mask;
234 drv_read(&cmd);
235
236 dprintk("get_cur_val = %u\n", cmd.val);
237
238 return cmd.val;
239}
240
241static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
242{
243 struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
244 unsigned int freq;
245 unsigned int cached_freq;
246
247 dprintk("get_cur_freq_on_cpu (%d)\n", cpu);
248
249 if (unlikely(data == NULL ||
250 data->acpi_data == NULL || data->freq_table == NULL)) {
251 return 0;
252 }
253
254 cached_freq = data->freq_table[data->acpi_data->state].frequency;
255 freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
256 if (freq != cached_freq) {
257 /*
258 * The dreaded BIOS frequency change behind our back.
259 * Force set the frequency on next target call.
260 */
261 data->resume = 1;
262 }
263
264 dprintk("cur freq = %u\n", freq);
265
266 return freq;
267}
268
269static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
270 struct acpi_cpufreq_data *data)
271{
272 unsigned int cur_freq;
273 unsigned int i;
274
275 for (i = 0; i < 100; i++) {
276 cur_freq = extract_freq(get_cur_val(mask), data);
277 if (cur_freq == freq)
278 return 1;
279 udelay(10);
280 }
281 return 0;
282}
283
284static int acpi_cpufreq_target(struct cpufreq_policy *policy,
285 unsigned int target_freq, unsigned int relation)
286{
287 struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
288 struct acpi_processor_performance *perf;
289 struct cpufreq_freqs freqs;
290 struct drv_cmd cmd;
291 unsigned int next_state = 0; /* Index into freq_table */
292 unsigned int next_perf_state = 0; /* Index into perf table */
293 unsigned int i;
294 int result = 0;
295
296 dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
297
298 if (unlikely(data == NULL ||
299 data->acpi_data == NULL || data->freq_table == NULL)) {
300 return -ENODEV;
301 }
302
303 perf = data->acpi_data;
304 result = cpufreq_frequency_table_target(policy,
305 data->freq_table,
306 target_freq,
307 relation, &next_state);
308 if (unlikely(result)) {
309 result = -ENODEV;
310 goto out;
311 }
312
313 next_perf_state = data->freq_table[next_state].index;
314 if (perf->state == next_perf_state) {
315 if (unlikely(data->resume)) {
316 dprintk("Called after resume, resetting to P%d\n",
317 next_perf_state);
318 data->resume = 0;
319 } else {
320 dprintk("Already at target state (P%d)\n",
321 next_perf_state);
322 goto out;
323 }
324 }
325
326 switch (data->cpu_feature) {
327 case SYSTEM_INTEL_MSR_CAPABLE:
328 cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
329 cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
330 cmd.val = (u32) perf->states[next_perf_state].control;
331 break;
332 case SYSTEM_IO_CAPABLE:
333 cmd.type = SYSTEM_IO_CAPABLE;
334 cmd.addr.io.port = perf->control_register.address;
335 cmd.addr.io.bit_width = perf->control_register.bit_width;
336 cmd.val = (u32) perf->states[next_perf_state].control;
337 break;
338 default:
339 result = -ENODEV;
340 goto out;
341 }
342
343 /* cpufreq holds the hotplug lock, so we are safe from here on */
344 if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
345 cmd.mask = policy->cpus;
346 else
347 cmd.mask = cpumask_of(policy->cpu);
348
349 freqs.old = perf->states[perf->state].core_frequency * 1000;
350 freqs.new = data->freq_table[next_state].frequency;
351 for_each_cpu(i, policy->cpus) {
352 freqs.cpu = i;
353 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
354 }
355
356 drv_write(&cmd);
357
358 if (acpi_pstate_strict) {
359 if (!check_freqs(cmd.mask, freqs.new, data)) {
360 dprintk("acpi_cpufreq_target failed (%d)\n",
361 policy->cpu);
362 result = -EAGAIN;
363 goto out;
364 }
365 }
366
367 for_each_cpu(i, policy->cpus) {
368 freqs.cpu = i;
369 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
370 }
371 perf->state = next_perf_state;
372
373out:
374 return result;
375}
376
377static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
378{
379 struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
380
381 dprintk("acpi_cpufreq_verify\n");
382
383 return cpufreq_frequency_table_verify(policy, data->freq_table);
384}
385
386static unsigned long
387acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
388{
389 struct acpi_processor_performance *perf = data->acpi_data;
390
391 if (cpu_khz) {
392 /* search the closest match to cpu_khz */
393 unsigned int i;
394 unsigned long freq;
395 unsigned long freqn = perf->states[0].core_frequency * 1000;
396
397 for (i = 0; i < (perf->state_count-1); i++) {
398 freq = freqn;
399 freqn = perf->states[i+1].core_frequency * 1000;
400 if ((2 * cpu_khz) > (freqn + freq)) {
401 perf->state = i;
402 return freq;
403 }
404 }
405 perf->state = perf->state_count-1;
406 return freqn;
407 } else {
408 /* assume CPU is at P0... */
409 perf->state = 0;
410 return perf->states[0].core_frequency * 1000;
411 }
412}
413
414static void free_acpi_perf_data(void)
415{
416 unsigned int i;
417
418 /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
419 for_each_possible_cpu(i)
420 free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
421 ->shared_cpu_map);
422 free_percpu(acpi_perf_data);
423}
424
425/*
426 * acpi_cpufreq_early_init - initialize ACPI P-States library
427 *
428 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
429 * in order to determine correct frequency and voltage pairings. We can
430 * do _PDC and _PSD and find out the processor dependency for the
431 * actual init that will happen later...
432 */
433static int __init acpi_cpufreq_early_init(void)
434{
435 unsigned int i;
436 dprintk("acpi_cpufreq_early_init\n");
437
438 acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
439 if (!acpi_perf_data) {
440 dprintk("Memory allocation error for acpi_perf_data.\n");
441 return -ENOMEM;
442 }
443 for_each_possible_cpu(i) {
444 if (!zalloc_cpumask_var_node(
445 &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
446 GFP_KERNEL, cpu_to_node(i))) {
447
448 /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
449 free_acpi_perf_data();
450 return -ENOMEM;
451 }
452 }
453
454 /* Do initialization in ACPI core */
455 acpi_processor_preregister_performance(acpi_perf_data);
456 return 0;
457}
458
459#ifdef CONFIG_SMP
460/*
461 * Some BIOSes do SW_ANY coordination internally, either set it up in hw
462 * or do it in BIOS firmware and won't inform about it to OS. If not
463 * detected, this has a side effect of making CPU run at a different speed
464 * than OS intended it to run at. Detect it and handle it cleanly.
465 */
466static int bios_with_sw_any_bug;
467
468static int sw_any_bug_found(const struct dmi_system_id *d)
469{
470 bios_with_sw_any_bug = 1;
471 return 0;
472}
473
474static const struct dmi_system_id sw_any_bug_dmi_table[] = {
475 {
476 .callback = sw_any_bug_found,
477 .ident = "Supermicro Server X6DLP",
478 .matches = {
479 DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
480 DMI_MATCH(DMI_BIOS_VERSION, "080010"),
481 DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
482 },
483 },
484 { }
485};
486
487static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
488{
489 /* Intel Xeon Processor 7100 Series Specification Update
490 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
491 * AL30: A Machine Check Exception (MCE) Occurring during an
492 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
493 * Both Processor Cores to Lock Up. */
494 if (c->x86_vendor == X86_VENDOR_INTEL) {
495 if ((c->x86 == 15) &&
496 (c->x86_model == 6) &&
497 (c->x86_mask == 8)) {
498 printk(KERN_INFO "acpi-cpufreq: Intel(R) "
499 "Xeon(R) 7100 Errata AL30, processors may "
500 "lock up on frequency changes: disabling "
501 "acpi-cpufreq.\n");
502 return -ENODEV;
503 }
504 }
505 return 0;
506}
507#endif
508
509static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
510{
511 unsigned int i;
512 unsigned int valid_states = 0;
513 unsigned int cpu = policy->cpu;
514 struct acpi_cpufreq_data *data;
515 unsigned int result = 0;
516 struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
517 struct acpi_processor_performance *perf;
518#ifdef CONFIG_SMP
519 static int blacklisted;
520#endif
521
522 dprintk("acpi_cpufreq_cpu_init\n");
523
524#ifdef CONFIG_SMP
525 if (blacklisted)
526 return blacklisted;
527 blacklisted = acpi_cpufreq_blacklist(c);
528 if (blacklisted)
529 return blacklisted;
530#endif
531
532 data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);
533 if (!data)
534 return -ENOMEM;
535
536 data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
537 per_cpu(acfreq_data, cpu) = data;
538
539 if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
540 acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
541
542 result = acpi_processor_register_performance(data->acpi_data, cpu);
543 if (result)
544 goto err_free;
545
546 perf = data->acpi_data;
547 policy->shared_type = perf->shared_type;
548
549 /*
550 * Will let policy->cpus know about dependency only when software
551 * coordination is required.
552 */
553 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
554 policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
555 cpumask_copy(policy->cpus, perf->shared_cpu_map);
556 }
557 cpumask_copy(policy->related_cpus, perf->shared_cpu_map);
558
559#ifdef CONFIG_SMP
560 dmi_check_system(sw_any_bug_dmi_table);
561 if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) {
562 policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
563 cpumask_copy(policy->cpus, cpu_core_mask(cpu));
564 }
565#endif
566
567 /* capability check */
568 if (perf->state_count <= 1) {
569 dprintk("No P-States\n");
570 result = -ENODEV;
571 goto err_unreg;
572 }
573
574 if (perf->control_register.space_id != perf->status_register.space_id) {
575 result = -ENODEV;
576 goto err_unreg;
577 }
578
579 switch (perf->control_register.space_id) {
580 case ACPI_ADR_SPACE_SYSTEM_IO:
581 dprintk("SYSTEM IO addr space\n");
582 data->cpu_feature = SYSTEM_IO_CAPABLE;
583 break;
584 case ACPI_ADR_SPACE_FIXED_HARDWARE:
585 dprintk("HARDWARE addr space\n");
586 if (!check_est_cpu(cpu)) {
587 result = -ENODEV;
588 goto err_unreg;
589 }
590 data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
591 break;
592 default:
593 dprintk("Unknown addr space %d\n",
594 (u32) (perf->control_register.space_id));
595 result = -ENODEV;
596 goto err_unreg;
597 }
598
599 data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) *
600 (perf->state_count+1), GFP_KERNEL);
601 if (!data->freq_table) {
602 result = -ENOMEM;
603 goto err_unreg;
604 }
605
606 /* detect transition latency */
607 policy->cpuinfo.transition_latency = 0;
608 for (i = 0; i < perf->state_count; i++) {
609 if ((perf->states[i].transition_latency * 1000) >
610 policy->cpuinfo.transition_latency)
611 policy->cpuinfo.transition_latency =
612 perf->states[i].transition_latency * 1000;
613 }
614
615 /* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
616 if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
617 policy->cpuinfo.transition_latency > 20 * 1000) {
618 policy->cpuinfo.transition_latency = 20 * 1000;
619 printk_once(KERN_INFO
620 "P-state transition latency capped at 20 uS\n");
621 }
622
623 /* table init */
624 for (i = 0; i < perf->state_count; i++) {
625 if (i > 0 && perf->states[i].core_frequency >=
626 data->freq_table[valid_states-1].frequency / 1000)
627 continue;
628
629 data->freq_table[valid_states].index = i;
630 data->freq_table[valid_states].frequency =
631 perf->states[i].core_frequency * 1000;
632 valid_states++;
633 }
634 data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
635 perf->state = 0;
636
637 result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
638 if (result)
639 goto err_freqfree;
640
641 if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
642 printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");
643
644 switch (perf->control_register.space_id) {
645 case ACPI_ADR_SPACE_SYSTEM_IO:
646 /* Current speed is unknown and not detectable by IO port */
647 policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
648 break;
649 case ACPI_ADR_SPACE_FIXED_HARDWARE:
650 acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
651 policy->cur = get_cur_freq_on_cpu(cpu);
652 break;
653 default:
654 break;
655 }
656
657 /* notify BIOS that we exist */
658 acpi_processor_notify_smm(THIS_MODULE);
659
660 /* Check for APERF/MPERF support in hardware */
661 if (cpu_has(c, X86_FEATURE_APERFMPERF))
662 acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf;
663
664 dprintk("CPU%u - ACPI performance management activated.\n", cpu);
665 for (i = 0; i < perf->state_count; i++)
666 dprintk(" %cP%d: %d MHz, %d mW, %d uS\n",
667 (i == perf->state ? '*' : ' '), i,
668 (u32) perf->states[i].core_frequency,
669 (u32) perf->states[i].power,
670 (u32) perf->states[i].transition_latency);
671
672 cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
673
674 /*
675 * the first call to ->target() should result in us actually
676 * writing something to the appropriate registers.
677 */
678 data->resume = 1;
679
680 return result;
681
682err_freqfree:
683 kfree(data->freq_table);
684err_unreg:
685 acpi_processor_unregister_performance(perf, cpu);
686err_free:
687 kfree(data);
688 per_cpu(acfreq_data, cpu) = NULL;
689
690 return result;
691}
692
693static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
694{
695 struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
696
697 dprintk("acpi_cpufreq_cpu_exit\n");
698
699 if (data) {
700 cpufreq_frequency_table_put_attr(policy->cpu);
701 per_cpu(acfreq_data, policy->cpu) = NULL;
702 acpi_processor_unregister_performance(data->acpi_data,
703 policy->cpu);
704 kfree(data->freq_table);
705 kfree(data);
706 }
707
708 return 0;
709}
710
711static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
712{
713 struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
714
715 dprintk("acpi_cpufreq_resume\n");
716
717 data->resume = 1;
718
719 return 0;
720}
721
722static struct freq_attr *acpi_cpufreq_attr[] = {
723 &cpufreq_freq_attr_scaling_available_freqs,
724 NULL,
725};
726
727static struct cpufreq_driver acpi_cpufreq_driver = {
728 .verify = acpi_cpufreq_verify,
729 .target = acpi_cpufreq_target,
730 .bios_limit = acpi_processor_get_bios_limit,
731 .init = acpi_cpufreq_cpu_init,
732 .exit = acpi_cpufreq_cpu_exit,
733 .resume = acpi_cpufreq_resume,
734 .name = "acpi-cpufreq",
735 .owner = THIS_MODULE,
736 .attr = acpi_cpufreq_attr,
737};
738
739static int __init acpi_cpufreq_init(void)
740{
741 int ret;
742
743 if (acpi_disabled)
744 return 0;
745
746 dprintk("acpi_cpufreq_init\n");
747
748 ret = acpi_cpufreq_early_init();
749 if (ret)
750 return ret;
751
752 ret = cpufreq_register_driver(&acpi_cpufreq_driver);
753 if (ret)
754 free_acpi_perf_data();
755
756 return ret;
757}
758
759static void __exit acpi_cpufreq_exit(void)
760{
761 dprintk("acpi_cpufreq_exit\n");
762
763 cpufreq_unregister_driver(&acpi_cpufreq_driver);
764
765 free_percpu(acpi_perf_data);
766}
767
768module_param(acpi_pstate_strict, uint, 0644);
769MODULE_PARM_DESC(acpi_pstate_strict,
770 "value 0 or non-zero. non-zero -> strict ACPI checks are "
771 "performed during frequency changes.");
772
773late_initcall(acpi_cpufreq_init);
774module_exit(acpi_cpufreq_exit);
775
776MODULE_ALIAS("acpi");
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
deleted file mode 100644
index 141abebc4516..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ /dev/null
@@ -1,446 +0,0 @@
1/*
2 * (C) 2004-2006 Sebastian Witt <se.witt@gmx.net>
3 *
4 * Licensed under the terms of the GNU GPL License version 2.
5 * Based upon reverse engineered information
6 *
7 * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
8 */
9
10#include <linux/kernel.h>
11#include <linux/module.h>
12#include <linux/moduleparam.h>
13#include <linux/init.h>
14#include <linux/cpufreq.h>
15#include <linux/pci.h>
16#include <linux/delay.h>
17
18#define NFORCE2_XTAL 25
19#define NFORCE2_BOOTFSB 0x48
20#define NFORCE2_PLLENABLE 0xa8
21#define NFORCE2_PLLREG 0xa4
22#define NFORCE2_PLLADR 0xa0
23#define NFORCE2_PLL(mul, div) (0x100000 | (mul << 8) | div)
24
25#define NFORCE2_MIN_FSB 50
26#define NFORCE2_SAFE_DISTANCE 50
27
28/* Delay in ms between FSB changes */
29/* #define NFORCE2_DELAY 10 */
30
31/*
32 * nforce2_chipset:
33 * FSB is changed using the chipset
34 */
35static struct pci_dev *nforce2_dev;
36
37/* fid:
38 * multiplier * 10
39 */
40static int fid;
41
42/* min_fsb, max_fsb:
43 * minimum and maximum FSB (= FSB at boot time)
44 */
45static int min_fsb;
46static int max_fsb;
47
48MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>");
49MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver");
50MODULE_LICENSE("GPL");
51
52module_param(fid, int, 0444);
53module_param(min_fsb, int, 0444);
54
55MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
56MODULE_PARM_DESC(min_fsb,
57 "Minimum FSB to use, if not defined: current FSB - 50");
58
59#define PFX "cpufreq-nforce2: "
60#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
61 "cpufreq-nforce2", msg)
62
63/**
64 * nforce2_calc_fsb - calculate FSB
65 * @pll: PLL value
66 *
67 * Calculates FSB from PLL value
68 */
69static int nforce2_calc_fsb(int pll)
70{
71 unsigned char mul, div;
72
73 mul = (pll >> 8) & 0xff;
74 div = pll & 0xff;
75
76 if (div > 0)
77 return NFORCE2_XTAL * mul / div;
78
79 return 0;
80}
81
82/**
83 * nforce2_calc_pll - calculate PLL value
84 * @fsb: FSB
85 *
86 * Calculate PLL value for given FSB
87 */
88static int nforce2_calc_pll(unsigned int fsb)
89{
90 unsigned char xmul, xdiv;
91 unsigned char mul = 0, div = 0;
92 int tried = 0;
93
94 /* Try to calculate multiplier and divider up to 4 times */
95 while (((mul == 0) || (div == 0)) && (tried <= 3)) {
96 for (xdiv = 2; xdiv <= 0x80; xdiv++)
97 for (xmul = 1; xmul <= 0xfe; xmul++)
98 if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) ==
99 fsb + tried) {
100 mul = xmul;
101 div = xdiv;
102 }
103 tried++;
104 }
105
106 if ((mul == 0) || (div == 0))
107 return -1;
108
109 return NFORCE2_PLL(mul, div);
110}
111
112/**
113 * nforce2_write_pll - write PLL value to chipset
114 * @pll: PLL value
115 *
116 * Writes new FSB PLL value to chipset
117 */
118static void nforce2_write_pll(int pll)
119{
120 int temp;
121
122 /* Set the pll addr. to 0x00 */
123 pci_write_config_dword(nforce2_dev, NFORCE2_PLLADR, 0);
124
125 /* Now write the value in all 64 registers */
126 for (temp = 0; temp <= 0x3f; temp++)
127 pci_write_config_dword(nforce2_dev, NFORCE2_PLLREG, pll);
128
129 return;
130}
131
132/**
133 * nforce2_fsb_read - Read FSB
134 *
135 * Read FSB from chipset
136 * If bootfsb != 0, return FSB at boot-time
137 */
138static unsigned int nforce2_fsb_read(int bootfsb)
139{
140 struct pci_dev *nforce2_sub5;
141 u32 fsb, temp = 0;
142
143 /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
144 nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 0x01EF,
145 PCI_ANY_ID, PCI_ANY_ID, NULL);
146 if (!nforce2_sub5)
147 return 0;
148
149 pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb);
150 fsb /= 1000000;
151
152 /* Check if PLL register is already set */
153 pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
154
155 if (bootfsb || !temp)
156 return fsb;
157
158 /* Use PLL register FSB value */
159 pci_read_config_dword(nforce2_dev, NFORCE2_PLLREG, &temp);
160 fsb = nforce2_calc_fsb(temp);
161
162 return fsb;
163}
164
165/**
166 * nforce2_set_fsb - set new FSB
167 * @fsb: New FSB
168 *
169 * Sets new FSB
170 */
171static int nforce2_set_fsb(unsigned int fsb)
172{
173 u32 temp = 0;
174 unsigned int tfsb;
175 int diff;
176 int pll = 0;
177
178 if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
179 printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb);
180 return -EINVAL;
181 }
182
183 tfsb = nforce2_fsb_read(0);
184 if (!tfsb) {
185 printk(KERN_ERR PFX "Error while reading the FSB\n");
186 return -EINVAL;
187 }
188
189 /* First write? Then set actual value */
190 pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
191 if (!temp) {
192 pll = nforce2_calc_pll(tfsb);
193
194 if (pll < 0)
195 return -EINVAL;
196
197 nforce2_write_pll(pll);
198 }
199
200 /* Enable write access */
201 temp = 0x01;
202 pci_write_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8)temp);
203
204 diff = tfsb - fsb;
205
206 if (!diff)
207 return 0;
208
209 while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) {
210 if (diff < 0)
211 tfsb++;
212 else
213 tfsb--;
214
215 /* Calculate the PLL reg. value */
216 pll = nforce2_calc_pll(tfsb);
217 if (pll == -1)
218 return -EINVAL;
219
220 nforce2_write_pll(pll);
221#ifdef NFORCE2_DELAY
222 mdelay(NFORCE2_DELAY);
223#endif
224 }
225
226 temp = 0x40;
227 pci_write_config_byte(nforce2_dev, NFORCE2_PLLADR, (u8)temp);
228
229 return 0;
230}
231
232/**
233 * nforce2_get - get the CPU frequency
234 * @cpu: CPU number
235 *
236 * Returns the CPU frequency
237 */
238static unsigned int nforce2_get(unsigned int cpu)
239{
240 if (cpu)
241 return 0;
242 return nforce2_fsb_read(0) * fid * 100;
243}
244
245/**
246 * nforce2_target - set a new CPUFreq policy
247 * @policy: new policy
248 * @target_freq: the target frequency
249 * @relation: how that frequency relates to achieved frequency
250 * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
251 *
252 * Sets a new CPUFreq policy.
253 */
254static int nforce2_target(struct cpufreq_policy *policy,
255 unsigned int target_freq, unsigned int relation)
256{
257/* unsigned long flags; */
258 struct cpufreq_freqs freqs;
259 unsigned int target_fsb;
260
261 if ((target_freq > policy->max) || (target_freq < policy->min))
262 return -EINVAL;
263
264 target_fsb = target_freq / (fid * 100);
265
266 freqs.old = nforce2_get(policy->cpu);
267 freqs.new = target_fsb * fid * 100;
268 freqs.cpu = 0; /* Only one CPU on nForce2 platforms */
269
270 if (freqs.old == freqs.new)
271 return 0;
272
273 dprintk("Old CPU frequency %d kHz, new %d kHz\n",
274 freqs.old, freqs.new);
275
276 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
277
278 /* Disable IRQs */
279 /* local_irq_save(flags); */
280
281 if (nforce2_set_fsb(target_fsb) < 0)
282 printk(KERN_ERR PFX "Changing FSB to %d failed\n",
283 target_fsb);
284 else
285 dprintk("Changed FSB successfully to %d\n",
286 target_fsb);
287
288 /* Enable IRQs */
289 /* local_irq_restore(flags); */
290
291 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
292
293 return 0;
294}
295
296/**
297 * nforce2_verify - verifies a new CPUFreq policy
298 * @policy: new policy
299 */
300static int nforce2_verify(struct cpufreq_policy *policy)
301{
302 unsigned int fsb_pol_max;
303
304 fsb_pol_max = policy->max / (fid * 100);
305
306 if (policy->min < (fsb_pol_max * fid * 100))
307 policy->max = (fsb_pol_max + 1) * fid * 100;
308
309 cpufreq_verify_within_limits(policy,
310 policy->cpuinfo.min_freq,
311 policy->cpuinfo.max_freq);
312 return 0;
313}
314
315static int nforce2_cpu_init(struct cpufreq_policy *policy)
316{
317 unsigned int fsb;
318 unsigned int rfid;
319
320 /* capability check */
321 if (policy->cpu != 0)
322 return -ENODEV;
323
324 /* Get current FSB */
325 fsb = nforce2_fsb_read(0);
326
327 if (!fsb)
328 return -EIO;
329
330 /* FIX: Get FID from CPU */
331 if (!fid) {
332 if (!cpu_khz) {
333 printk(KERN_WARNING PFX
334 "cpu_khz not set, can't calculate multiplier!\n");
335 return -ENODEV;
336 }
337
338 fid = cpu_khz / (fsb * 100);
339 rfid = fid % 5;
340
341 if (rfid) {
342 if (rfid > 2)
343 fid += 5 - rfid;
344 else
345 fid -= rfid;
346 }
347 }
348
349 printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb,
350 fid / 10, fid % 10);
351
352 /* Set maximum FSB to FSB at boot time */
353 max_fsb = nforce2_fsb_read(1);
354
355 if (!max_fsb)
356 return -EIO;
357
358 if (!min_fsb)
359 min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE;
360
361 if (min_fsb < NFORCE2_MIN_FSB)
362 min_fsb = NFORCE2_MIN_FSB;
363
364 /* cpuinfo and default policy values */
365 policy->cpuinfo.min_freq = min_fsb * fid * 100;
366 policy->cpuinfo.max_freq = max_fsb * fid * 100;
367 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
368 policy->cur = nforce2_get(policy->cpu);
369 policy->min = policy->cpuinfo.min_freq;
370 policy->max = policy->cpuinfo.max_freq;
371
372 return 0;
373}
374
375static int nforce2_cpu_exit(struct cpufreq_policy *policy)
376{
377 return 0;
378}
379
380static struct cpufreq_driver nforce2_driver = {
381 .name = "nforce2",
382 .verify = nforce2_verify,
383 .target = nforce2_target,
384 .get = nforce2_get,
385 .init = nforce2_cpu_init,
386 .exit = nforce2_cpu_exit,
387 .owner = THIS_MODULE,
388};
389
390/**
391 * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic
392 *
393 * Detects nForce2 A2 and C1 stepping
394 *
395 */
396static int nforce2_detect_chipset(void)
397{
398 nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
399 PCI_DEVICE_ID_NVIDIA_NFORCE2,
400 PCI_ANY_ID, PCI_ANY_ID, NULL);
401
402 if (nforce2_dev == NULL)
403 return -ENODEV;
404
405 printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n",
406 nforce2_dev->revision);
407 printk(KERN_INFO PFX
408 "FSB changing is maybe unstable and can lead to "
409 "crashes and data loss.\n");
410
411 return 0;
412}
413
414/**
415 * nforce2_init - initializes the nForce2 CPUFreq driver
416 *
417 * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported
418 * devices, -EINVAL on problems during initiatization, and zero on
419 * success.
420 */
421static int __init nforce2_init(void)
422{
423 /* TODO: do we need to detect the processor? */
424
425 /* detect chipset */
426 if (nforce2_detect_chipset()) {
427 printk(KERN_INFO PFX "No nForce2 chipset.\n");
428 return -ENODEV;
429 }
430
431 return cpufreq_register_driver(&nforce2_driver);
432}
433
434/**
435 * nforce2_exit - unregisters cpufreq module
436 *
437 * Unregisters nForce2 FSB change support.
438 */
439static void __exit nforce2_exit(void)
440{
441 cpufreq_unregister_driver(&nforce2_driver);
442}
443
444module_init(nforce2_init);
445module_exit(nforce2_exit);
446
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
deleted file mode 100644
index 35a257dd4bb7..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ /dev/null
@@ -1,367 +0,0 @@
1/*
2 * Based on documentation provided by Dave Jones. Thanks!
3 *
4 * Licensed under the terms of the GNU GPL License version 2.
5 *
6 * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
7 */
8
9#include <linux/kernel.h>
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/cpufreq.h>
13#include <linux/ioport.h>
14#include <linux/slab.h>
15#include <linux/timex.h>
16#include <linux/io.h>
17#include <linux/delay.h>
18
19#include <asm/msr.h>
20#include <asm/tsc.h>
21
22#define EPS_BRAND_C7M 0
23#define EPS_BRAND_C7 1
24#define EPS_BRAND_EDEN 2
25#define EPS_BRAND_C3 3
26#define EPS_BRAND_C7D 4
27
28struct eps_cpu_data {
29 u32 fsb;
30 struct cpufreq_frequency_table freq_table[];
31};
32
33static struct eps_cpu_data *eps_cpu[NR_CPUS];
34
35
36static unsigned int eps_get(unsigned int cpu)
37{
38 struct eps_cpu_data *centaur;
39 u32 lo, hi;
40
41 if (cpu)
42 return 0;
43 centaur = eps_cpu[cpu];
44 if (centaur == NULL)
45 return 0;
46
47 /* Return current frequency */
48 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
49 return centaur->fsb * ((lo >> 8) & 0xff);
50}
51
52static int eps_set_state(struct eps_cpu_data *centaur,
53 unsigned int cpu,
54 u32 dest_state)
55{
56 struct cpufreq_freqs freqs;
57 u32 lo, hi;
58 int err = 0;
59 int i;
60
61 freqs.old = eps_get(cpu);
62 freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff);
63 freqs.cpu = cpu;
64 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
65
66 /* Wait while CPU is busy */
67 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
68 i = 0;
69 while (lo & ((1 << 16) | (1 << 17))) {
70 udelay(16);
71 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
72 i++;
73 if (unlikely(i > 64)) {
74 err = -ENODEV;
75 goto postchange;
76 }
77 }
78 /* Set new multiplier and voltage */
79 wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0);
80 /* Wait until transition end */
81 i = 0;
82 do {
83 udelay(16);
84 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
85 i++;
86 if (unlikely(i > 64)) {
87 err = -ENODEV;
88 goto postchange;
89 }
90 } while (lo & ((1 << 16) | (1 << 17)));
91
92 /* Return current frequency */
93postchange:
94 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
95 freqs.new = centaur->fsb * ((lo >> 8) & 0xff);
96
97#ifdef DEBUG
98 {
99 u8 current_multiplier, current_voltage;
100
101 /* Print voltage and multiplier */
102 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
103 current_voltage = lo & 0xff;
104 printk(KERN_INFO "eps: Current voltage = %dmV\n",
105 current_voltage * 16 + 700);
106 current_multiplier = (lo >> 8) & 0xff;
107 printk(KERN_INFO "eps: Current multiplier = %d\n",
108 current_multiplier);
109 }
110#endif
111 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
112 return err;
113}
114
115static int eps_target(struct cpufreq_policy *policy,
116 unsigned int target_freq,
117 unsigned int relation)
118{
119 struct eps_cpu_data *centaur;
120 unsigned int newstate = 0;
121 unsigned int cpu = policy->cpu;
122 unsigned int dest_state;
123 int ret;
124
125 if (unlikely(eps_cpu[cpu] == NULL))
126 return -ENODEV;
127 centaur = eps_cpu[cpu];
128
129 if (unlikely(cpufreq_frequency_table_target(policy,
130 &eps_cpu[cpu]->freq_table[0],
131 target_freq,
132 relation,
133 &newstate))) {
134 return -EINVAL;
135 }
136
137 /* Make frequency transition */
138 dest_state = centaur->freq_table[newstate].index & 0xffff;
139 ret = eps_set_state(centaur, cpu, dest_state);
140 if (ret)
141 printk(KERN_ERR "eps: Timeout!\n");
142 return ret;
143}
144
145static int eps_verify(struct cpufreq_policy *policy)
146{
147 return cpufreq_frequency_table_verify(policy,
148 &eps_cpu[policy->cpu]->freq_table[0]);
149}
150
151static int eps_cpu_init(struct cpufreq_policy *policy)
152{
153 unsigned int i;
154 u32 lo, hi;
155 u64 val;
156 u8 current_multiplier, current_voltage;
157 u8 max_multiplier, max_voltage;
158 u8 min_multiplier, min_voltage;
159 u8 brand = 0;
160 u32 fsb;
161 struct eps_cpu_data *centaur;
162 struct cpuinfo_x86 *c = &cpu_data(0);
163 struct cpufreq_frequency_table *f_table;
164 int k, step, voltage;
165 int ret;
166 int states;
167
168 if (policy->cpu != 0)
169 return -ENODEV;
170
171 /* Check brand */
172 printk(KERN_INFO "eps: Detected VIA ");
173
174 switch (c->x86_model) {
175 case 10:
176 rdmsr(0x1153, lo, hi);
177 brand = (((lo >> 2) ^ lo) >> 18) & 3;
178 printk(KERN_CONT "Model A ");
179 break;
180 case 13:
181 rdmsr(0x1154, lo, hi);
182 brand = (((lo >> 4) ^ (lo >> 2))) & 0x000000ff;
183 printk(KERN_CONT "Model D ");
184 break;
185 }
186
187 switch (brand) {
188 case EPS_BRAND_C7M:
189 printk(KERN_CONT "C7-M\n");
190 break;
191 case EPS_BRAND_C7:
192 printk(KERN_CONT "C7\n");
193 break;
194 case EPS_BRAND_EDEN:
195 printk(KERN_CONT "Eden\n");
196 break;
197 case EPS_BRAND_C7D:
198 printk(KERN_CONT "C7-D\n");
199 break;
200 case EPS_BRAND_C3:
201 printk(KERN_CONT "C3\n");
202 return -ENODEV;
203 break;
204 }
205 /* Enable Enhanced PowerSaver */
206 rdmsrl(MSR_IA32_MISC_ENABLE, val);
207 if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
208 val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
209 wrmsrl(MSR_IA32_MISC_ENABLE, val);
210 /* Can be locked at 0 */
211 rdmsrl(MSR_IA32_MISC_ENABLE, val);
212 if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
213 printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
214 return -ENODEV;
215 }
216 }
217
218 /* Print voltage and multiplier */
219 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
220 current_voltage = lo & 0xff;
221 printk(KERN_INFO "eps: Current voltage = %dmV\n",
222 current_voltage * 16 + 700);
223 current_multiplier = (lo >> 8) & 0xff;
224 printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier);
225
226 /* Print limits */
227 max_voltage = hi & 0xff;
228 printk(KERN_INFO "eps: Highest voltage = %dmV\n",
229 max_voltage * 16 + 700);
230 max_multiplier = (hi >> 8) & 0xff;
231 printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier);
232 min_voltage = (hi >> 16) & 0xff;
233 printk(KERN_INFO "eps: Lowest voltage = %dmV\n",
234 min_voltage * 16 + 700);
235 min_multiplier = (hi >> 24) & 0xff;
236 printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier);
237
238 /* Sanity checks */
239 if (current_multiplier == 0 || max_multiplier == 0
240 || min_multiplier == 0)
241 return -EINVAL;
242 if (current_multiplier > max_multiplier
243 || max_multiplier <= min_multiplier)
244 return -EINVAL;
245 if (current_voltage > 0x1f || max_voltage > 0x1f)
246 return -EINVAL;
247 if (max_voltage < min_voltage)
248 return -EINVAL;
249
250 /* Calc FSB speed */
251 fsb = cpu_khz / current_multiplier;
252 /* Calc number of p-states supported */
253 if (brand == EPS_BRAND_C7M)
254 states = max_multiplier - min_multiplier + 1;
255 else
256 states = 2;
257
258 /* Allocate private data and frequency table for current cpu */
259 centaur = kzalloc(sizeof(struct eps_cpu_data)
260 + (states + 1) * sizeof(struct cpufreq_frequency_table),
261 GFP_KERNEL);
262 if (!centaur)
263 return -ENOMEM;
264 eps_cpu[0] = centaur;
265
266 /* Copy basic values */
267 centaur->fsb = fsb;
268
269 /* Fill frequency and MSR value table */
270 f_table = &centaur->freq_table[0];
271 if (brand != EPS_BRAND_C7M) {
272 f_table[0].frequency = fsb * min_multiplier;
273 f_table[0].index = (min_multiplier << 8) | min_voltage;
274 f_table[1].frequency = fsb * max_multiplier;
275 f_table[1].index = (max_multiplier << 8) | max_voltage;
276 f_table[2].frequency = CPUFREQ_TABLE_END;
277 } else {
278 k = 0;
279 step = ((max_voltage - min_voltage) * 256)
280 / (max_multiplier - min_multiplier);
281 for (i = min_multiplier; i <= max_multiplier; i++) {
282 voltage = (k * step) / 256 + min_voltage;
283 f_table[k].frequency = fsb * i;
284 f_table[k].index = (i << 8) | voltage;
285 k++;
286 }
287 f_table[k].frequency = CPUFREQ_TABLE_END;
288 }
289
290 policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */
291 policy->cur = fsb * current_multiplier;
292
293 ret = cpufreq_frequency_table_cpuinfo(policy, &centaur->freq_table[0]);
294 if (ret) {
295 kfree(centaur);
296 return ret;
297 }
298
299 cpufreq_frequency_table_get_attr(&centaur->freq_table[0], policy->cpu);
300 return 0;
301}
302
303static int eps_cpu_exit(struct cpufreq_policy *policy)
304{
305 unsigned int cpu = policy->cpu;
306 struct eps_cpu_data *centaur;
307 u32 lo, hi;
308
309 if (eps_cpu[cpu] == NULL)
310 return -ENODEV;
311 centaur = eps_cpu[cpu];
312
313 /* Get max frequency */
314 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
315 /* Set max frequency */
316 eps_set_state(centaur, cpu, hi & 0xffff);
317 /* Bye */
318 cpufreq_frequency_table_put_attr(policy->cpu);
319 kfree(eps_cpu[cpu]);
320 eps_cpu[cpu] = NULL;
321 return 0;
322}
323
324static struct freq_attr *eps_attr[] = {
325 &cpufreq_freq_attr_scaling_available_freqs,
326 NULL,
327};
328
329static struct cpufreq_driver eps_driver = {
330 .verify = eps_verify,
331 .target = eps_target,
332 .init = eps_cpu_init,
333 .exit = eps_cpu_exit,
334 .get = eps_get,
335 .name = "e_powersaver",
336 .owner = THIS_MODULE,
337 .attr = eps_attr,
338};
339
340static int __init eps_init(void)
341{
342 struct cpuinfo_x86 *c = &cpu_data(0);
343
344 /* This driver will work only on Centaur C7 processors with
345 * Enhanced SpeedStep/PowerSaver registers */
346 if (c->x86_vendor != X86_VENDOR_CENTAUR
347 || c->x86 != 6 || c->x86_model < 10)
348 return -ENODEV;
349 if (!cpu_has(c, X86_FEATURE_EST))
350 return -ENODEV;
351
352 if (cpufreq_register_driver(&eps_driver))
353 return -EINVAL;
354 return 0;
355}
356
357static void __exit eps_exit(void)
358{
359 cpufreq_unregister_driver(&eps_driver);
360}
361
362MODULE_AUTHOR("Rafal Bilski <rafalbilski@interia.pl>");
363MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's.");
364MODULE_LICENSE("GPL");
365
366module_init(eps_init);
367module_exit(eps_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
deleted file mode 100644
index c587db472a75..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ /dev/null
@@ -1,309 +0,0 @@
1/*
2 * elanfreq: cpufreq driver for the AMD ELAN family
3 *
4 * (c) Copyright 2002 Robert Schwebel <r.schwebel@pengutronix.de>
5 *
6 * Parts of this code are (c) Sven Geggus <sven@geggus.net>
7 *
8 * All Rights Reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * 2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel
16 *
17 */
18
19#include <linux/kernel.h>
20#include <linux/module.h>
21#include <linux/init.h>
22
23#include <linux/delay.h>
24#include <linux/cpufreq.h>
25
26#include <asm/msr.h>
27#include <linux/timex.h>
28#include <linux/io.h>
29
30#define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */
31#define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */
32
33/* Module parameter */
34static int max_freq;
35
36struct s_elan_multiplier {
37 int clock; /* frequency in kHz */
38 int val40h; /* PMU Force Mode register */
39 int val80h; /* CPU Clock Speed Register */
40};
41
42/*
43 * It is important that the frequencies
44 * are listed in ascending order here!
45 */
46static struct s_elan_multiplier elan_multiplier[] = {
47 {1000, 0x02, 0x18},
48 {2000, 0x02, 0x10},
49 {4000, 0x02, 0x08},
50 {8000, 0x00, 0x00},
51 {16000, 0x00, 0x02},
52 {33000, 0x00, 0x04},
53 {66000, 0x01, 0x04},
54 {99000, 0x01, 0x05}
55};
56
57static struct cpufreq_frequency_table elanfreq_table[] = {
58 {0, 1000},
59 {1, 2000},
60 {2, 4000},
61 {3, 8000},
62 {4, 16000},
63 {5, 33000},
64 {6, 66000},
65 {7, 99000},
66 {0, CPUFREQ_TABLE_END},
67};
68
69
70/**
71 * elanfreq_get_cpu_frequency: determine current cpu speed
72 *
73 * Finds out at which frequency the CPU of the Elan SOC runs
74 * at the moment. Frequencies from 1 to 33 MHz are generated
75 * the normal way, 66 and 99 MHz are called "Hyperspeed Mode"
76 * and have the rest of the chip running with 33 MHz.
77 */
78
79static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
80{
81 u8 clockspeed_reg; /* Clock Speed Register */
82
83 local_irq_disable();
84 outb_p(0x80, REG_CSCIR);
85 clockspeed_reg = inb_p(REG_CSCDR);
86 local_irq_enable();
87
88 if ((clockspeed_reg & 0xE0) == 0xE0)
89 return 0;
90
91 /* Are we in CPU clock multiplied mode (66/99 MHz)? */
92 if ((clockspeed_reg & 0xE0) == 0xC0) {
93 if ((clockspeed_reg & 0x01) == 0)
94 return 66000;
95 else
96 return 99000;
97 }
98
99 /* 33 MHz is not 32 MHz... */
100 if ((clockspeed_reg & 0xE0) == 0xA0)
101 return 33000;
102
103 return (1<<((clockspeed_reg & 0xE0) >> 5)) * 1000;
104}
105
106
107/**
108 * elanfreq_set_cpu_frequency: Change the CPU core frequency
109 * @cpu: cpu number
110 * @freq: frequency in kHz
111 *
112 * This function takes a frequency value and changes the CPU frequency
113 * according to this. Note that the frequency has to be checked by
114 * elanfreq_validatespeed() for correctness!
115 *
116 * There is no return value.
117 */
118
119static void elanfreq_set_cpu_state(unsigned int state)
120{
121 struct cpufreq_freqs freqs;
122
123 freqs.old = elanfreq_get_cpu_frequency(0);
124 freqs.new = elan_multiplier[state].clock;
125 freqs.cpu = 0; /* elanfreq.c is UP only driver */
126
127 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
128
129 printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n",
130 elan_multiplier[state].clock);
131
132
133 /*
134 * Access to the Elan's internal registers is indexed via
135 * 0x22: Chip Setup & Control Register Index Register (CSCI)
136 * 0x23: Chip Setup & Control Register Data Register (CSCD)
137 *
138 */
139
140 /*
141 * 0x40 is the Power Management Unit's Force Mode Register.
142 * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency)
143 */
144
145 local_irq_disable();
146 outb_p(0x40, REG_CSCIR); /* Disable hyperspeed mode */
147 outb_p(0x00, REG_CSCDR);
148 local_irq_enable(); /* wait till internal pipelines and */
149 udelay(1000); /* buffers have cleaned up */
150
151 local_irq_disable();
152
153 /* now, set the CPU clock speed register (0x80) */
154 outb_p(0x80, REG_CSCIR);
155 outb_p(elan_multiplier[state].val80h, REG_CSCDR);
156
157 /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */
158 outb_p(0x40, REG_CSCIR);
159 outb_p(elan_multiplier[state].val40h, REG_CSCDR);
160 udelay(10000);
161 local_irq_enable();
162
163 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
164};
165
166
167/**
168 * elanfreq_validatespeed: test if frequency range is valid
169 * @policy: the policy to validate
170 *
171 * This function checks if a given frequency range in kHz is valid
172 * for the hardware supported by the driver.
173 */
174
175static int elanfreq_verify(struct cpufreq_policy *policy)
176{
177 return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]);
178}
179
180static int elanfreq_target(struct cpufreq_policy *policy,
181 unsigned int target_freq,
182 unsigned int relation)
183{
184 unsigned int newstate = 0;
185
186 if (cpufreq_frequency_table_target(policy, &elanfreq_table[0],
187 target_freq, relation, &newstate))
188 return -EINVAL;
189
190 elanfreq_set_cpu_state(newstate);
191
192 return 0;
193}
194
195
196/*
197 * Module init and exit code
198 */
199
200static int elanfreq_cpu_init(struct cpufreq_policy *policy)
201{
202 struct cpuinfo_x86 *c = &cpu_data(0);
203 unsigned int i;
204 int result;
205
206 /* capability check */
207 if ((c->x86_vendor != X86_VENDOR_AMD) ||
208 (c->x86 != 4) || (c->x86_model != 10))
209 return -ENODEV;
210
211 /* max freq */
212 if (!max_freq)
213 max_freq = elanfreq_get_cpu_frequency(0);
214
215 /* table init */
216 for (i = 0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) {
217 if (elanfreq_table[i].frequency > max_freq)
218 elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID;
219 }
220
221 /* cpuinfo and default policy values */
222 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
223 policy->cur = elanfreq_get_cpu_frequency(0);
224
225 result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table);
226 if (result)
227 return result;
228
229 cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu);
230 return 0;
231}
232
233
234static int elanfreq_cpu_exit(struct cpufreq_policy *policy)
235{
236 cpufreq_frequency_table_put_attr(policy->cpu);
237 return 0;
238}
239
240
241#ifndef MODULE
242/**
243 * elanfreq_setup - elanfreq command line parameter parsing
244 *
245 * elanfreq command line parameter. Use:
246 * elanfreq=66000
247 * to set the maximum CPU frequency to 66 MHz. Note that in
248 * case you do not give this boot parameter, the maximum
249 * frequency will fall back to _current_ CPU frequency which
250 * might be lower. If you build this as a module, use the
251 * max_freq module parameter instead.
252 */
253static int __init elanfreq_setup(char *str)
254{
255 max_freq = simple_strtoul(str, &str, 0);
256 printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n");
257 return 1;
258}
259__setup("elanfreq=", elanfreq_setup);
260#endif
261
262
263static struct freq_attr *elanfreq_attr[] = {
264 &cpufreq_freq_attr_scaling_available_freqs,
265 NULL,
266};
267
268
269static struct cpufreq_driver elanfreq_driver = {
270 .get = elanfreq_get_cpu_frequency,
271 .verify = elanfreq_verify,
272 .target = elanfreq_target,
273 .init = elanfreq_cpu_init,
274 .exit = elanfreq_cpu_exit,
275 .name = "elanfreq",
276 .owner = THIS_MODULE,
277 .attr = elanfreq_attr,
278};
279
280
281static int __init elanfreq_init(void)
282{
283 struct cpuinfo_x86 *c = &cpu_data(0);
284
285 /* Test if we have the right hardware */
286 if ((c->x86_vendor != X86_VENDOR_AMD) ||
287 (c->x86 != 4) || (c->x86_model != 10)) {
288 printk(KERN_INFO "elanfreq: error: no Elan processor found!\n");
289 return -ENODEV;
290 }
291 return cpufreq_register_driver(&elanfreq_driver);
292}
293
294
295static void __exit elanfreq_exit(void)
296{
297 cpufreq_unregister_driver(&elanfreq_driver);
298}
299
300
301module_param(max_freq, int, 0444);
302
303MODULE_LICENSE("GPL");
304MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, "
305 "Sven Geggus <sven@geggus.net>");
306MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs");
307
308module_init(elanfreq_init);
309module_exit(elanfreq_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
deleted file mode 100644
index 32974cf84232..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ /dev/null
@@ -1,517 +0,0 @@
1/*
2 * Cyrix MediaGX and NatSemi Geode Suspend Modulation
3 * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
4 * (C) 2002 Hiroshi Miura <miura@da-cha.org>
5 * All Rights Reserved
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * version 2 as published by the Free Software Foundation
10 *
11 * The author(s) of this software shall not be held liable for damages
12 * of any nature resulting due to the use of this software. This
13 * software is provided AS-IS with no warranties.
14 *
15 * Theoretical note:
16 *
17 * (see Geode(tm) CS5530 manual (rev.4.1) page.56)
18 *
19 * CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0
20 * are based on Suspend Modulation.
21 *
22 * Suspend Modulation works by asserting and de-asserting the SUSP# pin
23 * to CPU(GX1/GXLV) for configurable durations. When asserting SUSP#
24 * the CPU enters an idle state. GX1 stops its core clock when SUSP# is
25 * asserted then power consumption is reduced.
26 *
27 * Suspend Modulation's OFF/ON duration are configurable
28 * with 'Suspend Modulation OFF Count Register'
29 * and 'Suspend Modulation ON Count Register'.
30 * These registers are 8bit counters that represent the number of
31 * 32us intervals which the SUSP# pin is asserted(ON)/de-asserted(OFF)
32 * to the processor.
33 *
34 * These counters define a ratio which is the effective frequency
35 * of operation of the system.
36 *
37 * OFF Count
38 * F_eff = Fgx * ----------------------
39 * OFF Count + ON Count
40 *
41 * 0 <= On Count, Off Count <= 255
42 *
43 * From these limits, we can get register values
44 *
45 * off_duration + on_duration <= MAX_DURATION
46 * on_duration = off_duration * (stock_freq - freq) / freq
47 *
48 * off_duration = (freq * DURATION) / stock_freq
49 * on_duration = DURATION - off_duration
50 *
51 *
52 *---------------------------------------------------------------------------
53 *
54 * ChangeLog:
55 * Dec. 12, 2003 Hiroshi Miura <miura@da-cha.org>
56 * - fix on/off register mistake
57 * - fix cpu_khz calc when it stops cpu modulation.
58 *
59 * Dec. 11, 2002 Hiroshi Miura <miura@da-cha.org>
60 * - rewrite for Cyrix MediaGX Cx5510/5520 and
61 * NatSemi Geode Cs5530(A).
62 *
63 * Jul. ??, 2002 Zwane Mwaikambo <zwane@commfireservices.com>
64 * - cs5530_mod patch for 2.4.19-rc1.
65 *
66 *---------------------------------------------------------------------------
67 *
68 * Todo
69 * Test on machines with 5510, 5530, 5530A
70 */
71
72/************************************************************************
73 * Suspend Modulation - Definitions *
74 ************************************************************************/
75
76#include <linux/kernel.h>
77#include <linux/module.h>
78#include <linux/init.h>
79#include <linux/smp.h>
80#include <linux/cpufreq.h>
81#include <linux/pci.h>
82#include <linux/errno.h>
83#include <linux/slab.h>
84
85#include <asm/processor-cyrix.h>
86
87/* PCI config registers, all at F0 */
88#define PCI_PMER1 0x80 /* power management enable register 1 */
89#define PCI_PMER2 0x81 /* power management enable register 2 */
90#define PCI_PMER3 0x82 /* power management enable register 3 */
91#define PCI_IRQTC 0x8c /* irq speedup timer counter register:typical 2 to 4ms */
92#define PCI_VIDTC 0x8d /* video speedup timer counter register: typical 50 to 100ms */
93#define PCI_MODOFF 0x94 /* suspend modulation OFF counter register, 1 = 32us */
94#define PCI_MODON 0x95 /* suspend modulation ON counter register */
95#define PCI_SUSCFG 0x96 /* suspend configuration register */
96
97/* PMER1 bits */
98#define GPM (1<<0) /* global power management */
99#define GIT (1<<1) /* globally enable PM device idle timers */
100#define GTR (1<<2) /* globally enable IO traps */
101#define IRQ_SPDUP (1<<3) /* disable clock throttle during interrupt handling */
102#define VID_SPDUP (1<<4) /* disable clock throttle during vga video handling */
103
104/* SUSCFG bits */
105#define SUSMOD (1<<0) /* enable/disable suspend modulation */
106/* the below is supported only with cs5530 (after rev.1.2)/cs5530A */
107#define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */
108 /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */
109#define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */
110/* the below is supported only with cs5530A */
111#define PWRSVE_ISA (1<<3) /* stop ISA clock */
112#define PWRSVE (1<<4) /* active idle */
113
114struct gxfreq_params {
115 u8 on_duration;
116 u8 off_duration;
117 u8 pci_suscfg;
118 u8 pci_pmer1;
119 u8 pci_pmer2;
120 struct pci_dev *cs55x0;
121};
122
123static struct gxfreq_params *gx_params;
124static int stock_freq;
125
126/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */
127static int pci_busclk;
128module_param(pci_busclk, int, 0444);
129
130/* maximum duration for which the cpu may be suspended
131 * (32us * MAX_DURATION). If no parameter is given, this defaults
132 * to 255.
133 * Note that this leads to a maximum of 8 ms(!) where the CPU clock
134 * is suspended -- processing power is just 0.39% of what it used to be,
135 * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */
136static int max_duration = 255;
137module_param(max_duration, int, 0444);
138
139/* For the default policy, we want at least some processing power
140 * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV)
141 */
142#define POLICY_MIN_DIV 20
143
144
145#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
146 "gx-suspmod", msg)
147
148/**
149 * we can detect a core multipiler from dir0_lsb
150 * from GX1 datasheet p.56,
151 * MULT[3:0]:
152 * 0000 = SYSCLK multiplied by 4 (test only)
153 * 0001 = SYSCLK multiplied by 10
154 * 0010 = SYSCLK multiplied by 4
155 * 0011 = SYSCLK multiplied by 6
156 * 0100 = SYSCLK multiplied by 9
157 * 0101 = SYSCLK multiplied by 5
158 * 0110 = SYSCLK multiplied by 7
159 * 0111 = SYSCLK multiplied by 8
160 * of 33.3MHz
161 **/
162static int gx_freq_mult[16] = {
163 4, 10, 4, 6, 9, 5, 7, 8,
164 0, 0, 0, 0, 0, 0, 0, 0
165};
166
167
168/****************************************************************
169 * Low Level chipset interface *
170 ****************************************************************/
171static struct pci_device_id gx_chipset_tbl[] __initdata = {
172 { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY), },
173 { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5520), },
174 { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5510), },
175 { 0, },
176};
177
178static void gx_write_byte(int reg, int value)
179{
180 pci_write_config_byte(gx_params->cs55x0, reg, value);
181}
182
183/**
184 * gx_detect_chipset:
185 *
186 **/
187static __init struct pci_dev *gx_detect_chipset(void)
188{
189 struct pci_dev *gx_pci = NULL;
190
191 /* check if CPU is a MediaGX or a Geode. */
192 if ((boot_cpu_data.x86_vendor != X86_VENDOR_NSC) &&
193 (boot_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) {
194 dprintk("error: no MediaGX/Geode processor found!\n");
195 return NULL;
196 }
197
198 /* detect which companion chip is used */
199 for_each_pci_dev(gx_pci) {
200 if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL)
201 return gx_pci;
202 }
203
204 dprintk("error: no supported chipset found!\n");
205 return NULL;
206}
207
208/**
209 * gx_get_cpuspeed:
210 *
211 * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi
212 * Geode CPU runs.
213 */
214static unsigned int gx_get_cpuspeed(unsigned int cpu)
215{
216 if ((gx_params->pci_suscfg & SUSMOD) == 0)
217 return stock_freq;
218
219 return (stock_freq * gx_params->off_duration)
220 / (gx_params->on_duration + gx_params->off_duration);
221}
222
223/**
224 * gx_validate_speed:
225 * determine current cpu speed
226 *
227 **/
228
229static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration,
230 u8 *off_duration)
231{
232 unsigned int i;
233 u8 tmp_on, tmp_off;
234 int old_tmp_freq = stock_freq;
235 int tmp_freq;
236
237 *off_duration = 1;
238 *on_duration = 0;
239
240 for (i = max_duration; i > 0; i--) {
241 tmp_off = ((khz * i) / stock_freq) & 0xff;
242 tmp_on = i - tmp_off;
243 tmp_freq = (stock_freq * tmp_off) / i;
244 /* if this relation is closer to khz, use this. If it's equal,
245 * prefer it, too - lower latency */
246 if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) {
247 *on_duration = tmp_on;
248 *off_duration = tmp_off;
249 old_tmp_freq = tmp_freq;
250 }
251 }
252
253 return old_tmp_freq;
254}
255
256
257/**
258 * gx_set_cpuspeed:
259 * set cpu speed in khz.
260 **/
261
262static void gx_set_cpuspeed(unsigned int khz)
263{
264 u8 suscfg, pmer1;
265 unsigned int new_khz;
266 unsigned long flags;
267 struct cpufreq_freqs freqs;
268
269 freqs.cpu = 0;
270 freqs.old = gx_get_cpuspeed(0);
271
272 new_khz = gx_validate_speed(khz, &gx_params->on_duration,
273 &gx_params->off_duration);
274
275 freqs.new = new_khz;
276
277 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
278 local_irq_save(flags);
279
280
281
282 if (new_khz != stock_freq) {
283 /* if new khz == 100% of CPU speed, it is special case */
284 switch (gx_params->cs55x0->device) {
285 case PCI_DEVICE_ID_CYRIX_5530_LEGACY:
286 pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP;
287 /* FIXME: need to test other values -- Zwane,Miura */
288 /* typical 2 to 4ms */
289 gx_write_byte(PCI_IRQTC, 4);
290 /* typical 50 to 100ms */
291 gx_write_byte(PCI_VIDTC, 100);
292 gx_write_byte(PCI_PMER1, pmer1);
293
294 if (gx_params->cs55x0->revision < 0x10) {
295 /* CS5530(rev 1.2, 1.3) */
296 suscfg = gx_params->pci_suscfg|SUSMOD;
297 } else {
298 /* CS5530A,B.. */
299 suscfg = gx_params->pci_suscfg|SUSMOD|PWRSVE;
300 }
301 break;
302 case PCI_DEVICE_ID_CYRIX_5520:
303 case PCI_DEVICE_ID_CYRIX_5510:
304 suscfg = gx_params->pci_suscfg | SUSMOD;
305 break;
306 default:
307 local_irq_restore(flags);
308 dprintk("fatal: try to set unknown chipset.\n");
309 return;
310 }
311 } else {
312 suscfg = gx_params->pci_suscfg & ~(SUSMOD);
313 gx_params->off_duration = 0;
314 gx_params->on_duration = 0;
315 dprintk("suspend modulation disabled: cpu runs 100%% speed.\n");
316 }
317
318 gx_write_byte(PCI_MODOFF, gx_params->off_duration);
319 gx_write_byte(PCI_MODON, gx_params->on_duration);
320
321 gx_write_byte(PCI_SUSCFG, suscfg);
322 pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg);
323
324 local_irq_restore(flags);
325
326 gx_params->pci_suscfg = suscfg;
327
328 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
329
330 dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n",
331 gx_params->on_duration * 32, gx_params->off_duration * 32);
332 dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new);
333}
334
335/****************************************************************
336 * High level functions *
337 ****************************************************************/
338
339/*
340 * cpufreq_gx_verify: test if frequency range is valid
341 *
342 * This function checks if a given frequency range in kHz is valid
343 * for the hardware supported by the driver.
344 */
345
346static int cpufreq_gx_verify(struct cpufreq_policy *policy)
347{
348 unsigned int tmp_freq = 0;
349 u8 tmp1, tmp2;
350
351 if (!stock_freq || !policy)
352 return -EINVAL;
353
354 policy->cpu = 0;
355 cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
356 stock_freq);
357
358 /* it needs to be assured that at least one supported frequency is
359 * within policy->min and policy->max. If it is not, policy->max
360 * needs to be increased until one freuqency is supported.
361 * policy->min may not be decreased, though. This way we guarantee a
362 * specific processing capacity.
363 */
364 tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2);
365 if (tmp_freq < policy->min)
366 tmp_freq += stock_freq / max_duration;
367 policy->min = tmp_freq;
368 if (policy->min > policy->max)
369 policy->max = tmp_freq;
370 tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2);
371 if (tmp_freq > policy->max)
372 tmp_freq -= stock_freq / max_duration;
373 policy->max = tmp_freq;
374 if (policy->max < policy->min)
375 policy->max = policy->min;
376 cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
377 stock_freq);
378
379 return 0;
380}
381
382/*
383 * cpufreq_gx_target:
384 *
385 */
386static int cpufreq_gx_target(struct cpufreq_policy *policy,
387 unsigned int target_freq,
388 unsigned int relation)
389{
390 u8 tmp1, tmp2;
391 unsigned int tmp_freq;
392
393 if (!stock_freq || !policy)
394 return -EINVAL;
395
396 policy->cpu = 0;
397
398 tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2);
399 while (tmp_freq < policy->min) {
400 tmp_freq += stock_freq / max_duration;
401 tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
402 }
403 while (tmp_freq > policy->max) {
404 tmp_freq -= stock_freq / max_duration;
405 tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
406 }
407
408 gx_set_cpuspeed(tmp_freq);
409
410 return 0;
411}
412
413static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy)
414{
415 unsigned int maxfreq, curfreq;
416
417 if (!policy || policy->cpu != 0)
418 return -ENODEV;
419
420 /* determine maximum frequency */
421 if (pci_busclk)
422 maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
423 else if (cpu_khz)
424 maxfreq = cpu_khz;
425 else
426 maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
427
428 stock_freq = maxfreq;
429 curfreq = gx_get_cpuspeed(0);
430
431 dprintk("cpu max frequency is %d.\n", maxfreq);
432 dprintk("cpu current frequency is %dkHz.\n", curfreq);
433
434 /* setup basic struct for cpufreq API */
435 policy->cpu = 0;
436
437 if (max_duration < POLICY_MIN_DIV)
438 policy->min = maxfreq / max_duration;
439 else
440 policy->min = maxfreq / POLICY_MIN_DIV;
441 policy->max = maxfreq;
442 policy->cur = curfreq;
443 policy->cpuinfo.min_freq = maxfreq / max_duration;
444 policy->cpuinfo.max_freq = maxfreq;
445 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
446
447 return 0;
448}
449
450/*
451 * cpufreq_gx_init:
452 * MediaGX/Geode GX initialize cpufreq driver
453 */
454static struct cpufreq_driver gx_suspmod_driver = {
455 .get = gx_get_cpuspeed,
456 .verify = cpufreq_gx_verify,
457 .target = cpufreq_gx_target,
458 .init = cpufreq_gx_cpu_init,
459 .name = "gx-suspmod",
460 .owner = THIS_MODULE,
461};
462
463static int __init cpufreq_gx_init(void)
464{
465 int ret;
466 struct gxfreq_params *params;
467 struct pci_dev *gx_pci;
468
469 /* Test if we have the right hardware */
470 gx_pci = gx_detect_chipset();
471 if (gx_pci == NULL)
472 return -ENODEV;
473
474 /* check whether module parameters are sane */
475 if (max_duration > 0xff)
476 max_duration = 0xff;
477
478 dprintk("geode suspend modulation available.\n");
479
480 params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL);
481 if (params == NULL)
482 return -ENOMEM;
483
484 params->cs55x0 = gx_pci;
485 gx_params = params;
486
487 /* keep cs55x0 configurations */
488 pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg));
489 pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1));
490 pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2));
491 pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration));
492 pci_read_config_byte(params->cs55x0, PCI_MODOFF,
493 &(params->off_duration));
494
495 ret = cpufreq_register_driver(&gx_suspmod_driver);
496 if (ret) {
497 kfree(params);
498 return ret; /* register error! */
499 }
500
501 return 0;
502}
503
504static void __exit cpufreq_gx_exit(void)
505{
506 cpufreq_unregister_driver(&gx_suspmod_driver);
507 pci_dev_put(gx_params->cs55x0);
508 kfree(gx_params);
509}
510
511MODULE_AUTHOR("Hiroshi Miura <miura@da-cha.org>");
512MODULE_DESCRIPTION("Cpufreq driver for Cyrix MediaGX and NatSemi Geode");
513MODULE_LICENSE("GPL");
514
515module_init(cpufreq_gx_init);
516module_exit(cpufreq_gx_exit);
517
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
deleted file mode 100644
index cf48cdd6907d..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ /dev/null
@@ -1,1029 +0,0 @@
1/*
2 * (C) 2001-2004 Dave Jones. <davej@redhat.com>
3 * (C) 2002 Padraig Brady. <padraig@antefacto.com>
4 *
5 * Licensed under the terms of the GNU GPL License version 2.
6 * Based upon datasheets & sample CPUs kindly provided by VIA.
7 *
8 * VIA have currently 3 different versions of Longhaul.
9 * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147.
10 * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0.
11 * Version 2 of longhaul is backward compatible with v1, but adds
12 * LONGHAUL MSR for purpose of both frequency and voltage scaling.
13 * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C).
14 * Version 3 of longhaul got renamed to Powersaver and redesigned
15 * to use only the POWERSAVER MSR at 0x110a.
16 * It is present in Ezra-T (C5M), Nehemiah (C5X) and above.
17 * It's pretty much the same feature wise to longhaul v2, though
18 * there is provision for scaling FSB too, but this doesn't work
19 * too well in practice so we don't even try to use this.
20 *
21 * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
22 */
23
24#include <linux/kernel.h>
25#include <linux/module.h>
26#include <linux/moduleparam.h>
27#include <linux/init.h>
28#include <linux/cpufreq.h>
29#include <linux/pci.h>
30#include <linux/slab.h>
31#include <linux/string.h>
32#include <linux/delay.h>
33#include <linux/timex.h>
34#include <linux/io.h>
35#include <linux/acpi.h>
36
37#include <asm/msr.h>
38#include <acpi/processor.h>
39
40#include "longhaul.h"
41
42#define PFX "longhaul: "
43
44#define TYPE_LONGHAUL_V1 1
45#define TYPE_LONGHAUL_V2 2
46#define TYPE_POWERSAVER 3
47
48#define CPU_SAMUEL 1
49#define CPU_SAMUEL2 2
50#define CPU_EZRA 3
51#define CPU_EZRA_T 4
52#define CPU_NEHEMIAH 5
53#define CPU_NEHEMIAH_C 6
54
55/* Flags */
56#define USE_ACPI_C3 (1 << 1)
57#define USE_NORTHBRIDGE (1 << 2)
58
59static int cpu_model;
60static unsigned int numscales = 16;
61static unsigned int fsb;
62
63static const struct mV_pos *vrm_mV_table;
64static const unsigned char *mV_vrm_table;
65
66static unsigned int highest_speed, lowest_speed; /* kHz */
67static unsigned int minmult, maxmult;
68static int can_scale_voltage;
69static struct acpi_processor *pr;
70static struct acpi_processor_cx *cx;
71static u32 acpi_regs_addr;
72static u8 longhaul_flags;
73static unsigned int longhaul_index;
74
75/* Module parameters */
76static int scale_voltage;
77static int disable_acpi_c3;
78static int revid_errata;
79
80#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
81 "longhaul", msg)
82
83
84/* Clock ratios multiplied by 10 */
85static int mults[32];
86static int eblcr[32];
87static int longhaul_version;
88static struct cpufreq_frequency_table *longhaul_table;
89
90#ifdef CONFIG_CPU_FREQ_DEBUG
91static char speedbuffer[8];
92
93static char *print_speed(int speed)
94{
95 if (speed < 1000) {
96 snprintf(speedbuffer, sizeof(speedbuffer), "%dMHz", speed);
97 return speedbuffer;
98 }
99
100 if (speed%1000 == 0)
101 snprintf(speedbuffer, sizeof(speedbuffer),
102 "%dGHz", speed/1000);
103 else
104 snprintf(speedbuffer, sizeof(speedbuffer),
105 "%d.%dGHz", speed/1000, (speed%1000)/100);
106
107 return speedbuffer;
108}
109#endif
110
111
112static unsigned int calc_speed(int mult)
113{
114 int khz;
115 khz = (mult/10)*fsb;
116 if (mult%10)
117 khz += fsb/2;
118 khz *= 1000;
119 return khz;
120}
121
122
123static int longhaul_get_cpu_mult(void)
124{
125 unsigned long invalue = 0, lo, hi;
126
127 rdmsr(MSR_IA32_EBL_CR_POWERON, lo, hi);
128 invalue = (lo & (1<<22|1<<23|1<<24|1<<25))>>22;
129 if (longhaul_version == TYPE_LONGHAUL_V2 ||
130 longhaul_version == TYPE_POWERSAVER) {
131 if (lo & (1<<27))
132 invalue += 16;
133 }
134 return eblcr[invalue];
135}
136
137/* For processor with BCR2 MSR */
138
139static void do_longhaul1(unsigned int mults_index)
140{
141 union msr_bcr2 bcr2;
142
143 rdmsrl(MSR_VIA_BCR2, bcr2.val);
144 /* Enable software clock multiplier */
145 bcr2.bits.ESOFTBF = 1;
146 bcr2.bits.CLOCKMUL = mults_index & 0xff;
147
148 /* Sync to timer tick */
149 safe_halt();
150 /* Change frequency on next halt or sleep */
151 wrmsrl(MSR_VIA_BCR2, bcr2.val);
152 /* Invoke transition */
153 ACPI_FLUSH_CPU_CACHE();
154 halt();
155
156 /* Disable software clock multiplier */
157 local_irq_disable();
158 rdmsrl(MSR_VIA_BCR2, bcr2.val);
159 bcr2.bits.ESOFTBF = 0;
160 wrmsrl(MSR_VIA_BCR2, bcr2.val);
161}
162
163/* For processor with Longhaul MSR */
164
165static void do_powersaver(int cx_address, unsigned int mults_index,
166 unsigned int dir)
167{
168 union msr_longhaul longhaul;
169 u32 t;
170
171 rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
172 /* Setup new frequency */
173 if (!revid_errata)
174 longhaul.bits.RevisionKey = longhaul.bits.RevisionID;
175 else
176 longhaul.bits.RevisionKey = 0;
177 longhaul.bits.SoftBusRatio = mults_index & 0xf;
178 longhaul.bits.SoftBusRatio4 = (mults_index & 0x10) >> 4;
179 /* Setup new voltage */
180 if (can_scale_voltage)
181 longhaul.bits.SoftVID = (mults_index >> 8) & 0x1f;
182 /* Sync to timer tick */
183 safe_halt();
184 /* Raise voltage if necessary */
185 if (can_scale_voltage && dir) {
186 longhaul.bits.EnableSoftVID = 1;
187 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
188 /* Change voltage */
189 if (!cx_address) {
190 ACPI_FLUSH_CPU_CACHE();
191 halt();
192 } else {
193 ACPI_FLUSH_CPU_CACHE();
194 /* Invoke C3 */
195 inb(cx_address);
196 /* Dummy op - must do something useless after P_LVL3
197 * read */
198 t = inl(acpi_gbl_FADT.xpm_timer_block.address);
199 }
200 longhaul.bits.EnableSoftVID = 0;
201 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
202 }
203
204 /* Change frequency on next halt or sleep */
205 longhaul.bits.EnableSoftBusRatio = 1;
206 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
207 if (!cx_address) {
208 ACPI_FLUSH_CPU_CACHE();
209 halt();
210 } else {
211 ACPI_FLUSH_CPU_CACHE();
212 /* Invoke C3 */
213 inb(cx_address);
214 /* Dummy op - must do something useless after P_LVL3 read */
215 t = inl(acpi_gbl_FADT.xpm_timer_block.address);
216 }
217 /* Disable bus ratio bit */
218 longhaul.bits.EnableSoftBusRatio = 0;
219 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
220
221 /* Reduce voltage if necessary */
222 if (can_scale_voltage && !dir) {
223 longhaul.bits.EnableSoftVID = 1;
224 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
225 /* Change voltage */
226 if (!cx_address) {
227 ACPI_FLUSH_CPU_CACHE();
228 halt();
229 } else {
230 ACPI_FLUSH_CPU_CACHE();
231 /* Invoke C3 */
232 inb(cx_address);
233 /* Dummy op - must do something useless after P_LVL3
234 * read */
235 t = inl(acpi_gbl_FADT.xpm_timer_block.address);
236 }
237 longhaul.bits.EnableSoftVID = 0;
238 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
239 }
240}
241
242/**
243 * longhaul_set_cpu_frequency()
244 * @mults_index : bitpattern of the new multiplier.
245 *
246 * Sets a new clock ratio.
247 */
248
249static void longhaul_setstate(unsigned int table_index)
250{
251 unsigned int mults_index;
252 int speed, mult;
253 struct cpufreq_freqs freqs;
254 unsigned long flags;
255 unsigned int pic1_mask, pic2_mask;
256 u16 bm_status = 0;
257 u32 bm_timeout = 1000;
258 unsigned int dir = 0;
259
260 mults_index = longhaul_table[table_index].index;
261 /* Safety precautions */
262 mult = mults[mults_index & 0x1f];
263 if (mult == -1)
264 return;
265 speed = calc_speed(mult);
266 if ((speed > highest_speed) || (speed < lowest_speed))
267 return;
268 /* Voltage transition before frequency transition? */
269 if (can_scale_voltage && longhaul_index < table_index)
270 dir = 1;
271
272 freqs.old = calc_speed(longhaul_get_cpu_mult());
273 freqs.new = speed;
274 freqs.cpu = 0; /* longhaul.c is UP only driver */
275
276 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
277
278 dprintk("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
279 fsb, mult/10, mult%10, print_speed(speed/1000));
280retry_loop:
281 preempt_disable();
282 local_irq_save(flags);
283
284 pic2_mask = inb(0xA1);
285 pic1_mask = inb(0x21); /* works on C3. save mask. */
286 outb(0xFF, 0xA1); /* Overkill */
287 outb(0xFE, 0x21); /* TMR0 only */
288
289 /* Wait while PCI bus is busy. */
290 if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE
291 || ((pr != NULL) && pr->flags.bm_control))) {
292 bm_status = inw(acpi_regs_addr);
293 bm_status &= 1 << 4;
294 while (bm_status && bm_timeout) {
295 outw(1 << 4, acpi_regs_addr);
296 bm_timeout--;
297 bm_status = inw(acpi_regs_addr);
298 bm_status &= 1 << 4;
299 }
300 }
301
302 if (longhaul_flags & USE_NORTHBRIDGE) {
303 /* Disable AGP and PCI arbiters */
304 outb(3, 0x22);
305 } else if ((pr != NULL) && pr->flags.bm_control) {
306 /* Disable bus master arbitration */
307 acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1);
308 }
309 switch (longhaul_version) {
310
311 /*
312 * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B])
313 * Software controlled multipliers only.
314 */
315 case TYPE_LONGHAUL_V1:
316 do_longhaul1(mults_index);
317 break;
318
319 /*
320 * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C]
321 *
322 * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N])
323 * Nehemiah can do FSB scaling too, but this has never been proven
324 * to work in practice.
325 */
326 case TYPE_LONGHAUL_V2:
327 case TYPE_POWERSAVER:
328 if (longhaul_flags & USE_ACPI_C3) {
329 /* Don't allow wakeup */
330 acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
331 do_powersaver(cx->address, mults_index, dir);
332 } else {
333 do_powersaver(0, mults_index, dir);
334 }
335 break;
336 }
337
338 if (longhaul_flags & USE_NORTHBRIDGE) {
339 /* Enable arbiters */
340 outb(0, 0x22);
341 } else if ((pr != NULL) && pr->flags.bm_control) {
342 /* Enable bus master arbitration */
343 acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0);
344 }
345 outb(pic2_mask, 0xA1); /* restore mask */
346 outb(pic1_mask, 0x21);
347
348 local_irq_restore(flags);
349 preempt_enable();
350
351 freqs.new = calc_speed(longhaul_get_cpu_mult());
352 /* Check if requested frequency is set. */
353 if (unlikely(freqs.new != speed)) {
354 printk(KERN_INFO PFX "Failed to set requested frequency!\n");
355 /* Revision ID = 1 but processor is expecting revision key
356 * equal to 0. Jumpers at the bottom of processor will change
357 * multiplier and FSB, but will not change bits in Longhaul
358 * MSR nor enable voltage scaling. */
359 if (!revid_errata) {
360 printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" "
361 "option.\n");
362 revid_errata = 1;
363 msleep(200);
364 goto retry_loop;
365 }
366 /* Why ACPI C3 sometimes doesn't work is a mystery for me.
367 * But it does happen. Processor is entering ACPI C3 state,
368 * but it doesn't change frequency. I tried poking various
369 * bits in northbridge registers, but without success. */
370 if (longhaul_flags & USE_ACPI_C3) {
371 printk(KERN_INFO PFX "Disabling ACPI C3 support.\n");
372 longhaul_flags &= ~USE_ACPI_C3;
373 if (revid_errata) {
374 printk(KERN_INFO PFX "Disabling \"Ignore "
375 "Revision ID\" option.\n");
376 revid_errata = 0;
377 }
378 msleep(200);
379 goto retry_loop;
380 }
381 /* This shouldn't happen. Longhaul ver. 2 was reported not
382 * working on processors without voltage scaling, but with
383 * RevID = 1. RevID errata will make things right. Just
384 * to be 100% sure. */
385 if (longhaul_version == TYPE_LONGHAUL_V2) {
386 printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n");
387 longhaul_version = TYPE_LONGHAUL_V1;
388 msleep(200);
389 goto retry_loop;
390 }
391 }
392 /* Report true CPU frequency */
393 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
394
395 if (!bm_timeout)
396 printk(KERN_INFO PFX "Warning: Timeout while waiting for "
397 "idle PCI bus.\n");
398}
399
400/*
401 * Centaur decided to make life a little more tricky.
402 * Only longhaul v1 is allowed to read EBLCR BSEL[0:1].
403 * Samuel2 and above have to try and guess what the FSB is.
404 * We do this by assuming we booted at maximum multiplier, and interpolate
405 * between that value multiplied by possible FSBs and cpu_mhz which
406 * was calculated at boot time. Really ugly, but no other way to do this.
407 */
408
409#define ROUNDING 0xf
410
411static int guess_fsb(int mult)
412{
413 int speed = cpu_khz / 1000;
414 int i;
415 int speeds[] = { 666, 1000, 1333, 2000 };
416 int f_max, f_min;
417
418 for (i = 0; i < 4; i++) {
419 f_max = ((speeds[i] * mult) + 50) / 100;
420 f_max += (ROUNDING / 2);
421 f_min = f_max - ROUNDING;
422 if ((speed <= f_max) && (speed >= f_min))
423 return speeds[i] / 10;
424 }
425 return 0;
426}
427
428
429static int __cpuinit longhaul_get_ranges(void)
430{
431 unsigned int i, j, k = 0;
432 unsigned int ratio;
433 int mult;
434
435 /* Get current frequency */
436 mult = longhaul_get_cpu_mult();
437 if (mult == -1) {
438 printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n");
439 return -EINVAL;
440 }
441 fsb = guess_fsb(mult);
442 if (fsb == 0) {
443 printk(KERN_INFO PFX "Invalid (reserved) FSB!\n");
444 return -EINVAL;
445 }
446 /* Get max multiplier - as we always did.
447 * Longhaul MSR is useful only when voltage scaling is enabled.
448 * C3 is booting at max anyway. */
449 maxmult = mult;
450 /* Get min multiplier */
451 switch (cpu_model) {
452 case CPU_NEHEMIAH:
453 minmult = 50;
454 break;
455 case CPU_NEHEMIAH_C:
456 minmult = 40;
457 break;
458 default:
459 minmult = 30;
460 break;
461 }
462
463 dprintk("MinMult:%d.%dx MaxMult:%d.%dx\n",
464 minmult/10, minmult%10, maxmult/10, maxmult%10);
465
466 highest_speed = calc_speed(maxmult);
467 lowest_speed = calc_speed(minmult);
468 dprintk("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb,
469 print_speed(lowest_speed/1000),
470 print_speed(highest_speed/1000));
471
472 if (lowest_speed == highest_speed) {
473 printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n");
474 return -EINVAL;
475 }
476 if (lowest_speed > highest_speed) {
477 printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n",
478 lowest_speed, highest_speed);
479 return -EINVAL;
480 }
481
482 longhaul_table = kmalloc((numscales + 1) * sizeof(*longhaul_table),
483 GFP_KERNEL);
484 if (!longhaul_table)
485 return -ENOMEM;
486
487 for (j = 0; j < numscales; j++) {
488 ratio = mults[j];
489 if (ratio == -1)
490 continue;
491 if (ratio > maxmult || ratio < minmult)
492 continue;
493 longhaul_table[k].frequency = calc_speed(ratio);
494 longhaul_table[k].index = j;
495 k++;
496 }
497 if (k <= 1) {
498 kfree(longhaul_table);
499 return -ENODEV;
500 }
501 /* Sort */
502 for (j = 0; j < k - 1; j++) {
503 unsigned int min_f, min_i;
504 min_f = longhaul_table[j].frequency;
505 min_i = j;
506 for (i = j + 1; i < k; i++) {
507 if (longhaul_table[i].frequency < min_f) {
508 min_f = longhaul_table[i].frequency;
509 min_i = i;
510 }
511 }
512 if (min_i != j) {
513 swap(longhaul_table[j].frequency,
514 longhaul_table[min_i].frequency);
515 swap(longhaul_table[j].index,
516 longhaul_table[min_i].index);
517 }
518 }
519
520 longhaul_table[k].frequency = CPUFREQ_TABLE_END;
521
522 /* Find index we are running on */
523 for (j = 0; j < k; j++) {
524 if (mults[longhaul_table[j].index & 0x1f] == mult) {
525 longhaul_index = j;
526 break;
527 }
528 }
529 return 0;
530}
531
532
533static void __cpuinit longhaul_setup_voltagescaling(void)
534{
535 union msr_longhaul longhaul;
536 struct mV_pos minvid, maxvid, vid;
537 unsigned int j, speed, pos, kHz_step, numvscales;
538 int min_vid_speed;
539
540 rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
541 if (!(longhaul.bits.RevisionID & 1)) {
542 printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n");
543 return;
544 }
545
546 if (!longhaul.bits.VRMRev) {
547 printk(KERN_INFO PFX "VRM 8.5\n");
548 vrm_mV_table = &vrm85_mV[0];
549 mV_vrm_table = &mV_vrm85[0];
550 } else {
551 printk(KERN_INFO PFX "Mobile VRM\n");
552 if (cpu_model < CPU_NEHEMIAH)
553 return;
554 vrm_mV_table = &mobilevrm_mV[0];
555 mV_vrm_table = &mV_mobilevrm[0];
556 }
557
558 minvid = vrm_mV_table[longhaul.bits.MinimumVID];
559 maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
560
561 if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
562 printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
563 "Voltage scaling disabled.\n",
564 minvid.mV/1000, minvid.mV%1000,
565 maxvid.mV/1000, maxvid.mV%1000);
566 return;
567 }
568
569 if (minvid.mV == maxvid.mV) {
570 printk(KERN_INFO PFX "Claims to support voltage scaling but "
571 "min & max are both %d.%03d. "
572 "Voltage scaling disabled\n",
573 maxvid.mV/1000, maxvid.mV%1000);
574 return;
575 }
576
577 /* How many voltage steps*/
578 numvscales = maxvid.pos - minvid.pos + 1;
579 printk(KERN_INFO PFX
580 "Max VID=%d.%03d "
581 "Min VID=%d.%03d, "
582 "%d possible voltage scales\n",
583 maxvid.mV/1000, maxvid.mV%1000,
584 minvid.mV/1000, minvid.mV%1000,
585 numvscales);
586
587 /* Calculate max frequency at min voltage */
588 j = longhaul.bits.MinMHzBR;
589 if (longhaul.bits.MinMHzBR4)
590 j += 16;
591 min_vid_speed = eblcr[j];
592 if (min_vid_speed == -1)
593 return;
594 switch (longhaul.bits.MinMHzFSB) {
595 case 0:
596 min_vid_speed *= 13333;
597 break;
598 case 1:
599 min_vid_speed *= 10000;
600 break;
601 case 3:
602 min_vid_speed *= 6666;
603 break;
604 default:
605 return;
606 break;
607 }
608 if (min_vid_speed >= highest_speed)
609 return;
610 /* Calculate kHz for one voltage step */
611 kHz_step = (highest_speed - min_vid_speed) / numvscales;
612
613 j = 0;
614 while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) {
615 speed = longhaul_table[j].frequency;
616 if (speed > min_vid_speed)
617 pos = (speed - min_vid_speed) / kHz_step + minvid.pos;
618 else
619 pos = minvid.pos;
620 longhaul_table[j].index |= mV_vrm_table[pos] << 8;
621 vid = vrm_mV_table[mV_vrm_table[pos]];
622 printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n",
623 speed, j, vid.mV);
624 j++;
625 }
626
627 can_scale_voltage = 1;
628 printk(KERN_INFO PFX "Voltage scaling enabled.\n");
629}
630
631
632static int longhaul_verify(struct cpufreq_policy *policy)
633{
634 return cpufreq_frequency_table_verify(policy, longhaul_table);
635}
636
637
638static int longhaul_target(struct cpufreq_policy *policy,
639 unsigned int target_freq, unsigned int relation)
640{
641 unsigned int table_index = 0;
642 unsigned int i;
643 unsigned int dir = 0;
644 u8 vid, current_vid;
645
646 if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq,
647 relation, &table_index))
648 return -EINVAL;
649
650 /* Don't set same frequency again */
651 if (longhaul_index == table_index)
652 return 0;
653
654 if (!can_scale_voltage)
655 longhaul_setstate(table_index);
656 else {
657 /* On test system voltage transitions exceeding single
658 * step up or down were turning motherboard off. Both
659 * "ondemand" and "userspace" are unsafe. C7 is doing
660 * this in hardware, C3 is old and we need to do this
661 * in software. */
662 i = longhaul_index;
663 current_vid = (longhaul_table[longhaul_index].index >> 8);
664 current_vid &= 0x1f;
665 if (table_index > longhaul_index)
666 dir = 1;
667 while (i != table_index) {
668 vid = (longhaul_table[i].index >> 8) & 0x1f;
669 if (vid != current_vid) {
670 longhaul_setstate(i);
671 current_vid = vid;
672 msleep(200);
673 }
674 if (dir)
675 i++;
676 else
677 i--;
678 }
679 longhaul_setstate(table_index);
680 }
681 longhaul_index = table_index;
682 return 0;
683}
684
685
686static unsigned int longhaul_get(unsigned int cpu)
687{
688 if (cpu)
689 return 0;
690 return calc_speed(longhaul_get_cpu_mult());
691}
692
693static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
694 u32 nesting_level,
695 void *context, void **return_value)
696{
697 struct acpi_device *d;
698
699 if (acpi_bus_get_device(obj_handle, &d))
700 return 0;
701
702 *return_value = acpi_driver_data(d);
703 return 1;
704}
705
706/* VIA don't support PM2 reg, but have something similar */
707static int enable_arbiter_disable(void)
708{
709 struct pci_dev *dev;
710 int status = 1;
711 int reg;
712 u8 pci_cmd;
713
714 /* Find PLE133 host bridge */
715 reg = 0x78;
716 dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0,
717 NULL);
718 /* Find PM133/VT8605 host bridge */
719 if (dev == NULL)
720 dev = pci_get_device(PCI_VENDOR_ID_VIA,
721 PCI_DEVICE_ID_VIA_8605_0, NULL);
722 /* Find CLE266 host bridge */
723 if (dev == NULL) {
724 reg = 0x76;
725 dev = pci_get_device(PCI_VENDOR_ID_VIA,
726 PCI_DEVICE_ID_VIA_862X_0, NULL);
727 /* Find CN400 V-Link host bridge */
728 if (dev == NULL)
729 dev = pci_get_device(PCI_VENDOR_ID_VIA, 0x7259, NULL);
730 }
731 if (dev != NULL) {
732 /* Enable access to port 0x22 */
733 pci_read_config_byte(dev, reg, &pci_cmd);
734 if (!(pci_cmd & 1<<7)) {
735 pci_cmd |= 1<<7;
736 pci_write_config_byte(dev, reg, pci_cmd);
737 pci_read_config_byte(dev, reg, &pci_cmd);
738 if (!(pci_cmd & 1<<7)) {
739 printk(KERN_ERR PFX
740 "Can't enable access to port 0x22.\n");
741 status = 0;
742 }
743 }
744 pci_dev_put(dev);
745 return status;
746 }
747 return 0;
748}
749
750static int longhaul_setup_southbridge(void)
751{
752 struct pci_dev *dev;
753 u8 pci_cmd;
754
755 /* Find VT8235 southbridge */
756 dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL);
757 if (dev == NULL)
758 /* Find VT8237 southbridge */
759 dev = pci_get_device(PCI_VENDOR_ID_VIA,
760 PCI_DEVICE_ID_VIA_8237, NULL);
761 if (dev != NULL) {
762 /* Set transition time to max */
763 pci_read_config_byte(dev, 0xec, &pci_cmd);
764 pci_cmd &= ~(1 << 2);
765 pci_write_config_byte(dev, 0xec, pci_cmd);
766 pci_read_config_byte(dev, 0xe4, &pci_cmd);
767 pci_cmd &= ~(1 << 7);
768 pci_write_config_byte(dev, 0xe4, pci_cmd);
769 pci_read_config_byte(dev, 0xe5, &pci_cmd);
770 pci_cmd |= 1 << 7;
771 pci_write_config_byte(dev, 0xe5, pci_cmd);
772 /* Get address of ACPI registers block*/
773 pci_read_config_byte(dev, 0x81, &pci_cmd);
774 if (pci_cmd & 1 << 7) {
775 pci_read_config_dword(dev, 0x88, &acpi_regs_addr);
776 acpi_regs_addr &= 0xff00;
777 printk(KERN_INFO PFX "ACPI I/O at 0x%x\n",
778 acpi_regs_addr);
779 }
780
781 pci_dev_put(dev);
782 return 1;
783 }
784 return 0;
785}
786
787static int __cpuinit longhaul_cpu_init(struct cpufreq_policy *policy)
788{
789 struct cpuinfo_x86 *c = &cpu_data(0);
790 char *cpuname = NULL;
791 int ret;
792 u32 lo, hi;
793
794 /* Check what we have on this motherboard */
795 switch (c->x86_model) {
796 case 6:
797 cpu_model = CPU_SAMUEL;
798 cpuname = "C3 'Samuel' [C5A]";
799 longhaul_version = TYPE_LONGHAUL_V1;
800 memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
801 memcpy(eblcr, samuel1_eblcr, sizeof(samuel1_eblcr));
802 break;
803
804 case 7:
805 switch (c->x86_mask) {
806 case 0:
807 longhaul_version = TYPE_LONGHAUL_V1;
808 cpu_model = CPU_SAMUEL2;
809 cpuname = "C3 'Samuel 2' [C5B]";
810 /* Note, this is not a typo, early Samuel2's had
811 * Samuel1 ratios. */
812 memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
813 memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr));
814 break;
815 case 1 ... 15:
816 longhaul_version = TYPE_LONGHAUL_V2;
817 if (c->x86_mask < 8) {
818 cpu_model = CPU_SAMUEL2;
819 cpuname = "C3 'Samuel 2' [C5B]";
820 } else {
821 cpu_model = CPU_EZRA;
822 cpuname = "C3 'Ezra' [C5C]";
823 }
824 memcpy(mults, ezra_mults, sizeof(ezra_mults));
825 memcpy(eblcr, ezra_eblcr, sizeof(ezra_eblcr));
826 break;
827 }
828 break;
829
830 case 8:
831 cpu_model = CPU_EZRA_T;
832 cpuname = "C3 'Ezra-T' [C5M]";
833 longhaul_version = TYPE_POWERSAVER;
834 numscales = 32;
835 memcpy(mults, ezrat_mults, sizeof(ezrat_mults));
836 memcpy(eblcr, ezrat_eblcr, sizeof(ezrat_eblcr));
837 break;
838
839 case 9:
840 longhaul_version = TYPE_POWERSAVER;
841 numscales = 32;
842 memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults));
843 memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr));
844 switch (c->x86_mask) {
845 case 0 ... 1:
846 cpu_model = CPU_NEHEMIAH;
847 cpuname = "C3 'Nehemiah A' [C5XLOE]";
848 break;
849 case 2 ... 4:
850 cpu_model = CPU_NEHEMIAH;
851 cpuname = "C3 'Nehemiah B' [C5XLOH]";
852 break;
853 case 5 ... 15:
854 cpu_model = CPU_NEHEMIAH_C;
855 cpuname = "C3 'Nehemiah C' [C5P]";
856 break;
857 }
858 break;
859
860 default:
861 cpuname = "Unknown";
862 break;
863 }
864 /* Check Longhaul ver. 2 */
865 if (longhaul_version == TYPE_LONGHAUL_V2) {
866 rdmsr(MSR_VIA_LONGHAUL, lo, hi);
867 if (lo == 0 && hi == 0)
868 /* Looks like MSR isn't present */
869 longhaul_version = TYPE_LONGHAUL_V1;
870 }
871
872 printk(KERN_INFO PFX "VIA %s CPU detected. ", cpuname);
873 switch (longhaul_version) {
874 case TYPE_LONGHAUL_V1:
875 case TYPE_LONGHAUL_V2:
876 printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version);
877 break;
878 case TYPE_POWERSAVER:
879 printk(KERN_CONT "Powersaver supported.\n");
880 break;
881 };
882
883 /* Doesn't hurt */
884 longhaul_setup_southbridge();
885
886 /* Find ACPI data for processor */
887 acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
888 ACPI_UINT32_MAX, &longhaul_walk_callback, NULL,
889 NULL, (void *)&pr);
890
891 /* Check ACPI support for C3 state */
892 if (pr != NULL && longhaul_version == TYPE_POWERSAVER) {
893 cx = &pr->power.states[ACPI_STATE_C3];
894 if (cx->address > 0 && cx->latency <= 1000)
895 longhaul_flags |= USE_ACPI_C3;
896 }
897 /* Disable if it isn't working */
898 if (disable_acpi_c3)
899 longhaul_flags &= ~USE_ACPI_C3;
900 /* Check if northbridge is friendly */
901 if (enable_arbiter_disable())
902 longhaul_flags |= USE_NORTHBRIDGE;
903
904 /* Check ACPI support for bus master arbiter disable */
905 if (!(longhaul_flags & USE_ACPI_C3
906 || longhaul_flags & USE_NORTHBRIDGE)
907 && ((pr == NULL) || !(pr->flags.bm_control))) {
908 printk(KERN_ERR PFX
909 "No ACPI support. Unsupported northbridge.\n");
910 return -ENODEV;
911 }
912
913 if (longhaul_flags & USE_NORTHBRIDGE)
914 printk(KERN_INFO PFX "Using northbridge support.\n");
915 if (longhaul_flags & USE_ACPI_C3)
916 printk(KERN_INFO PFX "Using ACPI support.\n");
917
918 ret = longhaul_get_ranges();
919 if (ret != 0)
920 return ret;
921
922 if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0))
923 longhaul_setup_voltagescaling();
924
925 policy->cpuinfo.transition_latency = 200000; /* nsec */
926 policy->cur = calc_speed(longhaul_get_cpu_mult());
927
928 ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table);
929 if (ret)
930 return ret;
931
932 cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu);
933
934 return 0;
935}
936
937static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy)
938{
939 cpufreq_frequency_table_put_attr(policy->cpu);
940 return 0;
941}
942
943static struct freq_attr *longhaul_attr[] = {
944 &cpufreq_freq_attr_scaling_available_freqs,
945 NULL,
946};
947
948static struct cpufreq_driver longhaul_driver = {
949 .verify = longhaul_verify,
950 .target = longhaul_target,
951 .get = longhaul_get,
952 .init = longhaul_cpu_init,
953 .exit = __devexit_p(longhaul_cpu_exit),
954 .name = "longhaul",
955 .owner = THIS_MODULE,
956 .attr = longhaul_attr,
957};
958
959
960static int __init longhaul_init(void)
961{
962 struct cpuinfo_x86 *c = &cpu_data(0);
963
964 if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6)
965 return -ENODEV;
966
967#ifdef CONFIG_SMP
968 if (num_online_cpus() > 1) {
969 printk(KERN_ERR PFX "More than 1 CPU detected, "
970 "longhaul disabled.\n");
971 return -ENODEV;
972 }
973#endif
974#ifdef CONFIG_X86_IO_APIC
975 if (cpu_has_apic) {
976 printk(KERN_ERR PFX "APIC detected. Longhaul is currently "
977 "broken in this configuration.\n");
978 return -ENODEV;
979 }
980#endif
981 switch (c->x86_model) {
982 case 6 ... 9:
983 return cpufreq_register_driver(&longhaul_driver);
984 case 10:
985 printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n");
986 default:
987 ;
988 }
989
990 return -ENODEV;
991}
992
993
994static void __exit longhaul_exit(void)
995{
996 int i;
997
998 for (i = 0; i < numscales; i++) {
999 if (mults[i] == maxmult) {
1000 longhaul_setstate(i);
1001 break;
1002 }
1003 }
1004
1005 cpufreq_unregister_driver(&longhaul_driver);
1006 kfree(longhaul_table);
1007}
1008
1009/* Even if BIOS is exporting ACPI C3 state, and it is used
1010 * with success when CPU is idle, this state doesn't
1011 * trigger frequency transition in some cases. */
1012module_param(disable_acpi_c3, int, 0644);
1013MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support");
1014/* Change CPU voltage with frequency. Very useful to save
1015 * power, but most VIA C3 processors aren't supporting it. */
1016module_param(scale_voltage, int, 0644);
1017MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
1018/* Force revision key to 0 for processors which doesn't
1019 * support voltage scaling, but are introducing itself as
1020 * such. */
1021module_param(revid_errata, int, 0644);
1022MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
1023
1024MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
1025MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors.");
1026MODULE_LICENSE("GPL");
1027
1028late_initcall(longhaul_init);
1029module_exit(longhaul_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h
deleted file mode 100644
index cbf48fbca881..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.h
+++ /dev/null
@@ -1,353 +0,0 @@
1/*
2 * longhaul.h
3 * (C) 2003 Dave Jones.
4 *
5 * Licensed under the terms of the GNU GPL License version 2.
6 *
7 * VIA-specific information
8 */
9
10union msr_bcr2 {
11 struct {
12 unsigned Reseved:19, // 18:0
13 ESOFTBF:1, // 19
14 Reserved2:3, // 22:20
15 CLOCKMUL:4, // 26:23
16 Reserved3:5; // 31:27
17 } bits;
18 unsigned long val;
19};
20
21union msr_longhaul {
22 struct {
23 unsigned RevisionID:4, // 3:0
24 RevisionKey:4, // 7:4
25 EnableSoftBusRatio:1, // 8
26 EnableSoftVID:1, // 9
27 EnableSoftBSEL:1, // 10
28 Reserved:3, // 11:13
29 SoftBusRatio4:1, // 14
30 VRMRev:1, // 15
31 SoftBusRatio:4, // 19:16
32 SoftVID:5, // 24:20
33 Reserved2:3, // 27:25
34 SoftBSEL:2, // 29:28
35 Reserved3:2, // 31:30
36 MaxMHzBR:4, // 35:32
37 MaximumVID:5, // 40:36
38 MaxMHzFSB:2, // 42:41
39 MaxMHzBR4:1, // 43
40 Reserved4:4, // 47:44
41 MinMHzBR:4, // 51:48
42 MinimumVID:5, // 56:52
43 MinMHzFSB:2, // 58:57
44 MinMHzBR4:1, // 59
45 Reserved5:4; // 63:60
46 } bits;
47 unsigned long long val;
48};
49
50/*
51 * Clock ratio tables. Div/Mod by 10 to get ratio.
52 * The eblcr values specify the ratio read from the CPU.
53 * The mults values specify what to write to the CPU.
54 */
55
56/*
57 * VIA C3 Samuel 1 & Samuel 2 (stepping 0)
58 */
59static const int __cpuinitdata samuel1_mults[16] = {
60 -1, /* 0000 -> RESERVED */
61 30, /* 0001 -> 3.0x */
62 40, /* 0010 -> 4.0x */
63 -1, /* 0011 -> RESERVED */
64 -1, /* 0100 -> RESERVED */
65 35, /* 0101 -> 3.5x */
66 45, /* 0110 -> 4.5x */
67 55, /* 0111 -> 5.5x */
68 60, /* 1000 -> 6.0x */
69 70, /* 1001 -> 7.0x */
70 80, /* 1010 -> 8.0x */
71 50, /* 1011 -> 5.0x */
72 65, /* 1100 -> 6.5x */
73 75, /* 1101 -> 7.5x */
74 -1, /* 1110 -> RESERVED */
75 -1, /* 1111 -> RESERVED */
76};
77
78static const int __cpuinitdata samuel1_eblcr[16] = {
79 50, /* 0000 -> RESERVED */
80 30, /* 0001 -> 3.0x */
81 40, /* 0010 -> 4.0x */
82 -1, /* 0011 -> RESERVED */
83 55, /* 0100 -> 5.5x */
84 35, /* 0101 -> 3.5x */
85 45, /* 0110 -> 4.5x */
86 -1, /* 0111 -> RESERVED */
87 -1, /* 1000 -> RESERVED */
88 70, /* 1001 -> 7.0x */
89 80, /* 1010 -> 8.0x */
90 60, /* 1011 -> 6.0x */
91 -1, /* 1100 -> RESERVED */
92 75, /* 1101 -> 7.5x */
93 -1, /* 1110 -> RESERVED */
94 65, /* 1111 -> 6.5x */
95};
96
97/*
98 * VIA C3 Samuel2 Stepping 1->15
99 */
100static const int __cpuinitdata samuel2_eblcr[16] = {
101 50, /* 0000 -> 5.0x */
102 30, /* 0001 -> 3.0x */
103 40, /* 0010 -> 4.0x */
104 100, /* 0011 -> 10.0x */
105 55, /* 0100 -> 5.5x */
106 35, /* 0101 -> 3.5x */
107 45, /* 0110 -> 4.5x */
108 110, /* 0111 -> 11.0x */
109 90, /* 1000 -> 9.0x */
110 70, /* 1001 -> 7.0x */
111 80, /* 1010 -> 8.0x */
112 60, /* 1011 -> 6.0x */
113 120, /* 1100 -> 12.0x */
114 75, /* 1101 -> 7.5x */
115 130, /* 1110 -> 13.0x */
116 65, /* 1111 -> 6.5x */
117};
118
119/*
120 * VIA C3 Ezra
121 */
122static const int __cpuinitdata ezra_mults[16] = {
123 100, /* 0000 -> 10.0x */
124 30, /* 0001 -> 3.0x */
125 40, /* 0010 -> 4.0x */
126 90, /* 0011 -> 9.0x */
127 95, /* 0100 -> 9.5x */
128 35, /* 0101 -> 3.5x */
129 45, /* 0110 -> 4.5x */
130 55, /* 0111 -> 5.5x */
131 60, /* 1000 -> 6.0x */
132 70, /* 1001 -> 7.0x */
133 80, /* 1010 -> 8.0x */
134 50, /* 1011 -> 5.0x */
135 65, /* 1100 -> 6.5x */
136 75, /* 1101 -> 7.5x */
137 85, /* 1110 -> 8.5x */
138 120, /* 1111 -> 12.0x */
139};
140
141static const int __cpuinitdata ezra_eblcr[16] = {
142 50, /* 0000 -> 5.0x */
143 30, /* 0001 -> 3.0x */
144 40, /* 0010 -> 4.0x */
145 100, /* 0011 -> 10.0x */
146 55, /* 0100 -> 5.5x */
147 35, /* 0101 -> 3.5x */
148 45, /* 0110 -> 4.5x */
149 95, /* 0111 -> 9.5x */
150 90, /* 1000 -> 9.0x */
151 70, /* 1001 -> 7.0x */
152 80, /* 1010 -> 8.0x */
153 60, /* 1011 -> 6.0x */
154 120, /* 1100 -> 12.0x */
155 75, /* 1101 -> 7.5x */
156 85, /* 1110 -> 8.5x */
157 65, /* 1111 -> 6.5x */
158};
159
160/*
161 * VIA C3 (Ezra-T) [C5M].
162 */
163static const int __cpuinitdata ezrat_mults[32] = {
164 100, /* 0000 -> 10.0x */
165 30, /* 0001 -> 3.0x */
166 40, /* 0010 -> 4.0x */
167 90, /* 0011 -> 9.0x */
168 95, /* 0100 -> 9.5x */
169 35, /* 0101 -> 3.5x */
170 45, /* 0110 -> 4.5x */
171 55, /* 0111 -> 5.5x */
172 60, /* 1000 -> 6.0x */
173 70, /* 1001 -> 7.0x */
174 80, /* 1010 -> 8.0x */
175 50, /* 1011 -> 5.0x */
176 65, /* 1100 -> 6.5x */
177 75, /* 1101 -> 7.5x */
178 85, /* 1110 -> 8.5x */
179 120, /* 1111 -> 12.0x */
180
181 -1, /* 0000 -> RESERVED (10.0x) */
182 110, /* 0001 -> 11.0x */
183 -1, /* 0010 -> 12.0x */
184 -1, /* 0011 -> RESERVED (9.0x)*/
185 105, /* 0100 -> 10.5x */
186 115, /* 0101 -> 11.5x */
187 125, /* 0110 -> 12.5x */
188 135, /* 0111 -> 13.5x */
189 140, /* 1000 -> 14.0x */
190 150, /* 1001 -> 15.0x */
191 160, /* 1010 -> 16.0x */
192 130, /* 1011 -> 13.0x */
193 145, /* 1100 -> 14.5x */
194 155, /* 1101 -> 15.5x */
195 -1, /* 1110 -> RESERVED (13.0x) */
196 -1, /* 1111 -> RESERVED (12.0x) */
197};
198
199static const int __cpuinitdata ezrat_eblcr[32] = {
200 50, /* 0000 -> 5.0x */
201 30, /* 0001 -> 3.0x */
202 40, /* 0010 -> 4.0x */
203 100, /* 0011 -> 10.0x */
204 55, /* 0100 -> 5.5x */
205 35, /* 0101 -> 3.5x */
206 45, /* 0110 -> 4.5x */
207 95, /* 0111 -> 9.5x */
208 90, /* 1000 -> 9.0x */
209 70, /* 1001 -> 7.0x */
210 80, /* 1010 -> 8.0x */
211 60, /* 1011 -> 6.0x */
212 120, /* 1100 -> 12.0x */
213 75, /* 1101 -> 7.5x */
214 85, /* 1110 -> 8.5x */
215 65, /* 1111 -> 6.5x */
216
217 -1, /* 0000 -> RESERVED (9.0x) */
218 110, /* 0001 -> 11.0x */
219 120, /* 0010 -> 12.0x */
220 -1, /* 0011 -> RESERVED (10.0x)*/
221 135, /* 0100 -> 13.5x */
222 115, /* 0101 -> 11.5x */
223 125, /* 0110 -> 12.5x */
224 105, /* 0111 -> 10.5x */
225 130, /* 1000 -> 13.0x */
226 150, /* 1001 -> 15.0x */
227 160, /* 1010 -> 16.0x */
228 140, /* 1011 -> 14.0x */
229 -1, /* 1100 -> RESERVED (12.0x) */
230 155, /* 1101 -> 15.5x */
231 -1, /* 1110 -> RESERVED (13.0x) */
232 145, /* 1111 -> 14.5x */
233};
234
235/*
236 * VIA C3 Nehemiah */
237
238static const int __cpuinitdata nehemiah_mults[32] = {
239 100, /* 0000 -> 10.0x */
240 -1, /* 0001 -> 16.0x */
241 40, /* 0010 -> 4.0x */
242 90, /* 0011 -> 9.0x */
243 95, /* 0100 -> 9.5x */
244 -1, /* 0101 -> RESERVED */
245 45, /* 0110 -> 4.5x */
246 55, /* 0111 -> 5.5x */
247 60, /* 1000 -> 6.0x */
248 70, /* 1001 -> 7.0x */
249 80, /* 1010 -> 8.0x */
250 50, /* 1011 -> 5.0x */
251 65, /* 1100 -> 6.5x */
252 75, /* 1101 -> 7.5x */
253 85, /* 1110 -> 8.5x */
254 120, /* 1111 -> 12.0x */
255 -1, /* 0000 -> 10.0x */
256 110, /* 0001 -> 11.0x */
257 -1, /* 0010 -> 12.0x */
258 -1, /* 0011 -> 9.0x */
259 105, /* 0100 -> 10.5x */
260 115, /* 0101 -> 11.5x */
261 125, /* 0110 -> 12.5x */
262 135, /* 0111 -> 13.5x */
263 140, /* 1000 -> 14.0x */
264 150, /* 1001 -> 15.0x */
265 160, /* 1010 -> 16.0x */
266 130, /* 1011 -> 13.0x */
267 145, /* 1100 -> 14.5x */
268 155, /* 1101 -> 15.5x */
269 -1, /* 1110 -> RESERVED (13.0x) */
270 -1, /* 1111 -> 12.0x */
271};
272
273static const int __cpuinitdata nehemiah_eblcr[32] = {
274 50, /* 0000 -> 5.0x */
275 160, /* 0001 -> 16.0x */
276 40, /* 0010 -> 4.0x */
277 100, /* 0011 -> 10.0x */
278 55, /* 0100 -> 5.5x */
279 -1, /* 0101 -> RESERVED */
280 45, /* 0110 -> 4.5x */
281 95, /* 0111 -> 9.5x */
282 90, /* 1000 -> 9.0x */
283 70, /* 1001 -> 7.0x */
284 80, /* 1010 -> 8.0x */
285 60, /* 1011 -> 6.0x */
286 120, /* 1100 -> 12.0x */
287 75, /* 1101 -> 7.5x */
288 85, /* 1110 -> 8.5x */
289 65, /* 1111 -> 6.5x */
290 90, /* 0000 -> 9.0x */
291 110, /* 0001 -> 11.0x */
292 120, /* 0010 -> 12.0x */
293 100, /* 0011 -> 10.0x */
294 135, /* 0100 -> 13.5x */
295 115, /* 0101 -> 11.5x */
296 125, /* 0110 -> 12.5x */
297 105, /* 0111 -> 10.5x */
298 130, /* 1000 -> 13.0x */
299 150, /* 1001 -> 15.0x */
300 160, /* 1010 -> 16.0x */
301 140, /* 1011 -> 14.0x */
302 120, /* 1100 -> 12.0x */
303 155, /* 1101 -> 15.5x */
304 -1, /* 1110 -> RESERVED (13.0x) */
305 145 /* 1111 -> 14.5x */
306};
307
308/*
309 * Voltage scales. Div/Mod by 1000 to get actual voltage.
310 * Which scale to use depends on the VRM type in use.
311 */
312
313struct mV_pos {
314 unsigned short mV;
315 unsigned short pos;
316};
317
318static const struct mV_pos __cpuinitdata vrm85_mV[32] = {
319 {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2},
320 {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26},
321 {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18},
322 {1450, 16}, {1400, 14}, {1350, 12}, {1300, 10},
323 {1275, 9}, {1225, 7}, {1175, 5}, {1125, 3},
324 {1075, 1}, {1825, 31}, {1775, 29}, {1725, 27},
325 {1675, 25}, {1625, 23}, {1575, 21}, {1525, 19},
326 {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11}
327};
328
329static const unsigned char __cpuinitdata mV_vrm85[32] = {
330 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11,
331 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d,
332 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19,
333 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15
334};
335
336static const struct mV_pos __cpuinitdata mobilevrm_mV[32] = {
337 {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28},
338 {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24},
339 {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20},
340 {1150, 19}, {1100, 18}, {1050, 17}, {1000, 16},
341 {975, 15}, {950, 14}, {925, 13}, {900, 12},
342 {875, 11}, {850, 10}, {825, 9}, {800, 8},
343 {775, 7}, {750, 6}, {725, 5}, {700, 4},
344 {675, 3}, {650, 2}, {625, 1}, {600, 0}
345};
346
347static const unsigned char __cpuinitdata mV_mobilevrm[32] = {
348 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18,
349 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
350 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
351 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
352};
353
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
deleted file mode 100644
index d9f51367666b..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ /dev/null
@@ -1,327 +0,0 @@
1/*
2 * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
3 *
4 * Licensed under the terms of the GNU GPL License version 2.
5 *
6 * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
7 */
8
9#include <linux/kernel.h>
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/cpufreq.h>
13#include <linux/timex.h>
14
15#include <asm/msr.h>
16#include <asm/processor.h>
17
18#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
19 "longrun", msg)
20
21static struct cpufreq_driver longrun_driver;
22
23/**
24 * longrun_{low,high}_freq is needed for the conversion of cpufreq kHz
25 * values into per cent values. In TMTA microcode, the following is valid:
26 * performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
27 */
28static unsigned int longrun_low_freq, longrun_high_freq;
29
30
31/**
32 * longrun_get_policy - get the current LongRun policy
33 * @policy: struct cpufreq_policy where current policy is written into
34 *
35 * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS
36 * and MSR_TMTA_LONGRUN_CTRL
37 */
38static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy)
39{
40 u32 msr_lo, msr_hi;
41
42 rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
43 dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi);
44 if (msr_lo & 0x01)
45 policy->policy = CPUFREQ_POLICY_PERFORMANCE;
46 else
47 policy->policy = CPUFREQ_POLICY_POWERSAVE;
48
49 rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
50 dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi);
51 msr_lo &= 0x0000007F;
52 msr_hi &= 0x0000007F;
53
54 if (longrun_high_freq <= longrun_low_freq) {
55 /* Assume degenerate Longrun table */
56 policy->min = policy->max = longrun_high_freq;
57 } else {
58 policy->min = longrun_low_freq + msr_lo *
59 ((longrun_high_freq - longrun_low_freq) / 100);
60 policy->max = longrun_low_freq + msr_hi *
61 ((longrun_high_freq - longrun_low_freq) / 100);
62 }
63 policy->cpu = 0;
64}
65
66
67/**
68 * longrun_set_policy - sets a new CPUFreq policy
69 * @policy: new policy
70 *
71 * Sets a new CPUFreq policy on LongRun-capable processors. This function
72 * has to be called with cpufreq_driver locked.
73 */
74static int longrun_set_policy(struct cpufreq_policy *policy)
75{
76 u32 msr_lo, msr_hi;
77 u32 pctg_lo, pctg_hi;
78
79 if (!policy)
80 return -EINVAL;
81
82 if (longrun_high_freq <= longrun_low_freq) {
83 /* Assume degenerate Longrun table */
84 pctg_lo = pctg_hi = 100;
85 } else {
86 pctg_lo = (policy->min - longrun_low_freq) /
87 ((longrun_high_freq - longrun_low_freq) / 100);
88 pctg_hi = (policy->max - longrun_low_freq) /
89 ((longrun_high_freq - longrun_low_freq) / 100);
90 }
91
92 if (pctg_hi > 100)
93 pctg_hi = 100;
94 if (pctg_lo > pctg_hi)
95 pctg_lo = pctg_hi;
96
97 /* performance or economy mode */
98 rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
99 msr_lo &= 0xFFFFFFFE;
100 switch (policy->policy) {
101 case CPUFREQ_POLICY_PERFORMANCE:
102 msr_lo |= 0x00000001;
103 break;
104 case CPUFREQ_POLICY_POWERSAVE:
105 break;
106 }
107 wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
108
109 /* lower and upper boundary */
110 rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
111 msr_lo &= 0xFFFFFF80;
112 msr_hi &= 0xFFFFFF80;
113 msr_lo |= pctg_lo;
114 msr_hi |= pctg_hi;
115 wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
116
117 return 0;
118}
119
120
121/**
122 * longrun_verify_poliy - verifies a new CPUFreq policy
123 * @policy: the policy to verify
124 *
125 * Validates a new CPUFreq policy. This function has to be called with
126 * cpufreq_driver locked.
127 */
128static int longrun_verify_policy(struct cpufreq_policy *policy)
129{
130 if (!policy)
131 return -EINVAL;
132
133 policy->cpu = 0;
134 cpufreq_verify_within_limits(policy,
135 policy->cpuinfo.min_freq,
136 policy->cpuinfo.max_freq);
137
138 if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
139 (policy->policy != CPUFREQ_POLICY_PERFORMANCE))
140 return -EINVAL;
141
142 return 0;
143}
144
145static unsigned int longrun_get(unsigned int cpu)
146{
147 u32 eax, ebx, ecx, edx;
148
149 if (cpu)
150 return 0;
151
152 cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
153 dprintk("cpuid eax is %u\n", eax);
154
155 return eax * 1000;
156}
157
158/**
159 * longrun_determine_freqs - determines the lowest and highest possible core frequency
160 * @low_freq: an int to put the lowest frequency into
161 * @high_freq: an int to put the highest frequency into
162 *
163 * Determines the lowest and highest possible core frequencies on this CPU.
164 * This is necessary to calculate the performance percentage according to
165 * TMTA rules:
166 * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq)
167 */
168static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
169 unsigned int *high_freq)
170{
171 u32 msr_lo, msr_hi;
172 u32 save_lo, save_hi;
173 u32 eax, ebx, ecx, edx;
174 u32 try_hi;
175 struct cpuinfo_x86 *c = &cpu_data(0);
176
177 if (!low_freq || !high_freq)
178 return -EINVAL;
179
180 if (cpu_has(c, X86_FEATURE_LRTI)) {
181 /* if the LongRun Table Interface is present, the
182 * detection is a bit easier:
183 * For minimum frequency, read out the maximum
184 * level (msr_hi), write that into "currently
185 * selected level", and read out the frequency.
186 * For maximum frequency, read out level zero.
187 */
188 /* minimum */
189 rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi);
190 wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi);
191 rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
192 *low_freq = msr_lo * 1000; /* to kHz */
193
194 /* maximum */
195 wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi);
196 rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
197 *high_freq = msr_lo * 1000; /* to kHz */
198
199 dprintk("longrun table interface told %u - %u kHz\n",
200 *low_freq, *high_freq);
201
202 if (*low_freq > *high_freq)
203 *low_freq = *high_freq;
204 return 0;
205 }
206
207 /* set the upper border to the value determined during TSC init */
208 *high_freq = (cpu_khz / 1000);
209 *high_freq = *high_freq * 1000;
210 dprintk("high frequency is %u kHz\n", *high_freq);
211
212 /* get current borders */
213 rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
214 save_lo = msr_lo & 0x0000007F;
215 save_hi = msr_hi & 0x0000007F;
216
217 /* if current perf_pctg is larger than 90%, we need to decrease the
218 * upper limit to make the calculation more accurate.
219 */
220 cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
221 /* try decreasing in 10% steps, some processors react only
222 * on some barrier values */
223 for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -= 10) {
224 /* set to 0 to try_hi perf_pctg */
225 msr_lo &= 0xFFFFFF80;
226 msr_hi &= 0xFFFFFF80;
227 msr_hi |= try_hi;
228 wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
229
230 /* read out current core MHz and current perf_pctg */
231 cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
232
233 /* restore values */
234 wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi);
235 }
236 dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax);
237
238 /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
239 * eqals
240 * low_freq * (1 - perf_pctg) = (cur_freq - high_freq * perf_pctg)
241 *
242 * high_freq * perf_pctg is stored tempoarily into "ebx".
243 */
244 ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */
245
246 if ((ecx > 95) || (ecx == 0) || (eax < ebx))
247 return -EIO;
248
249 edx = ((eax - ebx) * 100) / (100 - ecx);
250 *low_freq = edx * 1000; /* back to kHz */
251
252 dprintk("low frequency is %u kHz\n", *low_freq);
253
254 if (*low_freq > *high_freq)
255 *low_freq = *high_freq;
256
257 return 0;
258}
259
260
261static int __cpuinit longrun_cpu_init(struct cpufreq_policy *policy)
262{
263 int result = 0;
264
265 /* capability check */
266 if (policy->cpu != 0)
267 return -ENODEV;
268
269 /* detect low and high frequency */
270 result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq);
271 if (result)
272 return result;
273
274 /* cpuinfo and default policy values */
275 policy->cpuinfo.min_freq = longrun_low_freq;
276 policy->cpuinfo.max_freq = longrun_high_freq;
277 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
278 longrun_get_policy(policy);
279
280 return 0;
281}
282
283
284static struct cpufreq_driver longrun_driver = {
285 .flags = CPUFREQ_CONST_LOOPS,
286 .verify = longrun_verify_policy,
287 .setpolicy = longrun_set_policy,
288 .get = longrun_get,
289 .init = longrun_cpu_init,
290 .name = "longrun",
291 .owner = THIS_MODULE,
292};
293
294
295/**
296 * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver
297 *
298 * Initializes the LongRun support.
299 */
300static int __init longrun_init(void)
301{
302 struct cpuinfo_x86 *c = &cpu_data(0);
303
304 if (c->x86_vendor != X86_VENDOR_TRANSMETA ||
305 !cpu_has(c, X86_FEATURE_LONGRUN))
306 return -ENODEV;
307
308 return cpufreq_register_driver(&longrun_driver);
309}
310
311
312/**
313 * longrun_exit - unregisters LongRun support
314 */
315static void __exit longrun_exit(void)
316{
317 cpufreq_unregister_driver(&longrun_driver);
318}
319
320
321MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
322MODULE_DESCRIPTION("LongRun driver for Transmeta Crusoe and "
323 "Efficeon processors.");
324MODULE_LICENSE("GPL");
325
326module_init(longrun_init);
327module_exit(longrun_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.c b/arch/x86/kernel/cpu/cpufreq/mperf.c
deleted file mode 100644
index 911e193018ae..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/mperf.c
+++ /dev/null
@@ -1,51 +0,0 @@
1#include <linux/kernel.h>
2#include <linux/smp.h>
3#include <linux/module.h>
4#include <linux/init.h>
5#include <linux/cpufreq.h>
6#include <linux/slab.h>
7
8#include "mperf.h"
9
10static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
11
12/* Called via smp_call_function_single(), on the target CPU */
13static void read_measured_perf_ctrs(void *_cur)
14{
15 struct aperfmperf *am = _cur;
16
17 get_aperfmperf(am);
18}
19
20/*
21 * Return the measured active (C0) frequency on this CPU since last call
22 * to this function.
23 * Input: cpu number
24 * Return: Average CPU frequency in terms of max frequency (zero on error)
25 *
26 * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
27 * over a period of time, while CPU is in C0 state.
28 * IA32_MPERF counts at the rate of max advertised frequency
29 * IA32_APERF counts at the rate of actual CPU frequency
30 * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
31 * no meaning should be associated with absolute values of these MSRs.
32 */
33unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
34 unsigned int cpu)
35{
36 struct aperfmperf perf;
37 unsigned long ratio;
38 unsigned int retval;
39
40 if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
41 return 0;
42
43 ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
44 per_cpu(acfreq_old_perf, cpu) = perf;
45
46 retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
47
48 return retval;
49}
50EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf);
51MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.h b/arch/x86/kernel/cpu/cpufreq/mperf.h
deleted file mode 100644
index 5dbf2950dc22..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/mperf.h
+++ /dev/null
@@ -1,9 +0,0 @@
1/*
2 * (c) 2010 Advanced Micro Devices, Inc.
3 * Your use of this code is subject to the terms and conditions of the
4 * GNU general public license version 2. See "COPYING" or
5 * http://www.gnu.org/licenses/gpl.html
6 */
7
8unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
9 unsigned int cpu);
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
deleted file mode 100644
index 52c93648e492..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ /dev/null
@@ -1,331 +0,0 @@
1/*
2 * Pentium 4/Xeon CPU on demand clock modulation/speed scaling
3 * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
4 * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
5 * (C) 2002 Arjan van de Ven <arjanv@redhat.com>
6 * (C) 2002 Tora T. Engstad
7 * All Rights Reserved
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 *
14 * The author(s) of this software shall not be held liable for damages
15 * of any nature resulting due to the use of this software. This
16 * software is provided AS-IS with no warranties.
17 *
18 * Date Errata Description
19 * 20020525 N44, O17 12.5% or 25% DC causes lockup
20 *
21 */
22
23#include <linux/kernel.h>
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/smp.h>
27#include <linux/cpufreq.h>
28#include <linux/cpumask.h>
29#include <linux/timex.h>
30
31#include <asm/processor.h>
32#include <asm/msr.h>
33#include <asm/timer.h>
34
35#include "speedstep-lib.h"
36
37#define PFX "p4-clockmod: "
38#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
39 "p4-clockmod", msg)
40
41/*
42 * Duty Cycle (3bits), note DC_DISABLE is not specified in
43 * intel docs i just use it to mean disable
44 */
45enum {
46 DC_RESV, DC_DFLT, DC_25PT, DC_38PT, DC_50PT,
47 DC_64PT, DC_75PT, DC_88PT, DC_DISABLE
48};
49
50#define DC_ENTRIES 8
51
52
53static int has_N44_O17_errata[NR_CPUS];
54static unsigned int stock_freq;
55static struct cpufreq_driver p4clockmod_driver;
56static unsigned int cpufreq_p4_get(unsigned int cpu);
57
58static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
59{
60 u32 l, h;
61
62 if (!cpu_online(cpu) ||
63 (newstate > DC_DISABLE) || (newstate == DC_RESV))
64 return -EINVAL;
65
66 rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h);
67
68 if (l & 0x01)
69 dprintk("CPU#%d currently thermal throttled\n", cpu);
70
71 if (has_N44_O17_errata[cpu] &&
72 (newstate == DC_25PT || newstate == DC_DFLT))
73 newstate = DC_38PT;
74
75 rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
76 if (newstate == DC_DISABLE) {
77 dprintk("CPU#%d disabling modulation\n", cpu);
78 wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h);
79 } else {
80 dprintk("CPU#%d setting duty cycle to %d%%\n",
81 cpu, ((125 * newstate) / 10));
82 /* bits 63 - 5 : reserved
83 * bit 4 : enable/disable
84 * bits 3-1 : duty cycle
85 * bit 0 : reserved
86 */
87 l = (l & ~14);
88 l = l | (1<<4) | ((newstate & 0x7)<<1);
89 wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l, h);
90 }
91
92 return 0;
93}
94
95
96static struct cpufreq_frequency_table p4clockmod_table[] = {
97 {DC_RESV, CPUFREQ_ENTRY_INVALID},
98 {DC_DFLT, 0},
99 {DC_25PT, 0},
100 {DC_38PT, 0},
101 {DC_50PT, 0},
102 {DC_64PT, 0},
103 {DC_75PT, 0},
104 {DC_88PT, 0},
105 {DC_DISABLE, 0},
106 {DC_RESV, CPUFREQ_TABLE_END},
107};
108
109
110static int cpufreq_p4_target(struct cpufreq_policy *policy,
111 unsigned int target_freq,
112 unsigned int relation)
113{
114 unsigned int newstate = DC_RESV;
115 struct cpufreq_freqs freqs;
116 int i;
117
118 if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0],
119 target_freq, relation, &newstate))
120 return -EINVAL;
121
122 freqs.old = cpufreq_p4_get(policy->cpu);
123 freqs.new = stock_freq * p4clockmod_table[newstate].index / 8;
124
125 if (freqs.new == freqs.old)
126 return 0;
127
128 /* notifiers */
129 for_each_cpu(i, policy->cpus) {
130 freqs.cpu = i;
131 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
132 }
133
134 /* run on each logical CPU,
135 * see section 13.15.3 of IA32 Intel Architecture Software
136 * Developer's Manual, Volume 3
137 */
138 for_each_cpu(i, policy->cpus)
139 cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
140
141 /* notifiers */
142 for_each_cpu(i, policy->cpus) {
143 freqs.cpu = i;
144 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
145 }
146
147 return 0;
148}
149
150
151static int cpufreq_p4_verify(struct cpufreq_policy *policy)
152{
153 return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]);
154}
155
156
157static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
158{
159 if (c->x86 == 0x06) {
160 if (cpu_has(c, X86_FEATURE_EST))
161 printk_once(KERN_WARNING PFX "Warning: EST-capable "
162 "CPU detected. The acpi-cpufreq module offers "
163 "voltage scaling in addition to frequency "
164 "scaling. You should use that instead of "
165 "p4-clockmod, if possible.\n");
166 switch (c->x86_model) {
167 case 0x0E: /* Core */
168 case 0x0F: /* Core Duo */
169 case 0x16: /* Celeron Core */
170 case 0x1C: /* Atom */
171 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
172 return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE);
173 case 0x0D: /* Pentium M (Dothan) */
174 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
175 /* fall through */
176 case 0x09: /* Pentium M (Banias) */
177 return speedstep_get_frequency(SPEEDSTEP_CPU_PM);
178 }
179 }
180
181 if (c->x86 != 0xF)
182 return 0;
183
184 /* on P-4s, the TSC runs with constant frequency independent whether
185 * throttling is active or not. */
186 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
187
188 if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) {
189 printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
190 "The speedstep-ich or acpi cpufreq modules offer "
191 "voltage scaling in addition of frequency scaling. "
192 "You should use either one instead of p4-clockmod, "
193 "if possible.\n");
194 return speedstep_get_frequency(SPEEDSTEP_CPU_P4M);
195 }
196
197 return speedstep_get_frequency(SPEEDSTEP_CPU_P4D);
198}
199
200
201
202static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
203{
204 struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
205 int cpuid = 0;
206 unsigned int i;
207
208#ifdef CONFIG_SMP
209 cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
210#endif
211
212 /* Errata workaround */
213 cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask;
214 switch (cpuid) {
215 case 0x0f07:
216 case 0x0f0a:
217 case 0x0f11:
218 case 0x0f12:
219 has_N44_O17_errata[policy->cpu] = 1;
220 dprintk("has errata -- disabling low frequencies\n");
221 }
222
223 if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D &&
224 c->x86_model < 2) {
225 /* switch to maximum frequency and measure result */
226 cpufreq_p4_setdc(policy->cpu, DC_DISABLE);
227 recalibrate_cpu_khz();
228 }
229 /* get max frequency */
230 stock_freq = cpufreq_p4_get_frequency(c);
231 if (!stock_freq)
232 return -EINVAL;
233
234 /* table init */
235 for (i = 1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
236 if ((i < 2) && (has_N44_O17_errata[policy->cpu]))
237 p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
238 else
239 p4clockmod_table[i].frequency = (stock_freq * i)/8;
240 }
241 cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu);
242
243 /* cpuinfo and default policy values */
244
245 /* the transition latency is set to be 1 higher than the maximum
246 * transition latency of the ondemand governor */
247 policy->cpuinfo.transition_latency = 10000001;
248 policy->cur = stock_freq;
249
250 return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]);
251}
252
253
254static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy)
255{
256 cpufreq_frequency_table_put_attr(policy->cpu);
257 return 0;
258}
259
260static unsigned int cpufreq_p4_get(unsigned int cpu)
261{
262 u32 l, h;
263
264 rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
265
266 if (l & 0x10) {
267 l = l >> 1;
268 l &= 0x7;
269 } else
270 l = DC_DISABLE;
271
272 if (l != DC_DISABLE)
273 return stock_freq * l / 8;
274
275 return stock_freq;
276}
277
278static struct freq_attr *p4clockmod_attr[] = {
279 &cpufreq_freq_attr_scaling_available_freqs,
280 NULL,
281};
282
283static struct cpufreq_driver p4clockmod_driver = {
284 .verify = cpufreq_p4_verify,
285 .target = cpufreq_p4_target,
286 .init = cpufreq_p4_cpu_init,
287 .exit = cpufreq_p4_cpu_exit,
288 .get = cpufreq_p4_get,
289 .name = "p4-clockmod",
290 .owner = THIS_MODULE,
291 .attr = p4clockmod_attr,
292};
293
294
295static int __init cpufreq_p4_init(void)
296{
297 struct cpuinfo_x86 *c = &cpu_data(0);
298 int ret;
299
300 /*
301 * THERM_CONTROL is architectural for IA32 now, so
302 * we can rely on the capability checks
303 */
304 if (c->x86_vendor != X86_VENDOR_INTEL)
305 return -ENODEV;
306
307 if (!test_cpu_cap(c, X86_FEATURE_ACPI) ||
308 !test_cpu_cap(c, X86_FEATURE_ACC))
309 return -ENODEV;
310
311 ret = cpufreq_register_driver(&p4clockmod_driver);
312 if (!ret)
313 printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock "
314 "Modulation available\n");
315
316 return ret;
317}
318
319
320static void __exit cpufreq_p4_exit(void)
321{
322 cpufreq_unregister_driver(&p4clockmod_driver);
323}
324
325
326MODULE_AUTHOR("Zwane Mwaikambo <zwane@commfireservices.com>");
327MODULE_DESCRIPTION("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
328MODULE_LICENSE("GPL");
329
330late_initcall(cpufreq_p4_init);
331module_exit(cpufreq_p4_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
deleted file mode 100644
index 755a31e0f5b0..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ /dev/null
@@ -1,624 +0,0 @@
1/*
2 * pcc-cpufreq.c - Processor Clocking Control firmware cpufreq interface
3 *
4 * Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com>
5 * Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
6 * Nagananda Chumbalkar <nagananda.chumbalkar@hp.com>
7 *
8 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; version 2 of the License.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON
17 * INFRINGEMENT. See the GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 675 Mass Ave, Cambridge, MA 02139, USA.
22 *
23 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
24 */
25
26#include <linux/kernel.h>
27#include <linux/module.h>
28#include <linux/init.h>
29#include <linux/smp.h>
30#include <linux/sched.h>
31#include <linux/cpufreq.h>
32#include <linux/compiler.h>
33#include <linux/slab.h>
34
35#include <linux/acpi.h>
36#include <linux/io.h>
37#include <linux/spinlock.h>
38#include <linux/uaccess.h>
39
40#include <acpi/processor.h>
41
42#define PCC_VERSION "1.00.00"
43#define POLL_LOOPS 300
44
45#define CMD_COMPLETE 0x1
46#define CMD_GET_FREQ 0x0
47#define CMD_SET_FREQ 0x1
48
49#define BUF_SZ 4
50
51#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
52 "pcc-cpufreq", msg)
53
54struct pcc_register_resource {
55 u8 descriptor;
56 u16 length;
57 u8 space_id;
58 u8 bit_width;
59 u8 bit_offset;
60 u8 access_size;
61 u64 address;
62} __attribute__ ((packed));
63
64struct pcc_memory_resource {
65 u8 descriptor;
66 u16 length;
67 u8 space_id;
68 u8 resource_usage;
69 u8 type_specific;
70 u64 granularity;
71 u64 minimum;
72 u64 maximum;
73 u64 translation_offset;
74 u64 address_length;
75} __attribute__ ((packed));
76
77static struct cpufreq_driver pcc_cpufreq_driver;
78
79struct pcc_header {
80 u32 signature;
81 u16 length;
82 u8 major;
83 u8 minor;
84 u32 features;
85 u16 command;
86 u16 status;
87 u32 latency;
88 u32 minimum_time;
89 u32 maximum_time;
90 u32 nominal;
91 u32 throttled_frequency;
92 u32 minimum_frequency;
93};
94
95static void __iomem *pcch_virt_addr;
96static struct pcc_header __iomem *pcch_hdr;
97
98static DEFINE_SPINLOCK(pcc_lock);
99
100static struct acpi_generic_address doorbell;
101
102static u64 doorbell_preserve;
103static u64 doorbell_write;
104
105static u8 OSC_UUID[16] = {0x63, 0x9B, 0x2C, 0x9F, 0x70, 0x91, 0x49, 0x1f,
106 0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46};
107
108struct pcc_cpu {
109 u32 input_offset;
110 u32 output_offset;
111};
112
113static struct pcc_cpu __percpu *pcc_cpu_info;
114
115static int pcc_cpufreq_verify(struct cpufreq_policy *policy)
116{
117 cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
118 policy->cpuinfo.max_freq);
119 return 0;
120}
121
122static inline void pcc_cmd(void)
123{
124 u64 doorbell_value;
125 int i;
126
127 acpi_read(&doorbell_value, &doorbell);
128 acpi_write((doorbell_value & doorbell_preserve) | doorbell_write,
129 &doorbell);
130
131 for (i = 0; i < POLL_LOOPS; i++) {
132 if (ioread16(&pcch_hdr->status) & CMD_COMPLETE)
133 break;
134 }
135}
136
137static inline void pcc_clear_mapping(void)
138{
139 if (pcch_virt_addr)
140 iounmap(pcch_virt_addr);
141 pcch_virt_addr = NULL;
142}
143
144static unsigned int pcc_get_freq(unsigned int cpu)
145{
146 struct pcc_cpu *pcc_cpu_data;
147 unsigned int curr_freq;
148 unsigned int freq_limit;
149 u16 status;
150 u32 input_buffer;
151 u32 output_buffer;
152
153 spin_lock(&pcc_lock);
154
155 dprintk("get: get_freq for CPU %d\n", cpu);
156 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
157
158 input_buffer = 0x1;
159 iowrite32(input_buffer,
160 (pcch_virt_addr + pcc_cpu_data->input_offset));
161 iowrite16(CMD_GET_FREQ, &pcch_hdr->command);
162
163 pcc_cmd();
164
165 output_buffer =
166 ioread32(pcch_virt_addr + pcc_cpu_data->output_offset);
167
168 /* Clear the input buffer - we are done with the current command */
169 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
170
171 status = ioread16(&pcch_hdr->status);
172 if (status != CMD_COMPLETE) {
173 dprintk("get: FAILED: for CPU %d, status is %d\n",
174 cpu, status);
175 goto cmd_incomplete;
176 }
177 iowrite16(0, &pcch_hdr->status);
178 curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff))
179 / 100) * 1000);
180
181 dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is "
182 "0x%x, contains a value of: 0x%x. Speed is: %d MHz\n",
183 cpu, (pcch_virt_addr + pcc_cpu_data->output_offset),
184 output_buffer, curr_freq);
185
186 freq_limit = (output_buffer >> 8) & 0xff;
187 if (freq_limit != 0xff) {
188 dprintk("get: frequency for cpu %d is being temporarily"
189 " capped at %d\n", cpu, curr_freq);
190 }
191
192 spin_unlock(&pcc_lock);
193 return curr_freq;
194
195cmd_incomplete:
196 iowrite16(0, &pcch_hdr->status);
197 spin_unlock(&pcc_lock);
198 return 0;
199}
200
201static int pcc_cpufreq_target(struct cpufreq_policy *policy,
202 unsigned int target_freq,
203 unsigned int relation)
204{
205 struct pcc_cpu *pcc_cpu_data;
206 struct cpufreq_freqs freqs;
207 u16 status;
208 u32 input_buffer;
209 int cpu;
210
211 spin_lock(&pcc_lock);
212 cpu = policy->cpu;
213 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
214
215 dprintk("target: CPU %d should go to target freq: %d "
216 "(virtual) input_offset is 0x%x\n",
217 cpu, target_freq,
218 (pcch_virt_addr + pcc_cpu_data->input_offset));
219
220 freqs.new = target_freq;
221 freqs.cpu = cpu;
222 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
223
224 input_buffer = 0x1 | (((target_freq * 100)
225 / (ioread32(&pcch_hdr->nominal) * 1000)) << 8);
226 iowrite32(input_buffer,
227 (pcch_virt_addr + pcc_cpu_data->input_offset));
228 iowrite16(CMD_SET_FREQ, &pcch_hdr->command);
229
230 pcc_cmd();
231
232 /* Clear the input buffer - we are done with the current command */
233 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
234
235 status = ioread16(&pcch_hdr->status);
236 if (status != CMD_COMPLETE) {
237 dprintk("target: FAILED for cpu %d, with status: 0x%x\n",
238 cpu, status);
239 goto cmd_incomplete;
240 }
241 iowrite16(0, &pcch_hdr->status);
242
243 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
244 dprintk("target: was SUCCESSFUL for cpu %d\n", cpu);
245 spin_unlock(&pcc_lock);
246
247 return 0;
248
249cmd_incomplete:
250 iowrite16(0, &pcch_hdr->status);
251 spin_unlock(&pcc_lock);
252 return -EINVAL;
253}
254
255static int pcc_get_offset(int cpu)
256{
257 acpi_status status;
258 struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
259 union acpi_object *pccp, *offset;
260 struct pcc_cpu *pcc_cpu_data;
261 struct acpi_processor *pr;
262 int ret = 0;
263
264 pr = per_cpu(processors, cpu);
265 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
266
267 status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer);
268 if (ACPI_FAILURE(status))
269 return -ENODEV;
270
271 pccp = buffer.pointer;
272 if (!pccp || pccp->type != ACPI_TYPE_PACKAGE) {
273 ret = -ENODEV;
274 goto out_free;
275 };
276
277 offset = &(pccp->package.elements[0]);
278 if (!offset || offset->type != ACPI_TYPE_INTEGER) {
279 ret = -ENODEV;
280 goto out_free;
281 }
282
283 pcc_cpu_data->input_offset = offset->integer.value;
284
285 offset = &(pccp->package.elements[1]);
286 if (!offset || offset->type != ACPI_TYPE_INTEGER) {
287 ret = -ENODEV;
288 goto out_free;
289 }
290
291 pcc_cpu_data->output_offset = offset->integer.value;
292
293 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
294 memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ);
295
296 dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data "
297 "input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n",
298 cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset);
299out_free:
300 kfree(buffer.pointer);
301 return ret;
302}
303
304static int __init pcc_cpufreq_do_osc(acpi_handle *handle)
305{
306 acpi_status status;
307 struct acpi_object_list input;
308 struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
309 union acpi_object in_params[4];
310 union acpi_object *out_obj;
311 u32 capabilities[2];
312 u32 errors;
313 u32 supported;
314 int ret = 0;
315
316 input.count = 4;
317 input.pointer = in_params;
318 in_params[0].type = ACPI_TYPE_BUFFER;
319 in_params[0].buffer.length = 16;
320 in_params[0].buffer.pointer = OSC_UUID;
321 in_params[1].type = ACPI_TYPE_INTEGER;
322 in_params[1].integer.value = 1;
323 in_params[2].type = ACPI_TYPE_INTEGER;
324 in_params[2].integer.value = 2;
325 in_params[3].type = ACPI_TYPE_BUFFER;
326 in_params[3].buffer.length = 8;
327 in_params[3].buffer.pointer = (u8 *)&capabilities;
328
329 capabilities[0] = OSC_QUERY_ENABLE;
330 capabilities[1] = 0x1;
331
332 status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
333 if (ACPI_FAILURE(status))
334 return -ENODEV;
335
336 if (!output.length)
337 return -ENODEV;
338
339 out_obj = output.pointer;
340 if (out_obj->type != ACPI_TYPE_BUFFER) {
341 ret = -ENODEV;
342 goto out_free;
343 }
344
345 errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
346 if (errors) {
347 ret = -ENODEV;
348 goto out_free;
349 }
350
351 supported = *((u32 *)(out_obj->buffer.pointer + 4));
352 if (!(supported & 0x1)) {
353 ret = -ENODEV;
354 goto out_free;
355 }
356
357 kfree(output.pointer);
358 capabilities[0] = 0x0;
359 capabilities[1] = 0x1;
360
361 status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
362 if (ACPI_FAILURE(status))
363 return -ENODEV;
364
365 if (!output.length)
366 return -ENODEV;
367
368 out_obj = output.pointer;
369 if (out_obj->type != ACPI_TYPE_BUFFER) {
370 ret = -ENODEV;
371 goto out_free;
372 }
373
374 errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
375 if (errors) {
376 ret = -ENODEV;
377 goto out_free;
378 }
379
380 supported = *((u32 *)(out_obj->buffer.pointer + 4));
381 if (!(supported & 0x1)) {
382 ret = -ENODEV;
383 goto out_free;
384 }
385
386out_free:
387 kfree(output.pointer);
388 return ret;
389}
390
391static int __init pcc_cpufreq_probe(void)
392{
393 acpi_status status;
394 struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
395 struct pcc_memory_resource *mem_resource;
396 struct pcc_register_resource *reg_resource;
397 union acpi_object *out_obj, *member;
398 acpi_handle handle, osc_handle, pcch_handle;
399 int ret = 0;
400
401 status = acpi_get_handle(NULL, "\\_SB", &handle);
402 if (ACPI_FAILURE(status))
403 return -ENODEV;
404
405 status = acpi_get_handle(handle, "PCCH", &pcch_handle);
406 if (ACPI_FAILURE(status))
407 return -ENODEV;
408
409 status = acpi_get_handle(handle, "_OSC", &osc_handle);
410 if (ACPI_SUCCESS(status)) {
411 ret = pcc_cpufreq_do_osc(&osc_handle);
412 if (ret)
413 dprintk("probe: _OSC evaluation did not succeed\n");
414 /* Firmware's use of _OSC is optional */
415 ret = 0;
416 }
417
418 status = acpi_evaluate_object(handle, "PCCH", NULL, &output);
419 if (ACPI_FAILURE(status))
420 return -ENODEV;
421
422 out_obj = output.pointer;
423 if (out_obj->type != ACPI_TYPE_PACKAGE) {
424 ret = -ENODEV;
425 goto out_free;
426 }
427
428 member = &out_obj->package.elements[0];
429 if (member->type != ACPI_TYPE_BUFFER) {
430 ret = -ENODEV;
431 goto out_free;
432 }
433
434 mem_resource = (struct pcc_memory_resource *)member->buffer.pointer;
435
436 dprintk("probe: mem_resource descriptor: 0x%x,"
437 " length: %d, space_id: %d, resource_usage: %d,"
438 " type_specific: %d, granularity: 0x%llx,"
439 " minimum: 0x%llx, maximum: 0x%llx,"
440 " translation_offset: 0x%llx, address_length: 0x%llx\n",
441 mem_resource->descriptor, mem_resource->length,
442 mem_resource->space_id, mem_resource->resource_usage,
443 mem_resource->type_specific, mem_resource->granularity,
444 mem_resource->minimum, mem_resource->maximum,
445 mem_resource->translation_offset,
446 mem_resource->address_length);
447
448 if (mem_resource->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) {
449 ret = -ENODEV;
450 goto out_free;
451 }
452
453 pcch_virt_addr = ioremap_nocache(mem_resource->minimum,
454 mem_resource->address_length);
455 if (pcch_virt_addr == NULL) {
456 dprintk("probe: could not map shared mem region\n");
457 goto out_free;
458 }
459 pcch_hdr = pcch_virt_addr;
460
461 dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr);
462 dprintk("probe: PCCH header is at physical address: 0x%llx,"
463 " signature: 0x%x, length: %d bytes, major: %d, minor: %d,"
464 " supported features: 0x%x, command field: 0x%x,"
465 " status field: 0x%x, nominal latency: %d us\n",
466 mem_resource->minimum, ioread32(&pcch_hdr->signature),
467 ioread16(&pcch_hdr->length), ioread8(&pcch_hdr->major),
468 ioread8(&pcch_hdr->minor), ioread32(&pcch_hdr->features),
469 ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status),
470 ioread32(&pcch_hdr->latency));
471
472 dprintk("probe: min time between commands: %d us,"
473 " max time between commands: %d us,"
474 " nominal CPU frequency: %d MHz,"
475 " minimum CPU frequency: %d MHz,"
476 " minimum CPU frequency without throttling: %d MHz\n",
477 ioread32(&pcch_hdr->minimum_time),
478 ioread32(&pcch_hdr->maximum_time),
479 ioread32(&pcch_hdr->nominal),
480 ioread32(&pcch_hdr->throttled_frequency),
481 ioread32(&pcch_hdr->minimum_frequency));
482
483 member = &out_obj->package.elements[1];
484 if (member->type != ACPI_TYPE_BUFFER) {
485 ret = -ENODEV;
486 goto pcch_free;
487 }
488
489 reg_resource = (struct pcc_register_resource *)member->buffer.pointer;
490
491 doorbell.space_id = reg_resource->space_id;
492 doorbell.bit_width = reg_resource->bit_width;
493 doorbell.bit_offset = reg_resource->bit_offset;
494 doorbell.access_width = 64;
495 doorbell.address = reg_resource->address;
496
497 dprintk("probe: doorbell: space_id is %d, bit_width is %d, "
498 "bit_offset is %d, access_width is %d, address is 0x%llx\n",
499 doorbell.space_id, doorbell.bit_width, doorbell.bit_offset,
500 doorbell.access_width, reg_resource->address);
501
502 member = &out_obj->package.elements[2];
503 if (member->type != ACPI_TYPE_INTEGER) {
504 ret = -ENODEV;
505 goto pcch_free;
506 }
507
508 doorbell_preserve = member->integer.value;
509
510 member = &out_obj->package.elements[3];
511 if (member->type != ACPI_TYPE_INTEGER) {
512 ret = -ENODEV;
513 goto pcch_free;
514 }
515
516 doorbell_write = member->integer.value;
517
518 dprintk("probe: doorbell_preserve: 0x%llx,"
519 " doorbell_write: 0x%llx\n",
520 doorbell_preserve, doorbell_write);
521
522 pcc_cpu_info = alloc_percpu(struct pcc_cpu);
523 if (!pcc_cpu_info) {
524 ret = -ENOMEM;
525 goto pcch_free;
526 }
527
528 printk(KERN_DEBUG "pcc-cpufreq: (v%s) driver loaded with frequency"
529 " limits: %d MHz, %d MHz\n", PCC_VERSION,
530 ioread32(&pcch_hdr->minimum_frequency),
531 ioread32(&pcch_hdr->nominal));
532 kfree(output.pointer);
533 return ret;
534pcch_free:
535 pcc_clear_mapping();
536out_free:
537 kfree(output.pointer);
538 return ret;
539}
540
541static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
542{
543 unsigned int cpu = policy->cpu;
544 unsigned int result = 0;
545
546 if (!pcch_virt_addr) {
547 result = -1;
548 goto out;
549 }
550
551 result = pcc_get_offset(cpu);
552 if (result) {
553 dprintk("init: PCCP evaluation failed\n");
554 goto out;
555 }
556
557 policy->max = policy->cpuinfo.max_freq =
558 ioread32(&pcch_hdr->nominal) * 1000;
559 policy->min = policy->cpuinfo.min_freq =
560 ioread32(&pcch_hdr->minimum_frequency) * 1000;
561 policy->cur = pcc_get_freq(cpu);
562
563 if (!policy->cur) {
564 dprintk("init: Unable to get current CPU frequency\n");
565 result = -EINVAL;
566 goto out;
567 }
568
569 dprintk("init: policy->max is %d, policy->min is %d\n",
570 policy->max, policy->min);
571out:
572 return result;
573}
574
575static int pcc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
576{
577 return 0;
578}
579
580static struct cpufreq_driver pcc_cpufreq_driver = {
581 .flags = CPUFREQ_CONST_LOOPS,
582 .get = pcc_get_freq,
583 .verify = pcc_cpufreq_verify,
584 .target = pcc_cpufreq_target,
585 .init = pcc_cpufreq_cpu_init,
586 .exit = pcc_cpufreq_cpu_exit,
587 .name = "pcc-cpufreq",
588 .owner = THIS_MODULE,
589};
590
591static int __init pcc_cpufreq_init(void)
592{
593 int ret;
594
595 if (acpi_disabled)
596 return 0;
597
598 ret = pcc_cpufreq_probe();
599 if (ret) {
600 dprintk("pcc_cpufreq_init: PCCH evaluation failed\n");
601 return ret;
602 }
603
604 ret = cpufreq_register_driver(&pcc_cpufreq_driver);
605
606 return ret;
607}
608
609static void __exit pcc_cpufreq_exit(void)
610{
611 cpufreq_unregister_driver(&pcc_cpufreq_driver);
612
613 pcc_clear_mapping();
614
615 free_percpu(pcc_cpu_info);
616}
617
618MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar");
619MODULE_VERSION(PCC_VERSION);
620MODULE_DESCRIPTION("Processor Clocking Control interface driver");
621MODULE_LICENSE("GPL");
622
623late_initcall(pcc_cpufreq_init);
624module_exit(pcc_cpufreq_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
deleted file mode 100644
index b3379d6a5c57..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ /dev/null
@@ -1,261 +0,0 @@
1/*
2 * This file was based upon code in Powertweak Linux (http://powertweak.sf.net)
3 * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä,
4 * Dominik Brodowski.
5 *
6 * Licensed under the terms of the GNU GPL License version 2.
7 *
8 * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
9 */
10
11#include <linux/kernel.h>
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/cpufreq.h>
15#include <linux/ioport.h>
16#include <linux/timex.h>
17#include <linux/io.h>
18
19#include <asm/msr.h>
20
21#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long
22 as it is unused */
23
24#define PFX "powernow-k6: "
25static unsigned int busfreq; /* FSB, in 10 kHz */
26static unsigned int max_multiplier;
27
28
29/* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */
30static struct cpufreq_frequency_table clock_ratio[] = {
31 {45, /* 000 -> 4.5x */ 0},
32 {50, /* 001 -> 5.0x */ 0},
33 {40, /* 010 -> 4.0x */ 0},
34 {55, /* 011 -> 5.5x */ 0},
35 {20, /* 100 -> 2.0x */ 0},
36 {30, /* 101 -> 3.0x */ 0},
37 {60, /* 110 -> 6.0x */ 0},
38 {35, /* 111 -> 3.5x */ 0},
39 {0, CPUFREQ_TABLE_END}
40};
41
42
43/**
44 * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier
45 *
46 * Returns the current setting of the frequency multiplier. Core clock
47 * speed is frequency of the Front-Side Bus multiplied with this value.
48 */
49static int powernow_k6_get_cpu_multiplier(void)
50{
51 u64 invalue = 0;
52 u32 msrval;
53
54 msrval = POWERNOW_IOPORT + 0x1;
55 wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
56 invalue = inl(POWERNOW_IOPORT + 0x8);
57 msrval = POWERNOW_IOPORT + 0x0;
58 wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
59
60 return clock_ratio[(invalue >> 5)&7].index;
61}
62
63
64/**
65 * powernow_k6_set_state - set the PowerNow! multiplier
66 * @best_i: clock_ratio[best_i] is the target multiplier
67 *
68 * Tries to change the PowerNow! multiplier
69 */
70static void powernow_k6_set_state(unsigned int best_i)
71{
72 unsigned long outvalue = 0, invalue = 0;
73 unsigned long msrval;
74 struct cpufreq_freqs freqs;
75
76 if (clock_ratio[best_i].index > max_multiplier) {
77 printk(KERN_ERR PFX "invalid target frequency\n");
78 return;
79 }
80
81 freqs.old = busfreq * powernow_k6_get_cpu_multiplier();
82 freqs.new = busfreq * clock_ratio[best_i].index;
83 freqs.cpu = 0; /* powernow-k6.c is UP only driver */
84
85 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
86
87 /* we now need to transform best_i to the BVC format, see AMD#23446 */
88
89 outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5);
90
91 msrval = POWERNOW_IOPORT + 0x1;
92 wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
93 invalue = inl(POWERNOW_IOPORT + 0x8);
94 invalue = invalue & 0xf;
95 outvalue = outvalue | invalue;
96 outl(outvalue , (POWERNOW_IOPORT + 0x8));
97 msrval = POWERNOW_IOPORT + 0x0;
98 wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
99
100 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
101
102 return;
103}
104
105
106/**
107 * powernow_k6_verify - verifies a new CPUfreq policy
108 * @policy: new policy
109 *
110 * Policy must be within lowest and highest possible CPU Frequency,
111 * and at least one possible state must be within min and max.
112 */
113static int powernow_k6_verify(struct cpufreq_policy *policy)
114{
115 return cpufreq_frequency_table_verify(policy, &clock_ratio[0]);
116}
117
118
119/**
120 * powernow_k6_setpolicy - sets a new CPUFreq policy
121 * @policy: new policy
122 * @target_freq: the target frequency
123 * @relation: how that frequency relates to achieved frequency
124 * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
125 *
126 * sets a new CPUFreq policy
127 */
128static int powernow_k6_target(struct cpufreq_policy *policy,
129 unsigned int target_freq,
130 unsigned int relation)
131{
132 unsigned int newstate = 0;
133
134 if (cpufreq_frequency_table_target(policy, &clock_ratio[0],
135 target_freq, relation, &newstate))
136 return -EINVAL;
137
138 powernow_k6_set_state(newstate);
139
140 return 0;
141}
142
143
144static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
145{
146 unsigned int i, f;
147 int result;
148
149 if (policy->cpu != 0)
150 return -ENODEV;
151
152 /* get frequencies */
153 max_multiplier = powernow_k6_get_cpu_multiplier();
154 busfreq = cpu_khz / max_multiplier;
155
156 /* table init */
157 for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
158 f = clock_ratio[i].index;
159 if (f > max_multiplier)
160 clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID;
161 else
162 clock_ratio[i].frequency = busfreq * f;
163 }
164
165 /* cpuinfo and default policy values */
166 policy->cpuinfo.transition_latency = 200000;
167 policy->cur = busfreq * max_multiplier;
168
169 result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio);
170 if (result)
171 return result;
172
173 cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu);
174
175 return 0;
176}
177
178
179static int powernow_k6_cpu_exit(struct cpufreq_policy *policy)
180{
181 unsigned int i;
182 for (i = 0; i < 8; i++) {
183 if (i == max_multiplier)
184 powernow_k6_set_state(i);
185 }
186 cpufreq_frequency_table_put_attr(policy->cpu);
187 return 0;
188}
189
190static unsigned int powernow_k6_get(unsigned int cpu)
191{
192 unsigned int ret;
193 ret = (busfreq * powernow_k6_get_cpu_multiplier());
194 return ret;
195}
196
197static struct freq_attr *powernow_k6_attr[] = {
198 &cpufreq_freq_attr_scaling_available_freqs,
199 NULL,
200};
201
202static struct cpufreq_driver powernow_k6_driver = {
203 .verify = powernow_k6_verify,
204 .target = powernow_k6_target,
205 .init = powernow_k6_cpu_init,
206 .exit = powernow_k6_cpu_exit,
207 .get = powernow_k6_get,
208 .name = "powernow-k6",
209 .owner = THIS_MODULE,
210 .attr = powernow_k6_attr,
211};
212
213
214/**
215 * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver
216 *
217 * Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported
218 * devices, -EINVAL or -ENOMEM on problems during initiatization, and zero
219 * on success.
220 */
221static int __init powernow_k6_init(void)
222{
223 struct cpuinfo_x86 *c = &cpu_data(0);
224
225 if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) ||
226 ((c->x86_model != 12) && (c->x86_model != 13)))
227 return -ENODEV;
228
229 if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) {
230 printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n");
231 return -EIO;
232 }
233
234 if (cpufreq_register_driver(&powernow_k6_driver)) {
235 release_region(POWERNOW_IOPORT, 16);
236 return -EINVAL;
237 }
238
239 return 0;
240}
241
242
243/**
244 * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support
245 *
246 * Unregisters AMD K6-2+ / K6-3+ PowerNow! support.
247 */
248static void __exit powernow_k6_exit(void)
249{
250 cpufreq_unregister_driver(&powernow_k6_driver);
251 release_region(POWERNOW_IOPORT, 16);
252}
253
254
255MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, "
256 "Dominik Brodowski <linux@brodo.de>");
257MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
258MODULE_LICENSE("GPL");
259
260module_init(powernow_k6_init);
261module_exit(powernow_k6_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
deleted file mode 100644
index 4a45fd6e41ba..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ /dev/null
@@ -1,752 +0,0 @@
1/*
2 * AMD K7 Powernow driver.
3 * (C) 2003 Dave Jones on behalf of SuSE Labs.
4 * (C) 2003-2004 Dave Jones <davej@redhat.com>
5 *
6 * Licensed under the terms of the GNU GPL License version 2.
7 * Based upon datasheets & sample CPUs kindly provided by AMD.
8 *
9 * Errata 5:
10 * CPU may fail to execute a FID/VID change in presence of interrupt.
11 * - We cli/sti on stepping A0 CPUs around the FID/VID transition.
12 * Errata 15:
13 * CPU with half frequency multipliers may hang upon wakeup from disconnect.
14 * - We disable half multipliers if ACPI is used on A0 stepping CPUs.
15 */
16
17#include <linux/kernel.h>
18#include <linux/module.h>
19#include <linux/moduleparam.h>
20#include <linux/init.h>
21#include <linux/cpufreq.h>
22#include <linux/slab.h>
23#include <linux/string.h>
24#include <linux/dmi.h>
25#include <linux/timex.h>
26#include <linux/io.h>
27
28#include <asm/timer.h> /* Needed for recalibrate_cpu_khz() */
29#include <asm/msr.h>
30#include <asm/system.h>
31
32#ifdef CONFIG_X86_POWERNOW_K7_ACPI
33#include <linux/acpi.h>
34#include <acpi/processor.h>
35#endif
36
37#include "powernow-k7.h"
38
39#define PFX "powernow: "
40
41
42struct psb_s {
43 u8 signature[10];
44 u8 tableversion;
45 u8 flags;
46 u16 settlingtime;
47 u8 reserved1;
48 u8 numpst;
49};
50
51struct pst_s {
52 u32 cpuid;
53 u8 fsbspeed;
54 u8 maxfid;
55 u8 startvid;
56 u8 numpstates;
57};
58
59#ifdef CONFIG_X86_POWERNOW_K7_ACPI
60union powernow_acpi_control_t {
61 struct {
62 unsigned long fid:5,
63 vid:5,
64 sgtc:20,
65 res1:2;
66 } bits;
67 unsigned long val;
68};
69#endif
70
71#ifdef CONFIG_CPU_FREQ_DEBUG
72/* divide by 1000 to get VCore voltage in V. */
73static const int mobile_vid_table[32] = {
74 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
75 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0,
76 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
77 1075, 1050, 1025, 1000, 975, 950, 925, 0,
78};
79#endif
80
81/* divide by 10 to get FID. */
82static const int fid_codes[32] = {
83 110, 115, 120, 125, 50, 55, 60, 65,
84 70, 75, 80, 85, 90, 95, 100, 105,
85 30, 190, 40, 200, 130, 135, 140, 210,
86 150, 225, 160, 165, 170, 180, -1, -1,
87};
88
89/* This parameter is used in order to force ACPI instead of legacy method for
90 * configuration purpose.
91 */
92
93static int acpi_force;
94
95static struct cpufreq_frequency_table *powernow_table;
96
97static unsigned int can_scale_bus;
98static unsigned int can_scale_vid;
99static unsigned int minimum_speed = -1;
100static unsigned int maximum_speed;
101static unsigned int number_scales;
102static unsigned int fsb;
103static unsigned int latency;
104static char have_a0;
105
106#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
107 "powernow-k7", msg)
108
109static int check_fsb(unsigned int fsbspeed)
110{
111 int delta;
112 unsigned int f = fsb / 1000;
113
114 delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed;
115 return delta < 5;
116}
117
118static int check_powernow(void)
119{
120 struct cpuinfo_x86 *c = &cpu_data(0);
121 unsigned int maxei, eax, ebx, ecx, edx;
122
123 if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 6)) {
124#ifdef MODULE
125 printk(KERN_INFO PFX "This module only works with "
126 "AMD K7 CPUs\n");
127#endif
128 return 0;
129 }
130
131 /* Get maximum capabilities */
132 maxei = cpuid_eax(0x80000000);
133 if (maxei < 0x80000007) { /* Any powernow info ? */
134#ifdef MODULE
135 printk(KERN_INFO PFX "No powernow capabilities detected\n");
136#endif
137 return 0;
138 }
139
140 if ((c->x86_model == 6) && (c->x86_mask == 0)) {
141 printk(KERN_INFO PFX "K7 660[A0] core detected, "
142 "enabling errata workarounds\n");
143 have_a0 = 1;
144 }
145
146 cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
147
148 /* Check we can actually do something before we say anything.*/
149 if (!(edx & (1 << 1 | 1 << 2)))
150 return 0;
151
152 printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
153
154 if (edx & 1 << 1) {
155 printk("frequency");
156 can_scale_bus = 1;
157 }
158
159 if ((edx & (1 << 1 | 1 << 2)) == 0x6)
160 printk(" and ");
161
162 if (edx & 1 << 2) {
163 printk("voltage");
164 can_scale_vid = 1;
165 }
166
167 printk(".\n");
168 return 1;
169}
170
171#ifdef CONFIG_X86_POWERNOW_K7_ACPI
172static void invalidate_entry(unsigned int entry)
173{
174 powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
175}
176#endif
177
178static int get_ranges(unsigned char *pst)
179{
180 unsigned int j;
181 unsigned int speed;
182 u8 fid, vid;
183
184 powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
185 (number_scales + 1)), GFP_KERNEL);
186 if (!powernow_table)
187 return -ENOMEM;
188
189 for (j = 0 ; j < number_scales; j++) {
190 fid = *pst++;
191
192 powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10;
193 powernow_table[j].index = fid; /* lower 8 bits */
194
195 speed = powernow_table[j].frequency;
196
197 if ((fid_codes[fid] % 10) == 5) {
198#ifdef CONFIG_X86_POWERNOW_K7_ACPI
199 if (have_a0 == 1)
200 invalidate_entry(j);
201#endif
202 }
203
204 if (speed < minimum_speed)
205 minimum_speed = speed;
206 if (speed > maximum_speed)
207 maximum_speed = speed;
208
209 vid = *pst++;
210 powernow_table[j].index |= (vid << 8); /* upper 8 bits */
211
212 dprintk(" FID: 0x%x (%d.%dx [%dMHz]) "
213 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
214 fid_codes[fid] % 10, speed/1000, vid,
215 mobile_vid_table[vid]/1000,
216 mobile_vid_table[vid]%1000);
217 }
218 powernow_table[number_scales].frequency = CPUFREQ_TABLE_END;
219 powernow_table[number_scales].index = 0;
220
221 return 0;
222}
223
224
225static void change_FID(int fid)
226{
227 union msr_fidvidctl fidvidctl;
228
229 rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
230 if (fidvidctl.bits.FID != fid) {
231 fidvidctl.bits.SGTC = latency;
232 fidvidctl.bits.FID = fid;
233 fidvidctl.bits.VIDC = 0;
234 fidvidctl.bits.FIDC = 1;
235 wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
236 }
237}
238
239
240static void change_VID(int vid)
241{
242 union msr_fidvidctl fidvidctl;
243
244 rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
245 if (fidvidctl.bits.VID != vid) {
246 fidvidctl.bits.SGTC = latency;
247 fidvidctl.bits.VID = vid;
248 fidvidctl.bits.FIDC = 0;
249 fidvidctl.bits.VIDC = 1;
250 wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
251 }
252}
253
254
255static void change_speed(unsigned int index)
256{
257 u8 fid, vid;
258 struct cpufreq_freqs freqs;
259 union msr_fidvidstatus fidvidstatus;
260 int cfid;
261
262 /* fid are the lower 8 bits of the index we stored into
263 * the cpufreq frequency table in powernow_decode_bios,
264 * vid are the upper 8 bits.
265 */
266
267 fid = powernow_table[index].index & 0xFF;
268 vid = (powernow_table[index].index & 0xFF00) >> 8;
269
270 freqs.cpu = 0;
271
272 rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
273 cfid = fidvidstatus.bits.CFID;
274 freqs.old = fsb * fid_codes[cfid] / 10;
275
276 freqs.new = powernow_table[index].frequency;
277
278 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
279
280 /* Now do the magic poking into the MSRs. */
281
282 if (have_a0 == 1) /* A0 errata 5 */
283 local_irq_disable();
284
285 if (freqs.old > freqs.new) {
286 /* Going down, so change FID first */
287 change_FID(fid);
288 change_VID(vid);
289 } else {
290 /* Going up, so change VID first */
291 change_VID(vid);
292 change_FID(fid);
293 }
294
295
296 if (have_a0 == 1)
297 local_irq_enable();
298
299 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
300}
301
302
303#ifdef CONFIG_X86_POWERNOW_K7_ACPI
304
305static struct acpi_processor_performance *acpi_processor_perf;
306
307static int powernow_acpi_init(void)
308{
309 int i;
310 int retval = 0;
311 union powernow_acpi_control_t pc;
312
313 if (acpi_processor_perf != NULL && powernow_table != NULL) {
314 retval = -EINVAL;
315 goto err0;
316 }
317
318 acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance),
319 GFP_KERNEL);
320 if (!acpi_processor_perf) {
321 retval = -ENOMEM;
322 goto err0;
323 }
324
325 if (!zalloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
326 GFP_KERNEL)) {
327 retval = -ENOMEM;
328 goto err05;
329 }
330
331 if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
332 retval = -EIO;
333 goto err1;
334 }
335
336 if (acpi_processor_perf->control_register.space_id !=
337 ACPI_ADR_SPACE_FIXED_HARDWARE) {
338 retval = -ENODEV;
339 goto err2;
340 }
341
342 if (acpi_processor_perf->status_register.space_id !=
343 ACPI_ADR_SPACE_FIXED_HARDWARE) {
344 retval = -ENODEV;
345 goto err2;
346 }
347
348 number_scales = acpi_processor_perf->state_count;
349
350 if (number_scales < 2) {
351 retval = -ENODEV;
352 goto err2;
353 }
354
355 powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
356 (number_scales + 1)), GFP_KERNEL);
357 if (!powernow_table) {
358 retval = -ENOMEM;
359 goto err2;
360 }
361
362 pc.val = (unsigned long) acpi_processor_perf->states[0].control;
363 for (i = 0; i < number_scales; i++) {
364 u8 fid, vid;
365 struct acpi_processor_px *state =
366 &acpi_processor_perf->states[i];
367 unsigned int speed, speed_mhz;
368
369 pc.val = (unsigned long) state->control;
370 dprintk("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
371 i,
372 (u32) state->core_frequency,
373 (u32) state->power,
374 (u32) state->transition_latency,
375 (u32) state->control,
376 pc.bits.sgtc);
377
378 vid = pc.bits.vid;
379 fid = pc.bits.fid;
380
381 powernow_table[i].frequency = fsb * fid_codes[fid] / 10;
382 powernow_table[i].index = fid; /* lower 8 bits */
383 powernow_table[i].index |= (vid << 8); /* upper 8 bits */
384
385 speed = powernow_table[i].frequency;
386 speed_mhz = speed / 1000;
387
388 /* processor_perflib will multiply the MHz value by 1000 to
389 * get a KHz value (e.g. 1266000). However, powernow-k7 works
390 * with true KHz values (e.g. 1266768). To ensure that all
391 * powernow frequencies are available, we must ensure that
392 * ACPI doesn't restrict them, so we round up the MHz value
393 * to ensure that perflib's computed KHz value is greater than
394 * or equal to powernow's KHz value.
395 */
396 if (speed % 1000 > 0)
397 speed_mhz++;
398
399 if ((fid_codes[fid] % 10) == 5) {
400 if (have_a0 == 1)
401 invalidate_entry(i);
402 }
403
404 dprintk(" FID: 0x%x (%d.%dx [%dMHz]) "
405 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
406 fid_codes[fid] % 10, speed_mhz, vid,
407 mobile_vid_table[vid]/1000,
408 mobile_vid_table[vid]%1000);
409
410 if (state->core_frequency != speed_mhz) {
411 state->core_frequency = speed_mhz;
412 dprintk(" Corrected ACPI frequency to %d\n",
413 speed_mhz);
414 }
415
416 if (latency < pc.bits.sgtc)
417 latency = pc.bits.sgtc;
418
419 if (speed < minimum_speed)
420 minimum_speed = speed;
421 if (speed > maximum_speed)
422 maximum_speed = speed;
423 }
424
425 powernow_table[i].frequency = CPUFREQ_TABLE_END;
426 powernow_table[i].index = 0;
427
428 /* notify BIOS that we exist */
429 acpi_processor_notify_smm(THIS_MODULE);
430
431 return 0;
432
433err2:
434 acpi_processor_unregister_performance(acpi_processor_perf, 0);
435err1:
436 free_cpumask_var(acpi_processor_perf->shared_cpu_map);
437err05:
438 kfree(acpi_processor_perf);
439err0:
440 printk(KERN_WARNING PFX "ACPI perflib can not be used on "
441 "this platform\n");
442 acpi_processor_perf = NULL;
443 return retval;
444}
445#else
446static int powernow_acpi_init(void)
447{
448 printk(KERN_INFO PFX "no support for ACPI processor found."
449 " Please recompile your kernel with ACPI processor\n");
450 return -EINVAL;
451}
452#endif
453
454static void print_pst_entry(struct pst_s *pst, unsigned int j)
455{
456 dprintk("PST:%d (@%p)\n", j, pst);
457 dprintk(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n",
458 pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
459}
460
461static int powernow_decode_bios(int maxfid, int startvid)
462{
463 struct psb_s *psb;
464 struct pst_s *pst;
465 unsigned int i, j;
466 unsigned char *p;
467 unsigned int etuple;
468 unsigned int ret;
469
470 etuple = cpuid_eax(0x80000001);
471
472 for (i = 0xC0000; i < 0xffff0 ; i += 16) {
473
474 p = phys_to_virt(i);
475
476 if (memcmp(p, "AMDK7PNOW!", 10) == 0) {
477 dprintk("Found PSB header at %p\n", p);
478 psb = (struct psb_s *) p;
479 dprintk("Table version: 0x%x\n", psb->tableversion);
480 if (psb->tableversion != 0x12) {
481 printk(KERN_INFO PFX "Sorry, only v1.2 tables"
482 " supported right now\n");
483 return -ENODEV;
484 }
485
486 dprintk("Flags: 0x%x\n", psb->flags);
487 if ((psb->flags & 1) == 0)
488 dprintk("Mobile voltage regulator\n");
489 else
490 dprintk("Desktop voltage regulator\n");
491
492 latency = psb->settlingtime;
493 if (latency < 100) {
494 printk(KERN_INFO PFX "BIOS set settling time "
495 "to %d microseconds. "
496 "Should be at least 100. "
497 "Correcting.\n", latency);
498 latency = 100;
499 }
500 dprintk("Settling Time: %d microseconds.\n",
501 psb->settlingtime);
502 dprintk("Has %d PST tables. (Only dumping ones "
503 "relevant to this CPU).\n",
504 psb->numpst);
505
506 p += sizeof(struct psb_s);
507
508 pst = (struct pst_s *) p;
509
510 for (j = 0; j < psb->numpst; j++) {
511 pst = (struct pst_s *) p;
512 number_scales = pst->numpstates;
513
514 if ((etuple == pst->cpuid) &&
515 check_fsb(pst->fsbspeed) &&
516 (maxfid == pst->maxfid) &&
517 (startvid == pst->startvid)) {
518 print_pst_entry(pst, j);
519 p = (char *)pst + sizeof(struct pst_s);
520 ret = get_ranges(p);
521 return ret;
522 } else {
523 unsigned int k;
524 p = (char *)pst + sizeof(struct pst_s);
525 for (k = 0; k < number_scales; k++)
526 p += 2;
527 }
528 }
529 printk(KERN_INFO PFX "No PST tables match this cpuid "
530 "(0x%x)\n", etuple);
531 printk(KERN_INFO PFX "This is indicative of a broken "
532 "BIOS.\n");
533
534 return -EINVAL;
535 }
536 p++;
537 }
538
539 return -ENODEV;
540}
541
542
543static int powernow_target(struct cpufreq_policy *policy,
544 unsigned int target_freq,
545 unsigned int relation)
546{
547 unsigned int newstate;
548
549 if (cpufreq_frequency_table_target(policy, powernow_table, target_freq,
550 relation, &newstate))
551 return -EINVAL;
552
553 change_speed(newstate);
554
555 return 0;
556}
557
558
559static int powernow_verify(struct cpufreq_policy *policy)
560{
561 return cpufreq_frequency_table_verify(policy, powernow_table);
562}
563
564/*
565 * We use the fact that the bus frequency is somehow
566 * a multiple of 100000/3 khz, then we compute sgtc according
567 * to this multiple.
568 * That way, we match more how AMD thinks all of that work.
569 * We will then get the same kind of behaviour already tested under
570 * the "well-known" other OS.
571 */
572static int __cpuinit fixup_sgtc(void)
573{
574 unsigned int sgtc;
575 unsigned int m;
576
577 m = fsb / 3333;
578 if ((m % 10) >= 5)
579 m += 5;
580
581 m /= 10;
582
583 sgtc = 100 * m * latency;
584 sgtc = sgtc / 3;
585 if (sgtc > 0xfffff) {
586 printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc);
587 sgtc = 0xfffff;
588 }
589 return sgtc;
590}
591
592static unsigned int powernow_get(unsigned int cpu)
593{
594 union msr_fidvidstatus fidvidstatus;
595 unsigned int cfid;
596
597 if (cpu)
598 return 0;
599 rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
600 cfid = fidvidstatus.bits.CFID;
601
602 return fsb * fid_codes[cfid] / 10;
603}
604
605
606static int __cpuinit acer_cpufreq_pst(const struct dmi_system_id *d)
607{
608 printk(KERN_WARNING PFX
609 "%s laptop with broken PST tables in BIOS detected.\n",
610 d->ident);
611 printk(KERN_WARNING PFX
612 "You need to downgrade to 3A21 (09/09/2002), or try a newer "
613 "BIOS than 3A71 (01/20/2003)\n");
614 printk(KERN_WARNING PFX
615 "cpufreq scaling has been disabled as a result of this.\n");
616 return 0;
617}
618
619/*
620 * Some Athlon laptops have really fucked PST tables.
621 * A BIOS update is all that can save them.
622 * Mention this, and disable cpufreq.
623 */
624static struct dmi_system_id __cpuinitdata powernow_dmi_table[] = {
625 {
626 .callback = acer_cpufreq_pst,
627 .ident = "Acer Aspire",
628 .matches = {
629 DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"),
630 DMI_MATCH(DMI_BIOS_VERSION, "3A71"),
631 },
632 },
633 { }
634};
635
636static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy)
637{
638 union msr_fidvidstatus fidvidstatus;
639 int result;
640
641 if (policy->cpu != 0)
642 return -ENODEV;
643
644 rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
645
646 recalibrate_cpu_khz();
647
648 fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID];
649 if (!fsb) {
650 printk(KERN_WARNING PFX "can not determine bus frequency\n");
651 return -EINVAL;
652 }
653 dprintk("FSB: %3dMHz\n", fsb/1000);
654
655 if (dmi_check_system(powernow_dmi_table) || acpi_force) {
656 printk(KERN_INFO PFX "PSB/PST known to be broken. "
657 "Trying ACPI instead\n");
658 result = powernow_acpi_init();
659 } else {
660 result = powernow_decode_bios(fidvidstatus.bits.MFID,
661 fidvidstatus.bits.SVID);
662 if (result) {
663 printk(KERN_INFO PFX "Trying ACPI perflib\n");
664 maximum_speed = 0;
665 minimum_speed = -1;
666 latency = 0;
667 result = powernow_acpi_init();
668 if (result) {
669 printk(KERN_INFO PFX
670 "ACPI and legacy methods failed\n");
671 }
672 } else {
673 /* SGTC use the bus clock as timer */
674 latency = fixup_sgtc();
675 printk(KERN_INFO PFX "SGTC: %d\n", latency);
676 }
677 }
678
679 if (result)
680 return result;
681
682 printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
683 minimum_speed/1000, maximum_speed/1000);
684
685 policy->cpuinfo.transition_latency =
686 cpufreq_scale(2000000UL, fsb, latency);
687
688 policy->cur = powernow_get(0);
689
690 cpufreq_frequency_table_get_attr(powernow_table, policy->cpu);
691
692 return cpufreq_frequency_table_cpuinfo(policy, powernow_table);
693}
694
695static int powernow_cpu_exit(struct cpufreq_policy *policy)
696{
697 cpufreq_frequency_table_put_attr(policy->cpu);
698
699#ifdef CONFIG_X86_POWERNOW_K7_ACPI
700 if (acpi_processor_perf) {
701 acpi_processor_unregister_performance(acpi_processor_perf, 0);
702 free_cpumask_var(acpi_processor_perf->shared_cpu_map);
703 kfree(acpi_processor_perf);
704 }
705#endif
706
707 kfree(powernow_table);
708 return 0;
709}
710
711static struct freq_attr *powernow_table_attr[] = {
712 &cpufreq_freq_attr_scaling_available_freqs,
713 NULL,
714};
715
716static struct cpufreq_driver powernow_driver = {
717 .verify = powernow_verify,
718 .target = powernow_target,
719 .get = powernow_get,
720#ifdef CONFIG_X86_POWERNOW_K7_ACPI
721 .bios_limit = acpi_processor_get_bios_limit,
722#endif
723 .init = powernow_cpu_init,
724 .exit = powernow_cpu_exit,
725 .name = "powernow-k7",
726 .owner = THIS_MODULE,
727 .attr = powernow_table_attr,
728};
729
730static int __init powernow_init(void)
731{
732 if (check_powernow() == 0)
733 return -ENODEV;
734 return cpufreq_register_driver(&powernow_driver);
735}
736
737
738static void __exit powernow_exit(void)
739{
740 cpufreq_unregister_driver(&powernow_driver);
741}
742
743module_param(acpi_force, int, 0444);
744MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
745
746MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
747MODULE_DESCRIPTION("Powernow driver for AMD K7 processors.");
748MODULE_LICENSE("GPL");
749
750late_initcall(powernow_init);
751module_exit(powernow_exit);
752
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
deleted file mode 100644
index 35fb4eaf6e1c..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
+++ /dev/null
@@ -1,43 +0,0 @@
1/*
2 * (C) 2003 Dave Jones.
3 *
4 * Licensed under the terms of the GNU GPL License version 2.
5 *
6 * AMD-specific information
7 *
8 */
9
10union msr_fidvidctl {
11 struct {
12 unsigned FID:5, // 4:0
13 reserved1:3, // 7:5
14 VID:5, // 12:8
15 reserved2:3, // 15:13
16 FIDC:1, // 16
17 VIDC:1, // 17
18 reserved3:2, // 19:18
19 FIDCHGRATIO:1, // 20
20 reserved4:11, // 31-21
21 SGTC:20, // 32:51
22 reserved5:12; // 63:52
23 } bits;
24 unsigned long long val;
25};
26
27union msr_fidvidstatus {
28 struct {
29 unsigned CFID:5, // 4:0
30 reserved1:3, // 7:5
31 SFID:5, // 12:8
32 reserved2:3, // 15:13
33 MFID:5, // 20:16
34 reserved3:11, // 31:21
35 CVID:5, // 36:32
36 reserved4:3, // 39:37
37 SVID:5, // 44:40
38 reserved5:3, // 47:45
39 MVID:5, // 52:48
40 reserved6:11; // 63:53
41 } bits;
42 unsigned long long val;
43};
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
deleted file mode 100644
index 2368e38327b3..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ /dev/null
@@ -1,1607 +0,0 @@
1/*
2 * (c) 2003-2010 Advanced Micro Devices, Inc.
3 * Your use of this code is subject to the terms and conditions of the
4 * GNU general public license version 2. See "COPYING" or
5 * http://www.gnu.org/licenses/gpl.html
6 *
7 * Support : mark.langsdorf@amd.com
8 *
9 * Based on the powernow-k7.c module written by Dave Jones.
10 * (C) 2003 Dave Jones on behalf of SuSE Labs
11 * (C) 2004 Dominik Brodowski <linux@brodo.de>
12 * (C) 2004 Pavel Machek <pavel@ucw.cz>
13 * Licensed under the terms of the GNU GPL License version 2.
14 * Based upon datasheets & sample CPUs kindly provided by AMD.
15 *
16 * Valuable input gratefully received from Dave Jones, Pavel Machek,
17 * Dominik Brodowski, Jacob Shin, and others.
18 * Originally developed by Paul Devriendt.
19 * Processor information obtained from Chapter 9 (Power and Thermal Management)
20 * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
21 * Opteron Processors" available for download from www.amd.com
22 *
23 * Tables for specific CPUs can be inferred from
24 * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf
25 */
26
27#include <linux/kernel.h>
28#include <linux/smp.h>
29#include <linux/module.h>
30#include <linux/init.h>
31#include <linux/cpufreq.h>
32#include <linux/slab.h>
33#include <linux/string.h>
34#include <linux/cpumask.h>
35#include <linux/sched.h> /* for current / set_cpus_allowed() */
36#include <linux/io.h>
37#include <linux/delay.h>
38
39#include <asm/msr.h>
40
41#include <linux/acpi.h>
42#include <linux/mutex.h>
43#include <acpi/processor.h>
44
45#define PFX "powernow-k8: "
46#define VERSION "version 2.20.00"
47#include "powernow-k8.h"
48#include "mperf.h"
49
50/* serialize freq changes */
51static DEFINE_MUTEX(fidvid_mutex);
52
53static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data);
54
55static int cpu_family = CPU_OPTERON;
56
57/* core performance boost */
58static bool cpb_capable, cpb_enabled;
59static struct msr __percpu *msrs;
60
61static struct cpufreq_driver cpufreq_amd64_driver;
62
63#ifndef CONFIG_SMP
64static inline const struct cpumask *cpu_core_mask(int cpu)
65{
66 return cpumask_of(0);
67}
68#endif
69
70/* Return a frequency in MHz, given an input fid */
71static u32 find_freq_from_fid(u32 fid)
72{
73 return 800 + (fid * 100);
74}
75
76/* Return a frequency in KHz, given an input fid */
77static u32 find_khz_freq_from_fid(u32 fid)
78{
79 return 1000 * find_freq_from_fid(fid);
80}
81
82static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data,
83 u32 pstate)
84{
85 return data[pstate].frequency;
86}
87
88/* Return the vco fid for an input fid
89 *
90 * Each "low" fid has corresponding "high" fid, and you can get to "low" fids
91 * only from corresponding high fids. This returns "high" fid corresponding to
92 * "low" one.
93 */
94static u32 convert_fid_to_vco_fid(u32 fid)
95{
96 if (fid < HI_FID_TABLE_BOTTOM)
97 return 8 + (2 * fid);
98 else
99 return fid;
100}
101
102/*
103 * Return 1 if the pending bit is set. Unless we just instructed the processor
104 * to transition to a new state, seeing this bit set is really bad news.
105 */
106static int pending_bit_stuck(void)
107{
108 u32 lo, hi;
109
110 if (cpu_family == CPU_HW_PSTATE)
111 return 0;
112
113 rdmsr(MSR_FIDVID_STATUS, lo, hi);
114 return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
115}
116
117/*
118 * Update the global current fid / vid values from the status msr.
119 * Returns 1 on error.
120 */
121static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
122{
123 u32 lo, hi;
124 u32 i = 0;
125
126 if (cpu_family == CPU_HW_PSTATE) {
127 rdmsr(MSR_PSTATE_STATUS, lo, hi);
128 i = lo & HW_PSTATE_MASK;
129 data->currpstate = i;
130
131 /*
132 * a workaround for family 11h erratum 311 might cause
133 * an "out-of-range Pstate if the core is in Pstate-0
134 */
135 if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps))
136 data->currpstate = HW_PSTATE_0;
137
138 return 0;
139 }
140 do {
141 if (i++ > 10000) {
142 dprintk("detected change pending stuck\n");
143 return 1;
144 }
145 rdmsr(MSR_FIDVID_STATUS, lo, hi);
146 } while (lo & MSR_S_LO_CHANGE_PENDING);
147
148 data->currvid = hi & MSR_S_HI_CURRENT_VID;
149 data->currfid = lo & MSR_S_LO_CURRENT_FID;
150
151 return 0;
152}
153
154/* the isochronous relief time */
155static void count_off_irt(struct powernow_k8_data *data)
156{
157 udelay((1 << data->irt) * 10);
158 return;
159}
160
161/* the voltage stabilization time */
162static void count_off_vst(struct powernow_k8_data *data)
163{
164 udelay(data->vstable * VST_UNITS_20US);
165 return;
166}
167
168/* need to init the control msr to a safe value (for each cpu) */
169static void fidvid_msr_init(void)
170{
171 u32 lo, hi;
172 u8 fid, vid;
173
174 rdmsr(MSR_FIDVID_STATUS, lo, hi);
175 vid = hi & MSR_S_HI_CURRENT_VID;
176 fid = lo & MSR_S_LO_CURRENT_FID;
177 lo = fid | (vid << MSR_C_LO_VID_SHIFT);
178 hi = MSR_C_HI_STP_GNT_BENIGN;
179 dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
180 wrmsr(MSR_FIDVID_CTL, lo, hi);
181}
182
183/* write the new fid value along with the other control fields to the msr */
184static int write_new_fid(struct powernow_k8_data *data, u32 fid)
185{
186 u32 lo;
187 u32 savevid = data->currvid;
188 u32 i = 0;
189
190 if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) {
191 printk(KERN_ERR PFX "internal error - overflow on fid write\n");
192 return 1;
193 }
194
195 lo = fid;
196 lo |= (data->currvid << MSR_C_LO_VID_SHIFT);
197 lo |= MSR_C_LO_INIT_FID_VID;
198
199 dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
200 fid, lo, data->plllock * PLL_LOCK_CONVERSION);
201
202 do {
203 wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
204 if (i++ > 100) {
205 printk(KERN_ERR PFX
206 "Hardware error - pending bit very stuck - "
207 "no further pstate changes possible\n");
208 return 1;
209 }
210 } while (query_current_values_with_pending_wait(data));
211
212 count_off_irt(data);
213
214 if (savevid != data->currvid) {
215 printk(KERN_ERR PFX
216 "vid change on fid trans, old 0x%x, new 0x%x\n",
217 savevid, data->currvid);
218 return 1;
219 }
220
221 if (fid != data->currfid) {
222 printk(KERN_ERR PFX
223 "fid trans failed, fid 0x%x, curr 0x%x\n", fid,
224 data->currfid);
225 return 1;
226 }
227
228 return 0;
229}
230
231/* Write a new vid to the hardware */
232static int write_new_vid(struct powernow_k8_data *data, u32 vid)
233{
234 u32 lo;
235 u32 savefid = data->currfid;
236 int i = 0;
237
238 if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) {
239 printk(KERN_ERR PFX "internal error - overflow on vid write\n");
240 return 1;
241 }
242
243 lo = data->currfid;
244 lo |= (vid << MSR_C_LO_VID_SHIFT);
245 lo |= MSR_C_LO_INIT_FID_VID;
246
247 dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
248 vid, lo, STOP_GRANT_5NS);
249
250 do {
251 wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
252 if (i++ > 100) {
253 printk(KERN_ERR PFX "internal error - pending bit "
254 "very stuck - no further pstate "
255 "changes possible\n");
256 return 1;
257 }
258 } while (query_current_values_with_pending_wait(data));
259
260 if (savefid != data->currfid) {
261 printk(KERN_ERR PFX "fid changed on vid trans, old "
262 "0x%x new 0x%x\n",
263 savefid, data->currfid);
264 return 1;
265 }
266
267 if (vid != data->currvid) {
268 printk(KERN_ERR PFX "vid trans failed, vid 0x%x, "
269 "curr 0x%x\n",
270 vid, data->currvid);
271 return 1;
272 }
273
274 return 0;
275}
276
277/*
278 * Reduce the vid by the max of step or reqvid.
279 * Decreasing vid codes represent increasing voltages:
280 * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
281 */
282static int decrease_vid_code_by_step(struct powernow_k8_data *data,
283 u32 reqvid, u32 step)
284{
285 if ((data->currvid - reqvid) > step)
286 reqvid = data->currvid - step;
287
288 if (write_new_vid(data, reqvid))
289 return 1;
290
291 count_off_vst(data);
292
293 return 0;
294}
295
296/* Change hardware pstate by single MSR write */
297static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
298{
299 wrmsr(MSR_PSTATE_CTRL, pstate, 0);
300 data->currpstate = pstate;
301 return 0;
302}
303
304/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
305static int transition_fid_vid(struct powernow_k8_data *data,
306 u32 reqfid, u32 reqvid)
307{
308 if (core_voltage_pre_transition(data, reqvid, reqfid))
309 return 1;
310
311 if (core_frequency_transition(data, reqfid))
312 return 1;
313
314 if (core_voltage_post_transition(data, reqvid))
315 return 1;
316
317 if (query_current_values_with_pending_wait(data))
318 return 1;
319
320 if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
321 printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, "
322 "curr 0x%x 0x%x\n",
323 smp_processor_id(),
324 reqfid, reqvid, data->currfid, data->currvid);
325 return 1;
326 }
327
328 dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n",
329 smp_processor_id(), data->currfid, data->currvid);
330
331 return 0;
332}
333
334/* Phase 1 - core voltage transition ... setup voltage */
335static int core_voltage_pre_transition(struct powernow_k8_data *data,
336 u32 reqvid, u32 reqfid)
337{
338 u32 rvosteps = data->rvo;
339 u32 savefid = data->currfid;
340 u32 maxvid, lo, rvomult = 1;
341
342 dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
343 "reqvid 0x%x, rvo 0x%x\n",
344 smp_processor_id(),
345 data->currfid, data->currvid, reqvid, data->rvo);
346
347 if ((savefid < LO_FID_TABLE_TOP) && (reqfid < LO_FID_TABLE_TOP))
348 rvomult = 2;
349 rvosteps *= rvomult;
350 rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
351 maxvid = 0x1f & (maxvid >> 16);
352 dprintk("ph1 maxvid=0x%x\n", maxvid);
353 if (reqvid < maxvid) /* lower numbers are higher voltages */
354 reqvid = maxvid;
355
356 while (data->currvid > reqvid) {
357 dprintk("ph1: curr 0x%x, req vid 0x%x\n",
358 data->currvid, reqvid);
359 if (decrease_vid_code_by_step(data, reqvid, data->vidmvs))
360 return 1;
361 }
362
363 while ((rvosteps > 0) &&
364 ((rvomult * data->rvo + data->currvid) > reqvid)) {
365 if (data->currvid == maxvid) {
366 rvosteps = 0;
367 } else {
368 dprintk("ph1: changing vid for rvo, req 0x%x\n",
369 data->currvid - 1);
370 if (decrease_vid_code_by_step(data, data->currvid-1, 1))
371 return 1;
372 rvosteps--;
373 }
374 }
375
376 if (query_current_values_with_pending_wait(data))
377 return 1;
378
379 if (savefid != data->currfid) {
380 printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n",
381 data->currfid);
382 return 1;
383 }
384
385 dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n",
386 data->currfid, data->currvid);
387
388 return 0;
389}
390
391/* Phase 2 - core frequency transition */
392static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
393{
394 u32 vcoreqfid, vcocurrfid, vcofiddiff;
395 u32 fid_interval, savevid = data->currvid;
396
397 if (data->currfid == reqfid) {
398 printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
399 data->currfid);
400 return 0;
401 }
402
403 dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
404 "reqfid 0x%x\n",
405 smp_processor_id(),
406 data->currfid, data->currvid, reqfid);
407
408 vcoreqfid = convert_fid_to_vco_fid(reqfid);
409 vcocurrfid = convert_fid_to_vco_fid(data->currfid);
410 vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
411 : vcoreqfid - vcocurrfid;
412
413 if ((reqfid <= LO_FID_TABLE_TOP) && (data->currfid <= LO_FID_TABLE_TOP))
414 vcofiddiff = 0;
415
416 while (vcofiddiff > 2) {
417 (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2);
418
419 if (reqfid > data->currfid) {
420 if (data->currfid > LO_FID_TABLE_TOP) {
421 if (write_new_fid(data,
422 data->currfid + fid_interval))
423 return 1;
424 } else {
425 if (write_new_fid
426 (data,
427 2 + convert_fid_to_vco_fid(data->currfid)))
428 return 1;
429 }
430 } else {
431 if (write_new_fid(data, data->currfid - fid_interval))
432 return 1;
433 }
434
435 vcocurrfid = convert_fid_to_vco_fid(data->currfid);
436 vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
437 : vcoreqfid - vcocurrfid;
438 }
439
440 if (write_new_fid(data, reqfid))
441 return 1;
442
443 if (query_current_values_with_pending_wait(data))
444 return 1;
445
446 if (data->currfid != reqfid) {
447 printk(KERN_ERR PFX
448 "ph2: mismatch, failed fid transition, "
449 "curr 0x%x, req 0x%x\n",
450 data->currfid, reqfid);
451 return 1;
452 }
453
454 if (savevid != data->currvid) {
455 printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n",
456 savevid, data->currvid);
457 return 1;
458 }
459
460 dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n",
461 data->currfid, data->currvid);
462
463 return 0;
464}
465
466/* Phase 3 - core voltage transition flow ... jump to the final vid. */
467static int core_voltage_post_transition(struct powernow_k8_data *data,
468 u32 reqvid)
469{
470 u32 savefid = data->currfid;
471 u32 savereqvid = reqvid;
472
473 dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n",
474 smp_processor_id(),
475 data->currfid, data->currvid);
476
477 if (reqvid != data->currvid) {
478 if (write_new_vid(data, reqvid))
479 return 1;
480
481 if (savefid != data->currfid) {
482 printk(KERN_ERR PFX
483 "ph3: bad fid change, save 0x%x, curr 0x%x\n",
484 savefid, data->currfid);
485 return 1;
486 }
487
488 if (data->currvid != reqvid) {
489 printk(KERN_ERR PFX
490 "ph3: failed vid transition\n, "
491 "req 0x%x, curr 0x%x",
492 reqvid, data->currvid);
493 return 1;
494 }
495 }
496
497 if (query_current_values_with_pending_wait(data))
498 return 1;
499
500 if (savereqvid != data->currvid) {
501 dprintk("ph3 failed, currvid 0x%x\n", data->currvid);
502 return 1;
503 }
504
505 if (savefid != data->currfid) {
506 dprintk("ph3 failed, currfid changed 0x%x\n",
507 data->currfid);
508 return 1;
509 }
510
511 dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n",
512 data->currfid, data->currvid);
513
514 return 0;
515}
516
517static void check_supported_cpu(void *_rc)
518{
519 u32 eax, ebx, ecx, edx;
520 int *rc = _rc;
521
522 *rc = -ENODEV;
523
524 if (__this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_AMD)
525 return;
526
527 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
528 if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
529 ((eax & CPUID_XFAM) < CPUID_XFAM_10H))
530 return;
531
532 if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
533 if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
534 ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
535 printk(KERN_INFO PFX
536 "Processor cpuid %x not supported\n", eax);
537 return;
538 }
539
540 eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
541 if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
542 printk(KERN_INFO PFX
543 "No frequency change capabilities detected\n");
544 return;
545 }
546
547 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
548 if ((edx & P_STATE_TRANSITION_CAPABLE)
549 != P_STATE_TRANSITION_CAPABLE) {
550 printk(KERN_INFO PFX
551 "Power state transitions not supported\n");
552 return;
553 }
554 } else { /* must be a HW Pstate capable processor */
555 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
556 if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
557 cpu_family = CPU_HW_PSTATE;
558 else
559 return;
560 }
561
562 *rc = 0;
563}
564
565static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
566 u8 maxvid)
567{
568 unsigned int j;
569 u8 lastfid = 0xff;
570
571 for (j = 0; j < data->numps; j++) {
572 if (pst[j].vid > LEAST_VID) {
573 printk(KERN_ERR FW_BUG PFX "vid %d invalid : 0x%x\n",
574 j, pst[j].vid);
575 return -EINVAL;
576 }
577 if (pst[j].vid < data->rvo) {
578 /* vid + rvo >= 0 */
579 printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate"
580 " %d\n", j);
581 return -ENODEV;
582 }
583 if (pst[j].vid < maxvid + data->rvo) {
584 /* vid + rvo >= maxvid */
585 printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate"
586 " %d\n", j);
587 return -ENODEV;
588 }
589 if (pst[j].fid > MAX_FID) {
590 printk(KERN_ERR FW_BUG PFX "maxfid exceeded with pstate"
591 " %d\n", j);
592 return -ENODEV;
593 }
594 if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) {
595 /* Only first fid is allowed to be in "low" range */
596 printk(KERN_ERR FW_BUG PFX "two low fids - %d : "
597 "0x%x\n", j, pst[j].fid);
598 return -EINVAL;
599 }
600 if (pst[j].fid < lastfid)
601 lastfid = pst[j].fid;
602 }
603 if (lastfid & 1) {
604 printk(KERN_ERR FW_BUG PFX "lastfid invalid\n");
605 return -EINVAL;
606 }
607 if (lastfid > LO_FID_TABLE_TOP)
608 printk(KERN_INFO FW_BUG PFX
609 "first fid not from lo freq table\n");
610
611 return 0;
612}
613
614static void invalidate_entry(struct cpufreq_frequency_table *powernow_table,
615 unsigned int entry)
616{
617 powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
618}
619
620static void print_basics(struct powernow_k8_data *data)
621{
622 int j;
623 for (j = 0; j < data->numps; j++) {
624 if (data->powernow_table[j].frequency !=
625 CPUFREQ_ENTRY_INVALID) {
626 if (cpu_family == CPU_HW_PSTATE) {
627 printk(KERN_INFO PFX
628 " %d : pstate %d (%d MHz)\n", j,
629 data->powernow_table[j].index,
630 data->powernow_table[j].frequency/1000);
631 } else {
632 printk(KERN_INFO PFX
633 "fid 0x%x (%d MHz), vid 0x%x\n",
634 data->powernow_table[j].index & 0xff,
635 data->powernow_table[j].frequency/1000,
636 data->powernow_table[j].index >> 8);
637 }
638 }
639 }
640 if (data->batps)
641 printk(KERN_INFO PFX "Only %d pstates on battery\n",
642 data->batps);
643}
644
645static u32 freq_from_fid_did(u32 fid, u32 did)
646{
647 u32 mhz = 0;
648
649 if (boot_cpu_data.x86 == 0x10)
650 mhz = (100 * (fid + 0x10)) >> did;
651 else if (boot_cpu_data.x86 == 0x11)
652 mhz = (100 * (fid + 8)) >> did;
653 else
654 BUG();
655
656 return mhz * 1000;
657}
658
659static int fill_powernow_table(struct powernow_k8_data *data,
660 struct pst_s *pst, u8 maxvid)
661{
662 struct cpufreq_frequency_table *powernow_table;
663 unsigned int j;
664
665 if (data->batps) {
666 /* use ACPI support to get full speed on mains power */
667 printk(KERN_WARNING PFX
668 "Only %d pstates usable (use ACPI driver for full "
669 "range\n", data->batps);
670 data->numps = data->batps;
671 }
672
673 for (j = 1; j < data->numps; j++) {
674 if (pst[j-1].fid >= pst[j].fid) {
675 printk(KERN_ERR PFX "PST out of sequence\n");
676 return -EINVAL;
677 }
678 }
679
680 if (data->numps < 2) {
681 printk(KERN_ERR PFX "no p states to transition\n");
682 return -ENODEV;
683 }
684
685 if (check_pst_table(data, pst, maxvid))
686 return -EINVAL;
687
688 powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
689 * (data->numps + 1)), GFP_KERNEL);
690 if (!powernow_table) {
691 printk(KERN_ERR PFX "powernow_table memory alloc failure\n");
692 return -ENOMEM;
693 }
694
695 for (j = 0; j < data->numps; j++) {
696 int freq;
697 powernow_table[j].index = pst[j].fid; /* lower 8 bits */
698 powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
699 freq = find_khz_freq_from_fid(pst[j].fid);
700 powernow_table[j].frequency = freq;
701 }
702 powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
703 powernow_table[data->numps].index = 0;
704
705 if (query_current_values_with_pending_wait(data)) {
706 kfree(powernow_table);
707 return -EIO;
708 }
709
710 dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
711 data->powernow_table = powernow_table;
712 if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
713 print_basics(data);
714
715 for (j = 0; j < data->numps; j++)
716 if ((pst[j].fid == data->currfid) &&
717 (pst[j].vid == data->currvid))
718 return 0;
719
720 dprintk("currfid/vid do not match PST, ignoring\n");
721 return 0;
722}
723
724/* Find and validate the PSB/PST table in BIOS. */
725static int find_psb_table(struct powernow_k8_data *data)
726{
727 struct psb_s *psb;
728 unsigned int i;
729 u32 mvs;
730 u8 maxvid;
731 u32 cpst = 0;
732 u32 thiscpuid;
733
734 for (i = 0xc0000; i < 0xffff0; i += 0x10) {
735 /* Scan BIOS looking for the signature. */
736 /* It can not be at ffff0 - it is too big. */
737
738 psb = phys_to_virt(i);
739 if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0)
740 continue;
741
742 dprintk("found PSB header at 0x%p\n", psb);
743
744 dprintk("table vers: 0x%x\n", psb->tableversion);
745 if (psb->tableversion != PSB_VERSION_1_4) {
746 printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n");
747 return -ENODEV;
748 }
749
750 dprintk("flags: 0x%x\n", psb->flags1);
751 if (psb->flags1) {
752 printk(KERN_ERR FW_BUG PFX "unknown flags\n");
753 return -ENODEV;
754 }
755
756 data->vstable = psb->vstable;
757 dprintk("voltage stabilization time: %d(*20us)\n",
758 data->vstable);
759
760 dprintk("flags2: 0x%x\n", psb->flags2);
761 data->rvo = psb->flags2 & 3;
762 data->irt = ((psb->flags2) >> 2) & 3;
763 mvs = ((psb->flags2) >> 4) & 3;
764 data->vidmvs = 1 << mvs;
765 data->batps = ((psb->flags2) >> 6) & 3;
766
767 dprintk("ramp voltage offset: %d\n", data->rvo);
768 dprintk("isochronous relief time: %d\n", data->irt);
769 dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs);
770
771 dprintk("numpst: 0x%x\n", psb->num_tables);
772 cpst = psb->num_tables;
773 if ((psb->cpuid == 0x00000fc0) ||
774 (psb->cpuid == 0x00000fe0)) {
775 thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
776 if ((thiscpuid == 0x00000fc0) ||
777 (thiscpuid == 0x00000fe0))
778 cpst = 1;
779 }
780 if (cpst != 1) {
781 printk(KERN_ERR FW_BUG PFX "numpst must be 1\n");
782 return -ENODEV;
783 }
784
785 data->plllock = psb->plllocktime;
786 dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime);
787 dprintk("maxfid: 0x%x\n", psb->maxfid);
788 dprintk("maxvid: 0x%x\n", psb->maxvid);
789 maxvid = psb->maxvid;
790
791 data->numps = psb->numps;
792 dprintk("numpstates: 0x%x\n", data->numps);
793 return fill_powernow_table(data,
794 (struct pst_s *)(psb+1), maxvid);
795 }
796 /*
797 * If you see this message, complain to BIOS manufacturer. If
798 * he tells you "we do not support Linux" or some similar
799 * nonsense, remember that Windows 2000 uses the same legacy
800 * mechanism that the old Linux PSB driver uses. Tell them it
801 * is broken with Windows 2000.
802 *
803 * The reference to the AMD documentation is chapter 9 in the
804 * BIOS and Kernel Developer's Guide, which is available on
805 * www.amd.com
806 */
807 printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n");
808 printk(KERN_ERR PFX "Make sure that your BIOS is up to date"
809 " and Cool'N'Quiet support is enabled in BIOS setup\n");
810 return -ENODEV;
811}
812
813static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
814 unsigned int index)
815{
816 u64 control;
817
818 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
819 return;
820
821 control = data->acpi_data.states[index].control;
822 data->irt = (control >> IRT_SHIFT) & IRT_MASK;
823 data->rvo = (control >> RVO_SHIFT) & RVO_MASK;
824 data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
825 data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK;
826 data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK);
827 data->vstable = (control >> VST_SHIFT) & VST_MASK;
828}
829
830static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
831{
832 struct cpufreq_frequency_table *powernow_table;
833 int ret_val = -ENODEV;
834 u64 control, status;
835
836 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
837 dprintk("register performance failed: bad ACPI data\n");
838 return -EIO;
839 }
840
841 /* verify the data contained in the ACPI structures */
842 if (data->acpi_data.state_count <= 1) {
843 dprintk("No ACPI P-States\n");
844 goto err_out;
845 }
846
847 control = data->acpi_data.control_register.space_id;
848 status = data->acpi_data.status_register.space_id;
849
850 if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
851 (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
852 dprintk("Invalid control/status registers (%x - %x)\n",
853 control, status);
854 goto err_out;
855 }
856
857 /* fill in data->powernow_table */
858 powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
859 * (data->acpi_data.state_count + 1)), GFP_KERNEL);
860 if (!powernow_table) {
861 dprintk("powernow_table memory alloc failure\n");
862 goto err_out;
863 }
864
865 /* fill in data */
866 data->numps = data->acpi_data.state_count;
867 powernow_k8_acpi_pst_values(data, 0);
868
869 if (cpu_family == CPU_HW_PSTATE)
870 ret_val = fill_powernow_table_pstate(data, powernow_table);
871 else
872 ret_val = fill_powernow_table_fidvid(data, powernow_table);
873 if (ret_val)
874 goto err_out_mem;
875
876 powernow_table[data->acpi_data.state_count].frequency =
877 CPUFREQ_TABLE_END;
878 powernow_table[data->acpi_data.state_count].index = 0;
879 data->powernow_table = powernow_table;
880
881 if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
882 print_basics(data);
883
884 /* notify BIOS that we exist */
885 acpi_processor_notify_smm(THIS_MODULE);
886
887 if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
888 printk(KERN_ERR PFX
889 "unable to alloc powernow_k8_data cpumask\n");
890 ret_val = -ENOMEM;
891 goto err_out_mem;
892 }
893
894 return 0;
895
896err_out_mem:
897 kfree(powernow_table);
898
899err_out:
900 acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
901
902 /* data->acpi_data.state_count informs us at ->exit()
903 * whether ACPI was used */
904 data->acpi_data.state_count = 0;
905
906 return ret_val;
907}
908
909static int fill_powernow_table_pstate(struct powernow_k8_data *data,
910 struct cpufreq_frequency_table *powernow_table)
911{
912 int i;
913 u32 hi = 0, lo = 0;
914 rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi);
915 data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
916
917 for (i = 0; i < data->acpi_data.state_count; i++) {
918 u32 index;
919
920 index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
921 if (index > data->max_hw_pstate) {
922 printk(KERN_ERR PFX "invalid pstate %d - "
923 "bad value %d.\n", i, index);
924 printk(KERN_ERR PFX "Please report to BIOS "
925 "manufacturer\n");
926 invalidate_entry(powernow_table, i);
927 continue;
928 }
929 rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
930 if (!(hi & HW_PSTATE_VALID_MASK)) {
931 dprintk("invalid pstate %d, ignoring\n", index);
932 invalidate_entry(powernow_table, i);
933 continue;
934 }
935
936 powernow_table[i].index = index;
937
938 /* Frequency may be rounded for these */
939 if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
940 || boot_cpu_data.x86 == 0x11) {
941 powernow_table[i].frequency =
942 freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
943 } else
944 powernow_table[i].frequency =
945 data->acpi_data.states[i].core_frequency * 1000;
946 }
947 return 0;
948}
949
950static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
951 struct cpufreq_frequency_table *powernow_table)
952{
953 int i;
954
955 for (i = 0; i < data->acpi_data.state_count; i++) {
956 u32 fid;
957 u32 vid;
958 u32 freq, index;
959 u64 status, control;
960
961 if (data->exttype) {
962 status = data->acpi_data.states[i].status;
963 fid = status & EXT_FID_MASK;
964 vid = (status >> VID_SHIFT) & EXT_VID_MASK;
965 } else {
966 control = data->acpi_data.states[i].control;
967 fid = control & FID_MASK;
968 vid = (control >> VID_SHIFT) & VID_MASK;
969 }
970
971 dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
972
973 index = fid | (vid<<8);
974 powernow_table[i].index = index;
975
976 freq = find_khz_freq_from_fid(fid);
977 powernow_table[i].frequency = freq;
978
979 /* verify frequency is OK */
980 if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
981 dprintk("invalid freq %u kHz, ignoring\n", freq);
982 invalidate_entry(powernow_table, i);
983 continue;
984 }
985
986 /* verify voltage is OK -
987 * BIOSs are using "off" to indicate invalid */
988 if (vid == VID_OFF) {
989 dprintk("invalid vid %u, ignoring\n", vid);
990 invalidate_entry(powernow_table, i);
991 continue;
992 }
993
994 if (freq != (data->acpi_data.states[i].core_frequency * 1000)) {
995 printk(KERN_INFO PFX "invalid freq entries "
996 "%u kHz vs. %u kHz\n", freq,
997 (unsigned int)
998 (data->acpi_data.states[i].core_frequency
999 * 1000));
1000 invalidate_entry(powernow_table, i);
1001 continue;
1002 }
1003 }
1004 return 0;
1005}
1006
1007static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
1008{
1009 if (data->acpi_data.state_count)
1010 acpi_processor_unregister_performance(&data->acpi_data,
1011 data->cpu);
1012 free_cpumask_var(data->acpi_data.shared_cpu_map);
1013}
1014
1015static int get_transition_latency(struct powernow_k8_data *data)
1016{
1017 int max_latency = 0;
1018 int i;
1019 for (i = 0; i < data->acpi_data.state_count; i++) {
1020 int cur_latency = data->acpi_data.states[i].transition_latency
1021 + data->acpi_data.states[i].bus_master_latency;
1022 if (cur_latency > max_latency)
1023 max_latency = cur_latency;
1024 }
1025 if (max_latency == 0) {
1026 /*
1027 * Fam 11h and later may return 0 as transition latency. This
1028 * is intended and means "very fast". While cpufreq core and
1029 * governors currently can handle that gracefully, better set it
1030 * to 1 to avoid problems in the future.
1031 */
1032 if (boot_cpu_data.x86 < 0x11)
1033 printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
1034 "latency\n");
1035 max_latency = 1;
1036 }
1037 /* value in usecs, needs to be in nanoseconds */
1038 return 1000 * max_latency;
1039}
1040
1041/* Take a frequency, and issue the fid/vid transition command */
1042static int transition_frequency_fidvid(struct powernow_k8_data *data,
1043 unsigned int index)
1044{
1045 u32 fid = 0;
1046 u32 vid = 0;
1047 int res, i;
1048 struct cpufreq_freqs freqs;
1049
1050 dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
1051
1052 /* fid/vid correctness check for k8 */
1053 /* fid are the lower 8 bits of the index we stored into
1054 * the cpufreq frequency table in find_psb_table, vid
1055 * are the upper 8 bits.
1056 */
1057 fid = data->powernow_table[index].index & 0xFF;
1058 vid = (data->powernow_table[index].index & 0xFF00) >> 8;
1059
1060 dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid);
1061
1062 if (query_current_values_with_pending_wait(data))
1063 return 1;
1064
1065 if ((data->currvid == vid) && (data->currfid == fid)) {
1066 dprintk("target matches current values (fid 0x%x, vid 0x%x)\n",
1067 fid, vid);
1068 return 0;
1069 }
1070
1071 dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n",
1072 smp_processor_id(), fid, vid);
1073 freqs.old = find_khz_freq_from_fid(data->currfid);
1074 freqs.new = find_khz_freq_from_fid(fid);
1075
1076 for_each_cpu(i, data->available_cores) {
1077 freqs.cpu = i;
1078 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1079 }
1080
1081 res = transition_fid_vid(data, fid, vid);
1082 freqs.new = find_khz_freq_from_fid(data->currfid);
1083
1084 for_each_cpu(i, data->available_cores) {
1085 freqs.cpu = i;
1086 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1087 }
1088 return res;
1089}
1090
1091/* Take a frequency, and issue the hardware pstate transition command */
1092static int transition_frequency_pstate(struct powernow_k8_data *data,
1093 unsigned int index)
1094{
1095 u32 pstate = 0;
1096 int res, i;
1097 struct cpufreq_freqs freqs;
1098
1099 dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
1100
1101 /* get MSR index for hardware pstate transition */
1102 pstate = index & HW_PSTATE_MASK;
1103 if (pstate > data->max_hw_pstate)
1104 return 0;
1105 freqs.old = find_khz_freq_from_pstate(data->powernow_table,
1106 data->currpstate);
1107 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
1108
1109 for_each_cpu(i, data->available_cores) {
1110 freqs.cpu = i;
1111 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1112 }
1113
1114 res = transition_pstate(data, pstate);
1115 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
1116
1117 for_each_cpu(i, data->available_cores) {
1118 freqs.cpu = i;
1119 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1120 }
1121 return res;
1122}
1123
1124/* Driver entry point to switch to the target frequency */
1125static int powernowk8_target(struct cpufreq_policy *pol,
1126 unsigned targfreq, unsigned relation)
1127{
1128 cpumask_var_t oldmask;
1129 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
1130 u32 checkfid;
1131 u32 checkvid;
1132 unsigned int newstate;
1133 int ret = -EIO;
1134
1135 if (!data)
1136 return -EINVAL;
1137
1138 checkfid = data->currfid;
1139 checkvid = data->currvid;
1140
1141 /* only run on specific CPU from here on. */
1142 /* This is poor form: use a workqueue or smp_call_function_single */
1143 if (!alloc_cpumask_var(&oldmask, GFP_KERNEL))
1144 return -ENOMEM;
1145
1146 cpumask_copy(oldmask, tsk_cpus_allowed(current));
1147 set_cpus_allowed_ptr(current, cpumask_of(pol->cpu));
1148
1149 if (smp_processor_id() != pol->cpu) {
1150 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
1151 goto err_out;
1152 }
1153
1154 if (pending_bit_stuck()) {
1155 printk(KERN_ERR PFX "failing targ, change pending bit set\n");
1156 goto err_out;
1157 }
1158
1159 dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
1160 pol->cpu, targfreq, pol->min, pol->max, relation);
1161
1162 if (query_current_values_with_pending_wait(data))
1163 goto err_out;
1164
1165 if (cpu_family != CPU_HW_PSTATE) {
1166 dprintk("targ: curr fid 0x%x, vid 0x%x\n",
1167 data->currfid, data->currvid);
1168
1169 if ((checkvid != data->currvid) ||
1170 (checkfid != data->currfid)) {
1171 printk(KERN_INFO PFX
1172 "error - out of sync, fix 0x%x 0x%x, "
1173 "vid 0x%x 0x%x\n",
1174 checkfid, data->currfid,
1175 checkvid, data->currvid);
1176 }
1177 }
1178
1179 if (cpufreq_frequency_table_target(pol, data->powernow_table,
1180 targfreq, relation, &newstate))
1181 goto err_out;
1182
1183 mutex_lock(&fidvid_mutex);
1184
1185 powernow_k8_acpi_pst_values(data, newstate);
1186
1187 if (cpu_family == CPU_HW_PSTATE)
1188 ret = transition_frequency_pstate(data, newstate);
1189 else
1190 ret = transition_frequency_fidvid(data, newstate);
1191 if (ret) {
1192 printk(KERN_ERR PFX "transition frequency failed\n");
1193 ret = 1;
1194 mutex_unlock(&fidvid_mutex);
1195 goto err_out;
1196 }
1197 mutex_unlock(&fidvid_mutex);
1198
1199 if (cpu_family == CPU_HW_PSTATE)
1200 pol->cur = find_khz_freq_from_pstate(data->powernow_table,
1201 newstate);
1202 else
1203 pol->cur = find_khz_freq_from_fid(data->currfid);
1204 ret = 0;
1205
1206err_out:
1207 set_cpus_allowed_ptr(current, oldmask);
1208 free_cpumask_var(oldmask);
1209 return ret;
1210}
1211
1212/* Driver entry point to verify the policy and range of frequencies */
1213static int powernowk8_verify(struct cpufreq_policy *pol)
1214{
1215 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
1216
1217 if (!data)
1218 return -EINVAL;
1219
1220 return cpufreq_frequency_table_verify(pol, data->powernow_table);
1221}
1222
1223struct init_on_cpu {
1224 struct powernow_k8_data *data;
1225 int rc;
1226};
1227
1228static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu)
1229{
1230 struct init_on_cpu *init_on_cpu = _init_on_cpu;
1231
1232 if (pending_bit_stuck()) {
1233 printk(KERN_ERR PFX "failing init, change pending bit set\n");
1234 init_on_cpu->rc = -ENODEV;
1235 return;
1236 }
1237
1238 if (query_current_values_with_pending_wait(init_on_cpu->data)) {
1239 init_on_cpu->rc = -ENODEV;
1240 return;
1241 }
1242
1243 if (cpu_family == CPU_OPTERON)
1244 fidvid_msr_init();
1245
1246 init_on_cpu->rc = 0;
1247}
1248
1249/* per CPU init entry point to the driver */
1250static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1251{
1252 static const char ACPI_PSS_BIOS_BUG_MSG[] =
1253 KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
1254 FW_BUG PFX "Try again with latest BIOS.\n";
1255 struct powernow_k8_data *data;
1256 struct init_on_cpu init_on_cpu;
1257 int rc;
1258 struct cpuinfo_x86 *c = &cpu_data(pol->cpu);
1259
1260 if (!cpu_online(pol->cpu))
1261 return -ENODEV;
1262
1263 smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1);
1264 if (rc)
1265 return -ENODEV;
1266
1267 data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
1268 if (!data) {
1269 printk(KERN_ERR PFX "unable to alloc powernow_k8_data");
1270 return -ENOMEM;
1271 }
1272
1273 data->cpu = pol->cpu;
1274 data->currpstate = HW_PSTATE_INVALID;
1275
1276 if (powernow_k8_cpu_init_acpi(data)) {
1277 /*
1278 * Use the PSB BIOS structure. This is only available on
1279 * an UP version, and is deprecated by AMD.
1280 */
1281 if (num_online_cpus() != 1) {
1282 printk_once(ACPI_PSS_BIOS_BUG_MSG);
1283 goto err_out;
1284 }
1285 if (pol->cpu != 0) {
1286 printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
1287 "CPU other than CPU0. Complain to your BIOS "
1288 "vendor.\n");
1289 goto err_out;
1290 }
1291 rc = find_psb_table(data);
1292 if (rc)
1293 goto err_out;
1294
1295 /* Take a crude guess here.
1296 * That guess was in microseconds, so multiply with 1000 */
1297 pol->cpuinfo.transition_latency = (
1298 ((data->rvo + 8) * data->vstable * VST_UNITS_20US) +
1299 ((1 << data->irt) * 30)) * 1000;
1300 } else /* ACPI _PSS objects available */
1301 pol->cpuinfo.transition_latency = get_transition_latency(data);
1302
1303 /* only run on specific CPU from here on */
1304 init_on_cpu.data = data;
1305 smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu,
1306 &init_on_cpu, 1);
1307 rc = init_on_cpu.rc;
1308 if (rc != 0)
1309 goto err_out_exit_acpi;
1310
1311 if (cpu_family == CPU_HW_PSTATE)
1312 cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
1313 else
1314 cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu));
1315 data->available_cores = pol->cpus;
1316
1317 if (cpu_family == CPU_HW_PSTATE)
1318 pol->cur = find_khz_freq_from_pstate(data->powernow_table,
1319 data->currpstate);
1320 else
1321 pol->cur = find_khz_freq_from_fid(data->currfid);
1322 dprintk("policy current frequency %d kHz\n", pol->cur);
1323
1324 /* min/max the cpu is capable of */
1325 if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) {
1326 printk(KERN_ERR FW_BUG PFX "invalid powernow_table\n");
1327 powernow_k8_cpu_exit_acpi(data);
1328 kfree(data->powernow_table);
1329 kfree(data);
1330 return -EINVAL;
1331 }
1332
1333 /* Check for APERF/MPERF support in hardware */
1334 if (cpu_has(c, X86_FEATURE_APERFMPERF))
1335 cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf;
1336
1337 cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
1338
1339 if (cpu_family == CPU_HW_PSTATE)
1340 dprintk("cpu_init done, current pstate 0x%x\n",
1341 data->currpstate);
1342 else
1343 dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
1344 data->currfid, data->currvid);
1345
1346 per_cpu(powernow_data, pol->cpu) = data;
1347
1348 return 0;
1349
1350err_out_exit_acpi:
1351 powernow_k8_cpu_exit_acpi(data);
1352
1353err_out:
1354 kfree(data);
1355 return -ENODEV;
1356}
1357
1358static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
1359{
1360 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
1361
1362 if (!data)
1363 return -EINVAL;
1364
1365 powernow_k8_cpu_exit_acpi(data);
1366
1367 cpufreq_frequency_table_put_attr(pol->cpu);
1368
1369 kfree(data->powernow_table);
1370 kfree(data);
1371 per_cpu(powernow_data, pol->cpu) = NULL;
1372
1373 return 0;
1374}
1375
1376static void query_values_on_cpu(void *_err)
1377{
1378 int *err = _err;
1379 struct powernow_k8_data *data = __this_cpu_read(powernow_data);
1380
1381 *err = query_current_values_with_pending_wait(data);
1382}
1383
1384static unsigned int powernowk8_get(unsigned int cpu)
1385{
1386 struct powernow_k8_data *data = per_cpu(powernow_data, cpu);
1387 unsigned int khz = 0;
1388 int err;
1389
1390 if (!data)
1391 return 0;
1392
1393 smp_call_function_single(cpu, query_values_on_cpu, &err, true);
1394 if (err)
1395 goto out;
1396
1397 if (cpu_family == CPU_HW_PSTATE)
1398 khz = find_khz_freq_from_pstate(data->powernow_table,
1399 data->currpstate);
1400 else
1401 khz = find_khz_freq_from_fid(data->currfid);
1402
1403
1404out:
1405 return khz;
1406}
1407
1408static void _cpb_toggle_msrs(bool t)
1409{
1410 int cpu;
1411
1412 get_online_cpus();
1413
1414 rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
1415
1416 for_each_cpu(cpu, cpu_online_mask) {
1417 struct msr *reg = per_cpu_ptr(msrs, cpu);
1418 if (t)
1419 reg->l &= ~BIT(25);
1420 else
1421 reg->l |= BIT(25);
1422 }
1423 wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
1424
1425 put_online_cpus();
1426}
1427
1428/*
1429 * Switch on/off core performance boosting.
1430 *
1431 * 0=disable
1432 * 1=enable.
1433 */
1434static void cpb_toggle(bool t)
1435{
1436 if (!cpb_capable)
1437 return;
1438
1439 if (t && !cpb_enabled) {
1440 cpb_enabled = true;
1441 _cpb_toggle_msrs(t);
1442 printk(KERN_INFO PFX "Core Boosting enabled.\n");
1443 } else if (!t && cpb_enabled) {
1444 cpb_enabled = false;
1445 _cpb_toggle_msrs(t);
1446 printk(KERN_INFO PFX "Core Boosting disabled.\n");
1447 }
1448}
1449
1450static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
1451 size_t count)
1452{
1453 int ret = -EINVAL;
1454 unsigned long val = 0;
1455
1456 ret = strict_strtoul(buf, 10, &val);
1457 if (!ret && (val == 0 || val == 1) && cpb_capable)
1458 cpb_toggle(val);
1459 else
1460 return -EINVAL;
1461
1462 return count;
1463}
1464
1465static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
1466{
1467 return sprintf(buf, "%u\n", cpb_enabled);
1468}
1469
1470#define define_one_rw(_name) \
1471static struct freq_attr _name = \
1472__ATTR(_name, 0644, show_##_name, store_##_name)
1473
1474define_one_rw(cpb);
1475
1476static struct freq_attr *powernow_k8_attr[] = {
1477 &cpufreq_freq_attr_scaling_available_freqs,
1478 &cpb,
1479 NULL,
1480};
1481
1482static struct cpufreq_driver cpufreq_amd64_driver = {
1483 .verify = powernowk8_verify,
1484 .target = powernowk8_target,
1485 .bios_limit = acpi_processor_get_bios_limit,
1486 .init = powernowk8_cpu_init,
1487 .exit = __devexit_p(powernowk8_cpu_exit),
1488 .get = powernowk8_get,
1489 .name = "powernow-k8",
1490 .owner = THIS_MODULE,
1491 .attr = powernow_k8_attr,
1492};
1493
1494/*
1495 * Clear the boost-disable flag on the CPU_DOWN path so that this cpu
1496 * cannot block the remaining ones from boosting. On the CPU_UP path we
1497 * simply keep the boost-disable flag in sync with the current global
1498 * state.
1499 */
1500static int cpb_notify(struct notifier_block *nb, unsigned long action,
1501 void *hcpu)
1502{
1503 unsigned cpu = (long)hcpu;
1504 u32 lo, hi;
1505
1506 switch (action) {
1507 case CPU_UP_PREPARE:
1508 case CPU_UP_PREPARE_FROZEN:
1509
1510 if (!cpb_enabled) {
1511 rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
1512 lo |= BIT(25);
1513 wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
1514 }
1515 break;
1516
1517 case CPU_DOWN_PREPARE:
1518 case CPU_DOWN_PREPARE_FROZEN:
1519 rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
1520 lo &= ~BIT(25);
1521 wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
1522 break;
1523
1524 default:
1525 break;
1526 }
1527
1528 return NOTIFY_OK;
1529}
1530
1531static struct notifier_block cpb_nb = {
1532 .notifier_call = cpb_notify,
1533};
1534
1535/* driver entry point for init */
1536static int __cpuinit powernowk8_init(void)
1537{
1538 unsigned int i, supported_cpus = 0, cpu;
1539 int rv;
1540
1541 for_each_online_cpu(i) {
1542 int rc;
1543 smp_call_function_single(i, check_supported_cpu, &rc, 1);
1544 if (rc == 0)
1545 supported_cpus++;
1546 }
1547
1548 if (supported_cpus != num_online_cpus())
1549 return -ENODEV;
1550
1551 printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n",
1552 num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus);
1553
1554 if (boot_cpu_has(X86_FEATURE_CPB)) {
1555
1556 cpb_capable = true;
1557
1558 msrs = msrs_alloc();
1559 if (!msrs) {
1560 printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
1561 return -ENOMEM;
1562 }
1563
1564 register_cpu_notifier(&cpb_nb);
1565
1566 rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
1567
1568 for_each_cpu(cpu, cpu_online_mask) {
1569 struct msr *reg = per_cpu_ptr(msrs, cpu);
1570 cpb_enabled |= !(!!(reg->l & BIT(25)));
1571 }
1572
1573 printk(KERN_INFO PFX "Core Performance Boosting: %s.\n",
1574 (cpb_enabled ? "on" : "off"));
1575 }
1576
1577 rv = cpufreq_register_driver(&cpufreq_amd64_driver);
1578 if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) {
1579 unregister_cpu_notifier(&cpb_nb);
1580 msrs_free(msrs);
1581 msrs = NULL;
1582 }
1583 return rv;
1584}
1585
1586/* driver entry point for term */
1587static void __exit powernowk8_exit(void)
1588{
1589 dprintk("exit\n");
1590
1591 if (boot_cpu_has(X86_FEATURE_CPB)) {
1592 msrs_free(msrs);
1593 msrs = NULL;
1594
1595 unregister_cpu_notifier(&cpb_nb);
1596 }
1597
1598 cpufreq_unregister_driver(&cpufreq_amd64_driver);
1599}
1600
1601MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and "
1602 "Mark Langsdorf <mark.langsdorf@amd.com>");
1603MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
1604MODULE_LICENSE("GPL");
1605
1606late_initcall(powernowk8_init);
1607module_exit(powernowk8_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
deleted file mode 100644
index df3529b1c02d..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ /dev/null
@@ -1,224 +0,0 @@
1/*
2 * (c) 2003-2006 Advanced Micro Devices, Inc.
3 * Your use of this code is subject to the terms and conditions of the
4 * GNU general public license version 2. See "COPYING" or
5 * http://www.gnu.org/licenses/gpl.html
6 */
7
8enum pstate {
9 HW_PSTATE_INVALID = 0xff,
10 HW_PSTATE_0 = 0,
11 HW_PSTATE_1 = 1,
12 HW_PSTATE_2 = 2,
13 HW_PSTATE_3 = 3,
14 HW_PSTATE_4 = 4,
15 HW_PSTATE_5 = 5,
16 HW_PSTATE_6 = 6,
17 HW_PSTATE_7 = 7,
18};
19
20struct powernow_k8_data {
21 unsigned int cpu;
22
23 u32 numps; /* number of p-states */
24 u32 batps; /* number of p-states supported on battery */
25 u32 max_hw_pstate; /* maximum legal hardware pstate */
26
27 /* these values are constant when the PSB is used to determine
28 * vid/fid pairings, but are modified during the ->target() call
29 * when ACPI is used */
30 u32 rvo; /* ramp voltage offset */
31 u32 irt; /* isochronous relief time */
32 u32 vidmvs; /* usable value calculated from mvs */
33 u32 vstable; /* voltage stabilization time, units 20 us */
34 u32 plllock; /* pll lock time, units 1 us */
35 u32 exttype; /* extended interface = 1 */
36
37 /* keep track of the current fid / vid or pstate */
38 u32 currvid;
39 u32 currfid;
40 enum pstate currpstate;
41
42 /* the powernow_table includes all frequency and vid/fid pairings:
43 * fid are the lower 8 bits of the index, vid are the upper 8 bits.
44 * frequency is in kHz */
45 struct cpufreq_frequency_table *powernow_table;
46
47 /* the acpi table needs to be kept. it's only available if ACPI was
48 * used to determine valid frequency/vid/fid states */
49 struct acpi_processor_performance acpi_data;
50
51 /* we need to keep track of associated cores, but let cpufreq
52 * handle hotplug events - so just point at cpufreq pol->cpus
53 * structure */
54 struct cpumask *available_cores;
55};
56
57/* processor's cpuid instruction support */
58#define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */
59#define CPUID_XFAM 0x0ff00000 /* extended family */
60#define CPUID_XFAM_K8 0
61#define CPUID_XMOD 0x000f0000 /* extended model */
62#define CPUID_XMOD_REV_MASK 0x000c0000
63#define CPUID_XFAM_10H 0x00100000 /* family 0x10 */
64#define CPUID_USE_XFAM_XMOD 0x00000f00
65#define CPUID_GET_MAX_CAPABILITIES 0x80000000
66#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007
67#define P_STATE_TRANSITION_CAPABLE 6
68
69/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For */
70/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and */
71/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */
72/* the register number is placed in ecx, and the data is returned in edx:eax. */
73
74#define MSR_FIDVID_CTL 0xc0010041
75#define MSR_FIDVID_STATUS 0xc0010042
76
77/* Field definitions within the FID VID Low Control MSR : */
78#define MSR_C_LO_INIT_FID_VID 0x00010000
79#define MSR_C_LO_NEW_VID 0x00003f00
80#define MSR_C_LO_NEW_FID 0x0000003f
81#define MSR_C_LO_VID_SHIFT 8
82
83/* Field definitions within the FID VID High Control MSR : */
84#define MSR_C_HI_STP_GNT_TO 0x000fffff
85
86/* Field definitions within the FID VID Low Status MSR : */
87#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */
88#define MSR_S_LO_MAX_RAMP_VID 0x3f000000
89#define MSR_S_LO_MAX_FID 0x003f0000
90#define MSR_S_LO_START_FID 0x00003f00
91#define MSR_S_LO_CURRENT_FID 0x0000003f
92
93/* Field definitions within the FID VID High Status MSR : */
94#define MSR_S_HI_MIN_WORKING_VID 0x3f000000
95#define MSR_S_HI_MAX_WORKING_VID 0x003f0000
96#define MSR_S_HI_START_VID 0x00003f00
97#define MSR_S_HI_CURRENT_VID 0x0000003f
98#define MSR_C_HI_STP_GNT_BENIGN 0x00000001
99
100
101/* Hardware Pstate _PSS and MSR definitions */
102#define USE_HW_PSTATE 0x00000080
103#define HW_PSTATE_MASK 0x00000007
104#define HW_PSTATE_VALID_MASK 0x80000000
105#define HW_PSTATE_MAX_MASK 0x000000f0
106#define HW_PSTATE_MAX_SHIFT 4
107#define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */
108#define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */
109#define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */
110#define MSR_PSTATE_CUR_LIMIT 0xc0010061 /* pstate current limit MSR */
111
112/* define the two driver architectures */
113#define CPU_OPTERON 0
114#define CPU_HW_PSTATE 1
115
116
117/*
118 * There are restrictions frequencies have to follow:
119 * - only 1 entry in the low fid table ( <=1.4GHz )
120 * - lowest entry in the high fid table must be >= 2 * the entry in the
121 * low fid table
122 * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry
123 * in the low fid table
124 * - the parts can only step at <= 200 MHz intervals, odd fid values are
125 * supported in revision G and later revisions.
126 * - lowest frequency must be >= interprocessor hypertransport link speed
127 * (only applies to MP systems obviously)
128 */
129
130/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */
131#define LO_FID_TABLE_TOP 7 /* fid values marking the boundary */
132#define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */
133
134#define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */
135#define HI_VCOFREQ_TABLE_BOTTOM 1600
136
137#define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */
138
139#define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */
140#define LEAST_VID 0x3e /* Lowest (numerically highest) useful vid value */
141
142#define MIN_FREQ 800 /* Min and max freqs, per spec */
143#define MAX_FREQ 5000
144
145#define INVALID_FID_MASK 0xffffffc0 /* not a valid fid if these bits are set */
146#define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */
147
148#define VID_OFF 0x3f
149
150#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */
151
152#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */
153
154#define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */
155#define VST_UNITS_20US 20 /* Voltage Stabilization Time is in units of 20us */
156
157/*
158 * Most values of interest are encoded in a single field of the _PSS
159 * entries: the "control" value.
160 */
161
162#define IRT_SHIFT 30
163#define RVO_SHIFT 28
164#define EXT_TYPE_SHIFT 27
165#define PLL_L_SHIFT 20
166#define MVS_SHIFT 18
167#define VST_SHIFT 11
168#define VID_SHIFT 6
169#define IRT_MASK 3
170#define RVO_MASK 3
171#define EXT_TYPE_MASK 1
172#define PLL_L_MASK 0x7f
173#define MVS_MASK 3
174#define VST_MASK 0x7f
175#define VID_MASK 0x1f
176#define FID_MASK 0x1f
177#define EXT_VID_MASK 0x3f
178#define EXT_FID_MASK 0x3f
179
180
181/*
182 * Version 1.4 of the PSB table. This table is constructed by BIOS and is
183 * to tell the OS's power management driver which VIDs and FIDs are
184 * supported by this particular processor.
185 * If the data in the PSB / PST is wrong, then this driver will program the
186 * wrong values into hardware, which is very likely to lead to a crash.
187 */
188
189#define PSB_ID_STRING "AMDK7PNOW!"
190#define PSB_ID_STRING_LEN 10
191
192#define PSB_VERSION_1_4 0x14
193
194struct psb_s {
195 u8 signature[10];
196 u8 tableversion;
197 u8 flags1;
198 u16 vstable;
199 u8 flags2;
200 u8 num_tables;
201 u32 cpuid;
202 u8 plllocktime;
203 u8 maxfid;
204 u8 maxvid;
205 u8 numps;
206};
207
208/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */
209struct pst_s {
210 u8 fid;
211 u8 vid;
212};
213
214#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg)
215
216static int core_voltage_pre_transition(struct powernow_k8_data *data,
217 u32 reqvid, u32 regfid);
218static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid);
219static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
220
221static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);
222
223static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
224static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
deleted file mode 100644
index 435a996a613a..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
+++ /dev/null
@@ -1,194 +0,0 @@
1/*
2 * sc520_freq.c: cpufreq driver for the AMD Elan sc520
3 *
4 * Copyright (C) 2005 Sean Young <sean@mess.org>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * Based on elanfreq.c
12 *
13 * 2005-03-30: - initial revision
14 */
15
16#include <linux/kernel.h>
17#include <linux/module.h>
18#include <linux/init.h>
19
20#include <linux/delay.h>
21#include <linux/cpufreq.h>
22#include <linux/timex.h>
23#include <linux/io.h>
24
25#include <asm/msr.h>
26
27#define MMCR_BASE 0xfffef000 /* The default base address */
28#define OFFS_CPUCTL 0x2 /* CPU Control Register */
29
30static __u8 __iomem *cpuctl;
31
32#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
33 "sc520_freq", msg)
34#define PFX "sc520_freq: "
35
36static struct cpufreq_frequency_table sc520_freq_table[] = {
37 {0x01, 100000},
38 {0x02, 133000},
39 {0, CPUFREQ_TABLE_END},
40};
41
42static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
43{
44 u8 clockspeed_reg = *cpuctl;
45
46 switch (clockspeed_reg & 0x03) {
47 default:
48 printk(KERN_ERR PFX "error: cpuctl register has unexpected "
49 "value %02x\n", clockspeed_reg);
50 case 0x01:
51 return 100000;
52 case 0x02:
53 return 133000;
54 }
55}
56
57static void sc520_freq_set_cpu_state(unsigned int state)
58{
59
60 struct cpufreq_freqs freqs;
61 u8 clockspeed_reg;
62
63 freqs.old = sc520_freq_get_cpu_frequency(0);
64 freqs.new = sc520_freq_table[state].frequency;
65 freqs.cpu = 0; /* AMD Elan is UP */
66
67 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
68
69 dprintk("attempting to set frequency to %i kHz\n",
70 sc520_freq_table[state].frequency);
71
72 local_irq_disable();
73
74 clockspeed_reg = *cpuctl & ~0x03;
75 *cpuctl = clockspeed_reg | sc520_freq_table[state].index;
76
77 local_irq_enable();
78
79 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
80};
81
82static int sc520_freq_verify(struct cpufreq_policy *policy)
83{
84 return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]);
85}
86
87static int sc520_freq_target(struct cpufreq_policy *policy,
88 unsigned int target_freq,
89 unsigned int relation)
90{
91 unsigned int newstate = 0;
92
93 if (cpufreq_frequency_table_target(policy, sc520_freq_table,
94 target_freq, relation, &newstate))
95 return -EINVAL;
96
97 sc520_freq_set_cpu_state(newstate);
98
99 return 0;
100}
101
102
103/*
104 * Module init and exit code
105 */
106
107static int sc520_freq_cpu_init(struct cpufreq_policy *policy)
108{
109 struct cpuinfo_x86 *c = &cpu_data(0);
110 int result;
111
112 /* capability check */
113 if (c->x86_vendor != X86_VENDOR_AMD ||
114 c->x86 != 4 || c->x86_model != 9)
115 return -ENODEV;
116
117 /* cpuinfo and default policy values */
118 policy->cpuinfo.transition_latency = 1000000; /* 1ms */
119 policy->cur = sc520_freq_get_cpu_frequency(0);
120
121 result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table);
122 if (result)
123 return result;
124
125 cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu);
126
127 return 0;
128}
129
130
131static int sc520_freq_cpu_exit(struct cpufreq_policy *policy)
132{
133 cpufreq_frequency_table_put_attr(policy->cpu);
134 return 0;
135}
136
137
138static struct freq_attr *sc520_freq_attr[] = {
139 &cpufreq_freq_attr_scaling_available_freqs,
140 NULL,
141};
142
143
144static struct cpufreq_driver sc520_freq_driver = {
145 .get = sc520_freq_get_cpu_frequency,
146 .verify = sc520_freq_verify,
147 .target = sc520_freq_target,
148 .init = sc520_freq_cpu_init,
149 .exit = sc520_freq_cpu_exit,
150 .name = "sc520_freq",
151 .owner = THIS_MODULE,
152 .attr = sc520_freq_attr,
153};
154
155
156static int __init sc520_freq_init(void)
157{
158 struct cpuinfo_x86 *c = &cpu_data(0);
159 int err;
160
161 /* Test if we have the right hardware */
162 if (c->x86_vendor != X86_VENDOR_AMD ||
163 c->x86 != 4 || c->x86_model != 9) {
164 dprintk("no Elan SC520 processor found!\n");
165 return -ENODEV;
166 }
167 cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1);
168 if (!cpuctl) {
169 printk(KERN_ERR "sc520_freq: error: failed to remap memory\n");
170 return -ENOMEM;
171 }
172
173 err = cpufreq_register_driver(&sc520_freq_driver);
174 if (err)
175 iounmap(cpuctl);
176
177 return err;
178}
179
180
181static void __exit sc520_freq_exit(void)
182{
183 cpufreq_unregister_driver(&sc520_freq_driver);
184 iounmap(cpuctl);
185}
186
187
188MODULE_LICENSE("GPL");
189MODULE_AUTHOR("Sean Young <sean@mess.org>");
190MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU");
191
192module_init(sc520_freq_init);
193module_exit(sc520_freq_exit);
194
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
deleted file mode 100644
index 9b1ff37de46a..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ /dev/null
@@ -1,636 +0,0 @@
1/*
2 * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium
3 * M (part of the Centrino chipset).
4 *
5 * Since the original Pentium M, most new Intel CPUs support Enhanced
6 * SpeedStep.
7 *
8 * Despite the "SpeedStep" in the name, this is almost entirely unlike
9 * traditional SpeedStep.
10 *
11 * Modelled on speedstep.c
12 *
13 * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org>
14 */
15
16#include <linux/kernel.h>
17#include <linux/module.h>
18#include <linux/init.h>
19#include <linux/cpufreq.h>
20#include <linux/sched.h> /* current */
21#include <linux/delay.h>
22#include <linux/compiler.h>
23#include <linux/gfp.h>
24
25#include <asm/msr.h>
26#include <asm/processor.h>
27#include <asm/cpufeature.h>
28
29#define PFX "speedstep-centrino: "
30#define MAINTAINER "cpufreq@vger.kernel.org"
31
32#define dprintk(msg...) \
33 cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
34
35#define INTEL_MSR_RANGE (0xffff)
36
37struct cpu_id
38{
39 __u8 x86; /* CPU family */
40 __u8 x86_model; /* model */
41 __u8 x86_mask; /* stepping */
42};
43
44enum {
45 CPU_BANIAS,
46 CPU_DOTHAN_A1,
47 CPU_DOTHAN_A2,
48 CPU_DOTHAN_B0,
49 CPU_MP4HT_D0,
50 CPU_MP4HT_E0,
51};
52
53static const struct cpu_id cpu_ids[] = {
54 [CPU_BANIAS] = { 6, 9, 5 },
55 [CPU_DOTHAN_A1] = { 6, 13, 1 },
56 [CPU_DOTHAN_A2] = { 6, 13, 2 },
57 [CPU_DOTHAN_B0] = { 6, 13, 6 },
58 [CPU_MP4HT_D0] = {15, 3, 4 },
59 [CPU_MP4HT_E0] = {15, 4, 1 },
60};
61#define N_IDS ARRAY_SIZE(cpu_ids)
62
63struct cpu_model
64{
65 const struct cpu_id *cpu_id;
66 const char *model_name;
67 unsigned max_freq; /* max clock in kHz */
68
69 struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */
70};
71static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
72 const struct cpu_id *x);
73
74/* Operating points for current CPU */
75static DEFINE_PER_CPU(struct cpu_model *, centrino_model);
76static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu);
77
78static struct cpufreq_driver centrino_driver;
79
80#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE
81
82/* Computes the correct form for IA32_PERF_CTL MSR for a particular
83 frequency/voltage operating point; frequency in MHz, volts in mV.
84 This is stored as "index" in the structure. */
85#define OP(mhz, mv) \
86 { \
87 .frequency = (mhz) * 1000, \
88 .index = (((mhz)/100) << 8) | ((mv - 700) / 16) \
89 }
90
91/*
92 * These voltage tables were derived from the Intel Pentium M
93 * datasheet, document 25261202.pdf, Table 5. I have verified they
94 * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium
95 * M.
96 */
97
98/* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */
99static struct cpufreq_frequency_table banias_900[] =
100{
101 OP(600, 844),
102 OP(800, 988),
103 OP(900, 1004),
104 { .frequency = CPUFREQ_TABLE_END }
105};
106
107/* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */
108static struct cpufreq_frequency_table banias_1000[] =
109{
110 OP(600, 844),
111 OP(800, 972),
112 OP(900, 988),
113 OP(1000, 1004),
114 { .frequency = CPUFREQ_TABLE_END }
115};
116
117/* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */
118static struct cpufreq_frequency_table banias_1100[] =
119{
120 OP( 600, 956),
121 OP( 800, 1020),
122 OP( 900, 1100),
123 OP(1000, 1164),
124 OP(1100, 1180),
125 { .frequency = CPUFREQ_TABLE_END }
126};
127
128
129/* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */
130static struct cpufreq_frequency_table banias_1200[] =
131{
132 OP( 600, 956),
133 OP( 800, 1004),
134 OP( 900, 1020),
135 OP(1000, 1100),
136 OP(1100, 1164),
137 OP(1200, 1180),
138 { .frequency = CPUFREQ_TABLE_END }
139};
140
141/* Intel Pentium M processor 1.30GHz (Banias) */
142static struct cpufreq_frequency_table banias_1300[] =
143{
144 OP( 600, 956),
145 OP( 800, 1260),
146 OP(1000, 1292),
147 OP(1200, 1356),
148 OP(1300, 1388),
149 { .frequency = CPUFREQ_TABLE_END }
150};
151
152/* Intel Pentium M processor 1.40GHz (Banias) */
153static struct cpufreq_frequency_table banias_1400[] =
154{
155 OP( 600, 956),
156 OP( 800, 1180),
157 OP(1000, 1308),
158 OP(1200, 1436),
159 OP(1400, 1484),
160 { .frequency = CPUFREQ_TABLE_END }
161};
162
163/* Intel Pentium M processor 1.50GHz (Banias) */
164static struct cpufreq_frequency_table banias_1500[] =
165{
166 OP( 600, 956),
167 OP( 800, 1116),
168 OP(1000, 1228),
169 OP(1200, 1356),
170 OP(1400, 1452),
171 OP(1500, 1484),
172 { .frequency = CPUFREQ_TABLE_END }
173};
174
175/* Intel Pentium M processor 1.60GHz (Banias) */
176static struct cpufreq_frequency_table banias_1600[] =
177{
178 OP( 600, 956),
179 OP( 800, 1036),
180 OP(1000, 1164),
181 OP(1200, 1276),
182 OP(1400, 1420),
183 OP(1600, 1484),
184 { .frequency = CPUFREQ_TABLE_END }
185};
186
187/* Intel Pentium M processor 1.70GHz (Banias) */
188static struct cpufreq_frequency_table banias_1700[] =
189{
190 OP( 600, 956),
191 OP( 800, 1004),
192 OP(1000, 1116),
193 OP(1200, 1228),
194 OP(1400, 1308),
195 OP(1700, 1484),
196 { .frequency = CPUFREQ_TABLE_END }
197};
198#undef OP
199
200#define _BANIAS(cpuid, max, name) \
201{ .cpu_id = cpuid, \
202 .model_name = "Intel(R) Pentium(R) M processor " name "MHz", \
203 .max_freq = (max)*1000, \
204 .op_points = banias_##max, \
205}
206#define BANIAS(max) _BANIAS(&cpu_ids[CPU_BANIAS], max, #max)
207
208/* CPU models, their operating frequency range, and freq/voltage
209 operating points */
210static struct cpu_model models[] =
211{
212 _BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"),
213 BANIAS(1000),
214 BANIAS(1100),
215 BANIAS(1200),
216 BANIAS(1300),
217 BANIAS(1400),
218 BANIAS(1500),
219 BANIAS(1600),
220 BANIAS(1700),
221
222 /* NULL model_name is a wildcard */
223 { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL },
224 { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL },
225 { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL },
226 { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL },
227 { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL },
228
229 { NULL, }
230};
231#undef _BANIAS
232#undef BANIAS
233
234static int centrino_cpu_init_table(struct cpufreq_policy *policy)
235{
236 struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
237 struct cpu_model *model;
238
239 for(model = models; model->cpu_id != NULL; model++)
240 if (centrino_verify_cpu_id(cpu, model->cpu_id) &&
241 (model->model_name == NULL ||
242 strcmp(cpu->x86_model_id, model->model_name) == 0))
243 break;
244
245 if (model->cpu_id == NULL) {
246 /* No match at all */
247 dprintk("no support for CPU model \"%s\": "
248 "send /proc/cpuinfo to " MAINTAINER "\n",
249 cpu->x86_model_id);
250 return -ENOENT;
251 }
252
253 if (model->op_points == NULL) {
254 /* Matched a non-match */
255 dprintk("no table support for CPU model \"%s\"\n",
256 cpu->x86_model_id);
257 dprintk("try using the acpi-cpufreq driver\n");
258 return -ENOENT;
259 }
260
261 per_cpu(centrino_model, policy->cpu) = model;
262
263 dprintk("found \"%s\": max frequency: %dkHz\n",
264 model->model_name, model->max_freq);
265
266 return 0;
267}
268
269#else
270static inline int centrino_cpu_init_table(struct cpufreq_policy *policy)
271{
272 return -ENODEV;
273}
274#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */
275
276static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
277 const struct cpu_id *x)
278{
279 if ((c->x86 == x->x86) &&
280 (c->x86_model == x->x86_model) &&
281 (c->x86_mask == x->x86_mask))
282 return 1;
283 return 0;
284}
285
286/* To be called only after centrino_model is initialized */
287static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe)
288{
289 int i;
290
291 /*
292 * Extract clock in kHz from PERF_CTL value
293 * for centrino, as some DSDTs are buggy.
294 * Ideally, this can be done using the acpi_data structure.
295 */
296 if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) ||
297 (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) ||
298 (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) {
299 msr = (msr >> 8) & 0xff;
300 return msr * 100000;
301 }
302
303 if ((!per_cpu(centrino_model, cpu)) ||
304 (!per_cpu(centrino_model, cpu)->op_points))
305 return 0;
306
307 msr &= 0xffff;
308 for (i = 0;
309 per_cpu(centrino_model, cpu)->op_points[i].frequency
310 != CPUFREQ_TABLE_END;
311 i++) {
312 if (msr == per_cpu(centrino_model, cpu)->op_points[i].index)
313 return per_cpu(centrino_model, cpu)->
314 op_points[i].frequency;
315 }
316 if (failsafe)
317 return per_cpu(centrino_model, cpu)->op_points[i-1].frequency;
318 else
319 return 0;
320}
321
322/* Return the current CPU frequency in kHz */
323static unsigned int get_cur_freq(unsigned int cpu)
324{
325 unsigned l, h;
326 unsigned clock_freq;
327
328 rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h);
329 clock_freq = extract_clock(l, cpu, 0);
330
331 if (unlikely(clock_freq == 0)) {
332 /*
333 * On some CPUs, we can see transient MSR values (which are
334 * not present in _PSS), while CPU is doing some automatic
335 * P-state transition (like TM2). Get the last freq set
336 * in PERF_CTL.
337 */
338 rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h);
339 clock_freq = extract_clock(l, cpu, 1);
340 }
341 return clock_freq;
342}
343
344
345static int centrino_cpu_init(struct cpufreq_policy *policy)
346{
347 struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
348 unsigned freq;
349 unsigned l, h;
350 int ret;
351 int i;
352
353 /* Only Intel makes Enhanced Speedstep-capable CPUs */
354 if (cpu->x86_vendor != X86_VENDOR_INTEL ||
355 !cpu_has(cpu, X86_FEATURE_EST))
356 return -ENODEV;
357
358 if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC))
359 centrino_driver.flags |= CPUFREQ_CONST_LOOPS;
360
361 if (policy->cpu != 0)
362 return -ENODEV;
363
364 for (i = 0; i < N_IDS; i++)
365 if (centrino_verify_cpu_id(cpu, &cpu_ids[i]))
366 break;
367
368 if (i != N_IDS)
369 per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i];
370
371 if (!per_cpu(centrino_cpu, policy->cpu)) {
372 dprintk("found unsupported CPU with "
373 "Enhanced SpeedStep: send /proc/cpuinfo to "
374 MAINTAINER "\n");
375 return -ENODEV;
376 }
377
378 if (centrino_cpu_init_table(policy)) {
379 return -ENODEV;
380 }
381
382 /* Check to see if Enhanced SpeedStep is enabled, and try to
383 enable it if not. */
384 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
385
386 if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
387 l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
388 dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
389 wrmsr(MSR_IA32_MISC_ENABLE, l, h);
390
391 /* check to see if it stuck */
392 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
393 if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
394 printk(KERN_INFO PFX
395 "couldn't enable Enhanced SpeedStep\n");
396 return -ENODEV;
397 }
398 }
399
400 freq = get_cur_freq(policy->cpu);
401 policy->cpuinfo.transition_latency = 10000;
402 /* 10uS transition latency */
403 policy->cur = freq;
404
405 dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur);
406
407 ret = cpufreq_frequency_table_cpuinfo(policy,
408 per_cpu(centrino_model, policy->cpu)->op_points);
409 if (ret)
410 return (ret);
411
412 cpufreq_frequency_table_get_attr(
413 per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu);
414
415 return 0;
416}
417
418static int centrino_cpu_exit(struct cpufreq_policy *policy)
419{
420 unsigned int cpu = policy->cpu;
421
422 if (!per_cpu(centrino_model, cpu))
423 return -ENODEV;
424
425 cpufreq_frequency_table_put_attr(cpu);
426
427 per_cpu(centrino_model, cpu) = NULL;
428
429 return 0;
430}
431
432/**
433 * centrino_verify - verifies a new CPUFreq policy
434 * @policy: new policy
435 *
436 * Limit must be within this model's frequency range at least one
437 * border included.
438 */
439static int centrino_verify (struct cpufreq_policy *policy)
440{
441 return cpufreq_frequency_table_verify(policy,
442 per_cpu(centrino_model, policy->cpu)->op_points);
443}
444
445/**
446 * centrino_setpolicy - set a new CPUFreq policy
447 * @policy: new policy
448 * @target_freq: the target frequency
449 * @relation: how that frequency relates to achieved frequency
450 * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
451 *
452 * Sets a new CPUFreq policy.
453 */
454static int centrino_target (struct cpufreq_policy *policy,
455 unsigned int target_freq,
456 unsigned int relation)
457{
458 unsigned int newstate = 0;
459 unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu;
460 struct cpufreq_freqs freqs;
461 int retval = 0;
462 unsigned int j, k, first_cpu, tmp;
463 cpumask_var_t covered_cpus;
464
465 if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL)))
466 return -ENOMEM;
467
468 if (unlikely(per_cpu(centrino_model, cpu) == NULL)) {
469 retval = -ENODEV;
470 goto out;
471 }
472
473 if (unlikely(cpufreq_frequency_table_target(policy,
474 per_cpu(centrino_model, cpu)->op_points,
475 target_freq,
476 relation,
477 &newstate))) {
478 retval = -EINVAL;
479 goto out;
480 }
481
482 first_cpu = 1;
483 for_each_cpu(j, policy->cpus) {
484 int good_cpu;
485
486 /* cpufreq holds the hotplug lock, so we are safe here */
487 if (!cpu_online(j))
488 continue;
489
490 /*
491 * Support for SMP systems.
492 * Make sure we are running on CPU that wants to change freq
493 */
494 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
495 good_cpu = cpumask_any_and(policy->cpus,
496 cpu_online_mask);
497 else
498 good_cpu = j;
499
500 if (good_cpu >= nr_cpu_ids) {
501 dprintk("couldn't limit to CPUs in this domain\n");
502 retval = -EAGAIN;
503 if (first_cpu) {
504 /* We haven't started the transition yet. */
505 goto out;
506 }
507 break;
508 }
509
510 msr = per_cpu(centrino_model, cpu)->op_points[newstate].index;
511
512 if (first_cpu) {
513 rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h);
514 if (msr == (oldmsr & 0xffff)) {
515 dprintk("no change needed - msr was and needs "
516 "to be %x\n", oldmsr);
517 retval = 0;
518 goto out;
519 }
520
521 freqs.old = extract_clock(oldmsr, cpu, 0);
522 freqs.new = extract_clock(msr, cpu, 0);
523
524 dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
525 target_freq, freqs.old, freqs.new, msr);
526
527 for_each_cpu(k, policy->cpus) {
528 if (!cpu_online(k))
529 continue;
530 freqs.cpu = k;
531 cpufreq_notify_transition(&freqs,
532 CPUFREQ_PRECHANGE);
533 }
534
535 first_cpu = 0;
536 /* all but 16 LSB are reserved, treat them with care */
537 oldmsr &= ~0xffff;
538 msr &= 0xffff;
539 oldmsr |= msr;
540 }
541
542 wrmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, oldmsr, h);
543 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
544 break;
545
546 cpumask_set_cpu(j, covered_cpus);
547 }
548
549 for_each_cpu(k, policy->cpus) {
550 if (!cpu_online(k))
551 continue;
552 freqs.cpu = k;
553 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
554 }
555
556 if (unlikely(retval)) {
557 /*
558 * We have failed halfway through the frequency change.
559 * We have sent callbacks to policy->cpus and
560 * MSRs have already been written on coverd_cpus.
561 * Best effort undo..
562 */
563
564 for_each_cpu(j, covered_cpus)
565 wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h);
566
567 tmp = freqs.new;
568 freqs.new = freqs.old;
569 freqs.old = tmp;
570 for_each_cpu(j, policy->cpus) {
571 if (!cpu_online(j))
572 continue;
573 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
574 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
575 }
576 }
577 retval = 0;
578
579out:
580 free_cpumask_var(covered_cpus);
581 return retval;
582}
583
584static struct freq_attr* centrino_attr[] = {
585 &cpufreq_freq_attr_scaling_available_freqs,
586 NULL,
587};
588
589static struct cpufreq_driver centrino_driver = {
590 .name = "centrino", /* should be speedstep-centrino,
591 but there's a 16 char limit */
592 .init = centrino_cpu_init,
593 .exit = centrino_cpu_exit,
594 .verify = centrino_verify,
595 .target = centrino_target,
596 .get = get_cur_freq,
597 .attr = centrino_attr,
598 .owner = THIS_MODULE,
599};
600
601
602/**
603 * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver
604 *
605 * Initializes the Enhanced SpeedStep support. Returns -ENODEV on
606 * unsupported devices, -ENOENT if there's no voltage table for this
607 * particular CPU model, -EINVAL on problems during initiatization,
608 * and zero on success.
609 *
610 * This is quite picky. Not only does the CPU have to advertise the
611 * "est" flag in the cpuid capability flags, we look for a specific
612 * CPU model and stepping, and we need to have the exact model name in
613 * our voltage tables. That is, be paranoid about not releasing
614 * someone's valuable magic smoke.
615 */
616static int __init centrino_init(void)
617{
618 struct cpuinfo_x86 *cpu = &cpu_data(0);
619
620 if (!cpu_has(cpu, X86_FEATURE_EST))
621 return -ENODEV;
622
623 return cpufreq_register_driver(&centrino_driver);
624}
625
626static void __exit centrino_exit(void)
627{
628 cpufreq_unregister_driver(&centrino_driver);
629}
630
631MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>");
632MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors.");
633MODULE_LICENSE ("GPL");
634
635late_initcall(centrino_init);
636module_exit(centrino_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
deleted file mode 100644
index 561758e95180..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ /dev/null
@@ -1,452 +0,0 @@
1/*
2 * (C) 2001 Dave Jones, Arjan van de ven.
3 * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
4 *
5 * Licensed under the terms of the GNU GPL License version 2.
6 * Based upon reverse engineered information, and on Intel documentation
7 * for chipsets ICH2-M and ICH3-M.
8 *
9 * Many thanks to Ducrot Bruno for finding and fixing the last
10 * "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler
11 * for extensive testing.
12 *
13 * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
14 */
15
16
17/*********************************************************************
18 * SPEEDSTEP - DEFINITIONS *
19 *********************************************************************/
20
21#include <linux/kernel.h>
22#include <linux/module.h>
23#include <linux/init.h>
24#include <linux/cpufreq.h>
25#include <linux/pci.h>
26#include <linux/sched.h>
27
28#include "speedstep-lib.h"
29
30
31/* speedstep_chipset:
32 * It is necessary to know which chipset is used. As accesses to
33 * this device occur at various places in this module, we need a
34 * static struct pci_dev * pointing to that device.
35 */
36static struct pci_dev *speedstep_chipset_dev;
37
38
39/* speedstep_processor
40 */
41static enum speedstep_processor speedstep_processor;
42
43static u32 pmbase;
44
45/*
46 * There are only two frequency states for each processor. Values
47 * are in kHz for the time being.
48 */
49static struct cpufreq_frequency_table speedstep_freqs[] = {
50 {SPEEDSTEP_HIGH, 0},
51 {SPEEDSTEP_LOW, 0},
52 {0, CPUFREQ_TABLE_END},
53};
54
55
56#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
57 "speedstep-ich", msg)
58
59
60/**
61 * speedstep_find_register - read the PMBASE address
62 *
63 * Returns: -ENODEV if no register could be found
64 */
65static int speedstep_find_register(void)
66{
67 if (!speedstep_chipset_dev)
68 return -ENODEV;
69
70 /* get PMBASE */
71 pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase);
72 if (!(pmbase & 0x01)) {
73 printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
74 return -ENODEV;
75 }
76
77 pmbase &= 0xFFFFFFFE;
78 if (!pmbase) {
79 printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
80 return -ENODEV;
81 }
82
83 dprintk("pmbase is 0x%x\n", pmbase);
84 return 0;
85}
86
87/**
88 * speedstep_set_state - set the SpeedStep state
89 * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
90 *
91 * Tries to change the SpeedStep state. Can be called from
92 * smp_call_function_single.
93 */
94static void speedstep_set_state(unsigned int state)
95{
96 u8 pm2_blk;
97 u8 value;
98 unsigned long flags;
99
100 if (state > 0x1)
101 return;
102
103 /* Disable IRQs */
104 local_irq_save(flags);
105
106 /* read state */
107 value = inb(pmbase + 0x50);
108
109 dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
110
111 /* write new state */
112 value &= 0xFE;
113 value |= state;
114
115 dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase);
116
117 /* Disable bus master arbitration */
118 pm2_blk = inb(pmbase + 0x20);
119 pm2_blk |= 0x01;
120 outb(pm2_blk, (pmbase + 0x20));
121
122 /* Actual transition */
123 outb(value, (pmbase + 0x50));
124
125 /* Restore bus master arbitration */
126 pm2_blk &= 0xfe;
127 outb(pm2_blk, (pmbase + 0x20));
128
129 /* check if transition was successful */
130 value = inb(pmbase + 0x50);
131
132 /* Enable IRQs */
133 local_irq_restore(flags);
134
135 dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
136
137 if (state == (value & 0x1))
138 dprintk("change to %u MHz succeeded\n",
139 speedstep_get_frequency(speedstep_processor) / 1000);
140 else
141 printk(KERN_ERR "cpufreq: change failed - I/O error\n");
142
143 return;
144}
145
146/* Wrapper for smp_call_function_single. */
147static void _speedstep_set_state(void *_state)
148{
149 speedstep_set_state(*(unsigned int *)_state);
150}
151
152/**
153 * speedstep_activate - activate SpeedStep control in the chipset
154 *
155 * Tries to activate the SpeedStep status and control registers.
156 * Returns -EINVAL on an unsupported chipset, and zero on success.
157 */
158static int speedstep_activate(void)
159{
160 u16 value = 0;
161
162 if (!speedstep_chipset_dev)
163 return -EINVAL;
164
165 pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value);
166 if (!(value & 0x08)) {
167 value |= 0x08;
168 dprintk("activating SpeedStep (TM) registers\n");
169 pci_write_config_word(speedstep_chipset_dev, 0x00A0, value);
170 }
171
172 return 0;
173}
174
175
176/**
177 * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic
178 *
179 * Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to
180 * the LPC bridge / PM module which contains all power-management
181 * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected
182 * chipset, or zero on failure.
183 */
184static unsigned int speedstep_detect_chipset(void)
185{
186 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
187 PCI_DEVICE_ID_INTEL_82801DB_12,
188 PCI_ANY_ID, PCI_ANY_ID,
189 NULL);
190 if (speedstep_chipset_dev)
191 return 4; /* 4-M */
192
193 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
194 PCI_DEVICE_ID_INTEL_82801CA_12,
195 PCI_ANY_ID, PCI_ANY_ID,
196 NULL);
197 if (speedstep_chipset_dev)
198 return 3; /* 3-M */
199
200
201 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
202 PCI_DEVICE_ID_INTEL_82801BA_10,
203 PCI_ANY_ID, PCI_ANY_ID,
204 NULL);
205 if (speedstep_chipset_dev) {
206 /* speedstep.c causes lockups on Dell Inspirons 8000 and
207 * 8100 which use a pretty old revision of the 82815
208 * host brige. Abort on these systems.
209 */
210 static struct pci_dev *hostbridge;
211
212 hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL,
213 PCI_DEVICE_ID_INTEL_82815_MC,
214 PCI_ANY_ID, PCI_ANY_ID,
215 NULL);
216
217 if (!hostbridge)
218 return 2; /* 2-M */
219
220 if (hostbridge->revision < 5) {
221 dprintk("hostbridge does not support speedstep\n");
222 speedstep_chipset_dev = NULL;
223 pci_dev_put(hostbridge);
224 return 0;
225 }
226
227 pci_dev_put(hostbridge);
228 return 2; /* 2-M */
229 }
230
231 return 0;
232}
233
234static void get_freq_data(void *_speed)
235{
236 unsigned int *speed = _speed;
237
238 *speed = speedstep_get_frequency(speedstep_processor);
239}
240
241static unsigned int speedstep_get(unsigned int cpu)
242{
243 unsigned int speed;
244
245 /* You're supposed to ensure CPU is online. */
246 if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0)
247 BUG();
248
249 dprintk("detected %u kHz as current frequency\n", speed);
250 return speed;
251}
252
253/**
254 * speedstep_target - set a new CPUFreq policy
255 * @policy: new policy
256 * @target_freq: the target frequency
257 * @relation: how that frequency relates to achieved frequency
258 * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
259 *
260 * Sets a new CPUFreq policy.
261 */
262static int speedstep_target(struct cpufreq_policy *policy,
263 unsigned int target_freq,
264 unsigned int relation)
265{
266 unsigned int newstate = 0, policy_cpu;
267 struct cpufreq_freqs freqs;
268 int i;
269
270 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
271 target_freq, relation, &newstate))
272 return -EINVAL;
273
274 policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
275 freqs.old = speedstep_get(policy_cpu);
276 freqs.new = speedstep_freqs[newstate].frequency;
277 freqs.cpu = policy->cpu;
278
279 dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new);
280
281 /* no transition necessary */
282 if (freqs.old == freqs.new)
283 return 0;
284
285 for_each_cpu(i, policy->cpus) {
286 freqs.cpu = i;
287 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
288 }
289
290 smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate,
291 true);
292
293 for_each_cpu(i, policy->cpus) {
294 freqs.cpu = i;
295 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
296 }
297
298 return 0;
299}
300
301
302/**
303 * speedstep_verify - verifies a new CPUFreq policy
304 * @policy: new policy
305 *
306 * Limit must be within speedstep_low_freq and speedstep_high_freq, with
307 * at least one border included.
308 */
309static int speedstep_verify(struct cpufreq_policy *policy)
310{
311 return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
312}
313
314struct get_freqs {
315 struct cpufreq_policy *policy;
316 int ret;
317};
318
319static void get_freqs_on_cpu(void *_get_freqs)
320{
321 struct get_freqs *get_freqs = _get_freqs;
322
323 get_freqs->ret =
324 speedstep_get_freqs(speedstep_processor,
325 &speedstep_freqs[SPEEDSTEP_LOW].frequency,
326 &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
327 &get_freqs->policy->cpuinfo.transition_latency,
328 &speedstep_set_state);
329}
330
331static int speedstep_cpu_init(struct cpufreq_policy *policy)
332{
333 int result;
334 unsigned int policy_cpu, speed;
335 struct get_freqs gf;
336
337 /* only run on CPU to be set, or on its sibling */
338#ifdef CONFIG_SMP
339 cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
340#endif
341 policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
342
343 /* detect low and high frequency and transition latency */
344 gf.policy = policy;
345 smp_call_function_single(policy_cpu, get_freqs_on_cpu, &gf, 1);
346 if (gf.ret)
347 return gf.ret;
348
349 /* get current speed setting */
350 speed = speedstep_get(policy_cpu);
351 if (!speed)
352 return -EIO;
353
354 dprintk("currently at %s speed setting - %i MHz\n",
355 (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
356 ? "low" : "high",
357 (speed / 1000));
358
359 /* cpuinfo and default policy values */
360 policy->cur = speed;
361
362 result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
363 if (result)
364 return result;
365
366 cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
367
368 return 0;
369}
370
371
372static int speedstep_cpu_exit(struct cpufreq_policy *policy)
373{
374 cpufreq_frequency_table_put_attr(policy->cpu);
375 return 0;
376}
377
378static struct freq_attr *speedstep_attr[] = {
379 &cpufreq_freq_attr_scaling_available_freqs,
380 NULL,
381};
382
383
384static struct cpufreq_driver speedstep_driver = {
385 .name = "speedstep-ich",
386 .verify = speedstep_verify,
387 .target = speedstep_target,
388 .init = speedstep_cpu_init,
389 .exit = speedstep_cpu_exit,
390 .get = speedstep_get,
391 .owner = THIS_MODULE,
392 .attr = speedstep_attr,
393};
394
395
396/**
397 * speedstep_init - initializes the SpeedStep CPUFreq driver
398 *
399 * Initializes the SpeedStep support. Returns -ENODEV on unsupported
400 * devices, -EINVAL on problems during initiatization, and zero on
401 * success.
402 */
403static int __init speedstep_init(void)
404{
405 /* detect processor */
406 speedstep_processor = speedstep_detect_processor();
407 if (!speedstep_processor) {
408 dprintk("Intel(R) SpeedStep(TM) capable processor "
409 "not found\n");
410 return -ENODEV;
411 }
412
413 /* detect chipset */
414 if (!speedstep_detect_chipset()) {
415 dprintk("Intel(R) SpeedStep(TM) for this chipset not "
416 "(yet) available.\n");
417 return -ENODEV;
418 }
419
420 /* activate speedstep support */
421 if (speedstep_activate()) {
422 pci_dev_put(speedstep_chipset_dev);
423 return -EINVAL;
424 }
425
426 if (speedstep_find_register())
427 return -ENODEV;
428
429 return cpufreq_register_driver(&speedstep_driver);
430}
431
432
433/**
434 * speedstep_exit - unregisters SpeedStep support
435 *
436 * Unregisters SpeedStep support.
437 */
438static void __exit speedstep_exit(void)
439{
440 pci_dev_put(speedstep_chipset_dev);
441 cpufreq_unregister_driver(&speedstep_driver);
442}
443
444
445MODULE_AUTHOR("Dave Jones <davej@redhat.com>, "
446 "Dominik Brodowski <linux@brodo.de>");
447MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets "
448 "with ICH-M southbridges.");
449MODULE_LICENSE("GPL");
450
451module_init(speedstep_init);
452module_exit(speedstep_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
deleted file mode 100644
index a94ec6be69fa..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ /dev/null
@@ -1,481 +0,0 @@
1/*
2 * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
3 *
4 * Licensed under the terms of the GNU GPL License version 2.
5 *
6 * Library for common functions for Intel SpeedStep v.1 and v.2 support
7 *
8 * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
9 */
10
11#include <linux/kernel.h>
12#include <linux/module.h>
13#include <linux/moduleparam.h>
14#include <linux/init.h>
15#include <linux/cpufreq.h>
16
17#include <asm/msr.h>
18#include <asm/tsc.h>
19#include "speedstep-lib.h"
20
21#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
22 "speedstep-lib", msg)
23
24#define PFX "speedstep-lib: "
25
26#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
27static int relaxed_check;
28#else
29#define relaxed_check 0
30#endif
31
32/*********************************************************************
33 * GET PROCESSOR CORE SPEED IN KHZ *
34 *********************************************************************/
35
36static unsigned int pentium3_get_frequency(enum speedstep_processor processor)
37{
38 /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
39 struct {
40 unsigned int ratio; /* Frequency Multiplier (x10) */
41 u8 bitmap; /* power on configuration bits
42 [27, 25:22] (in MSR 0x2a) */
43 } msr_decode_mult[] = {
44 { 30, 0x01 },
45 { 35, 0x05 },
46 { 40, 0x02 },
47 { 45, 0x06 },
48 { 50, 0x00 },
49 { 55, 0x04 },
50 { 60, 0x0b },
51 { 65, 0x0f },
52 { 70, 0x09 },
53 { 75, 0x0d },
54 { 80, 0x0a },
55 { 85, 0x26 },
56 { 90, 0x20 },
57 { 100, 0x2b },
58 { 0, 0xff } /* error or unknown value */
59 };
60
61 /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */
62 struct {
63 unsigned int value; /* Front Side Bus speed in MHz */
64 u8 bitmap; /* power on configuration bits [18: 19]
65 (in MSR 0x2a) */
66 } msr_decode_fsb[] = {
67 { 66, 0x0 },
68 { 100, 0x2 },
69 { 133, 0x1 },
70 { 0, 0xff}
71 };
72
73 u32 msr_lo, msr_tmp;
74 int i = 0, j = 0;
75
76 /* read MSR 0x2a - we only need the low 32 bits */
77 rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
78 dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
79 msr_tmp = msr_lo;
80
81 /* decode the FSB */
82 msr_tmp &= 0x00c0000;
83 msr_tmp >>= 18;
84 while (msr_tmp != msr_decode_fsb[i].bitmap) {
85 if (msr_decode_fsb[i].bitmap == 0xff)
86 return 0;
87 i++;
88 }
89
90 /* decode the multiplier */
91 if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) {
92 dprintk("workaround for early PIIIs\n");
93 msr_lo &= 0x03c00000;
94 } else
95 msr_lo &= 0x0bc00000;
96 msr_lo >>= 22;
97 while (msr_lo != msr_decode_mult[j].bitmap) {
98 if (msr_decode_mult[j].bitmap == 0xff)
99 return 0;
100 j++;
101 }
102
103 dprintk("speed is %u\n",
104 (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100));
105
106 return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100;
107}
108
109
110static unsigned int pentiumM_get_frequency(void)
111{
112 u32 msr_lo, msr_tmp;
113
114 rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
115 dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
116
117 /* see table B-2 of 24547212.pdf */
118 if (msr_lo & 0x00040000) {
119 printk(KERN_DEBUG PFX "PM - invalid FSB: 0x%x 0x%x\n",
120 msr_lo, msr_tmp);
121 return 0;
122 }
123
124 msr_tmp = (msr_lo >> 22) & 0x1f;
125 dprintk("bits 22-26 are 0x%x, speed is %u\n",
126 msr_tmp, (msr_tmp * 100 * 1000));
127
128 return msr_tmp * 100 * 1000;
129}
130
131static unsigned int pentium_core_get_frequency(void)
132{
133 u32 fsb = 0;
134 u32 msr_lo, msr_tmp;
135 int ret;
136
137 rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp);
138 /* see table B-2 of 25366920.pdf */
139 switch (msr_lo & 0x07) {
140 case 5:
141 fsb = 100000;
142 break;
143 case 1:
144 fsb = 133333;
145 break;
146 case 3:
147 fsb = 166667;
148 break;
149 case 2:
150 fsb = 200000;
151 break;
152 case 0:
153 fsb = 266667;
154 break;
155 case 4:
156 fsb = 333333;
157 break;
158 default:
159 printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value");
160 }
161
162 rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
163 dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n",
164 msr_lo, msr_tmp);
165
166 msr_tmp = (msr_lo >> 22) & 0x1f;
167 dprintk("bits 22-26 are 0x%x, speed is %u\n",
168 msr_tmp, (msr_tmp * fsb));
169
170 ret = (msr_tmp * fsb);
171 return ret;
172}
173
174
175static unsigned int pentium4_get_frequency(void)
176{
177 struct cpuinfo_x86 *c = &boot_cpu_data;
178 u32 msr_lo, msr_hi, mult;
179 unsigned int fsb = 0;
180 unsigned int ret;
181 u8 fsb_code;
182
183 /* Pentium 4 Model 0 and 1 do not have the Core Clock Frequency
184 * to System Bus Frequency Ratio Field in the Processor Frequency
185 * Configuration Register of the MSR. Therefore the current
186 * frequency cannot be calculated and has to be measured.
187 */
188 if (c->x86_model < 2)
189 return cpu_khz;
190
191 rdmsr(0x2c, msr_lo, msr_hi);
192
193 dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi);
194
195 /* decode the FSB: see IA-32 Intel (C) Architecture Software
196 * Developer's Manual, Volume 3: System Prgramming Guide,
197 * revision #12 in Table B-1: MSRs in the Pentium 4 and
198 * Intel Xeon Processors, on page B-4 and B-5.
199 */
200 fsb_code = (msr_lo >> 16) & 0x7;
201 switch (fsb_code) {
202 case 0:
203 fsb = 100 * 1000;
204 break;
205 case 1:
206 fsb = 13333 * 10;
207 break;
208 case 2:
209 fsb = 200 * 1000;
210 break;
211 }
212
213 if (!fsb)
214 printk(KERN_DEBUG PFX "couldn't detect FSB speed. "
215 "Please send an e-mail to <linux@brodo.de>\n");
216
217 /* Multiplier. */
218 mult = msr_lo >> 24;
219
220 dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n",
221 fsb, mult, (fsb * mult));
222
223 ret = (fsb * mult);
224 return ret;
225}
226
227
228/* Warning: may get called from smp_call_function_single. */
229unsigned int speedstep_get_frequency(enum speedstep_processor processor)
230{
231 switch (processor) {
232 case SPEEDSTEP_CPU_PCORE:
233 return pentium_core_get_frequency();
234 case SPEEDSTEP_CPU_PM:
235 return pentiumM_get_frequency();
236 case SPEEDSTEP_CPU_P4D:
237 case SPEEDSTEP_CPU_P4M:
238 return pentium4_get_frequency();
239 case SPEEDSTEP_CPU_PIII_T:
240 case SPEEDSTEP_CPU_PIII_C:
241 case SPEEDSTEP_CPU_PIII_C_EARLY:
242 return pentium3_get_frequency(processor);
243 default:
244 return 0;
245 };
246 return 0;
247}
248EXPORT_SYMBOL_GPL(speedstep_get_frequency);
249
250
251/*********************************************************************
252 * DETECT SPEEDSTEP-CAPABLE PROCESSOR *
253 *********************************************************************/
254
255unsigned int speedstep_detect_processor(void)
256{
257 struct cpuinfo_x86 *c = &cpu_data(0);
258 u32 ebx, msr_lo, msr_hi;
259
260 dprintk("x86: %x, model: %x\n", c->x86, c->x86_model);
261
262 if ((c->x86_vendor != X86_VENDOR_INTEL) ||
263 ((c->x86 != 6) && (c->x86 != 0xF)))
264 return 0;
265
266 if (c->x86 == 0xF) {
267 /* Intel Mobile Pentium 4-M
268 * or Intel Mobile Pentium 4 with 533 MHz FSB */
269 if (c->x86_model != 2)
270 return 0;
271
272 ebx = cpuid_ebx(0x00000001);
273 ebx &= 0x000000FF;
274
275 dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask);
276
277 switch (c->x86_mask) {
278 case 4:
279 /*
280 * B-stepping [M-P4-M]
281 * sample has ebx = 0x0f, production has 0x0e.
282 */
283 if ((ebx == 0x0e) || (ebx == 0x0f))
284 return SPEEDSTEP_CPU_P4M;
285 break;
286 case 7:
287 /*
288 * C-stepping [M-P4-M]
289 * needs to have ebx=0x0e, else it's a celeron:
290 * cf. 25130917.pdf / page 7, footnote 5 even
291 * though 25072120.pdf / page 7 doesn't say
292 * samples are only of B-stepping...
293 */
294 if (ebx == 0x0e)
295 return SPEEDSTEP_CPU_P4M;
296 break;
297 case 9:
298 /*
299 * D-stepping [M-P4-M or M-P4/533]
300 *
301 * this is totally strange: CPUID 0x0F29 is
302 * used by M-P4-M, M-P4/533 and(!) Celeron CPUs.
303 * The latter need to be sorted out as they don't
304 * support speedstep.
305 * Celerons with CPUID 0x0F29 may have either
306 * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything
307 * specific.
308 * M-P4-Ms may have either ebx=0xe or 0xf [see above]
309 * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf]
310 * also, M-P4M HTs have ebx=0x8, too
311 * For now, they are distinguished by the model_id
312 * string
313 */
314 if ((ebx == 0x0e) ||
315 (strstr(c->x86_model_id,
316 "Mobile Intel(R) Pentium(R) 4") != NULL))
317 return SPEEDSTEP_CPU_P4M;
318 break;
319 default:
320 break;
321 }
322 return 0;
323 }
324
325 switch (c->x86_model) {
326 case 0x0B: /* Intel PIII [Tualatin] */
327 /* cpuid_ebx(1) is 0x04 for desktop PIII,
328 * 0x06 for mobile PIII-M */
329 ebx = cpuid_ebx(0x00000001);
330 dprintk("ebx is %x\n", ebx);
331
332 ebx &= 0x000000FF;
333
334 if (ebx != 0x06)
335 return 0;
336
337 /* So far all PIII-M processors support SpeedStep. See
338 * Intel's 24540640.pdf of June 2003
339 */
340 return SPEEDSTEP_CPU_PIII_T;
341
342 case 0x08: /* Intel PIII [Coppermine] */
343
344 /* all mobile PIII Coppermines have FSB 100 MHz
345 * ==> sort out a few desktop PIIIs. */
346 rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi);
347 dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n",
348 msr_lo, msr_hi);
349 msr_lo &= 0x00c0000;
350 if (msr_lo != 0x0080000)
351 return 0;
352
353 /*
354 * If the processor is a mobile version,
355 * platform ID has bit 50 set
356 * it has SpeedStep technology if either
357 * bit 56 or 57 is set
358 */
359 rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi);
360 dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n",
361 msr_lo, msr_hi);
362 if ((msr_hi & (1<<18)) &&
363 (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
364 if (c->x86_mask == 0x01) {
365 dprintk("early PIII version\n");
366 return SPEEDSTEP_CPU_PIII_C_EARLY;
367 } else
368 return SPEEDSTEP_CPU_PIII_C;
369 }
370
371 default:
372 return 0;
373 }
374}
375EXPORT_SYMBOL_GPL(speedstep_detect_processor);
376
377
378/*********************************************************************
379 * DETECT SPEEDSTEP SPEEDS *
380 *********************************************************************/
381
382unsigned int speedstep_get_freqs(enum speedstep_processor processor,
383 unsigned int *low_speed,
384 unsigned int *high_speed,
385 unsigned int *transition_latency,
386 void (*set_state) (unsigned int state))
387{
388 unsigned int prev_speed;
389 unsigned int ret = 0;
390 unsigned long flags;
391 struct timeval tv1, tv2;
392
393 if ((!processor) || (!low_speed) || (!high_speed) || (!set_state))
394 return -EINVAL;
395
396 dprintk("trying to determine both speeds\n");
397
398 /* get current speed */
399 prev_speed = speedstep_get_frequency(processor);
400 if (!prev_speed)
401 return -EIO;
402
403 dprintk("previous speed is %u\n", prev_speed);
404
405 local_irq_save(flags);
406
407 /* switch to low state */
408 set_state(SPEEDSTEP_LOW);
409 *low_speed = speedstep_get_frequency(processor);
410 if (!*low_speed) {
411 ret = -EIO;
412 goto out;
413 }
414
415 dprintk("low speed is %u\n", *low_speed);
416
417 /* start latency measurement */
418 if (transition_latency)
419 do_gettimeofday(&tv1);
420
421 /* switch to high state */
422 set_state(SPEEDSTEP_HIGH);
423
424 /* end latency measurement */
425 if (transition_latency)
426 do_gettimeofday(&tv2);
427
428 *high_speed = speedstep_get_frequency(processor);
429 if (!*high_speed) {
430 ret = -EIO;
431 goto out;
432 }
433
434 dprintk("high speed is %u\n", *high_speed);
435
436 if (*low_speed == *high_speed) {
437 ret = -ENODEV;
438 goto out;
439 }
440
441 /* switch to previous state, if necessary */
442 if (*high_speed != prev_speed)
443 set_state(SPEEDSTEP_LOW);
444
445 if (transition_latency) {
446 *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC +
447 tv2.tv_usec - tv1.tv_usec;
448 dprintk("transition latency is %u uSec\n", *transition_latency);
449
450 /* convert uSec to nSec and add 20% for safety reasons */
451 *transition_latency *= 1200;
452
453 /* check if the latency measurement is too high or too low
454 * and set it to a safe value (500uSec) in that case
455 */
456 if (*transition_latency > 10000000 ||
457 *transition_latency < 50000) {
458 printk(KERN_WARNING PFX "frequency transition "
459 "measured seems out of range (%u "
460 "nSec), falling back to a safe one of"
461 "%u nSec.\n",
462 *transition_latency, 500000);
463 *transition_latency = 500000;
464 }
465 }
466
467out:
468 local_irq_restore(flags);
469 return ret;
470}
471EXPORT_SYMBOL_GPL(speedstep_get_freqs);
472
473#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
474module_param(relaxed_check, int, 0444);
475MODULE_PARM_DESC(relaxed_check,
476 "Don't do all checks for speedstep capability.");
477#endif
478
479MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
480MODULE_DESCRIPTION("Library for Intel SpeedStep 1 or 2 cpufreq drivers.");
481MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
deleted file mode 100644
index 70d9cea1219d..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
+++ /dev/null
@@ -1,49 +0,0 @@
1/*
2 * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
3 *
4 * Licensed under the terms of the GNU GPL License version 2.
5 *
6 * Library for common functions for Intel SpeedStep v.1 and v.2 support
7 *
8 * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
9 */
10
11
12
13/* processors */
14enum speedstep_processor {
15 SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001, /* Coppermine core */
16 SPEEDSTEP_CPU_PIII_C = 0x00000002, /* Coppermine core */
17 SPEEDSTEP_CPU_PIII_T = 0x00000003, /* Tualatin core */
18 SPEEDSTEP_CPU_P4M = 0x00000004, /* P4-M */
19/* the following processors are not speedstep-capable and are not auto-detected
20 * in speedstep_detect_processor(). However, their speed can be detected using
21 * the speedstep_get_frequency() call. */
22 SPEEDSTEP_CPU_PM = 0xFFFFFF03, /* Pentium M */
23 SPEEDSTEP_CPU_P4D = 0xFFFFFF04, /* desktop P4 */
24 SPEEDSTEP_CPU_PCORE = 0xFFFFFF05, /* Core */
25};
26
27/* speedstep states -- only two of them */
28
29#define SPEEDSTEP_HIGH 0x00000000
30#define SPEEDSTEP_LOW 0x00000001
31
32
33/* detect a speedstep-capable processor */
34extern enum speedstep_processor speedstep_detect_processor(void);
35
36/* detect the current speed (in khz) of the processor */
37extern unsigned int speedstep_get_frequency(enum speedstep_processor processor);
38
39
40/* detect the low and high speeds of the processor. The callback
41 * set_state"'s first argument is either SPEEDSTEP_HIGH or
42 * SPEEDSTEP_LOW; the second argument is zero so that no
43 * cpufreq_notify_transition calls are initiated.
44 */
45extern unsigned int speedstep_get_freqs(enum speedstep_processor processor,
46 unsigned int *low_speed,
47 unsigned int *high_speed,
48 unsigned int *transition_latency,
49 void (*set_state) (unsigned int state));
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
deleted file mode 100644
index 91bc25b67bc1..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ /dev/null
@@ -1,467 +0,0 @@
1/*
2 * Intel SpeedStep SMI driver.
3 *
4 * (C) 2003 Hiroshi Miura <miura@da-cha.org>
5 *
6 * Licensed under the terms of the GNU GPL License version 2.
7 *
8 */
9
10
11/*********************************************************************
12 * SPEEDSTEP - DEFINITIONS *
13 *********************************************************************/
14
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/moduleparam.h>
18#include <linux/init.h>
19#include <linux/cpufreq.h>
20#include <linux/delay.h>
21#include <linux/io.h>
22#include <asm/ist.h>
23
24#include "speedstep-lib.h"
25
26/* speedstep system management interface port/command.
27 *
28 * These parameters are got from IST-SMI BIOS call.
29 * If user gives it, these are used.
30 *
31 */
32static int smi_port;
33static int smi_cmd;
34static unsigned int smi_sig;
35
36/* info about the processor */
37static enum speedstep_processor speedstep_processor;
38
39/*
40 * There are only two frequency states for each processor. Values
41 * are in kHz for the time being.
42 */
43static struct cpufreq_frequency_table speedstep_freqs[] = {
44 {SPEEDSTEP_HIGH, 0},
45 {SPEEDSTEP_LOW, 0},
46 {0, CPUFREQ_TABLE_END},
47};
48
49#define GET_SPEEDSTEP_OWNER 0
50#define GET_SPEEDSTEP_STATE 1
51#define SET_SPEEDSTEP_STATE 2
52#define GET_SPEEDSTEP_FREQS 4
53
54/* how often shall the SMI call be tried if it failed, e.g. because
55 * of DMA activity going on? */
56#define SMI_TRIES 5
57
58#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
59 "speedstep-smi", msg)
60
61/**
62 * speedstep_smi_ownership
63 */
64static int speedstep_smi_ownership(void)
65{
66 u32 command, result, magic, dummy;
67 u32 function = GET_SPEEDSTEP_OWNER;
68 unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation";
69
70 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
71 magic = virt_to_phys(magic_data);
72
73 dprintk("trying to obtain ownership with command %x at port %x\n",
74 command, smi_port);
75
76 __asm__ __volatile__(
77 "push %%ebp\n"
78 "out %%al, (%%dx)\n"
79 "pop %%ebp\n"
80 : "=D" (result),
81 "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
82 "=S" (dummy)
83 : "a" (command), "b" (function), "c" (0), "d" (smi_port),
84 "D" (0), "S" (magic)
85 : "memory"
86 );
87
88 dprintk("result is %x\n", result);
89
90 return result;
91}
92
93/**
94 * speedstep_smi_get_freqs - get SpeedStep preferred & current freq.
95 * @low: the low frequency value is placed here
96 * @high: the high frequency value is placed here
97 *
98 * Only available on later SpeedStep-enabled systems, returns false results or
99 * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing
100 * shows that the latter occurs if !(ist_info.event & 0xFFFF).
101 */
102static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high)
103{
104 u32 command, result = 0, edi, high_mhz, low_mhz, dummy;
105 u32 state = 0;
106 u32 function = GET_SPEEDSTEP_FREQS;
107
108 if (!(ist_info.event & 0xFFFF)) {
109 dprintk("bug #1422 -- can't read freqs from BIOS\n");
110 return -ENODEV;
111 }
112
113 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
114
115 dprintk("trying to determine frequencies with command %x at port %x\n",
116 command, smi_port);
117
118 __asm__ __volatile__(
119 "push %%ebp\n"
120 "out %%al, (%%dx)\n"
121 "pop %%ebp"
122 : "=a" (result),
123 "=b" (high_mhz),
124 "=c" (low_mhz),
125 "=d" (state), "=D" (edi), "=S" (dummy)
126 : "a" (command),
127 "b" (function),
128 "c" (state),
129 "d" (smi_port), "S" (0), "D" (0)
130 );
131
132 dprintk("result %x, low_freq %u, high_freq %u\n",
133 result, low_mhz, high_mhz);
134
135 /* abort if results are obviously incorrect... */
136 if ((high_mhz + low_mhz) < 600)
137 return -EINVAL;
138
139 *high = high_mhz * 1000;
140 *low = low_mhz * 1000;
141
142 return result;
143}
144
145/**
146 * speedstep_get_state - set the SpeedStep state
147 * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
148 *
149 */
150static int speedstep_get_state(void)
151{
152 u32 function = GET_SPEEDSTEP_STATE;
153 u32 result, state, edi, command, dummy;
154
155 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
156
157 dprintk("trying to determine current setting with command %x "
158 "at port %x\n", command, smi_port);
159
160 __asm__ __volatile__(
161 "push %%ebp\n"
162 "out %%al, (%%dx)\n"
163 "pop %%ebp\n"
164 : "=a" (result),
165 "=b" (state), "=D" (edi),
166 "=c" (dummy), "=d" (dummy), "=S" (dummy)
167 : "a" (command), "b" (function), "c" (0),
168 "d" (smi_port), "S" (0), "D" (0)
169 );
170
171 dprintk("state is %x, result is %x\n", state, result);
172
173 return state & 1;
174}
175
176
177/**
178 * speedstep_set_state - set the SpeedStep state
179 * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
180 *
181 */
182static void speedstep_set_state(unsigned int state)
183{
184 unsigned int result = 0, command, new_state, dummy;
185 unsigned long flags;
186 unsigned int function = SET_SPEEDSTEP_STATE;
187 unsigned int retry = 0;
188
189 if (state > 0x1)
190 return;
191
192 /* Disable IRQs */
193 local_irq_save(flags);
194
195 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
196
197 dprintk("trying to set frequency to state %u "
198 "with command %x at port %x\n",
199 state, command, smi_port);
200
201 do {
202 if (retry) {
203 dprintk("retry %u, previous result %u, waiting...\n",
204 retry, result);
205 mdelay(retry * 50);
206 }
207 retry++;
208 __asm__ __volatile__(
209 "push %%ebp\n"
210 "out %%al, (%%dx)\n"
211 "pop %%ebp"
212 : "=b" (new_state), "=D" (result),
213 "=c" (dummy), "=a" (dummy),
214 "=d" (dummy), "=S" (dummy)
215 : "a" (command), "b" (function), "c" (state),
216 "d" (smi_port), "S" (0), "D" (0)
217 );
218 } while ((new_state != state) && (retry <= SMI_TRIES));
219
220 /* enable IRQs */
221 local_irq_restore(flags);
222
223 if (new_state == state)
224 dprintk("change to %u MHz succeeded after %u tries "
225 "with result %u\n",
226 (speedstep_freqs[new_state].frequency / 1000),
227 retry, result);
228 else
229 printk(KERN_ERR "cpufreq: change to state %u "
230 "failed with new_state %u and result %u\n",
231 state, new_state, result);
232
233 return;
234}
235
236
237/**
238 * speedstep_target - set a new CPUFreq policy
239 * @policy: new policy
240 * @target_freq: new freq
241 * @relation:
242 *
243 * Sets a new CPUFreq policy/freq.
244 */
245static int speedstep_target(struct cpufreq_policy *policy,
246 unsigned int target_freq, unsigned int relation)
247{
248 unsigned int newstate = 0;
249 struct cpufreq_freqs freqs;
250
251 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
252 target_freq, relation, &newstate))
253 return -EINVAL;
254
255 freqs.old = speedstep_freqs[speedstep_get_state()].frequency;
256 freqs.new = speedstep_freqs[newstate].frequency;
257 freqs.cpu = 0; /* speedstep.c is UP only driver */
258
259 if (freqs.old == freqs.new)
260 return 0;
261
262 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
263 speedstep_set_state(newstate);
264 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
265
266 return 0;
267}
268
269
270/**
271 * speedstep_verify - verifies a new CPUFreq policy
272 * @policy: new policy
273 *
274 * Limit must be within speedstep_low_freq and speedstep_high_freq, with
275 * at least one border included.
276 */
277static int speedstep_verify(struct cpufreq_policy *policy)
278{
279 return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
280}
281
282
283static int speedstep_cpu_init(struct cpufreq_policy *policy)
284{
285 int result;
286 unsigned int speed, state;
287 unsigned int *low, *high;
288
289 /* capability check */
290 if (policy->cpu != 0)
291 return -ENODEV;
292
293 result = speedstep_smi_ownership();
294 if (result) {
295 dprintk("fails in acquiring ownership of a SMI interface.\n");
296 return -EINVAL;
297 }
298
299 /* detect low and high frequency */
300 low = &speedstep_freqs[SPEEDSTEP_LOW].frequency;
301 high = &speedstep_freqs[SPEEDSTEP_HIGH].frequency;
302
303 result = speedstep_smi_get_freqs(low, high);
304 if (result) {
305 /* fall back to speedstep_lib.c dection mechanism:
306 * try both states out */
307 dprintk("could not detect low and high frequencies "
308 "by SMI call.\n");
309 result = speedstep_get_freqs(speedstep_processor,
310 low, high,
311 NULL,
312 &speedstep_set_state);
313
314 if (result) {
315 dprintk("could not detect two different speeds"
316 " -- aborting.\n");
317 return result;
318 } else
319 dprintk("workaround worked.\n");
320 }
321
322 /* get current speed setting */
323 state = speedstep_get_state();
324 speed = speedstep_freqs[state].frequency;
325
326 dprintk("currently at %s speed setting - %i MHz\n",
327 (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
328 ? "low" : "high",
329 (speed / 1000));
330
331 /* cpuinfo and default policy values */
332 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
333 policy->cur = speed;
334
335 result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
336 if (result)
337 return result;
338
339 cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
340
341 return 0;
342}
343
344static int speedstep_cpu_exit(struct cpufreq_policy *policy)
345{
346 cpufreq_frequency_table_put_attr(policy->cpu);
347 return 0;
348}
349
350static unsigned int speedstep_get(unsigned int cpu)
351{
352 if (cpu)
353 return -ENODEV;
354 return speedstep_get_frequency(speedstep_processor);
355}
356
357
358static int speedstep_resume(struct cpufreq_policy *policy)
359{
360 int result = speedstep_smi_ownership();
361
362 if (result)
363 dprintk("fails in re-acquiring ownership of a SMI interface.\n");
364
365 return result;
366}
367
368static struct freq_attr *speedstep_attr[] = {
369 &cpufreq_freq_attr_scaling_available_freqs,
370 NULL,
371};
372
373static struct cpufreq_driver speedstep_driver = {
374 .name = "speedstep-smi",
375 .verify = speedstep_verify,
376 .target = speedstep_target,
377 .init = speedstep_cpu_init,
378 .exit = speedstep_cpu_exit,
379 .get = speedstep_get,
380 .resume = speedstep_resume,
381 .owner = THIS_MODULE,
382 .attr = speedstep_attr,
383};
384
385/**
386 * speedstep_init - initializes the SpeedStep CPUFreq driver
387 *
388 * Initializes the SpeedStep support. Returns -ENODEV on unsupported
389 * BIOS, -EINVAL on problems during initiatization, and zero on
390 * success.
391 */
392static int __init speedstep_init(void)
393{
394 speedstep_processor = speedstep_detect_processor();
395
396 switch (speedstep_processor) {
397 case SPEEDSTEP_CPU_PIII_T:
398 case SPEEDSTEP_CPU_PIII_C:
399 case SPEEDSTEP_CPU_PIII_C_EARLY:
400 break;
401 default:
402 speedstep_processor = 0;
403 }
404
405 if (!speedstep_processor) {
406 dprintk("No supported Intel CPU detected.\n");
407 return -ENODEV;
408 }
409
410 dprintk("signature:0x%.8lx, command:0x%.8lx, "
411 "event:0x%.8lx, perf_level:0x%.8lx.\n",
412 ist_info.signature, ist_info.command,
413 ist_info.event, ist_info.perf_level);
414
415 /* Error if no IST-SMI BIOS or no PARM
416 sig= 'ISGE' aka 'Intel Speedstep Gate E' */
417 if ((ist_info.signature != 0x47534943) && (
418 (smi_port == 0) || (smi_cmd == 0)))
419 return -ENODEV;
420
421 if (smi_sig == 1)
422 smi_sig = 0x47534943;
423 else
424 smi_sig = ist_info.signature;
425
426 /* setup smi_port from MODLULE_PARM or BIOS */
427 if ((smi_port > 0xff) || (smi_port < 0))
428 return -EINVAL;
429 else if (smi_port == 0)
430 smi_port = ist_info.command & 0xff;
431
432 if ((smi_cmd > 0xff) || (smi_cmd < 0))
433 return -EINVAL;
434 else if (smi_cmd == 0)
435 smi_cmd = (ist_info.command >> 16) & 0xff;
436
437 return cpufreq_register_driver(&speedstep_driver);
438}
439
440
441/**
442 * speedstep_exit - unregisters SpeedStep support
443 *
444 * Unregisters SpeedStep support.
445 */
446static void __exit speedstep_exit(void)
447{
448 cpufreq_unregister_driver(&speedstep_driver);
449}
450
451module_param(smi_port, int, 0444);
452module_param(smi_cmd, int, 0444);
453module_param(smi_sig, uint, 0444);
454
455MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value "
456 "-- Intel's default setting is 0xb2");
457MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value "
458 "-- Intel's default setting is 0x82");
459MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the "
460 "SMI interface.");
461
462MODULE_AUTHOR("Hiroshi Miura");
463MODULE_DESCRIPTION("Speedstep driver for IST applet SMI interface.");
464MODULE_LICENSE("GPL");
465
466module_init(speedstep_init);
467module_exit(speedstep_exit);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index df86bc8c859d..1edf5ba4fb2b 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -29,10 +29,10 @@
29 29
30static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) 30static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
31{ 31{
32 u64 misc_enable;
33
32 /* Unmask CPUID levels if masked: */ 34 /* Unmask CPUID levels if masked: */
33 if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { 35 if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
34 u64 misc_enable;
35
36 rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); 36 rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
37 37
38 if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) { 38 if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) {
@@ -118,8 +118,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
118 * (model 2) with the same problem. 118 * (model 2) with the same problem.
119 */ 119 */
120 if (c->x86 == 15) { 120 if (c->x86 == 15) {
121 u64 misc_enable;
122
123 rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); 121 rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
124 122
125 if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { 123 if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) {
@@ -130,6 +128,19 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
130 } 128 }
131 } 129 }
132#endif 130#endif
131
132 /*
133 * If fast string is not enabled in IA32_MISC_ENABLE for any reason,
134 * clear the fast string and enhanced fast string CPU capabilities.
135 */
136 if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
137 rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
138 if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
139 printk(KERN_INFO "Disabled fast string operations\n");
140 setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
141 setup_clear_cpu_cap(X86_FEATURE_ERMS);
142 }
143 }
133} 144}
134 145
135#ifdef CONFIG_X86_32 146#ifdef CONFIG_X86_32
@@ -400,12 +411,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
400 411
401 switch (c->x86_model) { 412 switch (c->x86_model) {
402 case 5: 413 case 5:
403 if (c->x86_mask == 0) { 414 if (l2 == 0)
404 if (l2 == 0) 415 p = "Celeron (Covington)";
405 p = "Celeron (Covington)"; 416 else if (l2 == 256)
406 else if (l2 == 256) 417 p = "Mobile Pentium II (Dixon)";
407 p = "Mobile Pentium II (Dixon)";
408 }
409 break; 418 break;
410 419
411 case 6: 420 case 6:
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 1ce1af2899df..c105c533ed94 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -327,7 +327,6 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
327 l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); 327 l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9));
328 l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); 328 l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
329 329
330 l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
331 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; 330 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
332} 331}
333 332
@@ -454,27 +453,16 @@ int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot,
454{ 453{
455 int ret = 0; 454 int ret = 0;
456 455
457#define SUBCACHE_MASK (3UL << 20) 456 /* check if @slot is already used or the index is already disabled */
458#define SUBCACHE_INDEX 0xfff
459
460 /*
461 * check whether this slot is already used or
462 * the index is already disabled
463 */
464 ret = amd_get_l3_disable_slot(l3, slot); 457 ret = amd_get_l3_disable_slot(l3, slot);
465 if (ret >= 0) 458 if (ret >= 0)
466 return -EINVAL; 459 return -EINVAL;
467 460
468 /* 461 if (index > l3->indices)
469 * check whether the other slot has disabled the
470 * same index already
471 */
472 if (index == amd_get_l3_disable_slot(l3, !slot))
473 return -EINVAL; 462 return -EINVAL;
474 463
475 /* do not allow writes outside of allowed bits */ 464 /* check whether the other slot has disabled the same index already */
476 if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || 465 if (index == amd_get_l3_disable_slot(l3, !slot))
477 ((index & SUBCACHE_INDEX) > l3->indices))
478 return -EINVAL; 466 return -EINVAL;
479 467
480 amd_l3_disable_index(l3, cpu, slot, index); 468 amd_l3_disable_index(l3, cpu, slot, index);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3385ea26f684..ff1ae9b6464d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -105,20 +105,6 @@ static int cpu_missing;
105ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); 105ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
106EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); 106EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
107 107
108static int default_decode_mce(struct notifier_block *nb, unsigned long val,
109 void *data)
110{
111 pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n");
112 pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n");
113
114 return NOTIFY_STOP;
115}
116
117static struct notifier_block mce_dec_nb = {
118 .notifier_call = default_decode_mce,
119 .priority = -1,
120};
121
122/* MCA banks polled by the period polling timer for corrected events */ 108/* MCA banks polled by the period polling timer for corrected events */
123DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { 109DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
124 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL 110 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
@@ -212,6 +198,8 @@ void mce_log(struct mce *mce)
212 198
213static void print_mce(struct mce *m) 199static void print_mce(struct mce *m)
214{ 200{
201 int ret = 0;
202
215 pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", 203 pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
216 m->extcpu, m->mcgstatus, m->bank, m->status); 204 m->extcpu, m->mcgstatus, m->bank, m->status);
217 205
@@ -239,7 +227,11 @@ static void print_mce(struct mce *m)
239 * Print out human-readable details about the MCE error, 227 * Print out human-readable details about the MCE error,
240 * (if the CPU has an implementation for that) 228 * (if the CPU has an implementation for that)
241 */ 229 */
242 atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); 230 ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
231 if (ret == NOTIFY_STOP)
232 return;
233
234 pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
243} 235}
244 236
245#define PANIC_TIMEOUT 5 /* 5 seconds */ 237#define PANIC_TIMEOUT 5 /* 5 seconds */
@@ -590,7 +582,6 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
590 if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { 582 if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) {
591 mce_log(&m); 583 mce_log(&m);
592 atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); 584 atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m);
593 add_taint(TAINT_MACHINE_CHECK);
594 } 585 }
595 586
596 /* 587 /*
@@ -1722,8 +1713,6 @@ __setup("mce", mcheck_enable);
1722 1713
1723int __init mcheck_init(void) 1714int __init mcheck_init(void)
1724{ 1715{
1725 atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
1726
1727 mcheck_intel_therm_init(); 1716 mcheck_intel_therm_init();
1728 1717
1729 return 0; 1718 return 0;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 167f97b5596e..bb0adad35143 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -509,6 +509,7 @@ recurse:
509out_free: 509out_free:
510 if (b) { 510 if (b) {
511 kobject_put(&b->kobj); 511 kobject_put(&b->kobj);
512 list_del(&b->miscj);
512 kfree(b); 513 kfree(b);
513 } 514 }
514 return err; 515 return err;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 6f8c5e9da97f..27c625178bf1 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -187,8 +187,6 @@ static int therm_throt_process(bool new_event, int event, int level)
187 this_cpu, 187 this_cpu,
188 level == CORE_LEVEL ? "Core" : "Package", 188 level == CORE_LEVEL ? "Core" : "Package",
189 state->count); 189 state->count);
190
191 add_taint(TAINT_MACHINE_CHECK);
192 return 1; 190 return 1;
193 } 191 }
194 if (old_event) { 192 if (old_event) {
@@ -355,7 +353,6 @@ static void notify_thresholds(__u64 msr_val)
355static void intel_thermal_interrupt(void) 353static void intel_thermal_interrupt(void)
356{ 354{
357 __u64 msr_val; 355 __u64 msr_val;
358 struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
359 356
360 rdmsrl(MSR_IA32_THERM_STATUS, msr_val); 357 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
361 358
@@ -367,19 +364,19 @@ static void intel_thermal_interrupt(void)
367 CORE_LEVEL) != 0) 364 CORE_LEVEL) != 0)
368 mce_log_therm_throt_event(CORE_THROTTLED | msr_val); 365 mce_log_therm_throt_event(CORE_THROTTLED | msr_val);
369 366
370 if (cpu_has(c, X86_FEATURE_PLN)) 367 if (this_cpu_has(X86_FEATURE_PLN))
371 if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, 368 if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
372 POWER_LIMIT_EVENT, 369 POWER_LIMIT_EVENT,
373 CORE_LEVEL) != 0) 370 CORE_LEVEL) != 0)
374 mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); 371 mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);
375 372
376 if (cpu_has(c, X86_FEATURE_PTS)) { 373 if (this_cpu_has(X86_FEATURE_PTS)) {
377 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); 374 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
378 if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, 375 if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
379 THERMAL_THROTTLING_EVENT, 376 THERMAL_THROTTLING_EVENT,
380 PACKAGE_LEVEL) != 0) 377 PACKAGE_LEVEL) != 0)
381 mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); 378 mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
382 if (cpu_has(c, X86_FEATURE_PLN)) 379 if (this_cpu_has(X86_FEATURE_PLN))
383 if (therm_throt_process(msr_val & 380 if (therm_throt_process(msr_val &
384 PACKAGE_THERM_STATUS_POWER_LIMIT, 381 PACKAGE_THERM_STATUS_POWER_LIMIT,
385 POWER_LIMIT_EVENT, 382 POWER_LIMIT_EVENT,
@@ -393,7 +390,6 @@ static void unexpected_thermal_interrupt(void)
393{ 390{
394 printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n", 391 printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
395 smp_processor_id()); 392 smp_processor_id());
396 add_taint(TAINT_MACHINE_CHECK);
397} 393}
398 394
399static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; 395static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
@@ -446,18 +442,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
446 */ 442 */
447 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 443 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
448 444
445 h = lvtthmr_init;
449 /* 446 /*
450 * The initial value of thermal LVT entries on all APs always reads 447 * The initial value of thermal LVT entries on all APs always reads
451 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI 448 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
452 * sequence to them and LVT registers are reset to 0s except for 449 * sequence to them and LVT registers are reset to 0s except for
453 * the mask bits which are set to 1s when APs receive INIT IPI. 450 * the mask bits which are set to 1s when APs receive INIT IPI.
454 * Always restore the value that BIOS has programmed on AP based on 451 * If BIOS takes over the thermal interrupt and sets its interrupt
455 * BSP's info we saved since BIOS is always setting the same value 452 * delivery mode to SMI (not fixed), it restores the value that the
456 * for all threads/cores 453 * BIOS has programmed on AP based on BSP's info we saved since BIOS
454 * is always setting the same value for all threads/cores.
457 */ 455 */
458 apic_write(APIC_LVTTHMR, lvtthmr_init); 456 if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
457 apic_write(APIC_LVTTHMR, lvtthmr_init);
459 458
460 h = lvtthmr_init;
461 459
462 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { 460 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
463 printk(KERN_DEBUG 461 printk(KERN_DEBUG
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e638689279d3..3a0338b4b179 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,6 +31,7 @@
31#include <asm/nmi.h> 31#include <asm/nmi.h>
32#include <asm/compat.h> 32#include <asm/compat.h>
33#include <asm/smp.h> 33#include <asm/smp.h>
34#include <asm/alternative.h>
34 35
35#if 0 36#if 0
36#undef wrmsrl 37#undef wrmsrl
@@ -363,12 +364,18 @@ again:
363 return new_raw_count; 364 return new_raw_count;
364} 365}
365 366
366/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */
367static inline int x86_pmu_addr_offset(int index) 367static inline int x86_pmu_addr_offset(int index)
368{ 368{
369 if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) 369 int offset;
370 return index << 1; 370
371 return index; 371 /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
372 alternative_io(ASM_NOP2,
373 "shll $1, %%eax",
374 X86_FEATURE_PERFCTR_CORE,
375 "=a" (offset),
376 "a" (index));
377
378 return offset;
372} 379}
373 380
374static inline unsigned int x86_pmu_config_addr(int index) 381static inline unsigned int x86_pmu_config_addr(int index)
@@ -1766,17 +1773,6 @@ static struct pmu pmu = {
1766 * callchain support 1773 * callchain support
1767 */ 1774 */
1768 1775
1769static void
1770backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
1771{
1772 /* Ignore warnings */
1773}
1774
1775static void backtrace_warning(void *data, char *msg)
1776{
1777 /* Ignore warnings */
1778}
1779
1780static int backtrace_stack(void *data, char *name) 1776static int backtrace_stack(void *data, char *name)
1781{ 1777{
1782 return 0; 1778 return 0;
@@ -1790,8 +1786,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
1790} 1786}
1791 1787
1792static const struct stacktrace_ops backtrace_ops = { 1788static const struct stacktrace_ops backtrace_ops = {
1793 .warning = backtrace_warning,
1794 .warning_symbol = backtrace_warning_symbol,
1795 .stack = backtrace_stack, 1789 .stack = backtrace_stack,
1796 .address = backtrace_address, 1790 .address = backtrace_address,
1797 .walk_stack = print_context_stack_bp, 1791 .walk_stack = print_context_stack_bp,
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index cf4e369cea67..fe29c1d2219e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -96,12 +96,14 @@ static __initconst const u64 amd_hw_cache_event_ids
96 */ 96 */
97static const u64 amd_perfmon_event_map[] = 97static const u64 amd_perfmon_event_map[] =
98{ 98{
99 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, 99 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
100 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 100 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
101 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, 101 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
102 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, 102 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
103 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, 103 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
104 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, 104 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
105 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
106 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
105}; 107};
106 108
107static u64 amd_pmu_event_map(int hw_event) 109static u64 amd_pmu_event_map(int hw_event)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 447a28de6f09..41178c826c48 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -36,7 +36,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
36 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, 36 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
37}; 37};
38 38
39static struct event_constraint intel_core_event_constraints[] = 39static struct event_constraint intel_core_event_constraints[] __read_mostly =
40{ 40{
41 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ 41 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
42 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ 42 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -47,7 +47,7 @@ static struct event_constraint intel_core_event_constraints[] =
47 EVENT_CONSTRAINT_END 47 EVENT_CONSTRAINT_END
48}; 48};
49 49
50static struct event_constraint intel_core2_event_constraints[] = 50static struct event_constraint intel_core2_event_constraints[] __read_mostly =
51{ 51{
52 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 52 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
53 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 53 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -70,7 +70,7 @@ static struct event_constraint intel_core2_event_constraints[] =
70 EVENT_CONSTRAINT_END 70 EVENT_CONSTRAINT_END
71}; 71};
72 72
73static struct event_constraint intel_nehalem_event_constraints[] = 73static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
74{ 74{
75 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 75 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
76 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 76 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -86,19 +86,19 @@ static struct event_constraint intel_nehalem_event_constraints[] =
86 EVENT_CONSTRAINT_END 86 EVENT_CONSTRAINT_END
87}; 87};
88 88
89static struct extra_reg intel_nehalem_extra_regs[] = 89static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
90{ 90{
91 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), 91 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
92 EVENT_EXTRA_END 92 EVENT_EXTRA_END
93}; 93};
94 94
95static struct event_constraint intel_nehalem_percore_constraints[] = 95static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly =
96{ 96{
97 INTEL_EVENT_CONSTRAINT(0xb7, 0), 97 INTEL_EVENT_CONSTRAINT(0xb7, 0),
98 EVENT_CONSTRAINT_END 98 EVENT_CONSTRAINT_END
99}; 99};
100 100
101static struct event_constraint intel_westmere_event_constraints[] = 101static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
102{ 102{
103 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 103 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
104 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 104 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -110,7 +110,7 @@ static struct event_constraint intel_westmere_event_constraints[] =
110 EVENT_CONSTRAINT_END 110 EVENT_CONSTRAINT_END
111}; 111};
112 112
113static struct event_constraint intel_snb_event_constraints[] = 113static struct event_constraint intel_snb_event_constraints[] __read_mostly =
114{ 114{
115 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 115 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
116 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 116 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -123,21 +123,21 @@ static struct event_constraint intel_snb_event_constraints[] =
123 EVENT_CONSTRAINT_END 123 EVENT_CONSTRAINT_END
124}; 124};
125 125
126static struct extra_reg intel_westmere_extra_regs[] = 126static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
127{ 127{
128 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), 128 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
129 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), 129 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
130 EVENT_EXTRA_END 130 EVENT_EXTRA_END
131}; 131};
132 132
133static struct event_constraint intel_westmere_percore_constraints[] = 133static struct event_constraint intel_westmere_percore_constraints[] __read_mostly =
134{ 134{
135 INTEL_EVENT_CONSTRAINT(0xb7, 0), 135 INTEL_EVENT_CONSTRAINT(0xb7, 0),
136 INTEL_EVENT_CONSTRAINT(0xbb, 0), 136 INTEL_EVENT_CONSTRAINT(0xbb, 0),
137 EVENT_CONSTRAINT_END 137 EVENT_CONSTRAINT_END
138}; 138};
139 139
140static struct event_constraint intel_gen_event_constraints[] = 140static struct event_constraint intel_gen_event_constraints[] __read_mostly =
141{ 141{
142 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 142 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
143 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 143 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -1440,6 +1440,11 @@ static __init int intel_pmu_init(void)
1440 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 1440 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
1441 x86_pmu.extra_regs = intel_nehalem_extra_regs; 1441 x86_pmu.extra_regs = intel_nehalem_extra_regs;
1442 1442
1443 /* UOPS_ISSUED.STALLED_CYCLES */
1444 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
1445 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
1446 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
1447
1443 if (ebx & 0x40) { 1448 if (ebx & 0x40) {
1444 /* 1449 /*
1445 * Erratum AAJ80 detected, we work it around by using 1450 * Erratum AAJ80 detected, we work it around by using
@@ -1480,6 +1485,12 @@ static __init int intel_pmu_init(void)
1480 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 1485 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
1481 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; 1486 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
1482 x86_pmu.extra_regs = intel_westmere_extra_regs; 1487 x86_pmu.extra_regs = intel_westmere_extra_regs;
1488
1489 /* UOPS_ISSUED.STALLED_CYCLES */
1490 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
1491 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
1492 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
1493
1483 pr_cont("Westmere events, "); 1494 pr_cont("Westmere events, ");
1484 break; 1495 break;
1485 1496
@@ -1491,6 +1502,12 @@ static __init int intel_pmu_init(void)
1491 1502
1492 x86_pmu.event_constraints = intel_snb_event_constraints; 1503 x86_pmu.event_constraints = intel_snb_event_constraints;
1493 x86_pmu.pebs_constraints = intel_snb_pebs_events; 1504 x86_pmu.pebs_constraints = intel_snb_pebs_events;
1505
1506 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
1507 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
1508 /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
1509 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1;
1510
1494 pr_cont("SandyBridge events, "); 1511 pr_cont("SandyBridge events, ");
1495 break; 1512 break;
1496 1513
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index e93fcd55fae1..ead584fb6a7d 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -468,7 +468,7 @@ static struct p4_event_bind p4_event_bind_map[] = {
468 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), 468 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
469 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 469 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
470 .escr_emask = 470 .escr_emask =
471 P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), 471 P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
472 .cntr = { {12, 13, 16}, {14, 15, 17} }, 472 .cntr = { {12, 13, 16}, {14, 15, 17} },
473 }, 473 },
474 [P4_EVENT_X87_ASSIST] = { 474 [P4_EVENT_X87_ASSIST] = {
@@ -912,8 +912,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
912 int idx, handled = 0; 912 int idx, handled = 0;
913 u64 val; 913 u64 val;
914 914
915 data.addr = 0; 915 perf_sample_data_init(&data, 0);
916 data.raw = NULL;
917 916
918 cpuc = &__get_cpu_var(cpu_hw_events); 917 cpuc = &__get_cpu_var(cpu_hw_events);
919 918
@@ -1197,7 +1196,7 @@ static __init int p4_pmu_init(void)
1197{ 1196{
1198 unsigned int low, high; 1197 unsigned int low, high;
1199 1198
1200 /* If we get stripped -- indexig fails */ 1199 /* If we get stripped -- indexing fails */
1201 BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); 1200 BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);
1202 1201
1203 rdmsr(MSR_IA32_MISC_ENABLE, low, high); 1202 rdmsr(MSR_IA32_MISC_ENABLE, low, high);
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index e2a3f0606da4..1aae78f775fc 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -135,20 +135,6 @@ print_context_stack_bp(struct thread_info *tinfo,
135} 135}
136EXPORT_SYMBOL_GPL(print_context_stack_bp); 136EXPORT_SYMBOL_GPL(print_context_stack_bp);
137 137
138
139static void
140print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
141{
142 printk(data);
143 print_symbol(msg, symbol);
144 printk("\n");
145}
146
147static void print_trace_warning(void *data, char *msg)
148{
149 printk("%s%s\n", (char *)data, msg);
150}
151
152static int print_trace_stack(void *data, char *name) 138static int print_trace_stack(void *data, char *name)
153{ 139{
154 printk("%s <%s> ", (char *)data, name); 140 printk("%s <%s> ", (char *)data, name);
@@ -166,8 +152,6 @@ static void print_trace_address(void *data, unsigned long addr, int reliable)
166} 152}
167 153
168static const struct stacktrace_ops print_trace_ops = { 154static const struct stacktrace_ops print_trace_ops = {
169 .warning = print_trace_warning,
170 .warning_symbol = print_trace_warning_symbol,
171 .stack = print_trace_stack, 155 .stack = print_trace_stack,
172 .address = print_trace_address, 156 .address = print_trace_address,
173 .walk_stack = print_context_stack, 157 .walk_stack = print_context_stack,
@@ -279,7 +263,6 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
279 printk("DEBUG_PAGEALLOC"); 263 printk("DEBUG_PAGEALLOC");
280#endif 264#endif
281 printk("\n"); 265 printk("\n");
282 sysfs_printk_last_file();
283 if (notify_die(DIE_OOPS, str, regs, err, 266 if (notify_die(DIE_OOPS, str, regs, err,
284 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) 267 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
285 return 1; 268 return 1;
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index a93742a57468..0ba15a6cc57e 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -260,9 +260,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
260 return mod_code_status; 260 return mod_code_status;
261} 261}
262 262
263static unsigned char *ftrace_nop_replace(void) 263static const unsigned char *ftrace_nop_replace(void)
264{ 264{
265 return ideal_nop5; 265 return ideal_nops[NOP_ATOMIC5];
266} 266}
267 267
268static int 268static int
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index d6d6bb361931..3bb08509a7a1 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -23,7 +23,6 @@
23static void __init i386_default_early_setup(void) 23static void __init i386_default_early_setup(void)
24{ 24{
25 /* Initialize 32bit specific setup functions */ 25 /* Initialize 32bit specific setup functions */
26 x86_init.resources.probe_roms = probe_roms;
27 x86_init.resources.reserve_resources = i386_reserve_resources; 26 x86_init.resources.reserve_resources = i386_reserve_resources;
28 x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; 27 x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
29 28
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index bfe8f729e086..6781765b3a0d 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -217,7 +217,7 @@ static void hpet_reserve_platform_timers(unsigned int id) { }
217/* 217/*
218 * Common hpet info 218 * Common hpet info
219 */ 219 */
220static unsigned long hpet_period; 220static unsigned long hpet_freq;
221 221
222static void hpet_legacy_set_mode(enum clock_event_mode mode, 222static void hpet_legacy_set_mode(enum clock_event_mode mode,
223 struct clock_event_device *evt); 223 struct clock_event_device *evt);
@@ -232,7 +232,6 @@ static struct clock_event_device hpet_clockevent = {
232 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, 232 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
233 .set_mode = hpet_legacy_set_mode, 233 .set_mode = hpet_legacy_set_mode,
234 .set_next_event = hpet_legacy_next_event, 234 .set_next_event = hpet_legacy_next_event,
235 .shift = 32,
236 .irq = 0, 235 .irq = 0,
237 .rating = 50, 236 .rating = 50,
238}; 237};
@@ -290,28 +289,12 @@ static void hpet_legacy_clockevent_register(void)
290 hpet_enable_legacy_int(); 289 hpet_enable_legacy_int();
291 290
292 /* 291 /*
293 * The mult factor is defined as (include/linux/clockchips.h)
294 * mult/2^shift = cyc/ns (in contrast to ns/cyc in clocksource.h)
295 * hpet_period is in units of femtoseconds (per cycle), so
296 * mult/2^shift = cyc/ns = 10^6/hpet_period
297 * mult = (10^6 * 2^shift)/hpet_period
298 * mult = (FSEC_PER_NSEC << hpet_clockevent.shift)/hpet_period
299 */
300 hpet_clockevent.mult = div_sc((unsigned long) FSEC_PER_NSEC,
301 hpet_period, hpet_clockevent.shift);
302 /* Calculate the min / max delta */
303 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
304 &hpet_clockevent);
305 /* Setup minimum reprogramming delta. */
306 hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA,
307 &hpet_clockevent);
308
309 /*
310 * Start hpet with the boot cpu mask and make it 292 * Start hpet with the boot cpu mask and make it
311 * global after the IO_APIC has been initialized. 293 * global after the IO_APIC has been initialized.
312 */ 294 */
313 hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); 295 hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
314 clockevents_register_device(&hpet_clockevent); 296 clockevents_config_and_register(&hpet_clockevent, hpet_freq,
297 HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
315 global_clock_event = &hpet_clockevent; 298 global_clock_event = &hpet_clockevent;
316 printk(KERN_DEBUG "hpet clockevent registered\n"); 299 printk(KERN_DEBUG "hpet clockevent registered\n");
317} 300}
@@ -549,7 +532,6 @@ static int hpet_setup_irq(struct hpet_dev *dev)
549static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) 532static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
550{ 533{
551 struct clock_event_device *evt = &hdev->evt; 534 struct clock_event_device *evt = &hdev->evt;
552 uint64_t hpet_freq;
553 535
554 WARN_ON(cpu != smp_processor_id()); 536 WARN_ON(cpu != smp_processor_id());
555 if (!(hdev->flags & HPET_DEV_VALID)) 537 if (!(hdev->flags & HPET_DEV_VALID))
@@ -571,24 +553,10 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
571 553
572 evt->set_mode = hpet_msi_set_mode; 554 evt->set_mode = hpet_msi_set_mode;
573 evt->set_next_event = hpet_msi_next_event; 555 evt->set_next_event = hpet_msi_next_event;
574 evt->shift = 32;
575
576 /*
577 * The period is a femto seconds value. We need to calculate the
578 * scaled math multiplication factor for nanosecond to hpet tick
579 * conversion.
580 */
581 hpet_freq = FSEC_PER_SEC;
582 do_div(hpet_freq, hpet_period);
583 evt->mult = div_sc((unsigned long) hpet_freq,
584 NSEC_PER_SEC, evt->shift);
585 /* Calculate the max delta */
586 evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt);
587 /* 5 usec minimum reprogramming delta. */
588 evt->min_delta_ns = 5000;
589
590 evt->cpumask = cpumask_of(hdev->cpu); 556 evt->cpumask = cpumask_of(hdev->cpu);
591 clockevents_register_device(evt); 557
558 clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA,
559 0x7FFFFFFF);
592} 560}
593 561
594#ifdef CONFIG_HPET 562#ifdef CONFIG_HPET
@@ -792,7 +760,6 @@ static struct clocksource clocksource_hpet = {
792static int hpet_clocksource_register(void) 760static int hpet_clocksource_register(void)
793{ 761{
794 u64 start, now; 762 u64 start, now;
795 u64 hpet_freq;
796 cycle_t t1; 763 cycle_t t1;
797 764
798 /* Start the counter */ 765 /* Start the counter */
@@ -819,24 +786,7 @@ static int hpet_clocksource_register(void)
819 return -ENODEV; 786 return -ENODEV;
820 } 787 }
821 788
822 /*
823 * The definition of mult is (include/linux/clocksource.h)
824 * mult/2^shift = ns/cyc and hpet_period is in units of fsec/cyc
825 * so we first need to convert hpet_period to ns/cyc units:
826 * mult/2^shift = ns/cyc = hpet_period/10^6
827 * mult = (hpet_period * 2^shift)/10^6
828 * mult = (hpet_period << shift)/FSEC_PER_NSEC
829 */
830
831 /* Need to convert hpet_period (fsec/cyc) to cyc/sec:
832 *
833 * cyc/sec = FSEC_PER_SEC/hpet_period(fsec/cyc)
834 * cyc/sec = (FSEC_PER_NSEC * NSEC_PER_SEC)/hpet_period
835 */
836 hpet_freq = FSEC_PER_SEC;
837 do_div(hpet_freq, hpet_period);
838 clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq); 789 clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
839
840 return 0; 790 return 0;
841} 791}
842 792
@@ -845,7 +795,9 @@ static int hpet_clocksource_register(void)
845 */ 795 */
846int __init hpet_enable(void) 796int __init hpet_enable(void)
847{ 797{
798 unsigned long hpet_period;
848 unsigned int id; 799 unsigned int id;
800 u64 freq;
849 int i; 801 int i;
850 802
851 if (!is_hpet_capable()) 803 if (!is_hpet_capable())
@@ -884,6 +836,14 @@ int __init hpet_enable(void)
884 goto out_nohpet; 836 goto out_nohpet;
885 837
886 /* 838 /*
839 * The period is a femto seconds value. Convert it to a
840 * frequency.
841 */
842 freq = FSEC_PER_SEC;
843 do_div(freq, hpet_period);
844 hpet_freq = freq;
845
846 /*
887 * Read the HPET ID register to retrieve the IRQ routing 847 * Read the HPET ID register to retrieve the IRQ routing
888 * information and the number of channels 848 * information and the number of channels
889 */ 849 */
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 2dfd31597443..fb66dc9e36cb 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -93,7 +93,6 @@ static struct clock_event_device pit_ce = {
93 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, 93 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
94 .set_mode = init_pit_timer, 94 .set_mode = init_pit_timer,
95 .set_next_event = pit_next_event, 95 .set_next_event = pit_next_event,
96 .shift = 32,
97 .irq = 0, 96 .irq = 0,
98}; 97};
99 98
@@ -108,90 +107,12 @@ void __init setup_pit_timer(void)
108 * IO_APIC has been initialized. 107 * IO_APIC has been initialized.
109 */ 108 */
110 pit_ce.cpumask = cpumask_of(smp_processor_id()); 109 pit_ce.cpumask = cpumask_of(smp_processor_id());
111 pit_ce.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, pit_ce.shift);
112 pit_ce.max_delta_ns = clockevent_delta2ns(0x7FFF, &pit_ce);
113 pit_ce.min_delta_ns = clockevent_delta2ns(0xF, &pit_ce);
114 110
115 clockevents_register_device(&pit_ce); 111 clockevents_config_and_register(&pit_ce, CLOCK_TICK_RATE, 0xF, 0x7FFF);
116 global_clock_event = &pit_ce; 112 global_clock_event = &pit_ce;
117} 113}
118 114
119#ifndef CONFIG_X86_64 115#ifndef CONFIG_X86_64
120/*
121 * Since the PIT overflows every tick, its not very useful
122 * to just read by itself. So use jiffies to emulate a free
123 * running counter:
124 */
125static cycle_t pit_read(struct clocksource *cs)
126{
127 static int old_count;
128 static u32 old_jifs;
129 unsigned long flags;
130 int count;
131 u32 jifs;
132
133 raw_spin_lock_irqsave(&i8253_lock, flags);
134 /*
135 * Although our caller may have the read side of xtime_lock,
136 * this is now a seqlock, and we are cheating in this routine
137 * by having side effects on state that we cannot undo if
138 * there is a collision on the seqlock and our caller has to
139 * retry. (Namely, old_jifs and old_count.) So we must treat
140 * jiffies as volatile despite the lock. We read jiffies
141 * before latching the timer count to guarantee that although
142 * the jiffies value might be older than the count (that is,
143 * the counter may underflow between the last point where
144 * jiffies was incremented and the point where we latch the
145 * count), it cannot be newer.
146 */
147 jifs = jiffies;
148 outb_pit(0x00, PIT_MODE); /* latch the count ASAP */
149 count = inb_pit(PIT_CH0); /* read the latched count */
150 count |= inb_pit(PIT_CH0) << 8;
151
152 /* VIA686a test code... reset the latch if count > max + 1 */
153 if (count > LATCH) {
154 outb_pit(0x34, PIT_MODE);
155 outb_pit(LATCH & 0xff, PIT_CH0);
156 outb_pit(LATCH >> 8, PIT_CH0);
157 count = LATCH - 1;
158 }
159
160 /*
161 * It's possible for count to appear to go the wrong way for a
162 * couple of reasons:
163 *
164 * 1. The timer counter underflows, but we haven't handled the
165 * resulting interrupt and incremented jiffies yet.
166 * 2. Hardware problem with the timer, not giving us continuous time,
167 * the counter does small "jumps" upwards on some Pentium systems,
168 * (see c't 95/10 page 335 for Neptun bug.)
169 *
170 * Previous attempts to handle these cases intelligently were
171 * buggy, so we just do the simple thing now.
172 */
173 if (count > old_count && jifs == old_jifs)
174 count = old_count;
175
176 old_count = count;
177 old_jifs = jifs;
178
179 raw_spin_unlock_irqrestore(&i8253_lock, flags);
180
181 count = (LATCH - 1) - count;
182
183 return (cycle_t)(jifs * LATCH) + count;
184}
185
186static struct clocksource pit_cs = {
187 .name = "pit",
188 .rating = 110,
189 .read = pit_read,
190 .mask = CLOCKSOURCE_MASK(32),
191 .mult = 0,
192 .shift = 20,
193};
194
195static int __init init_pit_clocksource(void) 116static int __init init_pit_clocksource(void)
196{ 117{
197 /* 118 /*
@@ -205,10 +126,7 @@ static int __init init_pit_clocksource(void)
205 pit_ce.mode != CLOCK_EVT_MODE_PERIODIC) 126 pit_ce.mode != CLOCK_EVT_MODE_PERIODIC)
206 return 0; 127 return 0;
207 128
208 pit_cs.mult = clocksource_hz2mult(CLOCK_TICK_RATE, pit_cs.shift); 129 return clocksource_i8253_init();
209
210 return clocksource_register(&pit_cs);
211} 130}
212arch_initcall(init_pit_clocksource); 131arch_initcall(init_pit_clocksource);
213
214#endif /* !CONFIG_X86_64 */ 132#endif /* !CONFIG_X86_64 */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 1cb0b9fc78dc..6c0802eb2f7f 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -249,7 +249,7 @@ void fixup_irqs(void)
249 249
250 data = irq_desc_get_irq_data(desc); 250 data = irq_desc_get_irq_data(desc);
251 affinity = data->affinity; 251 affinity = data->affinity;
252 if (!irq_has_action(irq) || 252 if (!irq_has_action(irq) || irqd_is_per_cpu(data) ||
253 cpumask_subset(affinity, cpu_online_mask)) { 253 cpumask_subset(affinity, cpu_online_mask)) {
254 raw_spin_unlock(&desc->lock); 254 raw_spin_unlock(&desc->lock);
255 continue; 255 continue;
@@ -276,7 +276,8 @@ void fixup_irqs(void)
276 else if (!(warned++)) 276 else if (!(warned++))
277 set_affinity = 0; 277 set_affinity = 0;
278 278
279 if (!irqd_can_move_in_process_context(data) && chip->irq_unmask) 279 if (!irqd_can_move_in_process_context(data) &&
280 !irqd_irq_disabled(data) && chip->irq_unmask)
280 chip->irq_unmask(data); 281 chip->irq_unmask(data);
281 282
282 raw_spin_unlock(&desc->lock); 283 raw_spin_unlock(&desc->lock);
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 961b6b30ba90..3fee346ef545 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -34,7 +34,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
34 code.offset = entry->target - 34 code.offset = entry->target -
35 (entry->code + JUMP_LABEL_NOP_SIZE); 35 (entry->code + JUMP_LABEL_NOP_SIZE);
36 } else 36 } else
37 memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE); 37 memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
38 get_online_cpus(); 38 get_online_cpus();
39 mutex_lock(&text_mutex); 39 mutex_lock(&text_mutex);
40 text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); 40 text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
@@ -44,7 +44,8 @@ void arch_jump_label_transform(struct jump_entry *entry,
44 44
45void arch_jump_label_text_poke_early(jump_label_t addr) 45void arch_jump_label_text_poke_early(jump_label_t addr)
46{ 46{
47 text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE); 47 text_poke_early((void *)addr, ideal_nops[NOP_ATOMIC5],
48 JUMP_LABEL_NOP_SIZE);
48} 49}
49 50
50#endif 51#endif
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index c969fd9d1566..f1a6244d7d93 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1183,12 +1183,13 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
1183 struct pt_regs *regs) 1183 struct pt_regs *regs)
1184{ 1184{
1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1186 unsigned long flags;
1186 1187
1187 /* This is possible if op is under delayed unoptimizing */ 1188 /* This is possible if op is under delayed unoptimizing */
1188 if (kprobe_disabled(&op->kp)) 1189 if (kprobe_disabled(&op->kp))
1189 return; 1190 return;
1190 1191
1191 preempt_disable(); 1192 local_irq_save(flags);
1192 if (kprobe_running()) { 1193 if (kprobe_running()) {
1193 kprobes_inc_nmissed_count(&op->kp); 1194 kprobes_inc_nmissed_count(&op->kp);
1194 } else { 1195 } else {
@@ -1207,7 +1208,7 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
1207 opt_pre_handler(&op->kp, regs); 1208 opt_pre_handler(&op->kp, regs);
1208 __this_cpu_write(current_kprobe, NULL); 1209 __this_cpu_write(current_kprobe, NULL);
1209 } 1210 }
1210 preempt_enable_no_resched(); 1211 local_irq_restore(flags);
1211} 1212}
1212 1213
1213static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) 1214static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index f98d3eafe07a..6389a6bca11b 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -26,8 +26,6 @@
26#include <asm/x86_init.h> 26#include <asm/x86_init.h>
27#include <asm/reboot.h> 27#include <asm/reboot.h>
28 28
29#define KVM_SCALE 22
30
31static int kvmclock = 1; 29static int kvmclock = 1;
32static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; 30static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
33static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; 31static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
@@ -120,8 +118,6 @@ static struct clocksource kvm_clock = {
120 .read = kvm_clock_get_cycles, 118 .read = kvm_clock_get_cycles,
121 .rating = 400, 119 .rating = 400,
122 .mask = CLOCKSOURCE_MASK(64), 120 .mask = CLOCKSOURCE_MASK(64),
123 .mult = 1 << KVM_SCALE,
124 .shift = KVM_SCALE,
125 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 121 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
126}; 122};
127 123
@@ -203,7 +199,7 @@ void __init kvmclock_init(void)
203 machine_ops.crash_shutdown = kvm_crash_shutdown; 199 machine_ops.crash_shutdown = kvm_crash_shutdown;
204#endif 200#endif
205 kvm_get_preset_lpj(); 201 kvm_get_preset_lpj();
206 clocksource_register(&kvm_clock); 202 clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
207 pv_info.paravirt_enabled = 1; 203 pv_info.paravirt_enabled = 1;
208 pv_info.name = "KVM"; 204 pv_info.name = "KVM";
209 205
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index ab23f1ad4bf1..52f256f2cc81 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -24,6 +24,7 @@
24#include <linux/bug.h> 24#include <linux/bug.h>
25#include <linux/mm.h> 25#include <linux/mm.h>
26#include <linux/gfp.h> 26#include <linux/gfp.h>
27#include <linux/jump_label.h>
27 28
28#include <asm/system.h> 29#include <asm/system.h>
29#include <asm/page.h> 30#include <asm/page.h>
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 5a532ce646bf..6f9bfffb2720 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -715,17 +715,15 @@ static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
715 } 715 }
716} 716}
717 717
718static int 718static int __init
719check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count) 719check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
720{ 720{
721 int ret = 0;
722
723 if (!mpc_new_phys || count <= mpc_new_length) { 721 if (!mpc_new_phys || count <= mpc_new_length) {
724 WARN(1, "update_mptable: No spare slots (length: %x)\n", count); 722 WARN(1, "update_mptable: No spare slots (length: %x)\n", count);
725 return -1; 723 return -1;
726 } 724 }
727 725
728 return ret; 726 return 0;
729} 727}
730#else /* CONFIG_X86_IO_APIC */ 728#else /* CONFIG_X86_IO_APIC */
731static 729static
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c
index 55d745ec1181..35ccf75696eb 100644
--- a/arch/x86/kernel/pci-iommu_table.c
+++ b/arch/x86/kernel/pci-iommu_table.c
@@ -50,20 +50,14 @@ void __init check_iommu_entries(struct iommu_table_entry *start,
50 struct iommu_table_entry *finish) 50 struct iommu_table_entry *finish)
51{ 51{
52 struct iommu_table_entry *p, *q, *x; 52 struct iommu_table_entry *p, *q, *x;
53 char sym_p[KSYM_SYMBOL_LEN];
54 char sym_q[KSYM_SYMBOL_LEN];
55 53
56 /* Simple cyclic dependency checker. */ 54 /* Simple cyclic dependency checker. */
57 for (p = start; p < finish; p++) { 55 for (p = start; p < finish; p++) {
58 q = find_dependents_of(start, finish, p); 56 q = find_dependents_of(start, finish, p);
59 x = find_dependents_of(start, finish, q); 57 x = find_dependents_of(start, finish, q);
60 if (p == x) { 58 if (p == x) {
61 sprint_symbol(sym_p, (unsigned long)p->detect); 59 printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %pS depends on %pS and vice-versa. BREAKING IT.\n",
62 sprint_symbol(sym_q, (unsigned long)q->detect); 60 p->detect, q->detect);
63
64 printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %s depends" \
65 " on %s and vice-versa. BREAKING IT.\n",
66 sym_p, sym_q);
67 /* Heavy handed way..*/ 61 /* Heavy handed way..*/
68 x->depend = 0; 62 x->depend = 0;
69 } 63 }
@@ -72,12 +66,8 @@ void __init check_iommu_entries(struct iommu_table_entry *start,
72 for (p = start; p < finish; p++) { 66 for (p = start; p < finish; p++) {
73 q = find_dependents_of(p, finish, p); 67 q = find_dependents_of(p, finish, p);
74 if (q && q > p) { 68 if (q && q > p) {
75 sprint_symbol(sym_p, (unsigned long)p->detect); 69 printk(KERN_ERR "EXECUTION ORDER INVALID! %pS should be called before %pS!\n",
76 sprint_symbol(sym_q, (unsigned long)q->detect); 70 p->detect, q->detect);
77
78 printk(KERN_ERR "EXECUTION ORDER INVALID! %s "\
79 "should be called before %s!\n",
80 sym_p, sym_q);
81 } 71 }
82 } 72 }
83} 73}
diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms.c
index 071e7fea42e5..ba0a4cce53be 100644
--- a/arch/x86/kernel/probe_roms_32.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -73,6 +73,107 @@ static struct resource video_rom_resource = {
73 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM 73 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
74}; 74};
75 75
76/* does this oprom support the given pci device, or any of the devices
77 * that the driver supports?
78 */
79static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short device)
80{
81 struct pci_driver *drv = pdev->driver;
82 const struct pci_device_id *id;
83
84 if (pdev->vendor == vendor && pdev->device == device)
85 return true;
86
87 for (id = drv ? drv->id_table : NULL; id && id->vendor; id++)
88 if (id->vendor == vendor && id->device == device)
89 break;
90
91 return id && id->vendor;
92}
93
94static bool probe_list(struct pci_dev *pdev, unsigned short vendor,
95 const unsigned char *rom_list)
96{
97 unsigned short device;
98
99 do {
100 if (probe_kernel_address(rom_list, device) != 0)
101 device = 0;
102
103 if (device && match_id(pdev, vendor, device))
104 break;
105
106 rom_list += 2;
107 } while (device);
108
109 return !!device;
110}
111
112static struct resource *find_oprom(struct pci_dev *pdev)
113{
114 struct resource *oprom = NULL;
115 int i;
116
117 for (i = 0; i < ARRAY_SIZE(adapter_rom_resources); i++) {
118 struct resource *res = &adapter_rom_resources[i];
119 unsigned short offset, vendor, device, list, rev;
120 const unsigned char *rom;
121
122 if (res->end == 0)
123 break;
124
125 rom = isa_bus_to_virt(res->start);
126 if (probe_kernel_address(rom + 0x18, offset) != 0)
127 continue;
128
129 if (probe_kernel_address(rom + offset + 0x4, vendor) != 0)
130 continue;
131
132 if (probe_kernel_address(rom + offset + 0x6, device) != 0)
133 continue;
134
135 if (match_id(pdev, vendor, device)) {
136 oprom = res;
137 break;
138 }
139
140 if (probe_kernel_address(rom + offset + 0x8, list) == 0 &&
141 probe_kernel_address(rom + offset + 0xc, rev) == 0 &&
142 rev >= 3 && list &&
143 probe_list(pdev, vendor, rom + offset + list)) {
144 oprom = res;
145 break;
146 }
147 }
148
149 return oprom;
150}
151
152void *pci_map_biosrom(struct pci_dev *pdev)
153{
154 struct resource *oprom = find_oprom(pdev);
155
156 if (!oprom)
157 return NULL;
158
159 return ioremap(oprom->start, resource_size(oprom));
160}
161EXPORT_SYMBOL(pci_map_biosrom);
162
163void pci_unmap_biosrom(void __iomem *image)
164{
165 iounmap(image);
166}
167EXPORT_SYMBOL(pci_unmap_biosrom);
168
169size_t pci_biosrom_size(struct pci_dev *pdev)
170{
171 struct resource *oprom = find_oprom(pdev);
172
173 return oprom ? resource_size(oprom) : 0;
174}
175EXPORT_SYMBOL(pci_biosrom_size);
176
76#define ROMSIGNATURE 0xaa55 177#define ROMSIGNATURE 0xaa55
77 178
78static int __init romsignature(const unsigned char *rom) 179static int __init romsignature(const unsigned char *rom)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index d46cbe46b7ab..88a90a977f8e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -449,7 +449,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
449void mwait_idle_with_hints(unsigned long ax, unsigned long cx) 449void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
450{ 450{
451 if (!need_resched()) { 451 if (!need_resched()) {
452 if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) 452 if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
453 clflush((void *)&current_thread_info()->flags); 453 clflush((void *)&current_thread_info()->flags);
454 454
455 __monitor((void *)&current_thread_info()->flags, 0, 0); 455 __monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -465,7 +465,7 @@ static void mwait_idle(void)
465 if (!need_resched()) { 465 if (!need_resched()) {
466 trace_power_start(POWER_CSTATE, 1, smp_processor_id()); 466 trace_power_start(POWER_CSTATE, 1, smp_processor_id());
467 trace_cpu_idle(1, smp_processor_id()); 467 trace_cpu_idle(1, smp_processor_id());
468 if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) 468 if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
469 clflush((void *)&current_thread_info()->flags); 469 clflush((void *)&current_thread_info()->flags);
470 470
471 __monitor((void *)&current_thread_info()->flags, 0, 0); 471 __monitor((void *)&current_thread_info()->flags, 0, 0);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 08c44b08bf5b..0c016f727695 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -36,7 +36,7 @@ EXPORT_SYMBOL(pm_power_off);
36 36
37static const struct desc_ptr no_idt = {}; 37static const struct desc_ptr no_idt = {};
38static int reboot_mode; 38static int reboot_mode;
39enum reboot_type reboot_type = BOOT_KBD; 39enum reboot_type reboot_type = BOOT_ACPI;
40int reboot_force; 40int reboot_force;
41 41
42#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) 42#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
@@ -478,9 +478,24 @@ void __attribute__((weak)) mach_reboot_fixups(void)
478{ 478{
479} 479}
480 480
481/*
482 * Windows compatible x86 hardware expects the following on reboot:
483 *
484 * 1) If the FADT has the ACPI reboot register flag set, try it
485 * 2) If still alive, write to the keyboard controller
486 * 3) If still alive, write to the ACPI reboot register again
487 * 4) If still alive, write to the keyboard controller again
488 *
489 * If the machine is still alive at this stage, it gives up. We default to
490 * following the same pattern, except that if we're still alive after (4) we'll
491 * try to force a triple fault and then cycle between hitting the keyboard
492 * controller and doing that
493 */
481static void native_machine_emergency_restart(void) 494static void native_machine_emergency_restart(void)
482{ 495{
483 int i; 496 int i;
497 int attempt = 0;
498 int orig_reboot_type = reboot_type;
484 499
485 if (reboot_emergency) 500 if (reboot_emergency)
486 emergency_vmx_disable_all(); 501 emergency_vmx_disable_all();
@@ -502,6 +517,13 @@ static void native_machine_emergency_restart(void)
502 outb(0xfe, 0x64); /* pulse reset low */ 517 outb(0xfe, 0x64); /* pulse reset low */
503 udelay(50); 518 udelay(50);
504 } 519 }
520 if (attempt == 0 && orig_reboot_type == BOOT_ACPI) {
521 attempt = 1;
522 reboot_type = BOOT_ACPI;
523 } else {
524 reboot_type = BOOT_TRIPLE;
525 }
526 break;
505 527
506 case BOOT_TRIPLE: 528 case BOOT_TRIPLE:
507 load_idt(&no_idt); 529 load_idt(&no_idt);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4be9b398470e..c3050af9306d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -691,8 +691,6 @@ early_param("reservelow", parse_reservelow);
691 691
692void __init setup_arch(char **cmdline_p) 692void __init setup_arch(char **cmdline_p)
693{ 693{
694 unsigned long flags;
695
696#ifdef CONFIG_X86_32 694#ifdef CONFIG_X86_32
697 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 695 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
698 visws_early_detect(); 696 visws_early_detect();
@@ -1041,9 +1039,7 @@ void __init setup_arch(char **cmdline_p)
1041 1039
1042 mcheck_init(); 1040 mcheck_init();
1043 1041
1044 local_irq_save(flags); 1042 arch_init_ideal_nops();
1045 arch_init_ideal_nop5();
1046 local_irq_restore(flags);
1047} 1043}
1048 1044
1049#ifdef CONFIG_X86_32 1045#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 4fd173cd8e57..40a24932a8a1 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -601,10 +601,7 @@ long sys_rt_sigreturn(struct pt_regs *regs)
601 goto badframe; 601 goto badframe;
602 602
603 sigdelsetmask(&set, ~_BLOCKABLE); 603 sigdelsetmask(&set, ~_BLOCKABLE);
604 spin_lock_irq(&current->sighand->siglock); 604 set_current_blocked(&set);
605 current->blocked = set;
606 recalc_sigpending();
607 spin_unlock_irq(&current->sighand->siglock);
608 605
609 if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) 606 if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
610 goto badframe; 607 goto badframe;
@@ -682,6 +679,7 @@ static int
682handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, 679handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
683 sigset_t *oldset, struct pt_regs *regs) 680 sigset_t *oldset, struct pt_regs *regs)
684{ 681{
682 sigset_t blocked;
685 int ret; 683 int ret;
686 684
687 /* Are we from a system call? */ 685 /* Are we from a system call? */
@@ -741,12 +739,10 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
741 */ 739 */
742 regs->flags &= ~X86_EFLAGS_TF; 740 regs->flags &= ~X86_EFLAGS_TF;
743 741
744 spin_lock_irq(&current->sighand->siglock); 742 sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
745 sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
746 if (!(ka->sa.sa_flags & SA_NODEFER)) 743 if (!(ka->sa.sa_flags & SA_NODEFER))
747 sigaddset(&current->blocked, sig); 744 sigaddset(&blocked, sig);
748 recalc_sigpending(); 745 set_current_blocked(&blocked);
749 spin_unlock_irq(&current->sighand->siglock);
750 746
751 tracehook_signal_handler(sig, info, ka, regs, 747 tracehook_signal_handler(sig, info, ka, regs,
752 test_thread_flag(TIF_SINGLESTEP)); 748 test_thread_flag(TIF_SINGLESTEP));
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 513deac7228d..013e7eba83bb 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -194,14 +194,13 @@ static void native_stop_other_cpus(int wait)
194} 194}
195 195
196/* 196/*
197 * Reschedule call back. Nothing to do, 197 * Reschedule call back.
198 * all the work is done automatically when
199 * we return from the interrupt.
200 */ 198 */
201void smp_reschedule_interrupt(struct pt_regs *regs) 199void smp_reschedule_interrupt(struct pt_regs *regs)
202{ 200{
203 ack_APIC_irq(); 201 ack_APIC_irq();
204 inc_irq_stat(irq_resched_count); 202 inc_irq_stat(irq_resched_count);
203 scheduler_ipi();
205 /* 204 /*
206 * KVM uses this interrupt to force a cpu out of guest mode 205 * KVM uses this interrupt to force a cpu out of guest mode
207 */ 206 */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c2871d3c71b6..a3c430bdfb60 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1332,9 +1332,9 @@ static inline void mwait_play_dead(void)
1332 void *mwait_ptr; 1332 void *mwait_ptr;
1333 struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); 1333 struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
1334 1334
1335 if (!(cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c))) 1335 if (!this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))
1336 return; 1336 return;
1337 if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH)) 1337 if (!this_cpu_has(X86_FEATURE_CLFLSH))
1338 return; 1338 return;
1339 if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF) 1339 if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
1340 return; 1340 return;
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 6515733a289d..55d9bc03f696 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -9,15 +9,6 @@
9#include <linux/uaccess.h> 9#include <linux/uaccess.h>
10#include <asm/stacktrace.h> 10#include <asm/stacktrace.h>
11 11
12static void save_stack_warning(void *data, char *msg)
13{
14}
15
16static void
17save_stack_warning_symbol(void *data, char *msg, unsigned long symbol)
18{
19}
20
21static int save_stack_stack(void *data, char *name) 12static int save_stack_stack(void *data, char *name)
22{ 13{
23 return 0; 14 return 0;
@@ -53,16 +44,12 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable)
53} 44}
54 45
55static const struct stacktrace_ops save_stack_ops = { 46static const struct stacktrace_ops save_stack_ops = {
56 .warning = save_stack_warning,
57 .warning_symbol = save_stack_warning_symbol,
58 .stack = save_stack_stack, 47 .stack = save_stack_stack,
59 .address = save_stack_address, 48 .address = save_stack_address,
60 .walk_stack = print_context_stack, 49 .walk_stack = print_context_stack,
61}; 50};
62 51
63static const struct stacktrace_ops save_stack_ops_nosched = { 52static const struct stacktrace_ops save_stack_ops_nosched = {
64 .warning = save_stack_warning,
65 .warning_symbol = save_stack_warning_symbol,
66 .stack = save_stack_stack, 53 .stack = save_stack_stack,
67 .address = save_stack_address_nosched, 54 .address = save_stack_address_nosched,
68 .walk_stack = print_context_stack, 55 .walk_stack = print_context_stack,
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index abce34d5c79d..32cbffb0c494 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -344,3 +344,4 @@ ENTRY(sys_call_table)
344 .long sys_open_by_handle_at 344 .long sys_open_by_handle_at
345 .long sys_clock_adjtime 345 .long sys_clock_adjtime
346 .long sys_syncfs 346 .long sys_syncfs
347 .long sys_sendmmsg /* 345 */
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 75ef4b18e9b7..6f164bd5e14d 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -35,7 +35,7 @@ void iommu_shutdown_noop(void) { }
35struct x86_init_ops x86_init __initdata = { 35struct x86_init_ops x86_init __initdata = {
36 36
37 .resources = { 37 .resources = {
38 .probe_roms = x86_init_noop, 38 .probe_roms = probe_roms,
39 .reserve_resources = reserve_standard_io_resources, 39 .reserve_resources = reserve_standard_io_resources,
40 .memory_setup = default_machine_specific_memory_setup, 40 .memory_setup = default_machine_specific_memory_setup,
41 }, 41 },
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 1cd608973ce5..e191c096ab90 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -7,7 +7,7 @@
7 * kernel and insert a module (lg.ko) which allows us to run other Linux 7 * kernel and insert a module (lg.ko) which allows us to run other Linux
8 * kernels the same way we'd run processes. We call the first kernel the Host, 8 * kernels the same way we'd run processes. We call the first kernel the Host,
9 * and the others the Guests. The program which sets up and configures Guests 9 * and the others the Guests. The program which sets up and configures Guests
10 * (such as the example in Documentation/lguest/lguest.c) is called the 10 * (such as the example in Documentation/virtual/lguest/lguest.c) is called the
11 * Launcher. 11 * Launcher.
12 * 12 *
13 * Secondly, we only run specially modified Guests, not normal kernels: setting 13 * Secondly, we only run specially modified Guests, not normal kernels: setting
@@ -913,8 +913,6 @@ static struct clocksource lguest_clock = {
913 .rating = 200, 913 .rating = 200,
914 .read = lguest_clock_read, 914 .read = lguest_clock_read,
915 .mask = CLOCKSOURCE_MASK(64), 915 .mask = CLOCKSOURCE_MASK(64),
916 .mult = 1 << 22,
917 .shift = 22,
918 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 916 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
919}; 917};
920 918
@@ -997,7 +995,7 @@ static void lguest_time_init(void)
997 /* Set up the timer interrupt (0) to go to our simple timer routine */ 995 /* Set up the timer interrupt (0) to go to our simple timer routine */
998 irq_set_handler(0, lguest_time_irq); 996 irq_set_handler(0, lguest_time_irq);
999 997
1000 clocksource_register(&lguest_clock); 998 clocksource_register_hz(&lguest_clock, NSEC_PER_SEC);
1001 999
1002 /* We can't set cpumask in the initializer: damn C limitations! Set it 1000 /* We can't set cpumask in the initializer: damn C limitations! Set it
1003 * here and register our timer device. */ 1001 * here and register our timer device. */
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index aa4326bfb24a..f2145cfa12a6 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,5 +1,6 @@
1#include <linux/linkage.h> 1#include <linux/linkage.h>
2#include <asm/dwarf2.h> 2#include <asm/dwarf2.h>
3#include <asm/alternative-asm.h>
3 4
4/* 5/*
5 * Zero a page. 6 * Zero a page.
@@ -14,6 +15,15 @@ ENTRY(clear_page_c)
14 CFI_ENDPROC 15 CFI_ENDPROC
15ENDPROC(clear_page_c) 16ENDPROC(clear_page_c)
16 17
18ENTRY(clear_page_c_e)
19 CFI_STARTPROC
20 movl $4096,%ecx
21 xorl %eax,%eax
22 rep stosb
23 ret
24 CFI_ENDPROC
25ENDPROC(clear_page_c_e)
26
17ENTRY(clear_page) 27ENTRY(clear_page)
18 CFI_STARTPROC 28 CFI_STARTPROC
19 xorl %eax,%eax 29 xorl %eax,%eax
@@ -38,21 +48,26 @@ ENTRY(clear_page)
38.Lclear_page_end: 48.Lclear_page_end:
39ENDPROC(clear_page) 49ENDPROC(clear_page)
40 50
41 /* Some CPUs run faster using the string instructions. 51 /*
42 It is also a lot simpler. Use this when possible */ 52 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
53 * It is recommended to use this when possible.
54 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
55 * Otherwise, use original function.
56 *
57 */
43 58
44#include <asm/cpufeature.h> 59#include <asm/cpufeature.h>
45 60
46 .section .altinstr_replacement,"ax" 61 .section .altinstr_replacement,"ax"
471: .byte 0xeb /* jmp <disp8> */ 621: .byte 0xeb /* jmp <disp8> */
48 .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ 63 .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
492: 642: .byte 0xeb /* jmp <disp8> */
65 .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */
663:
50 .previous 67 .previous
51 .section .altinstructions,"a" 68 .section .altinstructions,"a"
52 .align 8 69 altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
53 .quad clear_page 70 .Lclear_page_end-clear_page, 2b-1b
54 .quad 1b 71 altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \
55 .word X86_FEATURE_REP_GOOD 72 .Lclear_page_end-clear_page,3b-2b
56 .byte .Lclear_page_end - clear_page
57 .byte 2b - 1b
58 .previous 73 .previous
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 99e482615195..024840266ba0 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -15,23 +15,30 @@
15#include <asm/asm-offsets.h> 15#include <asm/asm-offsets.h>
16#include <asm/thread_info.h> 16#include <asm/thread_info.h>
17#include <asm/cpufeature.h> 17#include <asm/cpufeature.h>
18#include <asm/alternative-asm.h>
18 19
19 .macro ALTERNATIVE_JUMP feature,orig,alt 20/*
21 * By placing feature2 after feature1 in altinstructions section, we logically
22 * implement:
23 * If CPU has feature2, jmp to alt2 is used
24 * else if CPU has feature1, jmp to alt1 is used
25 * else jmp to orig is used.
26 */
27 .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
200: 280:
21 .byte 0xe9 /* 32bit jump */ 29 .byte 0xe9 /* 32bit jump */
22 .long \orig-1f /* by default jump to orig */ 30 .long \orig-1f /* by default jump to orig */
231: 311:
24 .section .altinstr_replacement,"ax" 32 .section .altinstr_replacement,"ax"
252: .byte 0xe9 /* near jump with 32bit immediate */ 332: .byte 0xe9 /* near jump with 32bit immediate */
26 .long \alt-1b /* offset */ /* or alternatively to alt */ 34 .long \alt1-1b /* offset */ /* or alternatively to alt1 */
353: .byte 0xe9 /* near jump with 32bit immediate */
36 .long \alt2-1b /* offset */ /* or alternatively to alt2 */
27 .previous 37 .previous
38
28 .section .altinstructions,"a" 39 .section .altinstructions,"a"
29 .align 8 40 altinstruction_entry 0b,2b,\feature1,5,5
30 .quad 0b 41 altinstruction_entry 0b,3b,\feature2,5,5
31 .quad 2b
32 .word \feature /* when feature is set */
33 .byte 5
34 .byte 5
35 .previous 42 .previous
36 .endm 43 .endm
37 44
@@ -72,8 +79,10 @@ ENTRY(_copy_to_user)
72 addq %rdx,%rcx 79 addq %rdx,%rcx
73 jc bad_to_user 80 jc bad_to_user
74 cmpq TI_addr_limit(%rax),%rcx 81 cmpq TI_addr_limit(%rax),%rcx
75 jae bad_to_user 82 ja bad_to_user
76 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string 83 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
84 copy_user_generic_unrolled,copy_user_generic_string, \
85 copy_user_enhanced_fast_string
77 CFI_ENDPROC 86 CFI_ENDPROC
78ENDPROC(_copy_to_user) 87ENDPROC(_copy_to_user)
79 88
@@ -85,8 +94,10 @@ ENTRY(_copy_from_user)
85 addq %rdx,%rcx 94 addq %rdx,%rcx
86 jc bad_from_user 95 jc bad_from_user
87 cmpq TI_addr_limit(%rax),%rcx 96 cmpq TI_addr_limit(%rax),%rcx
88 jae bad_from_user 97 ja bad_from_user
89 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string 98 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
99 copy_user_generic_unrolled,copy_user_generic_string, \
100 copy_user_enhanced_fast_string
90 CFI_ENDPROC 101 CFI_ENDPROC
91ENDPROC(_copy_from_user) 102ENDPROC(_copy_from_user)
92 103
@@ -255,3 +266,37 @@ ENTRY(copy_user_generic_string)
255 .previous 266 .previous
256 CFI_ENDPROC 267 CFI_ENDPROC
257ENDPROC(copy_user_generic_string) 268ENDPROC(copy_user_generic_string)
269
270/*
271 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
272 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
273 *
274 * Input:
275 * rdi destination
276 * rsi source
277 * rdx count
278 *
279 * Output:
280 * eax uncopied bytes or 0 if successful.
281 */
282ENTRY(copy_user_enhanced_fast_string)
283 CFI_STARTPROC
284 andl %edx,%edx
285 jz 2f
286 movl %edx,%ecx
2871: rep
288 movsb
2892: xorl %eax,%eax
290 ret
291
292 .section .fixup,"ax"
29312: movl %ecx,%edx /* ecx is zerorest also */
294 jmp copy_user_handle_tail
295 .previous
296
297 .section __ex_table,"a"
298 .align 8
299 .quad 1b,12b
300 .previous
301 CFI_ENDPROC
302ENDPROC(copy_user_enhanced_fast_string)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 75ef61e35e38..efbf2a0ecdea 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -4,6 +4,7 @@
4 4
5#include <asm/cpufeature.h> 5#include <asm/cpufeature.h>
6#include <asm/dwarf2.h> 6#include <asm/dwarf2.h>
7#include <asm/alternative-asm.h>
7 8
8/* 9/*
9 * memcpy - Copy a memory block. 10 * memcpy - Copy a memory block.
@@ -37,6 +38,23 @@
37.Lmemcpy_e: 38.Lmemcpy_e:
38 .previous 39 .previous
39 40
41/*
42 * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
43 * memcpy_c. Use memcpy_c_e when possible.
44 *
45 * This gets patched over the unrolled variant (below) via the
46 * alternative instructions framework:
47 */
48 .section .altinstr_replacement, "ax", @progbits
49.Lmemcpy_c_e:
50 movq %rdi, %rax
51
52 movl %edx, %ecx
53 rep movsb
54 ret
55.Lmemcpy_e_e:
56 .previous
57
40ENTRY(__memcpy) 58ENTRY(__memcpy)
41ENTRY(memcpy) 59ENTRY(memcpy)
42 CFI_STARTPROC 60 CFI_STARTPROC
@@ -49,7 +67,7 @@ ENTRY(memcpy)
49 jb .Lhandle_tail 67 jb .Lhandle_tail
50 68
51 /* 69 /*
52 * We check whether memory false dependece could occur, 70 * We check whether memory false dependence could occur,
53 * then jump to corresponding copy mode. 71 * then jump to corresponding copy mode.
54 */ 72 */
55 cmp %dil, %sil 73 cmp %dil, %sil
@@ -171,21 +189,22 @@ ENDPROC(memcpy)
171ENDPROC(__memcpy) 189ENDPROC(__memcpy)
172 190
173 /* 191 /*
174 * Some CPUs run faster using the string copy instructions. 192 * Some CPUs are adding enhanced REP MOVSB/STOSB feature
175 * It is also a lot simpler. Use this when possible: 193 * If the feature is supported, memcpy_c_e() is the first choice.
176 */ 194 * If enhanced rep movsb copy is not available, use fast string copy
177 195 * memcpy_c() when possible. This is faster and code is simpler than
178 .section .altinstructions, "a" 196 * original memcpy().
179 .align 8 197 * Otherwise, original memcpy() is used.
180 .quad memcpy 198 * In .altinstructions section, ERMS feature is placed after REG_GOOD
181 .quad .Lmemcpy_c 199 * feature to implement the right patch order.
182 .word X86_FEATURE_REP_GOOD 200 *
183
184 /*
185 * Replace only beginning, memcpy is used to apply alternatives, 201 * Replace only beginning, memcpy is used to apply alternatives,
186 * so it is silly to overwrite itself with nops - reboot is the 202 * so it is silly to overwrite itself with nops - reboot is the
187 * only outcome... 203 * only outcome...
188 */ 204 */
189 .byte .Lmemcpy_e - .Lmemcpy_c 205 .section .altinstructions, "a"
190 .byte .Lmemcpy_e - .Lmemcpy_c 206 altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
207 .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
208 altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
209 .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
191 .previous 210 .previous
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 0ecb8433e5a8..d0ec9c2936d7 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -8,6 +8,7 @@
8#define _STRING_C 8#define _STRING_C
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <asm/dwarf2.h> 10#include <asm/dwarf2.h>
11#include <asm/cpufeature.h>
11 12
12#undef memmove 13#undef memmove
13 14
@@ -24,6 +25,7 @@
24 */ 25 */
25ENTRY(memmove) 26ENTRY(memmove)
26 CFI_STARTPROC 27 CFI_STARTPROC
28
27 /* Handle more 32bytes in loop */ 29 /* Handle more 32bytes in loop */
28 mov %rdi, %rax 30 mov %rdi, %rax
29 cmp $0x20, %rdx 31 cmp $0x20, %rdx
@@ -31,8 +33,13 @@ ENTRY(memmove)
31 33
32 /* Decide forward/backward copy mode */ 34 /* Decide forward/backward copy mode */
33 cmp %rdi, %rsi 35 cmp %rdi, %rsi
34 jb 2f 36 jge .Lmemmove_begin_forward
37 mov %rsi, %r8
38 add %rdx, %r8
39 cmp %rdi, %r8
40 jg 2f
35 41
42.Lmemmove_begin_forward:
36 /* 43 /*
37 * movsq instruction have many startup latency 44 * movsq instruction have many startup latency
38 * so we handle small size by general register. 45 * so we handle small size by general register.
@@ -78,6 +85,8 @@ ENTRY(memmove)
78 rep movsq 85 rep movsq
79 movq %r11, (%r10) 86 movq %r11, (%r10)
80 jmp 13f 87 jmp 13f
88.Lmemmove_end_forward:
89
81 /* 90 /*
82 * Handle data backward by movsq. 91 * Handle data backward by movsq.
83 */ 92 */
@@ -194,4 +203,22 @@ ENTRY(memmove)
19413: 20313:
195 retq 204 retq
196 CFI_ENDPROC 205 CFI_ENDPROC
206
207 .section .altinstr_replacement,"ax"
208.Lmemmove_begin_forward_efs:
209 /* Forward moving data. */
210 movq %rdx, %rcx
211 rep movsb
212 retq
213.Lmemmove_end_forward_efs:
214 .previous
215
216 .section .altinstructions,"a"
217 .align 8
218 .quad .Lmemmove_begin_forward
219 .quad .Lmemmove_begin_forward_efs
220 .word X86_FEATURE_ERMS
221 .byte .Lmemmove_end_forward-.Lmemmove_begin_forward
222 .byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
223 .previous
197ENDPROC(memmove) 224ENDPROC(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 09d344269652..79bd454b78a3 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -2,9 +2,13 @@
2 2
3#include <linux/linkage.h> 3#include <linux/linkage.h>
4#include <asm/dwarf2.h> 4#include <asm/dwarf2.h>
5#include <asm/cpufeature.h>
6#include <asm/alternative-asm.h>
5 7
6/* 8/*
7 * ISO C memset - set a memory block to a byte value. 9 * ISO C memset - set a memory block to a byte value. This function uses fast
10 * string to get better performance than the original function. The code is
11 * simpler and shorter than the orignal function as well.
8 * 12 *
9 * rdi destination 13 * rdi destination
10 * rsi value (char) 14 * rsi value (char)
@@ -31,6 +35,28 @@
31.Lmemset_e: 35.Lmemset_e:
32 .previous 36 .previous
33 37
38/*
39 * ISO C memset - set a memory block to a byte value. This function uses
40 * enhanced rep stosb to override the fast string function.
41 * The code is simpler and shorter than the fast string function as well.
42 *
43 * rdi destination
44 * rsi value (char)
45 * rdx count (bytes)
46 *
47 * rax original destination
48 */
49 .section .altinstr_replacement, "ax", @progbits
50.Lmemset_c_e:
51 movq %rdi,%r9
52 movb %sil,%al
53 movl %edx,%ecx
54 rep stosb
55 movq %r9,%rax
56 ret
57.Lmemset_e_e:
58 .previous
59
34ENTRY(memset) 60ENTRY(memset)
35ENTRY(__memset) 61ENTRY(__memset)
36 CFI_STARTPROC 62 CFI_STARTPROC
@@ -112,16 +138,20 @@ ENTRY(__memset)
112ENDPROC(memset) 138ENDPROC(memset)
113ENDPROC(__memset) 139ENDPROC(__memset)
114 140
115 /* Some CPUs run faster using the string instructions. 141 /* Some CPUs support enhanced REP MOVSB/STOSB feature.
116 It is also a lot simpler. Use this when possible */ 142 * It is recommended to use this when possible.
117 143 *
118#include <asm/cpufeature.h> 144 * If enhanced REP MOVSB/STOSB feature is not available, use fast string
119 145 * instructions.
146 *
147 * Otherwise, use original memset function.
148 *
149 * In .altinstructions section, ERMS feature is placed after REG_GOOD
150 * feature to implement the right patch order.
151 */
120 .section .altinstructions,"a" 152 .section .altinstructions,"a"
121 .align 8 153 altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
122 .quad memset 154 .Lfinal-memset,.Lmemset_e-.Lmemset_c
123 .quad .Lmemset_c 155 altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
124 .word X86_FEATURE_REP_GOOD 156 .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e
125 .byte .Lfinal - memset
126 .byte .Lmemset_e - .Lmemset_c
127 .previous 157 .previous
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 3e608edf9958..3d11327c9ab4 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -23,8 +23,8 @@ mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
23obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o 23obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
24 24
25obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o 25obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o
26obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o 26obj-$(CONFIG_AMD_NUMA) += amdtopology.o
27obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o 27obj-$(CONFIG_ACPI_NUMA) += srat.o
28obj-$(CONFIG_NUMA_EMU) += numa_emulation.o 28obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
29 29
30obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o 30obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology.c
index 0919c26820d4..5247d01329ca 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology.c
@@ -12,6 +12,7 @@
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/nodemask.h> 13#include <linux/nodemask.h>
14#include <linux/memblock.h> 14#include <linux/memblock.h>
15#include <linux/bootmem.h>
15 16
16#include <asm/io.h> 17#include <asm/io.h>
17#include <linux/pci_ids.h> 18#include <linux/pci_ids.h>
@@ -69,10 +70,10 @@ static __init void early_get_boot_cpu_id(void)
69 70
70int __init amd_numa_init(void) 71int __init amd_numa_init(void)
71{ 72{
72 unsigned long start = PFN_PHYS(0); 73 u64 start = PFN_PHYS(0);
73 unsigned long end = PFN_PHYS(max_pfn); 74 u64 end = PFN_PHYS(max_pfn);
74 unsigned numnodes; 75 unsigned numnodes;
75 unsigned long prevbase; 76 u64 prevbase;
76 int i, j, nb; 77 int i, j, nb;
77 u32 nodeid, reg; 78 u32 nodeid, reg;
78 unsigned int bits, cores, apicid_base; 79 unsigned int bits, cores, apicid_base;
@@ -95,7 +96,7 @@ int __init amd_numa_init(void)
95 96
96 prevbase = 0; 97 prevbase = 0;
97 for (i = 0; i < 8; i++) { 98 for (i = 0; i < 8; i++) {
98 unsigned long base, limit; 99 u64 base, limit;
99 100
100 base = read_pci_config(0, nb, 1, 0x40 + i*8); 101 base = read_pci_config(0, nb, 1, 0x40 + i*8);
101 limit = read_pci_config(0, nb, 1, 0x44 + i*8); 102 limit = read_pci_config(0, nb, 1, 0x44 + i*8);
@@ -107,18 +108,18 @@ int __init amd_numa_init(void)
107 continue; 108 continue;
108 } 109 }
109 if (nodeid >= numnodes) { 110 if (nodeid >= numnodes) {
110 pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid, 111 pr_info("Ignoring excess node %d (%Lx:%Lx)\n", nodeid,
111 base, limit); 112 base, limit);
112 continue; 113 continue;
113 } 114 }
114 115
115 if (!limit) { 116 if (!limit) {
116 pr_info("Skipping node entry %d (base %lx)\n", 117 pr_info("Skipping node entry %d (base %Lx)\n",
117 i, base); 118 i, base);
118 continue; 119 continue;
119 } 120 }
120 if ((base >> 8) & 3 || (limit >> 8) & 3) { 121 if ((base >> 8) & 3 || (limit >> 8) & 3) {
121 pr_err("Node %d using interleaving mode %lx/%lx\n", 122 pr_err("Node %d using interleaving mode %Lx/%Lx\n",
122 nodeid, (base >> 8) & 3, (limit >> 8) & 3); 123 nodeid, (base >> 8) & 3, (limit >> 8) & 3);
123 return -EINVAL; 124 return -EINVAL;
124 } 125 }
@@ -150,19 +151,19 @@ int __init amd_numa_init(void)
150 continue; 151 continue;
151 } 152 }
152 if (limit < base) { 153 if (limit < base) {
153 pr_err("Node %d bogus settings %lx-%lx.\n", 154 pr_err("Node %d bogus settings %Lx-%Lx.\n",
154 nodeid, base, limit); 155 nodeid, base, limit);
155 continue; 156 continue;
156 } 157 }
157 158
158 /* Could sort here, but pun for now. Should not happen anyroads. */ 159 /* Could sort here, but pun for now. Should not happen anyroads. */
159 if (prevbase > base) { 160 if (prevbase > base) {
160 pr_err("Node map not sorted %lx,%lx\n", 161 pr_err("Node map not sorted %Lx,%Lx\n",
161 prevbase, base); 162 prevbase, base);
162 return -EINVAL; 163 return -EINVAL;
163 } 164 }
164 165
165 pr_info("Node %d MemBase %016lx Limit %016lx\n", 166 pr_info("Node %d MemBase %016Lx Limit %016Lx\n",
166 nodeid, base, limit); 167 nodeid, base, limit);
167 168
168 prevbase = base; 169 prevbase = base;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 20e3f8702d1e..bcb394dfbb35 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -12,6 +12,7 @@
12#include <linux/mmiotrace.h> /* kmmio_handler, ... */ 12#include <linux/mmiotrace.h> /* kmmio_handler, ... */
13#include <linux/perf_event.h> /* perf_sw_event */ 13#include <linux/perf_event.h> /* perf_sw_event */
14#include <linux/hugetlb.h> /* hstate_index_to_shift */ 14#include <linux/hugetlb.h> /* hstate_index_to_shift */
15#include <linux/prefetch.h> /* prefetchw */
15 16
16#include <asm/traps.h> /* dotraplinkage, ... */ 17#include <asm/traps.h> /* dotraplinkage, ... */
17#include <asm/pgalloc.h> /* pgd_*(), ... */ 18#include <asm/pgalloc.h> /* pgd_*(), ... */
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 80088f994193..29f7c6d98179 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -678,8 +678,10 @@ static void __init zone_sizes_init(void)
678{ 678{
679 unsigned long max_zone_pfns[MAX_NR_ZONES]; 679 unsigned long max_zone_pfns[MAX_NR_ZONES];
680 memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); 680 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
681#ifdef CONFIG_ZONE_DMA
681 max_zone_pfns[ZONE_DMA] = 682 max_zone_pfns[ZONE_DMA] =
682 virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; 683 virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
684#endif
683 max_zone_pfns[ZONE_NORMAL] = max_low_pfn; 685 max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
684#ifdef CONFIG_HIGHMEM 686#ifdef CONFIG_HIGHMEM
685 max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; 687 max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
@@ -716,6 +718,7 @@ void __init paging_init(void)
716 * NOTE: at this point the bootmem allocator is fully available. 718 * NOTE: at this point the bootmem allocator is fully available.
717 */ 719 */
718 olpc_dt_build_devicetree(); 720 olpc_dt_build_devicetree();
721 sparse_memory_present_with_active_regions(MAX_NUMNODES);
719 sparse_init(); 722 sparse_init();
720 zone_sizes_init(); 723 zone_sizes_init();
721} 724}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 794233587287..d865c4aeec55 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -616,7 +616,9 @@ void __init paging_init(void)
616 unsigned long max_zone_pfns[MAX_NR_ZONES]; 616 unsigned long max_zone_pfns[MAX_NR_ZONES];
617 617
618 memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); 618 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
619#ifdef CONFIG_ZONE_DMA
619 max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; 620 max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
621#endif
620 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; 622 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
621 max_zone_pfns[ZONE_NORMAL] = max_pfn; 623 max_zone_pfns[ZONE_NORMAL] = max_pfn;
622 624
@@ -679,14 +681,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
679} 681}
680EXPORT_SYMBOL_GPL(arch_add_memory); 682EXPORT_SYMBOL_GPL(arch_add_memory);
681 683
682#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
683int memory_add_physaddr_to_nid(u64 start)
684{
685 return 0;
686}
687EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
688#endif
689
690#endif /* CONFIG_MEMORY_HOTPLUG */ 684#endif /* CONFIG_MEMORY_HOTPLUG */
691 685
692static struct kcore_list kcore_vsyscall; 686static struct kcore_list kcore_vsyscall;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 0369843511dc..be1ef574ce9a 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -91,13 +91,6 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
91 return (__force void __iomem *)phys_to_virt(phys_addr); 91 return (__force void __iomem *)phys_to_virt(phys_addr);
92 92
93 /* 93 /*
94 * Check if the request spans more than any BAR in the iomem resource
95 * tree.
96 */
97 WARN_ONCE(iomem_map_sanity_check(phys_addr, size),
98 KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
99
100 /*
101 * Don't allow anybody to remap normal RAM that we're using.. 94 * Don't allow anybody to remap normal RAM that we're using..
102 */ 95 */
103 last_pfn = last_addr >> PAGE_SHIFT; 96 last_pfn = last_addr >> PAGE_SHIFT;
@@ -170,6 +163,13 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
170 ret_addr = (void __iomem *) (vaddr + offset); 163 ret_addr = (void __iomem *) (vaddr + offset);
171 mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); 164 mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
172 165
166 /*
167 * Check if the request spans more than any BAR in the iomem resource
168 * tree.
169 */
170 WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size),
171 KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
172
173 return ret_addr; 173 return ret_addr;
174err_free_area: 174err_free_area:
175 free_vm_area(area); 175 free_vm_area(area);
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 745258dfc4dc..f5510d889a22 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -1,11 +1,39 @@
1/* Common code for 32 and 64-bit NUMA */ 1/* Common code for 32 and 64-bit NUMA */
2#include <linux/topology.h> 2#include <linux/kernel.h>
3#include <linux/module.h> 3#include <linux/mm.h>
4#include <linux/string.h>
5#include <linux/init.h>
4#include <linux/bootmem.h> 6#include <linux/bootmem.h>
5#include <asm/numa.h> 7#include <linux/memblock.h>
8#include <linux/mmzone.h>
9#include <linux/ctype.h>
10#include <linux/module.h>
11#include <linux/nodemask.h>
12#include <linux/sched.h>
13#include <linux/topology.h>
14
15#include <asm/e820.h>
16#include <asm/proto.h>
17#include <asm/dma.h>
6#include <asm/acpi.h> 18#include <asm/acpi.h>
19#include <asm/amd_nb.h>
20
21#include "numa_internal.h"
7 22
8int __initdata numa_off; 23int __initdata numa_off;
24nodemask_t numa_nodes_parsed __initdata;
25
26struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
27EXPORT_SYMBOL(node_data);
28
29static struct numa_meminfo numa_meminfo
30#ifndef CONFIG_MEMORY_HOTPLUG
31__initdata
32#endif
33;
34
35static int numa_distance_cnt;
36static u8 *numa_distance;
9 37
10static __init int numa_setup(char *opt) 38static __init int numa_setup(char *opt)
11{ 39{
@@ -32,6 +60,15 @@ s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
32 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE 60 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
33}; 61};
34 62
63int __cpuinit numa_cpu_node(int cpu)
64{
65 int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
66
67 if (apicid != BAD_APICID)
68 return __apicid_to_node[apicid];
69 return NUMA_NO_NODE;
70}
71
35cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; 72cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
36EXPORT_SYMBOL(node_to_cpumask_map); 73EXPORT_SYMBOL(node_to_cpumask_map);
37 74
@@ -95,6 +132,407 @@ void __init setup_node_to_cpumask_map(void)
95 pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); 132 pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
96} 133}
97 134
135static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
136 struct numa_meminfo *mi)
137{
138 /* ignore zero length blks */
139 if (start == end)
140 return 0;
141
142 /* whine about and ignore invalid blks */
143 if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
144 pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n",
145 nid, start, end);
146 return 0;
147 }
148
149 if (mi->nr_blks >= NR_NODE_MEMBLKS) {
150 pr_err("NUMA: too many memblk ranges\n");
151 return -EINVAL;
152 }
153
154 mi->blk[mi->nr_blks].start = start;
155 mi->blk[mi->nr_blks].end = end;
156 mi->blk[mi->nr_blks].nid = nid;
157 mi->nr_blks++;
158 return 0;
159}
160
161/**
162 * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo
163 * @idx: Index of memblk to remove
164 * @mi: numa_meminfo to remove memblk from
165 *
166 * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and
167 * decrementing @mi->nr_blks.
168 */
169void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
170{
171 mi->nr_blks--;
172 memmove(&mi->blk[idx], &mi->blk[idx + 1],
173 (mi->nr_blks - idx) * sizeof(mi->blk[0]));
174}
175
176/**
177 * numa_add_memblk - Add one numa_memblk to numa_meminfo
178 * @nid: NUMA node ID of the new memblk
179 * @start: Start address of the new memblk
180 * @end: End address of the new memblk
181 *
182 * Add a new memblk to the default numa_meminfo.
183 *
184 * RETURNS:
185 * 0 on success, -errno on failure.
186 */
187int __init numa_add_memblk(int nid, u64 start, u64 end)
188{
189 return numa_add_memblk_to(nid, start, end, &numa_meminfo);
190}
191
192/* Initialize NODE_DATA for a node on the local memory */
193static void __init setup_node_data(int nid, u64 start, u64 end)
194{
195 const u64 nd_low = PFN_PHYS(MAX_DMA_PFN);
196 const u64 nd_high = PFN_PHYS(max_pfn_mapped);
197 const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
198 bool remapped = false;
199 u64 nd_pa;
200 void *nd;
201 int tnid;
202
203 /*
204 * Don't confuse VM with a node that doesn't have the
205 * minimum amount of memory:
206 */
207 if (end && (end - start) < NODE_MIN_SIZE)
208 return;
209
210 /* initialize remap allocator before aligning to ZONE_ALIGN */
211 init_alloc_remap(nid, start, end);
212
213 start = roundup(start, ZONE_ALIGN);
214
215 printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n",
216 nid, start, end);
217
218 /*
219 * Allocate node data. Try remap allocator first, node-local
220 * memory and then any node. Never allocate in DMA zone.
221 */
222 nd = alloc_remap(nid, nd_size);
223 if (nd) {
224 nd_pa = __pa(nd);
225 remapped = true;
226 } else {
227 nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high,
228 nd_size, SMP_CACHE_BYTES);
229 if (nd_pa == MEMBLOCK_ERROR)
230 nd_pa = memblock_find_in_range(nd_low, nd_high,
231 nd_size, SMP_CACHE_BYTES);
232 if (nd_pa == MEMBLOCK_ERROR) {
233 pr_err("Cannot find %zu bytes in node %d\n",
234 nd_size, nid);
235 return;
236 }
237 memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA");
238 nd = __va(nd_pa);
239 }
240
241 /* report and initialize */
242 printk(KERN_INFO " NODE_DATA [%016Lx - %016Lx]%s\n",
243 nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : "");
244 tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
245 if (!remapped && tnid != nid)
246 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid);
247
248 node_data[nid] = nd;
249 memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
250 NODE_DATA(nid)->node_id = nid;
251 NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT;
252 NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT;
253
254 node_set_online(nid);
255}
256
257/**
258 * numa_cleanup_meminfo - Cleanup a numa_meminfo
259 * @mi: numa_meminfo to clean up
260 *
261 * Sanitize @mi by merging and removing unncessary memblks. Also check for
262 * conflicts and clear unused memblks.
263 *
264 * RETURNS:
265 * 0 on success, -errno on failure.
266 */
267int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
268{
269 const u64 low = 0;
270 const u64 high = PFN_PHYS(max_pfn);
271 int i, j, k;
272
273 /* first, trim all entries */
274 for (i = 0; i < mi->nr_blks; i++) {
275 struct numa_memblk *bi = &mi->blk[i];
276
277 /* make sure all blocks are inside the limits */
278 bi->start = max(bi->start, low);
279 bi->end = min(bi->end, high);
280
281 /* and there's no empty block */
282 if (bi->start >= bi->end)
283 numa_remove_memblk_from(i--, mi);
284 }
285
286 /* merge neighboring / overlapping entries */
287 for (i = 0; i < mi->nr_blks; i++) {
288 struct numa_memblk *bi = &mi->blk[i];
289
290 for (j = i + 1; j < mi->nr_blks; j++) {
291 struct numa_memblk *bj = &mi->blk[j];
292 u64 start, end;
293
294 /*
295 * See whether there are overlapping blocks. Whine
296 * about but allow overlaps of the same nid. They
297 * will be merged below.
298 */
299 if (bi->end > bj->start && bi->start < bj->end) {
300 if (bi->nid != bj->nid) {
301 pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n",
302 bi->nid, bi->start, bi->end,
303 bj->nid, bj->start, bj->end);
304 return -EINVAL;
305 }
306 pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n",
307 bi->nid, bi->start, bi->end,
308 bj->start, bj->end);
309 }
310
311 /*
312 * Join together blocks on the same node, holes
313 * between which don't overlap with memory on other
314 * nodes.
315 */
316 if (bi->nid != bj->nid)
317 continue;
318 start = min(bi->start, bj->start);
319 end = max(bi->end, bj->end);
320 for (k = 0; k < mi->nr_blks; k++) {
321 struct numa_memblk *bk = &mi->blk[k];
322
323 if (bi->nid == bk->nid)
324 continue;
325 if (start < bk->end && end > bk->start)
326 break;
327 }
328 if (k < mi->nr_blks)
329 continue;
330 printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%Lx,%Lx)\n",
331 bi->nid, bi->start, bi->end, bj->start, bj->end,
332 start, end);
333 bi->start = start;
334 bi->end = end;
335 numa_remove_memblk_from(j--, mi);
336 }
337 }
338
339 /* clear unused ones */
340 for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
341 mi->blk[i].start = mi->blk[i].end = 0;
342 mi->blk[i].nid = NUMA_NO_NODE;
343 }
344
345 return 0;
346}
347
348/*
349 * Set nodes, which have memory in @mi, in *@nodemask.
350 */
351static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
352 const struct numa_meminfo *mi)
353{
354 int i;
355
356 for (i = 0; i < ARRAY_SIZE(mi->blk); i++)
357 if (mi->blk[i].start != mi->blk[i].end &&
358 mi->blk[i].nid != NUMA_NO_NODE)
359 node_set(mi->blk[i].nid, *nodemask);
360}
361
362/**
363 * numa_reset_distance - Reset NUMA distance table
364 *
365 * The current table is freed. The next numa_set_distance() call will
366 * create a new one.
367 */
368void __init numa_reset_distance(void)
369{
370 size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
371
372 /* numa_distance could be 1LU marking allocation failure, test cnt */
373 if (numa_distance_cnt)
374 memblock_x86_free_range(__pa(numa_distance),
375 __pa(numa_distance) + size);
376 numa_distance_cnt = 0;
377 numa_distance = NULL; /* enable table creation */
378}
379
380static int __init numa_alloc_distance(void)
381{
382 nodemask_t nodes_parsed;
383 size_t size;
384 int i, j, cnt = 0;
385 u64 phys;
386
387 /* size the new table and allocate it */
388 nodes_parsed = numa_nodes_parsed;
389 numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);
390
391 for_each_node_mask(i, nodes_parsed)
392 cnt = i;
393 cnt++;
394 size = cnt * cnt * sizeof(numa_distance[0]);
395
396 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
397 size, PAGE_SIZE);
398 if (phys == MEMBLOCK_ERROR) {
399 pr_warning("NUMA: Warning: can't allocate distance table!\n");
400 /* don't retry until explicitly reset */
401 numa_distance = (void *)1LU;
402 return -ENOMEM;
403 }
404 memblock_x86_reserve_range(phys, phys + size, "NUMA DIST");
405
406 numa_distance = __va(phys);
407 numa_distance_cnt = cnt;
408
409 /* fill with the default distances */
410 for (i = 0; i < cnt; i++)
411 for (j = 0; j < cnt; j++)
412 numa_distance[i * cnt + j] = i == j ?
413 LOCAL_DISTANCE : REMOTE_DISTANCE;
414 printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);
415
416 return 0;
417}
418
419/**
420 * numa_set_distance - Set NUMA distance from one NUMA to another
421 * @from: the 'from' node to set distance
422 * @to: the 'to' node to set distance
423 * @distance: NUMA distance
424 *
425 * Set the distance from node @from to @to to @distance. If distance table
426 * doesn't exist, one which is large enough to accommodate all the currently
427 * known nodes will be created.
428 *
429 * If such table cannot be allocated, a warning is printed and further
430 * calls are ignored until the distance table is reset with
431 * numa_reset_distance().
432 *
433 * If @from or @to is higher than the highest known node at the time of
434 * table creation or @distance doesn't make sense, the call is ignored.
435 * This is to allow simplification of specific NUMA config implementations.
436 */
437void __init numa_set_distance(int from, int to, int distance)
438{
439 if (!numa_distance && numa_alloc_distance() < 0)
440 return;
441
442 if (from >= numa_distance_cnt || to >= numa_distance_cnt) {
443 printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n",
444 from, to, distance);
445 return;
446 }
447
448 if ((u8)distance != distance ||
449 (from == to && distance != LOCAL_DISTANCE)) {
450 pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
451 from, to, distance);
452 return;
453 }
454
455 numa_distance[from * numa_distance_cnt + to] = distance;
456}
457
458int __node_distance(int from, int to)
459{
460 if (from >= numa_distance_cnt || to >= numa_distance_cnt)
461 return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
462 return numa_distance[from * numa_distance_cnt + to];
463}
464EXPORT_SYMBOL(__node_distance);
465
466/*
467 * Sanity check to catch more bad NUMA configurations (they are amazingly
468 * common). Make sure the nodes cover all memory.
469 */
470static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
471{
472 u64 numaram, e820ram;
473 int i;
474
475 numaram = 0;
476 for (i = 0; i < mi->nr_blks; i++) {
477 u64 s = mi->blk[i].start >> PAGE_SHIFT;
478 u64 e = mi->blk[i].end >> PAGE_SHIFT;
479 numaram += e - s;
480 numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
481 if ((s64)numaram < 0)
482 numaram = 0;
483 }
484
485 e820ram = max_pfn - (memblock_x86_hole_size(0,
486 PFN_PHYS(max_pfn)) >> PAGE_SHIFT);
487 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
488 if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
489 printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
490 (numaram << PAGE_SHIFT) >> 20,
491 (e820ram << PAGE_SHIFT) >> 20);
492 return false;
493 }
494 return true;
495}
496
497static int __init numa_register_memblks(struct numa_meminfo *mi)
498{
499 int i, nid;
500
501 /* Account for nodes with cpus and no memory */
502 node_possible_map = numa_nodes_parsed;
503 numa_nodemask_from_meminfo(&node_possible_map, mi);
504 if (WARN_ON(nodes_empty(node_possible_map)))
505 return -EINVAL;
506
507 for (i = 0; i < mi->nr_blks; i++)
508 memblock_x86_register_active_regions(mi->blk[i].nid,
509 mi->blk[i].start >> PAGE_SHIFT,
510 mi->blk[i].end >> PAGE_SHIFT);
511
512 /* for out of order entries */
513 sort_node_map();
514 if (!numa_meminfo_cover_memory(mi))
515 return -EINVAL;
516
517 /* Finally register nodes. */
518 for_each_node_mask(nid, node_possible_map) {
519 u64 start = PFN_PHYS(max_pfn);
520 u64 end = 0;
521
522 for (i = 0; i < mi->nr_blks; i++) {
523 if (nid != mi->blk[i].nid)
524 continue;
525 start = min(mi->blk[i].start, start);
526 end = max(mi->blk[i].end, end);
527 }
528
529 if (start < end)
530 setup_node_data(nid, start, end);
531 }
532
533 return 0;
534}
535
98/* 536/*
99 * There are unfortunately some poorly designed mainboards around that 537 * There are unfortunately some poorly designed mainboards around that
100 * only connect memory to a single CPU. This breaks the 1:1 cpu->node 538 * only connect memory to a single CPU. This breaks the 1:1 cpu->node
@@ -102,7 +540,7 @@ void __init setup_node_to_cpumask_map(void)
102 * as the number of CPUs is not known yet. We round robin the existing 540 * as the number of CPUs is not known yet. We round robin the existing
103 * nodes. 541 * nodes.
104 */ 542 */
105void __init numa_init_array(void) 543static void __init numa_init_array(void)
106{ 544{
107 int rr, i; 545 int rr, i;
108 546
@@ -117,6 +555,95 @@ void __init numa_init_array(void)
117 } 555 }
118} 556}
119 557
558static int __init numa_init(int (*init_func)(void))
559{
560 int i;
561 int ret;
562
563 for (i = 0; i < MAX_LOCAL_APIC; i++)
564 set_apicid_to_node(i, NUMA_NO_NODE);
565
566 nodes_clear(numa_nodes_parsed);
567 nodes_clear(node_possible_map);
568 nodes_clear(node_online_map);
569 memset(&numa_meminfo, 0, sizeof(numa_meminfo));
570 remove_all_active_ranges();
571 numa_reset_distance();
572
573 ret = init_func();
574 if (ret < 0)
575 return ret;
576 ret = numa_cleanup_meminfo(&numa_meminfo);
577 if (ret < 0)
578 return ret;
579
580 numa_emulation(&numa_meminfo, numa_distance_cnt);
581
582 ret = numa_register_memblks(&numa_meminfo);
583 if (ret < 0)
584 return ret;
585
586 for (i = 0; i < nr_cpu_ids; i++) {
587 int nid = early_cpu_to_node(i);
588
589 if (nid == NUMA_NO_NODE)
590 continue;
591 if (!node_online(nid))
592 numa_clear_node(i);
593 }
594 numa_init_array();
595 return 0;
596}
597
598/**
599 * dummy_numa_init - Fallback dummy NUMA init
600 *
601 * Used if there's no underlying NUMA architecture, NUMA initialization
602 * fails, or NUMA is disabled on the command line.
603 *
604 * Must online at least one node and add memory blocks that cover all
605 * allowed memory. This function must not fail.
606 */
607static int __init dummy_numa_init(void)
608{
609 printk(KERN_INFO "%s\n",
610 numa_off ? "NUMA turned off" : "No NUMA configuration found");
611 printk(KERN_INFO "Faking a node at %016Lx-%016Lx\n",
612 0LLU, PFN_PHYS(max_pfn));
613
614 node_set(0, numa_nodes_parsed);
615 numa_add_memblk(0, 0, PFN_PHYS(max_pfn));
616
617 return 0;
618}
619
620/**
621 * x86_numa_init - Initialize NUMA
622 *
623 * Try each configured NUMA initialization method until one succeeds. The
624 * last fallback is dummy single node config encomapssing whole memory and
625 * never fails.
626 */
627void __init x86_numa_init(void)
628{
629 if (!numa_off) {
630#ifdef CONFIG_X86_NUMAQ
631 if (!numa_init(numaq_numa_init))
632 return;
633#endif
634#ifdef CONFIG_ACPI_NUMA
635 if (!numa_init(x86_acpi_numa_init))
636 return;
637#endif
638#ifdef CONFIG_AMD_NUMA
639 if (!numa_init(amd_numa_init))
640 return;
641#endif
642 }
643
644 numa_init(dummy_numa_init);
645}
646
120static __init int find_near_online_node(int node) 647static __init int find_near_online_node(int node)
121{ 648{
122 int n, val; 649 int n, val;
@@ -282,3 +809,18 @@ const struct cpumask *cpumask_of_node(int node)
282EXPORT_SYMBOL(cpumask_of_node); 809EXPORT_SYMBOL(cpumask_of_node);
283 810
284#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ 811#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
812
813#ifdef CONFIG_MEMORY_HOTPLUG
814int memory_add_physaddr_to_nid(u64 start)
815{
816 struct numa_meminfo *mi = &numa_meminfo;
817 int nid = mi->blk[0].nid;
818 int i;
819
820 for (i = 0; i < mi->nr_blks; i++)
821 if (mi->blk[i].start <= start && mi->blk[i].end > start)
822 nid = mi->blk[i].nid;
823 return nid;
824}
825EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
826#endif
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index bde3906420df..849a975d3fa0 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -22,39 +22,11 @@
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 */ 23 */
24 24
25#include <linux/mm.h>
26#include <linux/bootmem.h> 25#include <linux/bootmem.h>
27#include <linux/memblock.h> 26#include <linux/memblock.h>
28#include <linux/mmzone.h>
29#include <linux/highmem.h>
30#include <linux/initrd.h>
31#include <linux/nodemask.h>
32#include <linux/module.h> 27#include <linux/module.h>
33#include <linux/kexec.h>
34#include <linux/pfn.h>
35#include <linux/swap.h>
36#include <linux/acpi.h>
37
38#include <asm/e820.h>
39#include <asm/setup.h>
40#include <asm/mmzone.h>
41#include <asm/bios_ebda.h>
42#include <asm/proto.h>
43
44struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
45EXPORT_SYMBOL(node_data);
46
47/*
48 * numa interface - we expect the numa architecture specific code to have
49 * populated the following initialisation.
50 *
51 * 1) node_online_map - the map of all nodes configured (online) in the system
52 * 2) node_start_pfn - the starting page frame number for a node
53 * 3) node_end_pfn - the ending page fram number for a node
54 */
55unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly;
56unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly;
57 28
29#include "numa_internal.h"
58 30
59#ifdef CONFIG_DISCONTIGMEM 31#ifdef CONFIG_DISCONTIGMEM
60/* 32/*
@@ -99,108 +71,46 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
99} 71}
100#endif 72#endif
101 73
102extern unsigned long find_max_low_pfn(void);
103extern unsigned long highend_pfn, highstart_pfn; 74extern unsigned long highend_pfn, highstart_pfn;
104 75
105#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) 76#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
106 77
107unsigned long node_remap_size[MAX_NUMNODES];
108static void *node_remap_start_vaddr[MAX_NUMNODES]; 78static void *node_remap_start_vaddr[MAX_NUMNODES];
109void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); 79void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
110 80
111static unsigned long kva_start_pfn;
112static unsigned long kva_pages;
113
114int __cpuinit numa_cpu_node(int cpu)
115{
116 return apic->x86_32_numa_cpu_node(cpu);
117}
118
119/*
120 * FLAT - support for basic PC memory model with discontig enabled, essentially
121 * a single node with all available processors in it with a flat
122 * memory map.
123 */
124int __init get_memcfg_numa_flat(void)
125{
126 printk(KERN_DEBUG "NUMA - single node, flat memory mode\n");
127
128 node_start_pfn[0] = 0;
129 node_end_pfn[0] = max_pfn;
130 memblock_x86_register_active_regions(0, 0, max_pfn);
131 memory_present(0, 0, max_pfn);
132 node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn);
133
134 /* Indicate there is one node available. */
135 nodes_clear(node_online_map);
136 node_set_online(0);
137 return 1;
138}
139
140/*
141 * Find the highest page frame number we have available for the node
142 */
143static void __init propagate_e820_map_node(int nid)
144{
145 if (node_end_pfn[nid] > max_pfn)
146 node_end_pfn[nid] = max_pfn;
147 /*
148 * if a user has given mem=XXXX, then we need to make sure
149 * that the node _starts_ before that, too, not just ends
150 */
151 if (node_start_pfn[nid] > max_pfn)
152 node_start_pfn[nid] = max_pfn;
153 BUG_ON(node_start_pfn[nid] > node_end_pfn[nid]);
154}
155
156/*
157 * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
158 * method. For node zero take this from the bottom of memory, for
159 * subsequent nodes place them at node_remap_start_vaddr which contains
160 * node local data in physically node local memory. See setup_memory()
161 * for details.
162 */
163static void __init allocate_pgdat(int nid)
164{
165 char buf[16];
166
167 if (node_has_online_mem(nid) && node_remap_start_vaddr[nid])
168 NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
169 else {
170 unsigned long pgdat_phys;
171 pgdat_phys = memblock_find_in_range(min_low_pfn<<PAGE_SHIFT,
172 max_pfn_mapped<<PAGE_SHIFT,
173 sizeof(pg_data_t),
174 PAGE_SIZE);
175 NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT));
176 memset(buf, 0, sizeof(buf));
177 sprintf(buf, "NODE_DATA %d", nid);
178 memblock_x86_reserve_range(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf);
179 }
180 printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n",
181 nid, (unsigned long)NODE_DATA(nid));
182}
183
184/* 81/*
185 * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel 82 * Remap memory allocator
186 * virtual address space (KVA) is reserved and portions of nodes are mapped
187 * using it. This is to allow node-local memory to be allocated for
188 * structures that would normally require ZONE_NORMAL. The memory is
189 * allocated with alloc_remap() and callers should be prepared to allocate
190 * from the bootmem allocator instead.
191 */ 83 */
192static unsigned long node_remap_start_pfn[MAX_NUMNODES]; 84static unsigned long node_remap_start_pfn[MAX_NUMNODES];
193static void *node_remap_end_vaddr[MAX_NUMNODES]; 85static void *node_remap_end_vaddr[MAX_NUMNODES];
194static void *node_remap_alloc_vaddr[MAX_NUMNODES]; 86static void *node_remap_alloc_vaddr[MAX_NUMNODES];
195static unsigned long node_remap_offset[MAX_NUMNODES];
196 87
88/**
89 * alloc_remap - Allocate remapped memory
90 * @nid: NUMA node to allocate memory from
91 * @size: The size of allocation
92 *
93 * Allocate @size bytes from the remap area of NUMA node @nid. The
94 * size of the remap area is predetermined by init_alloc_remap() and
95 * only the callers considered there should call this function. For
96 * more info, please read the comment on top of init_alloc_remap().
97 *
98 * The caller must be ready to handle allocation failure from this
99 * function and fall back to regular memory allocator in such cases.
100 *
101 * CONTEXT:
102 * Single CPU early boot context.
103 *
104 * RETURNS:
105 * Pointer to the allocated memory on success, %NULL on failure.
106 */
197void *alloc_remap(int nid, unsigned long size) 107void *alloc_remap(int nid, unsigned long size)
198{ 108{
199 void *allocation = node_remap_alloc_vaddr[nid]; 109 void *allocation = node_remap_alloc_vaddr[nid];
200 110
201 size = ALIGN(size, L1_CACHE_BYTES); 111 size = ALIGN(size, L1_CACHE_BYTES);
202 112
203 if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) 113 if (!allocation || (allocation + size) > node_remap_end_vaddr[nid])
204 return NULL; 114 return NULL;
205 115
206 node_remap_alloc_vaddr[nid] += size; 116 node_remap_alloc_vaddr[nid] += size;
@@ -209,26 +119,6 @@ void *alloc_remap(int nid, unsigned long size)
209 return allocation; 119 return allocation;
210} 120}
211 121
212static void __init remap_numa_kva(void)
213{
214 void *vaddr;
215 unsigned long pfn;
216 int node;
217
218 for_each_online_node(node) {
219 printk(KERN_DEBUG "remap_numa_kva: node %d\n", node);
220 for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
221 vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
222 printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n",
223 (unsigned long)vaddr,
224 node_remap_start_pfn[node] + pfn);
225 set_pmd_pfn((ulong) vaddr,
226 node_remap_start_pfn[node] + pfn,
227 PAGE_KERNEL_LARGE);
228 }
229 }
230}
231
232#ifdef CONFIG_HIBERNATION 122#ifdef CONFIG_HIBERNATION
233/** 123/**
234 * resume_map_numa_kva - add KVA mapping to the temporary page tables created 124 * resume_map_numa_kva - add KVA mapping to the temporary page tables created
@@ -240,15 +130,16 @@ void resume_map_numa_kva(pgd_t *pgd_base)
240 int node; 130 int node;
241 131
242 for_each_online_node(node) { 132 for_each_online_node(node) {
243 unsigned long start_va, start_pfn, size, pfn; 133 unsigned long start_va, start_pfn, nr_pages, pfn;
244 134
245 start_va = (unsigned long)node_remap_start_vaddr[node]; 135 start_va = (unsigned long)node_remap_start_vaddr[node];
246 start_pfn = node_remap_start_pfn[node]; 136 start_pfn = node_remap_start_pfn[node];
247 size = node_remap_size[node]; 137 nr_pages = (node_remap_end_vaddr[node] -
138 node_remap_start_vaddr[node]) >> PAGE_SHIFT;
248 139
249 printk(KERN_DEBUG "%s: node %d\n", __func__, node); 140 printk(KERN_DEBUG "%s: node %d\n", __func__, node);
250 141
251 for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) { 142 for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) {
252 unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); 143 unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
253 pgd_t *pgd = pgd_base + pgd_index(vaddr); 144 pgd_t *pgd = pgd_base + pgd_index(vaddr);
254 pud_t *pud = pud_offset(pgd, vaddr); 145 pud_t *pud = pud_offset(pgd, vaddr);
@@ -264,132 +155,89 @@ void resume_map_numa_kva(pgd_t *pgd_base)
264} 155}
265#endif 156#endif
266 157
267static __init unsigned long calculate_numa_remap_pages(void) 158/**
159 * init_alloc_remap - Initialize remap allocator for a NUMA node
160 * @nid: NUMA node to initizlie remap allocator for
161 *
162 * NUMA nodes may end up without any lowmem. As allocating pgdat and
163 * memmap on a different node with lowmem is inefficient, a special
164 * remap allocator is implemented which can be used by alloc_remap().
165 *
166 * For each node, the amount of memory which will be necessary for
167 * pgdat and memmap is calculated and two memory areas of the size are
168 * allocated - one in the node and the other in lowmem; then, the area
169 * in the node is remapped to the lowmem area.
170 *
171 * As pgdat and memmap must be allocated in lowmem anyway, this
172 * doesn't waste lowmem address space; however, the actual lowmem
173 * which gets remapped over is wasted. The amount shouldn't be
174 * problematic on machines this feature will be used.
175 *
176 * Initialization failure isn't fatal. alloc_remap() is used
177 * opportunistically and the callers will fall back to other memory
178 * allocation mechanisms on failure.
179 */
180void __init init_alloc_remap(int nid, u64 start, u64 end)
268{ 181{
269 int nid; 182 unsigned long start_pfn = start >> PAGE_SHIFT;
270 unsigned long size, reserve_pages = 0; 183 unsigned long end_pfn = end >> PAGE_SHIFT;
271 184 unsigned long size, pfn;
272 for_each_online_node(nid) { 185 u64 node_pa, remap_pa;
273 u64 node_kva_target; 186 void *remap_va;
274 u64 node_kva_final;
275
276 /*
277 * The acpi/srat node info can show hot-add memroy zones
278 * where memory could be added but not currently present.
279 */
280 printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
281 nid, node_start_pfn[nid], node_end_pfn[nid]);
282 if (node_start_pfn[nid] > max_pfn)
283 continue;
284 if (!node_end_pfn[nid])
285 continue;
286 if (node_end_pfn[nid] > max_pfn)
287 node_end_pfn[nid] = max_pfn;
288
289 /* ensure the remap includes space for the pgdat. */
290 size = node_remap_size[nid] + sizeof(pg_data_t);
291
292 /* convert size to large (pmd size) pages, rounding up */
293 size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
294 /* now the roundup is correct, convert to PAGE_SIZE pages */
295 size = size * PTRS_PER_PTE;
296
297 node_kva_target = round_down(node_end_pfn[nid] - size,
298 PTRS_PER_PTE);
299 node_kva_target <<= PAGE_SHIFT;
300 do {
301 node_kva_final = memblock_find_in_range(node_kva_target,
302 ((u64)node_end_pfn[nid])<<PAGE_SHIFT,
303 ((u64)size)<<PAGE_SHIFT,
304 LARGE_PAGE_BYTES);
305 node_kva_target -= LARGE_PAGE_BYTES;
306 } while (node_kva_final == MEMBLOCK_ERROR &&
307 (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid]));
308
309 if (node_kva_final == MEMBLOCK_ERROR)
310 panic("Can not get kva ram\n");
311
312 node_remap_size[nid] = size;
313 node_remap_offset[nid] = reserve_pages;
314 reserve_pages += size;
315 printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of"
316 " node %d at %llx\n",
317 size, nid, node_kva_final>>PAGE_SHIFT);
318
319 /*
320 * prevent kva address below max_low_pfn want it on system
321 * with less memory later.
322 * layout will be: KVA address , KVA RAM
323 *
324 * we are supposed to only record the one less then max_low_pfn
325 * but we could have some hole in high memory, and it will only
326 * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide
327 * to use it as free.
328 * So memblock_x86_reserve_range here, hope we don't run out of that array
329 */
330 memblock_x86_reserve_range(node_kva_final,
331 node_kva_final+(((u64)size)<<PAGE_SHIFT),
332 "KVA RAM");
333
334 node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT;
335 }
336 printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n",
337 reserve_pages);
338 return reserve_pages;
339}
340 187
341static void init_remap_allocator(int nid) 188 /*
342{ 189 * The acpi/srat node info can show hot-add memroy zones where
343 node_remap_start_vaddr[nid] = pfn_to_kaddr( 190 * memory could be added but not currently present.
344 kva_start_pfn + node_remap_offset[nid]); 191 */
345 node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + 192 printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
346 (node_remap_size[nid] * PAGE_SIZE); 193 nid, start_pfn, end_pfn);
347 node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + 194
348 ALIGN(sizeof(pg_data_t), PAGE_SIZE); 195 /* calculate the necessary space aligned to large page size */
349 196 size = node_memmap_size_bytes(nid, start_pfn, end_pfn);
350 printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid, 197 size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
351 (ulong) node_remap_start_vaddr[nid], 198 size = ALIGN(size, LARGE_PAGE_BYTES);
352 (ulong) node_remap_end_vaddr[nid]); 199
200 /* allocate node memory and the lowmem remap area */
201 node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES);
202 if (node_pa == MEMBLOCK_ERROR) {
203 pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
204 size, nid);
205 return;
206 }
207 memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
208
209 remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
210 max_low_pfn << PAGE_SHIFT,
211 size, LARGE_PAGE_BYTES);
212 if (remap_pa == MEMBLOCK_ERROR) {
213 pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
214 size, nid);
215 memblock_x86_free_range(node_pa, node_pa + size);
216 return;
217 }
218 memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG");
219 remap_va = phys_to_virt(remap_pa);
220
221 /* perform actual remap */
222 for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE)
223 set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT),
224 (node_pa >> PAGE_SHIFT) + pfn,
225 PAGE_KERNEL_LARGE);
226
227 /* initialize remap allocator parameters */
228 node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
229 node_remap_start_vaddr[nid] = remap_va;
230 node_remap_end_vaddr[nid] = remap_va + size;
231 node_remap_alloc_vaddr[nid] = remap_va;
232
233 printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n",
234 nid, node_pa, node_pa + size, remap_va, remap_va + size);
353} 235}
354 236
355void __init initmem_init(void) 237void __init initmem_init(void)
356{ 238{
357 int nid; 239 x86_numa_init();
358 long kva_target_pfn;
359
360 /*
361 * When mapping a NUMA machine we allocate the node_mem_map arrays
362 * from node local memory. They are then mapped directly into KVA
363 * between zone normal and vmalloc space. Calculate the size of
364 * this space and use it to adjust the boundary between ZONE_NORMAL
365 * and ZONE_HIGHMEM.
366 */
367
368 get_memcfg_numa();
369 numa_init_array();
370
371 kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
372 240
373 kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
374 do {
375 kva_start_pfn = memblock_find_in_range(kva_target_pfn<<PAGE_SHIFT,
376 max_low_pfn<<PAGE_SHIFT,
377 kva_pages<<PAGE_SHIFT,
378 PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT;
379 kva_target_pfn -= PTRS_PER_PTE;
380 } while (kva_start_pfn == MEMBLOCK_ERROR && kva_target_pfn > min_low_pfn);
381
382 if (kva_start_pfn == MEMBLOCK_ERROR)
383 panic("Can not get kva space\n");
384
385 printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n",
386 kva_start_pfn, max_low_pfn);
387 printk(KERN_INFO "max_pfn = %lx\n", max_pfn);
388
389 /* avoid clash with initrd */
390 memblock_x86_reserve_range(kva_start_pfn<<PAGE_SHIFT,
391 (kva_start_pfn + kva_pages)<<PAGE_SHIFT,
392 "KVA PG");
393#ifdef CONFIG_HIGHMEM 241#ifdef CONFIG_HIGHMEM
394 highstart_pfn = highend_pfn = max_pfn; 242 highstart_pfn = highend_pfn = max_pfn;
395 if (max_pfn > max_low_pfn) 243 if (max_pfn > max_low_pfn)
@@ -409,51 +257,9 @@ void __init initmem_init(void)
409 257
410 printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", 258 printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n",
411 (ulong) pfn_to_kaddr(max_low_pfn)); 259 (ulong) pfn_to_kaddr(max_low_pfn));
412 for_each_online_node(nid) {
413 init_remap_allocator(nid);
414
415 allocate_pgdat(nid);
416 }
417 remap_numa_kva();
418 260
419 printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", 261 printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
420 (ulong) pfn_to_kaddr(highstart_pfn)); 262 (ulong) pfn_to_kaddr(highstart_pfn));
421 for_each_online_node(nid)
422 propagate_e820_map_node(nid);
423
424 for_each_online_node(nid) {
425 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
426 NODE_DATA(nid)->node_id = nid;
427 }
428 263
429 setup_bootmem_allocator(); 264 setup_bootmem_allocator();
430} 265}
431
432#ifdef CONFIG_MEMORY_HOTPLUG
433static int paddr_to_nid(u64 addr)
434{
435 int nid;
436 unsigned long pfn = PFN_DOWN(addr);
437
438 for_each_node(nid)
439 if (node_start_pfn[nid] <= pfn &&
440 pfn < node_end_pfn[nid])
441 return nid;
442
443 return -1;
444}
445
446/*
447 * This function is used to ask node id BEFORE memmap and mem_section's
448 * initialization (pfn_to_nid() can't be used yet).
449 * If _PXM is not defined on ACPI's DSDT, node id must be found by this.
450 */
451int memory_add_physaddr_to_nid(u64 addr)
452{
453 int nid = paddr_to_nid(addr);
454 return (nid >= 0) ? nid : 0;
455}
456
457EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
458#endif
459
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 85b52fc03084..dd27f401f0a0 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -2,646 +2,13 @@
2 * Generic VM initialization for x86-64 NUMA setups. 2 * Generic VM initialization for x86-64 NUMA setups.
3 * Copyright 2002,2003 Andi Kleen, SuSE Labs. 3 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
4 */ 4 */
5#include <linux/kernel.h>
6#include <linux/mm.h>
7#include <linux/string.h>
8#include <linux/init.h>
9#include <linux/bootmem.h> 5#include <linux/bootmem.h>
10#include <linux/memblock.h>
11#include <linux/mmzone.h>
12#include <linux/ctype.h>
13#include <linux/module.h>
14#include <linux/nodemask.h>
15#include <linux/sched.h>
16#include <linux/acpi.h>
17
18#include <asm/e820.h>
19#include <asm/proto.h>
20#include <asm/dma.h>
21#include <asm/acpi.h>
22#include <asm/amd_nb.h>
23 6
24#include "numa_internal.h" 7#include "numa_internal.h"
25 8
26struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
27EXPORT_SYMBOL(node_data);
28
29nodemask_t numa_nodes_parsed __initdata;
30
31struct memnode memnode;
32
33static unsigned long __initdata nodemap_addr;
34static unsigned long __initdata nodemap_size;
35
36static struct numa_meminfo numa_meminfo __initdata;
37
38static int numa_distance_cnt;
39static u8 *numa_distance;
40
41/*
42 * Given a shift value, try to populate memnodemap[]
43 * Returns :
44 * 1 if OK
45 * 0 if memnodmap[] too small (of shift too small)
46 * -1 if node overlap or lost ram (shift too big)
47 */
48static int __init populate_memnodemap(const struct numa_meminfo *mi, int shift)
49{
50 unsigned long addr, end;
51 int i, res = -1;
52
53 memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize);
54 for (i = 0; i < mi->nr_blks; i++) {
55 addr = mi->blk[i].start;
56 end = mi->blk[i].end;
57 if (addr >= end)
58 continue;
59 if ((end >> shift) >= memnodemapsize)
60 return 0;
61 do {
62 if (memnodemap[addr >> shift] != NUMA_NO_NODE)
63 return -1;
64 memnodemap[addr >> shift] = mi->blk[i].nid;
65 addr += (1UL << shift);
66 } while (addr < end);
67 res = 1;
68 }
69 return res;
70}
71
72static int __init allocate_cachealigned_memnodemap(void)
73{
74 unsigned long addr;
75
76 memnodemap = memnode.embedded_map;
77 if (memnodemapsize <= ARRAY_SIZE(memnode.embedded_map))
78 return 0;
79
80 addr = 0x8000;
81 nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
82 nodemap_addr = memblock_find_in_range(addr, get_max_mapped(),
83 nodemap_size, L1_CACHE_BYTES);
84 if (nodemap_addr == MEMBLOCK_ERROR) {
85 printk(KERN_ERR
86 "NUMA: Unable to allocate Memory to Node hash map\n");
87 nodemap_addr = nodemap_size = 0;
88 return -1;
89 }
90 memnodemap = phys_to_virt(nodemap_addr);
91 memblock_x86_reserve_range(nodemap_addr, nodemap_addr + nodemap_size, "MEMNODEMAP");
92
93 printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
94 nodemap_addr, nodemap_addr + nodemap_size);
95 return 0;
96}
97
98/*
99 * The LSB of all start and end addresses in the node map is the value of the
100 * maximum possible shift.
101 */
102static int __init extract_lsb_from_nodes(const struct numa_meminfo *mi)
103{
104 int i, nodes_used = 0;
105 unsigned long start, end;
106 unsigned long bitfield = 0, memtop = 0;
107
108 for (i = 0; i < mi->nr_blks; i++) {
109 start = mi->blk[i].start;
110 end = mi->blk[i].end;
111 if (start >= end)
112 continue;
113 bitfield |= start;
114 nodes_used++;
115 if (end > memtop)
116 memtop = end;
117 }
118 if (nodes_used <= 1)
119 i = 63;
120 else
121 i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
122 memnodemapsize = (memtop >> i)+1;
123 return i;
124}
125
126static int __init compute_hash_shift(const struct numa_meminfo *mi)
127{
128 int shift;
129
130 shift = extract_lsb_from_nodes(mi);
131 if (allocate_cachealigned_memnodemap())
132 return -1;
133 printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
134 shift);
135
136 if (populate_memnodemap(mi, shift) != 1) {
137 printk(KERN_INFO "Your memory is not aligned you need to "
138 "rebuild your kernel with a bigger NODEMAPSIZE "
139 "shift=%d\n", shift);
140 return -1;
141 }
142 return shift;
143}
144
145int __meminit __early_pfn_to_nid(unsigned long pfn)
146{
147 return phys_to_nid(pfn << PAGE_SHIFT);
148}
149
150static void * __init early_node_mem(int nodeid, unsigned long start,
151 unsigned long end, unsigned long size,
152 unsigned long align)
153{
154 unsigned long mem;
155
156 /*
157 * put it on high as possible
158 * something will go with NODE_DATA
159 */
160 if (start < (MAX_DMA_PFN<<PAGE_SHIFT))
161 start = MAX_DMA_PFN<<PAGE_SHIFT;
162 if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) &&
163 end > (MAX_DMA32_PFN<<PAGE_SHIFT))
164 start = MAX_DMA32_PFN<<PAGE_SHIFT;
165 mem = memblock_x86_find_in_range_node(nodeid, start, end, size, align);
166 if (mem != MEMBLOCK_ERROR)
167 return __va(mem);
168
169 /* extend the search scope */
170 end = max_pfn_mapped << PAGE_SHIFT;
171 start = MAX_DMA_PFN << PAGE_SHIFT;
172 mem = memblock_find_in_range(start, end, size, align);
173 if (mem != MEMBLOCK_ERROR)
174 return __va(mem);
175
176 printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
177 size, nodeid);
178
179 return NULL;
180}
181
182static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
183 struct numa_meminfo *mi)
184{
185 /* ignore zero length blks */
186 if (start == end)
187 return 0;
188
189 /* whine about and ignore invalid blks */
190 if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
191 pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n",
192 nid, start, end);
193 return 0;
194 }
195
196 if (mi->nr_blks >= NR_NODE_MEMBLKS) {
197 pr_err("NUMA: too many memblk ranges\n");
198 return -EINVAL;
199 }
200
201 mi->blk[mi->nr_blks].start = start;
202 mi->blk[mi->nr_blks].end = end;
203 mi->blk[mi->nr_blks].nid = nid;
204 mi->nr_blks++;
205 return 0;
206}
207
208/**
209 * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo
210 * @idx: Index of memblk to remove
211 * @mi: numa_meminfo to remove memblk from
212 *
213 * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and
214 * decrementing @mi->nr_blks.
215 */
216void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
217{
218 mi->nr_blks--;
219 memmove(&mi->blk[idx], &mi->blk[idx + 1],
220 (mi->nr_blks - idx) * sizeof(mi->blk[0]));
221}
222
223/**
224 * numa_add_memblk - Add one numa_memblk to numa_meminfo
225 * @nid: NUMA node ID of the new memblk
226 * @start: Start address of the new memblk
227 * @end: End address of the new memblk
228 *
229 * Add a new memblk to the default numa_meminfo.
230 *
231 * RETURNS:
232 * 0 on success, -errno on failure.
233 */
234int __init numa_add_memblk(int nid, u64 start, u64 end)
235{
236 return numa_add_memblk_to(nid, start, end, &numa_meminfo);
237}
238
239/* Initialize bootmem allocator for a node */
240void __init
241setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
242{
243 unsigned long start_pfn, last_pfn, nodedata_phys;
244 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
245 int nid;
246
247 if (!end)
248 return;
249
250 /*
251 * Don't confuse VM with a node that doesn't have the
252 * minimum amount of memory:
253 */
254 if (end && (end - start) < NODE_MIN_SIZE)
255 return;
256
257 start = roundup(start, ZONE_ALIGN);
258
259 printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid,
260 start, end);
261
262 start_pfn = start >> PAGE_SHIFT;
263 last_pfn = end >> PAGE_SHIFT;
264
265 node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size,
266 SMP_CACHE_BYTES);
267 if (node_data[nodeid] == NULL)
268 return;
269 nodedata_phys = __pa(node_data[nodeid]);
270 memblock_x86_reserve_range(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA");
271 printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
272 nodedata_phys + pgdat_size - 1);
273 nid = phys_to_nid(nodedata_phys);
274 if (nid != nodeid)
275 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
276
277 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
278 NODE_DATA(nodeid)->node_id = nodeid;
279 NODE_DATA(nodeid)->node_start_pfn = start_pfn;
280 NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
281
282 node_set_online(nodeid);
283}
284
285/**
286 * numa_cleanup_meminfo - Cleanup a numa_meminfo
287 * @mi: numa_meminfo to clean up
288 *
289 * Sanitize @mi by merging and removing unncessary memblks. Also check for
290 * conflicts and clear unused memblks.
291 *
292 * RETURNS:
293 * 0 on success, -errno on failure.
294 */
295int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
296{
297 const u64 low = 0;
298 const u64 high = (u64)max_pfn << PAGE_SHIFT;
299 int i, j, k;
300
301 for (i = 0; i < mi->nr_blks; i++) {
302 struct numa_memblk *bi = &mi->blk[i];
303
304 /* make sure all blocks are inside the limits */
305 bi->start = max(bi->start, low);
306 bi->end = min(bi->end, high);
307
308 /* and there's no empty block */
309 if (bi->start >= bi->end) {
310 numa_remove_memblk_from(i--, mi);
311 continue;
312 }
313
314 for (j = i + 1; j < mi->nr_blks; j++) {
315 struct numa_memblk *bj = &mi->blk[j];
316 unsigned long start, end;
317
318 /*
319 * See whether there are overlapping blocks. Whine
320 * about but allow overlaps of the same nid. They
321 * will be merged below.
322 */
323 if (bi->end > bj->start && bi->start < bj->end) {
324 if (bi->nid != bj->nid) {
325 pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n",
326 bi->nid, bi->start, bi->end,
327 bj->nid, bj->start, bj->end);
328 return -EINVAL;
329 }
330 pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n",
331 bi->nid, bi->start, bi->end,
332 bj->start, bj->end);
333 }
334
335 /*
336 * Join together blocks on the same node, holes
337 * between which don't overlap with memory on other
338 * nodes.
339 */
340 if (bi->nid != bj->nid)
341 continue;
342 start = max(min(bi->start, bj->start), low);
343 end = min(max(bi->end, bj->end), high);
344 for (k = 0; k < mi->nr_blks; k++) {
345 struct numa_memblk *bk = &mi->blk[k];
346
347 if (bi->nid == bk->nid)
348 continue;
349 if (start < bk->end && end > bk->start)
350 break;
351 }
352 if (k < mi->nr_blks)
353 continue;
354 printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
355 bi->nid, bi->start, bi->end, bj->start, bj->end,
356 start, end);
357 bi->start = start;
358 bi->end = end;
359 numa_remove_memblk_from(j--, mi);
360 }
361 }
362
363 for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
364 mi->blk[i].start = mi->blk[i].end = 0;
365 mi->blk[i].nid = NUMA_NO_NODE;
366 }
367
368 return 0;
369}
370
371/*
372 * Set nodes, which have memory in @mi, in *@nodemask.
373 */
374static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
375 const struct numa_meminfo *mi)
376{
377 int i;
378
379 for (i = 0; i < ARRAY_SIZE(mi->blk); i++)
380 if (mi->blk[i].start != mi->blk[i].end &&
381 mi->blk[i].nid != NUMA_NO_NODE)
382 node_set(mi->blk[i].nid, *nodemask);
383}
384
385/**
386 * numa_reset_distance - Reset NUMA distance table
387 *
388 * The current table is freed. The next numa_set_distance() call will
389 * create a new one.
390 */
391void __init numa_reset_distance(void)
392{
393 size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
394
395 /* numa_distance could be 1LU marking allocation failure, test cnt */
396 if (numa_distance_cnt)
397 memblock_x86_free_range(__pa(numa_distance),
398 __pa(numa_distance) + size);
399 numa_distance_cnt = 0;
400 numa_distance = NULL; /* enable table creation */
401}
402
403static int __init numa_alloc_distance(void)
404{
405 nodemask_t nodes_parsed;
406 size_t size;
407 int i, j, cnt = 0;
408 u64 phys;
409
410 /* size the new table and allocate it */
411 nodes_parsed = numa_nodes_parsed;
412 numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);
413
414 for_each_node_mask(i, nodes_parsed)
415 cnt = i;
416 cnt++;
417 size = cnt * cnt * sizeof(numa_distance[0]);
418
419 phys = memblock_find_in_range(0, (u64)max_pfn_mapped << PAGE_SHIFT,
420 size, PAGE_SIZE);
421 if (phys == MEMBLOCK_ERROR) {
422 pr_warning("NUMA: Warning: can't allocate distance table!\n");
423 /* don't retry until explicitly reset */
424 numa_distance = (void *)1LU;
425 return -ENOMEM;
426 }
427 memblock_x86_reserve_range(phys, phys + size, "NUMA DIST");
428
429 numa_distance = __va(phys);
430 numa_distance_cnt = cnt;
431
432 /* fill with the default distances */
433 for (i = 0; i < cnt; i++)
434 for (j = 0; j < cnt; j++)
435 numa_distance[i * cnt + j] = i == j ?
436 LOCAL_DISTANCE : REMOTE_DISTANCE;
437 printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);
438
439 return 0;
440}
441
442/**
443 * numa_set_distance - Set NUMA distance from one NUMA to another
444 * @from: the 'from' node to set distance
445 * @to: the 'to' node to set distance
446 * @distance: NUMA distance
447 *
448 * Set the distance from node @from to @to to @distance. If distance table
449 * doesn't exist, one which is large enough to accommodate all the currently
450 * known nodes will be created.
451 *
452 * If such table cannot be allocated, a warning is printed and further
453 * calls are ignored until the distance table is reset with
454 * numa_reset_distance().
455 *
456 * If @from or @to is higher than the highest known node at the time of
457 * table creation or @distance doesn't make sense, the call is ignored.
458 * This is to allow simplification of specific NUMA config implementations.
459 */
460void __init numa_set_distance(int from, int to, int distance)
461{
462 if (!numa_distance && numa_alloc_distance() < 0)
463 return;
464
465 if (from >= numa_distance_cnt || to >= numa_distance_cnt) {
466 printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n",
467 from, to, distance);
468 return;
469 }
470
471 if ((u8)distance != distance ||
472 (from == to && distance != LOCAL_DISTANCE)) {
473 pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
474 from, to, distance);
475 return;
476 }
477
478 numa_distance[from * numa_distance_cnt + to] = distance;
479}
480
481int __node_distance(int from, int to)
482{
483 if (from >= numa_distance_cnt || to >= numa_distance_cnt)
484 return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
485 return numa_distance[from * numa_distance_cnt + to];
486}
487EXPORT_SYMBOL(__node_distance);
488
489/*
490 * Sanity check to catch more bad NUMA configurations (they are amazingly
491 * common). Make sure the nodes cover all memory.
492 */
493static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
494{
495 unsigned long numaram, e820ram;
496 int i;
497
498 numaram = 0;
499 for (i = 0; i < mi->nr_blks; i++) {
500 unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
501 unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
502 numaram += e - s;
503 numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
504 if ((long)numaram < 0)
505 numaram = 0;
506 }
507
508 e820ram = max_pfn - (memblock_x86_hole_size(0,
509 max_pfn << PAGE_SHIFT) >> PAGE_SHIFT);
510 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
511 if ((long)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
512 printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
513 (numaram << PAGE_SHIFT) >> 20,
514 (e820ram << PAGE_SHIFT) >> 20);
515 return false;
516 }
517 return true;
518}
519
520static int __init numa_register_memblks(struct numa_meminfo *mi)
521{
522 int i, nid;
523
524 /* Account for nodes with cpus and no memory */
525 node_possible_map = numa_nodes_parsed;
526 numa_nodemask_from_meminfo(&node_possible_map, mi);
527 if (WARN_ON(nodes_empty(node_possible_map)))
528 return -EINVAL;
529
530 memnode_shift = compute_hash_shift(mi);
531 if (memnode_shift < 0) {
532 printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n");
533 return -EINVAL;
534 }
535
536 for (i = 0; i < mi->nr_blks; i++)
537 memblock_x86_register_active_regions(mi->blk[i].nid,
538 mi->blk[i].start >> PAGE_SHIFT,
539 mi->blk[i].end >> PAGE_SHIFT);
540
541 /* for out of order entries */
542 sort_node_map();
543 if (!numa_meminfo_cover_memory(mi))
544 return -EINVAL;
545
546 /* Finally register nodes. */
547 for_each_node_mask(nid, node_possible_map) {
548 u64 start = (u64)max_pfn << PAGE_SHIFT;
549 u64 end = 0;
550
551 for (i = 0; i < mi->nr_blks; i++) {
552 if (nid != mi->blk[i].nid)
553 continue;
554 start = min(mi->blk[i].start, start);
555 end = max(mi->blk[i].end, end);
556 }
557
558 if (start < end)
559 setup_node_bootmem(nid, start, end);
560 }
561
562 return 0;
563}
564
565/**
566 * dummy_numma_init - Fallback dummy NUMA init
567 *
568 * Used if there's no underlying NUMA architecture, NUMA initialization
569 * fails, or NUMA is disabled on the command line.
570 *
571 * Must online at least one node and add memory blocks that cover all
572 * allowed memory. This function must not fail.
573 */
574static int __init dummy_numa_init(void)
575{
576 printk(KERN_INFO "%s\n",
577 numa_off ? "NUMA turned off" : "No NUMA configuration found");
578 printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
579 0LU, max_pfn << PAGE_SHIFT);
580
581 node_set(0, numa_nodes_parsed);
582 numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
583
584 return 0;
585}
586
587static int __init numa_init(int (*init_func)(void))
588{
589 int i;
590 int ret;
591
592 for (i = 0; i < MAX_LOCAL_APIC; i++)
593 set_apicid_to_node(i, NUMA_NO_NODE);
594
595 nodes_clear(numa_nodes_parsed);
596 nodes_clear(node_possible_map);
597 nodes_clear(node_online_map);
598 memset(&numa_meminfo, 0, sizeof(numa_meminfo));
599 remove_all_active_ranges();
600 numa_reset_distance();
601
602 ret = init_func();
603 if (ret < 0)
604 return ret;
605 ret = numa_cleanup_meminfo(&numa_meminfo);
606 if (ret < 0)
607 return ret;
608
609 numa_emulation(&numa_meminfo, numa_distance_cnt);
610
611 ret = numa_register_memblks(&numa_meminfo);
612 if (ret < 0)
613 return ret;
614
615 for (i = 0; i < nr_cpu_ids; i++) {
616 int nid = early_cpu_to_node(i);
617
618 if (nid == NUMA_NO_NODE)
619 continue;
620 if (!node_online(nid))
621 numa_clear_node(i);
622 }
623 numa_init_array();
624 return 0;
625}
626
627void __init initmem_init(void) 9void __init initmem_init(void)
628{ 10{
629 int ret; 11 x86_numa_init();
630
631 if (!numa_off) {
632#ifdef CONFIG_ACPI_NUMA
633 ret = numa_init(x86_acpi_numa_init);
634 if (!ret)
635 return;
636#endif
637#ifdef CONFIG_AMD_NUMA
638 ret = numa_init(amd_numa_init);
639 if (!ret)
640 return;
641#endif
642 }
643
644 numa_init(dummy_numa_init);
645} 12}
646 13
647unsigned long __init numa_free_all_bootmem(void) 14unsigned long __init numa_free_all_bootmem(void)
@@ -656,12 +23,3 @@ unsigned long __init numa_free_all_bootmem(void)
656 23
657 return pages; 24 return pages;
658} 25}
659
660int __cpuinit numa_cpu_node(int cpu)
661{
662 int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
663
664 if (apicid != BAD_APICID)
665 return __apicid_to_node[apicid];
666 return NUMA_NO_NODE;
667}
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index de84cc140379..d0ed086b6247 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -5,6 +5,7 @@
5#include <linux/errno.h> 5#include <linux/errno.h>
6#include <linux/topology.h> 6#include <linux/topology.h>
7#include <linux/memblock.h> 7#include <linux/memblock.h>
8#include <linux/bootmem.h>
8#include <asm/dma.h> 9#include <asm/dma.h>
9 10
10#include "numa_internal.h" 11#include "numa_internal.h"
@@ -84,7 +85,13 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
84 nr_nodes = MAX_NUMNODES; 85 nr_nodes = MAX_NUMNODES;
85 } 86 }
86 87
87 size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes; 88 /*
89 * Calculate target node size. x86_32 freaks on __udivdi3() so do
90 * the division in ulong number of pages and convert back.
91 */
92 size = max_addr - addr - memblock_x86_hole_size(addr, max_addr);
93 size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes);
94
88 /* 95 /*
89 * Calculate the number of big nodes that can be allocated as a result 96 * Calculate the number of big nodes that can be allocated as a result
90 * of consolidating the remainder. 97 * of consolidating the remainder.
@@ -226,7 +233,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
226 */ 233 */
227 while (nodes_weight(physnode_mask)) { 234 while (nodes_weight(physnode_mask)) {
228 for_each_node_mask(i, physnode_mask) { 235 for_each_node_mask(i, physnode_mask) {
229 u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT; 236 u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
230 u64 start, limit, end; 237 u64 start, limit, end;
231 int phys_blk; 238 int phys_blk;
232 239
@@ -298,7 +305,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
298{ 305{
299 static struct numa_meminfo ei __initdata; 306 static struct numa_meminfo ei __initdata;
300 static struct numa_meminfo pi __initdata; 307 static struct numa_meminfo pi __initdata;
301 const u64 max_addr = max_pfn << PAGE_SHIFT; 308 const u64 max_addr = PFN_PHYS(max_pfn);
302 u8 *phys_dist = NULL; 309 u8 *phys_dist = NULL;
303 size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]); 310 size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]);
304 int max_emu_nid, dfl_phys_nid; 311 int max_emu_nid, dfl_phys_nid;
@@ -342,8 +349,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
342 if (numa_dist_cnt) { 349 if (numa_dist_cnt) {
343 u64 phys; 350 u64 phys;
344 351
345 phys = memblock_find_in_range(0, 352 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
346 (u64)max_pfn_mapped << PAGE_SHIFT,
347 phys_size, PAGE_SIZE); 353 phys_size, PAGE_SIZE);
348 if (phys == MEMBLOCK_ERROR) { 354 if (phys == MEMBLOCK_ERROR) {
349 pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); 355 pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
index ef2d97377d7c..7178c3afe05e 100644
--- a/arch/x86/mm/numa_internal.h
+++ b/arch/x86/mm/numa_internal.h
@@ -19,6 +19,14 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi);
19int __init numa_cleanup_meminfo(struct numa_meminfo *mi); 19int __init numa_cleanup_meminfo(struct numa_meminfo *mi);
20void __init numa_reset_distance(void); 20void __init numa_reset_distance(void);
21 21
22void __init x86_numa_init(void);
23
24#ifdef CONFIG_X86_64
25static inline void init_alloc_remap(int nid, u64 start, u64 end) { }
26#else
27void __init init_alloc_remap(int nid, u64 start, u64 end);
28#endif
29
22#ifdef CONFIG_NUMA_EMU 30#ifdef CONFIG_NUMA_EMU
23void __init numa_emulation(struct numa_meminfo *numa_meminfo, 31void __init numa_emulation(struct numa_meminfo *numa_meminfo,
24 int numa_dist_cnt); 32 int numa_dist_cnt);
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat.c
index 8e9d3394f6d4..81dbfdeb080d 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat.c
@@ -26,8 +26,6 @@
26 26
27int acpi_numa __initdata; 27int acpi_numa __initdata;
28 28
29static struct bootnode nodes_add[MAX_NUMNODES];
30
31static __init int setup_node(int pxm) 29static __init int setup_node(int pxm)
32{ 30{
33 return acpi_map_pxm_to_node(pxm); 31 return acpi_map_pxm_to_node(pxm);
@@ -37,7 +35,6 @@ static __init void bad_srat(void)
37{ 35{
38 printk(KERN_ERR "SRAT: SRAT not used.\n"); 36 printk(KERN_ERR "SRAT: SRAT not used.\n");
39 acpi_numa = -1; 37 acpi_numa = -1;
40 memset(nodes_add, 0, sizeof(nodes_add));
41} 38}
42 39
43static __init inline int srat_disabled(void) 40static __init inline int srat_disabled(void)
@@ -131,73 +128,17 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
131 pxm, apic_id, node); 128 pxm, apic_id, node);
132} 129}
133 130
134#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 131#ifdef CONFIG_MEMORY_HOTPLUG
135static inline int save_add_info(void) {return 1;} 132static inline int save_add_info(void) {return 1;}
136#else 133#else
137static inline int save_add_info(void) {return 0;} 134static inline int save_add_info(void) {return 0;}
138#endif 135#endif
139/*
140 * Update nodes_add[]
141 * This code supports one contiguous hot add area per node
142 */
143static void __init
144update_nodes_add(int node, unsigned long start, unsigned long end)
145{
146 unsigned long s_pfn = start >> PAGE_SHIFT;
147 unsigned long e_pfn = end >> PAGE_SHIFT;
148 int changed = 0;
149 struct bootnode *nd = &nodes_add[node];
150
151 /* I had some trouble with strange memory hotadd regions breaking
152 the boot. Be very strict here and reject anything unexpected.
153 If you want working memory hotadd write correct SRATs.
154
155 The node size check is a basic sanity check to guard against
156 mistakes */
157 if ((signed long)(end - start) < NODE_MIN_SIZE) {
158 printk(KERN_ERR "SRAT: Hotplug area too small\n");
159 return;
160 }
161
162 /* This check might be a bit too strict, but I'm keeping it for now. */
163 if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
164 printk(KERN_ERR
165 "SRAT: Hotplug area %lu -> %lu has existing memory\n",
166 s_pfn, e_pfn);
167 return;
168 }
169
170 /* Looks good */
171
172 if (nd->start == nd->end) {
173 nd->start = start;
174 nd->end = end;
175 changed = 1;
176 } else {
177 if (nd->start == end) {
178 nd->start = start;
179 changed = 1;
180 }
181 if (nd->end == start) {
182 nd->end = end;
183 changed = 1;
184 }
185 if (!changed)
186 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
187 }
188
189 if (changed) {
190 node_set(node, numa_nodes_parsed);
191 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
192 nd->start, nd->end);
193 }
194}
195 136
196/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ 137/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
197void __init 138void __init
198acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) 139acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
199{ 140{
200 unsigned long start, end; 141 u64 start, end;
201 int node, pxm; 142 int node, pxm;
202 143
203 if (srat_disabled()) 144 if (srat_disabled())
@@ -226,11 +167,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
226 return; 167 return;
227 } 168 }
228 169
229 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, 170 printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
230 start, end); 171 start, end);
231
232 if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)
233 update_nodes_add(node, start, end);
234} 172}
235 173
236void __init acpi_numa_arch_fixup(void) {} 174void __init acpi_numa_arch_fixup(void) {}
@@ -244,17 +182,3 @@ int __init x86_acpi_numa_init(void)
244 return ret; 182 return ret;
245 return srat_disabled() ? -EINVAL : 0; 183 return srat_disabled() ? -EINVAL : 0;
246} 184}
247
248#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
249int memory_add_physaddr_to_nid(u64 start)
250{
251 int i, ret = 0;
252
253 for_each_node(i)
254 if (nodes_add[i].start <= start && nodes_add[i].end > start)
255 ret = i;
256
257 return ret;
258}
259EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
260#endif
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
deleted file mode 100644
index 364f36bdfad8..000000000000
--- a/arch/x86/mm/srat_32.c
+++ /dev/null
@@ -1,288 +0,0 @@
1/*
2 * Some of the code in this file has been gleaned from the 64 bit
3 * discontigmem support code base.
4 *
5 * Copyright (C) 2002, IBM Corp.
6 *
7 * All rights reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
17 * NON INFRINGEMENT. See the GNU General Public License for more
18 * details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 * Send feedback to Pat Gaughen <gone@us.ibm.com>
25 */
26#include <linux/mm.h>
27#include <linux/bootmem.h>
28#include <linux/memblock.h>
29#include <linux/mmzone.h>
30#include <linux/acpi.h>
31#include <linux/nodemask.h>
32#include <asm/srat.h>
33#include <asm/topology.h>
34#include <asm/smp.h>
35#include <asm/e820.h>
36
37/*
38 * proximity macros and definitions
39 */
40#define NODE_ARRAY_INDEX(x) ((x) / 8) /* 8 bits/char */
41#define NODE_ARRAY_OFFSET(x) ((x) % 8) /* 8 bits/char */
42#define BMAP_SET(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit))
43#define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))
44/* bitmap length; _PXM is at most 255 */
45#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8)
46static u8 __initdata pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */
47
48#define MAX_CHUNKS_PER_NODE 3
49#define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
50struct node_memory_chunk_s {
51 unsigned long start_pfn;
52 unsigned long end_pfn;
53 u8 pxm; // proximity domain of node
54 u8 nid; // which cnode contains this chunk?
55 u8 bank; // which mem bank on this node
56};
57static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
58
59static int __initdata num_memory_chunks; /* total number of memory chunks */
60static u8 __initdata apicid_to_pxm[MAX_LOCAL_APIC];
61
62int acpi_numa __initdata;
63
64static __init void bad_srat(void)
65{
66 printk(KERN_ERR "SRAT: SRAT not used.\n");
67 acpi_numa = -1;
68 num_memory_chunks = 0;
69}
70
71static __init inline int srat_disabled(void)
72{
73 return numa_off || acpi_numa < 0;
74}
75
76/* Identify CPU proximity domains */
77void __init
78acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
79{
80 if (srat_disabled())
81 return;
82 if (cpu_affinity->header.length !=
83 sizeof(struct acpi_srat_cpu_affinity)) {
84 bad_srat();
85 return;
86 }
87
88 if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0)
89 return; /* empty entry */
90
91 /* mark this node as "seen" in node bitmap */
92 BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo);
93
94 /* don't need to check apic_id here, because it is always 8 bits */
95 apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo;
96
97 printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n",
98 cpu_affinity->apic_id, cpu_affinity->proximity_domain_lo);
99}
100
101/*
102 * Identify memory proximity domains and hot-remove capabilities.
103 * Fill node memory chunk list structure.
104 */
105void __init
106acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity)
107{
108 unsigned long long paddr, size;
109 unsigned long start_pfn, end_pfn;
110 u8 pxm;
111 struct node_memory_chunk_s *p, *q, *pend;
112
113 if (srat_disabled())
114 return;
115 if (memory_affinity->header.length !=
116 sizeof(struct acpi_srat_mem_affinity)) {
117 bad_srat();
118 return;
119 }
120
121 if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0)
122 return; /* empty entry */
123
124 pxm = memory_affinity->proximity_domain & 0xff;
125
126 /* mark this node as "seen" in node bitmap */
127 BMAP_SET(pxm_bitmap, pxm);
128
129 /* calculate info for memory chunk structure */
130 paddr = memory_affinity->base_address;
131 size = memory_affinity->length;
132
133 start_pfn = paddr >> PAGE_SHIFT;
134 end_pfn = (paddr + size) >> PAGE_SHIFT;
135
136
137 if (num_memory_chunks >= MAXCHUNKS) {
138 printk(KERN_WARNING "Too many mem chunks in SRAT."
139 " Ignoring %lld MBytes at %llx\n",
140 size/(1024*1024), paddr);
141 return;
142 }
143
144 /* Insertion sort based on base address */
145 pend = &node_memory_chunk[num_memory_chunks];
146 for (p = &node_memory_chunk[0]; p < pend; p++) {
147 if (start_pfn < p->start_pfn)
148 break;
149 }
150 if (p < pend) {
151 for (q = pend; q >= p; q--)
152 *(q + 1) = *q;
153 }
154 p->start_pfn = start_pfn;
155 p->end_pfn = end_pfn;
156 p->pxm = pxm;
157
158 num_memory_chunks++;
159
160 printk(KERN_DEBUG "Memory range %08lx to %08lx"
161 " in proximity domain %02x %s\n",
162 start_pfn, end_pfn,
163 pxm,
164 ((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
165 "enabled and removable" : "enabled" ) );
166}
167
168/* Callback for SLIT parsing */
169void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
170{
171}
172
173void acpi_numa_arch_fixup(void)
174{
175}
176/*
177 * The SRAT table always lists ascending addresses, so can always
178 * assume that the first "start" address that you see is the real
179 * start of the node, and that the current "end" address is after
180 * the previous one.
181 */
182static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
183{
184 /*
185 * Only add present memory as told by the e820.
186 * There is no guarantee from the SRAT that the memory it
187 * enumerates is present at boot time because it represents
188 * *possible* memory hotplug areas the same as normal RAM.
189 */
190 if (memory_chunk->start_pfn >= max_pfn) {
191 printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n",
192 memory_chunk->start_pfn, memory_chunk->end_pfn);
193 return -1;
194 }
195 if (memory_chunk->nid != nid)
196 return -1;
197
198 if (!node_has_online_mem(nid))
199 node_start_pfn[nid] = memory_chunk->start_pfn;
200
201 if (node_start_pfn[nid] > memory_chunk->start_pfn)
202 node_start_pfn[nid] = memory_chunk->start_pfn;
203
204 if (node_end_pfn[nid] < memory_chunk->end_pfn)
205 node_end_pfn[nid] = memory_chunk->end_pfn;
206
207 return 0;
208}
209
210int __init get_memcfg_from_srat(void)
211{
212 int i, j, nid;
213
214 if (srat_disabled())
215 goto out_fail;
216
217 if (acpi_numa_init() < 0)
218 goto out_fail;
219
220 if (num_memory_chunks == 0) {
221 printk(KERN_DEBUG
222 "could not find any ACPI SRAT memory areas.\n");
223 goto out_fail;
224 }
225
226 /* Calculate total number of nodes in system from PXM bitmap and create
227 * a set of sequential node IDs starting at zero. (ACPI doesn't seem
228 * to specify the range of _PXM values.)
229 */
230 /*
231 * MCD - we no longer HAVE to number nodes sequentially. PXM domain
232 * numbers could go as high as 256, and MAX_NUMNODES for i386 is typically
233 * 32, so we will continue numbering them in this manner until MAX_NUMNODES
234 * approaches MAX_PXM_DOMAINS for i386.
235 */
236 nodes_clear(node_online_map);
237 for (i = 0; i < MAX_PXM_DOMAINS; i++) {
238 if (BMAP_TEST(pxm_bitmap, i)) {
239 int nid = acpi_map_pxm_to_node(i);
240 node_set_online(nid);
241 }
242 }
243 BUG_ON(num_online_nodes() == 0);
244
245 /* set cnode id in memory chunk structure */
246 for (i = 0; i < num_memory_chunks; i++)
247 node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm);
248
249 printk(KERN_DEBUG "pxm bitmap: ");
250 for (i = 0; i < sizeof(pxm_bitmap); i++) {
251 printk(KERN_CONT "%02x ", pxm_bitmap[i]);
252 }
253 printk(KERN_CONT "\n");
254 printk(KERN_DEBUG "Number of logical nodes in system = %d\n",
255 num_online_nodes());
256 printk(KERN_DEBUG "Number of memory chunks in system = %d\n",
257 num_memory_chunks);
258
259 for (i = 0; i < MAX_LOCAL_APIC; i++)
260 set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
261
262 for (j = 0; j < num_memory_chunks; j++){
263 struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
264 printk(KERN_DEBUG
265 "chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
266 j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
267 if (node_read_chunk(chunk->nid, chunk))
268 continue;
269
270 memblock_x86_register_active_regions(chunk->nid, chunk->start_pfn,
271 min(chunk->end_pfn, max_pfn));
272 }
273 /* for out of order entries in SRAT */
274 sort_node_map();
275
276 for_each_online_node(nid) {
277 unsigned long start = node_start_pfn[nid];
278 unsigned long end = min(node_end_pfn[nid], max_pfn);
279
280 memory_present(nid, start, end);
281 node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
282 }
283 return 1;
284out_fail:
285 printk(KERN_DEBUG "failed to get NUMA memory information from SRAT"
286 " table\n");
287 return 0;
288}
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
new file mode 100644
index 000000000000..90568c33ddb0
--- /dev/null
+++ b/arch/x86/net/Makefile
@@ -0,0 +1,4 @@
1#
2# Arch-specific network modules
3#
4obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
new file mode 100644
index 000000000000..66870223f8c5
--- /dev/null
+++ b/arch/x86/net/bpf_jit.S
@@ -0,0 +1,140 @@
1/* bpf_jit.S : BPF JIT helper functions
2 *
3 * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com)
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; version 2
8 * of the License.
9 */
10#include <linux/linkage.h>
11#include <asm/dwarf2.h>
12
13/*
14 * Calling convention :
15 * rdi : skb pointer
16 * esi : offset of byte(s) to fetch in skb (can be scratched)
17 * r8 : copy of skb->data
18 * r9d : hlen = skb->len - skb->data_len
19 */
20#define SKBDATA %r8
21
22sk_load_word_ind:
23 .globl sk_load_word_ind
24
25 add %ebx,%esi /* offset += X */
26# test %esi,%esi /* if (offset < 0) goto bpf_error; */
27 js bpf_error
28
29sk_load_word:
30 .globl sk_load_word
31
32 mov %r9d,%eax # hlen
33 sub %esi,%eax # hlen - offset
34 cmp $3,%eax
35 jle bpf_slow_path_word
36 mov (SKBDATA,%rsi),%eax
37 bswap %eax /* ntohl() */
38 ret
39
40
41sk_load_half_ind:
42 .globl sk_load_half_ind
43
44 add %ebx,%esi /* offset += X */
45 js bpf_error
46
47sk_load_half:
48 .globl sk_load_half
49
50 mov %r9d,%eax
51 sub %esi,%eax # hlen - offset
52 cmp $1,%eax
53 jle bpf_slow_path_half
54 movzwl (SKBDATA,%rsi),%eax
55 rol $8,%ax # ntohs()
56 ret
57
58sk_load_byte_ind:
59 .globl sk_load_byte_ind
60 add %ebx,%esi /* offset += X */
61 js bpf_error
62
63sk_load_byte:
64 .globl sk_load_byte
65
66 cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */
67 jle bpf_slow_path_byte
68 movzbl (SKBDATA,%rsi),%eax
69 ret
70
71/**
72 * sk_load_byte_msh - BPF_S_LDX_B_MSH helper
73 *
74 * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
75 * Must preserve A accumulator (%eax)
76 * Inputs : %esi is the offset value, already known positive
77 */
78ENTRY(sk_load_byte_msh)
79 CFI_STARTPROC
80 cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
81 jle bpf_slow_path_byte_msh
82 movzbl (SKBDATA,%rsi),%ebx
83 and $15,%bl
84 shl $2,%bl
85 ret
86 CFI_ENDPROC
87ENDPROC(sk_load_byte_msh)
88
89bpf_error:
90# force a return 0 from jit handler
91 xor %eax,%eax
92 mov -8(%rbp),%rbx
93 leaveq
94 ret
95
96/* rsi contains offset and can be scratched */
97#define bpf_slow_path_common(LEN) \
98 push %rdi; /* save skb */ \
99 push %r9; \
100 push SKBDATA; \
101/* rsi already has offset */ \
102 mov $LEN,%ecx; /* len */ \
103 lea -12(%rbp),%rdx; \
104 call skb_copy_bits; \
105 test %eax,%eax; \
106 pop SKBDATA; \
107 pop %r9; \
108 pop %rdi
109
110
111bpf_slow_path_word:
112 bpf_slow_path_common(4)
113 js bpf_error
114 mov -12(%rbp),%eax
115 bswap %eax
116 ret
117
118bpf_slow_path_half:
119 bpf_slow_path_common(2)
120 js bpf_error
121 mov -12(%rbp),%ax
122 rol $8,%ax
123 movzwl %ax,%eax
124 ret
125
126bpf_slow_path_byte:
127 bpf_slow_path_common(1)
128 js bpf_error
129 movzbl -12(%rbp),%eax
130 ret
131
132bpf_slow_path_byte_msh:
133 xchg %eax,%ebx /* dont lose A , X is about to be scratched */
134 bpf_slow_path_common(1)
135 js bpf_error
136 movzbl -12(%rbp),%eax
137 and $15,%al
138 shl $2,%al
139 xchg %eax,%ebx
140 ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
new file mode 100644
index 000000000000..bfab3fa10edc
--- /dev/null
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -0,0 +1,654 @@
1/* bpf_jit_comp.c : BPF JIT compiler
2 *
3 * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com)
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; version 2
8 * of the License.
9 */
10#include <linux/moduleloader.h>
11#include <asm/cacheflush.h>
12#include <linux/netdevice.h>
13#include <linux/filter.h>
14
15/*
16 * Conventions :
17 * EAX : BPF A accumulator
18 * EBX : BPF X accumulator
19 * RDI : pointer to skb (first argument given to JIT function)
20 * RBP : frame pointer (even if CONFIG_FRAME_POINTER=n)
21 * ECX,EDX,ESI : scratch registers
22 * r9d : skb->len - skb->data_len (headlen)
23 * r8 : skb->data
24 * -8(RBP) : saved RBX value
25 * -16(RBP)..-80(RBP) : BPF_MEMWORDS values
26 */
27int bpf_jit_enable __read_mostly;
28
29/*
30 * assembly code in arch/x86/net/bpf_jit.S
31 */
32extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
33extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
34
35static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
36{
37 if (len == 1)
38 *ptr = bytes;
39 else if (len == 2)
40 *(u16 *)ptr = bytes;
41 else {
42 *(u32 *)ptr = bytes;
43 barrier();
44 }
45 return ptr + len;
46}
47
48#define EMIT(bytes, len) do { prog = emit_code(prog, bytes, len); } while (0)
49
50#define EMIT1(b1) EMIT(b1, 1)
51#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
52#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
53#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
54#define EMIT1_off32(b1, off) do { EMIT1(b1); EMIT(off, 4);} while (0)
55
56#define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */
57#define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */
58
59static inline bool is_imm8(int value)
60{
61 return value <= 127 && value >= -128;
62}
63
64static inline bool is_near(int offset)
65{
66 return offset <= 127 && offset >= -128;
67}
68
69#define EMIT_JMP(offset) \
70do { \
71 if (offset) { \
72 if (is_near(offset)) \
73 EMIT2(0xeb, offset); /* jmp .+off8 */ \
74 else \
75 EMIT1_off32(0xe9, offset); /* jmp .+off32 */ \
76 } \
77} while (0)
78
79/* list of x86 cond jumps opcodes (. + s8)
80 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
81 */
82#define X86_JB 0x72
83#define X86_JAE 0x73
84#define X86_JE 0x74
85#define X86_JNE 0x75
86#define X86_JBE 0x76
87#define X86_JA 0x77
88
89#define EMIT_COND_JMP(op, offset) \
90do { \
91 if (is_near(offset)) \
92 EMIT2(op, offset); /* jxx .+off8 */ \
93 else { \
94 EMIT2(0x0f, op + 0x10); \
95 EMIT(offset, 4); /* jxx .+off32 */ \
96 } \
97} while (0)
98
99#define COND_SEL(CODE, TOP, FOP) \
100 case CODE: \
101 t_op = TOP; \
102 f_op = FOP; \
103 goto cond_branch
104
105
106#define SEEN_DATAREF 1 /* might call external helpers */
107#define SEEN_XREG 2 /* ebx is used */
108#define SEEN_MEM 4 /* use mem[] for temporary storage */
109
110static inline void bpf_flush_icache(void *start, void *end)
111{
112 mm_segment_t old_fs = get_fs();
113
114 set_fs(KERNEL_DS);
115 smp_wmb();
116 flush_icache_range((unsigned long)start, (unsigned long)end);
117 set_fs(old_fs);
118}
119
120
121void bpf_jit_compile(struct sk_filter *fp)
122{
123 u8 temp[64];
124 u8 *prog;
125 unsigned int proglen, oldproglen = 0;
126 int ilen, i;
127 int t_offset, f_offset;
128 u8 t_op, f_op, seen = 0, pass;
129 u8 *image = NULL;
130 u8 *func;
131 int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */
132 unsigned int cleanup_addr; /* epilogue code offset */
133 unsigned int *addrs;
134 const struct sock_filter *filter = fp->insns;
135 int flen = fp->len;
136
137 if (!bpf_jit_enable)
138 return;
139
140 addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL);
141 if (addrs == NULL)
142 return;
143
144 /* Before first pass, make a rough estimation of addrs[]
145 * each bpf instruction is translated to less than 64 bytes
146 */
147 for (proglen = 0, i = 0; i < flen; i++) {
148 proglen += 64;
149 addrs[i] = proglen;
150 }
151 cleanup_addr = proglen; /* epilogue address */
152
153 for (pass = 0; pass < 10; pass++) {
154 /* no prologue/epilogue for trivial filters (RET something) */
155 proglen = 0;
156 prog = temp;
157
158 if (seen) {
159 EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */
160 EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */
161 /* note : must save %rbx in case bpf_error is hit */
162 if (seen & (SEEN_XREG | SEEN_DATAREF))
163 EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */
164 if (seen & SEEN_XREG)
165 CLEAR_X(); /* make sure we dont leek kernel memory */
166
167 /*
168 * If this filter needs to access skb data,
169 * loads r9 and r8 with :
170 * r9 = skb->len - skb->data_len
171 * r8 = skb->data
172 */
173 if (seen & SEEN_DATAREF) {
174 if (offsetof(struct sk_buff, len) <= 127)
175 /* mov off8(%rdi),%r9d */
176 EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len));
177 else {
178 /* mov off32(%rdi),%r9d */
179 EMIT3(0x44, 0x8b, 0x8f);
180 EMIT(offsetof(struct sk_buff, len), 4);
181 }
182 if (is_imm8(offsetof(struct sk_buff, data_len)))
183 /* sub off8(%rdi),%r9d */
184 EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len));
185 else {
186 EMIT3(0x44, 0x2b, 0x8f);
187 EMIT(offsetof(struct sk_buff, data_len), 4);
188 }
189
190 if (is_imm8(offsetof(struct sk_buff, data)))
191 /* mov off8(%rdi),%r8 */
192 EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data));
193 else {
194 /* mov off32(%rdi),%r8 */
195 EMIT3(0x4c, 0x8b, 0x87);
196 EMIT(offsetof(struct sk_buff, data), 4);
197 }
198 }
199 }
200
201 switch (filter[0].code) {
202 case BPF_S_RET_K:
203 case BPF_S_LD_W_LEN:
204 case BPF_S_ANC_PROTOCOL:
205 case BPF_S_ANC_IFINDEX:
206 case BPF_S_ANC_MARK:
207 case BPF_S_ANC_RXHASH:
208 case BPF_S_ANC_CPU:
209 case BPF_S_ANC_QUEUE:
210 case BPF_S_LD_W_ABS:
211 case BPF_S_LD_H_ABS:
212 case BPF_S_LD_B_ABS:
213 /* first instruction sets A register (or is RET 'constant') */
214 break;
215 default:
216 /* make sure we dont leak kernel information to user */
217 CLEAR_A(); /* A = 0 */
218 }
219
220 for (i = 0; i < flen; i++) {
221 unsigned int K = filter[i].k;
222
223 switch (filter[i].code) {
224 case BPF_S_ALU_ADD_X: /* A += X; */
225 seen |= SEEN_XREG;
226 EMIT2(0x01, 0xd8); /* add %ebx,%eax */
227 break;
228 case BPF_S_ALU_ADD_K: /* A += K; */
229 if (!K)
230 break;
231 if (is_imm8(K))
232 EMIT3(0x83, 0xc0, K); /* add imm8,%eax */
233 else
234 EMIT1_off32(0x05, K); /* add imm32,%eax */
235 break;
236 case BPF_S_ALU_SUB_X: /* A -= X; */
237 seen |= SEEN_XREG;
238 EMIT2(0x29, 0xd8); /* sub %ebx,%eax */
239 break;
240 case BPF_S_ALU_SUB_K: /* A -= K */
241 if (!K)
242 break;
243 if (is_imm8(K))
244 EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */
245 else
246 EMIT1_off32(0x2d, K); /* sub imm32,%eax */
247 break;
248 case BPF_S_ALU_MUL_X: /* A *= X; */
249 seen |= SEEN_XREG;
250 EMIT3(0x0f, 0xaf, 0xc3); /* imul %ebx,%eax */
251 break;
252 case BPF_S_ALU_MUL_K: /* A *= K */
253 if (is_imm8(K))
254 EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */
255 else {
256 EMIT2(0x69, 0xc0); /* imul imm32,%eax */
257 EMIT(K, 4);
258 }
259 break;
260 case BPF_S_ALU_DIV_X: /* A /= X; */
261 seen |= SEEN_XREG;
262 EMIT2(0x85, 0xdb); /* test %ebx,%ebx */
263 if (pc_ret0 != -1)
264 EMIT_COND_JMP(X86_JE, addrs[pc_ret0] - (addrs[i] - 4));
265 else {
266 EMIT_COND_JMP(X86_JNE, 2 + 5);
267 CLEAR_A();
268 EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */
269 }
270 EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */
271 break;
272 case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */
273 EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */
274 EMIT(K, 4);
275 EMIT4(0x48, 0xc1, 0xe8, 0x20); /* shr $0x20,%rax */
276 break;
277 case BPF_S_ALU_AND_X:
278 seen |= SEEN_XREG;
279 EMIT2(0x21, 0xd8); /* and %ebx,%eax */
280 break;
281 case BPF_S_ALU_AND_K:
282 if (K >= 0xFFFFFF00) {
283 EMIT2(0x24, K & 0xFF); /* and imm8,%al */
284 } else if (K >= 0xFFFF0000) {
285 EMIT2(0x66, 0x25); /* and imm16,%ax */
286 EMIT2(K, 2);
287 } else {
288 EMIT1_off32(0x25, K); /* and imm32,%eax */
289 }
290 break;
291 case BPF_S_ALU_OR_X:
292 seen |= SEEN_XREG;
293 EMIT2(0x09, 0xd8); /* or %ebx,%eax */
294 break;
295 case BPF_S_ALU_OR_K:
296 if (is_imm8(K))
297 EMIT3(0x83, 0xc8, K); /* or imm8,%eax */
298 else
299 EMIT1_off32(0x0d, K); /* or imm32,%eax */
300 break;
301 case BPF_S_ALU_LSH_X: /* A <<= X; */
302 seen |= SEEN_XREG;
303 EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */
304 break;
305 case BPF_S_ALU_LSH_K:
306 if (K == 0)
307 break;
308 else if (K == 1)
309 EMIT2(0xd1, 0xe0); /* shl %eax */
310 else
311 EMIT3(0xc1, 0xe0, K);
312 break;
313 case BPF_S_ALU_RSH_X: /* A >>= X; */
314 seen |= SEEN_XREG;
315 EMIT4(0x89, 0xd9, 0xd3, 0xe8); /* mov %ebx,%ecx; shr %cl,%eax */
316 break;
317 case BPF_S_ALU_RSH_K: /* A >>= K; */
318 if (K == 0)
319 break;
320 else if (K == 1)
321 EMIT2(0xd1, 0xe8); /* shr %eax */
322 else
323 EMIT3(0xc1, 0xe8, K);
324 break;
325 case BPF_S_ALU_NEG:
326 EMIT2(0xf7, 0xd8); /* neg %eax */
327 break;
328 case BPF_S_RET_K:
329 if (!K) {
330 if (pc_ret0 == -1)
331 pc_ret0 = i;
332 CLEAR_A();
333 } else {
334 EMIT1_off32(0xb8, K); /* mov $imm32,%eax */
335 }
336 /* fallinto */
337 case BPF_S_RET_A:
338 if (seen) {
339 if (i != flen - 1) {
340 EMIT_JMP(cleanup_addr - addrs[i]);
341 break;
342 }
343 if (seen & SEEN_XREG)
344 EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */
345 EMIT1(0xc9); /* leaveq */
346 }
347 EMIT1(0xc3); /* ret */
348 break;
349 case BPF_S_MISC_TAX: /* X = A */
350 seen |= SEEN_XREG;
351 EMIT2(0x89, 0xc3); /* mov %eax,%ebx */
352 break;
353 case BPF_S_MISC_TXA: /* A = X */
354 seen |= SEEN_XREG;
355 EMIT2(0x89, 0xd8); /* mov %ebx,%eax */
356 break;
357 case BPF_S_LD_IMM: /* A = K */
358 if (!K)
359 CLEAR_A();
360 else
361 EMIT1_off32(0xb8, K); /* mov $imm32,%eax */
362 break;
363 case BPF_S_LDX_IMM: /* X = K */
364 seen |= SEEN_XREG;
365 if (!K)
366 CLEAR_X();
367 else
368 EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */
369 break;
370 case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */
371 seen |= SEEN_MEM;
372 EMIT3(0x8b, 0x45, 0xf0 - K*4);
373 break;
374 case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */
375 seen |= SEEN_XREG | SEEN_MEM;
376 EMIT3(0x8b, 0x5d, 0xf0 - K*4);
377 break;
378 case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */
379 seen |= SEEN_MEM;
380 EMIT3(0x89, 0x45, 0xf0 - K*4);
381 break;
382 case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
383 seen |= SEEN_XREG | SEEN_MEM;
384 EMIT3(0x89, 0x5d, 0xf0 - K*4);
385 break;
386 case BPF_S_LD_W_LEN: /* A = skb->len; */
387 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
388 if (is_imm8(offsetof(struct sk_buff, len)))
389 /* mov off8(%rdi),%eax */
390 EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len));
391 else {
392 EMIT2(0x8b, 0x87);
393 EMIT(offsetof(struct sk_buff, len), 4);
394 }
395 break;
396 case BPF_S_LDX_W_LEN: /* X = skb->len; */
397 seen |= SEEN_XREG;
398 if (is_imm8(offsetof(struct sk_buff, len)))
399 /* mov off8(%rdi),%ebx */
400 EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len));
401 else {
402 EMIT2(0x8b, 0x9f);
403 EMIT(offsetof(struct sk_buff, len), 4);
404 }
405 break;
406 case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
407 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
408 if (is_imm8(offsetof(struct sk_buff, protocol))) {
409 /* movzwl off8(%rdi),%eax */
410 EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol));
411 } else {
412 EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
413 EMIT(offsetof(struct sk_buff, protocol), 4);
414 }
415 EMIT2(0x86, 0xc4); /* ntohs() : xchg %al,%ah */
416 break;
417 case BPF_S_ANC_IFINDEX:
418 if (is_imm8(offsetof(struct sk_buff, dev))) {
419 /* movq off8(%rdi),%rax */
420 EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev));
421 } else {
422 EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */
423 EMIT(offsetof(struct sk_buff, dev), 4);
424 }
425 EMIT3(0x48, 0x85, 0xc0); /* test %rax,%rax */
426 EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6));
427 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
428 EMIT2(0x8b, 0x80); /* mov off32(%rax),%eax */
429 EMIT(offsetof(struct net_device, ifindex), 4);
430 break;
431 case BPF_S_ANC_MARK:
432 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
433 if (is_imm8(offsetof(struct sk_buff, mark))) {
434 /* mov off8(%rdi),%eax */
435 EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark));
436 } else {
437 EMIT2(0x8b, 0x87);
438 EMIT(offsetof(struct sk_buff, mark), 4);
439 }
440 break;
441 case BPF_S_ANC_RXHASH:
442 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4);
443 if (is_imm8(offsetof(struct sk_buff, rxhash))) {
444 /* mov off8(%rdi),%eax */
445 EMIT3(0x8b, 0x47, offsetof(struct sk_buff, rxhash));
446 } else {
447 EMIT2(0x8b, 0x87);
448 EMIT(offsetof(struct sk_buff, rxhash), 4);
449 }
450 break;
451 case BPF_S_ANC_QUEUE:
452 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
453 if (is_imm8(offsetof(struct sk_buff, queue_mapping))) {
454 /* movzwl off8(%rdi),%eax */
455 EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping));
456 } else {
457 EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
458 EMIT(offsetof(struct sk_buff, queue_mapping), 4);
459 }
460 break;
461 case BPF_S_ANC_CPU:
462#ifdef CONFIG_SMP
463 EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */
464 EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */
465#else
466 CLEAR_A();
467#endif
468 break;
469 case BPF_S_LD_W_ABS:
470 func = sk_load_word;
471common_load: seen |= SEEN_DATAREF;
472 if ((int)K < 0)
473 goto out;
474 t_offset = func - (image + addrs[i]);
475 EMIT1_off32(0xbe, K); /* mov imm32,%esi */
476 EMIT1_off32(0xe8, t_offset); /* call */
477 break;
478 case BPF_S_LD_H_ABS:
479 func = sk_load_half;
480 goto common_load;
481 case BPF_S_LD_B_ABS:
482 func = sk_load_byte;
483 goto common_load;
484 case BPF_S_LDX_B_MSH:
485 if ((int)K < 0) {
486 if (pc_ret0 != -1) {
487 EMIT_JMP(addrs[pc_ret0] - addrs[i]);
488 break;
489 }
490 CLEAR_A();
491 EMIT_JMP(cleanup_addr - addrs[i]);
492 break;
493 }
494 seen |= SEEN_DATAREF | SEEN_XREG;
495 t_offset = sk_load_byte_msh - (image + addrs[i]);
496 EMIT1_off32(0xbe, K); /* mov imm32,%esi */
497 EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
498 break;
499 case BPF_S_LD_W_IND:
500 func = sk_load_word_ind;
501common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG;
502 t_offset = func - (image + addrs[i]);
503 EMIT1_off32(0xbe, K); /* mov imm32,%esi */
504 EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */
505 break;
506 case BPF_S_LD_H_IND:
507 func = sk_load_half_ind;
508 goto common_load_ind;
509 case BPF_S_LD_B_IND:
510 func = sk_load_byte_ind;
511 goto common_load_ind;
512 case BPF_S_JMP_JA:
513 t_offset = addrs[i + K] - addrs[i];
514 EMIT_JMP(t_offset);
515 break;
516 COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE);
517 COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB);
518 COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE);
519 COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE);
520 COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE);
521 COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB);
522 COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE);
523 COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE);
524
525cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
526 t_offset = addrs[i + filter[i].jt] - addrs[i];
527
528 /* same targets, can avoid doing the test :) */
529 if (filter[i].jt == filter[i].jf) {
530 EMIT_JMP(t_offset);
531 break;
532 }
533
534 switch (filter[i].code) {
535 case BPF_S_JMP_JGT_X:
536 case BPF_S_JMP_JGE_X:
537 case BPF_S_JMP_JEQ_X:
538 seen |= SEEN_XREG;
539 EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */
540 break;
541 case BPF_S_JMP_JSET_X:
542 seen |= SEEN_XREG;
543 EMIT2(0x85, 0xd8); /* test %ebx,%eax */
544 break;
545 case BPF_S_JMP_JEQ_K:
546 if (K == 0) {
547 EMIT2(0x85, 0xc0); /* test %eax,%eax */
548 break;
549 }
550 case BPF_S_JMP_JGT_K:
551 case BPF_S_JMP_JGE_K:
552 if (K <= 127)
553 EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */
554 else
555 EMIT1_off32(0x3d, K); /* cmp imm32,%eax */
556 break;
557 case BPF_S_JMP_JSET_K:
558 if (K <= 0xFF)
559 EMIT2(0xa8, K); /* test imm8,%al */
560 else if (!(K & 0xFFFF00FF))
561 EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */
562 else if (K <= 0xFFFF) {
563 EMIT2(0x66, 0xa9); /* test imm16,%ax */
564 EMIT(K, 2);
565 } else {
566 EMIT1_off32(0xa9, K); /* test imm32,%eax */
567 }
568 break;
569 }
570 if (filter[i].jt != 0) {
571 if (filter[i].jf)
572 t_offset += is_near(f_offset) ? 2 : 6;
573 EMIT_COND_JMP(t_op, t_offset);
574 if (filter[i].jf)
575 EMIT_JMP(f_offset);
576 break;
577 }
578 EMIT_COND_JMP(f_op, f_offset);
579 break;
580 default:
581 /* hmm, too complex filter, give up with jit compiler */
582 goto out;
583 }
584 ilen = prog - temp;
585 if (image) {
586 if (unlikely(proglen + ilen > oldproglen)) {
587 pr_err("bpb_jit_compile fatal error\n");
588 kfree(addrs);
589 module_free(NULL, image);
590 return;
591 }
592 memcpy(image + proglen, temp, ilen);
593 }
594 proglen += ilen;
595 addrs[i] = proglen;
596 prog = temp;
597 }
598 /* last bpf instruction is always a RET :
599 * use it to give the cleanup instruction(s) addr
600 */
601 cleanup_addr = proglen - 1; /* ret */
602 if (seen)
603 cleanup_addr -= 1; /* leaveq */
604 if (seen & SEEN_XREG)
605 cleanup_addr -= 4; /* mov -8(%rbp),%rbx */
606
607 if (image) {
608 WARN_ON(proglen != oldproglen);
609 break;
610 }
611 if (proglen == oldproglen) {
612 image = module_alloc(max_t(unsigned int,
613 proglen,
614 sizeof(struct work_struct)));
615 if (!image)
616 goto out;
617 }
618 oldproglen = proglen;
619 }
620 if (bpf_jit_enable > 1)
621 pr_err("flen=%d proglen=%u pass=%d image=%p\n",
622 flen, proglen, pass, image);
623
624 if (image) {
625 if (bpf_jit_enable > 1)
626 print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS,
627 16, 1, image, proglen, false);
628
629 bpf_flush_icache(image, image + proglen);
630
631 fp->bpf_func = (void *)image;
632 }
633out:
634 kfree(addrs);
635 return;
636}
637
638static void jit_free_defer(struct work_struct *arg)
639{
640 module_free(NULL, arg);
641}
642
643/* run from softirq, we must use a work_struct to call
644 * module_free() from process context
645 */
646void bpf_jit_free(struct sk_filter *fp)
647{
648 if (fp->bpf_func != sk_run_filter) {
649 struct work_struct *work = (struct work_struct *)fp->bpf_func;
650
651 INIT_WORK(work, jit_free_defer);
652 schedule_work(work);
653 }
654}
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 2d49d4e19a36..a5b64ab4cd6e 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -16,17 +16,6 @@
16#include <asm/stacktrace.h> 16#include <asm/stacktrace.h>
17#include <linux/compat.h> 17#include <linux/compat.h>
18 18
19static void backtrace_warning_symbol(void *data, char *msg,
20 unsigned long symbol)
21{
22 /* Ignore warnings */
23}
24
25static void backtrace_warning(void *data, char *msg)
26{
27 /* Ignore warnings */
28}
29
30static int backtrace_stack(void *data, char *name) 19static int backtrace_stack(void *data, char *name)
31{ 20{
32 /* Yes, we want all stacks */ 21 /* Yes, we want all stacks */
@@ -42,8 +31,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
42} 31}
43 32
44static struct stacktrace_ops backtrace_ops = { 33static struct stacktrace_ops backtrace_ops = {
45 .warning = backtrace_warning,
46 .warning_symbol = backtrace_warning_symbol,
47 .stack = backtrace_stack, 34 .stack = backtrace_stack,
48 .address = backtrace_address, 35 .address = backtrace_address,
49 .walk_stack = print_context_stack, 36 .walk_stack = print_context_stack,
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index e37b407a0ee8..8214724ce54d 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -108,7 +108,8 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
108 } 108 }
109 irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0, 109 irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0,
110 (type == PCI_CAP_ID_MSIX) ? 110 (type == PCI_CAP_ID_MSIX) ?
111 "msi-x" : "msi"); 111 "msi-x" : "msi",
112 DOMID_SELF);
112 if (irq < 0) 113 if (irq < 0)
113 goto error; 114 goto error;
114 dev_dbg(&dev->dev, 115 dev_dbg(&dev->dev,
@@ -148,7 +149,8 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
148 irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0, 149 irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
149 (type == PCI_CAP_ID_MSIX) ? 150 (type == PCI_CAP_ID_MSIX) ?
150 "pcifront-msi-x" : 151 "pcifront-msi-x" :
151 "pcifront-msi"); 152 "pcifront-msi",
153 DOMID_SELF);
152 if (irq < 0) 154 if (irq < 0)
153 goto free; 155 goto free;
154 i++; 156 i++;
@@ -190,9 +192,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
190 192
191 list_for_each_entry(msidesc, &dev->msi_list, list) { 193 list_for_each_entry(msidesc, &dev->msi_list, list) {
192 struct physdev_map_pirq map_irq; 194 struct physdev_map_pirq map_irq;
195 domid_t domid;
196
197 domid = ret = xen_find_device_domain_owner(dev);
198 /* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED,
199 * hence check ret value for < 0. */
200 if (ret < 0)
201 domid = DOMID_SELF;
193 202
194 memset(&map_irq, 0, sizeof(map_irq)); 203 memset(&map_irq, 0, sizeof(map_irq));
195 map_irq.domid = DOMID_SELF; 204 map_irq.domid = domid;
196 map_irq.type = MAP_PIRQ_TYPE_MSI; 205 map_irq.type = MAP_PIRQ_TYPE_MSI;
197 map_irq.index = -1; 206 map_irq.index = -1;
198 map_irq.pirq = -1; 207 map_irq.pirq = -1;
@@ -215,14 +224,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
215 224
216 ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); 225 ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
217 if (ret) { 226 if (ret) {
218 dev_warn(&dev->dev, "xen map irq failed %d\n", ret); 227 dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
228 ret, domid);
219 goto out; 229 goto out;
220 } 230 }
221 231
222 ret = xen_bind_pirq_msi_to_irq(dev, msidesc, 232 ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
223 map_irq.pirq, map_irq.index, 233 map_irq.pirq, map_irq.index,
224 (type == PCI_CAP_ID_MSIX) ? 234 (type == PCI_CAP_ID_MSIX) ?
225 "msi-x" : "msi"); 235 "msi-x" : "msi",
236 domid);
226 if (ret < 0) 237 if (ret < 0)
227 goto out; 238 goto out;
228 } 239 }
@@ -461,3 +472,78 @@ void __init xen_setup_pirqs(void)
461 } 472 }
462} 473}
463#endif 474#endif
475
476#ifdef CONFIG_XEN_DOM0
477struct xen_device_domain_owner {
478 domid_t domain;
479 struct pci_dev *dev;
480 struct list_head list;
481};
482
483static DEFINE_SPINLOCK(dev_domain_list_spinlock);
484static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
485
486static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
487{
488 struct xen_device_domain_owner *owner;
489
490 list_for_each_entry(owner, &dev_domain_list, list) {
491 if (owner->dev == dev)
492 return owner;
493 }
494 return NULL;
495}
496
497int xen_find_device_domain_owner(struct pci_dev *dev)
498{
499 struct xen_device_domain_owner *owner;
500 int domain = -ENODEV;
501
502 spin_lock(&dev_domain_list_spinlock);
503 owner = find_device(dev);
504 if (owner)
505 domain = owner->domain;
506 spin_unlock(&dev_domain_list_spinlock);
507 return domain;
508}
509EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
510
511int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
512{
513 struct xen_device_domain_owner *owner;
514
515 owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
516 if (!owner)
517 return -ENODEV;
518
519 spin_lock(&dev_domain_list_spinlock);
520 if (find_device(dev)) {
521 spin_unlock(&dev_domain_list_spinlock);
522 kfree(owner);
523 return -EEXIST;
524 }
525 owner->domain = domain;
526 owner->dev = dev;
527 list_add_tail(&owner->list, &dev_domain_list);
528 spin_unlock(&dev_domain_list_spinlock);
529 return 0;
530}
531EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
532
533int xen_unregister_device_domain_owner(struct pci_dev *dev)
534{
535 struct xen_device_domain_owner *owner;
536
537 spin_lock(&dev_domain_list_spinlock);
538 owner = find_device(dev);
539 if (!owner) {
540 spin_unlock(&dev_domain_list_spinlock);
541 return -ENODEV;
542 }
543 list_del(&owner->list);
544 spin_unlock(&dev_domain_list_spinlock);
545 kfree(owner);
546 return 0;
547}
548EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
549#endif
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 0fe27d7c6258..b30aa26a8df2 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -145,17 +145,6 @@ static void virt_efi_reset_system(int reset_type,
145 data_size, data); 145 data_size, data);
146} 146}
147 147
148static efi_status_t virt_efi_set_virtual_address_map(
149 unsigned long memory_map_size,
150 unsigned long descriptor_size,
151 u32 descriptor_version,
152 efi_memory_desc_t *virtual_map)
153{
154 return efi_call_virt4(set_virtual_address_map,
155 memory_map_size, descriptor_size,
156 descriptor_version, virtual_map);
157}
158
159static efi_status_t __init phys_efi_set_virtual_address_map( 148static efi_status_t __init phys_efi_set_virtual_address_map(
160 unsigned long memory_map_size, 149 unsigned long memory_map_size,
161 unsigned long descriptor_size, 150 unsigned long descriptor_size,
@@ -468,11 +457,25 @@ void __init efi_init(void)
468#endif 457#endif
469} 458}
470 459
460void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
461{
462 u64 addr, npages;
463
464 addr = md->virt_addr;
465 npages = md->num_pages;
466
467 memrange_efi_to_native(&addr, &npages);
468
469 if (executable)
470 set_memory_x(addr, npages);
471 else
472 set_memory_nx(addr, npages);
473}
474
471static void __init runtime_code_page_mkexec(void) 475static void __init runtime_code_page_mkexec(void)
472{ 476{
473 efi_memory_desc_t *md; 477 efi_memory_desc_t *md;
474 void *p; 478 void *p;
475 u64 addr, npages;
476 479
477 /* Make EFI runtime service code area executable */ 480 /* Make EFI runtime service code area executable */
478 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 481 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -481,10 +484,7 @@ static void __init runtime_code_page_mkexec(void)
481 if (md->type != EFI_RUNTIME_SERVICES_CODE) 484 if (md->type != EFI_RUNTIME_SERVICES_CODE)
482 continue; 485 continue;
483 486
484 addr = md->virt_addr; 487 efi_set_executable(md, true);
485 npages = md->num_pages;
486 memrange_efi_to_native(&addr, &npages);
487 set_memory_x(addr, npages);
488 } 488 }
489} 489}
490 490
@@ -498,13 +498,42 @@ static void __init runtime_code_page_mkexec(void)
498 */ 498 */
499void __init efi_enter_virtual_mode(void) 499void __init efi_enter_virtual_mode(void)
500{ 500{
501 efi_memory_desc_t *md; 501 efi_memory_desc_t *md, *prev_md = NULL;
502 efi_status_t status; 502 efi_status_t status;
503 unsigned long size; 503 unsigned long size;
504 u64 end, systab, addr, npages, end_pfn; 504 u64 end, systab, addr, npages, end_pfn;
505 void *p, *va; 505 void *p, *va, *new_memmap = NULL;
506 int count = 0;
506 507
507 efi.systab = NULL; 508 efi.systab = NULL;
509
510 /* Merge contiguous regions of the same type and attribute */
511 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
512 u64 prev_size;
513 md = p;
514
515 if (!prev_md) {
516 prev_md = md;
517 continue;
518 }
519
520 if (prev_md->type != md->type ||
521 prev_md->attribute != md->attribute) {
522 prev_md = md;
523 continue;
524 }
525
526 prev_size = prev_md->num_pages << EFI_PAGE_SHIFT;
527
528 if (md->phys_addr == (prev_md->phys_addr + prev_size)) {
529 prev_md->num_pages += md->num_pages;
530 md->type = EFI_RESERVED_TYPE;
531 md->attribute = 0;
532 continue;
533 }
534 prev_md = md;
535 }
536
508 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 537 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
509 md = p; 538 md = p;
510 if (!(md->attribute & EFI_MEMORY_RUNTIME)) 539 if (!(md->attribute & EFI_MEMORY_RUNTIME))
@@ -541,15 +570,21 @@ void __init efi_enter_virtual_mode(void)
541 systab += md->virt_addr - md->phys_addr; 570 systab += md->virt_addr - md->phys_addr;
542 efi.systab = (efi_system_table_t *) (unsigned long) systab; 571 efi.systab = (efi_system_table_t *) (unsigned long) systab;
543 } 572 }
573 new_memmap = krealloc(new_memmap,
574 (count + 1) * memmap.desc_size,
575 GFP_KERNEL);
576 memcpy(new_memmap + (count * memmap.desc_size), md,
577 memmap.desc_size);
578 count++;
544 } 579 }
545 580
546 BUG_ON(!efi.systab); 581 BUG_ON(!efi.systab);
547 582
548 status = phys_efi_set_virtual_address_map( 583 status = phys_efi_set_virtual_address_map(
549 memmap.desc_size * memmap.nr_map, 584 memmap.desc_size * count,
550 memmap.desc_size, 585 memmap.desc_size,
551 memmap.desc_version, 586 memmap.desc_version,
552 memmap.phys_map); 587 (efi_memory_desc_t *)__pa(new_memmap));
553 588
554 if (status != EFI_SUCCESS) { 589 if (status != EFI_SUCCESS) {
555 printk(KERN_ALERT "Unable to switch EFI into virtual mode " 590 printk(KERN_ALERT "Unable to switch EFI into virtual mode "
@@ -572,11 +607,12 @@ void __init efi_enter_virtual_mode(void)
572 efi.set_variable = virt_efi_set_variable; 607 efi.set_variable = virt_efi_set_variable;
573 efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; 608 efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
574 efi.reset_system = virt_efi_reset_system; 609 efi.reset_system = virt_efi_reset_system;
575 efi.set_virtual_address_map = virt_efi_set_virtual_address_map; 610 efi.set_virtual_address_map = NULL;
576 if (__supported_pte_mask & _PAGE_NX) 611 if (__supported_pte_mask & _PAGE_NX)
577 runtime_code_page_mkexec(); 612 runtime_code_page_mkexec();
578 early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); 613 early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
579 memmap.map = NULL; 614 memmap.map = NULL;
615 kfree(new_memmap);
580} 616}
581 617
582/* 618/*
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index ac0621a7ac3d..2649426a7905 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -41,22 +41,7 @@
41static pgd_t save_pgd __initdata; 41static pgd_t save_pgd __initdata;
42static unsigned long efi_flags __initdata; 42static unsigned long efi_flags __initdata;
43 43
44static void __init early_mapping_set_exec(unsigned long start, 44static void __init early_code_mapping_set_exec(int executable)
45 unsigned long end,
46 int executable)
47{
48 unsigned long num_pages;
49
50 start &= PMD_MASK;
51 end = (end + PMD_SIZE - 1) & PMD_MASK;
52 num_pages = (end - start) >> PAGE_SHIFT;
53 if (executable)
54 set_memory_x((unsigned long)__va(start), num_pages);
55 else
56 set_memory_nx((unsigned long)__va(start), num_pages);
57}
58
59static void __init early_runtime_code_mapping_set_exec(int executable)
60{ 45{
61 efi_memory_desc_t *md; 46 efi_memory_desc_t *md;
62 void *p; 47 void *p;
@@ -67,11 +52,8 @@ static void __init early_runtime_code_mapping_set_exec(int executable)
67 /* Make EFI runtime service code area executable */ 52 /* Make EFI runtime service code area executable */
68 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 53 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
69 md = p; 54 md = p;
70 if (md->type == EFI_RUNTIME_SERVICES_CODE) { 55 if (md->type == EFI_RUNTIME_SERVICES_CODE)
71 unsigned long end; 56 efi_set_executable(md, executable);
72 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
73 early_mapping_set_exec(md->phys_addr, end, executable);
74 }
75 } 57 }
76} 58}
77 59
@@ -79,7 +61,7 @@ void __init efi_call_phys_prelog(void)
79{ 61{
80 unsigned long vaddress; 62 unsigned long vaddress;
81 63
82 early_runtime_code_mapping_set_exec(1); 64 early_code_mapping_set_exec(1);
83 local_irq_save(efi_flags); 65 local_irq_save(efi_flags);
84 vaddress = (unsigned long)__va(0x0UL); 66 vaddress = (unsigned long)__va(0x0UL);
85 save_pgd = *pgd_offset_k(0x0UL); 67 save_pgd = *pgd_offset_k(0x0UL);
@@ -95,7 +77,7 @@ void __init efi_call_phys_epilog(void)
95 set_pgd(pgd_offset_k(0x0UL), save_pgd); 77 set_pgd(pgd_offset_k(0x0UL), save_pgd);
96 __flush_tlb_all(); 78 __flush_tlb_all();
97 local_irq_restore(efi_flags); 79 local_irq_restore(efi_flags);
98 early_runtime_code_mapping_set_exec(0); 80 early_code_mapping_set_exec(0);
99} 81}
100 82
101void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, 83void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
@@ -107,8 +89,10 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
107 return ioremap(phys_addr, size); 89 return ioremap(phys_addr, size);
108 90
109 last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); 91 last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
110 if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) 92 if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
111 return NULL; 93 unsigned long top = last_map_pfn << PAGE_SHIFT;
94 efi_ioremap(top, size - (top - phys_addr), type);
95 }
112 96
113 return (void __iomem *)__va(phys_addr); 97 return (void __iomem *)__va(phys_addr);
114} 98}
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 275dbc19e2cf..7000e74b3087 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -194,7 +194,7 @@ static unsigned long __init mrst_calibrate_tsc(void)
194 return 0; 194 return 0;
195} 195}
196 196
197void __init mrst_time_init(void) 197static void __init mrst_time_init(void)
198{ 198{
199 sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); 199 sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
200 switch (mrst_timer_options) { 200 switch (mrst_timer_options) {
@@ -216,7 +216,7 @@ void __init mrst_time_init(void)
216 apbt_time_init(); 216 apbt_time_init();
217} 217}
218 218
219void __cpuinit mrst_arch_setup(void) 219static void __cpuinit mrst_arch_setup(void)
220{ 220{
221 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) 221 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
222 __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; 222 __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile
index c2a8cab65e5d..81c5e2165c24 100644
--- a/arch/x86/platform/olpc/Makefile
+++ b/arch/x86/platform/olpc/Makefile
@@ -1,4 +1,2 @@
1obj-$(CONFIG_OLPC) += olpc.o 1obj-$(CONFIG_OLPC) += olpc.o olpc_ofw.o olpc_dt.o
2obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o 2obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o
3obj-$(CONFIG_OLPC) += olpc_ofw.o
4obj-$(CONFIG_OF_PROMTREE) += olpc_dt.o
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index edaf3fe8dc5e..0060fd59ea00 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -18,6 +18,7 @@
18#include <linux/io.h> 18#include <linux/io.h>
19#include <linux/string.h> 19#include <linux/string.h>
20#include <linux/platform_device.h> 20#include <linux/platform_device.h>
21#include <linux/of.h>
21 22
22#include <asm/geode.h> 23#include <asm/geode.h>
23#include <asm/setup.h> 24#include <asm/setup.h>
@@ -187,41 +188,43 @@ err:
187} 188}
188EXPORT_SYMBOL_GPL(olpc_ec_cmd); 189EXPORT_SYMBOL_GPL(olpc_ec_cmd);
189 190
190static bool __init check_ofw_architecture(void) 191static bool __init check_ofw_architecture(struct device_node *root)
191{ 192{
192 size_t propsize; 193 const char *olpc_arch;
193 char olpc_arch[5]; 194 int propsize;
194 const void *args[] = { NULL, "architecture", olpc_arch, (void *)5 };
195 void *res[] = { &propsize };
196 195
197 if (olpc_ofw("getprop", args, res)) { 196 olpc_arch = of_get_property(root, "architecture", &propsize);
198 printk(KERN_ERR "ofw: getprop call failed!\n");
199 return false;
200 }
201 return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0; 197 return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0;
202} 198}
203 199
204static u32 __init get_board_revision(void) 200static u32 __init get_board_revision(struct device_node *root)
205{ 201{
206 size_t propsize; 202 int propsize;
207 __be32 rev; 203 const __be32 *rev;
208 const void *args[] = { NULL, "board-revision-int", &rev, (void *)4 }; 204
209 void *res[] = { &propsize }; 205 rev = of_get_property(root, "board-revision-int", &propsize);
210 206 if (propsize != 4)
211 if (olpc_ofw("getprop", args, res) || propsize != 4) { 207 return 0;
212 printk(KERN_ERR "ofw: getprop call failed!\n"); 208
213 return cpu_to_be32(0); 209 return be32_to_cpu(*rev);
214 }
215 return be32_to_cpu(rev);
216} 210}
217 211
218static bool __init platform_detect(void) 212static bool __init platform_detect(void)
219{ 213{
220 if (!check_ofw_architecture()) 214 struct device_node *root = of_find_node_by_path("/");
215 bool success;
216
217 if (!root)
221 return false; 218 return false;
222 olpc_platform_info.flags |= OLPC_F_PRESENT; 219
223 olpc_platform_info.boardrev = get_board_revision(); 220 success = check_ofw_architecture(root);
224 return true; 221 if (success) {
222 olpc_platform_info.boardrev = get_board_revision(root);
223 olpc_platform_info.flags |= OLPC_F_PRESENT;
224 }
225
226 of_node_put(root);
227 return success;
225} 228}
226 229
227static int __init add_xo1_platform_devices(void) 230static int __init add_xo1_platform_devices(void)
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
index 044bda5b3174..d39f63d017d2 100644
--- a/arch/x86/platform/olpc/olpc_dt.c
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -19,7 +19,9 @@
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/bootmem.h> 20#include <linux/bootmem.h>
21#include <linux/of.h> 21#include <linux/of.h>
22#include <linux/of_platform.h>
22#include <linux/of_pdt.h> 23#include <linux/of_pdt.h>
24#include <asm/olpc.h>
23#include <asm/olpc_ofw.h> 25#include <asm/olpc_ofw.h>
24 26
25static phandle __init olpc_dt_getsibling(phandle node) 27static phandle __init olpc_dt_getsibling(phandle node)
@@ -180,3 +182,20 @@ void __init olpc_dt_build_devicetree(void)
180 pr_info("PROM DT: Built device tree with %u bytes of memory.\n", 182 pr_info("PROM DT: Built device tree with %u bytes of memory.\n",
181 prom_early_allocated); 183 prom_early_allocated);
182} 184}
185
186/* A list of DT node/bus matches that we want to expose as platform devices */
187static struct of_device_id __initdata of_ids[] = {
188 { .compatible = "olpc,xo1-battery" },
189 { .compatible = "olpc,xo1-dcon" },
190 { .compatible = "olpc,xo1-rtc" },
191 {},
192};
193
194static int __init olpc_create_platform_devices(void)
195{
196 if (machine_is_olpc())
197 return of_platform_bus_probe(NULL, of_ids, NULL);
198 else
199 return 0;
200}
201device_initcall(olpc_create_platform_devices);
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 7cb6424317f6..c58e0ea39ef5 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -699,16 +699,17 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
699 struct mm_struct *mm, 699 struct mm_struct *mm,
700 unsigned long va, unsigned int cpu) 700 unsigned long va, unsigned int cpu)
701{ 701{
702 int tcpu;
703 int uvhub;
704 int locals = 0; 702 int locals = 0;
705 int remotes = 0; 703 int remotes = 0;
706 int hubs = 0; 704 int hubs = 0;
705 int tcpu;
706 int tpnode;
707 struct bau_desc *bau_desc; 707 struct bau_desc *bau_desc;
708 struct cpumask *flush_mask; 708 struct cpumask *flush_mask;
709 struct ptc_stats *stat; 709 struct ptc_stats *stat;
710 struct bau_control *bcp; 710 struct bau_control *bcp;
711 struct bau_control *tbcp; 711 struct bau_control *tbcp;
712 struct hub_and_pnode *hpp;
712 713
713 /* kernel was booted 'nobau' */ 714 /* kernel was booted 'nobau' */
714 if (nobau) 715 if (nobau)
@@ -750,11 +751,18 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
750 bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; 751 bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
751 bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); 752 bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
752 753
753 /* cpu statistics */
754 for_each_cpu(tcpu, flush_mask) { 754 for_each_cpu(tcpu, flush_mask) {
755 uvhub = uv_cpu_to_blade_id(tcpu); 755 /*
756 bau_uvhub_set(uvhub, &bau_desc->distribution); 756 * The distribution vector is a bit map of pnodes, relative
757 if (uvhub == bcp->uvhub) 757 * to the partition base pnode (and the partition base nasid
758 * in the header).
759 * Translate cpu to pnode and hub using an array stored
760 * in local memory.
761 */
762 hpp = &bcp->socket_master->target_hub_and_pnode[tcpu];
763 tpnode = hpp->pnode - bcp->partition_base_pnode;
764 bau_uvhub_set(tpnode, &bau_desc->distribution);
765 if (hpp->uvhub == bcp->uvhub)
758 locals++; 766 locals++;
759 else 767 else
760 remotes++; 768 remotes++;
@@ -855,7 +863,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
855 * an interrupt, but causes an error message to be returned to 863 * an interrupt, but causes an error message to be returned to
856 * the sender. 864 * the sender.
857 */ 865 */
858static void uv_enable_timeouts(void) 866static void __init uv_enable_timeouts(void)
859{ 867{
860 int uvhub; 868 int uvhub;
861 int nuvhubs; 869 int nuvhubs;
@@ -1326,10 +1334,10 @@ static int __init uv_ptc_init(void)
1326} 1334}
1327 1335
1328/* 1336/*
1329 * initialize the sending side's sending buffers 1337 * Initialize the sending side's sending buffers.
1330 */ 1338 */
1331static void 1339static void
1332uv_activation_descriptor_init(int node, int pnode) 1340uv_activation_descriptor_init(int node, int pnode, int base_pnode)
1333{ 1341{
1334 int i; 1342 int i;
1335 int cpu; 1343 int cpu;
@@ -1352,11 +1360,11 @@ uv_activation_descriptor_init(int node, int pnode)
1352 n = pa >> uv_nshift; 1360 n = pa >> uv_nshift;
1353 m = pa & uv_mmask; 1361 m = pa & uv_mmask;
1354 1362
1363 /* the 14-bit pnode */
1355 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, 1364 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
1356 (n << UV_DESC_BASE_PNODE_SHIFT | m)); 1365 (n << UV_DESC_BASE_PNODE_SHIFT | m));
1357
1358 /* 1366 /*
1359 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each 1367 * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
1360 * cpu even though we only use the first one; one descriptor can 1368 * cpu even though we only use the first one; one descriptor can
1361 * describe a broadcast to 256 uv hubs. 1369 * describe a broadcast to 256 uv hubs.
1362 */ 1370 */
@@ -1365,12 +1373,13 @@ uv_activation_descriptor_init(int node, int pnode)
1365 memset(bd2, 0, sizeof(struct bau_desc)); 1373 memset(bd2, 0, sizeof(struct bau_desc));
1366 bd2->header.sw_ack_flag = 1; 1374 bd2->header.sw_ack_flag = 1;
1367 /* 1375 /*
1368 * base_dest_nodeid is the nasid of the first uvhub 1376 * The base_dest_nasid set in the message header is the nasid
1369 * in the partition. The bit map will indicate uvhub numbers, 1377 * of the first uvhub in the partition. The bit map will
1370 * which are 0-N in a partition. Pnodes are unique system-wide. 1378 * indicate destination pnode numbers relative to that base.
1379 * They may not be consecutive if nasid striding is being used.
1371 */ 1380 */
1372 bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode); 1381 bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode);
1373 bd2->header.dest_subnodeid = 0x10; /* the LB */ 1382 bd2->header.dest_subnodeid = UV_LB_SUBNODEID;
1374 bd2->header.command = UV_NET_ENDPOINT_INTD; 1383 bd2->header.command = UV_NET_ENDPOINT_INTD;
1375 bd2->header.int_both = 1; 1384 bd2->header.int_both = 1;
1376 /* 1385 /*
@@ -1442,7 +1451,7 @@ uv_payload_queue_init(int node, int pnode)
1442/* 1451/*
1443 * Initialization of each UV hub's structures 1452 * Initialization of each UV hub's structures
1444 */ 1453 */
1445static void __init uv_init_uvhub(int uvhub, int vector) 1454static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode)
1446{ 1455{
1447 int node; 1456 int node;
1448 int pnode; 1457 int pnode;
@@ -1450,11 +1459,11 @@ static void __init uv_init_uvhub(int uvhub, int vector)
1450 1459
1451 node = uvhub_to_first_node(uvhub); 1460 node = uvhub_to_first_node(uvhub);
1452 pnode = uv_blade_to_pnode(uvhub); 1461 pnode = uv_blade_to_pnode(uvhub);
1453 uv_activation_descriptor_init(node, pnode); 1462 uv_activation_descriptor_init(node, pnode, base_pnode);
1454 uv_payload_queue_init(node, pnode); 1463 uv_payload_queue_init(node, pnode);
1455 /* 1464 /*
1456 * the below initialization can't be in firmware because the 1465 * The below initialization can't be in firmware because the
1457 * messaging IRQ will be determined by the OS 1466 * messaging IRQ will be determined by the OS.
1458 */ 1467 */
1459 apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; 1468 apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
1460 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, 1469 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
@@ -1491,10 +1500,11 @@ calculate_destination_timeout(void)
1491/* 1500/*
1492 * initialize the bau_control structure for each cpu 1501 * initialize the bau_control structure for each cpu
1493 */ 1502 */
1494static int __init uv_init_per_cpu(int nuvhubs) 1503static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode)
1495{ 1504{
1496 int i; 1505 int i;
1497 int cpu; 1506 int cpu;
1507 int tcpu;
1498 int pnode; 1508 int pnode;
1499 int uvhub; 1509 int uvhub;
1500 int have_hmaster; 1510 int have_hmaster;
@@ -1528,6 +1538,15 @@ static int __init uv_init_per_cpu(int nuvhubs)
1528 bcp = &per_cpu(bau_control, cpu); 1538 bcp = &per_cpu(bau_control, cpu);
1529 memset(bcp, 0, sizeof(struct bau_control)); 1539 memset(bcp, 0, sizeof(struct bau_control));
1530 pnode = uv_cpu_hub_info(cpu)->pnode; 1540 pnode = uv_cpu_hub_info(cpu)->pnode;
1541 if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) {
1542 printk(KERN_EMERG
1543 "cpu %d pnode %d-%d beyond %d; BAU disabled\n",
1544 cpu, pnode, base_part_pnode,
1545 UV_DISTRIBUTION_SIZE);
1546 return 1;
1547 }
1548 bcp->osnode = cpu_to_node(cpu);
1549 bcp->partition_base_pnode = uv_partition_base_pnode;
1531 uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; 1550 uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
1532 *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); 1551 *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
1533 bdp = &uvhub_descs[uvhub]; 1552 bdp = &uvhub_descs[uvhub];
@@ -1536,7 +1555,7 @@ static int __init uv_init_per_cpu(int nuvhubs)
1536 bdp->pnode = pnode; 1555 bdp->pnode = pnode;
1537 /* kludge: 'assuming' one node per socket, and assuming that 1556 /* kludge: 'assuming' one node per socket, and assuming that
1538 disabling a socket just leaves a gap in node numbers */ 1557 disabling a socket just leaves a gap in node numbers */
1539 socket = (cpu_to_node(cpu) & 1); 1558 socket = bcp->osnode & 1;
1540 bdp->socket_mask |= (1 << socket); 1559 bdp->socket_mask |= (1 << socket);
1541 sdp = &bdp->socket[socket]; 1560 sdp = &bdp->socket[socket];
1542 sdp->cpu_number[sdp->num_cpus] = cpu; 1561 sdp->cpu_number[sdp->num_cpus] = cpu;
@@ -1585,6 +1604,20 @@ static int __init uv_init_per_cpu(int nuvhubs)
1585nextsocket: 1604nextsocket:
1586 socket++; 1605 socket++;
1587 socket_mask = (socket_mask >> 1); 1606 socket_mask = (socket_mask >> 1);
1607 /* each socket gets a local array of pnodes/hubs */
1608 bcp = smaster;
1609 bcp->target_hub_and_pnode = kmalloc_node(
1610 sizeof(struct hub_and_pnode) *
1611 num_possible_cpus(), GFP_KERNEL, bcp->osnode);
1612 memset(bcp->target_hub_and_pnode, 0,
1613 sizeof(struct hub_and_pnode) *
1614 num_possible_cpus());
1615 for_each_present_cpu(tcpu) {
1616 bcp->target_hub_and_pnode[tcpu].pnode =
1617 uv_cpu_hub_info(tcpu)->pnode;
1618 bcp->target_hub_and_pnode[tcpu].uvhub =
1619 uv_cpu_hub_info(tcpu)->numa_blade_id;
1620 }
1588 } 1621 }
1589 } 1622 }
1590 kfree(uvhub_descs); 1623 kfree(uvhub_descs);
@@ -1637,21 +1670,22 @@ static int __init uv_bau_init(void)
1637 spin_lock_init(&disable_lock); 1670 spin_lock_init(&disable_lock);
1638 congested_cycles = microsec_2_cycles(congested_response_us); 1671 congested_cycles = microsec_2_cycles(congested_response_us);
1639 1672
1640 if (uv_init_per_cpu(nuvhubs)) {
1641 nobau = 1;
1642 return 0;
1643 }
1644
1645 uv_partition_base_pnode = 0x7fffffff; 1673 uv_partition_base_pnode = 0x7fffffff;
1646 for (uvhub = 0; uvhub < nuvhubs; uvhub++) 1674 for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
1647 if (uv_blade_nr_possible_cpus(uvhub) && 1675 if (uv_blade_nr_possible_cpus(uvhub) &&
1648 (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) 1676 (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode))
1649 uv_partition_base_pnode = uv_blade_to_pnode(uvhub); 1677 uv_partition_base_pnode = uv_blade_to_pnode(uvhub);
1678 }
1679
1680 if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) {
1681 nobau = 1;
1682 return 0;
1683 }
1650 1684
1651 vector = UV_BAU_MESSAGE; 1685 vector = UV_BAU_MESSAGE;
1652 for_each_possible_blade(uvhub) 1686 for_each_possible_blade(uvhub)
1653 if (uv_blade_nr_possible_cpus(uvhub)) 1687 if (uv_blade_nr_possible_cpus(uvhub))
1654 uv_init_uvhub(uvhub, vector); 1688 uv_init_uvhub(uvhub, vector, uv_partition_base_pnode);
1655 1689
1656 uv_enable_timeouts(); 1690 uv_enable_timeouts();
1657 alloc_intr_gate(vector, uv_bau_message_intr1); 1691 alloc_intr_gate(vector, uv_bau_message_intr1);
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 9daf5d1af9f1..0eb90184515f 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -40,7 +40,6 @@ static struct clocksource clocksource_uv = {
40 .rating = 400, 40 .rating = 400,
41 .read = uv_read_rtc, 41 .read = uv_read_rtc,
42 .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK, 42 .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
43 .shift = 10,
44 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 43 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
45}; 44};
46 45
@@ -372,14 +371,11 @@ static __init int uv_rtc_setup_clock(void)
372 if (!is_uv_system()) 371 if (!is_uv_system())
373 return -ENODEV; 372 return -ENODEV;
374 373
375 clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
376 clocksource_uv.shift);
377
378 /* If single blade, prefer tsc */ 374 /* If single blade, prefer tsc */
379 if (uv_num_possible_blades() == 1) 375 if (uv_num_possible_blades() == 1)
380 clocksource_uv.rating = 250; 376 clocksource_uv.rating = 250;
381 377
382 rc = clocksource_register(&clocksource_uv); 378 rc = clocksource_register_hz(&clocksource_uv, sn_rtc_cycles_per_second);
383 if (rc) 379 if (rc)
384 printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc); 380 printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc);
385 else 381 else
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e3c6a06cf725..dd7b88f2ec7a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -235,7 +235,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
235 *dx &= maskedx; 235 *dx &= maskedx;
236} 236}
237 237
238static __init void xen_init_cpuid_mask(void) 238static void __init xen_init_cpuid_mask(void)
239{ 239{
240 unsigned int ax, bx, cx, dx; 240 unsigned int ax, bx, cx, dx;
241 unsigned int xsave_mask; 241 unsigned int xsave_mask;
@@ -400,7 +400,7 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
400/* 400/*
401 * load_gdt for early boot, when the gdt is only mapped once 401 * load_gdt for early boot, when the gdt is only mapped once
402 */ 402 */
403static __init void xen_load_gdt_boot(const struct desc_ptr *dtr) 403static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
404{ 404{
405 unsigned long va = dtr->address; 405 unsigned long va = dtr->address;
406 unsigned int size = dtr->size + 1; 406 unsigned int size = dtr->size + 1;
@@ -662,7 +662,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
662 * Version of write_gdt_entry for use at early boot-time needed to 662 * Version of write_gdt_entry for use at early boot-time needed to
663 * update an entry as simply as possible. 663 * update an entry as simply as possible.
664 */ 664 */
665static __init void xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, 665static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
666 const void *desc, int type) 666 const void *desc, int type)
667{ 667{
668 switch (type) { 668 switch (type) {
@@ -933,18 +933,18 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
933 return ret; 933 return ret;
934} 934}
935 935
936static const struct pv_info xen_info __initdata = { 936static const struct pv_info xen_info __initconst = {
937 .paravirt_enabled = 1, 937 .paravirt_enabled = 1,
938 .shared_kernel_pmd = 0, 938 .shared_kernel_pmd = 0,
939 939
940 .name = "Xen", 940 .name = "Xen",
941}; 941};
942 942
943static const struct pv_init_ops xen_init_ops __initdata = { 943static const struct pv_init_ops xen_init_ops __initconst = {
944 .patch = xen_patch, 944 .patch = xen_patch,
945}; 945};
946 946
947static const struct pv_cpu_ops xen_cpu_ops __initdata = { 947static const struct pv_cpu_ops xen_cpu_ops __initconst = {
948 .cpuid = xen_cpuid, 948 .cpuid = xen_cpuid,
949 949
950 .set_debugreg = xen_set_debugreg, 950 .set_debugreg = xen_set_debugreg,
@@ -1004,7 +1004,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
1004 .end_context_switch = xen_end_context_switch, 1004 .end_context_switch = xen_end_context_switch,
1005}; 1005};
1006 1006
1007static const struct pv_apic_ops xen_apic_ops __initdata = { 1007static const struct pv_apic_ops xen_apic_ops __initconst = {
1008#ifdef CONFIG_X86_LOCAL_APIC 1008#ifdef CONFIG_X86_LOCAL_APIC
1009 .startup_ipi_hook = paravirt_nop, 1009 .startup_ipi_hook = paravirt_nop,
1010#endif 1010#endif
@@ -1055,7 +1055,7 @@ int xen_panic_handler_init(void)
1055 return 0; 1055 return 0;
1056} 1056}
1057 1057
1058static const struct machine_ops __initdata xen_machine_ops = { 1058static const struct machine_ops xen_machine_ops __initconst = {
1059 .restart = xen_restart, 1059 .restart = xen_restart,
1060 .halt = xen_machine_halt, 1060 .halt = xen_machine_halt,
1061 .power_off = xen_machine_halt, 1061 .power_off = xen_machine_halt,
@@ -1332,7 +1332,7 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
1332 return NOTIFY_OK; 1332 return NOTIFY_OK;
1333} 1333}
1334 1334
1335static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = { 1335static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = {
1336 .notifier_call = xen_hvm_cpu_notify, 1336 .notifier_call = xen_hvm_cpu_notify,
1337}; 1337};
1338 1338
@@ -1381,7 +1381,7 @@ bool xen_hvm_need_lapic(void)
1381} 1381}
1382EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); 1382EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
1383 1383
1384const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = { 1384const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
1385 .name = "Xen HVM", 1385 .name = "Xen HVM",
1386 .detect = xen_hvm_platform, 1386 .detect = xen_hvm_platform,
1387 .init_platform = xen_hvm_guest_init, 1387 .init_platform = xen_hvm_guest_init,
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 6a6fe8939645..8bbb465b6f0a 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -113,7 +113,7 @@ static void xen_halt(void)
113 xen_safe_halt(); 113 xen_safe_halt();
114} 114}
115 115
116static const struct pv_irq_ops xen_irq_ops __initdata = { 116static const struct pv_irq_ops xen_irq_ops __initconst = {
117 .save_fl = PV_CALLEE_SAVE(xen_save_fl), 117 .save_fl = PV_CALLEE_SAVE(xen_save_fl),
118 .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), 118 .restore_fl = PV_CALLEE_SAVE(xen_restore_fl),
119 .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), 119 .irq_disable = PV_CALLEE_SAVE(xen_irq_disable),
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 0684f3c74d53..02d752460371 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1054,7 +1054,7 @@ void xen_mm_pin_all(void)
1054 * that's before we have page structures to store the bits. So do all 1054 * that's before we have page structures to store the bits. So do all
1055 * the book-keeping now. 1055 * the book-keeping now.
1056 */ 1056 */
1057static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page, 1057static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
1058 enum pt_level level) 1058 enum pt_level level)
1059{ 1059{
1060 SetPagePinned(page); 1060 SetPagePinned(page);
@@ -1187,7 +1187,7 @@ static void drop_other_mm_ref(void *info)
1187 1187
1188 active_mm = percpu_read(cpu_tlbstate.active_mm); 1188 active_mm = percpu_read(cpu_tlbstate.active_mm);
1189 1189
1190 if (active_mm == mm) 1190 if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
1191 leave_mm(smp_processor_id()); 1191 leave_mm(smp_processor_id());
1192 1192
1193 /* If this cpu still has a stale cr3 reference, then make sure 1193 /* If this cpu still has a stale cr3 reference, then make sure
@@ -1271,7 +1271,7 @@ void xen_exit_mmap(struct mm_struct *mm)
1271 spin_unlock(&mm->page_table_lock); 1271 spin_unlock(&mm->page_table_lock);
1272} 1272}
1273 1273
1274static __init void xen_pagetable_setup_start(pgd_t *base) 1274static void __init xen_pagetable_setup_start(pgd_t *base)
1275{ 1275{
1276} 1276}
1277 1277
@@ -1291,7 +1291,7 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
1291 1291
1292static void xen_post_allocator_init(void); 1292static void xen_post_allocator_init(void);
1293 1293
1294static __init void xen_pagetable_setup_done(pgd_t *base) 1294static void __init xen_pagetable_setup_done(pgd_t *base)
1295{ 1295{
1296 xen_setup_shared_info(); 1296 xen_setup_shared_info();
1297 xen_post_allocator_init(); 1297 xen_post_allocator_init();
@@ -1488,7 +1488,7 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
1488} 1488}
1489 1489
1490#ifdef CONFIG_X86_32 1490#ifdef CONFIG_X86_32
1491static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) 1491static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
1492{ 1492{
1493 /* If there's an existing pte, then don't allow _PAGE_RW to be set */ 1493 /* If there's an existing pte, then don't allow _PAGE_RW to be set */
1494 if (pte_val_ma(*ptep) & _PAGE_PRESENT) 1494 if (pte_val_ma(*ptep) & _PAGE_PRESENT)
@@ -1498,7 +1498,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
1498 return pte; 1498 return pte;
1499} 1499}
1500#else /* CONFIG_X86_64 */ 1500#else /* CONFIG_X86_64 */
1501static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) 1501static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
1502{ 1502{
1503 unsigned long pfn = pte_pfn(pte); 1503 unsigned long pfn = pte_pfn(pte);
1504 1504
@@ -1519,7 +1519,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
1519 1519
1520/* Init-time set_pte while constructing initial pagetables, which 1520/* Init-time set_pte while constructing initial pagetables, which
1521 doesn't allow RO pagetable pages to be remapped RW */ 1521 doesn't allow RO pagetable pages to be remapped RW */
1522static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) 1522static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
1523{ 1523{
1524 pte = mask_rw_pte(ptep, pte); 1524 pte = mask_rw_pte(ptep, pte);
1525 1525
@@ -1537,7 +1537,7 @@ static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1537 1537
1538/* Early in boot, while setting up the initial pagetable, assume 1538/* Early in boot, while setting up the initial pagetable, assume
1539 everything is pinned. */ 1539 everything is pinned. */
1540static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) 1540static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
1541{ 1541{
1542#ifdef CONFIG_FLATMEM 1542#ifdef CONFIG_FLATMEM
1543 BUG_ON(mem_map); /* should only be used early */ 1543 BUG_ON(mem_map); /* should only be used early */
@@ -1547,7 +1547,7 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
1547} 1547}
1548 1548
1549/* Used for pmd and pud */ 1549/* Used for pmd and pud */
1550static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) 1550static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
1551{ 1551{
1552#ifdef CONFIG_FLATMEM 1552#ifdef CONFIG_FLATMEM
1553 BUG_ON(mem_map); /* should only be used early */ 1553 BUG_ON(mem_map); /* should only be used early */
@@ -1557,13 +1557,13 @@ static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
1557 1557
1558/* Early release_pte assumes that all pts are pinned, since there's 1558/* Early release_pte assumes that all pts are pinned, since there's
1559 only init_mm and anything attached to that is pinned. */ 1559 only init_mm and anything attached to that is pinned. */
1560static __init void xen_release_pte_init(unsigned long pfn) 1560static void __init xen_release_pte_init(unsigned long pfn)
1561{ 1561{
1562 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); 1562 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
1563 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); 1563 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1564} 1564}
1565 1565
1566static __init void xen_release_pmd_init(unsigned long pfn) 1566static void __init xen_release_pmd_init(unsigned long pfn)
1567{ 1567{
1568 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); 1568 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1569} 1569}
@@ -1689,7 +1689,7 @@ static void set_page_prot(void *addr, pgprot_t prot)
1689 BUG(); 1689 BUG();
1690} 1690}
1691 1691
1692static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) 1692static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1693{ 1693{
1694 unsigned pmdidx, pteidx; 1694 unsigned pmdidx, pteidx;
1695 unsigned ident_pte; 1695 unsigned ident_pte;
@@ -1772,7 +1772,7 @@ static void convert_pfn_mfn(void *v)
1772 * of the physical mapping once some sort of allocator has been set 1772 * of the physical mapping once some sort of allocator has been set
1773 * up. 1773 * up.
1774 */ 1774 */
1775__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, 1775pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1776 unsigned long max_pfn) 1776 unsigned long max_pfn)
1777{ 1777{
1778 pud_t *l3; 1778 pud_t *l3;
@@ -1843,7 +1843,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
1843static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); 1843static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
1844static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); 1844static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
1845 1845
1846static __init void xen_write_cr3_init(unsigned long cr3) 1846static void __init xen_write_cr3_init(unsigned long cr3)
1847{ 1847{
1848 unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); 1848 unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
1849 1849
@@ -1880,7 +1880,7 @@ static __init void xen_write_cr3_init(unsigned long cr3)
1880 pv_mmu_ops.write_cr3 = &xen_write_cr3; 1880 pv_mmu_ops.write_cr3 = &xen_write_cr3;
1881} 1881}
1882 1882
1883__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, 1883pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1884 unsigned long max_pfn) 1884 unsigned long max_pfn)
1885{ 1885{
1886 pmd_t *kernel_pmd; 1886 pmd_t *kernel_pmd;
@@ -1986,7 +1986,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
1986#endif 1986#endif
1987} 1987}
1988 1988
1989__init void xen_ident_map_ISA(void) 1989void __init xen_ident_map_ISA(void)
1990{ 1990{
1991 unsigned long pa; 1991 unsigned long pa;
1992 1992
@@ -2009,7 +2009,7 @@ __init void xen_ident_map_ISA(void)
2009 xen_flush_tlb(); 2009 xen_flush_tlb();
2010} 2010}
2011 2011
2012static __init void xen_post_allocator_init(void) 2012static void __init xen_post_allocator_init(void)
2013{ 2013{
2014#ifdef CONFIG_XEN_DEBUG 2014#ifdef CONFIG_XEN_DEBUG
2015 pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); 2015 pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
@@ -2046,7 +2046,7 @@ static void xen_leave_lazy_mmu(void)
2046 preempt_enable(); 2046 preempt_enable();
2047} 2047}
2048 2048
2049static const struct pv_mmu_ops xen_mmu_ops __initdata = { 2049static const struct pv_mmu_ops xen_mmu_ops __initconst = {
2050 .read_cr2 = xen_read_cr2, 2050 .read_cr2 = xen_read_cr2,
2051 .write_cr2 = xen_write_cr2, 2051 .write_cr2 = xen_write_cr2,
2052 2052
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 141eb0de8b06..58efeb9d5440 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -522,11 +522,20 @@ static bool __init __early_alloc_p2m(unsigned long pfn)
522 /* Boundary cross-over for the edges: */ 522 /* Boundary cross-over for the edges: */
523 if (idx) { 523 if (idx) {
524 unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); 524 unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
525 unsigned long *mid_mfn_p;
525 526
526 p2m_init(p2m); 527 p2m_init(p2m);
527 528
528 p2m_top[topidx][mididx] = p2m; 529 p2m_top[topidx][mididx] = p2m;
529 530
531 /* For save/restore we need to MFN of the P2M saved */
532
533 mid_mfn_p = p2m_top_mfn_p[topidx];
534 WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing),
535 "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n",
536 topidx, mididx);
537 mid_mfn_p[mididx] = virt_to_mfn(p2m);
538
530 } 539 }
531 return idx != 0; 540 return idx != 0;
532} 541}
@@ -549,12 +558,29 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
549 pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) 558 pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
550 { 559 {
551 unsigned topidx = p2m_top_index(pfn); 560 unsigned topidx = p2m_top_index(pfn);
552 if (p2m_top[topidx] == p2m_mid_missing) { 561 unsigned long *mid_mfn_p;
553 unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); 562 unsigned long **mid;
563
564 mid = p2m_top[topidx];
565 mid_mfn_p = p2m_top_mfn_p[topidx];
566 if (mid == p2m_mid_missing) {
567 mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
554 568
555 p2m_mid_init(mid); 569 p2m_mid_init(mid);
556 570
557 p2m_top[topidx] = mid; 571 p2m_top[topidx] = mid;
572
573 BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
574 }
575 /* And the save/restore P2M tables.. */
576 if (mid_mfn_p == p2m_mid_missing_mfn) {
577 mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
578 p2m_mid_mfn_init(mid_mfn_p);
579
580 p2m_top_mfn_p[topidx] = mid_mfn_p;
581 p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
582 /* Note: we don't set mid_mfn_p[midix] here,
583 * look in __early_alloc_p2m */
558 } 584 }
559 } 585 }
560 586
@@ -650,7 +676,7 @@ static unsigned long mfn_hash(unsigned long mfn)
650} 676}
651 677
652/* Add an MFN override for a particular page */ 678/* Add an MFN override for a particular page */
653int m2p_add_override(unsigned long mfn, struct page *page) 679int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
654{ 680{
655 unsigned long flags; 681 unsigned long flags;
656 unsigned long pfn; 682 unsigned long pfn;
@@ -662,7 +688,6 @@ int m2p_add_override(unsigned long mfn, struct page *page)
662 if (!PageHighMem(page)) { 688 if (!PageHighMem(page)) {
663 address = (unsigned long)__va(pfn << PAGE_SHIFT); 689 address = (unsigned long)__va(pfn << PAGE_SHIFT);
664 ptep = lookup_address(address, &level); 690 ptep = lookup_address(address, &level);
665
666 if (WARN(ptep == NULL || level != PG_LEVEL_4K, 691 if (WARN(ptep == NULL || level != PG_LEVEL_4K,
667 "m2p_add_override: pfn %lx not mapped", pfn)) 692 "m2p_add_override: pfn %lx not mapped", pfn))
668 return -EINVAL; 693 return -EINVAL;
@@ -674,18 +699,17 @@ int m2p_add_override(unsigned long mfn, struct page *page)
674 if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) 699 if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
675 return -ENOMEM; 700 return -ENOMEM;
676 701
677 if (!PageHighMem(page)) 702 if (clear_pte && !PageHighMem(page))
678 /* Just zap old mapping for now */ 703 /* Just zap old mapping for now */
679 pte_clear(&init_mm, address, ptep); 704 pte_clear(&init_mm, address, ptep);
680
681 spin_lock_irqsave(&m2p_override_lock, flags); 705 spin_lock_irqsave(&m2p_override_lock, flags);
682 list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); 706 list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
683 spin_unlock_irqrestore(&m2p_override_lock, flags); 707 spin_unlock_irqrestore(&m2p_override_lock, flags);
684 708
685 return 0; 709 return 0;
686} 710}
687 711EXPORT_SYMBOL_GPL(m2p_add_override);
688int m2p_remove_override(struct page *page) 712int m2p_remove_override(struct page *page, bool clear_pte)
689{ 713{
690 unsigned long flags; 714 unsigned long flags;
691 unsigned long mfn; 715 unsigned long mfn;
@@ -713,7 +737,7 @@ int m2p_remove_override(struct page *page)
713 spin_unlock_irqrestore(&m2p_override_lock, flags); 737 spin_unlock_irqrestore(&m2p_override_lock, flags);
714 set_phys_to_machine(pfn, page->index); 738 set_phys_to_machine(pfn, page->index);
715 739
716 if (!PageHighMem(page)) 740 if (clear_pte && !PageHighMem(page))
717 set_pte_at(&init_mm, address, ptep, 741 set_pte_at(&init_mm, address, ptep,
718 pfn_pte(pfn, PAGE_KERNEL)); 742 pfn_pte(pfn, PAGE_KERNEL));
719 /* No tlb flush necessary because the caller already 743 /* No tlb flush necessary because the caller already
@@ -721,6 +745,7 @@ int m2p_remove_override(struct page *page)
721 745
722 return 0; 746 return 0;
723} 747}
748EXPORT_SYMBOL_GPL(m2p_remove_override);
724 749
725struct page *m2p_find_override(unsigned long mfn) 750struct page *m2p_find_override(unsigned long mfn)
726{ 751{
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 90bac0aac3a5..be1a464f6d66 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -50,7 +50,7 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
50 */ 50 */
51#define EXTRA_MEM_RATIO (10) 51#define EXTRA_MEM_RATIO (10)
52 52
53static __init void xen_add_extra_mem(unsigned long pages) 53static void __init xen_add_extra_mem(unsigned long pages)
54{ 54{
55 unsigned long pfn; 55 unsigned long pfn;
56 56
@@ -166,7 +166,7 @@ static unsigned long __init xen_set_identity(const struct e820entry *list,
166 if (last > end) 166 if (last > end)
167 continue; 167 continue;
168 168
169 if (entry->type == E820_RAM) { 169 if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) {
170 if (start > start_pci) 170 if (start > start_pci)
171 identity += set_phys_range_identity( 171 identity += set_phys_range_identity(
172 PFN_UP(start_pci), PFN_DOWN(start)); 172 PFN_UP(start_pci), PFN_DOWN(start));
@@ -227,7 +227,11 @@ char * __init xen_memory_setup(void)
227 227
228 memcpy(map_raw, map, sizeof(map)); 228 memcpy(map_raw, map, sizeof(map));
229 e820.nr_map = 0; 229 e820.nr_map = 0;
230#ifdef CONFIG_X86_32
231 xen_extra_mem_start = mem_end;
232#else
230 xen_extra_mem_start = max((1ULL << 32), mem_end); 233 xen_extra_mem_start = max((1ULL << 32), mem_end);
234#endif
231 for (i = 0; i < memmap.nr_entries; i++) { 235 for (i = 0; i < memmap.nr_entries; i++) {
232 unsigned long long end; 236 unsigned long long end;
233 237
@@ -336,7 +340,7 @@ static void __init fiddle_vdso(void)
336#endif 340#endif
337} 341}
338 342
339static __cpuinit int register_callback(unsigned type, const void *func) 343static int __cpuinit register_callback(unsigned type, const void *func)
340{ 344{
341 struct callback_register callback = { 345 struct callback_register callback = {
342 .type = type, 346 .type = type,
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 30612441ed99..41038c01de40 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -46,18 +46,17 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
46static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); 46static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
47 47
48/* 48/*
49 * Reschedule call back. Nothing to do, 49 * Reschedule call back.
50 * all the work is done automatically when
51 * we return from the interrupt.
52 */ 50 */
53static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) 51static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
54{ 52{
55 inc_irq_stat(irq_resched_count); 53 inc_irq_stat(irq_resched_count);
54 scheduler_ipi();
56 55
57 return IRQ_HANDLED; 56 return IRQ_HANDLED;
58} 57}
59 58
60static __cpuinit void cpu_bringup(void) 59static void __cpuinit cpu_bringup(void)
61{ 60{
62 int cpu = smp_processor_id(); 61 int cpu = smp_processor_id();
63 62
@@ -85,7 +84,7 @@ static __cpuinit void cpu_bringup(void)
85 wmb(); /* make sure everything is out */ 84 wmb(); /* make sure everything is out */
86} 85}
87 86
88static __cpuinit void cpu_bringup_and_idle(void) 87static void __cpuinit cpu_bringup_and_idle(void)
89{ 88{
90 cpu_bringup(); 89 cpu_bringup();
91 cpu_idle(); 90 cpu_idle();
@@ -242,7 +241,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
242 } 241 }
243} 242}
244 243
245static __cpuinit int 244static int __cpuinit
246cpu_initialize_context(unsigned int cpu, struct task_struct *idle) 245cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
247{ 246{
248 struct vcpu_guest_context *ctxt; 247 struct vcpu_guest_context *ctxt;
@@ -486,7 +485,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
486 return IRQ_HANDLED; 485 return IRQ_HANDLED;
487} 486}
488 487
489static const struct smp_ops xen_smp_ops __initdata = { 488static const struct smp_ops xen_smp_ops __initconst = {
490 .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, 489 .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
491 .smp_prepare_cpus = xen_smp_prepare_cpus, 490 .smp_prepare_cpus = xen_smp_prepare_cpus,
492 .smp_cpus_done = xen_smp_cpus_done, 491 .smp_cpus_done = xen_smp_cpus_done,
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 2e2d370a47b1..5158c505bef9 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -26,8 +26,6 @@
26 26
27#include "xen-ops.h" 27#include "xen-ops.h"
28 28
29#define XEN_SHIFT 22
30
31/* Xen may fire a timer up to this many ns early */ 29/* Xen may fire a timer up to this many ns early */
32#define TIMER_SLOP 100000 30#define TIMER_SLOP 100000
33#define NS_PER_TICK (1000000000LL / HZ) 31#define NS_PER_TICK (1000000000LL / HZ)
@@ -211,8 +209,6 @@ static struct clocksource xen_clocksource __read_mostly = {
211 .rating = 400, 209 .rating = 400,
212 .read = xen_clocksource_get_cycles, 210 .read = xen_clocksource_get_cycles,
213 .mask = ~0, 211 .mask = ~0,
214 .mult = 1<<XEN_SHIFT, /* time directly in nanoseconds */
215 .shift = XEN_SHIFT,
216 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 212 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
217}; 213};
218 214
@@ -439,16 +435,16 @@ void xen_timer_resume(void)
439 } 435 }
440} 436}
441 437
442static const struct pv_time_ops xen_time_ops __initdata = { 438static const struct pv_time_ops xen_time_ops __initconst = {
443 .sched_clock = xen_clocksource_read, 439 .sched_clock = xen_clocksource_read,
444}; 440};
445 441
446static __init void xen_time_init(void) 442static void __init xen_time_init(void)
447{ 443{
448 int cpu = smp_processor_id(); 444 int cpu = smp_processor_id();
449 struct timespec tp; 445 struct timespec tp;
450 446
451 clocksource_register(&xen_clocksource); 447 clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);
452 448
453 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { 449 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
454 /* Successfully turned off 100Hz tick, so we have the 450 /* Successfully turned off 100Hz tick, so we have the
@@ -468,7 +464,7 @@ static __init void xen_time_init(void)
468 xen_setup_cpu_clockevents(); 464 xen_setup_cpu_clockevents();
469} 465}
470 466
471__init void xen_init_time_ops(void) 467void __init xen_init_time_ops(void)
472{ 468{
473 pv_time_ops = xen_time_ops; 469 pv_time_ops = xen_time_ops;
474 470
@@ -490,7 +486,7 @@ static void xen_hvm_setup_cpu_clockevents(void)
490 xen_setup_cpu_clockevents(); 486 xen_setup_cpu_clockevents();
491} 487}
492 488
493__init void xen_hvm_init_time_ops(void) 489void __init xen_hvm_init_time_ops(void)
494{ 490{
495 /* vector callback is needed otherwise we cannot receive interrupts 491 /* vector callback is needed otherwise we cannot receive interrupts
496 * on cpu > 0 and at this point we don't know how many cpus are 492 * on cpu > 0 and at this point we don't know how many cpus are
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 3112f55638c4..97dfdc8757b3 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -74,7 +74,7 @@ static inline void xen_hvm_smp_init(void) {}
74 74
75#ifdef CONFIG_PARAVIRT_SPINLOCKS 75#ifdef CONFIG_PARAVIRT_SPINLOCKS
76void __init xen_init_spinlocks(void); 76void __init xen_init_spinlocks(void);
77__cpuinit void xen_init_lock_cpu(int cpu); 77void __cpuinit xen_init_lock_cpu(int cpu);
78void xen_uninit_lock_cpu(int cpu); 78void xen_uninit_lock_cpu(int cpu);
79#else 79#else
80static inline void xen_init_spinlocks(void) 80static inline void xen_init_spinlocks(void)