Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 12
-rw-r--r--  arch/x86/Kconfig.cpu | 17
-rw-r--r--  arch/x86/Makefile | 29
-rw-r--r--  arch/x86/boot/Makefile | 53
-rw-r--r--  arch/x86/boot/header.S | 29
-rw-r--r--  arch/x86/boot/pm.c | 44
-rw-r--r--  arch/x86/boot/pmjump.S | 1
-rw-r--r--  arch/x86/boot/setup.ld | 3
-rw-r--r--  arch/x86/boot/tools/build.c | 9
-rw-r--r--  arch/x86/boot/video-vga.c | 22
-rw-r--r--  arch/x86/crypto/Makefile | 3
-rw-r--r--  arch/x86/crypto/aes-i586-asm_32.S | 18
-rw-r--r--  arch/x86/crypto/aes-x86_64-asm_64.S | 6
-rw-r--r--  arch/x86/crypto/aes_glue.c | 20
-rw-r--r--  arch/x86/crypto/aesni-intel_asm.S | 896
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c | 461
-rw-r--r--  arch/x86/ia32/ia32entry.S | 2
-rw-r--r--  arch/x86/ia32/sys_ia32.c | 22
-rw-r--r--  arch/x86/include/asm/aes.h | 11
-rw-r--r--  arch/x86/include/asm/apic.h | 35
-rw-r--r--  arch/x86/include/asm/boot.h | 4
-rw-r--r--  arch/x86/include/asm/cacheflush.h | 3
-rwxr-xr-x  arch/x86/include/asm/cpu_debug.h | 226
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 1
-rw-r--r--  arch/x86/include/asm/desc.h | 3
-rw-r--r--  arch/x86/include/asm/device.h | 2
-rw-r--r--  arch/x86/include/asm/dma-mapping.h | 188
-rw-r--r--  arch/x86/include/asm/dmi.h | 19
-rw-r--r--  arch/x86/include/asm/e820.h | 2
-rw-r--r--  arch/x86/include/asm/ftrace.h | 25
-rw-r--r--  arch/x86/include/asm/highmem.h | 1
-rw-r--r--  arch/x86/include/asm/ia32.h | 7
-rw-r--r--  arch/x86/include/asm/io_apic.h | 5
-rw-r--r--  arch/x86/include/asm/iommu.h | 2
-rw-r--r--  arch/x86/include/asm/irq_remapping.h | 2
-rw-r--r--  arch/x86/include/asm/kvm.h | 24
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 61
-rw-r--r--  arch/x86/include/asm/linkage.h | 13
-rw-r--r--  arch/x86/include/asm/msidef.h | 1
-rw-r--r--  arch/x86/include/asm/msr-index.h | 9
-rw-r--r--  arch/x86/include/asm/page_32_types.h | 5
-rw-r--r--  arch/x86/include/asm/paravirt.h | 19
-rw-r--r--  arch/x86/include/asm/percpu.h | 8
-rw-r--r--  arch/x86/include/asm/pgtable-2level.h | 7
-rw-r--r--  arch/x86/include/asm/pgtable-3level.h | 17
-rw-r--r--  arch/x86/include/asm/pgtable.h | 2
-rw-r--r--  arch/x86/include/asm/pgtable_32.h | 3
-rw-r--r--  arch/x86/include/asm/processor.h | 5
-rw-r--r--  arch/x86/include/asm/sections.h | 7
-rw-r--r--  arch/x86/include/asm/setup.h | 39
-rw-r--r--  arch/x86/include/asm/socket.h | 3
-rw-r--r--  arch/x86/include/asm/svm.h | 4
-rw-r--r--  arch/x86/include/asm/sys_ia32.h | 2
-rw-r--r--  arch/x86/include/asm/timer.h | 2
-rw-r--r--  arch/x86/include/asm/virtext.h | 2
-rw-r--r--  arch/x86/include/asm/vmx.h | 5
-rw-r--r--  arch/x86/include/asm/xen/hypercall.h | 2
-rw-r--r--  arch/x86/kernel/Makefile | 4
-rw-r--r--  arch/x86/kernel/amd_iommu.c | 26
-rw-r--r--  arch/x86/kernel/apic/apic.c | 20
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c | 18
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 292
-rw-r--r--  arch/x86/kernel/apic/probe_64.c | 7
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c | 6
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c | 6
-rw-r--r--  arch/x86/kernel/check.c | 8
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 5
-rw-r--r--  arch/x86/kernel/cpu/addon_cpuid_features.c | 2
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 2
-rw-r--r--  arch/x86/kernel/cpu/centaur.c | 36
-rw-r--r--  arch/x86/kernel/cpu/centaur_64.c | 37
-rw-r--r--  arch/x86/kernel/cpu/common.c | 392
-rw-r--r--  arch/x86/kernel/cpu/cpu.h | 25
-rwxr-xr-x  arch/x86/kernel/cpu/cpu_debug.c | 901
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/Kconfig | 19
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/Makefile | 8
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 36
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 54
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/e_powersaver.c | 21
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/elanfreq.c | 6
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/gx-suspmod.c | 105
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/longhaul.c | 193
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/longhaul.h | 12
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/longrun.c | 25
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 72
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 44
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 239
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 386
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 5
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/sc520_freq.c | 30
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 70
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 163
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-lib.h | 18
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | 166
-rw-r--r--  arch/x86/kernel/cpu/cyrix.c | 16
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 15
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 8
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 40
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/Makefile | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/cleanup.c | 1101
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 202
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 1069
-rw-r--r--  arch/x86/kernel/cpu/mtrr/mtrr.h | 4
-rw-r--r--  arch/x86/kernel/cpu/transmeta.c | 2
-rw-r--r--  arch/x86/kernel/cpu/umc.c | 2
-rw-r--r--  arch/x86/kernel/dumpstack.c | 1
-rw-r--r--  arch/x86/kernel/e820.c | 142
-rw-r--r--  arch/x86/kernel/early_printk.c | 20
-rw-r--r--  arch/x86/kernel/entry_32.S | 18
-rw-r--r--  arch/x86/kernel/entry_64.S | 4
-rw-r--r--  arch/x86/kernel/ftrace.c | 75
-rw-r--r--  arch/x86/kernel/head32.c | 5
-rw-r--r--  arch/x86/kernel/head64.c | 2
-rw-r--r--  arch/x86/kernel/head_32.S | 76
-rw-r--r--  arch/x86/kernel/hpet.c | 80
-rw-r--r--  arch/x86/kernel/i8253.c | 68
-rw-r--r--  arch/x86/kernel/io_delay.c | 27
-rw-r--r--  arch/x86/kernel/irq.c | 54
-rw-r--r--  arch/x86/kernel/kdebugfs.c | 82
-rw-r--r--  arch/x86/kernel/kprobes.c | 3
-rw-r--r--  arch/x86/kernel/kvm.c | 7
-rw-r--r--  arch/x86/kernel/mmconf-fam10h_64.c | 2
-rw-r--r--  arch/x86/kernel/mpparse.c | 375
-rw-r--r--  arch/x86/kernel/paravirt.c | 1
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 38
-rw-r--r--  arch/x86/kernel/pci-dma.c | 14
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 34
-rw-r--r--  arch/x86/kernel/pci-nommu.c | 39
-rw-r--r--  arch/x86/kernel/pci-swiotlb.c (renamed from arch/x86/kernel/pci-swiotlb_64.c) | 19
-rw-r--r--  arch/x86/kernel/process.c | 5
-rw-r--r--  arch/x86/kernel/ptrace.c | 3
-rw-r--r--  arch/x86/kernel/quirks.c | 6
-rw-r--r--  arch/x86/kernel/rtc.c | 20
-rw-r--r--  arch/x86/kernel/setup.c | 54
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 63
-rw-r--r--  arch/x86/kernel/signal.c | 48
-rw-r--r--  arch/x86/kernel/tlb_uv.c | 3
-rw-r--r--  arch/x86/kernel/topology.c | 14
-rw-r--r--  arch/x86/kernel/tsc.c | 122
-rw-r--r--  arch/x86/kernel/vmi_32.c | 6
-rw-r--r--  arch/x86/kernel/vmlinux_32.lds.S | 21
-rw-r--r--  arch/x86/kernel/vmlinux_64.lds.S | 94
-rw-r--r--  arch/x86/kernel/vsmp_64.c | 12
-rw-r--r--  arch/x86/kvm/Kconfig | 4
-rw-r--r--  arch/x86/kvm/i8254.c | 21
-rw-r--r--  arch/x86/kvm/i8254.h | 2
-rw-r--r--  arch/x86/kvm/i8259.c | 25
-rw-r--r--  arch/x86/kvm/irq.h | 2
-rw-r--r--  arch/x86/kvm/kvm_svm.h | 16
-rw-r--r--  arch/x86/kvm/mmu.c | 237
-rw-r--r--  arch/x86/kvm/mmu.h | 2
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 219
-rw-r--r--  arch/x86/kvm/svm.c | 916
-rw-r--r--  arch/x86/kvm/vmx.c | 393
-rw-r--r--  arch/x86/kvm/x86.c | 432
-rw-r--r--  arch/x86/kvm/x86_emulate.c | 56
-rw-r--r--  arch/x86/lguest/boot.c | 8
-rw-r--r--  arch/x86/lib/memcpy_64.S | 143
-rw-r--r--  arch/x86/mm/highmem_32.c | 15
-rw-r--r--  arch/x86/mm/init.c | 6
-rw-r--r--  arch/x86/mm/iomap_32.c | 26
-rw-r--r--  arch/x86/mm/ioremap.c | 19
-rw-r--r--  arch/x86/mm/kmmio.c | 2
-rw-r--r--  arch/x86/mm/pageattr.c | 147
-rw-r--r--  arch/x86/mm/pat.c | 5
-rw-r--r--  arch/x86/mm/pgtable_32.c | 2
-rw-r--r--  arch/x86/mm/tlb.c | 5
-rw-r--r--  arch/x86/pci/common.c | 4
-rw-r--r--  arch/x86/pci/fixup.c | 4
-rw-r--r--  arch/x86/pci/i386.c | 3
-rw-r--r--  arch/x86/xen/mmu.c | 7
172 files changed, 8344 insertions, 4379 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 87717f3687d2..45161b816313 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -40,6 +40,7 @@ config X86
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select USER_STACKTRACE_SUPPORT
+	select HAVE_DMA_API_DEBUG
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
@@ -169,6 +170,9 @@ config GENERIC_HARDIRQS
 	bool
 	default y
 
+config GENERIC_HARDIRQS_NO__DO_IRQ
+	def_bool y
+
 config GENERIC_IRQ_PROBE
 	bool
 	default y
@@ -931,6 +935,12 @@ config X86_CPUID
 	  with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
 	  /dev/cpu/31/cpuid.
 
+config X86_CPU_DEBUG
+	tristate "/sys/kernel/debug/x86/cpu/* - CPU Debug support"
+	---help---
+	  If you select this option, this will provide various x86 CPUs
+	  information through debugfs.
+
 choice
 	prompt "High Memory Support"
 	default HIGHMEM4G if !X86_NUMAQ
@@ -1123,7 +1133,7 @@ config NUMA_EMU
 
 config NODES_SHIFT
 	int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
-	range 1 9 if X86_64
+	range 1 9
 	default "9" if MAXSMP
 	default "6" if X86_64
 	default "4" if X86_NUMAQ
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index a95eaf0e582a..924e156a85ab 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -456,24 +456,9 @@ config CPU_SUP_AMD
 
 	  If unsure, say N.
 
-config CPU_SUP_CENTAUR_32
+config CPU_SUP_CENTAUR
 	default y
 	bool "Support Centaur processors" if PROCESSOR_SELECT
-	depends on !64BIT
-	---help---
-	  This enables detection, tunings and quirks for Centaur processors
-
-	  You need this enabled if you want your kernel to run on a
-	  Centaur CPU. Disabling this option on other types of CPUs
-	  makes the kernel a tiny bit smaller. Disabling it on a Centaur
-	  CPU might render the kernel unbootable.
-
-	  If unsure, say N.
-
-config CPU_SUP_CENTAUR_64
-	default y
-	bool "Support Centaur processors" if PROCESSOR_SELECT
-	depends on 64BIT
 	---help---
 	  This enables detection, tunings and quirks for Centaur processors
 
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1836191839ee..f05d8c91d9e5 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -153,34 +153,23 @@ endif
 
 boot := arch/x86/boot
 
-PHONY += zImage bzImage compressed zlilo bzlilo \
-	  zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install
+BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage install
+
+PHONY += bzImage $(BOOT_TARGETS)
 
 # Default kernel to build
 all: bzImage
 
 # KBUILD_IMAGE specify target image being built
-                    KBUILD_IMAGE := $(boot)/bzImage
-zImage zlilo zdisk: KBUILD_IMAGE := $(boot)/zImage
+KBUILD_IMAGE := $(boot)/bzImage
 
-zImage bzImage: vmlinux
+bzImage: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
 	$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
 	$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
 
-compressed: zImage
-
-zlilo bzlilo: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zlilo
-
-zdisk bzdisk: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zdisk
-
-fdimage fdimage144 fdimage288 isoimage: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
-
-install:
-	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
+$(BOOT_TARGETS): vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $@
 
 PHONY += vdso_install
 vdso_install:
@@ -205,7 +194,3 @@ define archhelp
 	echo  '  FDARGS="..." arguments for the booted kernel'
 	echo  '  FDINITRD=file initrd for the booted kernel'
 endef
-
-CLEAN_FILES += arch/x86/boot/fdimage \
-	       arch/x86/boot/image.iso \
-	       arch/x86/boot/mtools.conf
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index c70eff69a1fb..fb737ce5888d 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -6,26 +6,24 @@
6# for more details. 6# for more details.
7# 7#
8# Copyright (C) 1994 by Linus Torvalds 8# Copyright (C) 1994 by Linus Torvalds
9# Changed by many, many contributors over the years.
9# 10#
10 11
11# ROOT_DEV specifies the default root-device when making the image. 12# ROOT_DEV specifies the default root-device when making the image.
12# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case 13# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case
13# the default of FLOPPY is used by 'build'. 14# the default of FLOPPY is used by 'build'.
14 15
15ROOT_DEV := CURRENT 16ROOT_DEV := CURRENT
16 17
17# If you want to preset the SVGA mode, uncomment the next line and 18# If you want to preset the SVGA mode, uncomment the next line and
18# set SVGA_MODE to whatever number you want. 19# set SVGA_MODE to whatever number you want.
19# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. 20# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
20# The number is the same as you would ordinarily press at bootup. 21# The number is the same as you would ordinarily press at bootup.
21 22
22SVGA_MODE := -DSVGA_MODE=NORMAL_VGA 23SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
23 24
24# If you want the RAM disk device, define this to be the size in blocks. 25targets := vmlinux.bin setup.bin setup.elf bzImage
25 26targets += fdimage fdimage144 fdimage288 image.iso mtools.conf
26#RAMDISK := -DRAMDISK=512
27
28targets := vmlinux.bin setup.bin setup.elf zImage bzImage
29subdir- := compressed 27subdir- := compressed
30 28
31setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o 29setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
@@ -71,17 +69,13 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
71KBUILD_CFLAGS += $(call cc-option,-m32) 69KBUILD_CFLAGS += $(call cc-option,-m32)
72KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ 70KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
73 71
74$(obj)/zImage: asflags-y := $(SVGA_MODE) $(RAMDISK) 72$(obj)/bzImage: asflags-y := $(SVGA_MODE)
75$(obj)/bzImage: ccflags-y := -D__BIG_KERNEL__
76$(obj)/bzImage: asflags-y := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
77$(obj)/bzImage: BUILDFLAGS := -b
78 73
79quiet_cmd_image = BUILD $@ 74quiet_cmd_image = BUILD $@
80cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/setup.bin \ 75cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
81 $(obj)/vmlinux.bin $(ROOT_DEV) > $@ 76 $(ROOT_DEV) > $@
82 77
83$(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \ 78$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
84 $(obj)/vmlinux.bin $(obj)/tools/build FORCE
85 $(call if_changed,image) 79 $(call if_changed,image)
86 @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' 80 @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
87 81
@@ -116,9 +110,11 @@ $(obj)/setup.bin: $(obj)/setup.elf FORCE
116$(obj)/compressed/vmlinux: FORCE 110$(obj)/compressed/vmlinux: FORCE
117 $(Q)$(MAKE) $(build)=$(obj)/compressed $@ 111 $(Q)$(MAKE) $(build)=$(obj)/compressed $@
118 112
119# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel 113# Set this if you want to pass append arguments to the
114# bzdisk/fdimage/isoimage kernel
120FDARGS = 115FDARGS =
121# Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel 116# Set this if you want an initrd included with the
117# bzdisk/fdimage/isoimage kernel
122FDINITRD = 118FDINITRD =
123 119
124image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,) 120image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,)
@@ -127,7 +123,7 @@ $(obj)/mtools.conf: $(src)/mtools.conf.in
127 sed -e 's|@OBJ@|$(obj)|g' < $< > $@ 123 sed -e 's|@OBJ@|$(obj)|g' < $< > $@
128 124
129# This requires write access to /dev/fd0 125# This requires write access to /dev/fd0
130zdisk: $(BOOTIMAGE) $(obj)/mtools.conf 126bzdisk: $(obj)/bzImage $(obj)/mtools.conf
131 MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync 127 MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync
132 syslinux /dev/fd0 ; sync 128 syslinux /dev/fd0 ; sync
133 echo '$(image_cmdline)' | \ 129 echo '$(image_cmdline)' | \
@@ -135,10 +131,10 @@ zdisk: $(BOOTIMAGE) $(obj)/mtools.conf
135 if [ -f '$(FDINITRD)' ] ; then \ 131 if [ -f '$(FDINITRD)' ] ; then \
136 MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \ 132 MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \
137 fi 133 fi
138 MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) a:linux ; sync 134 MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage a:linux ; sync
139 135
140# These require being root or having syslinux 2.02 or higher installed 136# These require being root or having syslinux 2.02 or higher installed
141fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf 137fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf
142 dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440 138 dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440
143 MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync 139 MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync
144 syslinux $(obj)/fdimage ; sync 140 syslinux $(obj)/fdimage ; sync
@@ -147,9 +143,9 @@ fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf
147 if [ -f '$(FDINITRD)' ] ; then \ 143 if [ -f '$(FDINITRD)' ] ; then \
148 MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \ 144 MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \
149 fi 145 fi
150 MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) v:linux ; sync 146 MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage v:linux ; sync
151 147
152fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf 148fdimage288: $(obj)/bzImage $(obj)/mtools.conf
153 dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880 149 dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880
154 MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync 150 MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync
155 syslinux $(obj)/fdimage ; sync 151 syslinux $(obj)/fdimage ; sync
@@ -158,9 +154,9 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
158 if [ -f '$(FDINITRD)' ] ; then \ 154 if [ -f '$(FDINITRD)' ] ; then \
159 MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \ 155 MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \
160 fi 156 fi
161 MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) w:linux ; sync 157 MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage w:linux ; sync
162 158
163isoimage: $(BOOTIMAGE) 159isoimage: $(obj)/bzImage
164 -rm -rf $(obj)/isoimage 160 -rm -rf $(obj)/isoimage
165 mkdir $(obj)/isoimage 161 mkdir $(obj)/isoimage
166 for i in lib lib64 share end ; do \ 162 for i in lib lib64 share end ; do \
@@ -170,7 +166,7 @@ isoimage: $(BOOTIMAGE)
170 fi ; \ 166 fi ; \
171 if [ $$i = end ] ; then exit 1 ; fi ; \ 167 if [ $$i = end ] ; then exit 1 ; fi ; \
172 done 168 done
173 cp $(BOOTIMAGE) $(obj)/isoimage/linux 169 cp $(obj)/bzImage $(obj)/isoimage/linux
174 echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg 170 echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
175 if [ -f '$(FDINITRD)' ] ; then \ 171 if [ -f '$(FDINITRD)' ] ; then \
176 cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \ 172 cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \
@@ -181,12 +177,13 @@ isoimage: $(BOOTIMAGE)
181 isohybrid $(obj)/image.iso 2>/dev/null || true 177 isohybrid $(obj)/image.iso 2>/dev/null || true
182 rm -rf $(obj)/isoimage 178 rm -rf $(obj)/isoimage
183 179
184zlilo: $(BOOTIMAGE) 180bzlilo: $(obj)/bzImage
185 if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi 181 if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi
186 if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi 182 if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
187 cat $(BOOTIMAGE) > $(INSTALL_PATH)/vmlinuz 183 cat $(obj)/bzImage > $(INSTALL_PATH)/vmlinuz
188 cp System.map $(INSTALL_PATH)/ 184 cp System.map $(INSTALL_PATH)/
189 if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi 185 if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
190 186
191install: 187install:
192 sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)" 188 sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
189 System.map "$(INSTALL_PATH)"
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 7ccff4884a23..5d84d1c74e4c 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -24,12 +24,8 @@
 #include "boot.h"
 #include "offsets.h"
 
-SETUPSECTS	= 4			/* default nr of setup-sectors */
 BOOTSEG		= 0x07C0		/* original address of boot-sector */
-SYSSEG		= DEF_SYSSEG		/* system loaded at 0x10000 (65536) */
-SYSSIZE		= DEF_SYSSIZE		/* system size: # of 16-byte clicks */
-					/* to be loaded */
-ROOT_DEV	= 0			/* ROOT_DEV is now written by "build" */
+SYSSEG		= 0x1000		/* historical load address >> 4 */
 
 #ifndef SVGA_MODE
 #define SVGA_MODE ASK_VGA
@@ -97,12 +93,12 @@ bugger_off_msg:
 	.section ".header", "a"
 	.globl	hdr
 hdr:
-setup_sects:	.byte SETUPSECTS
+setup_sects:	.byte 0			/* Filled in by build.c */
 root_flags:	.word ROOT_RDONLY
-syssize:	.long SYSSIZE
-ram_size:	.word RAMDISK
+syssize:	.long 0			/* Filled in by build.c */
+ram_size:	.word 0			/* Obsolete */
 vid_mode:	.word SVGA_MODE
-root_dev:	.word ROOT_DEV
+root_dev:	.word 0			/* Filled in by build.c */
 boot_flag:	.word 0xAA55
 
 	# offset 512, entry point
@@ -123,14 +119,15 @@ _start:
 				# or else old loadlin-1.5 will fail)
 	.globl	realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
-start_sys_seg:	.word	SYSSEG
+start_sys_seg:	.word	SYSSEG		# obsolete and meaningless, but just
+					# in case something decided to "use" it
 		.word	kernel_version-512 # pointing to kernel version string
 					# above section of header is compatible
 					# with loadlin-1.5 (header v1.5). Don't
 					# change it.
 
-type_of_loader:	.byte	0		# = 0, old one (LILO, Loadlin,
-					#      Bootlin, SYSLX, bootsect...)
+type_of_loader:	.byte	0		# 0 means ancient bootloader, newer
+					# bootloaders know to change this.
 					# See Documentation/i386/boot.txt for
 					# assigned ids
 
@@ -142,11 +139,7 @@ CAN_USE_HEAP = 0x80 # If set, the loader also has set
 					# space behind setup.S can be used for
 					# heap purposes.
 					# Only the loader knows what is free
-#ifndef __BIG_KERNEL__
-			.byte	0
-#else
 			.byte	LOADED_HIGH
-#endif
 
 setup_move_size: .word  0x8000		# size to move, when setup is not
 					# loaded at 0x90000. We will move setup
@@ -157,11 +150,7 @@ setup_move_size: .word 0x8000 # size to move, when setup is not
 
 code32_start:				# here loaders can put a different
 					# start address for 32-bit code.
-#ifndef __BIG_KERNEL__
-		.long	0x1000		# 0x1000 = default for zImage
-#else
 		.long	0x100000	# 0x100000 = default for big kernel
-#endif
 
 ramdisk_image:	.long	0		# address of loaded ramdisk image
 					# Here the loader puts the 32-bit
diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c
index 85a1cd8a8ff8..8062f8915250 100644
--- a/arch/x86/boot/pm.c
+++ b/arch/x86/boot/pm.c
@@ -33,47 +33,6 @@ static void realmode_switch_hook(void)
 }
 
 /*
- * A zImage kernel is loaded at 0x10000 but wants to run at 0x1000.
- * A bzImage kernel is loaded and runs at 0x100000.
- */
-static void move_kernel_around(void)
-{
-	/* Note: rely on the compile-time option here rather than
-	   the LOADED_HIGH flag. The Qemu kernel loader unconditionally
-	   sets the loadflags to zero. */
-#ifndef __BIG_KERNEL__
-	u16 dst_seg, src_seg;
-	u32 syssize;
-
-	dst_seg = 0x1000 >> 4;
-	src_seg = 0x10000 >> 4;
-	syssize = boot_params.hdr.syssize; /* Size in 16-byte paragraphs */
-
-	while (syssize) {
-		int paras = (syssize >= 0x1000) ? 0x1000 : syssize;
-		int dwords = paras << 2;
-
-		asm volatile("pushw %%es ; "
-			     "pushw %%ds ; "
-			     "movw %1,%%es ; "
-			     "movw %2,%%ds ; "
-			     "xorw %%di,%%di ; "
-			     "xorw %%si,%%si ; "
-			     "rep;movsl ; "
-			     "popw %%ds ; "
-			     "popw %%es"
-			     : "+c" (dwords)
-			     : "r" (dst_seg), "r" (src_seg)
-			     : "esi", "edi");
-
-		syssize -= paras;
-		dst_seg += paras;
-		src_seg += paras;
-	}
-#endif
-}
-
-/*
  * Disable all interrupts at the legacy PIC.
  */
 static void mask_all_interrupts(void)
@@ -147,9 +106,6 @@ void go_to_protected_mode(void)
 	/* Hook before leaving real mode, also disables interrupts */
 	realmode_switch_hook();
 
-	/* Move the kernel/setup to their final resting places */
-	move_kernel_around();
-
 	/* Enable the A20 gate */
 	if (enable_a20()) {
 		puts("A20 gate not responding, unable to boot...\n");
diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S
index 019c17a75851..3e0edc6d2a20 100644
--- a/arch/x86/boot/pmjump.S
+++ b/arch/x86/boot/pmjump.S
@@ -47,6 +47,7 @@ GLOBAL(protected_mode_jump)
 ENDPROC(protected_mode_jump)
 
 	.code32
+	.section ".text32","ax"
 GLOBAL(in_pm32)
 	# Set up data segments for flat 32-bit mode
 	movl	%ecx, %ds
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index df9234b3a5e0..bb8dc2de7969 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -17,7 +17,8 @@ SECTIONS
 	.header		: { *(.header) }
 	.inittext	: { *(.inittext) }
 	.initdata	: { *(.initdata) }
-	.text		: { *(.text*) }
+	.text		: { *(.text) }
+	.text32		: { *(.text32) }
 
 	. = ALIGN(16);
 	.rodata		: { *(.rodata*) }
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 44dc1923c0e3..ee3a4ea923ac 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -130,7 +130,7 @@ static void die(const char * str, ...)
 
 static void usage(void)
 {
-	die("Usage: build [-b] setup system [rootdev] [> image]");
+	die("Usage: build setup system [rootdev] [> image]");
 }
 
 int main(int argc, char ** argv)
@@ -145,11 +145,6 @@ int main(int argc, char ** argv)
 	void *kernel;
 	u32 crc = 0xffffffffUL;
 
-	if (argc > 2 && !strcmp(argv[1], "-b"))
-	  {
-	    is_big_kernel = 1;
-	    argc--, argv++;
-	  }
 	if ((argc < 3) || (argc > 4))
 		usage();
 	if (argc > 3) {
@@ -216,8 +211,6 @@ int main(int argc, char ** argv)
 		die("Unable to mmap '%s': %m", argv[2]);
 	/* Number of 16-byte paragraphs, including space for a 4-byte CRC */
 	sys_size = (sz + 15 + 4) / 16;
-	if (!is_big_kernel && sys_size > DEF_SYSSIZE)
-		die("System is too big. Try using bzImage or modules.");
 
 	/* Patch the setup code with the appropriate size parameters */
 	buf[0x1f1] = setup_sectors-1;
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index 5d4742ed4aa2..95d86ce0421c 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -129,41 +129,45 @@ u16 vga_crtc(void)
 	return (inb(0x3cc) & 1) ? 0x3d4 : 0x3b4;
 }
 
-static void vga_set_480_scanlines(int end)
+static void vga_set_480_scanlines(int lines)
 {
-	u16 crtc;
-	u8  csel;
+	u16 crtc;		/* CRTC base address */
+	u8  csel;		/* CRTC miscellaneous output register */
+	u8  ovfw;		/* CRTC overflow register */
+	int end = lines-1;
 
 	crtc = vga_crtc();
 
+	ovfw = 0x3c | ((end >> (8-1)) & 0x02) | ((end >> (9-6)) & 0x40);
+
 	out_idx(0x0c, crtc, 0x11); /* Vertical sync end, unlock CR0-7 */
 	out_idx(0x0b, crtc, 0x06); /* Vertical total */
-	out_idx(0x3e, crtc, 0x07); /* Vertical overflow */
+	out_idx(ovfw, crtc, 0x07); /* Vertical overflow */
 	out_idx(0xea, crtc, 0x10); /* Vertical sync start */
 	out_idx(end, crtc, 0x12); /* Vertical display end */
 	out_idx(0xe7, crtc, 0x15); /* Vertical blank start */
 	out_idx(0x04, crtc, 0x16); /* Vertical blank end */
 	csel = inb(0x3cc);
 	csel &= 0x0d;
 	csel |= 0xe2;
-	outb(csel, 0x3cc);
+	outb(csel, 0x3c2);
 }
 
 static void vga_set_80x30(void)
 {
-	vga_set_480_scanlines(0xdf);
+	vga_set_480_scanlines(30*16);
 }
 
 static void vga_set_80x34(void)
 {
 	vga_set_14font();
-	vga_set_480_scanlines(0xdb);
+	vga_set_480_scanlines(34*14);
 }
 
 static void vga_set_80x60(void)
 {
 	vga_set_8font();
-	vga_set_480_scanlines(0xdf);
+	vga_set_480_scanlines(60*8);
 }
 
 static int vga_set_mode(struct mode_info *mode)
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 903de4aa5094..ebe7deedd5b4 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
+obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
 
 obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
 
@@ -19,3 +20,5 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
+
+aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S
index e41b147f4509..b949ec2f9af4 100644
--- a/arch/x86/crypto/aes-i586-asm_32.S
+++ b/arch/x86/crypto/aes-i586-asm_32.S
@@ -41,14 +41,14 @@
 #define tlen 1024   // length of each of 4 'xor' arrays (256 32-bit words)
 
 /* offsets to parameters with one register pushed onto stack */
-#define tfm 8
+#define ctx 8
 #define out_blk 12
 #define in_blk 16
 
-/* offsets in crypto_tfm structure */
-#define klen (crypto_tfm_ctx_offset + 0)
-#define ekey (crypto_tfm_ctx_offset + 4)
-#define dkey (crypto_tfm_ctx_offset + 244)
+/* offsets in crypto_aes_ctx structure */
+#define klen (480)
+#define ekey (0)
+#define dkey (240)
 
 // register mapping for encrypt and decrypt subroutines
 
@@ -217,7 +217,7 @@
 	do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
 
 // AES (Rijndael) Encryption Subroutine
-/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
+/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
 
 .global  aes_enc_blk
 
@@ -228,7 +228,7 @@
 
 aes_enc_blk:
 	push    %ebp
-	mov     tfm(%esp),%ebp
+	mov     ctx(%esp),%ebp
 
 // CAUTION: the order and the values used in these assigns
 // rely on the register mappings
@@ -292,7 +292,7 @@ aes_enc_blk:
 	ret
 
 // AES (Rijndael) Decryption Subroutine
-/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
+/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
 
 .global  aes_dec_blk
 
@@ -303,7 +303,7 @@ aes_enc_blk:
 
 aes_dec_blk:
 	push    %ebp
-	mov     tfm(%esp),%ebp
+	mov     ctx(%esp),%ebp
 
 // CAUTION: the order and the values used in these assigns
 // rely on the register mappings
diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
index a120f526c3df..5b577d5a059b 100644
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -17,8 +17,6 @@
 
 #include <asm/asm-offsets.h>
 
-#define BASE crypto_tfm_ctx_offset
-
 #define R1	%rax
 #define R1E	%eax
 #define R1X	%ax
@@ -56,13 +54,13 @@
 	.align	8;			\
 FUNC:	movq	r1,r2;			\
 	movq	r3,r4;			\
-	leaq	BASE+KEY+48+4(r8),r9;	\
+	leaq	KEY+48(r8),r9;		\
 	movq	r10,r11;		\
 	movl	(r7),r5 ## E;		\
 	movl	4(r7),r1 ## E;		\
 	movl	8(r7),r6 ## E;		\
 	movl	12(r7),r7 ## E;		\
-	movl	BASE+0(r8),r10 ## E;	\
+	movl	480(r8),r10 ## E;	\
 	xorl	-48(r9),r5 ## E;	\
 	xorl	-44(r9),r1 ## E;	\
 	xorl	-40(r9),r6 ## E;	\
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
index 71f457827116..49ae9fe32b22 100644
--- a/arch/x86/crypto/aes_glue.c
+++ b/arch/x86/crypto/aes_glue.c
@@ -5,17 +5,29 @@
 
 #include <crypto/aes.h>
 
-asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
-asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
+asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
+asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
+
+void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+{
+	aes_enc_blk(ctx, dst, src);
+}
+EXPORT_SYMBOL_GPL(crypto_aes_encrypt_x86);
+
+void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+{
+	aes_dec_blk(ctx, dst, src);
+}
+EXPORT_SYMBOL_GPL(crypto_aes_decrypt_x86);
 
 static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	aes_enc_blk(tfm, dst, src);
+	aes_enc_blk(crypto_tfm_ctx(tfm), dst, src);
 }
 
 static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	aes_dec_blk(tfm, dst, src);
+	aes_dec_blk(crypto_tfm_ctx(tfm), dst, src);
 }
 
 static struct crypto_alg aes_alg = {
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
new file mode 100644
index 000000000000..caba99601703
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -0,0 +1,896 @@
1/*
2 * Implement AES algorithm in Intel AES-NI instructions.
3 *
4 * The white paper of AES-NI instructions can be downloaded from:
5 * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
6 *
7 * Copyright (C) 2008, Intel Corp.
8 * Author: Huang Ying <ying.huang@intel.com>
9 * Vinodh Gopal <vinodh.gopal@intel.com>
10 * Kahraman Akdemir
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 */
17
18#include <linux/linkage.h>
19
20.text
21
22#define STATE1 %xmm0
23#define STATE2 %xmm4
24#define STATE3 %xmm5
25#define STATE4 %xmm6
26#define STATE STATE1
27#define IN1 %xmm1
28#define IN2 %xmm7
29#define IN3 %xmm8
30#define IN4 %xmm9
31#define IN IN1
32#define KEY %xmm2
33#define IV %xmm3
34
35#define KEYP %rdi
36#define OUTP %rsi
37#define INP %rdx
38#define LEN %rcx
39#define IVP %r8
40#define KLEN %r9d
41#define T1 %r10
42#define TKEYP T1
43#define T2 %r11
44
45_key_expansion_128:
46_key_expansion_256a:
47 pshufd $0b11111111, %xmm1, %xmm1
48 shufps $0b00010000, %xmm0, %xmm4
49 pxor %xmm4, %xmm0
50 shufps $0b10001100, %xmm0, %xmm4
51 pxor %xmm4, %xmm0
52 pxor %xmm1, %xmm0
53 movaps %xmm0, (%rcx)
54 add $0x10, %rcx
55 ret
56
57_key_expansion_192a:
58 pshufd $0b01010101, %xmm1, %xmm1
59 shufps $0b00010000, %xmm0, %xmm4
60 pxor %xmm4, %xmm0
61 shufps $0b10001100, %xmm0, %xmm4
62 pxor %xmm4, %xmm0
63 pxor %xmm1, %xmm0
64
65 movaps %xmm2, %xmm5
66 movaps %xmm2, %xmm6
67 pslldq $4, %xmm5
68 pshufd $0b11111111, %xmm0, %xmm3
69 pxor %xmm3, %xmm2
70 pxor %xmm5, %xmm2
71
72 movaps %xmm0, %xmm1
73 shufps $0b01000100, %xmm0, %xmm6
74 movaps %xmm6, (%rcx)
75 shufps $0b01001110, %xmm2, %xmm1
76 movaps %xmm1, 16(%rcx)
77 add $0x20, %rcx
78 ret
79
80_key_expansion_192b:
81 pshufd $0b01010101, %xmm1, %xmm1
82 shufps $0b00010000, %xmm0, %xmm4
83 pxor %xmm4, %xmm0
84 shufps $0b10001100, %xmm0, %xmm4
85 pxor %xmm4, %xmm0
86 pxor %xmm1, %xmm0
87
88 movaps %xmm2, %xmm5
89 pslldq $4, %xmm5
90 pshufd $0b11111111, %xmm0, %xmm3
91 pxor %xmm3, %xmm2
92 pxor %xmm5, %xmm2
93
94 movaps %xmm0, (%rcx)
95 add $0x10, %rcx
96 ret
97
98_key_expansion_256b:
99 pshufd $0b10101010, %xmm1, %xmm1
100 shufps $0b00010000, %xmm2, %xmm4
101 pxor %xmm4, %xmm2
102 shufps $0b10001100, %xmm2, %xmm4
103 pxor %xmm4, %xmm2
104 pxor %xmm1, %xmm2
105 movaps %xmm2, (%rcx)
106 add $0x10, %rcx
107 ret
108
109/*
110 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
111 * unsigned int key_len)
112 */
113ENTRY(aesni_set_key)
114 movups (%rsi), %xmm0 # user key (first 16 bytes)
115 movaps %xmm0, (%rdi)
116 lea 0x10(%rdi), %rcx # key addr
117 movl %edx, 480(%rdi)
118 pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x
119 cmp $24, %dl
120 jb .Lenc_key128
121 je .Lenc_key192
122 movups 0x10(%rsi), %xmm2 # other user key
123 movaps %xmm2, (%rcx)
124 add $0x10, %rcx
125 # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
126 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
127 call _key_expansion_256a
128 # aeskeygenassist $0x1, %xmm0, %xmm1
129 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
130 call _key_expansion_256b
131 # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
132 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
133 call _key_expansion_256a
134 # aeskeygenassist $0x2, %xmm0, %xmm1
135 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
136 call _key_expansion_256b
137 # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
138 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
139 call _key_expansion_256a
140 # aeskeygenassist $0x4, %xmm0, %xmm1
141 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
142 call _key_expansion_256b
143 # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
144 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
145 call _key_expansion_256a
146 # aeskeygenassist $0x8, %xmm0, %xmm1
147 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
148 call _key_expansion_256b
149 # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
150 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
151 call _key_expansion_256a
152 # aeskeygenassist $0x10, %xmm0, %xmm1
153 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
154 call _key_expansion_256b
155 # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
156 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
157 call _key_expansion_256a
158 # aeskeygenassist $0x20, %xmm0, %xmm1
159 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
160 call _key_expansion_256b
161 # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
162 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
163 call _key_expansion_256a
164 jmp .Ldec_key
165.Lenc_key192:
166 movq 0x10(%rsi), %xmm2 # other user key
167 # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
168 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
169 call _key_expansion_192a
170 # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
171 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
172 call _key_expansion_192b
173 # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
174 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
175 call _key_expansion_192a
176 # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
177 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
178 call _key_expansion_192b
179 # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
180 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
181 call _key_expansion_192a
182 # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
183 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
184 call _key_expansion_192b
185 # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
186 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
187 call _key_expansion_192a
188 # aeskeygenassist $0x80, %xmm2, %xmm1 # round 8
189 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80
190 call _key_expansion_192b
191 jmp .Ldec_key
192.Lenc_key128:
193 # aeskeygenassist $0x1, %xmm0, %xmm1 # round 1
194 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
195 call _key_expansion_128
196 # aeskeygenassist $0x2, %xmm0, %xmm1 # round 2
197 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
198 call _key_expansion_128
199 # aeskeygenassist $0x4, %xmm0, %xmm1 # round 3
200 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
201 call _key_expansion_128
202 # aeskeygenassist $0x8, %xmm0, %xmm1 # round 4
203 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
204 call _key_expansion_128
205 # aeskeygenassist $0x10, %xmm0, %xmm1 # round 5
206 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
207 call _key_expansion_128
208 # aeskeygenassist $0x20, %xmm0, %xmm1 # round 6
209 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
210 call _key_expansion_128
211 # aeskeygenassist $0x40, %xmm0, %xmm1 # round 7
212 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40
213 call _key_expansion_128
214 # aeskeygenassist $0x80, %xmm0, %xmm1 # round 8
215 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80
216 call _key_expansion_128
217 # aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9
218 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b
219 call _key_expansion_128
220 # aeskeygenassist $0x36, %xmm0, %xmm1 # round 10
221 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36
222 call _key_expansion_128
223.Ldec_key:
224 sub $0x10, %rcx
225 movaps (%rdi), %xmm0
226 movaps (%rcx), %xmm1
227 movaps %xmm0, 240(%rcx)
228 movaps %xmm1, 240(%rdi)
229 add $0x10, %rdi
230 lea 240-16(%rcx), %rsi
231.align 4
232.Ldec_key_loop:
233 movaps (%rdi), %xmm0
234 # aesimc %xmm0, %xmm1
235 .byte 0x66, 0x0f, 0x38, 0xdb, 0xc8
236 movaps %xmm1, (%rsi)
237 add $0x10, %rdi
238 sub $0x10, %rsi
239 cmp %rcx, %rdi
240 jb .Ldec_key_loop
241 xor %rax, %rax
242 ret
243
244/*
245 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
246 */
247ENTRY(aesni_enc)
248 movl 480(KEYP), KLEN # key length
249 movups (INP), STATE # input
250 call _aesni_enc1
251 movups STATE, (OUTP) # output
252 ret
253
254/*
255 * _aesni_enc1: internal ABI
256 * input:
257 * KEYP: key struct pointer
258 * KLEN: round count
259 * STATE: initial state (input)
260 * output:
261 * STATE: finial state (output)
262 * changed:
263 * KEY
264 * TKEYP (T1)
265 */
266_aesni_enc1:
267 movaps (KEYP), KEY # key
268 mov KEYP, TKEYP
269 pxor KEY, STATE # round 0
270 add $0x30, TKEYP
271 cmp $24, KLEN
272 jb .Lenc128
273 lea 0x20(TKEYP), TKEYP
274 je .Lenc192
275 add $0x20, TKEYP
276 movaps -0x60(TKEYP), KEY
277 # aesenc KEY, STATE
278 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
279 movaps -0x50(TKEYP), KEY
280 # aesenc KEY, STATE
281 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
282.align 4
283.Lenc192:
284 movaps -0x40(TKEYP), KEY
285 # aesenc KEY, STATE
286 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
287 movaps -0x30(TKEYP), KEY
288 # aesenc KEY, STATE
289 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
290.align 4
291.Lenc128:
292 movaps -0x20(TKEYP), KEY
293 # aesenc KEY, STATE
294 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
295 movaps -0x10(TKEYP), KEY
296 # aesenc KEY, STATE
297 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
298 movaps (TKEYP), KEY
299 # aesenc KEY, STATE
300 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
301 movaps 0x10(TKEYP), KEY
302 # aesenc KEY, STATE
303 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
304 movaps 0x20(TKEYP), KEY
305 # aesenc KEY, STATE
306 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
307 movaps 0x30(TKEYP), KEY
308 # aesenc KEY, STATE
309 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
310 movaps 0x40(TKEYP), KEY
311 # aesenc KEY, STATE
312 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
313 movaps 0x50(TKEYP), KEY
314 # aesenc KEY, STATE
315 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
316 movaps 0x60(TKEYP), KEY
317 # aesenc KEY, STATE
318 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
319 movaps 0x70(TKEYP), KEY
320 # aesenclast KEY, STATE # last round
321 .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
322 ret
323
324/*
325 * _aesni_enc4: internal ABI
326 * input:
327 * KEYP: key struct pointer
328 * KLEN: round count
329 * STATE1: initial state (input)
330 * STATE2
331 * STATE3
332 * STATE4
333 * output:
334 * STATE1: finial state (output)
335 * STATE2
336 * STATE3
337 * STATE4
338 * changed:
339 * KEY
340 * TKEYP (T1)
341 */
342_aesni_enc4:
343 movaps (KEYP), KEY # key
344 mov KEYP, TKEYP
345 pxor KEY, STATE1 # round 0
346 pxor KEY, STATE2
347 pxor KEY, STATE3
348 pxor KEY, STATE4
349 add $0x30, TKEYP
350 cmp $24, KLEN
351 jb .L4enc128
352 lea 0x20(TKEYP), TKEYP
353 je .L4enc192
354 add $0x20, TKEYP
355 movaps -0x60(TKEYP), KEY
356 # aesenc KEY, STATE1
357 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
358 # aesenc KEY, STATE2
359 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
360 # aesenc KEY, STATE3
361 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
362 # aesenc KEY, STATE4
363 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
364 movaps -0x50(TKEYP), KEY
365 # aesenc KEY, STATE1
366 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
367 # aesenc KEY, STATE2
368 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
369 # aesenc KEY, STATE3
370 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
371 # aesenc KEY, STATE4
372 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
373#.align 4
374.L4enc192:
375 movaps -0x40(TKEYP), KEY
376 # aesenc KEY, STATE1
377 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
378 # aesenc KEY, STATE2
379 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
380 # aesenc KEY, STATE3
381 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
382 # aesenc KEY, STATE4
383 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
384 movaps -0x30(TKEYP), KEY
385 # aesenc KEY, STATE1
386 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
387 # aesenc KEY, STATE2
388 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
389 # aesenc KEY, STATE3
390 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
391 # aesenc KEY, STATE4
392 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
393#.align 4
394.L4enc128:
395 movaps -0x20(TKEYP), KEY
396 # aesenc KEY, STATE1
397 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
398 # aesenc KEY, STATE2
399 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
400 # aesenc KEY, STATE3
401 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
402 # aesenc KEY, STATE4
403 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
404 movaps -0x10(TKEYP), KEY
405 # aesenc KEY, STATE1
406 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
407 # aesenc KEY, STATE2
408 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
409 # aesenc KEY, STATE3
410 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
411 # aesenc KEY, STATE4
412 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
413 movaps (TKEYP), KEY
414 # aesenc KEY, STATE1
415 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
416 # aesenc KEY, STATE2
417 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
418 # aesenc KEY, STATE3
419 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
420 # aesenc KEY, STATE4
421 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
422 movaps 0x10(TKEYP), KEY
423 # aesenc KEY, STATE1
424 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
425 # aesenc KEY, STATE2
426 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
427 # aesenc KEY, STATE3
428 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
429 # aesenc KEY, STATE4
430 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
431 movaps 0x20(TKEYP), KEY
432 # aesenc KEY, STATE1
433 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
434 # aesenc KEY, STATE2
435 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
436 # aesenc KEY, STATE3
437 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
438 # aesenc KEY, STATE4
439 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
440 movaps 0x30(TKEYP), KEY
441 # aesenc KEY, STATE1
442 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
443 # aesenc KEY, STATE2
444 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
445 # aesenc KEY, STATE3
446 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
447 # aesenc KEY, STATE4
448 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
449 movaps 0x40(TKEYP), KEY
450 # aesenc KEY, STATE1
451 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
452 # aesenc KEY, STATE2
453 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
454 # aesenc KEY, STATE3
455 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
456 # aesenc KEY, STATE4
457 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
458 movaps 0x50(TKEYP), KEY
459 # aesenc KEY, STATE1
460 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
461 # aesenc KEY, STATE2
462 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
463 # aesenc KEY, STATE3
464 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
465 # aesenc KEY, STATE4
466 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
467 movaps 0x60(TKEYP), KEY
468 # aesenc KEY, STATE1
469 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
470 # aesenc KEY, STATE2
471 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
472 # aesenc KEY, STATE3
473 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
474 # aesenc KEY, STATE4
475 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
476 movaps 0x70(TKEYP), KEY
477 # aesenclast KEY, STATE1 # last round
478 .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
479 # aesenclast KEY, STATE2
480 .byte 0x66, 0x0f, 0x38, 0xdd, 0xe2
481 # aesenclast KEY, STATE3
482 .byte 0x66, 0x0f, 0x38, 0xdd, 0xea
483 # aesenclast KEY, STATE4
484 .byte 0x66, 0x0f, 0x38, 0xdd, 0xf2
485 ret
486
487/*
488 * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
489 */
490ENTRY(aesni_dec)
491 mov 480(KEYP), KLEN # key length
492 add $240, KEYP
493 movups (INP), STATE # input
494 call _aesni_dec1
495 movups STATE, (OUTP) #output
496 ret
497
498/*
499 * _aesni_dec1: internal ABI
500 * input:
501 * KEYP: key struct pointer
502 * KLEN: key length
503 * STATE: initial state (input)
504 * output:
505 * STATE: finial state (output)
506 * changed:
507 * KEY
508 * TKEYP (T1)
509 */
510_aesni_dec1:
511 movaps (KEYP), KEY # key
512 mov KEYP, TKEYP
513 pxor KEY, STATE # round 0
514 add $0x30, TKEYP
515 cmp $24, KLEN
516 jb .Ldec128
517 lea 0x20(TKEYP), TKEYP
518 je .Ldec192
519 add $0x20, TKEYP
520 movaps -0x60(TKEYP), KEY
521 # aesdec KEY, STATE
522 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
523 movaps -0x50(TKEYP), KEY
524 # aesdec KEY, STATE
525 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
526.align 4
527.Ldec192:
528 movaps -0x40(TKEYP), KEY
529 # aesdec KEY, STATE
530 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
531 movaps -0x30(TKEYP), KEY
532 # aesdec KEY, STATE
533 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
534.align 4
535.Ldec128:
536 movaps -0x20(TKEYP), KEY
537 # aesdec KEY, STATE
538 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
539 movaps -0x10(TKEYP), KEY
540 # aesdec KEY, STATE
541 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
542 movaps (TKEYP), KEY
543 # aesdec KEY, STATE
544 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
545 movaps 0x10(TKEYP), KEY
546 # aesdec KEY, STATE
547 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
548 movaps 0x20(TKEYP), KEY
549 # aesdec KEY, STATE
550 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
551 movaps 0x30(TKEYP), KEY
552 # aesdec KEY, STATE
553 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
554 movaps 0x40(TKEYP), KEY
555 # aesdec KEY, STATE
556 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
557 movaps 0x50(TKEYP), KEY
558 # aesdec KEY, STATE
559 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
560 movaps 0x60(TKEYP), KEY
561 # aesdec KEY, STATE
562 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
563 movaps 0x70(TKEYP), KEY
564 # aesdeclast KEY, STATE # last round
565 .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
566 ret
567
568/*
569 * _aesni_dec4: internal ABI
570 * input:
571 * KEYP: key struct pointer
572 * KLEN: key length
573 * STATE1: initial state (input)
574 * STATE2
575 * STATE3
576 * STATE4
577 * output:
578 * STATE1: finial state (output)
579 * STATE2
580 * STATE3
581 * STATE4
582 * changed:
583 * KEY
584 * TKEYP (T1)
585 */
586_aesni_dec4:
587 movaps (KEYP), KEY # key
588 mov KEYP, TKEYP
589 pxor KEY, STATE1 # round 0
590 pxor KEY, STATE2
591 pxor KEY, STATE3
592 pxor KEY, STATE4
593 add $0x30, TKEYP
594 cmp $24, KLEN
595 jb .L4dec128
596 lea 0x20(TKEYP), TKEYP
597 je .L4dec192
598 add $0x20, TKEYP
599 movaps -0x60(TKEYP), KEY
600 # aesdec KEY, STATE1
601 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
602 # aesdec KEY, STATE2
603 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
604 # aesdec KEY, STATE3
605 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
606 # aesdec KEY, STATE4
607 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
608 movaps -0x50(TKEYP), KEY
609 # aesdec KEY, STATE1
610 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
611 # aesdec KEY, STATE2
612 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
613 # aesdec KEY, STATE3
614 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
615 # aesdec KEY, STATE4
616 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
617.align 4
618.L4dec192:
619 movaps -0x40(TKEYP), KEY
620 # aesdec KEY, STATE1
621 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
622 # aesdec KEY, STATE2
623 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
624 # aesdec KEY, STATE3
625 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
626 # aesdec KEY, STATE4
627 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
628 movaps -0x30(TKEYP), KEY
629 # aesdec KEY, STATE1
630 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
631 # aesdec KEY, STATE2
632 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
633 # aesdec KEY, STATE3
634 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
635 # aesdec KEY, STATE4
636 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
637.align 4
638.L4dec128:
639 movaps -0x20(TKEYP), KEY
640 # aesdec KEY, STATE1
641 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
642 # aesdec KEY, STATE2
643 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
644 # aesdec KEY, STATE3
645 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
646 # aesdec KEY, STATE4
647 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
648 movaps -0x10(TKEYP), KEY
649 # aesdec KEY, STATE1
650 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
651 # aesdec KEY, STATE2
652 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
653 # aesdec KEY, STATE3
654 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
655 # aesdec KEY, STATE4
656 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
657 movaps (TKEYP), KEY
658 # aesdec KEY, STATE1
659 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
660 # aesdec KEY, STATE2
661 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
662 # aesdec KEY, STATE3
663 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
664 # aesdec KEY, STATE4
665 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
666 movaps 0x10(TKEYP), KEY
667 # aesdec KEY, STATE1
668 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
669 # aesdec KEY, STATE2
670 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
671 # aesdec KEY, STATE3
672 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
673 # aesdec KEY, STATE4
674 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
675 movaps 0x20(TKEYP), KEY
676 # aesdec KEY, STATE1
677 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
678 # aesdec KEY, STATE2
679 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
680 # aesdec KEY, STATE3
681 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
682 # aesdec KEY, STATE4
683 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
684 movaps 0x30(TKEYP), KEY
685 # aesdec KEY, STATE1
686 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
687 # aesdec KEY, STATE2
688 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
689 # aesdec KEY, STATE3
690 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
691 # aesdec KEY, STATE4
692 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
693 movaps 0x40(TKEYP), KEY
694 # aesdec KEY, STATE1
695 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
696 # aesdec KEY, STATE2
697 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
698 # aesdec KEY, STATE3
699 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
700 # aesdec KEY, STATE4
701 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
702 movaps 0x50(TKEYP), KEY
703 # aesdec KEY, STATE1
704 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
705 # aesdec KEY, STATE2
706 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
707 # aesdec KEY, STATE3
708 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
709 # aesdec KEY, STATE4
710 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
711 movaps 0x60(TKEYP), KEY
712 # aesdec KEY, STATE1
713 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
714 # aesdec KEY, STATE2
715 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
716 # aesdec KEY, STATE3
717 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
718 # aesdec KEY, STATE4
719 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
720 movaps 0x70(TKEYP), KEY
721 # aesdeclast KEY, STATE1 # last round
722 .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
723 # aesdeclast KEY, STATE2
724 .byte 0x66, 0x0f, 0x38, 0xdf, 0xe2
725 # aesdeclast KEY, STATE3
726 .byte 0x66, 0x0f, 0x38, 0xdf, 0xea
727 # aesdeclast KEY, STATE4
728 .byte 0x66, 0x0f, 0x38, 0xdf, 0xf2
729 ret
730
731/*
732 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
733 * size_t len)
734 */
735ENTRY(aesni_ecb_enc)
736 test LEN, LEN # check length
737 jz .Lecb_enc_ret
738 mov 480(KEYP), KLEN
739 cmp $16, LEN
740 jb .Lecb_enc_ret
741 cmp $64, LEN
742 jb .Lecb_enc_loop1
743.align 4
744.Lecb_enc_loop4:
745 movups (INP), STATE1
746 movups 0x10(INP), STATE2
747 movups 0x20(INP), STATE3
748 movups 0x30(INP), STATE4
749 call _aesni_enc4
750 movups STATE1, (OUTP)
751 movups STATE2, 0x10(OUTP)
752 movups STATE3, 0x20(OUTP)
753 movups STATE4, 0x30(OUTP)
754 sub $64, LEN
755 add $64, INP
756 add $64, OUTP
757 cmp $64, LEN
758 jge .Lecb_enc_loop4
759 cmp $16, LEN
760 jb .Lecb_enc_ret
761.align 4
762.Lecb_enc_loop1:
763 movups (INP), STATE1
764 call _aesni_enc1
765 movups STATE1, (OUTP)
766 sub $16, LEN
767 add $16, INP
768 add $16, OUTP
769 cmp $16, LEN
770 jge .Lecb_enc_loop1
771.Lecb_enc_ret:
772 ret
773
774/*
775 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
776 * size_t len);
777 */
778ENTRY(aesni_ecb_dec)
779 test LEN, LEN
780 jz .Lecb_dec_ret
781 mov 480(KEYP), KLEN
782 add $240, KEYP
783 cmp $16, LEN
784 jb .Lecb_dec_ret
785 cmp $64, LEN
786 jb .Lecb_dec_loop1
787.align 4
788.Lecb_dec_loop4:
789 movups (INP), STATE1
790 movups 0x10(INP), STATE2
791 movups 0x20(INP), STATE3
792 movups 0x30(INP), STATE4
793 call _aesni_dec4
794 movups STATE1, (OUTP)
795 movups STATE2, 0x10(OUTP)
796 movups STATE3, 0x20(OUTP)
797 movups STATE4, 0x30(OUTP)
798 sub $64, LEN
799 add $64, INP
800 add $64, OUTP
801 cmp $64, LEN
802 jge .Lecb_dec_loop4
803 cmp $16, LEN
804 jb .Lecb_dec_ret
805.align 4
806.Lecb_dec_loop1:
807 movups (INP), STATE1
808 call _aesni_dec1
809 movups STATE1, (OUTP)
810 sub $16, LEN
811 add $16, INP
812 add $16, OUTP
813 cmp $16, LEN
814 jge .Lecb_dec_loop1
815.Lecb_dec_ret:
816 ret
817
818/*
819 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
820 * size_t len, u8 *iv)
821 */
822ENTRY(aesni_cbc_enc)
823 cmp $16, LEN
824 jb .Lcbc_enc_ret
825 mov 480(KEYP), KLEN
826 movups (IVP), STATE # load iv as initial state
827.align 4
828.Lcbc_enc_loop:
829 movups (INP), IN # load input
830 pxor IN, STATE
831 call _aesni_enc1
832 movups STATE, (OUTP) # store output
833 sub $16, LEN
834 add $16, INP
835 add $16, OUTP
836 cmp $16, LEN
837 jge .Lcbc_enc_loop
838 movups STATE, (IVP)
839.Lcbc_enc_ret:
840 ret
841
842/*
843 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
844 * size_t len, u8 *iv)
845 */
846ENTRY(aesni_cbc_dec)
847 cmp $16, LEN
848 jb .Lcbc_dec_ret
849 mov 480(KEYP), KLEN
850 add $240, KEYP
851 movups (IVP), IV
852 cmp $64, LEN
853 jb .Lcbc_dec_loop1
854.align 4
855.Lcbc_dec_loop4:
856 movups (INP), IN1
857 movaps IN1, STATE1
858 movups 0x10(INP), IN2
859 movaps IN2, STATE2
860 movups 0x20(INP), IN3
861 movaps IN3, STATE3
862 movups 0x30(INP), IN4
863 movaps IN4, STATE4
864 call _aesni_dec4
865 pxor IV, STATE1
866 pxor IN1, STATE2
867 pxor IN2, STATE3
868 pxor IN3, STATE4
869 movaps IN4, IV
870 movups STATE1, (OUTP)
871 movups STATE2, 0x10(OUTP)
872 movups STATE3, 0x20(OUTP)
873 movups STATE4, 0x30(OUTP)
874 sub $64, LEN
875 add $64, INP
876 add $64, OUTP
877 cmp $64, LEN
878 jge .Lcbc_dec_loop4
879 cmp $16, LEN
880 jb .Lcbc_dec_ret
881.align 4
882.Lcbc_dec_loop1:
883 movups (INP), IN
884 movaps IN, STATE
885 call _aesni_dec1
886 pxor IV, STATE
887 movups STATE, (OUTP)
888 movaps IN, IV
889 sub $16, LEN
890 add $16, INP
891 add $16, OUTP
892 cmp $16, LEN
893 jge .Lcbc_dec_loop1
894 movups IV, (IVP)
895.Lcbc_dec_ret:
896 ret
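
The ECB routines and CBC decryption above share one loop shape: a fast path that feeds four independent blocks per iteration to _aesni_enc4/_aesni_dec4, then a one-block tail through _aesni_enc1/_aesni_dec1, with any trailing partial block silently dropped. A minimal C sketch of that structure, assuming hypothetical aes_encrypt_block1()/aes_encrypt_block4() stand-ins for the assembly helpers:

#include <linux/types.h>
#include <crypto/aes.h>			/* AES_BLOCK_SIZE */

/* Hypothetical stand-ins for the _aesni_enc1/_aesni_enc4 assembly helpers. */
void aes_encrypt_block1(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
void aes_encrypt_block4(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);

static void ecb_enc_loop_sketch(struct crypto_aes_ctx *ctx, u8 *out,
				const u8 *in, unsigned int len)
{
	/* Fast path: four independent blocks per iteration. */
	while (len >= 4 * AES_BLOCK_SIZE) {
		aes_encrypt_block4(ctx, out, in);
		in  += 4 * AES_BLOCK_SIZE;
		out += 4 * AES_BLOCK_SIZE;
		len -= 4 * AES_BLOCK_SIZE;
	}
	/* Tail: one block at a time; a trailing partial block is ignored. */
	while (len >= AES_BLOCK_SIZE) {
		aes_encrypt_block1(ctx, out, in);
		in  += AES_BLOCK_SIZE;
		out += AES_BLOCK_SIZE;
		len -= AES_BLOCK_SIZE;
	}
}

CBC encryption cannot take the four-wide path because each block depends on the previous ciphertext, which is why aesni_cbc_enc above only has the single-block loop; aesni_cbc_dec can, since all four ciphertext inputs are available up front.
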
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
new file mode 100644
index 000000000000..02af0af65497
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -0,0 +1,461 @@
1/*
2 * Support for Intel AES-NI instructions. This file contains glue
3 * code; the real AES implementation is in aesni-intel_asm.S.
4 *
5 * Copyright (C) 2008, Intel Corp.
6 * Author: Huang Ying <ying.huang@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14#include <linux/hardirq.h>
15#include <linux/types.h>
16#include <linux/crypto.h>
17#include <linux/err.h>
18#include <crypto/algapi.h>
19#include <crypto/aes.h>
20#include <crypto/cryptd.h>
21#include <asm/i387.h>
22#include <asm/aes.h>
23
24struct async_aes_ctx {
25 struct cryptd_ablkcipher *cryptd_tfm;
26};
27
28#define AESNI_ALIGN 16
29#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1))
30
31asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
32 unsigned int key_len);
33asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
34 const u8 *in);
35asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out,
36 const u8 *in);
37asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out,
38 const u8 *in, unsigned int len);
39asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out,
40 const u8 *in, unsigned int len);
41asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
42 const u8 *in, unsigned int len, u8 *iv);
43asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
44 const u8 *in, unsigned int len, u8 *iv);
45
46static inline int kernel_fpu_using(void)
47{
48 if (in_interrupt() && !(read_cr0() & X86_CR0_TS))
49 return 1;
50 return 0;
51}
52
53static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
54{
55 unsigned long addr = (unsigned long)raw_ctx;
56 unsigned long align = AESNI_ALIGN;
57
58 if (align <= crypto_tfm_ctx_alignment())
59 align = 1;
60 return (struct crypto_aes_ctx *)ALIGN(addr, align);
61}
62
63static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
64 const u8 *in_key, unsigned int key_len)
65{
66 struct crypto_aes_ctx *ctx = aes_ctx(raw_ctx);
67 u32 *flags = &tfm->crt_flags;
68 int err;
69
70 if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
71 key_len != AES_KEYSIZE_256) {
72 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
73 return -EINVAL;
74 }
75
76 if (kernel_fpu_using())
77 err = crypto_aes_expand_key(ctx, in_key, key_len);
78 else {
79 kernel_fpu_begin();
80 err = aesni_set_key(ctx, in_key, key_len);
81 kernel_fpu_end();
82 }
83
84 return err;
85}
86
87static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
88 unsigned int key_len)
89{
90 return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len);
91}
92
93static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
94{
95 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
96
97 if (kernel_fpu_using())
98 crypto_aes_encrypt_x86(ctx, dst, src);
99 else {
100 kernel_fpu_begin();
101 aesni_enc(ctx, dst, src);
102 kernel_fpu_end();
103 }
104}
105
106static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
107{
108 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
109
110 if (kernel_fpu_using())
111 crypto_aes_decrypt_x86(ctx, dst, src);
112 else {
113 kernel_fpu_begin();
114 aesni_dec(ctx, dst, src);
115 kernel_fpu_end();
116 }
117}
118
119static struct crypto_alg aesni_alg = {
120 .cra_name = "aes",
121 .cra_driver_name = "aes-aesni",
122 .cra_priority = 300,
123 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
124 .cra_blocksize = AES_BLOCK_SIZE,
125 .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
126 .cra_alignmask = 0,
127 .cra_module = THIS_MODULE,
128 .cra_list = LIST_HEAD_INIT(aesni_alg.cra_list),
129 .cra_u = {
130 .cipher = {
131 .cia_min_keysize = AES_MIN_KEY_SIZE,
132 .cia_max_keysize = AES_MAX_KEY_SIZE,
133 .cia_setkey = aes_set_key,
134 .cia_encrypt = aes_encrypt,
135 .cia_decrypt = aes_decrypt
136 }
137 }
138};
139
140static int ecb_encrypt(struct blkcipher_desc *desc,
141 struct scatterlist *dst, struct scatterlist *src,
142 unsigned int nbytes)
143{
144 struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
145 struct blkcipher_walk walk;
146 int err;
147
148 blkcipher_walk_init(&walk, dst, src, nbytes);
149 err = blkcipher_walk_virt(desc, &walk);
150
151 kernel_fpu_begin();
152 while ((nbytes = walk.nbytes)) {
153 aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
154 nbytes & AES_BLOCK_MASK);
155 nbytes &= AES_BLOCK_SIZE - 1;
156 err = blkcipher_walk_done(desc, &walk, nbytes);
157 }
158 kernel_fpu_end();
159
160 return err;
161}
162
163static int ecb_decrypt(struct blkcipher_desc *desc,
164 struct scatterlist *dst, struct scatterlist *src,
165 unsigned int nbytes)
166{
167 struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
168 struct blkcipher_walk walk;
169 int err;
170
171 blkcipher_walk_init(&walk, dst, src, nbytes);
172 err = blkcipher_walk_virt(desc, &walk);
173
174 kernel_fpu_begin();
175 while ((nbytes = walk.nbytes)) {
176 aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
177 nbytes & AES_BLOCK_MASK);
178 nbytes &= AES_BLOCK_SIZE - 1;
179 err = blkcipher_walk_done(desc, &walk, nbytes);
180 }
181 kernel_fpu_end();
182
183 return err;
184}
185
186static struct crypto_alg blk_ecb_alg = {
187 .cra_name = "__ecb-aes-aesni",
188 .cra_driver_name = "__driver-ecb-aes-aesni",
189 .cra_priority = 0,
190 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
191 .cra_blocksize = AES_BLOCK_SIZE,
192 .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
193 .cra_alignmask = 0,
194 .cra_type = &crypto_blkcipher_type,
195 .cra_module = THIS_MODULE,
196 .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
197 .cra_u = {
198 .blkcipher = {
199 .min_keysize = AES_MIN_KEY_SIZE,
200 .max_keysize = AES_MAX_KEY_SIZE,
201 .setkey = aes_set_key,
202 .encrypt = ecb_encrypt,
203 .decrypt = ecb_decrypt,
204 },
205 },
206};
207
208static int cbc_encrypt(struct blkcipher_desc *desc,
209 struct scatterlist *dst, struct scatterlist *src,
210 unsigned int nbytes)
211{
212 struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
213 struct blkcipher_walk walk;
214 int err;
215
216 blkcipher_walk_init(&walk, dst, src, nbytes);
217 err = blkcipher_walk_virt(desc, &walk);
218
219 kernel_fpu_begin();
220 while ((nbytes = walk.nbytes)) {
221 aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
222 nbytes & AES_BLOCK_MASK, walk.iv);
223 nbytes &= AES_BLOCK_SIZE - 1;
224 err = blkcipher_walk_done(desc, &walk, nbytes);
225 }
226 kernel_fpu_end();
227
228 return err;
229}
230
231static int cbc_decrypt(struct blkcipher_desc *desc,
232 struct scatterlist *dst, struct scatterlist *src,
233 unsigned int nbytes)
234{
235 struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
236 struct blkcipher_walk walk;
237 int err;
238
239 blkcipher_walk_init(&walk, dst, src, nbytes);
240 err = blkcipher_walk_virt(desc, &walk);
241
242 kernel_fpu_begin();
243 while ((nbytes = walk.nbytes)) {
244 aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
245 nbytes & AES_BLOCK_MASK, walk.iv);
246 nbytes &= AES_BLOCK_SIZE - 1;
247 err = blkcipher_walk_done(desc, &walk, nbytes);
248 }
249 kernel_fpu_end();
250
251 return err;
252}
253
254static struct crypto_alg blk_cbc_alg = {
255 .cra_name = "__cbc-aes-aesni",
256 .cra_driver_name = "__driver-cbc-aes-aesni",
257 .cra_priority = 0,
258 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
259 .cra_blocksize = AES_BLOCK_SIZE,
260 .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
261 .cra_alignmask = 0,
262 .cra_type = &crypto_blkcipher_type,
263 .cra_module = THIS_MODULE,
264 .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
265 .cra_u = {
266 .blkcipher = {
267 .min_keysize = AES_MIN_KEY_SIZE,
268 .max_keysize = AES_MAX_KEY_SIZE,
269 .setkey = aes_set_key,
270 .encrypt = cbc_encrypt,
271 .decrypt = cbc_decrypt,
272 },
273 },
274};
275
276static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
277 unsigned int key_len)
278{
279 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
280
281 return crypto_ablkcipher_setkey(&ctx->cryptd_tfm->base, key, key_len);
282}
283
284static int ablk_encrypt(struct ablkcipher_request *req)
285{
286 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
287 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
288
289 if (kernel_fpu_using()) {
290 struct ablkcipher_request *cryptd_req =
291 ablkcipher_request_ctx(req);
292 memcpy(cryptd_req, req, sizeof(*req));
293 ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
294 return crypto_ablkcipher_encrypt(cryptd_req);
295 } else {
296 struct blkcipher_desc desc;
297 desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
298 desc.info = req->info;
299 desc.flags = 0;
300 return crypto_blkcipher_crt(desc.tfm)->encrypt(
301 &desc, req->dst, req->src, req->nbytes);
302 }
303}
304
305static int ablk_decrypt(struct ablkcipher_request *req)
306{
307 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
308 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
309
310 if (kernel_fpu_using()) {
311 struct ablkcipher_request *cryptd_req =
312 ablkcipher_request_ctx(req);
313 memcpy(cryptd_req, req, sizeof(*req));
314 ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
315 return crypto_ablkcipher_decrypt(cryptd_req);
316 } else {
317 struct blkcipher_desc desc;
318 desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
319 desc.info = req->info;
320 desc.flags = 0;
321 return crypto_blkcipher_crt(desc.tfm)->decrypt(
322 &desc, req->dst, req->src, req->nbytes);
323 }
324}
325
326static void ablk_exit(struct crypto_tfm *tfm)
327{
328 struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
329
330 cryptd_free_ablkcipher(ctx->cryptd_tfm);
331}
332
333static void ablk_init_common(struct crypto_tfm *tfm,
334 struct cryptd_ablkcipher *cryptd_tfm)
335{
336 struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
337
338 ctx->cryptd_tfm = cryptd_tfm;
339 tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
340 crypto_ablkcipher_reqsize(&cryptd_tfm->base);
341}
342
343static int ablk_ecb_init(struct crypto_tfm *tfm)
344{
345 struct cryptd_ablkcipher *cryptd_tfm;
346
347 cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-aes-aesni", 0, 0);
348 if (IS_ERR(cryptd_tfm))
349 return PTR_ERR(cryptd_tfm);
350 ablk_init_common(tfm, cryptd_tfm);
351 return 0;
352}
353
354static struct crypto_alg ablk_ecb_alg = {
355 .cra_name = "ecb(aes)",
356 .cra_driver_name = "ecb-aes-aesni",
357 .cra_priority = 400,
358 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
359 .cra_blocksize = AES_BLOCK_SIZE,
360 .cra_ctxsize = sizeof(struct async_aes_ctx),
361 .cra_alignmask = 0,
362 .cra_type = &crypto_ablkcipher_type,
363 .cra_module = THIS_MODULE,
364 .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list),
365 .cra_init = ablk_ecb_init,
366 .cra_exit = ablk_exit,
367 .cra_u = {
368 .ablkcipher = {
369 .min_keysize = AES_MIN_KEY_SIZE,
370 .max_keysize = AES_MAX_KEY_SIZE,
371 .setkey = ablk_set_key,
372 .encrypt = ablk_encrypt,
373 .decrypt = ablk_decrypt,
374 },
375 },
376};
377
378static int ablk_cbc_init(struct crypto_tfm *tfm)
379{
380 struct cryptd_ablkcipher *cryptd_tfm;
381
382 cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-aes-aesni", 0, 0);
383 if (IS_ERR(cryptd_tfm))
384 return PTR_ERR(cryptd_tfm);
385 ablk_init_common(tfm, cryptd_tfm);
386 return 0;
387}
388
389static struct crypto_alg ablk_cbc_alg = {
390 .cra_name = "cbc(aes)",
391 .cra_driver_name = "cbc-aes-aesni",
392 .cra_priority = 400,
393 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
394 .cra_blocksize = AES_BLOCK_SIZE,
395 .cra_ctxsize = sizeof(struct async_aes_ctx),
396 .cra_alignmask = 0,
397 .cra_type = &crypto_ablkcipher_type,
398 .cra_module = THIS_MODULE,
399 .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list),
400 .cra_init = ablk_cbc_init,
401 .cra_exit = ablk_exit,
402 .cra_u = {
403 .ablkcipher = {
404 .min_keysize = AES_MIN_KEY_SIZE,
405 .max_keysize = AES_MAX_KEY_SIZE,
406 .ivsize = AES_BLOCK_SIZE,
407 .setkey = ablk_set_key,
408 .encrypt = ablk_encrypt,
409 .decrypt = ablk_decrypt,
410 },
411 },
412};
413
414static int __init aesni_init(void)
415{
416 int err;
417
418 if (!cpu_has_aes) {
419 printk(KERN_ERR "Intel AES-NI instructions are not detected.\n");
420 return -ENODEV;
421 }
422 if ((err = crypto_register_alg(&aesni_alg)))
423 goto aes_err;
424 if ((err = crypto_register_alg(&blk_ecb_alg)))
425 goto blk_ecb_err;
426 if ((err = crypto_register_alg(&blk_cbc_alg)))
427 goto blk_cbc_err;
428 if ((err = crypto_register_alg(&ablk_ecb_alg)))
429 goto ablk_ecb_err;
430 if ((err = crypto_register_alg(&ablk_cbc_alg)))
431 goto ablk_cbc_err;
432
433 return err;
434
435ablk_cbc_err:
436 crypto_unregister_alg(&ablk_ecb_alg);
437ablk_ecb_err:
438 crypto_unregister_alg(&blk_cbc_alg);
439blk_cbc_err:
440 crypto_unregister_alg(&blk_ecb_alg);
441blk_ecb_err:
442 crypto_unregister_alg(&aesni_alg);
443aes_err:
444 return err;
445}
446
447static void __exit aesni_exit(void)
448{
449 crypto_unregister_alg(&ablk_cbc_alg);
450 crypto_unregister_alg(&ablk_ecb_alg);
451 crypto_unregister_alg(&blk_cbc_alg);
452 crypto_unregister_alg(&blk_ecb_alg);
453 crypto_unregister_alg(&aesni_alg);
454}
455
456module_init(aesni_init);
457module_exit(aesni_exit);
458
459MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, optimized with Intel AES-NI instructions");
460MODULE_LICENSE("GPL");
461MODULE_ALIAS("aes");
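
Beyond the plain "aes" cipher, the glue code registers "ecb(aes)" and "cbc(aes)" as asynchronous block ciphers backed by cryptd, so other kernel code reaches them through the generic ablkcipher API rather than calling the aesni_* routines directly. A hedged usage sketch, assuming a contiguous buffer whose length is a multiple of AES_BLOCK_SIZE (the helper name, buffer and completion handling are illustrative, not part of this patch):

#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/completion.h>
#include <linux/err.h>

struct cbc_sketch_result {
	struct completion done;
	int err;
};

static void cbc_sketch_done(struct crypto_async_request *req, int err)
{
	struct cbc_sketch_result *res = req->data;

	if (err == -EINPROGRESS)
		return;
	res->err = err;
	complete(&res->done);
}

static int cbc_aes_encrypt_sketch(const u8 *key, unsigned int keylen,
				  u8 *iv, void *buf, unsigned int len)
{
	struct crypto_ablkcipher *tfm;
	struct ablkcipher_request *req;
	struct scatterlist sg;
	struct cbc_sketch_result res;
	int err;

	tfm = crypto_alloc_ablkcipher("cbc(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_ablkcipher_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;

	req = ablkcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	init_completion(&res.done);
	ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
					cbc_sketch_done, &res);
	sg_init_one(&sg, buf, len);
	ablkcipher_request_set_crypt(req, &sg, &sg, len, iv);	/* in place */

	err = crypto_ablkcipher_encrypt(req);
	if (err == -EINPROGRESS || err == -EBUSY) {
		wait_for_completion(&res.done);
		err = res.err;
	}

	ablkcipher_request_free(req);
out_free_tfm:
	crypto_free_ablkcipher(tfm);
	return err;
}

The higher cra_priority (400 for the cryptd-backed wrappers versus 300 for the bare cipher) is what makes crypto_alloc_ablkcipher("cbc(aes)", 0, 0) resolve to the AES-NI implementation once this module is loaded.
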
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 097a6b64c24d..db0c803170ab 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -557,7 +557,7 @@ ia32_sys_call_table:
557 .quad sys32_olduname 557 .quad sys32_olduname
558 .quad sys_umask /* 60 */ 558 .quad sys_umask /* 60 */
559 .quad sys_chroot 559 .quad sys_chroot
560 .quad sys32_ustat 560 .quad compat_sys_ustat
561 .quad sys_dup2 561 .quad sys_dup2
562 .quad sys_getppid 562 .quad sys_getppid
563 .quad sys_getpgrp /* 65 */ 563 .quad sys_getpgrp /* 65 */
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 6c0d7f6231af..efac92fd1efb 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -638,28 +638,6 @@ long sys32_uname(struct old_utsname __user *name)
638 return err ? -EFAULT : 0; 638 return err ? -EFAULT : 0;
639} 639}
640 640
641long sys32_ustat(unsigned dev, struct ustat32 __user *u32p)
642{
643 struct ustat u;
644 mm_segment_t seg;
645 int ret;
646
647 seg = get_fs();
648 set_fs(KERNEL_DS);
649 ret = sys_ustat(dev, (struct ustat __user *)&u);
650 set_fs(seg);
651 if (ret < 0)
652 return ret;
653
654 if (!access_ok(VERIFY_WRITE, u32p, sizeof(struct ustat32)) ||
655 __put_user((__u32) u.f_tfree, &u32p->f_tfree) ||
656 __put_user((__u32) u.f_tinode, &u32p->f_tfree) ||
657 __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) ||
658 __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack)))
659 ret = -EFAULT;
660 return ret;
661}
662
663asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv, 641asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv,
664 compat_uptr_t __user *envp, struct pt_regs *regs) 642 compat_uptr_t __user *envp, struct pt_regs *regs)
665{ 643{
diff --git a/arch/x86/include/asm/aes.h b/arch/x86/include/asm/aes.h
new file mode 100644
index 000000000000..80545a1cbe39
--- /dev/null
+++ b/arch/x86/include/asm/aes.h
@@ -0,0 +1,11 @@
1#ifndef ASM_X86_AES_H
2#define ASM_X86_AES_H
3
4#include <linux/crypto.h>
5#include <crypto/aes.h>
6
7void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
8 const u8 *src);
9void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
10 const u8 *src);
11#endif
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 394d177d721b..df8a300dfe6c 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -75,7 +75,7 @@ static inline void default_inquire_remote_apic(int apicid)
75#define setup_secondary_clock setup_secondary_APIC_clock 75#define setup_secondary_clock setup_secondary_APIC_clock
76#endif 76#endif
77 77
78#ifdef CONFIG_X86_VSMP 78#ifdef CONFIG_X86_64
79extern int is_vsmp_box(void); 79extern int is_vsmp_box(void);
80#else 80#else
81static inline int is_vsmp_box(void) 81static inline int is_vsmp_box(void)
@@ -108,6 +108,16 @@ extern void native_apic_icr_write(u32 low, u32 id);
108extern u64 native_apic_icr_read(void); 108extern u64 native_apic_icr_read(void);
109 109
110#ifdef CONFIG_X86_X2APIC 110#ifdef CONFIG_X86_X2APIC
111/*
112 * Make previous memory operations globally visible before
113 * sending the IPI through x2apic wrmsr. We need a serializing instruction or
114 * mfence for this.
115 */
116static inline void x2apic_wrmsr_fence(void)
117{
118 asm volatile("mfence" : : : "memory");
119}
120
111static inline void native_apic_msr_write(u32 reg, u32 v) 121static inline void native_apic_msr_write(u32 reg, u32 v)
112{ 122{
113 if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || 123 if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
@@ -184,6 +194,9 @@ static inline int x2apic_enabled(void)
184{ 194{
185 return 0; 195 return 0;
186} 196}
197
198#define x2apic 0
199
187#endif 200#endif
188 201
189extern int get_physical_broadcast(void); 202extern int get_physical_broadcast(void);
@@ -476,10 +489,19 @@ static inline int default_apic_id_registered(void)
476 return physid_isset(read_apic_id(), phys_cpu_present_map); 489 return physid_isset(read_apic_id(), phys_cpu_present_map);
477} 490}
478 491
492static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
493{
494 return cpuid_apic >> index_msb;
495}
496
497extern int default_apicid_to_node(int logical_apicid);
498
499#endif
500
479static inline unsigned int 501static inline unsigned int
480default_cpu_mask_to_apicid(const struct cpumask *cpumask) 502default_cpu_mask_to_apicid(const struct cpumask *cpumask)
481{ 503{
482 return cpumask_bits(cpumask)[0]; 504 return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
483} 505}
484 506
485static inline unsigned int 507static inline unsigned int
@@ -493,15 +515,6 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
493 return (unsigned int)(mask1 & mask2 & mask3); 515 return (unsigned int)(mask1 & mask2 & mask3);
494} 516}
495 517
496static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
497{
498 return cpuid_apic >> index_msb;
499}
500
501extern int default_apicid_to_node(int logical_apicid);
502
503#endif
504
505static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) 518static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid)
506{ 519{
507 return physid_isset(apicid, bitmap); 520 return physid_isset(apicid, bitmap);
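
The x2apic_wrmsr_fence() helper added above matters because the x2APIC programs the ICR through WRMSR instead of an uncached MMIO write, and that MSR write is not serializing with respect to earlier stores; without the mfence an IPI could overtake data the target CPU is expected to observe. A minimal sketch of the intended pairing in an IPI send path (the packing of the ICR value and the raw wrmsrl are illustrative; real callers go through the apic driver helpers):

#include <asm/apic.h>
#include <asm/apicdef.h>
#include <asm/msr.h>

static void x2apic_send_ipi_sketch(u32 apicid, int vector)
{
	u64 icr = ((u64)apicid << 32) | APIC_DM_FIXED | vector;

	x2apic_wrmsr_fence();			/* make prior stores globally visible */
	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), icr);	/* x2APIC ICR */
}
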
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 6526cf08b0e4..6ba23dd9fc92 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -1,10 +1,6 @@
1#ifndef _ASM_X86_BOOT_H 1#ifndef _ASM_X86_BOOT_H
2#define _ASM_X86_BOOT_H 2#define _ASM_X86_BOOT_H
3 3
4/* Don't touch these, unless you really know what you're doing. */
5#define DEF_SYSSEG 0x1000
6#define DEF_SYSSIZE 0x7F00
7
8/* Internal svga startup constants */ 4/* Internal svga startup constants */
9#define NORMAL_VGA 0xffff /* 80x25 mode */ 5#define NORMAL_VGA 0xffff /* 80x25 mode */
10#define EXTENDED_VGA 0xfffe /* 80x50 mode */ 6#define EXTENDED_VGA 0xfffe /* 80x50 mode */
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 5b301b7ff5f4..b3894bf52fcd 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -90,6 +90,9 @@ int set_memory_4k(unsigned long addr, int numpages);
90int set_memory_array_uc(unsigned long *addr, int addrinarray); 90int set_memory_array_uc(unsigned long *addr, int addrinarray);
91int set_memory_array_wb(unsigned long *addr, int addrinarray); 91int set_memory_array_wb(unsigned long *addr, int addrinarray);
92 92
93int set_pages_array_uc(struct page **pages, int addrinarray);
94int set_pages_array_wb(struct page **pages, int addrinarray);
95
93/* 96/*
94 * For legacy compatibility with the old APIs, a few functions 97 * For legacy compatibility with the old APIs, a few functions
95 * are provided that work on a "struct page". 98 * are provided that work on a "struct page".
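
set_pages_array_uc() and set_pages_array_wb() extend the existing set_pages_uc()/set_pages_wb() calls to whole arrays of (potentially non-contiguous) pages, so a driver can flip the caching attribute of a batch in one pass instead of issuing a separate attribute change and flush per page. A hedged sketch of the intended pairing, assuming the caller allocates and later frees the pages itself:

#include <linux/gfp.h>
#include <linux/mm.h>
#include <asm/cacheflush.h>

static int uc_pages_sketch(struct page **pages, int count)
{
	int i, err = -ENOMEM;

	for (i = 0; i < count; i++) {
		pages[i] = alloc_page(GFP_KERNEL);
		if (!pages[i])
			goto out_free;
	}

	err = set_pages_array_uc(pages, count);		/* WB -> UC in one pass */
	if (!err) {
		/* ... hand the uncached pages to the device ... */
		set_pages_array_wb(pages, count);	/* restore write-back before freeing */
	}
out_free:
	while (i--)
		__free_page(pages[i]);
	return err;
}
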
diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h
new file mode 100755
index 000000000000..222802029fa6
--- /dev/null
+++ b/arch/x86/include/asm/cpu_debug.h
@@ -0,0 +1,226 @@
1#ifndef _ASM_X86_CPU_DEBUG_H
2#define _ASM_X86_CPU_DEBUG_H
3
4/*
5 * CPU x86 architecture debug
6 *
7 * Copyright(C) 2009 Jaswinder Singh Rajput
8 */
9
10/* Register flags */
11enum cpu_debug_bit {
12/* Model Specific Registers (MSRs) */
13 CPU_MC_BIT, /* Machine Check */
14 CPU_MONITOR_BIT, /* Monitor */
15 CPU_TIME_BIT, /* Time */
16 CPU_PMC_BIT, /* Performance Monitor */
17 CPU_PLATFORM_BIT, /* Platform */
18 CPU_APIC_BIT, /* APIC */
19 CPU_POWERON_BIT, /* Power-on */
20 CPU_CONTROL_BIT, /* Control */
21 CPU_FEATURES_BIT, /* Features control */
22 CPU_LBRANCH_BIT, /* Last Branch */
23 CPU_BIOS_BIT, /* BIOS */
24 CPU_FREQ_BIT, /* Frequency */
25 CPU_MTTR_BIT, /* MTRR */
26 CPU_PERF_BIT, /* Performance */
27 CPU_CACHE_BIT, /* Cache */
28 CPU_SYSENTER_BIT, /* Sysenter */
29 CPU_THERM_BIT, /* Thermal */
30 CPU_MISC_BIT, /* Miscellaneous */
31 CPU_DEBUG_BIT, /* Debug */
32 CPU_PAT_BIT, /* PAT */
33 CPU_VMX_BIT, /* VMX */
34 CPU_CALL_BIT, /* System Call */
35 CPU_BASE_BIT, /* BASE Address */
36 CPU_VER_BIT, /* Version ID */
37 CPU_CONF_BIT, /* Configuration */
38 CPU_SMM_BIT, /* System mgmt mode */
39	CPU_SVM_BIT,				/* Secure Virtual Machine */
40	CPU_OSVM_BIT,				/* OS-Visible Workaround */
41/* Standard Registers */
42 CPU_TSS_BIT, /* Task Stack Segment */
43 CPU_CR_BIT, /* Control Registers */
44 CPU_DT_BIT, /* Descriptor Table */
45/* End of Registers flags */
46 CPU_REG_ALL_BIT, /* Select all Registers */
47};
48
49#define CPU_REG_ALL (~0) /* Select all Registers */
50
51#define CPU_MC (1 << CPU_MC_BIT)
52#define CPU_MONITOR (1 << CPU_MONITOR_BIT)
53#define CPU_TIME (1 << CPU_TIME_BIT)
54#define CPU_PMC (1 << CPU_PMC_BIT)
55#define CPU_PLATFORM (1 << CPU_PLATFORM_BIT)
56#define CPU_APIC (1 << CPU_APIC_BIT)
57#define CPU_POWERON (1 << CPU_POWERON_BIT)
58#define CPU_CONTROL (1 << CPU_CONTROL_BIT)
59#define CPU_FEATURES (1 << CPU_FEATURES_BIT)
60#define CPU_LBRANCH (1 << CPU_LBRANCH_BIT)
61#define CPU_BIOS (1 << CPU_BIOS_BIT)
62#define CPU_FREQ (1 << CPU_FREQ_BIT)
63#define CPU_MTRR (1 << CPU_MTTR_BIT)
64#define CPU_PERF (1 << CPU_PERF_BIT)
65#define CPU_CACHE (1 << CPU_CACHE_BIT)
66#define CPU_SYSENTER (1 << CPU_SYSENTER_BIT)
67#define CPU_THERM (1 << CPU_THERM_BIT)
68#define CPU_MISC (1 << CPU_MISC_BIT)
69#define CPU_DEBUG (1 << CPU_DEBUG_BIT)
70#define CPU_PAT (1 << CPU_PAT_BIT)
71#define CPU_VMX (1 << CPU_VMX_BIT)
72#define CPU_CALL (1 << CPU_CALL_BIT)
73#define CPU_BASE (1 << CPU_BASE_BIT)
74#define CPU_VER (1 << CPU_VER_BIT)
75#define CPU_CONF (1 << CPU_CONF_BIT)
76#define CPU_SMM (1 << CPU_SMM_BIT)
77#define CPU_SVM (1 << CPU_SVM_BIT)
78#define CPU_OSVM (1 << CPU_OSVM_BIT)
79#define CPU_TSS (1 << CPU_TSS_BIT)
80#define CPU_CR (1 << CPU_CR_BIT)
81#define CPU_DT (1 << CPU_DT_BIT)
82
83/* Register file flags */
84enum cpu_file_bit {
85 CPU_INDEX_BIT, /* index */
86 CPU_VALUE_BIT, /* value */
87};
88
89#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT)
90
91/*
92 * DisplayFamily_DisplayModel Processor Families/Processor Number Series
93 * -------------------------- ------------------------------------------
94 * 05_01, 05_02, 05_04 Pentium, Pentium with MMX
95 *
96 * 06_01 Pentium Pro
97 * 06_03, 06_05 Pentium II Xeon, Pentium II
98 * 06_07, 06_08, 06_0A, 06_0B	Pentium III Xeon, Pentium III
99 *
100 * 06_09, 06_0D			Pentium M
101 *
102 * 06_0E Core Duo, Core Solo
103 *
104 * 06_0F Xeon 3000, 3200, 5100, 5300, 7300 series,
105 * Core 2 Quad, Core 2 Extreme, Core 2 Duo,
106 * Pentium dual-core
107 * 06_17 Xeon 5200, 5400 series, Core 2 Quad Q9650
108 *
109 * 06_1C Atom
110 *
111 * 0F_00, 0F_01, 0F_02 Xeon, Xeon MP, Pentium 4
112 * 0F_03, 0F_04 Xeon, Xeon MP, Pentium 4, Pentium D
113 *
114 * 0F_06 Xeon 7100, 5000 Series, Xeon MP,
115 * Pentium 4, Pentium D
116 */
117
118/* Register processors bits */
119enum cpu_processor_bit {
120 CPU_NONE,
121/* Intel */
122 CPU_INTEL_PENTIUM_BIT,
123 CPU_INTEL_P6_BIT,
124 CPU_INTEL_PENTIUM_M_BIT,
125 CPU_INTEL_CORE_BIT,
126 CPU_INTEL_CORE2_BIT,
127 CPU_INTEL_ATOM_BIT,
128 CPU_INTEL_XEON_P4_BIT,
129 CPU_INTEL_XEON_MP_BIT,
130/* AMD */
131 CPU_AMD_K6_BIT,
132 CPU_AMD_K7_BIT,
133 CPU_AMD_K8_BIT,
134 CPU_AMD_0F_BIT,
135 CPU_AMD_10_BIT,
136 CPU_AMD_11_BIT,
137};
138
139#define CPU_INTEL_PENTIUM (1 << CPU_INTEL_PENTIUM_BIT)
140#define CPU_INTEL_P6 (1 << CPU_INTEL_P6_BIT)
141#define CPU_INTEL_PENTIUM_M (1 << CPU_INTEL_PENTIUM_M_BIT)
142#define CPU_INTEL_CORE (1 << CPU_INTEL_CORE_BIT)
143#define CPU_INTEL_CORE2 (1 << CPU_INTEL_CORE2_BIT)
144#define CPU_INTEL_ATOM (1 << CPU_INTEL_ATOM_BIT)
145#define CPU_INTEL_XEON_P4 (1 << CPU_INTEL_XEON_P4_BIT)
146#define CPU_INTEL_XEON_MP (1 << CPU_INTEL_XEON_MP_BIT)
147
148#define CPU_INTEL_PX (CPU_INTEL_P6 | CPU_INTEL_PENTIUM_M)
149#define CPU_INTEL_COREX (CPU_INTEL_CORE | CPU_INTEL_CORE2)
150#define CPU_INTEL_XEON (CPU_INTEL_XEON_P4 | CPU_INTEL_XEON_MP)
151#define CPU_CO_AT (CPU_INTEL_CORE | CPU_INTEL_ATOM)
152#define CPU_C2_AT (CPU_INTEL_CORE2 | CPU_INTEL_ATOM)
153#define CPU_CX_AT (CPU_INTEL_COREX | CPU_INTEL_ATOM)
154#define CPU_CX_XE (CPU_INTEL_COREX | CPU_INTEL_XEON)
155#define CPU_P6_XE (CPU_INTEL_P6 | CPU_INTEL_XEON)
156#define CPU_PM_CO_AT (CPU_INTEL_PENTIUM_M | CPU_CO_AT)
157#define CPU_C2_AT_XE (CPU_C2_AT | CPU_INTEL_XEON)
158#define CPU_CX_AT_XE (CPU_CX_AT | CPU_INTEL_XEON)
159#define CPU_P6_CX_AT (CPU_INTEL_P6 | CPU_CX_AT)
160#define CPU_P6_CX_XE (CPU_P6_XE | CPU_INTEL_COREX)
161#define CPU_P6_CX_AT_XE (CPU_INTEL_P6 | CPU_CX_AT_XE)
162#define CPU_PM_CX_AT_XE (CPU_INTEL_PENTIUM_M | CPU_CX_AT_XE)
163#define CPU_PM_CX_AT (CPU_INTEL_PENTIUM_M | CPU_CX_AT)
164#define CPU_PM_CX_XE (CPU_INTEL_PENTIUM_M | CPU_CX_XE)
165#define CPU_PX_CX_AT (CPU_INTEL_PX | CPU_CX_AT)
166#define CPU_PX_CX_AT_XE (CPU_INTEL_PX | CPU_CX_AT_XE)
167
168/* Select all supported Intel CPUs */
169#define CPU_INTEL_ALL (CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE)
170
171#define CPU_AMD_K6 (1 << CPU_AMD_K6_BIT)
172#define CPU_AMD_K7 (1 << CPU_AMD_K7_BIT)
173#define CPU_AMD_K8 (1 << CPU_AMD_K8_BIT)
174#define CPU_AMD_0F (1 << CPU_AMD_0F_BIT)
175#define CPU_AMD_10 (1 << CPU_AMD_10_BIT)
176#define CPU_AMD_11 (1 << CPU_AMD_11_BIT)
177
178#define CPU_K10_PLUS (CPU_AMD_10 | CPU_AMD_11)
179#define CPU_K0F_PLUS (CPU_AMD_0F | CPU_K10_PLUS)
180#define CPU_K8_PLUS (CPU_AMD_K8 | CPU_K0F_PLUS)
181#define CPU_K7_PLUS (CPU_AMD_K7 | CPU_K8_PLUS)
182
183/* Select all supported AMD CPUs */
184#define CPU_AMD_ALL (CPU_AMD_K6 | CPU_K7_PLUS)
185
186/* Select all supported CPUs */
187#define CPU_ALL (CPU_INTEL_ALL | CPU_AMD_ALL)
188
189#define MAX_CPU_FILES 512
190
191struct cpu_private {
192 unsigned cpu;
193 unsigned type;
194 unsigned reg;
195 unsigned file;
196};
197
198struct cpu_debug_base {
199 char *name; /* Register name */
200 unsigned flag; /* Register flag */
201 unsigned write; /* Register write flag */
202};
203
204/*
205 * Currently it looks similar to cpu_debug_base but once we add more files
206 * cpu_file_base will go in different direction
207 */
208struct cpu_file_base {
209 char *name; /* Register file name */
210 unsigned flag; /* Register file flag */
211 unsigned write; /* Register write flag */
212};
213
214struct cpu_cpuX_base {
215 struct dentry *dentry; /* Register dentry */
216 int init; /* Register index file */
217};
218
219struct cpu_debug_range {
220 unsigned min; /* Register range min */
221 unsigned max; /* Register range max */
222 unsigned flag; /* Supported flags */
223 unsigned model; /* Supported models */
224};
225
226#endif /* _ASM_X86_CPU_DEBUG_H */
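
The register-class, file and processor masks above are meant to be OR-ed together, so a single cpu_debug_range entry can describe an MSR range, the debugfs register group it belongs to, and every CPU model that implements it. A hedged sketch of how such a table and a matching test might look (the MSR numbers and flag choices are illustrative, not copied from cpu_debug.c):

#include <asm/cpu_debug.h>

static struct cpu_debug_range sketch_ranges[] = {
	{ 0x00000010, 0x00000010, CPU_TIME, CPU_INTEL_ALL },	/* e.g. TSC */
	{ 0x00000179, 0x0000017b, CPU_MC,   CPU_CX_AT_XE   },	/* e.g. MCG_* */
};

/* Does entry 'r' cover MSR 'reg' for the CPU identified by 'model_bit'? */
static int sketch_range_matches(const struct cpu_debug_range *r,
				unsigned reg, unsigned model_bit)
{
	return reg >= r->min && reg <= r->max && (r->model & model_bit);
}
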
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7301e60dc4a8..0beba0d1468d 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -213,6 +213,7 @@ extern const char * const x86_power_flags[32];
213#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) 213#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM)
214#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2) 214#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2)
215#define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3) 215#define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3)
216#define cpu_has_aes boot_cpu_has(X86_FEATURE_AES)
216#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) 217#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT)
217#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP) 218#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP)
218#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) 219#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index dc27705f5443..5623c50d67b2 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -91,7 +91,6 @@ static inline int desc_empty(const void *ptr)
91#define store_gdt(dtr) native_store_gdt(dtr) 91#define store_gdt(dtr) native_store_gdt(dtr)
92#define store_idt(dtr) native_store_idt(dtr) 92#define store_idt(dtr) native_store_idt(dtr)
93#define store_tr(tr) (tr = native_store_tr()) 93#define store_tr(tr) (tr = native_store_tr())
94#define store_ldt(ldt) asm("sldt %0":"=m" (ldt))
95 94
96#define load_TLS(t, cpu) native_load_tls(t, cpu) 95#define load_TLS(t, cpu) native_load_tls(t, cpu)
97#define set_ldt native_set_ldt 96#define set_ldt native_set_ldt
@@ -112,6 +111,8 @@ static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
112} 111}
113#endif /* CONFIG_PARAVIRT */ 112#endif /* CONFIG_PARAVIRT */
114 113
114#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))
115
115static inline void native_write_idt_entry(gate_desc *idt, int entry, 116static inline void native_write_idt_entry(gate_desc *idt, int entry,
116 const gate_desc *gate) 117 const gate_desc *gate)
117{ 118{
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 3c034f48fdb0..4994a20acbcb 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -6,7 +6,7 @@ struct dev_archdata {
6 void *acpi_handle; 6 void *acpi_handle;
7#endif 7#endif
8#ifdef CONFIG_X86_64 8#ifdef CONFIG_X86_64
9struct dma_mapping_ops *dma_ops; 9struct dma_map_ops *dma_ops;
10#endif 10#endif
11#ifdef CONFIG_DMAR 11#ifdef CONFIG_DMAR
12 void *iommu; /* hook for IOMMU specific extension */ 12 void *iommu; /* hook for IOMMU specific extension */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 132a134d12f2..cea7b74963e9 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -7,6 +7,8 @@
7 */ 7 */
8 8
9#include <linux/scatterlist.h> 9#include <linux/scatterlist.h>
10#include <linux/dma-debug.h>
11#include <linux/dma-attrs.h>
10#include <asm/io.h> 12#include <asm/io.h>
11#include <asm/swiotlb.h> 13#include <asm/swiotlb.h>
12#include <asm-generic/dma-coherent.h> 14#include <asm-generic/dma-coherent.h>
@@ -16,47 +18,9 @@ extern int iommu_merge;
16extern struct device x86_dma_fallback_dev; 18extern struct device x86_dma_fallback_dev;
17extern int panic_on_overflow; 19extern int panic_on_overflow;
18 20
19struct dma_mapping_ops { 21extern struct dma_map_ops *dma_ops;
20 int (*mapping_error)(struct device *dev, 22
21 dma_addr_t dma_addr); 23static inline struct dma_map_ops *get_dma_ops(struct device *dev)
22 void* (*alloc_coherent)(struct device *dev, size_t size,
23 dma_addr_t *dma_handle, gfp_t gfp);
24 void (*free_coherent)(struct device *dev, size_t size,
25 void *vaddr, dma_addr_t dma_handle);
26 dma_addr_t (*map_single)(struct device *hwdev, phys_addr_t ptr,
27 size_t size, int direction);
28 void (*unmap_single)(struct device *dev, dma_addr_t addr,
29 size_t size, int direction);
30 void (*sync_single_for_cpu)(struct device *hwdev,
31 dma_addr_t dma_handle, size_t size,
32 int direction);
33 void (*sync_single_for_device)(struct device *hwdev,
34 dma_addr_t dma_handle, size_t size,
35 int direction);
36 void (*sync_single_range_for_cpu)(struct device *hwdev,
37 dma_addr_t dma_handle, unsigned long offset,
38 size_t size, int direction);
39 void (*sync_single_range_for_device)(struct device *hwdev,
40 dma_addr_t dma_handle, unsigned long offset,
41 size_t size, int direction);
42 void (*sync_sg_for_cpu)(struct device *hwdev,
43 struct scatterlist *sg, int nelems,
44 int direction);
45 void (*sync_sg_for_device)(struct device *hwdev,
46 struct scatterlist *sg, int nelems,
47 int direction);
48 int (*map_sg)(struct device *hwdev, struct scatterlist *sg,
49 int nents, int direction);
50 void (*unmap_sg)(struct device *hwdev,
51 struct scatterlist *sg, int nents,
52 int direction);
53 int (*dma_supported)(struct device *hwdev, u64 mask);
54 int is_phys;
55};
56
57extern struct dma_mapping_ops *dma_ops;
58
59static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
60{ 24{
61#ifdef CONFIG_X86_32 25#ifdef CONFIG_X86_32
62 return dma_ops; 26 return dma_ops;
@@ -71,7 +35,7 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
71/* Make sure we keep the same behaviour */ 35/* Make sure we keep the same behaviour */
72static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) 36static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
73{ 37{
74 struct dma_mapping_ops *ops = get_dma_ops(dev); 38 struct dma_map_ops *ops = get_dma_ops(dev);
75 if (ops->mapping_error) 39 if (ops->mapping_error)
76 return ops->mapping_error(dev, dma_addr); 40 return ops->mapping_error(dev, dma_addr);
77 41
@@ -90,137 +54,167 @@ extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
90 54
91static inline dma_addr_t 55static inline dma_addr_t
92dma_map_single(struct device *hwdev, void *ptr, size_t size, 56dma_map_single(struct device *hwdev, void *ptr, size_t size,
93 int direction) 57 enum dma_data_direction dir)
94{ 58{
95 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 59 struct dma_map_ops *ops = get_dma_ops(hwdev);
96 60 dma_addr_t addr;
97 BUG_ON(!valid_dma_direction(direction)); 61
98 return ops->map_single(hwdev, virt_to_phys(ptr), size, direction); 62 BUG_ON(!valid_dma_direction(dir));
63 addr = ops->map_page(hwdev, virt_to_page(ptr),
64 (unsigned long)ptr & ~PAGE_MASK, size,
65 dir, NULL);
66 debug_dma_map_page(hwdev, virt_to_page(ptr),
67 (unsigned long)ptr & ~PAGE_MASK, size,
68 dir, addr, true);
69 return addr;
99} 70}
100 71
101static inline void 72static inline void
102dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, 73dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
103 int direction) 74 enum dma_data_direction dir)
104{ 75{
105 struct dma_mapping_ops *ops = get_dma_ops(dev); 76 struct dma_map_ops *ops = get_dma_ops(dev);
106 77
107 BUG_ON(!valid_dma_direction(direction)); 78 BUG_ON(!valid_dma_direction(dir));
108 if (ops->unmap_single) 79 if (ops->unmap_page)
109 ops->unmap_single(dev, addr, size, direction); 80 ops->unmap_page(dev, addr, size, dir, NULL);
81 debug_dma_unmap_page(dev, addr, size, dir, true);
110} 82}
111 83
112static inline int 84static inline int
113dma_map_sg(struct device *hwdev, struct scatterlist *sg, 85dma_map_sg(struct device *hwdev, struct scatterlist *sg,
114 int nents, int direction) 86 int nents, enum dma_data_direction dir)
115{ 87{
116 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 88 struct dma_map_ops *ops = get_dma_ops(hwdev);
89 int ents;
90
91 BUG_ON(!valid_dma_direction(dir));
92 ents = ops->map_sg(hwdev, sg, nents, dir, NULL);
93 debug_dma_map_sg(hwdev, sg, nents, ents, dir);
117 94
118 BUG_ON(!valid_dma_direction(direction)); 95 return ents;
119 return ops->map_sg(hwdev, sg, nents, direction);
120} 96}
121 97
122static inline void 98static inline void
123dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, 99dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
124 int direction) 100 enum dma_data_direction dir)
125{ 101{
126 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 102 struct dma_map_ops *ops = get_dma_ops(hwdev);
127 103
128 BUG_ON(!valid_dma_direction(direction)); 104 BUG_ON(!valid_dma_direction(dir));
105 debug_dma_unmap_sg(hwdev, sg, nents, dir);
129 if (ops->unmap_sg) 106 if (ops->unmap_sg)
130 ops->unmap_sg(hwdev, sg, nents, direction); 107 ops->unmap_sg(hwdev, sg, nents, dir, NULL);
131} 108}
132 109
133static inline void 110static inline void
134dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, 111dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
135 size_t size, int direction) 112 size_t size, enum dma_data_direction dir)
136{ 113{
137 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 114 struct dma_map_ops *ops = get_dma_ops(hwdev);
138 115
139 BUG_ON(!valid_dma_direction(direction)); 116 BUG_ON(!valid_dma_direction(dir));
140 if (ops->sync_single_for_cpu) 117 if (ops->sync_single_for_cpu)
141 ops->sync_single_for_cpu(hwdev, dma_handle, size, direction); 118 ops->sync_single_for_cpu(hwdev, dma_handle, size, dir);
119 debug_dma_sync_single_for_cpu(hwdev, dma_handle, size, dir);
142 flush_write_buffers(); 120 flush_write_buffers();
143} 121}
144 122
145static inline void 123static inline void
146dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle, 124dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
147 size_t size, int direction) 125 size_t size, enum dma_data_direction dir)
148{ 126{
149 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 127 struct dma_map_ops *ops = get_dma_ops(hwdev);
150 128
151 BUG_ON(!valid_dma_direction(direction)); 129 BUG_ON(!valid_dma_direction(dir));
152 if (ops->sync_single_for_device) 130 if (ops->sync_single_for_device)
153 ops->sync_single_for_device(hwdev, dma_handle, size, direction); 131 ops->sync_single_for_device(hwdev, dma_handle, size, dir);
132 debug_dma_sync_single_for_device(hwdev, dma_handle, size, dir);
154 flush_write_buffers(); 133 flush_write_buffers();
155} 134}
156 135
157static inline void 136static inline void
158dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, 137dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
159 unsigned long offset, size_t size, int direction) 138 unsigned long offset, size_t size,
139 enum dma_data_direction dir)
160{ 140{
161 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 141 struct dma_map_ops *ops = get_dma_ops(hwdev);
162 142
163 BUG_ON(!valid_dma_direction(direction)); 143 BUG_ON(!valid_dma_direction(dir));
164 if (ops->sync_single_range_for_cpu) 144 if (ops->sync_single_range_for_cpu)
165 ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, 145 ops->sync_single_range_for_cpu(hwdev, dma_handle, offset,
166 size, direction); 146 size, dir);
147 debug_dma_sync_single_range_for_cpu(hwdev, dma_handle,
148 offset, size, dir);
167 flush_write_buffers(); 149 flush_write_buffers();
168} 150}
169 151
170static inline void 152static inline void
171dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, 153dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
172 unsigned long offset, size_t size, 154 unsigned long offset, size_t size,
173 int direction) 155 enum dma_data_direction dir)
174{ 156{
175 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 157 struct dma_map_ops *ops = get_dma_ops(hwdev);
176 158
177 BUG_ON(!valid_dma_direction(direction)); 159 BUG_ON(!valid_dma_direction(dir));
178 if (ops->sync_single_range_for_device) 160 if (ops->sync_single_range_for_device)
179 ops->sync_single_range_for_device(hwdev, dma_handle, 161 ops->sync_single_range_for_device(hwdev, dma_handle,
180 offset, size, direction); 162 offset, size, dir);
163 debug_dma_sync_single_range_for_device(hwdev, dma_handle,
164 offset, size, dir);
181 flush_write_buffers(); 165 flush_write_buffers();
182} 166}
183 167
184static inline void 168static inline void
185dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, 169dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
186 int nelems, int direction) 170 int nelems, enum dma_data_direction dir)
187{ 171{
188 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 172 struct dma_map_ops *ops = get_dma_ops(hwdev);
189 173
190 BUG_ON(!valid_dma_direction(direction)); 174 BUG_ON(!valid_dma_direction(dir));
191 if (ops->sync_sg_for_cpu) 175 if (ops->sync_sg_for_cpu)
192 ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); 176 ops->sync_sg_for_cpu(hwdev, sg, nelems, dir);
177 debug_dma_sync_sg_for_cpu(hwdev, sg, nelems, dir);
193 flush_write_buffers(); 178 flush_write_buffers();
194} 179}
195 180
196static inline void 181static inline void
197dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, 182dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
198 int nelems, int direction) 183 int nelems, enum dma_data_direction dir)
199{ 184{
200 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 185 struct dma_map_ops *ops = get_dma_ops(hwdev);
201 186
202 BUG_ON(!valid_dma_direction(direction)); 187 BUG_ON(!valid_dma_direction(dir));
203 if (ops->sync_sg_for_device) 188 if (ops->sync_sg_for_device)
204 ops->sync_sg_for_device(hwdev, sg, nelems, direction); 189 ops->sync_sg_for_device(hwdev, sg, nelems, dir);
190 debug_dma_sync_sg_for_device(hwdev, sg, nelems, dir);
205 191
206 flush_write_buffers(); 192 flush_write_buffers();
207} 193}
208 194
209static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, 195static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
210 size_t offset, size_t size, 196 size_t offset, size_t size,
211 int direction) 197 enum dma_data_direction dir)
212{ 198{
213 struct dma_mapping_ops *ops = get_dma_ops(dev); 199 struct dma_map_ops *ops = get_dma_ops(dev);
200 dma_addr_t addr;
214 201
215 BUG_ON(!valid_dma_direction(direction)); 202 BUG_ON(!valid_dma_direction(dir));
216 return ops->map_single(dev, page_to_phys(page) + offset, 203 addr = ops->map_page(dev, page, offset, size, dir, NULL);
217 size, direction); 204 debug_dma_map_page(dev, page, offset, size, dir, addr, false);
205
206 return addr;
218} 207}
219 208
220static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, 209static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
221 size_t size, int direction) 210 size_t size, enum dma_data_direction dir)
222{ 211{
223 dma_unmap_single(dev, addr, size, direction); 212 struct dma_map_ops *ops = get_dma_ops(dev);
213
214 BUG_ON(!valid_dma_direction(dir));
215 if (ops->unmap_page)
216 ops->unmap_page(dev, addr, size, dir, NULL);
217 debug_dma_unmap_page(dev, addr, size, dir, false);
224} 218}
225 219
226static inline void 220static inline void
@@ -266,7 +260,7 @@ static inline void *
266dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, 260dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
267 gfp_t gfp) 261 gfp_t gfp)
268{ 262{
269 struct dma_mapping_ops *ops = get_dma_ops(dev); 263 struct dma_map_ops *ops = get_dma_ops(dev);
270 void *memory; 264 void *memory;
271 265
272 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); 266 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
@@ -285,20 +279,24 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
285 if (!ops->alloc_coherent) 279 if (!ops->alloc_coherent)
286 return NULL; 280 return NULL;
287 281
288 return ops->alloc_coherent(dev, size, dma_handle, 282 memory = ops->alloc_coherent(dev, size, dma_handle,
289 dma_alloc_coherent_gfp_flags(dev, gfp)); 283 dma_alloc_coherent_gfp_flags(dev, gfp));
284 debug_dma_alloc_coherent(dev, size, *dma_handle, memory);
285
286 return memory;
290} 287}
291 288
292static inline void dma_free_coherent(struct device *dev, size_t size, 289static inline void dma_free_coherent(struct device *dev, size_t size,
293 void *vaddr, dma_addr_t bus) 290 void *vaddr, dma_addr_t bus)
294{ 291{
295 struct dma_mapping_ops *ops = get_dma_ops(dev); 292 struct dma_map_ops *ops = get_dma_ops(dev);
296 293
297 WARN_ON(irqs_disabled()); /* for portability */ 294 WARN_ON(irqs_disabled()); /* for portability */
298 295
299 if (dma_release_from_coherent(dev, get_order(size), vaddr)) 296 if (dma_release_from_coherent(dev, get_order(size), vaddr))
300 return; 297 return;
301 298
299 debug_dma_free_coherent(dev, size, vaddr, bus);
302 if (ops->free_coherent) 300 if (ops->free_coherent)
303 ops->free_coherent(dev, size, vaddr, bus); 301 ops->free_coherent(dev, size, vaddr, bus);
304} 302}
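
The conversion above replaces the x86-private struct dma_mapping_ops with the cross-architecture struct dma_map_ops, switches the direction argument from a bare int to enum dma_data_direction, and threads the new dma-debug hooks through every wrapper; the driver-facing calls keep their familiar shape. A minimal sketch of a driver using the unchanged external API (the device, buffer and length are illustrative):

#include <linux/dma-mapping.h>

static int dma_tx_sketch(struct device *dev, void *buf, size_t len)
{
	dma_addr_t handle;

	handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, handle))
		return -EIO;

	/* ... program the hardware with 'handle' and wait for the transfer ... */

	dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
	return 0;
}
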
diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
index bc68212c6bc0..fd8f9e2ca35f 100644
--- a/arch/x86/include/asm/dmi.h
+++ b/arch/x86/include/asm/dmi.h
@@ -1,22 +1,15 @@
1#ifndef _ASM_X86_DMI_H 1#ifndef _ASM_X86_DMI_H
2#define _ASM_X86_DMI_H 2#define _ASM_X86_DMI_H
3 3
4#include <asm/io.h> 4#include <linux/compiler.h>
5 5#include <linux/init.h>
6#define DMI_MAX_DATA 2048
7 6
8extern int dmi_alloc_index; 7#include <asm/io.h>
9extern char dmi_alloc_data[DMI_MAX_DATA]; 8#include <asm/setup.h>
10 9
11/* This is so early that there is no good way to allocate dynamic memory. 10static __always_inline __init void *dmi_alloc(unsigned len)
12 Allocate data in an BSS array. */
13static inline void *dmi_alloc(unsigned len)
14{ 11{
15 int idx = dmi_alloc_index; 12 return extend_brk(len, sizeof(int));
16 if ((dmi_alloc_index + len) > DMI_MAX_DATA)
17 return NULL;
18 dmi_alloc_index += len;
19 return dmi_alloc_data + idx;
20} 13}
21 14
22/* Use early IO mappings for DMI because it's initialized early */ 15/* Use early IO mappings for DMI because it's initialized early */
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 00d41ce4c844..7ecba4d85089 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -72,7 +72,7 @@ extern int e820_all_mapped(u64 start, u64 end, unsigned type);
72extern void e820_add_region(u64 start, u64 size, int type); 72extern void e820_add_region(u64 start, u64 size, int type);
73extern void e820_print_map(char *who); 73extern void e820_print_map(char *who);
74extern int 74extern int
75sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, int *pnr_map); 75sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, u32 *pnr_map);
76extern u64 e820_update_range(u64 start, u64 size, unsigned old_type, 76extern u64 e820_update_range(u64 start, u64 size, unsigned old_type,
77 unsigned new_type); 77 unsigned new_type);
78extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type, 78extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type,
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index b55b4a7fbefd..db24c2278be0 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -55,29 +55,4 @@ struct dyn_arch_ftrace {
55#endif /* __ASSEMBLY__ */ 55#endif /* __ASSEMBLY__ */
56#endif /* CONFIG_FUNCTION_TRACER */ 56#endif /* CONFIG_FUNCTION_TRACER */
57 57
58#ifdef CONFIG_FUNCTION_GRAPH_TRACER
59
60#ifndef __ASSEMBLY__
61
62/*
63 * Stack of return addresses for functions
64 * of a thread.
65 * Used in struct thread_info
66 */
67struct ftrace_ret_stack {
68 unsigned long ret;
69 unsigned long func;
70 unsigned long long calltime;
71};
72
73/*
74 * Primary handler of a function return.
75 * It relays on ftrace_return_to_handler.
76 * Defined in entry_32/64.S
77 */
78extern void return_to_handler(void);
79
80#endif /* __ASSEMBLY__ */
81#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
82
83#endif /* _ASM_X86_FTRACE_H */ 58#endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index bf9276bea660..014c2b85ae45 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -63,6 +63,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
63void *kmap_atomic(struct page *page, enum km_type type); 63void *kmap_atomic(struct page *page, enum km_type type);
64void kunmap_atomic(void *kvaddr, enum km_type type); 64void kunmap_atomic(void *kvaddr, enum km_type type);
65void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); 65void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
66void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
66struct page *kmap_atomic_to_page(void *ptr); 67struct page *kmap_atomic_to_page(void *ptr);
67 68
68#ifndef CONFIG_PARAVIRT 69#ifndef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 50ca486fd88c..1f7e62517284 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -129,13 +129,6 @@ typedef struct compat_siginfo {
129 } _sifields; 129 } _sifields;
130} compat_siginfo_t; 130} compat_siginfo_t;
131 131
132struct ustat32 {
133 __u32 f_tfree;
134 compat_ino_t f_tinode;
135 char f_fname[6];
136 char f_fpack[6];
137};
138
139#define IA32_STACK_TOP IA32_PAGE_OFFSET 132#define IA32_STACK_TOP IA32_PAGE_OFFSET
140 133
141#ifdef __KERNEL__ 134#ifdef __KERNEL__
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 59cb4a1317b7..373cc2bbcad2 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -162,7 +162,8 @@ extern int (*ioapic_renumber_irq)(int ioapic, int irq);
162extern void ioapic_init_mappings(void); 162extern void ioapic_init_mappings(void);
163 163
164#ifdef CONFIG_X86_64 164#ifdef CONFIG_X86_64
165extern int save_mask_IO_APIC_setup(void); 165extern int save_IO_APIC_setup(void);
166extern void mask_IO_APIC_setup(void);
166extern void restore_IO_APIC_setup(void); 167extern void restore_IO_APIC_setup(void);
167extern void reinit_intr_remapped_IO_APIC(int); 168extern void reinit_intr_remapped_IO_APIC(int);
168#endif 169#endif
@@ -172,7 +173,7 @@ extern void probe_nr_irqs_gsi(void);
172extern int setup_ioapic_entry(int apic, int irq, 173extern int setup_ioapic_entry(int apic, int irq,
173 struct IO_APIC_route_entry *entry, 174 struct IO_APIC_route_entry *entry,
174 unsigned int destination, int trigger, 175 unsigned int destination, int trigger,
175 int polarity, int vector); 176 int polarity, int vector, int pin);
176extern void ioapic_write_entry(int apic, int pin, 177extern void ioapic_write_entry(int apic, int pin,
177 struct IO_APIC_route_entry e); 178 struct IO_APIC_route_entry e);
178#else /* !CONFIG_X86_IO_APIC */ 179#else /* !CONFIG_X86_IO_APIC */
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index a6ee9e6f530f..af326a2975b5 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -3,7 +3,7 @@
3 3
4extern void pci_iommu_shutdown(void); 4extern void pci_iommu_shutdown(void);
5extern void no_iommu_init(void); 5extern void no_iommu_init(void);
6extern struct dma_mapping_ops nommu_dma_ops; 6extern struct dma_map_ops nommu_dma_ops;
7extern int force_iommu, no_iommu; 7extern int force_iommu, no_iommu;
8extern int iommu_detected; 8extern int iommu_detected;
9 9
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 20e1fd588dbf..0396760fccb8 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -1,8 +1,6 @@
1#ifndef _ASM_X86_IRQ_REMAPPING_H 1#ifndef _ASM_X86_IRQ_REMAPPING_H
2#define _ASM_X86_IRQ_REMAPPING_H 2#define _ASM_X86_IRQ_REMAPPING_H
3 3
4extern int x2apic;
5
6#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8) 4#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
7 5
8#endif /* _ASM_X86_IRQ_REMAPPING_H */ 6#endif /* _ASM_X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 886c9402ec45..dc3f6cf11704 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -15,6 +15,7 @@
15#define __KVM_HAVE_DEVICE_ASSIGNMENT 15#define __KVM_HAVE_DEVICE_ASSIGNMENT
16#define __KVM_HAVE_MSI 16#define __KVM_HAVE_MSI
17#define __KVM_HAVE_USER_NMI 17#define __KVM_HAVE_USER_NMI
18#define __KVM_HAVE_GUEST_DEBUG
18 19
19/* Architectural interrupt line count. */ 20/* Architectural interrupt line count. */
20#define KVM_NR_INTERRUPTS 256 21#define KVM_NR_INTERRUPTS 256
@@ -212,7 +213,30 @@ struct kvm_pit_channel_state {
212 __s64 count_load_time; 213 __s64 count_load_time;
213}; 214};
214 215
216struct kvm_debug_exit_arch {
217 __u32 exception;
218 __u32 pad;
219 __u64 pc;
220 __u64 dr6;
221 __u64 dr7;
222};
223
224#define KVM_GUESTDBG_USE_SW_BP 0x00010000
225#define KVM_GUESTDBG_USE_HW_BP 0x00020000
226#define KVM_GUESTDBG_INJECT_DB 0x00040000
227#define KVM_GUESTDBG_INJECT_BP 0x00080000
228
229/* for KVM_SET_GUEST_DEBUG */
230struct kvm_guest_debug_arch {
231 __u64 debugreg[8];
232};
233
215struct kvm_pit_state { 234struct kvm_pit_state {
216 struct kvm_pit_channel_state channels[3]; 235 struct kvm_pit_channel_state channels[3];
217}; 236};
237
238struct kvm_reinject_control {
239 __u8 pit_reinject;
240 __u8 reserved[31];
241};
218#endif /* _ASM_X86_KVM_H */ 242#endif /* _ASM_X86_KVM_H */
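
The new kvm_guest_debug_arch and KVM_GUESTDBG_* pieces are consumed through the KVM_SET_GUEST_DEBUG ioctl. A hedged userspace sketch, assuming the generic struct kvm_guest_debug (control word plus this arch block) and KVM_GUESTDBG_ENABLE from <linux/kvm.h> in the same series; the DR7 value is simplified and real code would also honour the fixed-1 bits:

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <string.h>

/* arm one hardware execution breakpoint on a vcpu fd */
static int set_hw_breakpoint(int vcpu_fd, __u64 addr)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
	dbg.arch.debugreg[0] = addr;	/* DR0: breakpoint address */
	dbg.arch.debugreg[7] = 0x1;	/* DR7: local-enable slot 0, execute, 1 byte */

	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}
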
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 730843d1d2fb..f0faf58044ff 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -22,6 +22,7 @@
22#include <asm/pvclock-abi.h> 22#include <asm/pvclock-abi.h>
23#include <asm/desc.h> 23#include <asm/desc.h>
24#include <asm/mtrr.h> 24#include <asm/mtrr.h>
25#include <asm/msr-index.h>
25 26
26#define KVM_MAX_VCPUS 16 27#define KVM_MAX_VCPUS 16
27#define KVM_MEMORY_SLOTS 32 28#define KVM_MEMORY_SLOTS 32
@@ -134,11 +135,18 @@ enum {
134 135
135#define KVM_NR_MEM_OBJS 40 136#define KVM_NR_MEM_OBJS 40
136 137
137struct kvm_guest_debug { 138#define KVM_NR_DB_REGS 4
138 int enabled; 139
139 unsigned long bp[4]; 140#define DR6_BD (1 << 13)
140 int singlestep; 141#define DR6_BS (1 << 14)
141}; 142#define DR6_FIXED_1 0xffff0ff0
143#define DR6_VOLATILE 0x0000e00f
144
145#define DR7_BP_EN_MASK 0x000000ff
146#define DR7_GE (1 << 9)
147#define DR7_GD (1 << 13)
148#define DR7_FIXED_1 0x00000400
149#define DR7_VOLATILE 0xffff23ff
142 150
143/* 151/*
144 * We don't want allocation failures within the mmu code, so we preallocate 152 * We don't want allocation failures within the mmu code, so we preallocate
@@ -162,7 +170,8 @@ struct kvm_pte_chain {
162 * bits 0:3 - total guest paging levels (2-4, or zero for real mode) 170 * bits 0:3 - total guest paging levels (2-4, or zero for real mode)
163 * bits 4:7 - page table level for this shadow (1-4) 171 * bits 4:7 - page table level for this shadow (1-4)
164 * bits 8:9 - page table quadrant for 2-level guests 172 * bits 8:9 - page table quadrant for 2-level guests
165 * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode) 173 * bit 16 - direct mapping of virtual to physical mapping at gfn
174 * used for real mode and two-dimensional paging
166 * bits 17:19 - common access permissions for all ptes in this shadow page 175 * bits 17:19 - common access permissions for all ptes in this shadow page
167 */ 176 */
168union kvm_mmu_page_role { 177union kvm_mmu_page_role {
@@ -172,9 +181,10 @@ union kvm_mmu_page_role {
172 unsigned level:4; 181 unsigned level:4;
173 unsigned quadrant:2; 182 unsigned quadrant:2;
174 unsigned pad_for_nice_hex_output:6; 183 unsigned pad_for_nice_hex_output:6;
175 unsigned metaphysical:1; 184 unsigned direct:1;
176 unsigned access:3; 185 unsigned access:3;
177 unsigned invalid:1; 186 unsigned invalid:1;
187 unsigned cr4_pge:1;
178 }; 188 };
179}; 189};
180 190
@@ -218,6 +228,18 @@ struct kvm_pv_mmu_op_buffer {
218 char buf[512] __aligned(sizeof(long)); 228 char buf[512] __aligned(sizeof(long));
219}; 229};
220 230
231struct kvm_pio_request {
232 unsigned long count;
233 int cur_count;
234 gva_t guest_gva;
235 int in;
236 int port;
237 int size;
238 int string;
239 int down;
240 int rep;
241};
242
221/* 243/*
222 * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level 244 * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
223 * 32-bit). The kvm_mmu structure abstracts the details of the current mmu 245 * 32-bit). The kvm_mmu structure abstracts the details of the current mmu
@@ -236,6 +258,7 @@ struct kvm_mmu {
236 hpa_t root_hpa; 258 hpa_t root_hpa;
237 int root_level; 259 int root_level;
238 int shadow_root_level; 260 int shadow_root_level;
261 union kvm_mmu_page_role base_role;
239 262
240 u64 *pae_root; 263 u64 *pae_root;
241}; 264};
@@ -258,6 +281,7 @@ struct kvm_vcpu_arch {
258 unsigned long cr3; 281 unsigned long cr3;
259 unsigned long cr4; 282 unsigned long cr4;
260 unsigned long cr8; 283 unsigned long cr8;
284 u32 hflags;
261 u64 pdptrs[4]; /* pae */ 285 u64 pdptrs[4]; /* pae */
262 u64 shadow_efer; 286 u64 shadow_efer;
263 u64 apic_base; 287 u64 apic_base;
@@ -338,6 +362,15 @@ struct kvm_vcpu_arch {
338 362
339 struct mtrr_state_type mtrr_state; 363 struct mtrr_state_type mtrr_state;
340 u32 pat; 364 u32 pat;
365
366 int switch_db_regs;
367 unsigned long host_db[KVM_NR_DB_REGS];
368 unsigned long host_dr6;
369 unsigned long host_dr7;
370 unsigned long db[KVM_NR_DB_REGS];
371 unsigned long dr6;
372 unsigned long dr7;
373 unsigned long eff_db[KVM_NR_DB_REGS];
341}; 374};
342 375
343struct kvm_mem_alias { 376struct kvm_mem_alias {
@@ -378,6 +411,7 @@ struct kvm_arch{
378 411
379 unsigned long irq_sources_bitmap; 412 unsigned long irq_sources_bitmap;
380 unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; 413 unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
414 u64 vm_init_tsc;
381}; 415};
382 416
383struct kvm_vm_stat { 417struct kvm_vm_stat {
@@ -446,8 +480,7 @@ struct kvm_x86_ops {
446 void (*vcpu_put)(struct kvm_vcpu *vcpu); 480 void (*vcpu_put)(struct kvm_vcpu *vcpu);
447 481
448 int (*set_guest_debug)(struct kvm_vcpu *vcpu, 482 int (*set_guest_debug)(struct kvm_vcpu *vcpu,
449 struct kvm_debug_guest *dbg); 483 struct kvm_guest_debug *dbg);
450 void (*guest_debug_pre)(struct kvm_vcpu *vcpu);
451 int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); 484 int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
452 int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); 485 int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
453 u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); 486 u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -583,16 +616,12 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
583void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, 616void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
584 u32 error_code); 617 u32 error_code);
585 618
586void kvm_pic_set_irq(void *opaque, int irq, int level); 619int kvm_pic_set_irq(void *opaque, int irq, int level);
587 620
588void kvm_inject_nmi(struct kvm_vcpu *vcpu); 621void kvm_inject_nmi(struct kvm_vcpu *vcpu);
589 622
590void fx_init(struct kvm_vcpu *vcpu); 623void fx_init(struct kvm_vcpu *vcpu);
591 624
592int emulator_read_std(unsigned long addr,
593 void *val,
594 unsigned int bytes,
595 struct kvm_vcpu *vcpu);
596int emulator_write_emulated(unsigned long addr, 625int emulator_write_emulated(unsigned long addr,
597 const void *val, 626 const void *val,
598 unsigned int bytes, 627 unsigned int bytes,
@@ -737,6 +766,10 @@ enum {
737 TASK_SWITCH_GATE = 3, 766 TASK_SWITCH_GATE = 3,
738}; 767};
739 768
769#define HF_GIF_MASK (1 << 0)
770#define HF_HIF_MASK (1 << 1)
771#define HF_VINTR_MASK (1 << 2)
772
740/* 773/*
741 * Hardware virtualization extension instructions may fault if a 774 * Hardware virtualization extension instructions may fault if a
742 * reboot turns off virtualization while processes are running. 775 * reboot turns off virtualization while processes are running.
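
The DR6/DR7 masks and the new per-vcpu db/dr6/dr7/eff_db fields exist so the host only swaps debug registers when the guest actually uses them. A sketch of that decision with a hypothetical helper name; only the masks and the vcpu->arch fields come from this patch:

/* illustrative only: does the guest have any breakpoint armed? */
static bool guest_debug_regs_active(struct kvm_vcpu *vcpu)
{
	/* any of the eight DR7 enable bits, or general-detect, set? */
	return (vcpu->arch.dr7 & (DR7_BP_EN_MASK | DR7_GD)) != 0;
}
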
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index a0d70b46c27c..12d55e773eb6 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -1,6 +1,8 @@
1#ifndef _ASM_X86_LINKAGE_H 1#ifndef _ASM_X86_LINKAGE_H
2#define _ASM_X86_LINKAGE_H 2#define _ASM_X86_LINKAGE_H
3 3
4#include <linux/stringify.h>
5
4#undef notrace 6#undef notrace
5#define notrace __attribute__((no_instrument_function)) 7#define notrace __attribute__((no_instrument_function))
6 8
@@ -53,14 +55,9 @@
53 .globl name; \ 55 .globl name; \
54 name: 56 name:
55 57
56#ifdef CONFIG_X86_64 58#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16)
57#define __ALIGN .p2align 4,,15 59#define __ALIGN .p2align 4, 0x90
58#define __ALIGN_STR ".p2align 4,,15" 60#define __ALIGN_STR __stringify(__ALIGN)
59#endif
60
61#ifdef CONFIG_X86_ALIGNMENT_16
62#define __ALIGN .align 16,0x90
63#define __ALIGN_STR ".align 16,0x90"
64#endif 61#endif
65 62
66#endif /* __ASSEMBLY__ */ 63#endif /* __ASSEMBLY__ */
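
Folding the two alignment blocks into one and deriving __ALIGN_STR from __ALIGN via __stringify() keeps the directive and its string form from drifting apart. Roughly, the preprocessor now yields the following; the inline-asm use and label name are illustrative:

/* __ALIGN     expands to:  .p2align 4, 0x90      (for .S files)    */
/* __ALIGN_STR expands to:  ".p2align 4, 0x90"    (for inline asm)  */
asm(__ALIGN_STR "\n" "my_entry_label:");	/* same directive, emitted from C */
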
diff --git a/arch/x86/include/asm/msidef.h b/arch/x86/include/asm/msidef.h
index 6706b3006f13..4cc48af23fef 100644
--- a/arch/x86/include/asm/msidef.h
+++ b/arch/x86/include/asm/msidef.h
@@ -47,6 +47,7 @@
47#define MSI_ADDR_DEST_ID_MASK 0x00ffff0 47#define MSI_ADDR_DEST_ID_MASK 0x00ffff0
48#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ 48#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
49 MSI_ADDR_DEST_ID_MASK) 49 MSI_ADDR_DEST_ID_MASK)
50#define MSI_ADDR_EXT_DEST_ID(dest) ((dest) & 0xffffff00)
50 51
51#define MSI_ADDR_IR_EXT_INT (1 << 4) 52#define MSI_ADDR_IR_EXT_INT (1 << 4)
52#define MSI_ADDR_IR_SHV (1 << 3) 53#define MSI_ADDR_IR_SHV (1 << 3)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 2dbd2314139e..ec41fc16c167 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -18,11 +18,15 @@
18#define _EFER_LME 8 /* Long mode enable */ 18#define _EFER_LME 8 /* Long mode enable */
19#define _EFER_LMA 10 /* Long mode active (read-only) */ 19#define _EFER_LMA 10 /* Long mode active (read-only) */
20#define _EFER_NX 11 /* No execute enable */ 20#define _EFER_NX 11 /* No execute enable */
21#define _EFER_SVME 12 /* Enable virtualization */
22#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
21 23
22#define EFER_SCE (1<<_EFER_SCE) 24#define EFER_SCE (1<<_EFER_SCE)
23#define EFER_LME (1<<_EFER_LME) 25#define EFER_LME (1<<_EFER_LME)
24#define EFER_LMA (1<<_EFER_LMA) 26#define EFER_LMA (1<<_EFER_LMA)
25#define EFER_NX (1<<_EFER_NX) 27#define EFER_NX (1<<_EFER_NX)
28#define EFER_SVME (1<<_EFER_SVME)
29#define EFER_FFXSR (1<<_EFER_FFXSR)
26 30
27/* Intel MSRs. Some also available on other CPUs */ 31/* Intel MSRs. Some also available on other CPUs */
28#define MSR_IA32_PERFCTR0 0x000000c1 32#define MSR_IA32_PERFCTR0 0x000000c1
@@ -365,4 +369,9 @@
365#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b 369#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
366#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c 370#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
367 371
372/* AMD-V MSRs */
373
374#define MSR_VM_CR 0xc0010114
375#define MSR_VM_HSAVE_PA 0xc0010117
376
368#endif /* _ASM_X86_MSR_INDEX_H */ 377#endif /* _ASM_X86_MSR_INDEX_H */
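
Moving EFER_SVME/EFER_FFXSR and the AMD-V MSR numbers here lets common code drop private copies such as svm.h's MSR_EFER_SVME_MASK (removed later in this diff). The virtext.h hunk below already shows the intended use; the same pattern as a standalone sketch:

#include <asm/msr.h>
#include <asm/msr-index.h>

/* disable SVM on the current CPU (mirrors cpu_svm_disable()) */
static void svm_disable_this_cpu(void)
{
	u64 efer;

	wrmsrl(MSR_VM_HSAVE_PA, 0);		/* drop the host save area */
	rdmsrl(MSR_EFER, efer);
	wrmsrl(MSR_EFER, efer & ~EFER_SVME);	/* clear the SVME enable bit */
}
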
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index f1e4a79a6e41..0f915ae649a7 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -39,6 +39,11 @@
39#define __VIRTUAL_MASK_SHIFT 32 39#define __VIRTUAL_MASK_SHIFT 32
40#endif /* CONFIG_X86_PAE */ 40#endif /* CONFIG_X86_PAE */
41 41
42/*
43 * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S)
44 */
45#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
46
42#ifndef __ASSEMBLY__ 47#ifndef __ASSEMBLY__
43 48
44/* 49/*
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 0617d5cc9712..7727aa8b7dda 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -317,8 +317,6 @@ struct pv_mmu_ops {
317#if PAGETABLE_LEVELS >= 3 317#if PAGETABLE_LEVELS >= 3
318#ifdef CONFIG_X86_PAE 318#ifdef CONFIG_X86_PAE
319 void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); 319 void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
320 void (*set_pte_present)(struct mm_struct *mm, unsigned long addr,
321 pte_t *ptep, pte_t pte);
322 void (*pte_clear)(struct mm_struct *mm, unsigned long addr, 320 void (*pte_clear)(struct mm_struct *mm, unsigned long addr,
323 pte_t *ptep); 321 pte_t *ptep);
324 void (*pmd_clear)(pmd_t *pmdp); 322 void (*pmd_clear)(pmd_t *pmdp);
@@ -389,7 +387,7 @@ extern struct pv_lock_ops pv_lock_ops;
389 387
390#define paravirt_type(op) \ 388#define paravirt_type(op) \
391 [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ 389 [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
392 [paravirt_opptr] "m" (op) 390 [paravirt_opptr] "i" (&(op))
393#define paravirt_clobber(clobber) \ 391#define paravirt_clobber(clobber) \
394 [paravirt_clobber] "i" (clobber) 392 [paravirt_clobber] "i" (clobber)
395 393
@@ -443,7 +441,7 @@ int paravirt_disable_iospace(void);
443 * offset into the paravirt_patch_template structure, and can therefore be 441 * offset into the paravirt_patch_template structure, and can therefore be
444 * freely converted back into a structure offset. 442 * freely converted back into a structure offset.
445 */ 443 */
446#define PARAVIRT_CALL "call *%[paravirt_opptr];" 444#define PARAVIRT_CALL "call *%c[paravirt_opptr];"
447 445
448/* 446/*
449 * These macros are intended to wrap calls through one of the paravirt 447 * These macros are intended to wrap calls through one of the paravirt
@@ -1365,13 +1363,6 @@ static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
1365 pte.pte, pte.pte >> 32); 1363 pte.pte, pte.pte >> 32);
1366} 1364}
1367 1365
1368static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
1369 pte_t *ptep, pte_t pte)
1370{
1371 /* 5 arg words */
1372 pv_mmu_ops.set_pte_present(mm, addr, ptep, pte);
1373}
1374
1375static inline void pte_clear(struct mm_struct *mm, unsigned long addr, 1366static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
1376 pte_t *ptep) 1367 pte_t *ptep)
1377{ 1368{
@@ -1388,12 +1379,6 @@ static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
1388 set_pte(ptep, pte); 1379 set_pte(ptep, pte);
1389} 1380}
1390 1381
1391static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
1392 pte_t *ptep, pte_t pte)
1393{
1394 set_pte(ptep, pte);
1395}
1396
1397static inline void pte_clear(struct mm_struct *mm, unsigned long addr, 1382static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
1398 pte_t *ptep) 1383 pte_t *ptep)
1399{ 1384{
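
Switching paravirt_opptr from an "m" memory operand to an "i" immediate (the address of the op slot), dereferenced with the %c output modifier, makes the call target a link-time constant: the emitted instruction is an indirect call through a fixed pv_*_ops slot that the patching code can locate and rewrite. A standalone illustration of the "i" + %c idiom, not the kernel macro itself; compile without PIC, and a real call would also declare register clobbers:

static void (*my_op)(void);	/* stand-in for a pv_*_ops member */

static void call_my_op(void)
{
	/* emits "call *my_op": the operand is the slot's address itself */
	asm volatile("call *%c[op]" : : [op] "i" (&my_op) : "memory");
}
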
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 8f1d2fbec1d4..aee103b26d01 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -43,14 +43,6 @@
43#else /* ...!ASSEMBLY */ 43#else /* ...!ASSEMBLY */
44 44
45#include <linux/stringify.h> 45#include <linux/stringify.h>
46#include <asm/sections.h>
47
48#define __addr_to_pcpu_ptr(addr) \
49 (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \
50 + (unsigned long)__per_cpu_start)
51#define __pcpu_ptr_to_addr(ptr) \
52 (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \
53 - (unsigned long)__per_cpu_start)
54 46
55#ifdef CONFIG_SMP 47#ifdef CONFIG_SMP
56#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x 48#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index c1774ac9da7a..2334982b339e 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -26,13 +26,6 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
26 native_set_pte(ptep, pte); 26 native_set_pte(ptep, pte);
27} 27}
28 28
29static inline void native_set_pte_present(struct mm_struct *mm,
30 unsigned long addr,
31 pte_t *ptep, pte_t pte)
32{
33 native_set_pte(ptep, pte);
34}
35
36static inline void native_pmd_clear(pmd_t *pmdp) 29static inline void native_pmd_clear(pmd_t *pmdp)
37{ 30{
38 native_set_pmd(pmdp, __pmd(0)); 31 native_set_pmd(pmdp, __pmd(0));
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 3f13cdf61156..177b0165ea01 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -31,23 +31,6 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
31 ptep->pte_low = pte.pte_low; 31 ptep->pte_low = pte.pte_low;
32} 32}
33 33
34/*
35 * Since this is only called on user PTEs, and the page fault handler
36 * must handle the already racy situation of simultaneous page faults,
37 * we are justified in merely clearing the PTE present bit, followed
38 * by a set. The ordering here is important.
39 */
40static inline void native_set_pte_present(struct mm_struct *mm,
41 unsigned long addr,
42 pte_t *ptep, pte_t pte)
43{
44 ptep->pte_low = 0;
45 smp_wmb();
46 ptep->pte_high = pte.pte_high;
47 smp_wmb();
48 ptep->pte_low = pte.pte_low;
49}
50
51static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) 34static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
52{ 35{
53 set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); 36 set_64bit((unsigned long long *)(ptep), native_pte_val(pte));
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index d0812e155f1d..29d96d168bc0 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -31,8 +31,6 @@ extern struct list_head pgd_list;
31#define set_pte(ptep, pte) native_set_pte(ptep, pte) 31#define set_pte(ptep, pte) native_set_pte(ptep, pte)
32#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) 32#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
33 33
34#define set_pte_present(mm, addr, ptep, pte) \
35 native_set_pte_present(mm, addr, ptep, pte)
36#define set_pte_atomic(ptep, pte) \ 34#define set_pte_atomic(ptep, pte) \
37 native_set_pte_atomic(ptep, pte) 35 native_set_pte_atomic(ptep, pte)
38 36
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 97612fc7632f..31bd120cf2a2 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -42,9 +42,6 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
42 */ 42 */
43#undef TEST_ACCESS_OK 43#undef TEST_ACCESS_OK
44 44
45/* The boot page tables (all created as a single array) */
46extern unsigned long pg0[];
47
48#ifdef CONFIG_X86_PAE 45#ifdef CONFIG_X86_PAE
49# include <asm/pgtable-3level.h> 46# include <asm/pgtable-3level.h>
50#else 47#else
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 9874dd98a29f..34c52370f2fe 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -75,9 +75,9 @@ struct cpuinfo_x86 {
75#else 75#else
76 /* Number of 4K pages in DTLB/ITLB combined(in pages): */ 76 /* Number of 4K pages in DTLB/ITLB combined(in pages): */
77 int x86_tlbsize; 77 int x86_tlbsize;
78#endif
78 __u8 x86_virt_bits; 79 __u8 x86_virt_bits;
79 __u8 x86_phys_bits; 80 __u8 x86_phys_bits;
80#endif
81 /* CPUID returned core id bits: */ 81 /* CPUID returned core id bits: */
82 __u8 x86_coreid_bits; 82 __u8 x86_coreid_bits;
83 /* Max extended CPUID function supported: */ 83 /* Max extended CPUID function supported: */
@@ -391,6 +391,9 @@ DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
391DECLARE_INIT_PER_CPU(irq_stack_union); 391DECLARE_INIT_PER_CPU(irq_stack_union);
392 392
393DECLARE_PER_CPU(char *, irq_stack_ptr); 393DECLARE_PER_CPU(char *, irq_stack_ptr);
394DECLARE_PER_CPU(unsigned int, irq_count);
395extern unsigned long kernel_eflags;
396extern asmlinkage void ignore_sysret(void);
394#else /* X86_64 */ 397#else /* X86_64 */
395#ifdef CONFIG_CC_STACKPROTECTOR 398#ifdef CONFIG_CC_STACKPROTECTOR
396DECLARE_PER_CPU(unsigned long, stack_canary); 399DECLARE_PER_CPU(unsigned long, stack_canary);
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 2b8c5160388f..1b7ee5d673c2 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -1 +1,8 @@
1#ifndef _ASM_X86_SECTIONS_H
2#define _ASM_X86_SECTIONS_H
3
1#include <asm-generic/sections.h> 4#include <asm-generic/sections.h>
5
6extern char __brk_base[], __brk_limit[];
7
8#endif /* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 05c6f6b11fd5..bdc2ada05ae0 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -64,7 +64,7 @@ extern void x86_quirk_time_init(void);
64#include <asm/bootparam.h> 64#include <asm/bootparam.h>
65 65
66/* Interrupt control for vSMPowered x86_64 systems */ 66/* Interrupt control for vSMPowered x86_64 systems */
67#ifdef CONFIG_X86_VSMP 67#ifdef CONFIG_X86_64
68void vsmp_init(void); 68void vsmp_init(void);
69#else 69#else
70static inline void vsmp_init(void) { } 70static inline void vsmp_init(void) { }
@@ -100,20 +100,51 @@ extern struct boot_params boot_params;
100 */ 100 */
101#define LOWMEMSIZE() (0x9f000) 101#define LOWMEMSIZE() (0x9f000)
102 102
103/* exceedingly early brk-like allocator */
104extern unsigned long _brk_end;
105void *extend_brk(size_t size, size_t align);
106
107/*
108 * Reserve space in the brk section. The name must be unique within
109 * the file, and somewhat descriptive. The size is in bytes. Must be
110 * used at file scope.
111 *
112 * (This uses a temp function to wrap the asm so we can pass it the
113 * size parameter; otherwise we wouldn't be able to. We can't use a
114 * "section" attribute on a normal variable because it always ends up
115 * being @progbits, which ends up allocating space in the vmlinux
116 * executable.)
117 */
118#define RESERVE_BRK(name,sz) \
119 static void __section(.discard) __used \
120 __brk_reservation_fn_##name##__(void) { \
121 asm volatile ( \
122 ".pushsection .brk_reservation,\"aw\",@nobits;" \
123 ".brk." #name ":" \
124 " 1:.skip %c0;" \
125 " .size .brk." #name ", . - 1b;" \
126 " .popsection" \
127 : : "i" (sz)); \
128 }
129
103#ifdef __i386__ 130#ifdef __i386__
104 131
105void __init i386_start_kernel(void); 132void __init i386_start_kernel(void);
106extern void probe_roms(void); 133extern void probe_roms(void);
107 134
108extern unsigned long init_pg_tables_start;
109extern unsigned long init_pg_tables_end;
110
111#else 135#else
112void __init x86_64_start_kernel(char *real_mode); 136void __init x86_64_start_kernel(char *real_mode);
113void __init x86_64_start_reservations(char *real_mode_data); 137void __init x86_64_start_reservations(char *real_mode_data);
114 138
115#endif /* __i386__ */ 139#endif /* __i386__ */
116#endif /* _SETUP */ 140#endif /* _SETUP */
141#else
142#define RESERVE_BRK(name,sz) \
143 .pushsection .brk_reservation,"aw",@nobits; \
144.brk.name: \
1451: .skip sz; \
146 .size .brk.name,.-1b; \
147 .popsection
117#endif /* __ASSEMBLY__ */ 148#endif /* __ASSEMBLY__ */
118#endif /* __KERNEL__ */ 149#endif /* __KERNEL__ */
119 150
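
Typical use of the new brk allocator pairs a file-scope RESERVE_BRK() with early extend_brk() calls; the reservation name and size below are illustrative, not from this patch:

#include <asm/setup.h>
#include <asm/page.h>

RESERVE_BRK(early_pgt_pages, 16 * PAGE_SIZE);	/* illustrative reservation */

static void * __init alloc_early_pgt_page(void)
{
	/* hand out one page-aligned page from the .brk area */
	return extend_brk(PAGE_SIZE, PAGE_SIZE);
}
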
diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h
index 8ab9cc8b2ecc..ca8bf2cd0ba9 100644
--- a/arch/x86/include/asm/socket.h
+++ b/arch/x86/include/asm/socket.h
@@ -54,4 +54,7 @@
54 54
55#define SO_MARK 36 55#define SO_MARK 36
56 56
57#define SO_TIMESTAMPING 37
58#define SCM_TIMESTAMPING SO_TIMESTAMPING
59
57#endif /* _ASM_X86_SOCKET_H */ 60#endif /* _ASM_X86_SOCKET_H */
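
SO_TIMESTAMPING/SCM_TIMESTAMPING are driven from userspace via setsockopt() plus recvmsg() control messages. A hedged sketch, assuming the SOF_TIMESTAMPING_* flags from <linux/net_tstamp.h> introduced in the same release:

#include <sys/socket.h>
#include <linux/net_tstamp.h>	/* SOF_TIMESTAMPING_* (assumed available) */

/* request software receive timestamps; they arrive later as
 * SCM_TIMESTAMPING cmsgs on recvmsg() */
static int enable_rx_timestamps(int fd)
{
	int flags = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE;

	return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
			  &flags, sizeof(flags));
}
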
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1b8afa78e869..82ada75f3ebf 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -174,10 +174,6 @@ struct __attribute__ ((__packed__)) vmcb {
174#define SVM_CPUID_FEATURE_SHIFT 2 174#define SVM_CPUID_FEATURE_SHIFT 2
175#define SVM_CPUID_FUNC 0x8000000a 175#define SVM_CPUID_FUNC 0x8000000a
176 176
177#define MSR_EFER_SVME_MASK (1ULL << 12)
178#define MSR_VM_CR 0xc0010114
179#define MSR_VM_HSAVE_PA 0xc0010117ULL
180
181#define SVM_VM_CR_SVM_DISABLE 4 177#define SVM_VM_CR_SVM_DISABLE 4
182 178
183#define SVM_SELECTOR_S_SHIFT 4 179#define SVM_SELECTOR_S_SHIFT 4
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index ffb08be2a530..72a6dcd1299b 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -70,8 +70,6 @@ struct old_utsname;
70asmlinkage long sys32_olduname(struct oldold_utsname __user *); 70asmlinkage long sys32_olduname(struct oldold_utsname __user *);
71long sys32_uname(struct old_utsname __user *); 71long sys32_uname(struct old_utsname __user *);
72 72
73long sys32_ustat(unsigned, struct ustat32 __user *);
74
75asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *, 73asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *,
76 compat_uptr_t __user *, struct pt_regs *); 74 compat_uptr_t __user *, struct pt_regs *);
77asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); 75asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index a81195eaa2b3..bd37ed444a21 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -12,9 +12,9 @@ unsigned long native_calibrate_tsc(void);
12 12
13#ifdef CONFIG_X86_32 13#ifdef CONFIG_X86_32
14extern int timer_ack; 14extern int timer_ack;
15extern int recalibrate_cpu_khz(void);
16extern irqreturn_t timer_interrupt(int irq, void *dev_id); 15extern irqreturn_t timer_interrupt(int irq, void *dev_id);
17#endif /* CONFIG_X86_32 */ 16#endif /* CONFIG_X86_32 */
17extern int recalibrate_cpu_khz(void);
18 18
19extern int no_timer_check; 19extern int no_timer_check;
20 20
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 593636275238..e0f9aa16358b 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -118,7 +118,7 @@ static inline void cpu_svm_disable(void)
118 118
119 wrmsrl(MSR_VM_HSAVE_PA, 0); 119 wrmsrl(MSR_VM_HSAVE_PA, 0);
120 rdmsrl(MSR_EFER, efer); 120 rdmsrl(MSR_EFER, efer);
121 wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); 121 wrmsrl(MSR_EFER, efer & ~EFER_SVME);
122} 122}
123 123
124/** Makes sure SVM is disabled, if it is supported on the CPU 124/** Makes sure SVM is disabled, if it is supported on the CPU
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index d0238e6151d8..498f944010b9 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -270,8 +270,9 @@ enum vmcs_field {
270 270
271#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ 271#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
272#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ 272#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */
273#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */ 273#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */
274#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ 274#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
275#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */
275 276
276/* GUEST_INTERRUPTIBILITY_INFO flags. */ 277/* GUEST_INTERRUPTIBILITY_INFO flags. */
277#define GUEST_INTR_STATE_STI 0x00000001 278#define GUEST_INTR_STATE_STI 0x00000001
@@ -311,7 +312,7 @@ enum vmcs_field {
311#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */ 312#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */
312#define TYPE_MOV_TO_DR (0 << 4) 313#define TYPE_MOV_TO_DR (0 << 4)
313#define TYPE_MOV_FROM_DR (1 << 4) 314#define TYPE_MOV_FROM_DR (1 << 4)
314#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose reg. */ 315#define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */
315 316
316 317
317/* segment AR */ 318/* segment AR */
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 5e79ca694326..9c371e4a9fa6 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -296,6 +296,8 @@ HYPERVISOR_get_debugreg(int reg)
296static inline int 296static inline int
297HYPERVISOR_update_descriptor(u64 ma, u64 desc) 297HYPERVISOR_update_descriptor(u64 ma, u64 desc)
298{ 298{
299 if (sizeof(u64) == sizeof(long))
300 return _hypercall2(int, update_descriptor, ma, desc);
299 return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); 301 return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
300} 302}
301 303
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 339ce35648e6..c611ad64137f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -70,7 +70,6 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
70obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 70obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
71obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 71obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
72obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 72obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
73obj-$(CONFIG_X86_VSMP) += vsmp_64.o
74obj-$(CONFIG_KPROBES) += kprobes.o 73obj-$(CONFIG_KPROBES) += kprobes.o
75obj-$(CONFIG_MODULES) += module_$(BITS).o 74obj-$(CONFIG_MODULES) += module_$(BITS).o
76obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o 75obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
@@ -106,7 +105,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
106 105
107obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o 106obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
108 107
109obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64 108obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
110 109
111### 110###
112# 64 bit specific files 111# 64 bit specific files
@@ -120,4 +119,5 @@ ifeq ($(CONFIG_X86_64),y)
120 obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o 119 obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
121 120
122 obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o 121 obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
122 obj-y += vsmp_64.o
123endif 123endif
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 5113c080f0c4..c5962fe3796f 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -22,10 +22,9 @@
22#include <linux/bitops.h> 22#include <linux/bitops.h>
23#include <linux/debugfs.h> 23#include <linux/debugfs.h>
24#include <linux/scatterlist.h> 24#include <linux/scatterlist.h>
25#include <linux/dma-mapping.h>
25#include <linux/iommu-helper.h> 26#include <linux/iommu-helper.h>
26#ifdef CONFIG_IOMMU_API
27#include <linux/iommu.h> 27#include <linux/iommu.h>
28#endif
29#include <asm/proto.h> 28#include <asm/proto.h>
30#include <asm/iommu.h> 29#include <asm/iommu.h>
31#include <asm/gart.h> 30#include <asm/gart.h>
@@ -1297,8 +1296,10 @@ static void __unmap_single(struct amd_iommu *iommu,
1297/* 1296/*
1298 * The exported map_single function for dma_ops. 1297 * The exported map_single function for dma_ops.
1299 */ 1298 */
1300static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, 1299static dma_addr_t map_page(struct device *dev, struct page *page,
1301 size_t size, int dir) 1300 unsigned long offset, size_t size,
1301 enum dma_data_direction dir,
1302 struct dma_attrs *attrs)
1302{ 1303{
1303 unsigned long flags; 1304 unsigned long flags;
1304 struct amd_iommu *iommu; 1305 struct amd_iommu *iommu;
@@ -1306,6 +1307,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
1306 u16 devid; 1307 u16 devid;
1307 dma_addr_t addr; 1308 dma_addr_t addr;
1308 u64 dma_mask; 1309 u64 dma_mask;
1310 phys_addr_t paddr = page_to_phys(page) + offset;
1309 1311
1310 INC_STATS_COUNTER(cnt_map_single); 1312 INC_STATS_COUNTER(cnt_map_single);
1311 1313
@@ -1340,8 +1342,8 @@ out:
1340/* 1342/*
1341 * The exported unmap_single function for dma_ops. 1343 * The exported unmap_single function for dma_ops.
1342 */ 1344 */
1343static void unmap_single(struct device *dev, dma_addr_t dma_addr, 1345static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
1344 size_t size, int dir) 1346 enum dma_data_direction dir, struct dma_attrs *attrs)
1345{ 1347{
1346 unsigned long flags; 1348 unsigned long flags;
1347 struct amd_iommu *iommu; 1349 struct amd_iommu *iommu;
@@ -1390,7 +1392,8 @@ static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
1390 * lists). 1392 * lists).
1391 */ 1393 */
1392static int map_sg(struct device *dev, struct scatterlist *sglist, 1394static int map_sg(struct device *dev, struct scatterlist *sglist,
1393 int nelems, int dir) 1395 int nelems, enum dma_data_direction dir,
1396 struct dma_attrs *attrs)
1394{ 1397{
1395 unsigned long flags; 1398 unsigned long flags;
1396 struct amd_iommu *iommu; 1399 struct amd_iommu *iommu;
@@ -1457,7 +1460,8 @@ unmap:
1457 * lists). 1460 * lists).
1458 */ 1461 */
1459static void unmap_sg(struct device *dev, struct scatterlist *sglist, 1462static void unmap_sg(struct device *dev, struct scatterlist *sglist,
1460 int nelems, int dir) 1463 int nelems, enum dma_data_direction dir,
1464 struct dma_attrs *attrs)
1461{ 1465{
1462 unsigned long flags; 1466 unsigned long flags;
1463 struct amd_iommu *iommu; 1467 struct amd_iommu *iommu;
@@ -1644,11 +1648,11 @@ static void prealloc_protection_domains(void)
1644 } 1648 }
1645} 1649}
1646 1650
1647static struct dma_mapping_ops amd_iommu_dma_ops = { 1651static struct dma_map_ops amd_iommu_dma_ops = {
1648 .alloc_coherent = alloc_coherent, 1652 .alloc_coherent = alloc_coherent,
1649 .free_coherent = free_coherent, 1653 .free_coherent = free_coherent,
1650 .map_single = map_single, 1654 .map_page = map_page,
1651 .unmap_single = unmap_single, 1655 .unmap_page = unmap_page,
1652 .map_sg = map_sg, 1656 .map_sg = map_sg,
1653 .unmap_sg = unmap_sg, 1657 .unmap_sg = unmap_sg,
1654 .dma_supported = amd_iommu_dma_supported, 1658 .dma_supported = amd_iommu_dma_supported,
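
With the switch from dma_mapping_ops to the generic struct dma_map_ops, single mappings now go through .map_page with a page plus offset instead of a raw physical address. A sketch of what the generic dma_map_single() path boils down to under these ops, hand-rolled here for illustration; the real dispatch lives in the dma-mapping headers:

static dma_addr_t sketch_dma_map_single(struct device *dev, void *cpu_addr,
					size_t size, enum dma_data_direction dir)
{
	struct dma_map_ops *ops = get_dma_ops(dev);

	/* same paddr as before: page_to_phys(page) + offset inside it */
	return ops->map_page(dev, virt_to_page(cpu_addr),
			     offset_in_page(cpu_addr), size, dir, NULL);
}
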
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 30909a258d0f..85eb8e100818 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -809,7 +809,7 @@ void clear_local_APIC(void)
809 u32 v; 809 u32 v;
810 810
811 /* APIC hasn't been mapped yet */ 811 /* APIC hasn't been mapped yet */
812 if (!apic_phys) 812 if (!x2apic && !apic_phys)
813 return; 813 return;
814 814
815 maxlvt = lapic_get_maxlvt(); 815 maxlvt = lapic_get_maxlvt();
@@ -1334,15 +1334,16 @@ void __init enable_IR_x2apic(void)
1334 return; 1334 return;
1335 } 1335 }
1336 1336
1337 local_irq_save(flags); 1337 ret = save_IO_APIC_setup();
1338 mask_8259A();
1339
1340 ret = save_mask_IO_APIC_setup();
1341 if (ret) { 1338 if (ret) {
1342 pr_info("Saving IO-APIC state failed: %d\n", ret); 1339 pr_info("Saving IO-APIC state failed: %d\n", ret);
1343 goto end; 1340 goto end;
1344 } 1341 }
1345 1342
1343 local_irq_save(flags);
1344 mask_IO_APIC_setup();
1345 mask_8259A();
1346
1346 ret = enable_intr_remapping(1); 1347 ret = enable_intr_remapping(1);
1347 1348
1348 if (ret && x2apic_preenabled) { 1349 if (ret && x2apic_preenabled) {
@@ -1367,10 +1368,10 @@ end_restore:
1367 else 1368 else
1368 reinit_intr_remapped_IO_APIC(x2apic_preenabled); 1369 reinit_intr_remapped_IO_APIC(x2apic_preenabled);
1369 1370
1370end:
1371 unmask_8259A(); 1371 unmask_8259A();
1372 local_irq_restore(flags); 1372 local_irq_restore(flags);
1373 1373
1374end:
1374 if (!ret) { 1375 if (!ret) {
1375 if (!x2apic_preenabled) 1376 if (!x2apic_preenabled)
1376 pr_info("Enabled x2apic and interrupt-remapping\n"); 1377 pr_info("Enabled x2apic and interrupt-remapping\n");
@@ -1523,12 +1524,10 @@ void __init early_init_lapic_mapping(void)
1523 */ 1524 */
1524void __init init_apic_mappings(void) 1525void __init init_apic_mappings(void)
1525{ 1526{
1526#ifdef CONFIG_X86_X2APIC
1527 if (x2apic) { 1527 if (x2apic) {
1528 boot_cpu_physical_apicid = read_apic_id(); 1528 boot_cpu_physical_apicid = read_apic_id();
1529 return; 1529 return;
1530 } 1530 }
1531#endif
1532 1531
1533 /* 1532 /*
1534 * If no local APIC can be found then set up a fake all 1533 * If no local APIC can be found then set up a fake all
@@ -1972,12 +1971,9 @@ static int lapic_resume(struct sys_device *dev)
1972 1971
1973 local_irq_save(flags); 1972 local_irq_save(flags);
1974 1973
1975#ifdef CONFIG_X86_X2APIC
1976 if (x2apic) 1974 if (x2apic)
1977 enable_x2apic(); 1975 enable_x2apic();
1978 else 1976 else {
1979#endif
1980 {
1981 /* 1977 /*
1982 * Make sure the APICBASE points to the right address 1978 * Make sure the APICBASE points to the right address
1983 * 1979 *
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index f933822dba18..0014714ea97b 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -159,20 +159,6 @@ static int flat_apic_id_registered(void)
159 return physid_isset(read_xapic_id(), phys_cpu_present_map); 159 return physid_isset(read_xapic_id(), phys_cpu_present_map);
160} 160}
161 161
162static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
163{
164 return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
165}
166
167static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
168 const struct cpumask *andmask)
169{
170 unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
171 unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
172
173 return mask1 & mask2;
174}
175
176static int flat_phys_pkg_id(int initial_apic_id, int index_msb) 162static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
177{ 163{
178 return hard_smp_processor_id() >> index_msb; 164 return hard_smp_processor_id() >> index_msb;
@@ -213,8 +199,8 @@ struct apic apic_flat = {
213 .set_apic_id = set_apic_id, 199 .set_apic_id = set_apic_id,
214 .apic_id_mask = 0xFFu << 24, 200 .apic_id_mask = 0xFFu << 24,
215 201
216 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 202 .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
217 .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, 203 .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
218 204
219 .send_IPI_mask = flat_send_IPI_mask, 205 .send_IPI_mask = flat_send_IPI_mask,
220 .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, 206 .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 00e6071cefc4..da99ffcdfde6 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -389,6 +389,8 @@ struct io_apic {
389 unsigned int index; 389 unsigned int index;
390 unsigned int unused[3]; 390 unsigned int unused[3];
391 unsigned int data; 391 unsigned int data;
392 unsigned int unused2[11];
393 unsigned int eoi;
392}; 394};
393 395
394static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) 396static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
@@ -397,6 +399,12 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
397 + (mp_ioapics[idx].apicaddr & ~PAGE_MASK); 399 + (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
398} 400}
399 401
402static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
403{
404 struct io_apic __iomem *io_apic = io_apic_base(apic);
405 writel(vector, &io_apic->eoi);
406}
407
400static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 408static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
401{ 409{
402 struct io_apic __iomem *io_apic = io_apic_base(apic); 410 struct io_apic __iomem *io_apic = io_apic_base(apic);
@@ -546,16 +554,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
546 554
547 apic = entry->apic; 555 apic = entry->apic;
548 pin = entry->pin; 556 pin = entry->pin;
549#ifdef CONFIG_INTR_REMAP
550 /* 557 /*
551 * With interrupt-remapping, destination information comes 558 * With interrupt-remapping, destination information comes
552 * from interrupt-remapping table entry. 559 * from interrupt-remapping table entry.
553 */ 560 */
554 if (!irq_remapped(irq)) 561 if (!irq_remapped(irq))
555 io_apic_write(apic, 0x11 + pin*2, dest); 562 io_apic_write(apic, 0x11 + pin*2, dest);
556#else
557 io_apic_write(apic, 0x11 + pin*2, dest);
558#endif
559 reg = io_apic_read(apic, 0x10 + pin*2); 563 reg = io_apic_read(apic, 0x10 + pin*2);
560 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 564 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
561 reg |= vector; 565 reg |= vector;
@@ -588,10 +592,12 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
588 if (assign_irq_vector(irq, cfg, mask)) 592 if (assign_irq_vector(irq, cfg, mask))
589 return BAD_APICID; 593 return BAD_APICID;
590 594
591 cpumask_and(desc->affinity, cfg->domain, mask); 595 /* check that before desc->affinity gets updated */
592 set_extra_move_desc(desc, mask); 596 set_extra_move_desc(desc, mask);
593 597
594 return apic->cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); 598 cpumask_copy(desc->affinity, mask);
599
600 return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
595} 601}
596 602
597static void 603static void
@@ -849,9 +855,9 @@ __setup("pirq=", ioapic_pirq_setup);
849static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; 855static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
850 856
851/* 857/*
852 * Saves and masks all the unmasked IO-APIC RTE's 858 * Saves all the IO-APIC RTE's
853 */ 859 */
854int save_mask_IO_APIC_setup(void) 860int save_IO_APIC_setup(void)
855{ 861{
856 union IO_APIC_reg_01 reg_01; 862 union IO_APIC_reg_01 reg_01;
857 unsigned long flags; 863 unsigned long flags;
@@ -876,16 +882,9 @@ int save_mask_IO_APIC_setup(void)
876 } 882 }
877 883
878 for (apic = 0; apic < nr_ioapics; apic++) 884 for (apic = 0; apic < nr_ioapics; apic++)
879 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 885 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
880 struct IO_APIC_route_entry entry; 886 early_ioapic_entries[apic][pin] =
881
882 entry = early_ioapic_entries[apic][pin] =
883 ioapic_read_entry(apic, pin); 887 ioapic_read_entry(apic, pin);
884 if (!entry.mask) {
885 entry.mask = 1;
886 ioapic_write_entry(apic, pin, entry);
887 }
888 }
889 888
890 return 0; 889 return 0;
891 890
@@ -898,6 +897,25 @@ nomem:
898 return -ENOMEM; 897 return -ENOMEM;
899} 898}
900 899
900void mask_IO_APIC_setup(void)
901{
902 int apic, pin;
903
904 for (apic = 0; apic < nr_ioapics; apic++) {
905 if (!early_ioapic_entries[apic])
906 break;
907 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
908 struct IO_APIC_route_entry entry;
909
910 entry = early_ioapic_entries[apic][pin];
911 if (!entry.mask) {
912 entry.mask = 1;
913 ioapic_write_entry(apic, pin, entry);
914 }
915 }
916 }
917}
918
901void restore_IO_APIC_setup(void) 919void restore_IO_APIC_setup(void)
902{ 920{
903 int apic, pin; 921 int apic, pin;
@@ -1411,9 +1429,7 @@ void __setup_vector_irq(int cpu)
1411} 1429}
1412 1430
1413static struct irq_chip ioapic_chip; 1431static struct irq_chip ioapic_chip;
1414#ifdef CONFIG_INTR_REMAP
1415static struct irq_chip ir_ioapic_chip; 1432static struct irq_chip ir_ioapic_chip;
1416#endif
1417 1433
1418#define IOAPIC_AUTO -1 1434#define IOAPIC_AUTO -1
1419#define IOAPIC_EDGE 0 1435#define IOAPIC_EDGE 0
@@ -1452,7 +1468,6 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t
1452 else 1468 else
1453 desc->status &= ~IRQ_LEVEL; 1469 desc->status &= ~IRQ_LEVEL;
1454 1470
1455#ifdef CONFIG_INTR_REMAP
1456 if (irq_remapped(irq)) { 1471 if (irq_remapped(irq)) {
1457 desc->status |= IRQ_MOVE_PCNTXT; 1472 desc->status |= IRQ_MOVE_PCNTXT;
1458 if (trigger) 1473 if (trigger)
@@ -1464,7 +1479,7 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t
1464 handle_edge_irq, "edge"); 1479 handle_edge_irq, "edge");
1465 return; 1480 return;
1466 } 1481 }
1467#endif 1482
1468 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1483 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1469 trigger == IOAPIC_LEVEL) 1484 trigger == IOAPIC_LEVEL)
1470 set_irq_chip_and_handler_name(irq, &ioapic_chip, 1485 set_irq_chip_and_handler_name(irq, &ioapic_chip,
@@ -1478,14 +1493,13 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t
1478int setup_ioapic_entry(int apic_id, int irq, 1493int setup_ioapic_entry(int apic_id, int irq,
1479 struct IO_APIC_route_entry *entry, 1494 struct IO_APIC_route_entry *entry,
1480 unsigned int destination, int trigger, 1495 unsigned int destination, int trigger,
1481 int polarity, int vector) 1496 int polarity, int vector, int pin)
1482{ 1497{
1483 /* 1498 /*
1484 * add it to the IO-APIC irq-routing table: 1499 * add it to the IO-APIC irq-routing table:
1485 */ 1500 */
1486 memset(entry,0,sizeof(*entry)); 1501 memset(entry,0,sizeof(*entry));
1487 1502
1488#ifdef CONFIG_INTR_REMAP
1489 if (intr_remapping_enabled) { 1503 if (intr_remapping_enabled) {
1490 struct intel_iommu *iommu = map_ioapic_to_ir(apic_id); 1504 struct intel_iommu *iommu = map_ioapic_to_ir(apic_id);
1491 struct irte irte; 1505 struct irte irte;
@@ -1504,7 +1518,14 @@ int setup_ioapic_entry(int apic_id, int irq,
1504 1518
1505 irte.present = 1; 1519 irte.present = 1;
1506 irte.dst_mode = apic->irq_dest_mode; 1520 irte.dst_mode = apic->irq_dest_mode;
1507 irte.trigger_mode = trigger; 1521 /*
1522 * Trigger mode in the IRTE will always be edge, and the
1523 * actual level or edge trigger will be set up in the IO-APIC
1524 * RTE. This will help simplify level triggered irq migration.
1525 * For more details, see the comments above explaining IO-APIC
1526 * irq migration in the presence of interrupt-remapping.
1527 */
1528 irte.trigger_mode = 0;
1508 irte.dlvry_mode = apic->irq_delivery_mode; 1529 irte.dlvry_mode = apic->irq_delivery_mode;
1509 irte.vector = vector; 1530 irte.vector = vector;
1510 irte.dest_id = IRTE_DEST(destination); 1531 irte.dest_id = IRTE_DEST(destination);
@@ -1515,18 +1536,21 @@ int setup_ioapic_entry(int apic_id, int irq,
1515 ir_entry->zero = 0; 1536 ir_entry->zero = 0;
1516 ir_entry->format = 1; 1537 ir_entry->format = 1;
1517 ir_entry->index = (index & 0x7fff); 1538 ir_entry->index = (index & 0x7fff);
1518 } else 1539 /*
1519#endif 1540 * IO-APIC RTE will be configured with virtual vector.
1520 { 1541 * irq handler will do the explicit EOI to the io-apic.
1542 */
1543 ir_entry->vector = pin;
1544 } else {
1521 entry->delivery_mode = apic->irq_delivery_mode; 1545 entry->delivery_mode = apic->irq_delivery_mode;
1522 entry->dest_mode = apic->irq_dest_mode; 1546 entry->dest_mode = apic->irq_dest_mode;
1523 entry->dest = destination; 1547 entry->dest = destination;
1548 entry->vector = vector;
1524 } 1549 }
1525 1550
1526 entry->mask = 0; /* enable IRQ */ 1551 entry->mask = 0; /* enable IRQ */
1527 entry->trigger = trigger; 1552 entry->trigger = trigger;
1528 entry->polarity = polarity; 1553 entry->polarity = polarity;
1529 entry->vector = vector;
1530 1554
1531 /* Mask level triggered irqs. 1555 /* Mask level triggered irqs.
1532 * Use IRQ_DELAYED_DISABLE for edge triggered irqs. 1556 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
@@ -1561,7 +1585,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
1561 1585
1562 1586
1563 if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry, 1587 if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry,
1564 dest, trigger, polarity, cfg->vector)) { 1588 dest, trigger, polarity, cfg->vector, pin)) {
1565 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1589 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1566 mp_ioapics[apic_id].apicid, pin); 1590 mp_ioapics[apic_id].apicid, pin);
1567 __clear_irq_vector(irq, cfg); 1591 __clear_irq_vector(irq, cfg);
@@ -1642,10 +1666,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
1642{ 1666{
1643 struct IO_APIC_route_entry entry; 1667 struct IO_APIC_route_entry entry;
1644 1668
1645#ifdef CONFIG_INTR_REMAP
1646 if (intr_remapping_enabled) 1669 if (intr_remapping_enabled)
1647 return; 1670 return;
1648#endif
1649 1671
1650 memset(&entry, 0, sizeof(entry)); 1672 memset(&entry, 0, sizeof(entry));
1651 1673
@@ -2040,8 +2062,13 @@ void disable_IO_APIC(void)
2040 * If the i8259 is routed through an IOAPIC 2062 * If the i8259 is routed through an IOAPIC
2041 * Put that IOAPIC in virtual wire mode 2063 * Put that IOAPIC in virtual wire mode
2042 * so legacy interrupts can be delivered. 2064 * so legacy interrupts can be delivered.
2065 *
2066 * With interrupt-remapping, for now we will use virtual wire A mode,
2067 * as virtual wire B is a little more complex (we need to configure both
2068 * the IOAPIC RTE as well as the interrupt-remapping table entry).
2069 * As this gets called during crash dump, keep this simple for now.
2043 */ 2070 */
2044 if (ioapic_i8259.pin != -1) { 2071 if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) {
2045 struct IO_APIC_route_entry entry; 2072 struct IO_APIC_route_entry entry;
2046 2073
2047 memset(&entry, 0, sizeof(entry)); 2074 memset(&entry, 0, sizeof(entry));
@@ -2061,7 +2088,10 @@ void disable_IO_APIC(void)
2061 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); 2088 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
2062 } 2089 }
2063 2090
2064 disconnect_bsp_APIC(ioapic_i8259.pin != -1); 2091 /*
2092 * Use virtual wire A mode when interrupt remapping is enabled.
2093 */
2094 disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1);
2065} 2095}
2066 2096
2067#ifdef CONFIG_X86_32 2097#ifdef CONFIG_X86_32
@@ -2303,37 +2333,24 @@ static int ioapic_retrigger_irq(unsigned int irq)
2303#ifdef CONFIG_SMP 2333#ifdef CONFIG_SMP
2304 2334
2305#ifdef CONFIG_INTR_REMAP 2335#ifdef CONFIG_INTR_REMAP
2306static void ir_irq_migration(struct work_struct *work);
2307
2308static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
2309 2336
2310/* 2337/*
2311 * Migrate the IO-APIC irq in the presence of intr-remapping. 2338 * Migrate the IO-APIC irq in the presence of intr-remapping.
2312 * 2339 *
2313 * For edge triggered, irq migration is a simple atomic update(of vector 2340 * For both level and edge triggered, irq migration is a simple atomic
2314 * and cpu destination) of IRTE and flush the hardware cache. 2341 * update(of vector and cpu destination) of IRTE and flush the hardware cache.
2315 *
2316 * For level triggered, we need to modify the io-apic RTE aswell with the update
2317 * vector information, along with modifying IRTE with vector and destination.
2318 * So irq migration for level triggered is little bit more complex compared to
2319 * edge triggered migration. But the good news is, we use the same algorithm
2320 * for level triggered migration as we have today, only difference being,
2321 * we now initiate the irq migration from process context instead of the
2322 * interrupt context.
2323 * 2342 *
2324 * In future, when we do a directed EOI (combined with cpu EOI broadcast 2343 * For level triggered, we eliminate the io-apic RTE modification (with the
2325 * suppression) to the IO-APIC, level triggered irq migration will also be 2344 * updated vector information), by using a virtual vector (io-apic pin number).
2326 * as simple as edge triggered migration and we can do the irq migration 2345 * Real vector that is used for interrupting cpu will be coming from
2327 * with a simple atomic update to IO-APIC RTE. 2346 * the interrupt-remapping table entry.
2328 */ 2347 */
2329static void 2348static void
2330migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) 2349migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2331{ 2350{
2332 struct irq_cfg *cfg; 2351 struct irq_cfg *cfg;
2333 struct irte irte; 2352 struct irte irte;
2334 int modify_ioapic_rte;
2335 unsigned int dest; 2353 unsigned int dest;
2336 unsigned long flags;
2337 unsigned int irq; 2354 unsigned int irq;
2338 2355
2339 if (!cpumask_intersects(mask, cpu_online_mask)) 2356 if (!cpumask_intersects(mask, cpu_online_mask))
@@ -2351,13 +2368,6 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2351 2368
2352 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); 2369 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
2353 2370
2354 modify_ioapic_rte = desc->status & IRQ_LEVEL;
2355 if (modify_ioapic_rte) {
2356 spin_lock_irqsave(&ioapic_lock, flags);
2357 __target_IO_APIC_irq(irq, dest, cfg);
2358 spin_unlock_irqrestore(&ioapic_lock, flags);
2359 }
2360
2361 irte.vector = cfg->vector; 2371 irte.vector = cfg->vector;
2362 irte.dest_id = IRTE_DEST(dest); 2372 irte.dest_id = IRTE_DEST(dest);
2363 2373
@@ -2372,73 +2382,12 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2372 cpumask_copy(desc->affinity, mask); 2382 cpumask_copy(desc->affinity, mask);
2373} 2383}
2374 2384
2375static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
2376{
2377 int ret = -1;
2378 struct irq_cfg *cfg = desc->chip_data;
2379
2380 mask_IO_APIC_irq_desc(desc);
2381
2382 if (io_apic_level_ack_pending(cfg)) {
2383 /*
2384 * Interrupt in progress. Migrating irq now will change the
2385 * vector information in the IO-APIC RTE and that will confuse
2386 * the EOI broadcast performed by cpu.
2387 * So, delay the irq migration to the next instance.
2388 */
2389 schedule_delayed_work(&ir_migration_work, 1);
2390 goto unmask;
2391 }
2392
2393 /* everthing is clear. we have right of way */
2394 migrate_ioapic_irq_desc(desc, desc->pending_mask);
2395
2396 ret = 0;
2397 desc->status &= ~IRQ_MOVE_PENDING;
2398 cpumask_clear(desc->pending_mask);
2399
2400unmask:
2401 unmask_IO_APIC_irq_desc(desc);
2402
2403 return ret;
2404}
2405
2406static void ir_irq_migration(struct work_struct *work)
2407{
2408 unsigned int irq;
2409 struct irq_desc *desc;
2410
2411 for_each_irq_desc(irq, desc) {
2412 if (desc->status & IRQ_MOVE_PENDING) {
2413 unsigned long flags;
2414
2415 spin_lock_irqsave(&desc->lock, flags);
2416 if (!desc->chip->set_affinity ||
2417 !(desc->status & IRQ_MOVE_PENDING)) {
2418 desc->status &= ~IRQ_MOVE_PENDING;
2419 spin_unlock_irqrestore(&desc->lock, flags);
2420 continue;
2421 }
2422
2423 desc->chip->set_affinity(irq, desc->pending_mask);
2424 spin_unlock_irqrestore(&desc->lock, flags);
2425 }
2426 }
2427}
2428
2429/* 2385/*
2430 * Migrates the IRQ destination in the process context. 2386 * Migrates the IRQ destination in the process context.
2431 */ 2387 */
2432static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, 2388static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2433 const struct cpumask *mask) 2389 const struct cpumask *mask)
2434{ 2390{
2435 if (desc->status & IRQ_LEVEL) {
2436 desc->status |= IRQ_MOVE_PENDING;
2437 cpumask_copy(desc->pending_mask, mask);
2438 migrate_irq_remapped_level_desc(desc);
2439 return;
2440 }
2441
2442 migrate_ioapic_irq_desc(desc, mask); 2391 migrate_ioapic_irq_desc(desc, mask);
2443} 2392}
2444static void set_ir_ioapic_affinity_irq(unsigned int irq, 2393static void set_ir_ioapic_affinity_irq(unsigned int irq,
@@ -2448,6 +2397,11 @@ static void set_ir_ioapic_affinity_irq(unsigned int irq,
2448 2397
2449 set_ir_ioapic_affinity_irq_desc(desc, mask); 2398 set_ir_ioapic_affinity_irq_desc(desc, mask);
2450} 2399}
2400#else
2401static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2402 const struct cpumask *mask)
2403{
2404}
2451#endif 2405#endif
2452 2406
2453asmlinkage void smp_irq_move_cleanup_interrupt(void) 2407asmlinkage void smp_irq_move_cleanup_interrupt(void)
@@ -2461,6 +2415,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2461 me = smp_processor_id(); 2415 me = smp_processor_id();
2462 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { 2416 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
2463 unsigned int irq; 2417 unsigned int irq;
2418 unsigned int irr;
2464 struct irq_desc *desc; 2419 struct irq_desc *desc;
2465 struct irq_cfg *cfg; 2420 struct irq_cfg *cfg;
2466 irq = __get_cpu_var(vector_irq)[vector]; 2421 irq = __get_cpu_var(vector_irq)[vector];
@@ -2480,6 +2435,18 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2480 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) 2435 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2481 goto unlock; 2436 goto unlock;
2482 2437
2438 irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
2439 /*
 2440		 * Check if the vector that needs to be cleaned up is
 2441		 * registered in the cpu's IRR. If so, then this is not
 2442		 * the best time to clean it up. Let's clean it up in the
2443 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
2444 * to myself.
2445 */
2446 if (irr & (1 << (vector % 32))) {
2447 apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
2448 goto unlock;
2449 }
2483 __get_cpu_var(vector_irq)[vector] = -1; 2450 __get_cpu_var(vector_irq)[vector] = -1;
2484 cfg->move_cleanup_count--; 2451 cfg->move_cleanup_count--;
2485unlock: 2452unlock:
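
The IRR test added above indexes the 256-bit Interrupt Request Register as eight 32-bit words spaced 0x10 apart in local APIC register space. A minimal standalone sketch of that arithmetic, assuming the conventional 0x200 base for APIC_IRR (an assumption for illustration, not taken from this patch):

#include <stdio.h>

/* Model of the "APIC_IRR + (vector / 32 * 0x10)" and "1 << (vector % 32)"
 * indexing used in smp_irq_move_cleanup_interrupt() above. */
#define APIC_IRR 0x200	/* assumed base of the IRR register block */

int main(void)
{
	unsigned int vector = 0x61;	/* example vector number */
	unsigned int reg  = APIC_IRR + (vector / 32) * 0x10;
	unsigned int mask = 1u << (vector % 32);

	printf("vector 0x%x -> IRR word at offset 0x%x, bit mask 0x%08x\n",
	       vector, reg, mask);
	return 0;
}
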
@@ -2529,9 +2496,44 @@ static inline void irq_complete_move(struct irq_desc **descp) {}
2529#endif 2496#endif
2530 2497
2531#ifdef CONFIG_INTR_REMAP 2498#ifdef CONFIG_INTR_REMAP
2499static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
2500{
2501 int apic, pin;
2502 struct irq_pin_list *entry;
2503
2504 entry = cfg->irq_2_pin;
2505 for (;;) {
2506
2507 if (!entry)
2508 break;
2509
2510 apic = entry->apic;
2511 pin = entry->pin;
2512 io_apic_eoi(apic, pin);
2513 entry = entry->next;
2514 }
2515}
2516
2517static void
2518eoi_ioapic_irq(struct irq_desc *desc)
2519{
2520 struct irq_cfg *cfg;
2521 unsigned long flags;
2522 unsigned int irq;
2523
2524 irq = desc->irq;
2525 cfg = desc->chip_data;
2526
2527 spin_lock_irqsave(&ioapic_lock, flags);
2528 __eoi_ioapic_irq(irq, cfg);
2529 spin_unlock_irqrestore(&ioapic_lock, flags);
2530}
2531
2532static void ack_x2apic_level(unsigned int irq) 2532static void ack_x2apic_level(unsigned int irq)
2533{ 2533{
2534 struct irq_desc *desc = irq_to_desc(irq);
2534 ack_x2APIC_irq(); 2535 ack_x2APIC_irq();
2536 eoi_ioapic_irq(desc);
2535} 2537}
2536 2538
2537static void ack_x2apic_edge(unsigned int irq) 2539static void ack_x2apic_edge(unsigned int irq)
@@ -2662,20 +2664,20 @@ static struct irq_chip ioapic_chip __read_mostly = {
2662 .retrigger = ioapic_retrigger_irq, 2664 .retrigger = ioapic_retrigger_irq,
2663}; 2665};
2664 2666
2665#ifdef CONFIG_INTR_REMAP
2666static struct irq_chip ir_ioapic_chip __read_mostly = { 2667static struct irq_chip ir_ioapic_chip __read_mostly = {
2667 .name = "IR-IO-APIC", 2668 .name = "IR-IO-APIC",
2668 .startup = startup_ioapic_irq, 2669 .startup = startup_ioapic_irq,
2669 .mask = mask_IO_APIC_irq, 2670 .mask = mask_IO_APIC_irq,
2670 .unmask = unmask_IO_APIC_irq, 2671 .unmask = unmask_IO_APIC_irq,
2672#ifdef CONFIG_INTR_REMAP
2671 .ack = ack_x2apic_edge, 2673 .ack = ack_x2apic_edge,
2672 .eoi = ack_x2apic_level, 2674 .eoi = ack_x2apic_level,
2673#ifdef CONFIG_SMP 2675#ifdef CONFIG_SMP
2674 .set_affinity = set_ir_ioapic_affinity_irq, 2676 .set_affinity = set_ir_ioapic_affinity_irq,
2675#endif 2677#endif
2678#endif
2676 .retrigger = ioapic_retrigger_irq, 2679 .retrigger = ioapic_retrigger_irq,
2677}; 2680};
2678#endif
2679 2681
2680static inline void init_IO_APIC_traps(void) 2682static inline void init_IO_APIC_traps(void)
2681{ 2683{
@@ -2901,10 +2903,8 @@ static inline void __init check_timer(void)
2901 * 8259A. 2903 * 8259A.
2902 */ 2904 */
2903 if (pin1 == -1) { 2905 if (pin1 == -1) {
2904#ifdef CONFIG_INTR_REMAP
2905 if (intr_remapping_enabled) 2906 if (intr_remapping_enabled)
2906 panic("BIOS bug: timer not connected to IO-APIC"); 2907 panic("BIOS bug: timer not connected to IO-APIC");
2907#endif
2908 pin1 = pin2; 2908 pin1 = pin2;
2909 apic1 = apic2; 2909 apic1 = apic2;
2910 no_pin1 = 1; 2910 no_pin1 = 1;
@@ -2940,10 +2940,8 @@ static inline void __init check_timer(void)
2940 clear_IO_APIC_pin(0, pin1); 2940 clear_IO_APIC_pin(0, pin1);
2941 goto out; 2941 goto out;
2942 } 2942 }
2943#ifdef CONFIG_INTR_REMAP
2944 if (intr_remapping_enabled) 2943 if (intr_remapping_enabled)
2945 panic("timer doesn't work through Interrupt-remapped IO-APIC"); 2944 panic("timer doesn't work through Interrupt-remapped IO-APIC");
2946#endif
2947 local_irq_disable(); 2945 local_irq_disable();
2948 clear_IO_APIC_pin(apic1, pin1); 2946 clear_IO_APIC_pin(apic1, pin1);
2949 if (!no_pin1) 2947 if (!no_pin1)
@@ -3237,9 +3235,7 @@ void destroy_irq(unsigned int irq)
3237 if (desc) 3235 if (desc)
3238 desc->chip_data = cfg; 3236 desc->chip_data = cfg;
3239 3237
3240#ifdef CONFIG_INTR_REMAP
3241 free_irte(irq); 3238 free_irte(irq);
3242#endif
3243 spin_lock_irqsave(&vector_lock, flags); 3239 spin_lock_irqsave(&vector_lock, flags);
3244 __clear_irq_vector(irq, cfg); 3240 __clear_irq_vector(irq, cfg);
3245 spin_unlock_irqrestore(&vector_lock, flags); 3241 spin_unlock_irqrestore(&vector_lock, flags);
@@ -3265,7 +3261,6 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3265 3261
3266 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); 3262 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
3267 3263
3268#ifdef CONFIG_INTR_REMAP
3269 if (irq_remapped(irq)) { 3264 if (irq_remapped(irq)) {
3270 struct irte irte; 3265 struct irte irte;
3271 int ir_index; 3266 int ir_index;
@@ -3291,10 +3286,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3291 MSI_ADDR_IR_SHV | 3286 MSI_ADDR_IR_SHV |
3292 MSI_ADDR_IR_INDEX1(ir_index) | 3287 MSI_ADDR_IR_INDEX1(ir_index) |
3293 MSI_ADDR_IR_INDEX2(ir_index); 3288 MSI_ADDR_IR_INDEX2(ir_index);
3294 } else 3289 } else {
3295#endif 3290 if (x2apic_enabled())
3296 { 3291 msg->address_hi = MSI_ADDR_BASE_HI |
3297 msg->address_hi = MSI_ADDR_BASE_HI; 3292 MSI_ADDR_EXT_DEST_ID(dest);
3293 else
3294 msg->address_hi = MSI_ADDR_BASE_HI;
3295
3298 msg->address_lo = 3296 msg->address_lo =
3299 MSI_ADDR_BASE_LO | 3297 MSI_ADDR_BASE_LO |
3300 ((apic->irq_dest_mode == 0) ? 3298 ((apic->irq_dest_mode == 0) ?
@@ -3394,15 +3392,16 @@ static struct irq_chip msi_chip = {
3394 .retrigger = ioapic_retrigger_irq, 3392 .retrigger = ioapic_retrigger_irq,
3395}; 3393};
3396 3394
3397#ifdef CONFIG_INTR_REMAP
3398static struct irq_chip msi_ir_chip = { 3395static struct irq_chip msi_ir_chip = {
3399 .name = "IR-PCI-MSI", 3396 .name = "IR-PCI-MSI",
3400 .unmask = unmask_msi_irq, 3397 .unmask = unmask_msi_irq,
3401 .mask = mask_msi_irq, 3398 .mask = mask_msi_irq,
3399#ifdef CONFIG_INTR_REMAP
3402 .ack = ack_x2apic_edge, 3400 .ack = ack_x2apic_edge,
3403#ifdef CONFIG_SMP 3401#ifdef CONFIG_SMP
3404 .set_affinity = ir_set_msi_irq_affinity, 3402 .set_affinity = ir_set_msi_irq_affinity,
3405#endif 3403#endif
3404#endif
3406 .retrigger = ioapic_retrigger_irq, 3405 .retrigger = ioapic_retrigger_irq,
3407}; 3406};
3408 3407
@@ -3432,7 +3431,6 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
3432 } 3431 }
3433 return index; 3432 return index;
3434} 3433}
3435#endif
3436 3434
3437static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) 3435static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3438{ 3436{
@@ -3446,7 +3444,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3446 set_irq_msi(irq, msidesc); 3444 set_irq_msi(irq, msidesc);
3447 write_msi_msg(irq, &msg); 3445 write_msi_msg(irq, &msg);
3448 3446
3449#ifdef CONFIG_INTR_REMAP
3450 if (irq_remapped(irq)) { 3447 if (irq_remapped(irq)) {
3451 struct irq_desc *desc = irq_to_desc(irq); 3448 struct irq_desc *desc = irq_to_desc(irq);
3452 /* 3449 /*
@@ -3455,7 +3452,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3455 desc->status |= IRQ_MOVE_PCNTXT; 3452 desc->status |= IRQ_MOVE_PCNTXT;
3456 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); 3453 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
3457 } else 3454 } else
3458#endif
3459 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 3455 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
3460 3456
3461 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); 3457 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
@@ -3469,11 +3465,8 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3469 int ret, sub_handle; 3465 int ret, sub_handle;
3470 struct msi_desc *msidesc; 3466 struct msi_desc *msidesc;
3471 unsigned int irq_want; 3467 unsigned int irq_want;
3472 3468 struct intel_iommu *iommu = NULL;
3473#ifdef CONFIG_INTR_REMAP
3474 struct intel_iommu *iommu = 0;
3475 int index = 0; 3469 int index = 0;
3476#endif
3477 3470
3478 irq_want = nr_irqs_gsi; 3471 irq_want = nr_irqs_gsi;
3479 sub_handle = 0; 3472 sub_handle = 0;
@@ -3482,7 +3475,6 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3482 if (irq == 0) 3475 if (irq == 0)
3483 return -1; 3476 return -1;
3484 irq_want = irq + 1; 3477 irq_want = irq + 1;
3485#ifdef CONFIG_INTR_REMAP
3486 if (!intr_remapping_enabled) 3478 if (!intr_remapping_enabled)
3487 goto no_ir; 3479 goto no_ir;
3488 3480
@@ -3510,7 +3502,6 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3510 set_irte_irq(irq, iommu, index, sub_handle); 3502 set_irte_irq(irq, iommu, index, sub_handle);
3511 } 3503 }
3512no_ir: 3504no_ir:
3513#endif
3514 ret = setup_msi_irq(dev, msidesc, irq); 3505 ret = setup_msi_irq(dev, msidesc, irq);
3515 if (ret < 0) 3506 if (ret < 0)
3516 goto error; 3507 goto error;
@@ -3528,7 +3519,7 @@ void arch_teardown_msi_irq(unsigned int irq)
3528 destroy_irq(irq); 3519 destroy_irq(irq);
3529} 3520}
3530 3521
3531#ifdef CONFIG_DMAR 3522#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
3532#ifdef CONFIG_SMP 3523#ifdef CONFIG_SMP
3533static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) 3524static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3534{ 3525{
@@ -3609,7 +3600,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3609 3600
3610#endif /* CONFIG_SMP */ 3601#endif /* CONFIG_SMP */
3611 3602
3612struct irq_chip hpet_msi_type = { 3603static struct irq_chip hpet_msi_type = {
3613 .name = "HPET_MSI", 3604 .name = "HPET_MSI",
3614 .unmask = hpet_msi_unmask, 3605 .unmask = hpet_msi_unmask,
3615 .mask = hpet_msi_mask, 3606 .mask = hpet_msi_mask,
@@ -4045,11 +4036,9 @@ void __init setup_ioapic_dest(void)
4045 else 4036 else
4046 mask = apic->target_cpus(); 4037 mask = apic->target_cpus();
4047 4038
4048#ifdef CONFIG_INTR_REMAP
4049 if (intr_remapping_enabled) 4039 if (intr_remapping_enabled)
4050 set_ir_ioapic_affinity_irq_desc(desc, mask); 4040 set_ir_ioapic_affinity_irq_desc(desc, mask);
4051 else 4041 else
4052#endif
4053 set_ioapic_affinity_irq_desc(desc, mask); 4042 set_ioapic_affinity_irq_desc(desc, mask);
4054 } 4043 }
4055 4044
@@ -4142,9 +4131,12 @@ static int __init ioapic_insert_resources(void)
4142 struct resource *r = ioapic_resources; 4131 struct resource *r = ioapic_resources;
4143 4132
4144 if (!r) { 4133 if (!r) {
4145 printk(KERN_ERR 4134 if (nr_ioapics > 0) {
4146 "IO APIC resources could be not be allocated.\n"); 4135 printk(KERN_ERR
4147 return -1; 4136 "IO APIC resources couldn't be allocated.\n");
4137 return -1;
4138 }
4139 return 0;
4148 } 4140 }
4149 4141
4150 for (i = 0; i < nr_ioapics; i++) { 4142 for (i = 0; i < nr_ioapics; i++) {
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 8d7748efe6a8..1783652bb0e5 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -68,6 +68,13 @@ void __init default_setup_apic_routing(void)
68 apic = &apic_physflat; 68 apic = &apic_physflat;
69 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); 69 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
70 } 70 }
71
72 /*
73 * Now that apic routing model is selected, configure the
74 * fault handling for intr remapping.
75 */
76 if (intr_remapping_enabled)
77 enable_drhd_fault_handling();
71} 78}
72 79
73/* Same for both flat and physical. */ 80/* Same for both flat and physical. */
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 8fb87b6dd633..4a903e2f0d17 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -57,6 +57,8 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
57 unsigned long query_cpu; 57 unsigned long query_cpu;
58 unsigned long flags; 58 unsigned long flags;
59 59
60 x2apic_wrmsr_fence();
61
60 local_irq_save(flags); 62 local_irq_save(flags);
61 for_each_cpu(query_cpu, mask) { 63 for_each_cpu(query_cpu, mask) {
62 __x2apic_send_IPI_dest( 64 __x2apic_send_IPI_dest(
@@ -73,6 +75,8 @@ static void
73 unsigned long query_cpu; 75 unsigned long query_cpu;
74 unsigned long flags; 76 unsigned long flags;
75 77
78 x2apic_wrmsr_fence();
79
76 local_irq_save(flags); 80 local_irq_save(flags);
77 for_each_cpu(query_cpu, mask) { 81 for_each_cpu(query_cpu, mask) {
78 if (query_cpu == this_cpu) 82 if (query_cpu == this_cpu)
@@ -90,6 +94,8 @@ static void x2apic_send_IPI_allbutself(int vector)
90 unsigned long query_cpu; 94 unsigned long query_cpu;
91 unsigned long flags; 95 unsigned long flags;
92 96
97 x2apic_wrmsr_fence();
98
93 local_irq_save(flags); 99 local_irq_save(flags);
94 for_each_online_cpu(query_cpu) { 100 for_each_online_cpu(query_cpu) {
95 if (query_cpu == this_cpu) 101 if (query_cpu == this_cpu)
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 23625b9f98b2..a284359627e7 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -58,6 +58,8 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
58 unsigned long query_cpu; 58 unsigned long query_cpu;
59 unsigned long flags; 59 unsigned long flags;
60 60
61 x2apic_wrmsr_fence();
62
61 local_irq_save(flags); 63 local_irq_save(flags);
62 for_each_cpu(query_cpu, mask) { 64 for_each_cpu(query_cpu, mask) {
63 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), 65 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
@@ -73,6 +75,8 @@ static void
73 unsigned long query_cpu; 75 unsigned long query_cpu;
74 unsigned long flags; 76 unsigned long flags;
75 77
78 x2apic_wrmsr_fence();
79
76 local_irq_save(flags); 80 local_irq_save(flags);
77 for_each_cpu(query_cpu, mask) { 81 for_each_cpu(query_cpu, mask) {
78 if (query_cpu != this_cpu) 82 if (query_cpu != this_cpu)
@@ -89,6 +93,8 @@ static void x2apic_send_IPI_allbutself(int vector)
89 unsigned long query_cpu; 93 unsigned long query_cpu;
90 unsigned long flags; 94 unsigned long flags;
91 95
96 x2apic_wrmsr_fence();
97
92 local_irq_save(flags); 98 local_irq_save(flags);
93 for_each_online_cpu(query_cpu) { 99 for_each_online_cpu(query_cpu) {
94 if (query_cpu == this_cpu) 100 if (query_cpu == this_cpu)
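
The x2apic_wrmsr_fence() calls added to both x2APIC drivers above address ordering: an x2APIC IPI is sent with WRMSR, which, unlike an MMIO write to the xAPIC ICR, does not serialize earlier memory stores, so a fence is needed before the IPI to make prior writes visible to the target CPU. A hedged userspace sketch of the idea; mfence is used purely as an illustration, and the helper's actual instruction choice is not shown in this diff:

#include <stdio.h>

/* Illustration only: order earlier stores before a (hypothetical)
 * WRMSR-based IPI.  Requires an x86 target and GCC/Clang inline asm. */
static inline void wrmsr_fence_sketch(void)
{
	asm volatile("mfence" ::: "memory");
}

int main(void)
{
	wrmsr_fence_sketch();
	puts("stores ordered before the (hypothetical) ICR write");
	return 0;
}
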
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 2ac0ab71412a..fc999e6fc46a 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -83,15 +83,15 @@ void __init setup_bios_corruption_check(void)
83 u64 size; 83 u64 size;
84 addr = find_e820_area_size(addr, &size, PAGE_SIZE); 84 addr = find_e820_area_size(addr, &size, PAGE_SIZE);
85 85
86 if (addr == 0) 86 if (!(addr + 1))
87 break;
88
89 if (addr >= corruption_check_size)
87 break; 90 break;
88 91
89 if ((addr + size) > corruption_check_size) 92 if ((addr + size) > corruption_check_size)
90 size = corruption_check_size - addr; 93 size = corruption_check_size - addr;
91 94
92 if (size == 0)
93 break;
94
95 e820_update_range(addr, size, E820_RAM, E820_RESERVED); 95 e820_update_range(addr, size, E820_RAM, E820_RESERVED);
96 scan_areas[num_scan_areas].addr = addr; 96 scan_areas[num_scan_areas].addr = addr;
97 scan_areas[num_scan_areas].size = size; 97 scan_areas[num_scan_areas].size = size;
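
The reworked loop termination above stops on an all-ones address instead of on zero, presumably because the search helper signals "nothing found" with -1 while address 0 is a perfectly scannable range. A quick standalone check of the !(addr + 1) idiom:

#include <stdio.h>

int main(void)
{
	unsigned long long samples[] = { 0x0ULL, 0x1000ULL, ~0ULL };
	int i;

	for (i = 0; i < 3; i++) {
		unsigned long long addr = samples[i];
		/* true only for the all-ones "not found" sentinel */
		printf("addr=%#llx  !(addr + 1) -> %d\n", addr, !(addr + 1));
	}
	return 0;
}
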
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 82db7f45e2de..4e242f9a06e4 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -14,11 +14,12 @@ obj-y += vmware.o hypervisor.o
14obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o 14obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
15obj-$(CONFIG_X86_64) += bugs_64.o 15obj-$(CONFIG_X86_64) += bugs_64.o
16 16
17obj-$(CONFIG_X86_CPU_DEBUG) += cpu_debug.o
18
17obj-$(CONFIG_CPU_SUP_INTEL) += intel.o 19obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
18obj-$(CONFIG_CPU_SUP_AMD) += amd.o 20obj-$(CONFIG_CPU_SUP_AMD) += amd.o
19obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o 21obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
20obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o 22obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
21obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o
22obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o 23obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
23obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o 24obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
24 25
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 6882a735d9c0..8220ae69849d 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -29,7 +29,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
29 u32 regs[4]; 29 u32 regs[4];
30 const struct cpuid_bit *cb; 30 const struct cpuid_bit *cb;
31 31
32 static const struct cpuid_bit cpuid_bits[] = { 32 static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
33 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, 33 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
34 { 0, 0, 0, 0 } 34 { 0, 0, 0, 0 }
35 }; 35 };
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f47df59016c5..7e4a459daa64 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -502,7 +502,7 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int
502} 502}
503#endif 503#endif
504 504
505static struct cpu_dev amd_cpu_dev __cpuinitdata = { 505static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
506 .c_vendor = "AMD", 506 .c_vendor = "AMD",
507 .c_ident = { "AuthenticAMD" }, 507 .c_ident = { "AuthenticAMD" },
508#ifdef CONFIG_X86_32 508#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 89bfdd9cacc6..c95e831bb095 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -1,11 +1,11 @@
1#include <linux/bitops.h>
1#include <linux/kernel.h> 2#include <linux/kernel.h>
2#include <linux/init.h> 3#include <linux/init.h>
3#include <linux/bitops.h>
4 4
5#include <asm/processor.h> 5#include <asm/processor.h>
6#include <asm/msr.h>
7#include <asm/e820.h> 6#include <asm/e820.h>
8#include <asm/mtrr.h> 7#include <asm/mtrr.h>
8#include <asm/msr.h>
9 9
10#include "cpu.h" 10#include "cpu.h"
11 11
@@ -276,7 +276,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
276 */ 276 */
277 c->x86_capability[5] = cpuid_edx(0xC0000001); 277 c->x86_capability[5] = cpuid_edx(0xC0000001);
278 } 278 }
279 279#ifdef CONFIG_X86_32
280 /* Cyrix III family needs CX8 & PGE explicitly enabled. */ 280 /* Cyrix III family needs CX8 & PGE explicitly enabled. */
281 if (c->x86_model >= 6 && c->x86_model <= 9) { 281 if (c->x86_model >= 6 && c->x86_model <= 9) {
282 rdmsr(MSR_VIA_FCR, lo, hi); 282 rdmsr(MSR_VIA_FCR, lo, hi);
@@ -288,6 +288,11 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
288 /* Before Nehemiah, the C3's had 3dNOW! */ 288 /* Before Nehemiah, the C3's had 3dNOW! */
289 if (c->x86_model >= 6 && c->x86_model < 9) 289 if (c->x86_model >= 6 && c->x86_model < 9)
290 set_cpu_cap(c, X86_FEATURE_3DNOW); 290 set_cpu_cap(c, X86_FEATURE_3DNOW);
291#endif
292 if (c->x86 == 0x6 && c->x86_model >= 0xf) {
293 c->x86_cache_alignment = c->x86_clflush_size * 2;
294 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
295 }
291 296
292 display_cacheinfo(c); 297 display_cacheinfo(c);
293} 298}
@@ -316,16 +321,25 @@ enum {
316static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) 321static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
317{ 322{
318 switch (c->x86) { 323 switch (c->x86) {
324#ifdef CONFIG_X86_32
319 case 5: 325 case 5:
320 /* Emulate MTRRs using Centaur's MCR. */ 326 /* Emulate MTRRs using Centaur's MCR. */
321 set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); 327 set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
322 break; 328 break;
329#endif
330 case 6:
331 if (c->x86_model >= 0xf)
332 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
333 break;
323 } 334 }
335#ifdef CONFIG_X86_64
336 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
337#endif
324} 338}
325 339
326static void __cpuinit init_centaur(struct cpuinfo_x86 *c) 340static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
327{ 341{
328 342#ifdef CONFIG_X86_32
329 char *name; 343 char *name;
330 u32 fcr_set = 0; 344 u32 fcr_set = 0;
331 u32 fcr_clr = 0; 345 u32 fcr_clr = 0;
@@ -337,8 +351,10 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
337 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway 351 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
338 */ 352 */
339 clear_cpu_cap(c, 0*32+31); 353 clear_cpu_cap(c, 0*32+31);
340 354#endif
355 early_init_centaur(c);
341 switch (c->x86) { 356 switch (c->x86) {
357#ifdef CONFIG_X86_32
342 case 5: 358 case 5:
343 switch (c->x86_model) { 359 switch (c->x86_model) {
344 case 4: 360 case 4:
@@ -442,16 +458,20 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
442 } 458 }
443 sprintf(c->x86_model_id, "WinChip %s", name); 459 sprintf(c->x86_model_id, "WinChip %s", name);
444 break; 460 break;
445 461#endif
446 case 6: 462 case 6:
447 init_c3(c); 463 init_c3(c);
448 break; 464 break;
449 } 465 }
466#ifdef CONFIG_X86_64
467 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
468#endif
450} 469}
451 470
452static unsigned int __cpuinit 471static unsigned int __cpuinit
453centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) 472centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
454{ 473{
474#ifdef CONFIG_X86_32
455 /* VIA C3 CPUs (670-68F) need further shifting. */ 475 /* VIA C3 CPUs (670-68F) need further shifting. */
456 if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) 476 if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
457 size >>= 8; 477 size >>= 8;
@@ -464,11 +484,11 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
464 if ((c->x86 == 6) && (c->x86_model == 9) && 484 if ((c->x86 == 6) && (c->x86_model == 9) &&
465 (c->x86_mask == 1) && (size == 65)) 485 (c->x86_mask == 1) && (size == 65))
466 size -= 1; 486 size -= 1;
467 487#endif
468 return size; 488 return size;
469} 489}
470 490
471static struct cpu_dev centaur_cpu_dev __cpuinitdata = { 491static const struct cpu_dev __cpuinitconst centaur_cpu_dev = {
472 .c_vendor = "Centaur", 492 .c_vendor = "Centaur",
473 .c_ident = { "CentaurHauls" }, 493 .c_ident = { "CentaurHauls" },
474 .c_early_init = early_init_centaur, 494 .c_early_init = early_init_centaur,
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
deleted file mode 100644
index a1625f5a1e78..000000000000
--- a/arch/x86/kernel/cpu/centaur_64.c
+++ /dev/null
@@ -1,37 +0,0 @@
1#include <linux/init.h>
2#include <linux/smp.h>
3
4#include <asm/cpufeature.h>
5#include <asm/processor.h>
6
7#include "cpu.h"
8
9static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
10{
11 if (c->x86 == 0x6 && c->x86_model >= 0xf)
12 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
13
14 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
15}
16
17static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
18{
19 early_init_centaur(c);
20
21 if (c->x86 == 0x6 && c->x86_model >= 0xf) {
22 c->x86_cache_alignment = c->x86_clflush_size * 2;
23 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
24 }
25 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
26}
27
28static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
29 .c_vendor = "Centaur",
30 .c_ident = { "CentaurHauls" },
31 .c_early_init = early_init_centaur,
32 .c_init = init_centaur,
33 .c_x86_vendor = X86_VENDOR_CENTAUR,
34};
35
36cpu_dev_register(centaur_cpu_dev);
37
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d7dd3c294e2a..c4f667896c28 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1,50 +1,50 @@
1#include <linux/init.h>
2#include <linux/kernel.h>
3#include <linux/sched.h>
4#include <linux/string.h>
5#include <linux/bootmem.h> 1#include <linux/bootmem.h>
2#include <linux/linkage.h>
6#include <linux/bitops.h> 3#include <linux/bitops.h>
4#include <linux/kernel.h>
7#include <linux/module.h> 5#include <linux/module.h>
8#include <linux/kgdb.h> 6#include <linux/percpu.h>
9#include <linux/topology.h> 7#include <linux/string.h>
10#include <linux/delay.h> 8#include <linux/delay.h>
9#include <linux/sched.h>
10#include <linux/init.h>
11#include <linux/kgdb.h>
11#include <linux/smp.h> 12#include <linux/smp.h>
12#include <linux/percpu.h> 13#include <linux/io.h>
13#include <asm/i387.h> 14
14#include <asm/msr.h> 15#include <asm/stackprotector.h>
15#include <asm/io.h>
16#include <asm/linkage.h>
17#include <asm/mmu_context.h> 16#include <asm/mmu_context.h>
17#include <asm/hypervisor.h>
18#include <asm/processor.h>
19#include <asm/sections.h>
20#include <asm/topology.h>
21#include <asm/cpumask.h>
22#include <asm/pgtable.h>
23#include <asm/atomic.h>
24#include <asm/proto.h>
25#include <asm/setup.h>
26#include <asm/apic.h>
27#include <asm/desc.h>
28#include <asm/i387.h>
18#include <asm/mtrr.h> 29#include <asm/mtrr.h>
30#include <asm/numa.h>
31#include <asm/asm.h>
32#include <asm/cpu.h>
19#include <asm/mce.h> 33#include <asm/mce.h>
34#include <asm/msr.h>
20#include <asm/pat.h> 35#include <asm/pat.h>
21#include <asm/asm.h>
22#include <asm/numa.h>
23#include <asm/smp.h> 36#include <asm/smp.h>
24#include <asm/cpu.h>
25#include <asm/cpumask.h>
26#include <asm/apic.h>
27 37
28#ifdef CONFIG_X86_LOCAL_APIC 38#ifdef CONFIG_X86_LOCAL_APIC
29#include <asm/uv/uv.h> 39#include <asm/uv/uv.h>
30#endif 40#endif
31 41
32#include <asm/pgtable.h>
33#include <asm/processor.h>
34#include <asm/desc.h>
35#include <asm/atomic.h>
36#include <asm/proto.h>
37#include <asm/sections.h>
38#include <asm/setup.h>
39#include <asm/hypervisor.h>
40#include <asm/stackprotector.h>
41
42#include "cpu.h" 42#include "cpu.h"
43 43
44/* all of these masks are initialized in setup_cpu_local_masks() */ 44/* all of these masks are initialized in setup_cpu_local_masks() */
45cpumask_var_t cpu_callin_mask;
46cpumask_var_t cpu_callout_mask;
47cpumask_var_t cpu_initialized_mask; 45cpumask_var_t cpu_initialized_mask;
46cpumask_var_t cpu_callout_mask;
47cpumask_var_t cpu_callin_mask;
48 48
49/* representing cpus for which sibling maps can be computed */ 49/* representing cpus for which sibling maps can be computed */
50cpumask_var_t cpu_sibling_setup_mask; 50cpumask_var_t cpu_sibling_setup_mask;
@@ -58,7 +58,7 @@ void __init setup_cpu_local_masks(void)
58 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); 58 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
59} 59}
60 60
61static struct cpu_dev *this_cpu __cpuinitdata; 61static const struct cpu_dev *this_cpu __cpuinitdata;
62 62
63DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { 63DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
64#ifdef CONFIG_X86_64 64#ifdef CONFIG_X86_64
@@ -67,48 +67,48 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
67 * IRET will check the segment types kkeil 2000/10/28 67 * IRET will check the segment types kkeil 2000/10/28
68 * Also sysret mandates a special GDT layout 68 * Also sysret mandates a special GDT layout
69 * 69 *
70 * The TLS descriptors are currently at a different place compared to i386. 70 * TLS descriptors are currently at a different place compared to i386.
71 * Hopefully nobody expects them at a fixed place (Wine?) 71 * Hopefully nobody expects them at a fixed place (Wine?)
72 */ 72 */
73 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, 73 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
74 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, 74 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
75 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, 75 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
76 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, 76 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
77 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, 77 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
78 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, 78 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
79#else 79#else
80 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, 80 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
81 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, 81 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
82 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, 82 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
83 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, 83 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } },
84 /* 84 /*
85 * Segments used for calling PnP BIOS have byte granularity. 85 * Segments used for calling PnP BIOS have byte granularity.
 86	 * The code segments and data segments have fixed 64k limits,	 86	 * The code segments and data segments have fixed 64k limits,
87 * the transfer segment sizes are set at run time. 87 * the transfer segment sizes are set at run time.
88 */ 88 */
89 /* 32-bit code */ 89 /* 32-bit code */
90 [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, 90 [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } },
91 /* 16-bit code */ 91 /* 16-bit code */
92 [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, 92 [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } },
93 /* 16-bit data */ 93 /* 16-bit data */
94 [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, 94 [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } },
95 /* 16-bit data */ 95 /* 16-bit data */
96 [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, 96 [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } },
97 /* 16-bit data */ 97 /* 16-bit data */
98 [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, 98 [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } },
99 /* 99 /*
100 * The APM segments have byte granularity and their bases 100 * The APM segments have byte granularity and their bases
101 * are set at run time. All have 64k limits. 101 * are set at run time. All have 64k limits.
102 */ 102 */
103 /* 32-bit code */ 103 /* 32-bit code */
104 [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, 104 [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } },
105 /* 16-bit code */ 105 /* 16-bit code */
106 [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, 106 [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } },
107 /* data */ 107 /* data */
108 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, 108 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
109 109
110 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 110 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
111 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, 111 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
112 GDT_STACK_CANARY_INIT 112 GDT_STACK_CANARY_INIT
113#endif 113#endif
114} }; 114} };
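
The raw { low, high } dword pairs in the GDT table above pack base, limit and attribute bits in the standard x86 segment-descriptor layout. A small standalone decoder of one entry (the sample value is the 32-bit kernel code segment from the table; this is an illustration, not kernel code):

#include <stdio.h>
#include <stdint.h>

static void decode(uint32_t lo, uint32_t hi)
{
	uint32_t base   = (lo >> 16) | ((hi & 0xff) << 16) | (hi & 0xff000000);
	uint32_t limit  = (lo & 0xffff) | (hi & 0x000f0000);
	uint8_t  access = (hi >> 8) & 0xff;
	uint8_t  flags  = (hi >> 20) & 0xf;

	if (flags & 0x8)	/* G bit: limit is counted in 4K pages */
		limit = (limit << 12) | 0xfff;

	printf("base=0x%08x limit=0x%08x access=0x%02x flags=0x%x\n",
	       base, limit, access, flags);
}

int main(void)
{
	decode(0x0000ffff, 0x00cf9a00);	/* GDT_ENTRY_KERNEL_CS, 32-bit */
	return 0;
}
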
@@ -152,16 +152,17 @@ static inline int flag_is_changeable_p(u32 flag)
152 * the CPUID. Add "volatile" to not allow gcc to 152 * the CPUID. Add "volatile" to not allow gcc to
153 * optimize the subsequent calls to this function. 153 * optimize the subsequent calls to this function.
154 */ 154 */
155 asm volatile ("pushfl\n\t" 155 asm volatile ("pushfl \n\t"
156 "pushfl\n\t" 156 "pushfl \n\t"
157 "popl %0\n\t" 157 "popl %0 \n\t"
158 "movl %0,%1\n\t" 158 "movl %0, %1 \n\t"
159 "xorl %2,%0\n\t" 159 "xorl %2, %0 \n\t"
160 "pushl %0\n\t" 160 "pushl %0 \n\t"
161 "popfl\n\t" 161 "popfl \n\t"
162 "pushfl\n\t" 162 "pushfl \n\t"
163 "popl %0\n\t" 163 "popl %0 \n\t"
164 "popfl\n\t" 164 "popfl \n\t"
165
165 : "=&r" (f1), "=&r" (f2) 166 : "=&r" (f1), "=&r" (f2)
166 : "ir" (flag)); 167 : "ir" (flag));
167 168
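
flag_is_changeable_p() probes whether a given EFLAGS bit can be toggled, which on 32-bit parts is the classic way to tell whether CPUID exists (bit 21, the ID flag). A userspace sketch of the same probe, written with the compiler's EFLAGS intrinsics rather than the kernel's inline asm; it assumes GCC or Clang on x86-64 and is an illustration only:

#include <stdio.h>
#include <x86intrin.h>

static int id_flag_is_changeable(void)
{
	unsigned long long f1, f2;

	f2 = __readeflags();
	__writeeflags(f2 ^ (1ULL << 21));	/* try to flip EFLAGS.ID */
	f1 = __readeflags();
	__writeeflags(f2);			/* restore original flags */

	return ((f1 ^ f2) & (1ULL << 21)) != 0;
}

int main(void)
{
	printf("EFLAGS.ID changeable: %d\n", id_flag_is_changeable());
	return 0;
}
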
@@ -176,18 +177,22 @@ static int __cpuinit have_cpuid_p(void)
176 177
177static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) 178static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
178{ 179{
179 if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { 180 unsigned long lo, hi;
180 /* Disable processor serial number */ 181
181 unsigned long lo, hi; 182 if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr)
182 rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); 183 return;
183 lo |= 0x200000; 184
184 wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); 185 /* Disable processor serial number: */
185 printk(KERN_NOTICE "CPU serial number disabled.\n"); 186
186 clear_cpu_cap(c, X86_FEATURE_PN); 187 rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
187 188 lo |= 0x200000;
188 /* Disabling the serial number may affect the cpuid level */ 189 wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
189 c->cpuid_level = cpuid_eax(0); 190
190 } 191 printk(KERN_NOTICE "CPU serial number disabled.\n");
192 clear_cpu_cap(c, X86_FEATURE_PN);
193
194 /* Disabling the serial number may affect the cpuid level */
195 c->cpuid_level = cpuid_eax(0);
191} 196}
192 197
193static int __init x86_serial_nr_setup(char *s) 198static int __init x86_serial_nr_setup(char *s)
@@ -220,6 +225,7 @@ struct cpuid_dependent_feature {
220 u32 feature; 225 u32 feature;
221 u32 level; 226 u32 level;
222}; 227};
228
223static const struct cpuid_dependent_feature __cpuinitconst 229static const struct cpuid_dependent_feature __cpuinitconst
224cpuid_dependent_features[] = { 230cpuid_dependent_features[] = {
225 { X86_FEATURE_MWAIT, 0x00000005 }, 231 { X86_FEATURE_MWAIT, 0x00000005 },
@@ -231,7 +237,11 @@ cpuid_dependent_features[] = {
231static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) 237static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
232{ 238{
233 const struct cpuid_dependent_feature *df; 239 const struct cpuid_dependent_feature *df;
240
234 for (df = cpuid_dependent_features; df->feature; df++) { 241 for (df = cpuid_dependent_features; df->feature; df++) {
242
243 if (!cpu_has(c, df->feature))
244 continue;
235 /* 245 /*
236 * Note: cpuid_level is set to -1 if unavailable, but 246 * Note: cpuid_level is set to -1 if unavailable, but
237 * extended_extended_level is set to 0 if unavailable 247 * extended_extended_level is set to 0 if unavailable
@@ -239,32 +249,32 @@ static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
239 * when signed; hence the weird messing around with 249 * when signed; hence the weird messing around with
240 * signs here... 250 * signs here...
241 */ 251 */
242 if (cpu_has(c, df->feature) && 252 if (!((s32)df->level < 0 ?
243 ((s32)df->level < 0 ?
244 (u32)df->level > (u32)c->extended_cpuid_level : 253 (u32)df->level > (u32)c->extended_cpuid_level :
245 (s32)df->level > (s32)c->cpuid_level)) { 254 (s32)df->level > (s32)c->cpuid_level))
246 clear_cpu_cap(c, df->feature); 255 continue;
247 if (warn) 256
248 printk(KERN_WARNING 257 clear_cpu_cap(c, df->feature);
249 "CPU: CPU feature %s disabled " 258 if (!warn)
250 "due to lack of CPUID level 0x%x\n", 259 continue;
251 x86_cap_flags[df->feature], 260
252 df->level); 261 printk(KERN_WARNING
253 } 262 "CPU: CPU feature %s disabled, no CPUID level 0x%x\n",
263 x86_cap_flags[df->feature], df->level);
254 } 264 }
255} 265}
256 266
257/* 267/*
258 * Naming convention should be: <Name> [(<Codename>)] 268 * Naming convention should be: <Name> [(<Codename>)]
259 * This table only is used unless init_<vendor>() below doesn't set it; 269 * This table only is used unless init_<vendor>() below doesn't set it;
260 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used 270 * in particular, if CPUID levels 0x80000002..4 are supported, this
261 * 271 * isn't used
262 */ 272 */
263 273
264/* Look up CPU names by table lookup. */ 274/* Look up CPU names by table lookup. */
265static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) 275static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c)
266{ 276{
267 struct cpu_model_info *info; 277 const struct cpu_model_info *info;
268 278
269 if (c->x86_model >= 16) 279 if (c->x86_model >= 16)
270 return NULL; /* Range check */ 280 return NULL; /* Range check */
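
The comparison reshuffled above keeps the sign trick the comment describes: an extended CPUID leaf number (0x80000000 and up) is negative as an s32, so it is compared unsigned against extended_cpuid_level, while ordinary leaves are compared signed so that cpuid_level == -1 (no CPUID at all) filters everything. A standalone model of that predicate with made-up sample levels:

#include <stdio.h>
#include <stdint.h>

/* Returns nonzero when the CPU's reported level is below the level the
 * feature needs, i.e. the feature should be filtered out. */
static int missing_cpuid_level(uint32_t df_level, int32_t cpuid_level,
			       uint32_t extended_cpuid_level)
{
	return (int32_t)df_level < 0 ?
		df_level > extended_cpuid_level :
		(int32_t)df_level > cpuid_level;
}

int main(void)
{
	/* extended leaf 0x80000008 on a CPU reporting only up to 0x80000004 */
	printf("%d\n", missing_cpuid_level(0x80000008u, 10, 0x80000004u));
	/* ordinary leaf 5 (MWAIT) with cpuid_level == -1 (no CPUID)         */
	printf("%d\n", missing_cpuid_level(5, -1, 0));
	/* ordinary leaf 5 on a CPU with cpuid_level == 10                   */
	printf("%d\n", missing_cpuid_level(5, 10, 0));
	return 0;
}
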
@@ -295,8 +305,10 @@ void load_percpu_segment(int cpu)
295 load_stack_canary_segment(); 305 load_stack_canary_segment();
296} 306}
297 307
298/* Current gdt points %fs at the "master" per-cpu area: after this, 308/*
299 * it's on the real one. */ 309 * Current gdt points %fs at the "master" per-cpu area: after this,
310 * it's on the real one.
311 */
300void switch_to_new_gdt(int cpu) 312void switch_to_new_gdt(int cpu)
301{ 313{
302 struct desc_ptr gdt_descr; 314 struct desc_ptr gdt_descr;
@@ -309,7 +321,7 @@ void switch_to_new_gdt(int cpu)
309 load_percpu_segment(cpu); 321 load_percpu_segment(cpu);
310} 322}
311 323
312static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; 324static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
313 325
314static void __cpuinit default_init(struct cpuinfo_x86 *c) 326static void __cpuinit default_init(struct cpuinfo_x86 *c)
315{ 327{
@@ -328,7 +340,7 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c)
328#endif 340#endif
329} 341}
330 342
331static struct cpu_dev __cpuinitdata default_cpu = { 343static const struct cpu_dev __cpuinitconst default_cpu = {
332 .c_init = default_init, 344 .c_init = default_init,
333 .c_vendor = "Unknown", 345 .c_vendor = "Unknown",
334 .c_x86_vendor = X86_VENDOR_UNKNOWN, 346 .c_x86_vendor = X86_VENDOR_UNKNOWN,
@@ -342,22 +354,24 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
342 if (c->extended_cpuid_level < 0x80000004) 354 if (c->extended_cpuid_level < 0x80000004)
343 return; 355 return;
344 356
345 v = (unsigned int *) c->x86_model_id; 357 v = (unsigned int *)c->x86_model_id;
346 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); 358 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
347 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); 359 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
348 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); 360 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
349 c->x86_model_id[48] = 0; 361 c->x86_model_id[48] = 0;
350 362
351 /* Intel chips right-justify this string for some dumb reason; 363 /*
352 undo that brain damage */ 364 * Intel chips right-justify this string for some dumb reason;
365 * undo that brain damage:
366 */
353 p = q = &c->x86_model_id[0]; 367 p = q = &c->x86_model_id[0];
354 while (*p == ' ') 368 while (*p == ' ')
355 p++; 369 p++;
356 if (p != q) { 370 if (p != q) {
357 while (*p) 371 while (*p)
358 *q++ = *p++; 372 *q++ = *p++;
359 while (q <= &c->x86_model_id[48]) 373 while (q <= &c->x86_model_id[48])
360 *q++ = '\0'; /* Zero-pad the rest */ 374 *q++ = '\0'; /* Zero-pad the rest */
361 } 375 }
362} 376}
363 377
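
The fixup above left-justifies the brand string by skipping the leading spaces Intel pads with and zero-filling the tail of the 48-character buffer. A minimal userspace sketch of the same shuffle, using a made-up model string:

#include <stdio.h>

int main(void)
{
	char id[49] = "      Example CPU Model";	/* made-up, right-justified */
	char *p = id, *q = id;

	while (*p == ' ')
		p++;
	if (p != q) {
		while (*p)
			*q++ = *p++;
		while (q <= &id[48])
			*q++ = '\0';	/* zero-pad the rest */
	}
	printf("'%s'\n", id);
	return 0;
}
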
@@ -426,27 +440,30 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
426 440
427 if (smp_num_siblings == 1) { 441 if (smp_num_siblings == 1) {
428 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); 442 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
429 } else if (smp_num_siblings > 1) { 443 goto out;
444 }
430 445
431 if (smp_num_siblings > nr_cpu_ids) { 446 if (smp_num_siblings <= 1)
432 printk(KERN_WARNING "CPU: Unsupported number of siblings %d", 447 goto out;
433 smp_num_siblings);
434 smp_num_siblings = 1;
435 return;
436 }
437 448
438 index_msb = get_count_order(smp_num_siblings); 449 if (smp_num_siblings > nr_cpu_ids) {
439 c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); 450 pr_warning("CPU: Unsupported number of siblings %d",
451 smp_num_siblings);
452 smp_num_siblings = 1;
453 return;
454 }
440 455
441 smp_num_siblings = smp_num_siblings / c->x86_max_cores; 456 index_msb = get_count_order(smp_num_siblings);
457 c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
442 458
443 index_msb = get_count_order(smp_num_siblings); 459 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
444 460
445 core_bits = get_count_order(c->x86_max_cores); 461 index_msb = get_count_order(smp_num_siblings);
446 462
447 c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & 463 core_bits = get_count_order(c->x86_max_cores);
448 ((1 << core_bits) - 1); 464
449 } 465 c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
466 ((1 << core_bits) - 1);
450 467
451out: 468out:
452 if ((c->x86_max_cores * smp_num_siblings) > 1) { 469 if ((c->x86_max_cores * smp_num_siblings) > 1) {
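
The restructured detect_ht() derives the topology as before: the package id is the initial APIC id shifted right by ceil(log2(threads per package)), and the core id is taken from the bits between the thread field and the package field. A standalone sketch of that decomposition with made-up topology values:

#include <stdio.h>

static int count_order(unsigned int n)	/* ceil(log2(n)), like get_count_order() */
{
	int order = 0;

	while ((1u << order) < n)
		order++;
	return order;
}

int main(void)
{
	unsigned int apicid    = 0x0b;	/* example initial APIC id      */
	unsigned int siblings  = 8;	/* example threads per package  */
	unsigned int max_cores = 4;	/* example cores per package    */

	int index_msb = count_order(siblings);
	unsigned int phys_proc_id = apicid >> index_msb;

	unsigned int threads_per_core = siblings / max_cores;
	int thread_bits = count_order(threads_per_core);
	int core_bits   = count_order(max_cores);
	unsigned int cpu_core_id =
		(apicid >> thread_bits) & ((1u << core_bits) - 1);

	printf("apicid=%#x -> package=%u core=%u\n",
	       apicid, phys_proc_id, cpu_core_id);
	return 0;
}
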
@@ -461,8 +478,8 @@ out:
461static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) 478static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
462{ 479{
463 char *v = c->x86_vendor_id; 480 char *v = c->x86_vendor_id;
464 int i;
465 static int printed; 481 static int printed;
482 int i;
466 483
467 for (i = 0; i < X86_VENDOR_NUM; i++) { 484 for (i = 0; i < X86_VENDOR_NUM; i++) {
468 if (!cpu_devs[i]) 485 if (!cpu_devs[i])
@@ -471,6 +488,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
471 if (!strcmp(v, cpu_devs[i]->c_ident[0]) || 488 if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
472 (cpu_devs[i]->c_ident[1] && 489 (cpu_devs[i]->c_ident[1] &&
473 !strcmp(v, cpu_devs[i]->c_ident[1]))) { 490 !strcmp(v, cpu_devs[i]->c_ident[1]))) {
491
474 this_cpu = cpu_devs[i]; 492 this_cpu = cpu_devs[i];
475 c->x86_vendor = this_cpu->c_x86_vendor; 493 c->x86_vendor = this_cpu->c_x86_vendor;
476 return; 494 return;
@@ -479,7 +497,9 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
479 497
480 if (!printed) { 498 if (!printed) {
481 printed++; 499 printed++;
482 printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v); 500 printk(KERN_ERR
501 "CPU: vendor_id '%s' unknown, using generic init.\n", v);
502
483 printk(KERN_ERR "CPU: Your system may be unstable.\n"); 503 printk(KERN_ERR "CPU: Your system may be unstable.\n");
484 } 504 }
485 505
@@ -499,14 +519,17 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
499 /* Intel-defined flags: level 0x00000001 */ 519 /* Intel-defined flags: level 0x00000001 */
500 if (c->cpuid_level >= 0x00000001) { 520 if (c->cpuid_level >= 0x00000001) {
501 u32 junk, tfms, cap0, misc; 521 u32 junk, tfms, cap0, misc;
522
502 cpuid(0x00000001, &tfms, &misc, &junk, &cap0); 523 cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
503 c->x86 = (tfms >> 8) & 0xf; 524 c->x86 = (tfms >> 8) & 0xf;
504 c->x86_model = (tfms >> 4) & 0xf; 525 c->x86_model = (tfms >> 4) & 0xf;
505 c->x86_mask = tfms & 0xf; 526 c->x86_mask = tfms & 0xf;
527
506 if (c->x86 == 0xf) 528 if (c->x86 == 0xf)
507 c->x86 += (tfms >> 20) & 0xff; 529 c->x86 += (tfms >> 20) & 0xff;
508 if (c->x86 >= 0x6) 530 if (c->x86 >= 0x6)
509 c->x86_model += ((tfms >> 16) & 0xf) << 4; 531 c->x86_model += ((tfms >> 16) & 0xf) << 4;
532
510 if (cap0 & (1<<19)) { 533 if (cap0 & (1<<19)) {
511 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; 534 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
512 c->x86_cache_alignment = c->x86_clflush_size; 535 c->x86_cache_alignment = c->x86_clflush_size;
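
cpu_detect() above unpacks family, model and stepping from EAX of CPUID leaf 1 and folds in the extended family and extended model fields. A standalone decode of a sample signature (the value below is an example chosen for illustration, not taken from the patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t tfms = 0x000206c2;	/* example signature */

	unsigned int family   = (tfms >> 8) & 0xf;
	unsigned int model    = (tfms >> 4) & 0xf;
	unsigned int stepping =  tfms       & 0xf;

	if (family == 0xf)
		family += (tfms >> 20) & 0xff;		/* extended family */
	if (family >= 0x6)
		model += ((tfms >> 16) & 0xf) << 4;	/* extended model  */

	printf("family=0x%x model=0x%x stepping=0x%x\n",
	       family, model, stepping);
	return 0;
}
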
@@ -522,6 +545,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
522 /* Intel-defined flags: level 0x00000001 */ 545 /* Intel-defined flags: level 0x00000001 */
523 if (c->cpuid_level >= 0x00000001) { 546 if (c->cpuid_level >= 0x00000001) {
524 u32 capability, excap; 547 u32 capability, excap;
548
525 cpuid(0x00000001, &tfms, &ebx, &excap, &capability); 549 cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
526 c->x86_capability[0] = capability; 550 c->x86_capability[0] = capability;
527 c->x86_capability[4] = excap; 551 c->x86_capability[4] = excap;
@@ -530,6 +554,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
530 /* AMD-defined flags: level 0x80000001 */ 554 /* AMD-defined flags: level 0x80000001 */
531 xlvl = cpuid_eax(0x80000000); 555 xlvl = cpuid_eax(0x80000000);
532 c->extended_cpuid_level = xlvl; 556 c->extended_cpuid_level = xlvl;
557
533 if ((xlvl & 0xffff0000) == 0x80000000) { 558 if ((xlvl & 0xffff0000) == 0x80000000) {
534 if (xlvl >= 0x80000001) { 559 if (xlvl >= 0x80000001) {
535 c->x86_capability[1] = cpuid_edx(0x80000001); 560 c->x86_capability[1] = cpuid_edx(0x80000001);
@@ -537,13 +562,15 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
537 } 562 }
538 } 563 }
539 564
540#ifdef CONFIG_X86_64
541 if (c->extended_cpuid_level >= 0x80000008) { 565 if (c->extended_cpuid_level >= 0x80000008) {
542 u32 eax = cpuid_eax(0x80000008); 566 u32 eax = cpuid_eax(0x80000008);
543 567
544 c->x86_virt_bits = (eax >> 8) & 0xff; 568 c->x86_virt_bits = (eax >> 8) & 0xff;
545 c->x86_phys_bits = eax & 0xff; 569 c->x86_phys_bits = eax & 0xff;
546 } 570 }
571#ifdef CONFIG_X86_32
572 else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
573 c->x86_phys_bits = 36;
547#endif 574#endif
548 575
549 if (c->extended_cpuid_level >= 0x80000007) 576 if (c->extended_cpuid_level >= 0x80000007)
@@ -590,8 +617,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
590{ 617{
591#ifdef CONFIG_X86_64 618#ifdef CONFIG_X86_64
592 c->x86_clflush_size = 64; 619 c->x86_clflush_size = 64;
620 c->x86_phys_bits = 36;
621 c->x86_virt_bits = 48;
593#else 622#else
594 c->x86_clflush_size = 32; 623 c->x86_clflush_size = 32;
624 c->x86_phys_bits = 32;
625 c->x86_virt_bits = 32;
595#endif 626#endif
596 c->x86_cache_alignment = c->x86_clflush_size; 627 c->x86_cache_alignment = c->x86_clflush_size;
597 628
@@ -622,12 +653,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
622 653
623void __init early_cpu_init(void) 654void __init early_cpu_init(void)
624{ 655{
625 struct cpu_dev **cdev; 656 const struct cpu_dev *const *cdev;
626 int count = 0; 657 int count = 0;
627 658
628 printk("KERNEL supported cpus:\n"); 659 printk(KERN_INFO "KERNEL supported cpus:\n");
629 for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { 660 for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
630 struct cpu_dev *cpudev = *cdev; 661 const struct cpu_dev *cpudev = *cdev;
631 unsigned int j; 662 unsigned int j;
632 663
633 if (count >= X86_VENDOR_NUM) 664 if (count >= X86_VENDOR_NUM)
@@ -638,7 +669,7 @@ void __init early_cpu_init(void)
638 for (j = 0; j < 2; j++) { 669 for (j = 0; j < 2; j++) {
639 if (!cpudev->c_ident[j]) 670 if (!cpudev->c_ident[j])
640 continue; 671 continue;
641 printk(" %s %s\n", cpudev->c_vendor, 672 printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
642 cpudev->c_ident[j]); 673 cpudev->c_ident[j]);
643 } 674 }
644 } 675 }
@@ -714,9 +745,13 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
714 c->x86_coreid_bits = 0; 745 c->x86_coreid_bits = 0;
715#ifdef CONFIG_X86_64 746#ifdef CONFIG_X86_64
716 c->x86_clflush_size = 64; 747 c->x86_clflush_size = 64;
748 c->x86_phys_bits = 36;
749 c->x86_virt_bits = 48;
717#else 750#else
718 c->cpuid_level = -1; /* CPUID not detected */ 751 c->cpuid_level = -1; /* CPUID not detected */
719 c->x86_clflush_size = 32; 752 c->x86_clflush_size = 32;
753 c->x86_phys_bits = 32;
754 c->x86_virt_bits = 32;
720#endif 755#endif
721 c->x86_cache_alignment = c->x86_clflush_size; 756 c->x86_cache_alignment = c->x86_clflush_size;
722 memset(&c->x86_capability, 0, sizeof c->x86_capability); 757 memset(&c->x86_capability, 0, sizeof c->x86_capability);
@@ -747,8 +782,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
747 squash_the_stupid_serial_number(c); 782 squash_the_stupid_serial_number(c);
748 783
749 /* 784 /*
750 * The vendor-specific functions might have changed features. Now 785 * The vendor-specific functions might have changed features.
751 * we do "generic changes." 786 * Now we do "generic changes."
752 */ 787 */
753 788
754 /* Filter out anything that depends on CPUID levels we don't have */ 789 /* Filter out anything that depends on CPUID levels we don't have */
@@ -756,7 +791,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
756 791
757 /* If the model name is still unset, do table lookup. */ 792 /* If the model name is still unset, do table lookup. */
758 if (!c->x86_model_id[0]) { 793 if (!c->x86_model_id[0]) {
759 char *p; 794 const char *p;
760 p = table_lookup_model(c); 795 p = table_lookup_model(c);
761 if (p) 796 if (p)
762 strcpy(c->x86_model_id, p); 797 strcpy(c->x86_model_id, p);
@@ -832,11 +867,11 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
832} 867}
833 868
834struct msr_range { 869struct msr_range {
835 unsigned min; 870 unsigned min;
836 unsigned max; 871 unsigned max;
837}; 872};
838 873
839static struct msr_range msr_range_array[] __cpuinitdata = { 874static const struct msr_range msr_range_array[] __cpuinitconst = {
840 { 0x00000000, 0x00000418}, 875 { 0x00000000, 0x00000418},
841 { 0xc0000000, 0xc000040b}, 876 { 0xc0000000, 0xc000040b},
842 { 0xc0010000, 0xc0010142}, 877 { 0xc0010000, 0xc0010142},
@@ -845,14 +880,15 @@ static struct msr_range msr_range_array[] __cpuinitdata = {
845 880
846static void __cpuinit print_cpu_msr(void) 881static void __cpuinit print_cpu_msr(void)
847{ 882{
883 unsigned index_min, index_max;
848 unsigned index; 884 unsigned index;
849 u64 val; 885 u64 val;
850 int i; 886 int i;
851 unsigned index_min, index_max;
852 887
853 for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { 888 for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
854 index_min = msr_range_array[i].min; 889 index_min = msr_range_array[i].min;
855 index_max = msr_range_array[i].max; 890 index_max = msr_range_array[i].max;
891
856 for (index = index_min; index < index_max; index++) { 892 for (index = index_min; index < index_max; index++) {
857 if (rdmsrl_amd_safe(index, &val)) 893 if (rdmsrl_amd_safe(index, &val))
858 continue; 894 continue;
@@ -862,6 +898,7 @@ static void __cpuinit print_cpu_msr(void)
862} 898}
863 899
864static int show_msr __cpuinitdata; 900static int show_msr __cpuinitdata;
901
865static __init int setup_show_msr(char *arg) 902static __init int setup_show_msr(char *arg)
866{ 903{
867 int num; 904 int num;
@@ -883,12 +920,14 @@ __setup("noclflush", setup_noclflush);
883 920
884void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) 921void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
885{ 922{
886 char *vendor = NULL; 923 const char *vendor = NULL;
887 924
888 if (c->x86_vendor < X86_VENDOR_NUM) 925 if (c->x86_vendor < X86_VENDOR_NUM) {
889 vendor = this_cpu->c_vendor; 926 vendor = this_cpu->c_vendor;
890 else if (c->cpuid_level >= 0) 927 } else {
891 vendor = c->x86_vendor_id; 928 if (c->cpuid_level >= 0)
929 vendor = c->x86_vendor_id;
930 }
892 931
893 if (vendor && !strstr(c->x86_model_id, vendor)) 932 if (vendor && !strstr(c->x86_model_id, vendor))
894 printk(KERN_CONT "%s ", vendor); 933 printk(KERN_CONT "%s ", vendor);
@@ -915,10 +954,12 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
915static __init int setup_disablecpuid(char *arg) 954static __init int setup_disablecpuid(char *arg)
916{ 955{
917 int bit; 956 int bit;
957
918 if (get_option(&arg, &bit) && bit < NCAPINTS*32) 958 if (get_option(&arg, &bit) && bit < NCAPINTS*32)
919 setup_clear_cpu_cap(bit); 959 setup_clear_cpu_cap(bit);
920 else 960 else
921 return 0; 961 return 0;
962
922 return 1; 963 return 1;
923} 964}
924__setup("clearcpuid=", setup_disablecpuid); 965__setup("clearcpuid=", setup_disablecpuid);
@@ -928,6 +969,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
928 969
929DEFINE_PER_CPU_FIRST(union irq_stack_union, 970DEFINE_PER_CPU_FIRST(union irq_stack_union,
930 irq_stack_union) __aligned(PAGE_SIZE); 971 irq_stack_union) __aligned(PAGE_SIZE);
972
931DEFINE_PER_CPU(char *, irq_stack_ptr) = 973DEFINE_PER_CPU(char *, irq_stack_ptr) =
932 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; 974 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
933 975
@@ -937,12 +979,21 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack);
937 979
938DEFINE_PER_CPU(unsigned int, irq_count) = -1; 980DEFINE_PER_CPU(unsigned int, irq_count) = -1;
939 981
982/*
983 * Special IST stacks which the CPU switches to when it calls
984 * an IST-marked descriptor entry. Up to 7 stacks (hardware
985 * limit), all of them are 4K, except the debug stack which
986 * is 8K.
987 */
988static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
989 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
990 [DEBUG_STACK - 1] = DEBUG_STKSZ
991};
992
940static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks 993static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
941 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) 994 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
942 __aligned(PAGE_SIZE); 995 __aligned(PAGE_SIZE);
943 996
944extern asmlinkage void ignore_sysret(void);
945
946/* May not be marked __init: used by software suspend */ 997/* May not be marked __init: used by software suspend */
947void syscall_init(void) 998void syscall_init(void)
948{ 999{
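
The exception_stack_sizes[] table introduced above uses the GNU range-designator extension: every IST slot defaults to EXCEPTION_STKSZ and the debug slot alone is overridden to DEBUG_STKSZ. A small standalone illustration of the idiom, with stand-in sizes and a made-up slot number:

#include <stdio.h>

#define N_STACKS	7
#define DEBUG_SLOT	4	/* stand-in for DEBUG_STACK, illustration only */
#define STKSZ		4096
#define DBG_STKSZ	8192

static const unsigned int stack_sizes[N_STACKS] = {
	[0 ... N_STACKS - 1] = STKSZ,		/* default for every slot */
	[DEBUG_SLOT - 1]     = DBG_STKSZ,	/* later designator wins   */
};

int main(void)
{
	int i;

	for (i = 0; i < N_STACKS; i++)
		printf("stack %d: %u bytes\n", i, stack_sizes[i]);
	return 0;
}
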
@@ -972,7 +1023,7 @@ unsigned long kernel_eflags;
972 */ 1023 */
973DEFINE_PER_CPU(struct orig_ist, orig_ist); 1024DEFINE_PER_CPU(struct orig_ist, orig_ist);
974 1025
975#else /* x86_64 */ 1026#else /* CONFIG_X86_64 */
976 1027
977#ifdef CONFIG_CC_STACKPROTECTOR 1028#ifdef CONFIG_CC_STACKPROTECTOR
978DEFINE_PER_CPU(unsigned long, stack_canary); 1029DEFINE_PER_CPU(unsigned long, stack_canary);
@@ -984,9 +1035,26 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
984 memset(regs, 0, sizeof(struct pt_regs)); 1035 memset(regs, 0, sizeof(struct pt_regs));
985 regs->fs = __KERNEL_PERCPU; 1036 regs->fs = __KERNEL_PERCPU;
986 regs->gs = __KERNEL_STACK_CANARY; 1037 regs->gs = __KERNEL_STACK_CANARY;
1038
987 return regs; 1039 return regs;
988} 1040}
989#endif /* x86_64 */ 1041#endif /* CONFIG_X86_64 */
1042
1043/*
1044 * Clear all 6 debug registers:
1045 */
1046static void clear_all_debug_regs(void)
1047{
1048 int i;
1049
1050 for (i = 0; i < 8; i++) {
1051 /* Ignore db4, db5 */
1052 if ((i == 4) || (i == 5))
1053 continue;
1054
1055 set_debugreg(0, i);
1056 }
1057}
990 1058
991/* 1059/*
992 * cpu_init() initializes state that is per-CPU. Some data is already 1060 * cpu_init() initializes state that is per-CPU. Some data is already
@@ -996,15 +1064,20 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
996 * A lot of state is already set up in PDA init for 64 bit 1064 * A lot of state is already set up in PDA init for 64 bit
997 */ 1065 */
998#ifdef CONFIG_X86_64 1066#ifdef CONFIG_X86_64
1067
999void __cpuinit cpu_init(void) 1068void __cpuinit cpu_init(void)
1000{ 1069{
1001 int cpu = stack_smp_processor_id(); 1070 struct orig_ist *orig_ist;
1002 struct tss_struct *t = &per_cpu(init_tss, cpu);
1003 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
1004 unsigned long v;
1005 struct task_struct *me; 1071 struct task_struct *me;
1072 struct tss_struct *t;
1073 unsigned long v;
1074 int cpu;
1006 int i; 1075 int i;
1007 1076
1077 cpu = stack_smp_processor_id();
1078 t = &per_cpu(init_tss, cpu);
1079 orig_ist = &per_cpu(orig_ist, cpu);
1080
1008#ifdef CONFIG_NUMA 1081#ifdef CONFIG_NUMA
1009 if (cpu != 0 && percpu_read(node_number) == 0 && 1082 if (cpu != 0 && percpu_read(node_number) == 0 &&
1010 cpu_to_node(cpu) != NUMA_NO_NODE) 1083 cpu_to_node(cpu) != NUMA_NO_NODE)
@@ -1045,19 +1118,17 @@ void __cpuinit cpu_init(void)
1045 * set up and load the per-CPU TSS 1118 * set up and load the per-CPU TSS
1046 */ 1119 */
1047 if (!orig_ist->ist[0]) { 1120 if (!orig_ist->ist[0]) {
1048 static const unsigned int sizes[N_EXCEPTION_STACKS] = {
1049 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
1050 [DEBUG_STACK - 1] = DEBUG_STKSZ
1051 };
1052 char *estacks = per_cpu(exception_stacks, cpu); 1121 char *estacks = per_cpu(exception_stacks, cpu);
1122
1053 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1123 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1054 estacks += sizes[v]; 1124 estacks += exception_stack_sizes[v];
1055 orig_ist->ist[v] = t->x86_tss.ist[v] = 1125 orig_ist->ist[v] = t->x86_tss.ist[v] =
1056 (unsigned long)estacks; 1126 (unsigned long)estacks;
1057 } 1127 }
1058 } 1128 }
1059 1129
1060 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1130 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
1131
1061 /* 1132 /*
1062 * <= is required because the CPU will access up to 1133 * <= is required because the CPU will access up to
1063 * 8 bits beyond the end of the IO permission bitmap. 1134 * 8 bits beyond the end of the IO permission bitmap.
@@ -1067,8 +1138,7 @@ void __cpuinit cpu_init(void)
1067 1138
1068 atomic_inc(&init_mm.mm_count); 1139 atomic_inc(&init_mm.mm_count);
1069 me->active_mm = &init_mm; 1140 me->active_mm = &init_mm;
1070 if (me->mm) 1141 BUG_ON(me->mm);
1071 BUG();
1072 enter_lazy_tlb(&init_mm, me); 1142 enter_lazy_tlb(&init_mm, me);
1073 1143
1074 load_sp0(t, &current->thread); 1144 load_sp0(t, &current->thread);
@@ -1087,17 +1157,7 @@ void __cpuinit cpu_init(void)
1087 arch_kgdb_ops.correct_hw_break(); 1157 arch_kgdb_ops.correct_hw_break();
1088 else 1158 else
1089#endif 1159#endif
1090 { 1160 clear_all_debug_regs();
1091 /*
1092 * Clear all 6 debug registers:
1093 */
1094 set_debugreg(0UL, 0);
1095 set_debugreg(0UL, 1);
1096 set_debugreg(0UL, 2);
1097 set_debugreg(0UL, 3);
1098 set_debugreg(0UL, 6);
1099 set_debugreg(0UL, 7);
1100 }
1101 1161
1102 fpu_init(); 1162 fpu_init();
1103 1163
@@ -1118,7 +1178,8 @@ void __cpuinit cpu_init(void)
1118 1178
1119 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { 1179 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
1120 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); 1180 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
1121 for (;;) local_irq_enable(); 1181 for (;;)
1182 local_irq_enable();
1122 } 1183 }
1123 1184
1124 printk(KERN_INFO "Initializing CPU#%d\n", cpu); 1185 printk(KERN_INFO "Initializing CPU#%d\n", cpu);
@@ -1134,8 +1195,7 @@ void __cpuinit cpu_init(void)
1134 */ 1195 */
1135 atomic_inc(&init_mm.mm_count); 1196 atomic_inc(&init_mm.mm_count);
1136 curr->active_mm = &init_mm; 1197 curr->active_mm = &init_mm;
1137 if (curr->mm) 1198 BUG_ON(curr->mm);
1138 BUG();
1139 enter_lazy_tlb(&init_mm, curr); 1199 enter_lazy_tlb(&init_mm, curr);
1140 1200
1141 load_sp0(t, thread); 1201 load_sp0(t, thread);
@@ -1148,13 +1208,7 @@ void __cpuinit cpu_init(void)
1148 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); 1208 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
1149#endif 1209#endif
1150 1210
1151 /* Clear all 6 debug registers: */ 1211 clear_all_debug_regs();
1152 set_debugreg(0, 0);
1153 set_debugreg(0, 1);
1154 set_debugreg(0, 2);
1155 set_debugreg(0, 3);
1156 set_debugreg(0, 6);
1157 set_debugreg(0, 7);
1158 1212
1159 /* 1213 /*
1160 * Force FPU initialization: 1214 * Force FPU initialization:
@@ -1174,6 +1228,4 @@ void __cpuinit cpu_init(void)
1174 1228
1175 xsave_init(); 1229 xsave_init();
1176} 1230}
1177
1178
1179#endif 1231#endif
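The common.c changes above hoist the per-IST stack sizes into the shared exception_stack_sizes[] table and walk a cursor through the per-CPU exception_stacks area. A small standalone sketch of that arithmetic follows; the constants are placeholders picked for the example, not values taken from this patch:

#include <stdio.h>

/* Placeholder values for the sketch; the real ones live in kernel headers. */
#define N_EXCEPTION_STACKS	5
#define EXCEPTION_STKSZ		4096
#define DEBUG_STKSZ		8192
#define DEBUG_STACK		4

int main(void)
{
	unsigned long sizes[N_EXCEPTION_STACKS];
	unsigned long base = 0x100000, top = base;	/* stand-in for the per-CPU area */
	int v;

	for (v = 0; v < N_EXCEPTION_STACKS; v++)
		sizes[v] = EXCEPTION_STKSZ;
	sizes[DEBUG_STACK - 1] = DEBUG_STKSZ;		/* only the debug stack is bigger */

	/* Mirrors the loop in cpu_init(): advance first, then record, so each
	 * IST slot ends up holding the *top* of its stack. */
	for (v = 0; v < N_EXCEPTION_STACKS; v++) {
		top += sizes[v];
		printf("ist[%d] top = %#lx\n", v, top);
	}

	/* The total matches the exception_stacks declaration:
	 * (N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ. */
	printf("area size = %lu bytes\n", top - base);
	return 0;
}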
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index de4094a39210..6de9a908e400 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -3,33 +3,34 @@
3#define ARCH_X86_CPU_H 3#define ARCH_X86_CPU_H
4 4
5struct cpu_model_info { 5struct cpu_model_info {
6 int vendor; 6 int vendor;
7 int family; 7 int family;
8 char *model_names[16]; 8 const char *model_names[16];
9}; 9};
10 10
11/* attempt to consolidate cpu attributes */ 11/* attempt to consolidate cpu attributes */
12struct cpu_dev { 12struct cpu_dev {
13 char * c_vendor; 13 const char *c_vendor;
14 14
15 /* some have two possibilities for cpuid string */ 15 /* some have two possibilities for cpuid string */
16 char * c_ident[2]; 16 const char *c_ident[2];
17 17
18 struct cpu_model_info c_models[4]; 18 struct cpu_model_info c_models[4];
19 19
20 void (*c_early_init)(struct cpuinfo_x86 *c); 20 void (*c_early_init)(struct cpuinfo_x86 *);
21 void (*c_init)(struct cpuinfo_x86 * c); 21 void (*c_init)(struct cpuinfo_x86 *);
22 void (*c_identify)(struct cpuinfo_x86 * c); 22 void (*c_identify)(struct cpuinfo_x86 *);
23 unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); 23 unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int);
24 int c_x86_vendor; 24 int c_x86_vendor;
25}; 25};
26 26
27#define cpu_dev_register(cpu_devX) \ 27#define cpu_dev_register(cpu_devX) \
28 static struct cpu_dev *__cpu_dev_##cpu_devX __used \ 28 static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \
29 __attribute__((__section__(".x86_cpu_dev.init"))) = \ 29 __attribute__((__section__(".x86_cpu_dev.init"))) = \
30 &cpu_devX; 30 &cpu_devX;
31 31
32extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; 32extern const struct cpu_dev *const __x86_cpu_dev_start[],
33 *const __x86_cpu_dev_end[];
33 34
34extern void display_cacheinfo(struct cpuinfo_x86 *c); 35extern void display_cacheinfo(struct cpuinfo_x86 *c);
35 36
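To show how the constified interface above is meant to be used, here is a rough registration sketch; the vendor name, callback, and vendor id are invented for the example and do not come from this patch:

static void example_early_init(struct cpuinfo_x86 *c)
{
	/* early per-CPU feature fixups for this vendor would go here */
}

static const struct cpu_dev example_cpu_dev = {
	.c_vendor	= "Example",
	.c_ident	= { "GenuineExample" },		/* CPUID vendor string(s) */
	.c_early_init	= example_early_init,
	.c_x86_vendor	= X86_VENDOR_UNKNOWN,		/* placeholder vendor id */
};

cpu_dev_register(example_cpu_dev);

Note that after this change the registered object itself must be const, since the macro now stores its address in a const struct cpu_dev *const section entry.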
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
new file mode 100755
index 000000000000..46e29ab96c6a
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -0,0 +1,901 @@
1/*
2 * CPU x86 architecture debug code
3 *
4 * Copyright(C) 2009 Jaswinder Singh Rajput
5 *
6 * For licencing details see kernel-base/COPYING
7 */
8
9#include <linux/interrupt.h>
10#include <linux/compiler.h>
11#include <linux/seq_file.h>
12#include <linux/debugfs.h>
13#include <linux/kprobes.h>
14#include <linux/uaccess.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/percpu.h>
18#include <linux/signal.h>
19#include <linux/errno.h>
20#include <linux/sched.h>
21#include <linux/types.h>
22#include <linux/init.h>
23#include <linux/slab.h>
24#include <linux/smp.h>
25
26#include <asm/cpu_debug.h>
27#include <asm/paravirt.h>
28#include <asm/system.h>
29#include <asm/traps.h>
30#include <asm/apic.h>
31#include <asm/desc.h>
32
33static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]);
34static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]);
35static DEFINE_PER_CPU(unsigned, cpu_modelflag);
36static DEFINE_PER_CPU(int, cpu_priv_count);
37static DEFINE_PER_CPU(unsigned, cpu_model);
38
39static DEFINE_MUTEX(cpu_debug_lock);
40
41static struct dentry *cpu_debugfs_dir;
42
43static struct cpu_debug_base cpu_base[] = {
44 { "mc", CPU_MC, 0 },
45 { "monitor", CPU_MONITOR, 0 },
46 { "time", CPU_TIME, 0 },
47 { "pmc", CPU_PMC, 1 },
48 { "platform", CPU_PLATFORM, 0 },
49 { "apic", CPU_APIC, 0 },
50 { "poweron", CPU_POWERON, 0 },
51 { "control", CPU_CONTROL, 0 },
52 { "features", CPU_FEATURES, 0 },
53 { "lastbranch", CPU_LBRANCH, 0 },
54 { "bios", CPU_BIOS, 0 },
55 { "freq", CPU_FREQ, 0 },
56 { "mtrr", CPU_MTRR, 0 },
57 { "perf", CPU_PERF, 0 },
58 { "cache", CPU_CACHE, 0 },
59 { "sysenter", CPU_SYSENTER, 0 },
60 { "therm", CPU_THERM, 0 },
61 { "misc", CPU_MISC, 0 },
62 { "debug", CPU_DEBUG, 0 },
63 { "pat", CPU_PAT, 0 },
64 { "vmx", CPU_VMX, 0 },
65 { "call", CPU_CALL, 0 },
66 { "base", CPU_BASE, 0 },
67 { "ver", CPU_VER, 0 },
68 { "conf", CPU_CONF, 0 },
69 { "smm", CPU_SMM, 0 },
70 { "svm", CPU_SVM, 0 },
71 { "osvm", CPU_OSVM, 0 },
72 { "tss", CPU_TSS, 0 },
73 { "cr", CPU_CR, 0 },
74 { "dt", CPU_DT, 0 },
75 { "registers", CPU_REG_ALL, 0 },
76};
77
78static struct cpu_file_base cpu_file[] = {
79 { "index", CPU_REG_ALL, 0 },
80 { "value", CPU_REG_ALL, 1 },
81};
82
83/* Intel Registers Range */
84static struct cpu_debug_range cpu_intel_range[] = {
85 { 0x00000000, 0x00000001, CPU_MC, CPU_INTEL_ALL },
86 { 0x00000006, 0x00000007, CPU_MONITOR, CPU_CX_AT_XE },
87 { 0x00000010, 0x00000010, CPU_TIME, CPU_INTEL_ALL },
88 { 0x00000011, 0x00000013, CPU_PMC, CPU_INTEL_PENTIUM },
89 { 0x00000017, 0x00000017, CPU_PLATFORM, CPU_PX_CX_AT_XE },
90 { 0x0000001B, 0x0000001B, CPU_APIC, CPU_P6_CX_AT_XE },
91
92 { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_PX_CX_AT_XE },
93 { 0x0000002B, 0x0000002B, CPU_POWERON, CPU_INTEL_XEON },
94 { 0x0000002C, 0x0000002C, CPU_FREQ, CPU_INTEL_XEON },
95 { 0x0000003A, 0x0000003A, CPU_CONTROL, CPU_CX_AT_XE },
96
97 { 0x00000040, 0x00000043, CPU_LBRANCH, CPU_PM_CX_AT_XE },
98 { 0x00000044, 0x00000047, CPU_LBRANCH, CPU_PM_CO_AT },
99 { 0x00000060, 0x00000063, CPU_LBRANCH, CPU_C2_AT },
100 { 0x00000064, 0x00000067, CPU_LBRANCH, CPU_INTEL_ATOM },
101
102 { 0x00000079, 0x00000079, CPU_BIOS, CPU_P6_CX_AT_XE },
103 { 0x00000088, 0x0000008A, CPU_CACHE, CPU_INTEL_P6 },
104 { 0x0000008B, 0x0000008B, CPU_BIOS, CPU_P6_CX_AT_XE },
105 { 0x0000009B, 0x0000009B, CPU_MONITOR, CPU_INTEL_XEON },
106
107 { 0x000000C1, 0x000000C2, CPU_PMC, CPU_P6_CX_AT },
108 { 0x000000CD, 0x000000CD, CPU_FREQ, CPU_CX_AT },
109 { 0x000000E7, 0x000000E8, CPU_PERF, CPU_CX_AT },
110 { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_P6_CX_XE },
111
112 { 0x00000116, 0x00000116, CPU_CACHE, CPU_INTEL_P6 },
113 { 0x00000118, 0x00000118, CPU_CACHE, CPU_INTEL_P6 },
114 { 0x00000119, 0x00000119, CPU_CACHE, CPU_INTEL_PX },
115 { 0x0000011A, 0x0000011B, CPU_CACHE, CPU_INTEL_P6 },
116 { 0x0000011E, 0x0000011E, CPU_CACHE, CPU_PX_CX_AT },
117
118 { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_P6_CX_AT_XE },
119 { 0x00000179, 0x0000017A, CPU_MC, CPU_PX_CX_AT_XE },
120 { 0x0000017B, 0x0000017B, CPU_MC, CPU_P6_XE },
121 { 0x00000186, 0x00000187, CPU_PMC, CPU_P6_CX_AT },
122 { 0x00000198, 0x00000199, CPU_PERF, CPU_PM_CX_AT_XE },
123 { 0x0000019A, 0x0000019A, CPU_TIME, CPU_PM_CX_AT_XE },
124 { 0x0000019B, 0x0000019D, CPU_THERM, CPU_PM_CX_AT_XE },
125 { 0x000001A0, 0x000001A0, CPU_MISC, CPU_PM_CX_AT_XE },
126
127 { 0x000001C9, 0x000001C9, CPU_LBRANCH, CPU_PM_CX_AT },
128 { 0x000001D7, 0x000001D8, CPU_LBRANCH, CPU_INTEL_XEON },
129 { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_CX_AT_XE },
130 { 0x000001DA, 0x000001DA, CPU_LBRANCH, CPU_INTEL_XEON },
131 { 0x000001DB, 0x000001DB, CPU_LBRANCH, CPU_P6_XE },
132 { 0x000001DC, 0x000001DC, CPU_LBRANCH, CPU_INTEL_P6 },
133 { 0x000001DD, 0x000001DE, CPU_LBRANCH, CPU_PX_CX_AT_XE },
134 { 0x000001E0, 0x000001E0, CPU_LBRANCH, CPU_INTEL_P6 },
135
136 { 0x00000200, 0x0000020F, CPU_MTRR, CPU_P6_CX_XE },
137 { 0x00000250, 0x00000250, CPU_MTRR, CPU_P6_CX_XE },
138 { 0x00000258, 0x00000259, CPU_MTRR, CPU_P6_CX_XE },
139 { 0x00000268, 0x0000026F, CPU_MTRR, CPU_P6_CX_XE },
140 { 0x00000277, 0x00000277, CPU_PAT, CPU_C2_AT_XE },
141 { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_P6_CX_XE },
142
143 { 0x00000300, 0x00000308, CPU_PMC, CPU_INTEL_XEON },
144 { 0x00000309, 0x0000030B, CPU_PMC, CPU_C2_AT_XE },
145 { 0x0000030C, 0x00000311, CPU_PMC, CPU_INTEL_XEON },
146 { 0x00000345, 0x00000345, CPU_PMC, CPU_C2_AT },
147 { 0x00000360, 0x00000371, CPU_PMC, CPU_INTEL_XEON },
148 { 0x0000038D, 0x00000390, CPU_PMC, CPU_C2_AT },
149 { 0x000003A0, 0x000003BE, CPU_PMC, CPU_INTEL_XEON },
150 { 0x000003C0, 0x000003CD, CPU_PMC, CPU_INTEL_XEON },
151 { 0x000003E0, 0x000003E1, CPU_PMC, CPU_INTEL_XEON },
152 { 0x000003F0, 0x000003F0, CPU_PMC, CPU_INTEL_XEON },
153 { 0x000003F1, 0x000003F1, CPU_PMC, CPU_C2_AT_XE },
154 { 0x000003F2, 0x000003F2, CPU_PMC, CPU_INTEL_XEON },
155
156 { 0x00000400, 0x00000402, CPU_MC, CPU_PM_CX_AT_XE },
157 { 0x00000403, 0x00000403, CPU_MC, CPU_INTEL_XEON },
158 { 0x00000404, 0x00000406, CPU_MC, CPU_PM_CX_AT_XE },
159 { 0x00000407, 0x00000407, CPU_MC, CPU_INTEL_XEON },
160 { 0x00000408, 0x0000040A, CPU_MC, CPU_PM_CX_AT_XE },
161 { 0x0000040B, 0x0000040B, CPU_MC, CPU_INTEL_XEON },
162 { 0x0000040C, 0x0000040E, CPU_MC, CPU_PM_CX_XE },
163 { 0x0000040F, 0x0000040F, CPU_MC, CPU_INTEL_XEON },
164 { 0x00000410, 0x00000412, CPU_MC, CPU_PM_CX_AT_XE },
165 { 0x00000413, 0x00000417, CPU_MC, CPU_CX_AT_XE },
166 { 0x00000480, 0x0000048B, CPU_VMX, CPU_CX_AT_XE },
167
168 { 0x00000600, 0x00000600, CPU_DEBUG, CPU_PM_CX_AT_XE },
169 { 0x00000680, 0x0000068F, CPU_LBRANCH, CPU_INTEL_XEON },
170 { 0x000006C0, 0x000006CF, CPU_LBRANCH, CPU_INTEL_XEON },
171
172 { 0x000107CC, 0x000107D3, CPU_PMC, CPU_INTEL_XEON_MP },
173
174 { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_INTEL_XEON },
175 { 0xC0000081, 0xC0000082, CPU_CALL, CPU_INTEL_XEON },
176 { 0xC0000084, 0xC0000084, CPU_CALL, CPU_INTEL_XEON },
177 { 0xC0000100, 0xC0000102, CPU_BASE, CPU_INTEL_XEON },
178};
179
180/* AMD Registers Range */
181static struct cpu_debug_range cpu_amd_range[] = {
182 { 0x00000000, 0x00000001, CPU_MC, CPU_K10_PLUS, },
183 { 0x00000010, 0x00000010, CPU_TIME, CPU_K8_PLUS, },
184 { 0x0000001B, 0x0000001B, CPU_APIC, CPU_K8_PLUS, },
185 { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_K7_PLUS },
186 { 0x0000008B, 0x0000008B, CPU_VER, CPU_K8_PLUS },
187 { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_K8_PLUS, },
188
189 { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_K8_PLUS, },
190 { 0x00000179, 0x0000017B, CPU_MC, CPU_K8_PLUS, },
191 { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_K8_PLUS, },
192 { 0x000001DB, 0x000001DE, CPU_LBRANCH, CPU_K8_PLUS, },
193
194 { 0x00000200, 0x0000020F, CPU_MTRR, CPU_K8_PLUS, },
195 { 0x00000250, 0x00000250, CPU_MTRR, CPU_K8_PLUS, },
196 { 0x00000258, 0x00000259, CPU_MTRR, CPU_K8_PLUS, },
197 { 0x00000268, 0x0000026F, CPU_MTRR, CPU_K8_PLUS, },
198 { 0x00000277, 0x00000277, CPU_PAT, CPU_K8_PLUS, },
199 { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_K8_PLUS, },
200
201 { 0x00000400, 0x00000413, CPU_MC, CPU_K8_PLUS, },
202
203 { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_AMD_ALL, },
204 { 0xC0000081, 0xC0000084, CPU_CALL, CPU_K8_PLUS, },
205 { 0xC0000100, 0xC0000102, CPU_BASE, CPU_K8_PLUS, },
206 { 0xC0000103, 0xC0000103, CPU_TIME, CPU_K10_PLUS, },
207
208 { 0xC0010000, 0xC0010007, CPU_PMC, CPU_K8_PLUS, },
209 { 0xC0010010, 0xC0010010, CPU_CONF, CPU_K7_PLUS, },
210 { 0xC0010015, 0xC0010015, CPU_CONF, CPU_K7_PLUS, },
211 { 0xC0010016, 0xC001001A, CPU_MTRR, CPU_K8_PLUS, },
212 { 0xC001001D, 0xC001001D, CPU_MTRR, CPU_K8_PLUS, },
213 { 0xC001001F, 0xC001001F, CPU_CONF, CPU_K8_PLUS, },
214 { 0xC0010030, 0xC0010035, CPU_BIOS, CPU_K8_PLUS, },
215 { 0xC0010044, 0xC0010048, CPU_MC, CPU_K8_PLUS, },
216 { 0xC0010050, 0xC0010056, CPU_SMM, CPU_K0F_PLUS, },
217 { 0xC0010058, 0xC0010058, CPU_CONF, CPU_K10_PLUS, },
218 { 0xC0010060, 0xC0010060, CPU_CACHE, CPU_AMD_11, },
219 { 0xC0010061, 0xC0010068, CPU_SMM, CPU_K10_PLUS, },
220 { 0xC0010069, 0xC001006B, CPU_SMM, CPU_AMD_11, },
221 { 0xC0010070, 0xC0010071, CPU_SMM, CPU_K10_PLUS, },
222 { 0xC0010111, 0xC0010113, CPU_SMM, CPU_K8_PLUS, },
223 { 0xC0010114, 0xC0010118, CPU_SVM, CPU_K10_PLUS, },
224 { 0xC0010140, 0xC0010141, CPU_OSVM, CPU_K10_PLUS, },
225 { 0xC0011022, 0xC0011023, CPU_CONF, CPU_K10_PLUS, },
226};
227
228
229/* Intel */
230static int get_intel_modelflag(unsigned model)
231{
232 int flag;
233
234 switch (model) {
235 case 0x0501:
236 case 0x0502:
237 case 0x0504:
238 flag = CPU_INTEL_PENTIUM;
239 break;
240 case 0x0601:
241 case 0x0603:
242 case 0x0605:
243 case 0x0607:
244 case 0x0608:
245 case 0x060A:
246 case 0x060B:
247 flag = CPU_INTEL_P6;
248 break;
249 case 0x0609:
250 case 0x060D:
251 flag = CPU_INTEL_PENTIUM_M;
252 break;
253 case 0x060E:
254 flag = CPU_INTEL_CORE;
255 break;
256 case 0x060F:
257 case 0x0617:
258 flag = CPU_INTEL_CORE2;
259 break;
260 case 0x061C:
261 flag = CPU_INTEL_ATOM;
262 break;
263 case 0x0F00:
264 case 0x0F01:
265 case 0x0F02:
266 case 0x0F03:
267 case 0x0F04:
268 flag = CPU_INTEL_XEON_P4;
269 break;
270 case 0x0F06:
271 flag = CPU_INTEL_XEON_MP;
272 break;
273 default:
274 flag = CPU_NONE;
275 break;
276 }
277
278 return flag;
279}
280
281/* AMD */
282static int get_amd_modelflag(unsigned model)
283{
284 int flag;
285
286 switch (model >> 8) {
287 case 0x6:
288 flag = CPU_AMD_K6;
289 break;
290 case 0x7:
291 flag = CPU_AMD_K7;
292 break;
293 case 0x8:
294 flag = CPU_AMD_K8;
295 break;
296 case 0xf:
297 flag = CPU_AMD_0F;
298 break;
299 case 0x10:
300 flag = CPU_AMD_10;
301 break;
302 case 0x11:
303 flag = CPU_AMD_11;
304 break;
305 default:
306 flag = CPU_NONE;
307 break;
308 }
309
310 return flag;
311}
312
313static int get_cpu_modelflag(unsigned cpu)
314{
315 int flag;
316
317 flag = per_cpu(cpu_model, cpu);
318
319 switch (flag >> 16) {
320 case X86_VENDOR_INTEL:
321 flag = get_intel_modelflag(flag);
322 break;
323 case X86_VENDOR_AMD:
324 flag = get_amd_modelflag(flag & 0xffff);
325 break;
326 default:
327 flag = CPU_NONE;
328 break;
329 }
330
331 return flag;
332}
333
334static int get_cpu_range_count(unsigned cpu)
335{
336 int index;
337
338 switch (per_cpu(cpu_model, cpu) >> 16) {
339 case X86_VENDOR_INTEL:
340 index = ARRAY_SIZE(cpu_intel_range);
341 break;
342 case X86_VENDOR_AMD:
343 index = ARRAY_SIZE(cpu_amd_range);
344 break;
345 default:
346 index = 0;
347 break;
348 }
349
350 return index;
351}
352
353static int is_typeflag_valid(unsigned cpu, unsigned flag)
354{
355 unsigned vendor, modelflag;
356 int i, index;
357
358	/* Standard Registers should always be valid */
359 if (flag >= CPU_TSS)
360 return 1;
361
362 modelflag = per_cpu(cpu_modelflag, cpu);
363 vendor = per_cpu(cpu_model, cpu) >> 16;
364 index = get_cpu_range_count(cpu);
365
366 for (i = 0; i < index; i++) {
367 switch (vendor) {
368 case X86_VENDOR_INTEL:
369 if ((cpu_intel_range[i].model & modelflag) &&
370 (cpu_intel_range[i].flag & flag))
371 return 1;
372 break;
373 case X86_VENDOR_AMD:
374 if ((cpu_amd_range[i].model & modelflag) &&
375 (cpu_amd_range[i].flag & flag))
376 return 1;
377 break;
378 }
379 }
380
381 /* Invalid */
382 return 0;
383}
384
385static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max,
386 int index, unsigned flag)
387{
388 unsigned modelflag;
389
390 modelflag = per_cpu(cpu_modelflag, cpu);
391 *max = 0;
392 switch (per_cpu(cpu_model, cpu) >> 16) {
393 case X86_VENDOR_INTEL:
394 if ((cpu_intel_range[index].model & modelflag) &&
395 (cpu_intel_range[index].flag & flag)) {
396 *min = cpu_intel_range[index].min;
397 *max = cpu_intel_range[index].max;
398 }
399 break;
400 case X86_VENDOR_AMD:
401 if ((cpu_amd_range[index].model & modelflag) &&
402 (cpu_amd_range[index].flag & flag)) {
403 *min = cpu_amd_range[index].min;
404 *max = cpu_amd_range[index].max;
405 }
406 break;
407 }
408
409 return *max;
410}
411
412/* This function can also be called with seq = NULL for printk */
413static void print_cpu_data(struct seq_file *seq, unsigned type,
414 u32 low, u32 high)
415{
416 struct cpu_private *priv;
417 u64 val = high;
418
419 if (seq) {
420 priv = seq->private;
421 if (priv->file) {
422 val = (val << 32) | low;
423 seq_printf(seq, "0x%llx\n", val);
424 } else
425 seq_printf(seq, " %08x: %08x_%08x\n",
426 type, high, low);
427 } else
428 printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low);
429}
430
431/* This function can also be called with seq = NULL for printk */
432static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
433{
434 unsigned msr, msr_min, msr_max;
435 struct cpu_private *priv;
436 u32 low, high;
437 int i, range;
438
439 if (seq) {
440 priv = seq->private;
441 if (priv->file) {
442 if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg,
443 &low, &high))
444 print_cpu_data(seq, priv->reg, low, high);
445 return;
446 }
447 }
448
449 range = get_cpu_range_count(cpu);
450
451 for (i = 0; i < range; i++) {
452 if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag))
453 continue;
454
455 for (msr = msr_min; msr <= msr_max; msr++) {
456 if (rdmsr_safe_on_cpu(cpu, msr, &low, &high))
457 continue;
458 print_cpu_data(seq, msr, low, high);
459 }
460 }
461}
462
463static void print_tss(void *arg)
464{
465 struct pt_regs *regs = task_pt_regs(current);
466 struct seq_file *seq = arg;
467 unsigned int seg;
468
469 seq_printf(seq, " RAX\t: %016lx\n", regs->ax);
470 seq_printf(seq, " RBX\t: %016lx\n", regs->bx);
471 seq_printf(seq, " RCX\t: %016lx\n", regs->cx);
472 seq_printf(seq, " RDX\t: %016lx\n", regs->dx);
473
474 seq_printf(seq, " RSI\t: %016lx\n", regs->si);
475 seq_printf(seq, " RDI\t: %016lx\n", regs->di);
476 seq_printf(seq, " RBP\t: %016lx\n", regs->bp);
477 seq_printf(seq, " ESP\t: %016lx\n", regs->sp);
478
479#ifdef CONFIG_X86_64
480 seq_printf(seq, " R08\t: %016lx\n", regs->r8);
481 seq_printf(seq, " R09\t: %016lx\n", regs->r9);
482 seq_printf(seq, " R10\t: %016lx\n", regs->r10);
483 seq_printf(seq, " R11\t: %016lx\n", regs->r11);
484 seq_printf(seq, " R12\t: %016lx\n", regs->r12);
485 seq_printf(seq, " R13\t: %016lx\n", regs->r13);
486 seq_printf(seq, " R14\t: %016lx\n", regs->r14);
487 seq_printf(seq, " R15\t: %016lx\n", regs->r15);
488#endif
489
490 asm("movl %%cs,%0" : "=r" (seg));
491 seq_printf(seq, " CS\t: %04x\n", seg);
492 asm("movl %%ds,%0" : "=r" (seg));
493 seq_printf(seq, " DS\t: %04x\n", seg);
494 seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff);
495 asm("movl %%es,%0" : "=r" (seg));
496 seq_printf(seq, " ES\t: %04x\n", seg);
497 asm("movl %%fs,%0" : "=r" (seg));
498 seq_printf(seq, " FS\t: %04x\n", seg);
499 asm("movl %%gs,%0" : "=r" (seg));
500 seq_printf(seq, " GS\t: %04x\n", seg);
501
502 seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags);
503
504 seq_printf(seq, " EIP\t: %016lx\n", regs->ip);
505}
506
507static void print_cr(void *arg)
508{
509 struct seq_file *seq = arg;
510
511 seq_printf(seq, " cr0\t: %016lx\n", read_cr0());
512 seq_printf(seq, " cr2\t: %016lx\n", read_cr2());
513 seq_printf(seq, " cr3\t: %016lx\n", read_cr3());
514 seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe());
515#ifdef CONFIG_X86_64
516 seq_printf(seq, " cr8\t: %016lx\n", read_cr8());
517#endif
518}
519
520static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt)
521{
522 seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size));
523}
524
525static void print_dt(void *seq)
526{
527 struct desc_ptr dt;
528 unsigned long ldt;
529
530 /* IDT */
531 store_idt((struct desc_ptr *)&dt);
532 print_desc_ptr("IDT", seq, dt);
533
534 /* GDT */
535 store_gdt((struct desc_ptr *)&dt);
536 print_desc_ptr("GDT", seq, dt);
537
538 /* LDT */
539 store_ldt(ldt);
540 seq_printf(seq, " LDT\t: %016lx\n", ldt);
541
542 /* TR */
543 store_tr(ldt);
544 seq_printf(seq, " TR\t: %016lx\n", ldt);
545}
546
547static void print_dr(void *arg)
548{
549 struct seq_file *seq = arg;
550 unsigned long dr;
551 int i;
552
553 for (i = 0; i < 8; i++) {
554 /* Ignore db4, db5 */
555 if ((i == 4) || (i == 5))
556 continue;
557 get_debugreg(dr, i);
558 seq_printf(seq, " dr%d\t: %016lx\n", i, dr);
559 }
560
561 seq_printf(seq, "\n MSR\t:\n");
562}
563
564static void print_apic(void *arg)
565{
566 struct seq_file *seq = arg;
567
568#ifdef CONFIG_X86_LOCAL_APIC
569 seq_printf(seq, " LAPIC\t:\n");
570 seq_printf(seq, " ID\t\t: %08x\n", apic_read(APIC_ID) >> 24);
571 seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR));
572 seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI));
573 seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI));
574 seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI));
575 seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR));
576 seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR));
577 seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV));
578 seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR));
579 seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR));
580 seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR));
581 seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2));
582 seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT));
583 seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR));
584 seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC));
585 seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0));
586 seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1));
587 seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR));
588 seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT));
589 seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT));
590 seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR));
591#endif /* CONFIG_X86_LOCAL_APIC */
592
593 seq_printf(seq, "\n MSR\t:\n");
594}
595
596static int cpu_seq_show(struct seq_file *seq, void *v)
597{
598 struct cpu_private *priv = seq->private;
599
600 if (priv == NULL)
601 return -EINVAL;
602
603 switch (cpu_base[priv->type].flag) {
604 case CPU_TSS:
605 smp_call_function_single(priv->cpu, print_tss, seq, 1);
606 break;
607 case CPU_CR:
608 smp_call_function_single(priv->cpu, print_cr, seq, 1);
609 break;
610 case CPU_DT:
611 smp_call_function_single(priv->cpu, print_dt, seq, 1);
612 break;
613 case CPU_DEBUG:
614 if (priv->file == CPU_INDEX_BIT)
615 smp_call_function_single(priv->cpu, print_dr, seq, 1);
616 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
617 break;
618 case CPU_APIC:
619 if (priv->file == CPU_INDEX_BIT)
620 smp_call_function_single(priv->cpu, print_apic, seq, 1);
621 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
622 break;
623
624 default:
625 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
626 break;
627 }
628 seq_printf(seq, "\n");
629
630 return 0;
631}
632
633static void *cpu_seq_start(struct seq_file *seq, loff_t *pos)
634{
635 if (*pos == 0) /* One time is enough ;-) */
636 return seq;
637
638 return NULL;
639}
640
641static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
642{
643 (*pos)++;
644
645 return cpu_seq_start(seq, pos);
646}
647
648static void cpu_seq_stop(struct seq_file *seq, void *v)
649{
650}
651
652static const struct seq_operations cpu_seq_ops = {
653 .start = cpu_seq_start,
654 .next = cpu_seq_next,
655 .stop = cpu_seq_stop,
656 .show = cpu_seq_show,
657};
658
659static int cpu_seq_open(struct inode *inode, struct file *file)
660{
661 struct cpu_private *priv = inode->i_private;
662 struct seq_file *seq;
663 int err;
664
665 err = seq_open(file, &cpu_seq_ops);
666 if (!err) {
667 seq = file->private_data;
668 seq->private = priv;
669 }
670
671 return err;
672}
673
674static int write_msr(struct cpu_private *priv, u64 val)
675{
676 u32 low, high;
677
678 high = (val >> 32) & 0xffffffff;
679 low = val & 0xffffffff;
680
681 if (!wrmsr_safe_on_cpu(priv->cpu, priv->reg, low, high))
682 return 0;
683
684 return -EPERM;
685}
686
687static int write_cpu_register(struct cpu_private *priv, const char *buf)
688{
689 int ret = -EPERM;
690 u64 val;
691
692 ret = strict_strtoull(buf, 0, &val);
693 if (ret < 0)
694 return ret;
695
696 /* Supporting only MSRs */
697 if (priv->type < CPU_TSS_BIT)
698 return write_msr(priv, val);
699
700 return ret;
701}
702
703static ssize_t cpu_write(struct file *file, const char __user *ubuf,
704 size_t count, loff_t *off)
705{
706 struct seq_file *seq = file->private_data;
707 struct cpu_private *priv = seq->private;
708 char buf[19];
709
710 if ((priv == NULL) || (count >= sizeof(buf)))
711 return -EINVAL;
712
713 if (copy_from_user(&buf, ubuf, count))
714 return -EFAULT;
715
716 buf[count] = 0;
717
718 if ((cpu_base[priv->type].write) && (cpu_file[priv->file].write))
719 if (!write_cpu_register(priv, buf))
720 return count;
721
722 return -EACCES;
723}
724
725static const struct file_operations cpu_fops = {
726 .owner = THIS_MODULE,
727 .open = cpu_seq_open,
728 .read = seq_read,
729 .write = cpu_write,
730 .llseek = seq_lseek,
731 .release = seq_release,
732};
733
734static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg,
735 unsigned file, struct dentry *dentry)
736{
737 struct cpu_private *priv = NULL;
738
739	/* Already initialized */
740 if (file == CPU_INDEX_BIT)
741 if (per_cpu(cpu_arr[type].init, cpu))
742 return 0;
743
744 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
745 if (priv == NULL)
746 return -ENOMEM;
747
748 priv->cpu = cpu;
749 priv->type = type;
750 priv->reg = reg;
751 priv->file = file;
752 mutex_lock(&cpu_debug_lock);
753 per_cpu(priv_arr[type], cpu) = priv;
754 per_cpu(cpu_priv_count, cpu)++;
755 mutex_unlock(&cpu_debug_lock);
756
757 if (file)
758 debugfs_create_file(cpu_file[file].name, S_IRUGO,
759 dentry, (void *)priv, &cpu_fops);
760 else {
761 debugfs_create_file(cpu_base[type].name, S_IRUGO,
762 per_cpu(cpu_arr[type].dentry, cpu),
763 (void *)priv, &cpu_fops);
764 mutex_lock(&cpu_debug_lock);
765 per_cpu(cpu_arr[type].init, cpu) = 1;
766 mutex_unlock(&cpu_debug_lock);
767 }
768
769 return 0;
770}
771
772static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg,
773 struct dentry *dentry)
774{
775 unsigned file;
776 int err = 0;
777
778 for (file = 0; file < ARRAY_SIZE(cpu_file); file++) {
779 err = cpu_create_file(cpu, type, reg, file, dentry);
780 if (err)
781 return err;
782 }
783
784 return err;
785}
786
787static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry)
788{
789 struct dentry *cpu_dentry = NULL;
790 unsigned reg, reg_min, reg_max;
791 int i, range, err = 0;
792 char reg_dir[12];
793 u32 low, high;
794
795 range = get_cpu_range_count(cpu);
796
797 for (i = 0; i < range; i++) {
798 if (!get_cpu_range(cpu, &reg_min, &reg_max, i,
799 cpu_base[type].flag))
800 continue;
801
802 for (reg = reg_min; reg <= reg_max; reg++) {
803 if (rdmsr_safe_on_cpu(cpu, reg, &low, &high))
804 continue;
805
806 sprintf(reg_dir, "0x%x", reg);
807 cpu_dentry = debugfs_create_dir(reg_dir, dentry);
808 err = cpu_init_regfiles(cpu, type, reg, cpu_dentry);
809 if (err)
810 return err;
811 }
812 }
813
814 return err;
815}
816
817static int cpu_init_allreg(unsigned cpu, struct dentry *dentry)
818{
819 struct dentry *cpu_dentry = NULL;
820 unsigned type;
821 int err = 0;
822
823 for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) {
824 if (!is_typeflag_valid(cpu, cpu_base[type].flag))
825 continue;
826 cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry);
827 per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry;
828
829 if (type < CPU_TSS_BIT)
830 err = cpu_init_msr(cpu, type, cpu_dentry);
831 else
832 err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT,
833 cpu_dentry);
834 if (err)
835 return err;
836 }
837
838 return err;
839}
840
841static int cpu_init_cpu(void)
842{
843 struct dentry *cpu_dentry = NULL;
844 struct cpuinfo_x86 *cpui;
845 char cpu_dir[12];
846 unsigned cpu;
847 int err = 0;
848
849 for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
850 cpui = &cpu_data(cpu);
851 if (!cpu_has(cpui, X86_FEATURE_MSR))
852 continue;
853 per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) |
854 (cpui->x86 << 8) |
855 (cpui->x86_model));
856 per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu);
857
858 sprintf(cpu_dir, "cpu%d", cpu);
859 cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir);
860 err = cpu_init_allreg(cpu, cpu_dentry);
861
862 pr_info("cpu%d(%d) debug files %d\n",
863 cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu));
864 if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) {
865 pr_err("Register files count %d exceeds limit %d\n",
866 per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES);
867 per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES;
868 err = -ENFILE;
869 }
870 if (err)
871 return err;
872 }
873
874 return err;
875}
876
877static int __init cpu_debug_init(void)
878{
879 cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir);
880
881 return cpu_init_cpu();
882}
883
884static void __exit cpu_debug_exit(void)
885{
886 int i, cpu;
887
888 if (cpu_debugfs_dir)
889 debugfs_remove_recursive(cpu_debugfs_dir);
890
891 for (cpu = 0; cpu < nr_cpu_ids; cpu++)
892 for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++)
893 kfree(per_cpu(priv_arr[i], cpu));
894}
895
896module_init(cpu_debug_init);
897module_exit(cpu_debug_exit);
898
899MODULE_AUTHOR("Jaswinder Singh Rajput");
900MODULE_DESCRIPTION("CPU Debug module");
901MODULE_LICENSE("GPL");
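Usage note for the file above: cpu_debug_init() hangs its tree off a "cpu" directory created in arch_debugfs_dir, so with debugfs mounted in the usual place the files should show up under something like /sys/kernel/debug/x86/cpu/cpu0/. The exact leaf name used below is an assumption read off cpu_init_allreg() and cpu_create_file(); a minimal userspace read sketch:

#include <stdio.h>

int main(void)
{
	char line[256];
	/* Assumed path: the control-register dump of CPU 0; adjust as needed. */
	FILE *f = fopen("/sys/kernel/debug/x86/cpu/cpu0/cr/cr", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}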
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index 65792c2cc462..52c839875478 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -87,30 +87,15 @@ config X86_POWERNOW_K7_ACPI
87config X86_POWERNOW_K8 87config X86_POWERNOW_K8
88 tristate "AMD Opteron/Athlon64 PowerNow!" 88 tristate "AMD Opteron/Athlon64 PowerNow!"
89 select CPU_FREQ_TABLE 89 select CPU_FREQ_TABLE
90 depends on ACPI && ACPI_PROCESSOR
90 help 91 help
91 This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. 92 This adds the CPUFreq driver for K8/K10 Opteron/Athlon64 processors.
92 93
93 To compile this driver as a module, choose M here: the 94 To compile this driver as a module, choose M here: the
94 module will be called powernow-k8. 95 module will be called powernow-k8.
95 96
96 For details, take a look at <file:Documentation/cpu-freq/>. 97 For details, take a look at <file:Documentation/cpu-freq/>.
97 98
98 If in doubt, say N.
99
100config X86_POWERNOW_K8_ACPI
101 bool
102 prompt "ACPI Support" if X86_32
103 depends on ACPI && X86_POWERNOW_K8 && ACPI_PROCESSOR
104 depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m)
105 default y
106 help
107 This provides access to the K8s Processor Performance States via ACPI.
108 This driver is probably required for CPUFreq to work with multi-socket and
109 SMP systems. It is not required on at least some single-socket yet
110 multi-core systems, even if SMP is enabled.
111
112 It is safe to say Y here.
113
114config X86_GX_SUSPMOD 99config X86_GX_SUSPMOD
115 tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" 100 tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
116 depends on X86_32 && PCI 101 depends on X86_32 && PCI
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
index 560f7760dae5..509296df294d 100644
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ b/arch/x86/kernel/cpu/cpufreq/Makefile
@@ -1,6 +1,11 @@
1# Link order matters. K8 is preferred to ACPI because of firmware bugs in early
2# K8 systems. ACPI is preferred to all other hardware-specific drivers.
3# speedstep-* is preferred over p4-clockmod.
4
5obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
6obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
1obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o 7obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
2obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o 8obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o
3obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
4obj-$(CONFIG_X86_LONGHAUL) += longhaul.o 9obj-$(CONFIG_X86_LONGHAUL) += longhaul.o
5obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o 10obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o
6obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o 11obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o
@@ -10,7 +15,6 @@ obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o
10obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o 15obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o
11obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o 16obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o
12obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o 17obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o
13obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
14obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o 18obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o
15obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o 19obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o
16obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o 20obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 22590cf688ae..23da96e57b17 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $) 2 * acpi-cpufreq.c - ACPI Processor P-States Driver
3 * 3 *
4 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> 4 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
5 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> 5 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
@@ -36,16 +36,18 @@
36#include <linux/ftrace.h> 36#include <linux/ftrace.h>
37 37
38#include <linux/acpi.h> 38#include <linux/acpi.h>
39#include <linux/io.h>
40#include <linux/delay.h>
41#include <linux/uaccess.h>
42
39#include <acpi/processor.h> 43#include <acpi/processor.h>
40 44
41#include <asm/io.h>
42#include <asm/msr.h> 45#include <asm/msr.h>
43#include <asm/processor.h> 46#include <asm/processor.h>
44#include <asm/cpufeature.h> 47#include <asm/cpufeature.h>
45#include <asm/delay.h>
46#include <asm/uaccess.h>
47 48
48#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) 49#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
50 "acpi-cpufreq", msg)
49 51
50MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); 52MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
51MODULE_DESCRIPTION("ACPI Processor P-States Driver"); 53MODULE_DESCRIPTION("ACPI Processor P-States Driver");
@@ -95,7 +97,7 @@ static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
95 97
96 perf = data->acpi_data; 98 perf = data->acpi_data;
97 99
98 for (i=0; i<perf->state_count; i++) { 100 for (i = 0; i < perf->state_count; i++) {
99 if (value == perf->states[i].status) 101 if (value == perf->states[i].status)
100 return data->freq_table[i].frequency; 102 return data->freq_table[i].frequency;
101 } 103 }
@@ -110,7 +112,7 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
110 msr &= INTEL_MSR_RANGE; 112 msr &= INTEL_MSR_RANGE;
111 perf = data->acpi_data; 113 perf = data->acpi_data;
112 114
113 for (i=0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { 115 for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
114 if (msr == perf->states[data->freq_table[i].index].status) 116 if (msr == perf->states[data->freq_table[i].index].status)
115 return data->freq_table[i].frequency; 117 return data->freq_table[i].frequency;
116 } 118 }
@@ -138,15 +140,13 @@ struct io_addr {
138 u8 bit_width; 140 u8 bit_width;
139}; 141};
140 142
141typedef union {
142 struct msr_addr msr;
143 struct io_addr io;
144} drv_addr_union;
145
146struct drv_cmd { 143struct drv_cmd {
147 unsigned int type; 144 unsigned int type;
148 const struct cpumask *mask; 145 const struct cpumask *mask;
149 drv_addr_union addr; 146 union {
147 struct msr_addr msr;
148 struct io_addr io;
149 } addr;
150 u32 val; 150 u32 val;
151}; 151};
152 152
@@ -369,7 +369,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
369 unsigned int cur_freq; 369 unsigned int cur_freq;
370 unsigned int i; 370 unsigned int i;
371 371
372 for (i=0; i<100; i++) { 372 for (i = 0; i < 100; i++) {
373 cur_freq = extract_freq(get_cur_val(mask), data); 373 cur_freq = extract_freq(get_cur_val(mask), data);
374 if (cur_freq == freq) 374 if (cur_freq == freq)
375 return 1; 375 return 1;
@@ -494,7 +494,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
494 unsigned long freq; 494 unsigned long freq;
495 unsigned long freqn = perf->states[0].core_frequency * 1000; 495 unsigned long freqn = perf->states[0].core_frequency * 1000;
496 496
497 for (i=0; i<(perf->state_count-1); i++) { 497 for (i = 0; i < (perf->state_count-1); i++) {
498 freq = freqn; 498 freq = freqn;
499 freqn = perf->states[i+1].core_frequency * 1000; 499 freqn = perf->states[i+1].core_frequency * 1000;
500 if ((2 * cpu_khz) > (freqn + freq)) { 500 if ((2 * cpu_khz) > (freqn + freq)) {
@@ -673,7 +673,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
673 673
674 /* detect transition latency */ 674 /* detect transition latency */
675 policy->cpuinfo.transition_latency = 0; 675 policy->cpuinfo.transition_latency = 0;
676 for (i=0; i<perf->state_count; i++) { 676 for (i = 0; i < perf->state_count; i++) {
677 if ((perf->states[i].transition_latency * 1000) > 677 if ((perf->states[i].transition_latency * 1000) >
678 policy->cpuinfo.transition_latency) 678 policy->cpuinfo.transition_latency)
679 policy->cpuinfo.transition_latency = 679 policy->cpuinfo.transition_latency =
@@ -682,8 +682,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
682 682
683 data->max_freq = perf->states[0].core_frequency * 1000; 683 data->max_freq = perf->states[0].core_frequency * 1000;
684 /* table init */ 684 /* table init */
685 for (i=0; i<perf->state_count; i++) { 685 for (i = 0; i < perf->state_count; i++) {
686 if (i>0 && perf->states[i].core_frequency >= 686 if (i > 0 && perf->states[i].core_frequency >=
687 data->freq_table[valid_states-1].frequency / 1000) 687 data->freq_table[valid_states-1].frequency / 1000)
688 continue; 688 continue;
689 689
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
index 965ea52767ac..733093d60436 100644
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -32,7 +32,7 @@
32 * nforce2_chipset: 32 * nforce2_chipset:
33 * FSB is changed using the chipset 33 * FSB is changed using the chipset
34 */ 34 */
35static struct pci_dev *nforce2_chipset_dev; 35static struct pci_dev *nforce2_dev;
36 36
37/* fid: 37/* fid:
38 * multiplier * 10 38 * multiplier * 10
@@ -56,7 +56,9 @@ MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
56MODULE_PARM_DESC(min_fsb, 56MODULE_PARM_DESC(min_fsb,
57 "Minimum FSB to use, if not defined: current FSB - 50"); 57 "Minimum FSB to use, if not defined: current FSB - 50");
58 58
59#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) 59#define PFX "cpufreq-nforce2: "
60#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
61 "cpufreq-nforce2", msg)
60 62
61/** 63/**
62 * nforce2_calc_fsb - calculate FSB 64 * nforce2_calc_fsb - calculate FSB
@@ -118,11 +120,11 @@ static void nforce2_write_pll(int pll)
118 int temp; 120 int temp;
119 121
120 /* Set the pll addr. to 0x00 */ 122 /* Set the pll addr. to 0x00 */
121 pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, 0); 123 pci_write_config_dword(nforce2_dev, NFORCE2_PLLADR, 0);
122 124
123 /* Now write the value in all 64 registers */ 125 /* Now write the value in all 64 registers */
124 for (temp = 0; temp <= 0x3f; temp++) 126 for (temp = 0; temp <= 0x3f; temp++)
125 pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, pll); 127 pci_write_config_dword(nforce2_dev, NFORCE2_PLLREG, pll);
126 128
127 return; 129 return;
128} 130}
@@ -139,8 +141,8 @@ static unsigned int nforce2_fsb_read(int bootfsb)
139 u32 fsb, temp = 0; 141 u32 fsb, temp = 0;
140 142
141 /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ 143 /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
142 nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 144 nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 0x01EF,
143 0x01EF, PCI_ANY_ID, PCI_ANY_ID, NULL); 145 PCI_ANY_ID, PCI_ANY_ID, NULL);
144 if (!nforce2_sub5) 146 if (!nforce2_sub5)
145 return 0; 147 return 0;
146 148
@@ -148,13 +150,13 @@ static unsigned int nforce2_fsb_read(int bootfsb)
148 fsb /= 1000000; 150 fsb /= 1000000;
149 151
150 /* Check if PLL register is already set */ 152 /* Check if PLL register is already set */
151 pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp); 153 pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
152 154
153 if (bootfsb || !temp) 155 if (bootfsb || !temp)
154 return fsb; 156 return fsb;
155 157
156 /* Use PLL register FSB value */ 158 /* Use PLL register FSB value */
157 pci_read_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, &temp); 159 pci_read_config_dword(nforce2_dev, NFORCE2_PLLREG, &temp);
158 fsb = nforce2_calc_fsb(temp); 160 fsb = nforce2_calc_fsb(temp);
159 161
160 return fsb; 162 return fsb;
@@ -174,18 +176,18 @@ static int nforce2_set_fsb(unsigned int fsb)
174 int pll = 0; 176 int pll = 0;
175 177
176 if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) { 178 if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
177 printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb); 179 printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb);
178 return -EINVAL; 180 return -EINVAL;
179 } 181 }
180 182
181 tfsb = nforce2_fsb_read(0); 183 tfsb = nforce2_fsb_read(0);
182 if (!tfsb) { 184 if (!tfsb) {
183 printk(KERN_ERR "cpufreq: Error while reading the FSB\n"); 185 printk(KERN_ERR PFX "Error while reading the FSB\n");
184 return -EINVAL; 186 return -EINVAL;
185 } 187 }
186 188
187 /* First write? Then set actual value */ 189 /* First write? Then set actual value */
188 pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp); 190 pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
189 if (!temp) { 191 if (!temp) {
190 pll = nforce2_calc_pll(tfsb); 192 pll = nforce2_calc_pll(tfsb);
191 193
@@ -197,7 +199,7 @@ static int nforce2_set_fsb(unsigned int fsb)
197 199
198 /* Enable write access */ 200 /* Enable write access */
199 temp = 0x01; 201 temp = 0x01;
200 pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8)temp); 202 pci_write_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8)temp);
201 203
202 diff = tfsb - fsb; 204 diff = tfsb - fsb;
203 205
@@ -222,7 +224,7 @@ static int nforce2_set_fsb(unsigned int fsb)
222 } 224 }
223 225
224 temp = 0x40; 226 temp = 0x40;
225 pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLADR, (u8)temp); 227 pci_write_config_byte(nforce2_dev, NFORCE2_PLLADR, (u8)temp);
226 228
227 return 0; 229 return 0;
228} 230}
@@ -244,7 +246,8 @@ static unsigned int nforce2_get(unsigned int cpu)
244 * nforce2_target - set a new CPUFreq policy 246 * nforce2_target - set a new CPUFreq policy
245 * @policy: new policy 247 * @policy: new policy
246 * @target_freq: the target frequency 248 * @target_freq: the target frequency
247 * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) 249 * @relation: how that frequency relates to achieved frequency
250 * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
248 * 251 *
249 * Sets a new CPUFreq policy. 252 * Sets a new CPUFreq policy.
250 */ 253 */
@@ -276,7 +279,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
276 /* local_irq_save(flags); */ 279 /* local_irq_save(flags); */
277 280
278 if (nforce2_set_fsb(target_fsb) < 0) 281 if (nforce2_set_fsb(target_fsb) < 0)
279 printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", 282 printk(KERN_ERR PFX "Changing FSB to %d failed\n",
280 target_fsb); 283 target_fsb);
281 else 284 else
282 dprintk("Changed FSB successfully to %d\n", 285 dprintk("Changed FSB successfully to %d\n",
@@ -327,8 +330,8 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
327 /* FIX: Get FID from CPU */ 330 /* FIX: Get FID from CPU */
328 if (!fid) { 331 if (!fid) {
329 if (!cpu_khz) { 332 if (!cpu_khz) {
330 printk(KERN_WARNING 333 printk(KERN_WARNING PFX
331 "cpufreq: cpu_khz not set, can't calculate multiplier!\n"); 334 "cpu_khz not set, can't calculate multiplier!\n");
332 return -ENODEV; 335 return -ENODEV;
333 } 336 }
334 337
@@ -343,7 +346,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
343 } 346 }
344 } 347 }
345 348
346 printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb, 349 printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb,
347 fid / 10, fid % 10); 350 fid / 10, fid % 10);
348 351
349 /* Set maximum FSB to FSB at boot time */ 352 /* Set maximum FSB to FSB at boot time */
@@ -392,17 +395,18 @@ static struct cpufreq_driver nforce2_driver = {
392 */ 395 */
393static unsigned int nforce2_detect_chipset(void) 396static unsigned int nforce2_detect_chipset(void)
394{ 397{
395 nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 398 nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
396 PCI_DEVICE_ID_NVIDIA_NFORCE2, 399 PCI_DEVICE_ID_NVIDIA_NFORCE2,
397 PCI_ANY_ID, PCI_ANY_ID, NULL); 400 PCI_ANY_ID, PCI_ANY_ID, NULL);
398 401
399 if (nforce2_chipset_dev == NULL) 402 if (nforce2_dev == NULL)
400 return -ENODEV; 403 return -ENODEV;
401 404
402 printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n", 405 printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n",
403 nforce2_chipset_dev->revision); 406 nforce2_dev->revision);
404 printk(KERN_INFO 407 printk(KERN_INFO PFX
405 "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n"); 408 "FSB changing is maybe unstable and can lead to "
409 "crashes and data loss.\n");
406 410
407 return 0; 411 return 0;
408} 412}
@@ -420,7 +424,7 @@ static int __init nforce2_init(void)
420 424
421 /* detect chipset */ 425 /* detect chipset */
422 if (nforce2_detect_chipset()) { 426 if (nforce2_detect_chipset()) {
423 printk(KERN_ERR "cpufreq: No nForce2 chipset.\n"); 427 printk(KERN_INFO PFX "No nForce2 chipset.\n");
424 return -ENODEV; 428 return -ENODEV;
425 } 429 }
426 430
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
index 41ab3f064cb1..35a257dd4bb7 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -12,12 +12,12 @@
12#include <linux/cpufreq.h> 12#include <linux/cpufreq.h>
13#include <linux/ioport.h> 13#include <linux/ioport.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/timex.h>
16#include <linux/io.h>
17#include <linux/delay.h>
15 18
16#include <asm/msr.h> 19#include <asm/msr.h>
17#include <asm/tsc.h> 20#include <asm/tsc.h>
18#include <asm/timex.h>
19#include <asm/io.h>
20#include <asm/delay.h>
21 21
22#define EPS_BRAND_C7M 0 22#define EPS_BRAND_C7M 0
23#define EPS_BRAND_C7 1 23#define EPS_BRAND_C7 1
@@ -184,7 +184,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
184 break; 184 break;
185 } 185 }
186 186
187 switch(brand) { 187 switch (brand) {
188 case EPS_BRAND_C7M: 188 case EPS_BRAND_C7M:
189 printk(KERN_CONT "C7-M\n"); 189 printk(KERN_CONT "C7-M\n");
190 break; 190 break;
@@ -218,17 +218,20 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
218 /* Print voltage and multiplier */ 218 /* Print voltage and multiplier */
219 rdmsr(MSR_IA32_PERF_STATUS, lo, hi); 219 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
220 current_voltage = lo & 0xff; 220 current_voltage = lo & 0xff;
221 printk(KERN_INFO "eps: Current voltage = %dmV\n", current_voltage * 16 + 700); 221 printk(KERN_INFO "eps: Current voltage = %dmV\n",
222 current_voltage * 16 + 700);
222 current_multiplier = (lo >> 8) & 0xff; 223 current_multiplier = (lo >> 8) & 0xff;
223 printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier); 224 printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier);
224 225
225 /* Print limits */ 226 /* Print limits */
226 max_voltage = hi & 0xff; 227 max_voltage = hi & 0xff;
227 printk(KERN_INFO "eps: Highest voltage = %dmV\n", max_voltage * 16 + 700); 228 printk(KERN_INFO "eps: Highest voltage = %dmV\n",
229 max_voltage * 16 + 700);
228 max_multiplier = (hi >> 8) & 0xff; 230 max_multiplier = (hi >> 8) & 0xff;
229 printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier); 231 printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier);
230 min_voltage = (hi >> 16) & 0xff; 232 min_voltage = (hi >> 16) & 0xff;
231 printk(KERN_INFO "eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700); 233 printk(KERN_INFO "eps: Lowest voltage = %dmV\n",
234 min_voltage * 16 + 700);
232 min_multiplier = (hi >> 24) & 0xff; 235 min_multiplier = (hi >> 24) & 0xff;
233 printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier); 236 printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier);
234 237
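The voltage printks in this hunk all use the same VID encoding, 16 mV per step on top of a 700 mV base; a tiny standalone sketch of that arithmetic, with a made-up helper name:

#include <stdio.h>

/* VID-to-millivolt conversion as used by the eps printks above. */
static unsigned int eps_vid_to_mv(unsigned int vid)
{
	return vid * 16 + 700;
}

int main(void)
{
	printf("VID 0x20 -> %u mV\n", eps_vid_to_mv(0x20));	/* 1212 mV */
	return 0;
}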
@@ -318,7 +321,7 @@ static int eps_cpu_exit(struct cpufreq_policy *policy)
318 return 0; 321 return 0;
319} 322}
320 323
321static struct freq_attr* eps_attr[] = { 324static struct freq_attr *eps_attr[] = {
322 &cpufreq_freq_attr_scaling_available_freqs, 325 &cpufreq_freq_attr_scaling_available_freqs,
323 NULL, 326 NULL,
324}; 327};
@@ -356,7 +359,7 @@ static void __exit eps_exit(void)
356 cpufreq_unregister_driver(&eps_driver); 359 cpufreq_unregister_driver(&eps_driver);
357} 360}
358 361
359MODULE_AUTHOR("Rafa³ Bilski <rafalbilski@interia.pl>"); 362MODULE_AUTHOR("Rafal Bilski <rafalbilski@interia.pl>");
360MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's."); 363MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's.");
361MODULE_LICENSE("GPL"); 364MODULE_LICENSE("GPL");
362 365
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index fe613c93b366..006b278b0d5d 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -184,7 +184,8 @@ static int elanfreq_target(struct cpufreq_policy *policy,
184{ 184{
185 unsigned int newstate = 0; 185 unsigned int newstate = 0;
186 186
187 if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate)) 187 if (cpufreq_frequency_table_target(policy, &elanfreq_table[0],
188 target_freq, relation, &newstate))
188 return -EINVAL; 189 return -EINVAL;
189 190
190 elanfreq_set_cpu_state(newstate); 191 elanfreq_set_cpu_state(newstate);
@@ -301,7 +302,8 @@ static void __exit elanfreq_exit(void)
301module_param(max_freq, int, 0444); 302module_param(max_freq, int, 0444);
302 303
303MODULE_LICENSE("GPL"); 304MODULE_LICENSE("GPL");
304MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, Sven Geggus <sven@geggus.net>"); 305MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, "
306 "Sven Geggus <sven@geggus.net>");
305MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs"); 307MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs");
306 308
307module_init(elanfreq_init); 309module_init(elanfreq_init);
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
index 9d9eae82e60f..ac27ec2264d5 100644
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
@@ -79,8 +79,9 @@
79#include <linux/smp.h> 79#include <linux/smp.h>
80#include <linux/cpufreq.h> 80#include <linux/cpufreq.h>
81#include <linux/pci.h> 81#include <linux/pci.h>
82#include <linux/errno.h>
83
82#include <asm/processor-cyrix.h> 84#include <asm/processor-cyrix.h>
83#include <asm/errno.h>
84 85
85/* PCI config registers, all at F0 */ 86/* PCI config registers, all at F0 */
86#define PCI_PMER1 0x80 /* power management enable register 1 */ 87#define PCI_PMER1 0x80 /* power management enable register 1 */
@@ -122,8 +123,8 @@ static struct gxfreq_params *gx_params;
122static int stock_freq; 123static int stock_freq;
123 124
124/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */ 125/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */
125static int pci_busclk = 0; 126static int pci_busclk;
126module_param (pci_busclk, int, 0444); 127module_param(pci_busclk, int, 0444);
127 128
128/* maximum duration for which the cpu may be suspended 129/* maximum duration for which the cpu may be suspended
129 * (32us * MAX_DURATION). If no parameter is given, this defaults 130 * (32us * MAX_DURATION). If no parameter is given, this defaults
@@ -132,7 +133,7 @@ module_param (pci_busclk, int, 0444);
132 * is suspended -- processing power is just 0.39% of what it used to be, 133 * is suspended -- processing power is just 0.39% of what it used to be,
133 * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */ 134 * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */
134static int max_duration = 255; 135static int max_duration = 255;
135module_param (max_duration, int, 0444); 136module_param(max_duration, int, 0444);
136 137
137/* For the default policy, we want at least some processing power 138/* For the default policy, we want at least some processing power
138 * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV) 139 * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV)
@@ -140,7 +141,8 @@ module_param (max_duration, int, 0444);
140#define POLICY_MIN_DIV 20 141#define POLICY_MIN_DIV 20
141 142
142 143
143#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg) 144#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
145 "gx-suspmod", msg)
144 146
145/** 147/**
146 * we can detect a core multiplier from dir0_lsb 148
@@ -166,12 +168,20 @@ static int gx_freq_mult[16] = {
166 * Low Level chipset interface * 168 * Low Level chipset interface *
167 ****************************************************************/ 169 ****************************************************************/
168static struct pci_device_id gx_chipset_tbl[] __initdata = { 170static struct pci_device_id gx_chipset_tbl[] __initdata = {
169 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID }, 171 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY,
170 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID }, 172 PCI_ANY_ID, PCI_ANY_ID },
171 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID }, 173 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520,
174 PCI_ANY_ID, PCI_ANY_ID },
175 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510,
176 PCI_ANY_ID, PCI_ANY_ID },
172 { 0, }, 177 { 0, },
173}; 178};
174 179
180static void gx_write_byte(int reg, int value)
181{
182 pci_write_config_byte(gx_params->cs55x0, reg, value);
183}
184
175/** 185/**
176 * gx_detect_chipset: 186 * gx_detect_chipset:
177 * 187 *
@@ -200,7 +210,8 @@ static __init struct pci_dev *gx_detect_chipset(void)
200/** 210/**
201 * gx_get_cpuspeed: 211 * gx_get_cpuspeed:
202 * 212 *
203 * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi Geode CPU runs. 213 * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi
214 * Geode CPU runs.
204 */ 215 */
205static unsigned int gx_get_cpuspeed(unsigned int cpu) 216static unsigned int gx_get_cpuspeed(unsigned int cpu)
206{ 217{
@@ -217,17 +228,18 @@ static unsigned int gx_get_cpuspeed(unsigned int cpu)
217 * 228 *
218 **/ 229 **/
219 230
220static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration) 231static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration,
232 u8 *off_duration)
221{ 233{
222 unsigned int i; 234 unsigned int i;
223 u8 tmp_on, tmp_off; 235 u8 tmp_on, tmp_off;
224 int old_tmp_freq = stock_freq; 236 int old_tmp_freq = stock_freq;
225 int tmp_freq; 237 int tmp_freq;
226 238
227 *off_duration=1; 239 *off_duration = 1;
228 *on_duration=0; 240 *on_duration = 0;
229 241
230 for (i=max_duration; i>0; i--) { 242 for (i = max_duration; i > 0; i--) {
231 tmp_off = ((khz * i) / stock_freq) & 0xff; 243 tmp_off = ((khz * i) / stock_freq) & 0xff;
232 tmp_on = i - tmp_off; 244 tmp_on = i - tmp_off;
233 tmp_freq = (stock_freq * tmp_off) / i; 245 tmp_freq = (stock_freq * tmp_off) / i;
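Note on the hunk above: gx_validate_speed() searches for on/off suspend-modulation durations whose resulting frequency best approximates the requested khz; only the head of the loop is visible in this hunk. The following is a standalone sketch of that kind of search, separate from the patch; pick_durations() and the values in main() are invented for illustration, and the sketch assumes khz never exceeds stock_freq.

#include <stdio.h>
#include <stdlib.h>

/* Illustrative only: choose on/off durations whose average frequency
 * comes closest to the requested khz, mirroring the shape of the loop
 * in gx_validate_speed().  Assumes khz <= stock_freq. */
static unsigned int pick_durations(unsigned int khz, unsigned int stock_freq,
				   unsigned int max_duration,
				   unsigned char *on, unsigned char *off)
{
	unsigned int i;
	unsigned int best = stock_freq;

	*off = 1;
	*on = 0;

	for (i = max_duration; i > 0; i--) {
		unsigned char tmp_off = ((khz * i) / stock_freq) & 0xff;
		unsigned char tmp_on = i - tmp_off;
		unsigned int tmp_freq = (stock_freq * tmp_off) / i;

		/* keep the pair that lands closest to the request */
		if (abs((int)tmp_freq - (int)khz) <= abs((int)best - (int)khz)) {
			*on = tmp_on;
			*off = tmp_off;
			best = tmp_freq;
		}
	}
	return best;
}

int main(void)
{
	unsigned char on, off;
	unsigned int got = pick_durations(100000, 200000, 255, &on, &off);

	printf("requested 100000 kHz -> %u kHz (on=%u off=%u)\n",
	       got, (unsigned)on, (unsigned)off);
	return 0;
}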
@@ -259,26 +271,34 @@ static void gx_set_cpuspeed(unsigned int khz)
259 freqs.cpu = 0; 271 freqs.cpu = 0;
260 freqs.old = gx_get_cpuspeed(0); 272 freqs.old = gx_get_cpuspeed(0);
261 273
262 new_khz = gx_validate_speed(khz, &gx_params->on_duration, &gx_params->off_duration); 274 new_khz = gx_validate_speed(khz, &gx_params->on_duration,
275 &gx_params->off_duration);
263 276
264 freqs.new = new_khz; 277 freqs.new = new_khz;
265 278
266 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 279 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
267 local_irq_save(flags); 280 local_irq_save(flags);
268 281
269 if (new_khz != stock_freq) { /* if new khz == 100% of CPU speed, it is special case */ 282
283
284 if (new_khz != stock_freq) {
285 /* if new khz == 100% of CPU speed, it is special case */
270 switch (gx_params->cs55x0->device) { 286 switch (gx_params->cs55x0->device) {
271 case PCI_DEVICE_ID_CYRIX_5530_LEGACY: 287 case PCI_DEVICE_ID_CYRIX_5530_LEGACY:
272 pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP; 288 pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP;
273 /* FIXME: need to test other values -- Zwane,Miura */ 289 /* FIXME: need to test other values -- Zwane,Miura */
274 pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */ 290 /* typical 2 to 4ms */
275 pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */ 291 gx_write_byte(PCI_IRQTC, 4);
276 pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1); 292 /* typical 50 to 100ms */
277 293 gx_write_byte(PCI_VIDTC, 100);
278 if (gx_params->cs55x0->revision < 0x10) { /* CS5530(rev 1.2, 1.3) */ 294 gx_write_byte(PCI_PMER1, pmer1);
279 suscfg = gx_params->pci_suscfg | SUSMOD; 295
280 } else { /* CS5530A,B.. */ 296 if (gx_params->cs55x0->revision < 0x10) {
281 suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE; 297 /* CS5530(rev 1.2, 1.3) */
298 suscfg = gx_params->pci_suscfg|SUSMOD;
299 } else {
300 /* CS5530A,B.. */
301 suscfg = gx_params->pci_suscfg|SUSMOD|PWRSVE;
282 } 302 }
283 break; 303 break;
284 case PCI_DEVICE_ID_CYRIX_5520: 304 case PCI_DEVICE_ID_CYRIX_5520:
@@ -294,13 +314,13 @@ static void gx_set_cpuspeed(unsigned int khz)
294 suscfg = gx_params->pci_suscfg & ~(SUSMOD); 314 suscfg = gx_params->pci_suscfg & ~(SUSMOD);
295 gx_params->off_duration = 0; 315 gx_params->off_duration = 0;
296 gx_params->on_duration = 0; 316 gx_params->on_duration = 0;
297 dprintk("suspend modulation disabled: cpu runs 100 percent speed.\n"); 317 dprintk("suspend modulation disabled: cpu runs 100%% speed.\n");
298 } 318 }
299 319
300 pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration); 320 gx_write_byte(PCI_MODOFF, gx_params->off_duration);
301 pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration); 321 gx_write_byte(PCI_MODON, gx_params->on_duration);
302 322
303 pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg); 323 gx_write_byte(PCI_SUSCFG, suscfg);
304 pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg); 324 pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg);
305 325
306 local_irq_restore(flags); 326 local_irq_restore(flags);
@@ -334,7 +354,8 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy)
334 return -EINVAL; 354 return -EINVAL;
335 355
336 policy->cpu = 0; 356 policy->cpu = 0;
337 cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); 357 cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
358 stock_freq);
338 359
339 /* it needs to be assured that at least one supported frequency is 360 /* it needs to be assured that at least one supported frequency is
340 * within policy->min and policy->max. If it is not, policy->max 361 * within policy->min and policy->max. If it is not, policy->max
@@ -354,7 +375,8 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy)
354 policy->max = tmp_freq; 375 policy->max = tmp_freq;
355 if (policy->max < policy->min) 376 if (policy->max < policy->min)
356 policy->max = policy->min; 377 policy->max = policy->min;
357 cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); 378 cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
379 stock_freq);
358 380
359 return 0; 381 return 0;
360} 382}
@@ -398,18 +420,18 @@ static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy)
398 return -ENODEV; 420 return -ENODEV;
399 421
400 /* determine maximum frequency */ 422 /* determine maximum frequency */
401 if (pci_busclk) { 423 if (pci_busclk)
402 maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; 424 maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
403 } else if (cpu_khz) { 425 else if (cpu_khz)
404 maxfreq = cpu_khz; 426 maxfreq = cpu_khz;
405 } else { 427 else
406 maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; 428 maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
407 } 429
408 stock_freq = maxfreq; 430 stock_freq = maxfreq;
409 curfreq = gx_get_cpuspeed(0); 431 curfreq = gx_get_cpuspeed(0);
410 432
411 dprintk("cpu max frequency is %d.\n", maxfreq); 433 dprintk("cpu max frequency is %d.\n", maxfreq);
412 dprintk("cpu current frequency is %dkHz.\n",curfreq); 434 dprintk("cpu current frequency is %dkHz.\n", curfreq);
413 435
414 /* setup basic struct for cpufreq API */ 436 /* setup basic struct for cpufreq API */
415 policy->cpu = 0; 437 policy->cpu = 0;
@@ -447,7 +469,8 @@ static int __init cpufreq_gx_init(void)
447 struct pci_dev *gx_pci; 469 struct pci_dev *gx_pci;
448 470
449 /* Test if we have the right hardware */ 471 /* Test if we have the right hardware */
450 if ((gx_pci = gx_detect_chipset()) == NULL) 472 gx_pci = gx_detect_chipset();
473 if (gx_pci == NULL)
451 return -ENODEV; 474 return -ENODEV;
452 475
453 /* check whether module parameters are sane */ 476 /* check whether module parameters are sane */
@@ -468,9 +491,11 @@ static int __init cpufreq_gx_init(void)
468 pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1)); 491 pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1));
469 pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); 492 pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2));
470 pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); 493 pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration));
471 pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration)); 494 pci_read_config_byte(params->cs55x0, PCI_MODOFF,
495 &(params->off_duration));
472 496
473 if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { 497 ret = cpufreq_register_driver(&gx_suspmod_driver);
498 if (ret) {
474 kfree(params); 499 kfree(params);
475 return ret; /* register error! */ 500 return ret; /* register error! */
476 } 501 }
@@ -485,9 +510,9 @@ static void __exit cpufreq_gx_exit(void)
485 kfree(gx_params); 510 kfree(gx_params);
486} 511}
487 512
488MODULE_AUTHOR ("Hiroshi Miura <miura@da-cha.org>"); 513MODULE_AUTHOR("Hiroshi Miura <miura@da-cha.org>");
489MODULE_DESCRIPTION ("Cpufreq driver for Cyrix MediaGX and NatSemi Geode"); 514MODULE_DESCRIPTION("Cpufreq driver for Cyrix MediaGX and NatSemi Geode");
490MODULE_LICENSE ("GPL"); 515MODULE_LICENSE("GPL");
491 516
492module_init(cpufreq_gx_init); 517module_init(cpufreq_gx_init);
493module_exit(cpufreq_gx_exit); 518module_exit(cpufreq_gx_exit);
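The gx-suspmod changes above introduce gx_write_byte(), a one-line helper that fixes the chipset argument of pci_write_config_byte() so the repeated call sites fit in 80 columns. Below is a standalone sketch of that refactor pattern, separate from the patch; struct fake_dev and fake_write_config_byte() are stand-ins invented for the demo, not kernel APIs.

#include <stdio.h>

/* Stub standing in for struct pci_dev; illustrative only. */
struct fake_dev {
	const char *name;
};

/* Stand-in for pci_write_config_byte(): just logs the access. */
static int fake_write_config_byte(struct fake_dev *dev, int reg, unsigned char val)
{
	printf("%s: reg 0x%02x <- 0x%02x\n", dev->name, reg, val);
	return 0;
}

static struct fake_dev chipset = { "cs55x0" };

/* The pattern the patch introduces: a thin wrapper that fixes the device
 * argument so each call site stays short. */
static void gx_write_byte(int reg, int value)
{
	fake_write_config_byte(&chipset, reg, value);
}

int main(void)
{
	gx_write_byte(0x80, 0x04);	/* arbitrary register offsets for the demo */
	gx_write_byte(0x81, 0x64);
	return 0;
}

The wrapper costs nothing at runtime and keeps the per-register intent (the comment next to each call) on the same line as the call itself.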
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index a4cff5d6e380..f1c51aea064d 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -30,12 +30,12 @@
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/string.h> 31#include <linux/string.h>
32#include <linux/delay.h> 32#include <linux/delay.h>
33#include <linux/timex.h>
34#include <linux/io.h>
35#include <linux/acpi.h>
36#include <linux/kernel.h>
33 37
34#include <asm/msr.h> 38#include <asm/msr.h>
35#include <asm/timex.h>
36#include <asm/io.h>
37#include <asm/acpi.h>
38#include <linux/acpi.h>
39#include <acpi/processor.h> 39#include <acpi/processor.h>
40 40
41#include "longhaul.h" 41#include "longhaul.h"
@@ -58,7 +58,7 @@
58#define USE_NORTHBRIDGE (1 << 2) 58#define USE_NORTHBRIDGE (1 << 2)
59 59
60static int cpu_model; 60static int cpu_model;
61static unsigned int numscales=16; 61static unsigned int numscales = 16;
62static unsigned int fsb; 62static unsigned int fsb;
63 63
64static const struct mV_pos *vrm_mV_table; 64static const struct mV_pos *vrm_mV_table;
@@ -67,8 +67,8 @@ static const unsigned char *mV_vrm_table;
67static unsigned int highest_speed, lowest_speed; /* kHz */ 67static unsigned int highest_speed, lowest_speed; /* kHz */
68static unsigned int minmult, maxmult; 68static unsigned int minmult, maxmult;
69static int can_scale_voltage; 69static int can_scale_voltage;
70static struct acpi_processor *pr = NULL; 70static struct acpi_processor *pr;
71static struct acpi_processor_cx *cx = NULL; 71static struct acpi_processor_cx *cx;
72static u32 acpi_regs_addr; 72static u32 acpi_regs_addr;
73static u8 longhaul_flags; 73static u8 longhaul_flags;
74static unsigned int longhaul_index; 74static unsigned int longhaul_index;
@@ -78,12 +78,13 @@ static int scale_voltage;
78static int disable_acpi_c3; 78static int disable_acpi_c3;
79static int revid_errata; 79static int revid_errata;
80 80
81#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) 81#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
82 "longhaul", msg)
82 83
83 84
84/* Clock ratios multiplied by 10 */ 85/* Clock ratios multiplied by 10 */
85static int clock_ratio[32]; 86static int mults[32];
86static int eblcr_table[32]; 87static int eblcr[32];
87static int longhaul_version; 88static int longhaul_version;
88static struct cpufreq_frequency_table *longhaul_table; 89static struct cpufreq_frequency_table *longhaul_table;
89 90
@@ -93,7 +94,7 @@ static char speedbuffer[8];
93static char *print_speed(int speed) 94static char *print_speed(int speed)
94{ 95{
95 if (speed < 1000) { 96 if (speed < 1000) {
96 snprintf(speedbuffer, sizeof(speedbuffer),"%dMHz", speed); 97 snprintf(speedbuffer, sizeof(speedbuffer), "%dMHz", speed);
97 return speedbuffer; 98 return speedbuffer;
98 } 99 }
99 100
@@ -122,27 +123,28 @@ static unsigned int calc_speed(int mult)
122 123
123static int longhaul_get_cpu_mult(void) 124static int longhaul_get_cpu_mult(void)
124{ 125{
125 unsigned long invalue=0,lo, hi; 126 unsigned long invalue = 0, lo, hi;
126 127
127 rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi); 128 rdmsr(MSR_IA32_EBL_CR_POWERON, lo, hi);
128 invalue = (lo & (1<<22|1<<23|1<<24|1<<25)) >>22; 129 invalue = (lo & (1<<22|1<<23|1<<24|1<<25))>>22;
129 if (longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) { 130 if (longhaul_version == TYPE_LONGHAUL_V2 ||
131 longhaul_version == TYPE_POWERSAVER) {
130 if (lo & (1<<27)) 132 if (lo & (1<<27))
131 invalue+=16; 133 invalue += 16;
132 } 134 }
133 return eblcr_table[invalue]; 135 return eblcr[invalue];
134} 136}
135 137
136/* For processor with BCR2 MSR */ 138/* For processor with BCR2 MSR */
137 139
138static void do_longhaul1(unsigned int clock_ratio_index) 140static void do_longhaul1(unsigned int mults_index)
139{ 141{
140 union msr_bcr2 bcr2; 142 union msr_bcr2 bcr2;
141 143
142 rdmsrl(MSR_VIA_BCR2, bcr2.val); 144 rdmsrl(MSR_VIA_BCR2, bcr2.val);
143 /* Enable software clock multiplier */ 145 /* Enable software clock multiplier */
144 bcr2.bits.ESOFTBF = 1; 146 bcr2.bits.ESOFTBF = 1;
145 bcr2.bits.CLOCKMUL = clock_ratio_index & 0xff; 147 bcr2.bits.CLOCKMUL = mults_index & 0xff;
146 148
147 /* Sync to timer tick */ 149 /* Sync to timer tick */
148 safe_halt(); 150 safe_halt();
@@ -161,7 +163,7 @@ static void do_longhaul1(unsigned int clock_ratio_index)
161 163
162/* For processor with Longhaul MSR */ 164/* For processor with Longhaul MSR */
163 165
164static void do_powersaver(int cx_address, unsigned int clock_ratio_index, 166static void do_powersaver(int cx_address, unsigned int mults_index,
165 unsigned int dir) 167 unsigned int dir)
166{ 168{
167 union msr_longhaul longhaul; 169 union msr_longhaul longhaul;
@@ -173,11 +175,11 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index,
173 longhaul.bits.RevisionKey = longhaul.bits.RevisionID; 175 longhaul.bits.RevisionKey = longhaul.bits.RevisionID;
174 else 176 else
175 longhaul.bits.RevisionKey = 0; 177 longhaul.bits.RevisionKey = 0;
176 longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; 178 longhaul.bits.SoftBusRatio = mults_index & 0xf;
177 longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; 179 longhaul.bits.SoftBusRatio4 = (mults_index & 0x10) >> 4;
178 /* Setup new voltage */ 180 /* Setup new voltage */
179 if (can_scale_voltage) 181 if (can_scale_voltage)
180 longhaul.bits.SoftVID = (clock_ratio_index >> 8) & 0x1f; 182 longhaul.bits.SoftVID = (mults_index >> 8) & 0x1f;
181 /* Sync to timer tick */ 183 /* Sync to timer tick */
182 safe_halt(); 184 safe_halt();
183 /* Raise voltage if necessary */ 185 /* Raise voltage if necessary */
@@ -240,14 +242,14 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index,
240 242
241/** 243/**
242 * longhaul_set_cpu_frequency() 244 * longhaul_set_cpu_frequency()
243 * @clock_ratio_index : bitpattern of the new multiplier. 245 * @mults_index : bitpattern of the new multiplier.
244 * 246 *
245 * Sets a new clock ratio. 247 * Sets a new clock ratio.
246 */ 248 */
247 249
248static void longhaul_setstate(unsigned int table_index) 250static void longhaul_setstate(unsigned int table_index)
249{ 251{
250 unsigned int clock_ratio_index; 252 unsigned int mults_index;
251 int speed, mult; 253 int speed, mult;
252 struct cpufreq_freqs freqs; 254 struct cpufreq_freqs freqs;
253 unsigned long flags; 255 unsigned long flags;
@@ -256,9 +258,9 @@ static void longhaul_setstate(unsigned int table_index)
256 u32 bm_timeout = 1000; 258 u32 bm_timeout = 1000;
257 unsigned int dir = 0; 259 unsigned int dir = 0;
258 260
259 clock_ratio_index = longhaul_table[table_index].index; 261 mults_index = longhaul_table[table_index].index;
260 /* Safety precautions */ 262 /* Safety precautions */
261 mult = clock_ratio[clock_ratio_index & 0x1f]; 263 mult = mults[mults_index & 0x1f];
262 if (mult == -1) 264 if (mult == -1)
263 return; 265 return;
264 speed = calc_speed(mult); 266 speed = calc_speed(mult);
@@ -274,7 +276,7 @@ static void longhaul_setstate(unsigned int table_index)
274 276
275 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 277 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
276 278
277 dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", 279 dprintk("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
278 fsb, mult/10, mult%10, print_speed(speed/1000)); 280 fsb, mult/10, mult%10, print_speed(speed/1000));
279retry_loop: 281retry_loop:
280 preempt_disable(); 282 preempt_disable();
@@ -282,8 +284,8 @@ retry_loop:
282 284
283 pic2_mask = inb(0xA1); 285 pic2_mask = inb(0xA1);
284 pic1_mask = inb(0x21); /* works on C3. save mask. */ 286 pic1_mask = inb(0x21); /* works on C3. save mask. */
285 outb(0xFF,0xA1); /* Overkill */ 287 outb(0xFF, 0xA1); /* Overkill */
286 outb(0xFE,0x21); /* TMR0 only */ 288 outb(0xFE, 0x21); /* TMR0 only */
287 289
288 /* Wait while PCI bus is busy. */ 290 /* Wait while PCI bus is busy. */
289 if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE 291 if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE
@@ -312,7 +314,7 @@ retry_loop:
312 * Software controlled multipliers only. 314 * Software controlled multipliers only.
313 */ 315 */
314 case TYPE_LONGHAUL_V1: 316 case TYPE_LONGHAUL_V1:
315 do_longhaul1(clock_ratio_index); 317 do_longhaul1(mults_index);
316 break; 318 break;
317 319
318 /* 320 /*
@@ -327,9 +329,9 @@ retry_loop:
327 if (longhaul_flags & USE_ACPI_C3) { 329 if (longhaul_flags & USE_ACPI_C3) {
328 /* Don't allow wakeup */ 330 /* Don't allow wakeup */
329 acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); 331 acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
330 do_powersaver(cx->address, clock_ratio_index, dir); 332 do_powersaver(cx->address, mults_index, dir);
331 } else { 333 } else {
332 do_powersaver(0, clock_ratio_index, dir); 334 do_powersaver(0, mults_index, dir);
333 } 335 }
334 break; 336 break;
335 } 337 }
@@ -341,8 +343,8 @@ retry_loop:
341 /* Enable bus master arbitration */ 343 /* Enable bus master arbitration */
342 acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0); 344 acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
343 } 345 }
344 outb(pic2_mask,0xA1); /* restore mask */ 346 outb(pic2_mask, 0xA1); /* restore mask */
345 outb(pic1_mask,0x21); 347 outb(pic1_mask, 0x21);
346 348
347 local_irq_restore(flags); 349 local_irq_restore(flags);
348 preempt_enable(); 350 preempt_enable();
@@ -392,7 +394,8 @@ retry_loop:
392 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 394 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
393 395
394 if (!bm_timeout) 396 if (!bm_timeout)
395 printk(KERN_INFO PFX "Warning: Timeout while waiting for idle PCI bus.\n"); 397 printk(KERN_INFO PFX "Warning: Timeout while waiting for "
398 "idle PCI bus.\n");
396} 399}
397 400
398/* 401/*
@@ -458,31 +461,32 @@ static int __init longhaul_get_ranges(void)
458 break; 461 break;
459 } 462 }
460 463
461 dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", 464 dprintk("MinMult:%d.%dx MaxMult:%d.%dx\n",
462 minmult/10, minmult%10, maxmult/10, maxmult%10); 465 minmult/10, minmult%10, maxmult/10, maxmult%10);
463 466
464 highest_speed = calc_speed(maxmult); 467 highest_speed = calc_speed(maxmult);
465 lowest_speed = calc_speed(minmult); 468 lowest_speed = calc_speed(minmult);
466 dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, 469 dprintk("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb,
467 print_speed(lowest_speed/1000), 470 print_speed(lowest_speed/1000),
468 print_speed(highest_speed/1000)); 471 print_speed(highest_speed/1000));
469 472
470 if (lowest_speed == highest_speed) { 473 if (lowest_speed == highest_speed) {
471 printk (KERN_INFO PFX "highestspeed == lowest, aborting.\n"); 474 printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n");
472 return -EINVAL; 475 return -EINVAL;
473 } 476 }
474 if (lowest_speed > highest_speed) { 477 if (lowest_speed > highest_speed) {
475 printk (KERN_INFO PFX "nonsense! lowest (%d > %d) !\n", 478 printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n",
476 lowest_speed, highest_speed); 479 lowest_speed, highest_speed);
477 return -EINVAL; 480 return -EINVAL;
478 } 481 }
479 482
480 longhaul_table = kmalloc((numscales + 1) * sizeof(struct cpufreq_frequency_table), GFP_KERNEL); 483 longhaul_table = kmalloc((numscales + 1) * sizeof(*longhaul_table),
481 if(!longhaul_table) 484 GFP_KERNEL);
485 if (!longhaul_table)
482 return -ENOMEM; 486 return -ENOMEM;
483 487
484 for (j = 0; j < numscales; j++) { 488 for (j = 0; j < numscales; j++) {
485 ratio = clock_ratio[j]; 489 ratio = mults[j];
486 if (ratio == -1) 490 if (ratio == -1)
487 continue; 491 continue;
488 if (ratio > maxmult || ratio < minmult) 492 if (ratio > maxmult || ratio < minmult)
@@ -507,13 +511,10 @@ static int __init longhaul_get_ranges(void)
507 } 511 }
508 } 512 }
509 if (min_i != j) { 513 if (min_i != j) {
510 unsigned int temp; 514 swap(longhaul_table[j].frequency,
511 temp = longhaul_table[j].frequency; 515 longhaul_table[min_i].frequency);
512 longhaul_table[j].frequency = longhaul_table[min_i].frequency; 516 swap(longhaul_table[j].index,
513 longhaul_table[min_i].frequency = temp; 517 longhaul_table[min_i].index);
514 temp = longhaul_table[j].index;
515 longhaul_table[j].index = longhaul_table[min_i].index;
516 longhaul_table[min_i].index = temp;
517 } 518 }
518 } 519 }
519 520
@@ -521,7 +522,7 @@ static int __init longhaul_get_ranges(void)
521 522
522 /* Find index we are running on */ 523 /* Find index we are running on */
523 for (j = 0; j < k; j++) { 524 for (j = 0; j < k; j++) {
524 if (clock_ratio[longhaul_table[j].index & 0x1f] == mult) { 525 if (mults[longhaul_table[j].index & 0x1f] == mult) {
525 longhaul_index = j; 526 longhaul_index = j;
526 break; 527 break;
527 } 528 }
@@ -559,20 +560,22 @@ static void __init longhaul_setup_voltagescaling(void)
559 maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; 560 maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
560 561
561 if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { 562 if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
562 printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " 563 printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
563 "Voltage scaling disabled.\n", 564 "Voltage scaling disabled.\n",
564 minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000); 565 minvid.mV/1000, minvid.mV%1000,
566 maxvid.mV/1000, maxvid.mV%1000);
565 return; 567 return;
566 } 568 }
567 569
568 if (minvid.mV == maxvid.mV) { 570 if (minvid.mV == maxvid.mV) {
569 printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are " 571 printk(KERN_INFO PFX "Claims to support voltage scaling but "
570 "both %d.%03d. Voltage scaling disabled\n", 572 "min & max are both %d.%03d. "
573 "Voltage scaling disabled\n",
571 maxvid.mV/1000, maxvid.mV%1000); 574 maxvid.mV/1000, maxvid.mV%1000);
572 return; 575 return;
573 } 576 }
574 577
575 /* How many voltage steps */ 578 /* How many voltage steps*/
576 numvscales = maxvid.pos - minvid.pos + 1; 579 numvscales = maxvid.pos - minvid.pos + 1;
577 printk(KERN_INFO PFX 580 printk(KERN_INFO PFX
578 "Max VID=%d.%03d " 581 "Max VID=%d.%03d "
@@ -586,7 +589,7 @@ static void __init longhaul_setup_voltagescaling(void)
586 j = longhaul.bits.MinMHzBR; 589 j = longhaul.bits.MinMHzBR;
587 if (longhaul.bits.MinMHzBR4) 590 if (longhaul.bits.MinMHzBR4)
588 j += 16; 591 j += 16;
589 min_vid_speed = eblcr_table[j]; 592 min_vid_speed = eblcr[j];
590 if (min_vid_speed == -1) 593 if (min_vid_speed == -1)
591 return; 594 return;
592 switch (longhaul.bits.MinMHzFSB) { 595 switch (longhaul.bits.MinMHzFSB) {
@@ -617,7 +620,8 @@ static void __init longhaul_setup_voltagescaling(void)
617 pos = minvid.pos; 620 pos = minvid.pos;
618 longhaul_table[j].index |= mV_vrm_table[pos] << 8; 621 longhaul_table[j].index |= mV_vrm_table[pos] << 8;
619 vid = vrm_mV_table[mV_vrm_table[pos]]; 622 vid = vrm_mV_table[mV_vrm_table[pos]];
620 printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", speed, j, vid.mV); 623 printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n",
624 speed, j, vid.mV);
621 j++; 625 j++;
622 } 626 }
623 627
@@ -640,7 +644,8 @@ static int longhaul_target(struct cpufreq_policy *policy,
640 unsigned int dir = 0; 644 unsigned int dir = 0;
641 u8 vid, current_vid; 645 u8 vid, current_vid;
642 646
643 if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index)) 647 if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq,
648 relation, &table_index))
644 return -EINVAL; 649 return -EINVAL;
645 650
646 /* Don't set same frequency again */ 651 /* Don't set same frequency again */
@@ -656,7 +661,8 @@ static int longhaul_target(struct cpufreq_policy *policy,
656 * this in hardware, C3 is old and we need to do this 661 * this in hardware, C3 is old and we need to do this
657 * in software. */ 662 * in software. */
658 i = longhaul_index; 663 i = longhaul_index;
659 current_vid = (longhaul_table[longhaul_index].index >> 8) & 0x1f; 664 current_vid = (longhaul_table[longhaul_index].index >> 8);
665 current_vid &= 0x1f;
660 if (table_index > longhaul_index) 666 if (table_index > longhaul_index)
661 dir = 1; 667 dir = 1;
662 while (i != table_index) { 668 while (i != table_index) {
@@ -691,9 +697,9 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
691{ 697{
692 struct acpi_device *d; 698 struct acpi_device *d;
693 699
694 if ( acpi_bus_get_device(obj_handle, &d) ) { 700 if (acpi_bus_get_device(obj_handle, &d))
695 return 0; 701 return 0;
696 } 702
697 *return_value = acpi_driver_data(d); 703 *return_value = acpi_driver_data(d);
698 return 1; 704 return 1;
699} 705}
@@ -750,7 +756,7 @@ static int longhaul_setup_southbridge(void)
750 /* Find VT8235 southbridge */ 756 /* Find VT8235 southbridge */
751 dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL); 757 dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL);
752 if (dev == NULL) 758 if (dev == NULL)
753 /* Find VT8237 southbridge */ 759 /* Find VT8237 southbridge */
754 dev = pci_get_device(PCI_VENDOR_ID_VIA, 760 dev = pci_get_device(PCI_VENDOR_ID_VIA,
755 PCI_DEVICE_ID_VIA_8237, NULL); 761 PCI_DEVICE_ID_VIA_8237, NULL);
756 if (dev != NULL) { 762 if (dev != NULL) {
@@ -769,7 +775,8 @@ static int longhaul_setup_southbridge(void)
769 if (pci_cmd & 1 << 7) { 775 if (pci_cmd & 1 << 7) {
770 pci_read_config_dword(dev, 0x88, &acpi_regs_addr); 776 pci_read_config_dword(dev, 0x88, &acpi_regs_addr);
771 acpi_regs_addr &= 0xff00; 777 acpi_regs_addr &= 0xff00;
772 printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", acpi_regs_addr); 778 printk(KERN_INFO PFX "ACPI I/O at 0x%x\n",
779 acpi_regs_addr);
773 } 780 }
774 781
775 pci_dev_put(dev); 782 pci_dev_put(dev);
@@ -781,7 +788,7 @@ static int longhaul_setup_southbridge(void)
781static int __init longhaul_cpu_init(struct cpufreq_policy *policy) 788static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
782{ 789{
783 struct cpuinfo_x86 *c = &cpu_data(0); 790 struct cpuinfo_x86 *c = &cpu_data(0);
784 char *cpuname=NULL; 791 char *cpuname = NULL;
785 int ret; 792 int ret;
786 u32 lo, hi; 793 u32 lo, hi;
787 794
@@ -791,8 +798,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
791 cpu_model = CPU_SAMUEL; 798 cpu_model = CPU_SAMUEL;
792 cpuname = "C3 'Samuel' [C5A]"; 799 cpuname = "C3 'Samuel' [C5A]";
793 longhaul_version = TYPE_LONGHAUL_V1; 800 longhaul_version = TYPE_LONGHAUL_V1;
794 memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio)); 801 memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
795 memcpy (eblcr_table, samuel1_eblcr, sizeof(samuel1_eblcr)); 802 memcpy(eblcr, samuel1_eblcr, sizeof(samuel1_eblcr));
796 break; 803 break;
797 804
798 case 7: 805 case 7:
@@ -803,10 +810,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
803 cpuname = "C3 'Samuel 2' [C5B]"; 810 cpuname = "C3 'Samuel 2' [C5B]";
804 /* Note, this is not a typo, early Samuel2's had 811 /* Note, this is not a typo, early Samuel2's had
805 * Samuel1 ratios. */ 812 * Samuel1 ratios. */
806 memcpy(clock_ratio, samuel1_clock_ratio, 813 memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
807 sizeof(samuel1_clock_ratio)); 814 memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr));
808 memcpy(eblcr_table, samuel2_eblcr,
809 sizeof(samuel2_eblcr));
810 break; 815 break;
811 case 1 ... 15: 816 case 1 ... 15:
812 longhaul_version = TYPE_LONGHAUL_V1; 817 longhaul_version = TYPE_LONGHAUL_V1;
@@ -817,10 +822,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
817 cpu_model = CPU_EZRA; 822 cpu_model = CPU_EZRA;
818 cpuname = "C3 'Ezra' [C5C]"; 823 cpuname = "C3 'Ezra' [C5C]";
819 } 824 }
820 memcpy(clock_ratio, ezra_clock_ratio, 825 memcpy(mults, ezra_mults, sizeof(ezra_mults));
821 sizeof(ezra_clock_ratio)); 826 memcpy(eblcr, ezra_eblcr, sizeof(ezra_eblcr));
822 memcpy(eblcr_table, ezra_eblcr,
823 sizeof(ezra_eblcr));
824 break; 827 break;
825 } 828 }
826 break; 829 break;
@@ -829,18 +832,16 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
829 cpu_model = CPU_EZRA_T; 832 cpu_model = CPU_EZRA_T;
830 cpuname = "C3 'Ezra-T' [C5M]"; 833 cpuname = "C3 'Ezra-T' [C5M]";
831 longhaul_version = TYPE_POWERSAVER; 834 longhaul_version = TYPE_POWERSAVER;
832 numscales=32; 835 numscales = 32;
833 memcpy (clock_ratio, ezrat_clock_ratio, sizeof(ezrat_clock_ratio)); 836 memcpy(mults, ezrat_mults, sizeof(ezrat_mults));
834 memcpy (eblcr_table, ezrat_eblcr, sizeof(ezrat_eblcr)); 837 memcpy(eblcr, ezrat_eblcr, sizeof(ezrat_eblcr));
835 break; 838 break;
836 839
837 case 9: 840 case 9:
838 longhaul_version = TYPE_POWERSAVER; 841 longhaul_version = TYPE_POWERSAVER;
839 numscales = 32; 842 numscales = 32;
840 memcpy(clock_ratio, 843 memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults));
841 nehemiah_clock_ratio, 844 memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr));
842 sizeof(nehemiah_clock_ratio));
843 memcpy(eblcr_table, nehemiah_eblcr, sizeof(nehemiah_eblcr));
844 switch (c->x86_mask) { 845 switch (c->x86_mask) {
845 case 0 ... 1: 846 case 0 ... 1:
846 cpu_model = CPU_NEHEMIAH; 847 cpu_model = CPU_NEHEMIAH;
@@ -869,14 +870,14 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
869 longhaul_version = TYPE_LONGHAUL_V1; 870 longhaul_version = TYPE_LONGHAUL_V1;
870 } 871 }
871 872
872 printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname); 873 printk(KERN_INFO PFX "VIA %s CPU detected. ", cpuname);
873 switch (longhaul_version) { 874 switch (longhaul_version) {
874 case TYPE_LONGHAUL_V1: 875 case TYPE_LONGHAUL_V1:
875 case TYPE_LONGHAUL_V2: 876 case TYPE_LONGHAUL_V2:
876 printk ("Longhaul v%d supported.\n", longhaul_version); 877 printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version);
877 break; 878 break;
878 case TYPE_POWERSAVER: 879 case TYPE_POWERSAVER:
879 printk ("Powersaver supported.\n"); 880 printk(KERN_CONT "Powersaver supported.\n");
880 break; 881 break;
881 }; 882 };
882 883
@@ -940,7 +941,7 @@ static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy)
940 return 0; 941 return 0;
941} 942}
942 943
943static struct freq_attr* longhaul_attr[] = { 944static struct freq_attr *longhaul_attr[] = {
944 &cpufreq_freq_attr_scaling_available_freqs, 945 &cpufreq_freq_attr_scaling_available_freqs,
945 NULL, 946 NULL,
946}; 947};
@@ -966,13 +967,15 @@ static int __init longhaul_init(void)
966 967
967#ifdef CONFIG_SMP 968#ifdef CONFIG_SMP
968 if (num_online_cpus() > 1) { 969 if (num_online_cpus() > 1) {
969 printk(KERN_ERR PFX "More than 1 CPU detected, longhaul disabled.\n"); 970 printk(KERN_ERR PFX "More than 1 CPU detected, "
971 "longhaul disabled.\n");
970 return -ENODEV; 972 return -ENODEV;
971 } 973 }
972#endif 974#endif
973#ifdef CONFIG_X86_IO_APIC 975#ifdef CONFIG_X86_IO_APIC
974 if (cpu_has_apic) { 976 if (cpu_has_apic) {
975 printk(KERN_ERR PFX "APIC detected. Longhaul is currently broken in this configuration.\n"); 977 printk(KERN_ERR PFX "APIC detected. Longhaul is currently "
978 "broken in this configuration.\n");
976 return -ENODEV; 979 return -ENODEV;
977 } 980 }
978#endif 981#endif
@@ -993,8 +996,8 @@ static void __exit longhaul_exit(void)
993{ 996{
994 int i; 997 int i;
995 998
996 for (i=0; i < numscales; i++) { 999 for (i = 0; i < numscales; i++) {
997 if (clock_ratio[i] == maxmult) { 1000 if (mults[i] == maxmult) {
998 longhaul_setstate(i); 1001 longhaul_setstate(i);
999 break; 1002 break;
1000 } 1003 }
@@ -1007,11 +1010,11 @@ static void __exit longhaul_exit(void)
1007/* Even if BIOS is exporting ACPI C3 state, and it is used 1010/* Even if BIOS is exporting ACPI C3 state, and it is used
1008 * with success when CPU is idle, this state doesn't 1011 * with success when CPU is idle, this state doesn't
1009 * trigger frequency transition in some cases. */ 1012 * trigger frequency transition in some cases. */
1010module_param (disable_acpi_c3, int, 0644); 1013module_param(disable_acpi_c3, int, 0644);
1011MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); 1014MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support");
1012/* Change CPU voltage with frequency. Very usefull to save 1015/* Change CPU voltage with frequency. Very usefull to save
1013 * power, but most VIA C3 processors aren't supporting it. */ 1016 * power, but most VIA C3 processors aren't supporting it. */
1014module_param (scale_voltage, int, 0644); 1017module_param(scale_voltage, int, 0644);
1015MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); 1018MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
1016/* Force revision key to 0 for processors which doesn't 1019/* Force revision key to 0 for processors which doesn't
1017 * support voltage scaling, but are introducing itself as 1020 * support voltage scaling, but are introducing itself as
@@ -1019,9 +1022,9 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
1019module_param(revid_errata, int, 0644); 1022module_param(revid_errata, int, 0644);
1020MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); 1023MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
1021 1024
1022MODULE_AUTHOR ("Dave Jones <davej@redhat.com>"); 1025MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
1023MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); 1026MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors.");
1024MODULE_LICENSE ("GPL"); 1027MODULE_LICENSE("GPL");
1025 1028
1026late_initcall(longhaul_init); 1029late_initcall(longhaul_init);
1027module_exit(longhaul_exit); 1030module_exit(longhaul_exit);
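One of the longhaul.c cleanups above replaces a manual three-assignment exchange inside the frequency-table sort with swap(). The sketch below shows the same selection-sort-plus-swap pattern as a standalone program, separate from the patch; struct freq_entry, SWAP() and the table contents are invented for the demo (the kernel provides its own swap() helper).

#include <stdio.h>

struct freq_entry {
	unsigned int frequency;	/* kHz */
	unsigned int index;	/* multiplier bit pattern */
};

/* Local stand-in for the kernel's swap(); uses GCC's typeof. */
#define SWAP(a, b) do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)

/* Selection sort by frequency, swapping both fields of each entry,
 * mirroring the loop in longhaul_get_ranges(). */
static void sort_by_frequency(struct freq_entry *t, unsigned int n)
{
	unsigned int j, k, min_i;

	for (j = 0; j < n; j++) {
		min_i = j;
		for (k = j + 1; k < n; k++)
			if (t[k].frequency < t[min_i].frequency)
				min_i = k;
		if (min_i != j) {
			SWAP(t[j].frequency, t[min_i].frequency);
			SWAP(t[j].index, t[min_i].index);
		}
	}
}

int main(void)
{
	struct freq_entry table[] = {
		{ 800000, 3 }, { 400000, 1 }, { 600000, 2 },
	};
	unsigned int i, n = sizeof(table) / sizeof(table[0]);

	sort_by_frequency(table, n);
	for (i = 0; i < n; i++)
		printf("%u kHz (index %u)\n", table[i].frequency, table[i].index);
	return 0;
}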
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h
index 4fcc320997df..e2360a469f79 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.h
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.h
@@ -49,14 +49,14 @@ union msr_longhaul {
49 49
50/* 50/*
51 * Clock ratio tables. Div/Mod by 10 to get ratio. 51 * Clock ratio tables. Div/Mod by 10 to get ratio.
52 * The eblcr ones specify the ratio read from the CPU. 52 * The eblcr values specify the ratio read from the CPU.
53 * The clock_ratio ones specify what to write to the CPU. 53 * The mults values specify what to write to the CPU.
54 */ 54 */
55 55
56/* 56/*
57 * VIA C3 Samuel 1 & Samuel 2 (stepping 0) 57 * VIA C3 Samuel 1 & Samuel 2 (stepping 0)
58 */ 58 */
59static const int __initdata samuel1_clock_ratio[16] = { 59static const int __initdata samuel1_mults[16] = {
60 -1, /* 0000 -> RESERVED */ 60 -1, /* 0000 -> RESERVED */
61 30, /* 0001 -> 3.0x */ 61 30, /* 0001 -> 3.0x */
62 40, /* 0010 -> 4.0x */ 62 40, /* 0010 -> 4.0x */
@@ -119,7 +119,7 @@ static const int __initdata samuel2_eblcr[16] = {
119/* 119/*
120 * VIA C3 Ezra 120 * VIA C3 Ezra
121 */ 121 */
122static const int __initdata ezra_clock_ratio[16] = { 122static const int __initdata ezra_mults[16] = {
123 100, /* 0000 -> 10.0x */ 123 100, /* 0000 -> 10.0x */
124 30, /* 0001 -> 3.0x */ 124 30, /* 0001 -> 3.0x */
125 40, /* 0010 -> 4.0x */ 125 40, /* 0010 -> 4.0x */
@@ -160,7 +160,7 @@ static const int __initdata ezra_eblcr[16] = {
160/* 160/*
161 * VIA C3 (Ezra-T) [C5M]. 161 * VIA C3 (Ezra-T) [C5M].
162 */ 162 */
163static const int __initdata ezrat_clock_ratio[32] = { 163static const int __initdata ezrat_mults[32] = {
164 100, /* 0000 -> 10.0x */ 164 100, /* 0000 -> 10.0x */
165 30, /* 0001 -> 3.0x */ 165 30, /* 0001 -> 3.0x */
166 40, /* 0010 -> 4.0x */ 166 40, /* 0010 -> 4.0x */
@@ -235,7 +235,7 @@ static const int __initdata ezrat_eblcr[32] = {
235/* 235/*
236 * VIA C3 Nehemiah */ 236 * VIA C3 Nehemiah */
237 237
238static const int __initdata nehemiah_clock_ratio[32] = { 238static const int __initdata nehemiah_mults[32] = {
239 100, /* 0000 -> 10.0x */ 239 100, /* 0000 -> 10.0x */
240 -1, /* 0001 -> 16.0x */ 240 -1, /* 0001 -> 16.0x */
241 40, /* 0010 -> 4.0x */ 241 40, /* 0010 -> 4.0x */
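The longhaul.h rename above keeps the two-table scheme: mults[] holds the ratio to write (stored x10, -1 for reserved encodings) and eblcr[] holds the ratio read back from the CPU. Together with the longhaul_get_cpu_mult() hunk earlier, the decode is: take bits 22-25 of the MSR low word, add 16 if bit 27 is set on Longhaul v2 / PowerSaver parts, and index eblcr[]. The sketch below illustrates that decode on its own, separate from the patch; decode_mult() and the eblcr_demo[] contents are invented, not the driver's real table values.

#include <stdio.h>

/* Made-up subset of an eblcr-style table: multipliers stored x10. */
static const int eblcr_demo[32] = {
	[1] = 30, [2] = 40, [4] = 55, [5] = 35, [6] = 45, [8] = 50,
	/* unlisted entries stay 0 here; the driver marks reserved slots -1 */
};

static int decode_mult(unsigned long msr_lo, int powersaver)
{
	unsigned long idx = (msr_lo >> 22) & 0xf;	/* bits 22-25 */

	if (powersaver && (msr_lo & (1UL << 27)))	/* 5th index bit */
		idx += 16;
	return eblcr_demo[idx];
}

int main(void)
{
	/* pretend the MSR read returned a field value of 5 */
	unsigned long msr_lo = 5UL << 22;
	int mult = decode_mult(msr_lo, 0);

	printf("multiplier: %d.%dx\n", mult / 10, mult % 10);
	return 0;
}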
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index 777a7ff075de..da5f70fcb766 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -11,12 +11,13 @@
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/cpufreq.h> 13#include <linux/cpufreq.h>
14#include <linux/timex.h>
14 15
15#include <asm/msr.h> 16#include <asm/msr.h>
16#include <asm/processor.h> 17#include <asm/processor.h>
17#include <asm/timex.h>
18 18
19#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longrun", msg) 19#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
20 "longrun", msg)
20 21
21static struct cpufreq_driver longrun_driver; 22static struct cpufreq_driver longrun_driver;
22 23
@@ -51,7 +52,7 @@ static void __init longrun_get_policy(struct cpufreq_policy *policy)
51 msr_lo &= 0x0000007F; 52 msr_lo &= 0x0000007F;
52 msr_hi &= 0x0000007F; 53 msr_hi &= 0x0000007F;
53 54
54 if ( longrun_high_freq <= longrun_low_freq ) { 55 if (longrun_high_freq <= longrun_low_freq) {
55 /* Assume degenerate Longrun table */ 56 /* Assume degenerate Longrun table */
56 policy->min = policy->max = longrun_high_freq; 57 policy->min = policy->max = longrun_high_freq;
57 } else { 58 } else {
@@ -79,7 +80,7 @@ static int longrun_set_policy(struct cpufreq_policy *policy)
79 if (!policy) 80 if (!policy)
80 return -EINVAL; 81 return -EINVAL;
81 82
82 if ( longrun_high_freq <= longrun_low_freq ) { 83 if (longrun_high_freq <= longrun_low_freq) {
83 /* Assume degenerate Longrun table */ 84 /* Assume degenerate Longrun table */
84 pctg_lo = pctg_hi = 100; 85 pctg_lo = pctg_hi = 100;
85 } else { 86 } else {
@@ -152,7 +153,7 @@ static unsigned int longrun_get(unsigned int cpu)
152 cpuid(0x80860007, &eax, &ebx, &ecx, &edx); 153 cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
153 dprintk("cpuid eax is %u\n", eax); 154 dprintk("cpuid eax is %u\n", eax);
154 155
155 return (eax * 1000); 156 return eax * 1000;
156} 157}
157 158
158/** 159/**
@@ -196,7 +197,8 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
196 rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); 197 rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
197 *high_freq = msr_lo * 1000; /* to kHz */ 198 *high_freq = msr_lo * 1000; /* to kHz */
198 199
199 dprintk("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq); 200 dprintk("longrun table interface told %u - %u kHz\n",
201 *low_freq, *high_freq);
200 202
201 if (*low_freq > *high_freq) 203 if (*low_freq > *high_freq)
202 *low_freq = *high_freq; 204 *low_freq = *high_freq;
@@ -219,7 +221,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
219 cpuid(0x80860007, &eax, &ebx, &ecx, &edx); 221 cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
220 /* try decreasing in 10% steps, some processors react only 222 /* try decreasing in 10% steps, some processors react only
221 * on some barrier values */ 223 * on some barrier values */
222 for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -=10) { 224 for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -= 10) {
223 /* set to 0 to try_hi perf_pctg */ 225 /* set to 0 to try_hi perf_pctg */
224 msr_lo &= 0xFFFFFF80; 226 msr_lo &= 0xFFFFFF80;
225 msr_hi &= 0xFFFFFF80; 227 msr_hi &= 0xFFFFFF80;
@@ -236,7 +238,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
236 238
237 /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) 239 /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
238 * eqals 240 * eqals
239 * low_freq * ( 1 - perf_pctg) = (cur_freq - high_freq * perf_pctg) 241 * low_freq * (1 - perf_pctg) = (cur_freq - high_freq * perf_pctg)
240 * 242 *
241 * high_freq * perf_pctg is stored tempoarily into "ebx". 243 * high_freq * perf_pctg is stored tempoarily into "ebx".
242 */ 244 */
@@ -317,9 +319,10 @@ static void __exit longrun_exit(void)
317} 319}
318 320
319 321
320MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>"); 322MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
321MODULE_DESCRIPTION ("LongRun driver for Transmeta Crusoe and Efficeon processors."); 323MODULE_DESCRIPTION("LongRun driver for Transmeta Crusoe and "
322MODULE_LICENSE ("GPL"); 324 "Efficeon processors.");
325MODULE_LICENSE("GPL");
323 326
324module_init(longrun_init); 327module_init(longrun_init);
325module_exit(longrun_exit); 328module_exit(longrun_exit);
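The comment cleaned up in longrun_determine_freqs() above carries the actual math: perf_pctg = (cur_freq - low_freq) / (high_freq - low_freq), rearranged to low_freq * (1 - perf_pctg) = cur_freq - high_freq * perf_pctg. A worked integer version, separate from the patch, is sketched below; derive_low_freq() and the numbers in main() are invented for the demo, with the percentage kept in 0..100.

#include <stdio.h>

/* low = (100*cur - pctg*high) / (100 - pctg), straight from the comment. */
static unsigned int derive_low_freq(unsigned int cur_mhz, unsigned int high_mhz,
				    unsigned int pctg)
{
	if (pctg >= 100 || pctg * high_mhz > 100 * cur_mhz)
		return cur_mhz;	/* degenerate input, bail out like the driver does */
	return (100 * cur_mhz - pctg * high_mhz) / (100 - pctg);
}

int main(void)
{
	/* 450 MHz observed at a 50% setting against a 600 MHz ceiling */
	unsigned int low = derive_low_freq(450, 600, 50);

	printf("derived low_freq: %u MHz\n", low);	/* prints 300 */
	return 0;
}

Check: (450 - 300) / (600 - 300) = 0.5, matching the 50% performance setting.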
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index d8341d17c189..6ac55bd341ae 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -27,15 +27,17 @@
27#include <linux/cpufreq.h> 27#include <linux/cpufreq.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/cpumask.h> 29#include <linux/cpumask.h>
30#include <linux/timex.h>
30 31
31#include <asm/processor.h> 32#include <asm/processor.h>
32#include <asm/msr.h> 33#include <asm/msr.h>
33#include <asm/timex.h> 34#include <asm/timer.h>
34 35
35#include "speedstep-lib.h" 36#include "speedstep-lib.h"
36 37
37#define PFX "p4-clockmod: " 38#define PFX "p4-clockmod: "
38#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg) 39#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
40 "p4-clockmod", msg)
39 41
40/* 42/*
41 * Duty Cycle (3bits), note DC_DISABLE is not specified in 43 * Duty Cycle (3bits), note DC_DISABLE is not specified in
@@ -58,7 +60,8 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
58{ 60{
59 u32 l, h; 61 u32 l, h;
60 62
61 if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV)) 63 if (!cpu_online(cpu) ||
64 (newstate > DC_DISABLE) || (newstate == DC_RESV))
62 return -EINVAL; 65 return -EINVAL;
63 66
64 rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h); 67 rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h);
@@ -66,7 +69,8 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
66 if (l & 0x01) 69 if (l & 0x01)
67 dprintk("CPU#%d currently thermal throttled\n", cpu); 70 dprintk("CPU#%d currently thermal throttled\n", cpu);
68 71
69 if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT)) 72 if (has_N44_O17_errata[cpu] &&
73 (newstate == DC_25PT || newstate == DC_DFLT))
70 newstate = DC_38PT; 74 newstate = DC_38PT;
71 75
72 rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); 76 rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
@@ -112,7 +116,8 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
112 struct cpufreq_freqs freqs; 116 struct cpufreq_freqs freqs;
113 int i; 117 int i;
114 118
115 if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate)) 119 if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0],
120 target_freq, relation, &newstate))
116 return -EINVAL; 121 return -EINVAL;
117 122
118 freqs.old = cpufreq_p4_get(policy->cpu); 123 freqs.old = cpufreq_p4_get(policy->cpu);
@@ -127,7 +132,8 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
127 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 132 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
128 } 133 }
129 134
130 /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software 135 /* run on each logical CPU,
136 * see section 13.15.3 of IA32 Intel Architecture Software
131 * Developer's Manual, Volume 3 137 * Developer's Manual, Volume 3
132 */ 138 */
133 for_each_cpu(i, policy->cpus) 139 for_each_cpu(i, policy->cpus)
@@ -153,28 +159,30 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
153{ 159{
154 if (c->x86 == 0x06) { 160 if (c->x86 == 0x06) {
155 if (cpu_has(c, X86_FEATURE_EST)) 161 if (cpu_has(c, X86_FEATURE_EST))
156 printk(KERN_WARNING PFX "Warning: EST-capable CPU detected. " 162 printk(KERN_WARNING PFX "Warning: EST-capable CPU "
157 "The acpi-cpufreq module offers voltage scaling" 163 "detected. The acpi-cpufreq module offers "
158 " in addition of frequency scaling. You should use " 164 "voltage scaling in addition of frequency "
159 "that instead of p4-clockmod, if possible.\n"); 165 "scaling. You should use that instead of "
166 "p4-clockmod, if possible.\n");
160 switch (c->x86_model) { 167 switch (c->x86_model) {
161 case 0x0E: /* Core */ 168 case 0x0E: /* Core */
162 case 0x0F: /* Core Duo */ 169 case 0x0F: /* Core Duo */
163 case 0x16: /* Celeron Core */ 170 case 0x16: /* Celeron Core */
164 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; 171 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
165 return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); 172 return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE);
166 case 0x0D: /* Pentium M (Dothan) */ 173 case 0x0D: /* Pentium M (Dothan) */
167 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; 174 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
168 /* fall through */ 175 /* fall through */
169 case 0x09: /* Pentium M (Banias) */ 176 case 0x09: /* Pentium M (Banias) */
170 return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM); 177 return speedstep_get_frequency(SPEEDSTEP_CPU_PM);
171 } 178 }
172 } 179 }
173 180
174 if (c->x86 != 0xF) { 181 if (c->x86 != 0xF) {
175 if (!cpu_has(c, X86_FEATURE_EST)) 182 if (!cpu_has(c, X86_FEATURE_EST))
176 printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. " 183 printk(KERN_WARNING PFX "Unknown CPU. "
177 "Please send an e-mail to <cpufreq@vger.kernel.org>\n"); 184 "Please send an e-mail to "
185 "<cpufreq@vger.kernel.org>\n");
178 return 0; 186 return 0;
179 } 187 }
180 188
@@ -182,16 +190,16 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
182 * throttling is active or not. */ 190 * throttling is active or not. */
183 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; 191 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
184 192
185 if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) { 193 if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) {
186 printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. " 194 printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
187 "The speedstep-ich or acpi cpufreq modules offer " 195 "The speedstep-ich or acpi cpufreq modules offer "
188 "voltage scaling in addition of frequency scaling. " 196 "voltage scaling in addition of frequency scaling. "
189 "You should use either one instead of p4-clockmod, " 197 "You should use either one instead of p4-clockmod, "
190 "if possible.\n"); 198 "if possible.\n");
191 return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M); 199 return speedstep_get_frequency(SPEEDSTEP_CPU_P4M);
192 } 200 }
193 201
194 return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D); 202 return speedstep_get_frequency(SPEEDSTEP_CPU_P4D);
195} 203}
196 204
197 205
@@ -217,14 +225,20 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
217 dprintk("has errata -- disabling low frequencies\n"); 225 dprintk("has errata -- disabling low frequencies\n");
218 } 226 }
219 227
228 if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D &&
229 c->x86_model < 2) {
230 /* switch to maximum frequency and measure result */
231 cpufreq_p4_setdc(policy->cpu, DC_DISABLE);
232 recalibrate_cpu_khz();
233 }
220 /* get max frequency */ 234 /* get max frequency */
221 stock_freq = cpufreq_p4_get_frequency(c); 235 stock_freq = cpufreq_p4_get_frequency(c);
222 if (!stock_freq) 236 if (!stock_freq)
223 return -EINVAL; 237 return -EINVAL;
224 238
225 /* table init */ 239 /* table init */
226 for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { 240 for (i = 1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
227 if ((i<2) && (has_N44_O17_errata[policy->cpu])) 241 if ((i < 2) && (has_N44_O17_errata[policy->cpu]))
228 p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; 242 p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
229 else 243 else
230 p4clockmod_table[i].frequency = (stock_freq * i)/8; 244 p4clockmod_table[i].frequency = (stock_freq * i)/8;
@@ -232,7 +246,10 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
232 cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu); 246 cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu);
233 247
234 /* cpuinfo and default policy values */ 248 /* cpuinfo and default policy values */
235 policy->cpuinfo.transition_latency = 1000000; /* assumed */ 249
250 /* the transition latency is set to be 1 higher than the maximum
251 * transition latency of the ondemand governor */
252 policy->cpuinfo.transition_latency = 10000001;
236 policy->cur = stock_freq; 253 policy->cur = stock_freq;
237 254
238 return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]); 255 return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]);
@@ -258,12 +275,12 @@ static unsigned int cpufreq_p4_get(unsigned int cpu)
258 l = DC_DISABLE; 275 l = DC_DISABLE;
259 276
260 if (l != DC_DISABLE) 277 if (l != DC_DISABLE)
261 return (stock_freq * l / 8); 278 return stock_freq * l / 8;
262 279
263 return stock_freq; 280 return stock_freq;
264} 281}
265 282
266static struct freq_attr* p4clockmod_attr[] = { 283static struct freq_attr *p4clockmod_attr[] = {
267 &cpufreq_freq_attr_scaling_available_freqs, 284 &cpufreq_freq_attr_scaling_available_freqs,
268 NULL, 285 NULL,
269}; 286};
@@ -298,9 +315,10 @@ static int __init cpufreq_p4_init(void)
298 315
299 ret = cpufreq_register_driver(&p4clockmod_driver); 316 ret = cpufreq_register_driver(&p4clockmod_driver);
300 if (!ret) 317 if (!ret)
301 printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n"); 318 printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock "
319 "Modulation available\n");
302 320
303 return (ret); 321 return ret;
304} 322}
305 323
306 324
@@ -310,9 +328,9 @@ static void __exit cpufreq_p4_exit(void)
310} 328}
311 329
312 330
313MODULE_AUTHOR ("Zwane Mwaikambo <zwane@commfireservices.com>"); 331MODULE_AUTHOR("Zwane Mwaikambo <zwane@commfireservices.com>");
314MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)"); 332MODULE_DESCRIPTION("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
315MODULE_LICENSE ("GPL"); 333MODULE_LICENSE("GPL");
316 334
317late_initcall(cpufreq_p4_init); 335late_initcall(cpufreq_p4_init);
318module_exit(cpufreq_p4_exit); 336module_exit(cpufreq_p4_exit);
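The p4-clockmod table init touched above fills one entry per duty-cycle step: frequency = stock_freq * i / 8, with the two lowest steps invalidated on parts carrying the N44/O17 errata. A standalone sketch of that table build, separate from the patch, follows; ENTRY_INVALID and the 2.4 GHz stock value are stand-ins invented for the demo.

#include <stdio.h>

#define STEPS		8
#define ENTRY_INVALID	0	/* stand-in for CPUFREQ_ENTRY_INVALID */

int main(void)
{
	unsigned int stock_freq = 2400000;	/* kHz, made up for the demo */
	int has_errata = 1;
	unsigned int freq[STEPS];
	int i;

	/* one entry per duty-cycle step i/8, lowest two skipped on errata */
	for (i = 1; i < STEPS; i++) {
		if (i < 2 && has_errata)
			freq[i] = ENTRY_INVALID;
		else
			freq[i] = (stock_freq * i) / 8;
	}

	for (i = 1; i < STEPS; i++)
		printf("step %d/8: %u kHz\n", i, freq[i]);
	return 0;
}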
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index c1ac5790c63e..f10dea409f40 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * This file was based upon code in Powertweak Linux (http://powertweak.sf.net) 2 * This file was based upon code in Powertweak Linux (http://powertweak.sf.net)
3 * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski. 3 * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä,
4 * Dominik Brodowski.
4 * 5 *
5 * Licensed under the terms of the GNU GPL License version 2. 6 * Licensed under the terms of the GNU GPL License version 2.
6 * 7 *
@@ -13,14 +14,15 @@
13#include <linux/cpufreq.h> 14#include <linux/cpufreq.h>
14#include <linux/ioport.h> 15#include <linux/ioport.h>
15#include <linux/slab.h> 16#include <linux/slab.h>
16
17#include <asm/msr.h>
18#include <linux/timex.h> 17#include <linux/timex.h>
19#include <linux/io.h> 18#include <linux/io.h>
20 19
20#include <asm/msr.h>
21
21#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long 22#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long
22 as it is unused */ 23 as it is unused */
23 24
25#define PFX "powernow-k6: "
24static unsigned int busfreq; /* FSB, in 10 kHz */ 26static unsigned int busfreq; /* FSB, in 10 kHz */
25static unsigned int max_multiplier; 27static unsigned int max_multiplier;
26 28
@@ -47,8 +49,8 @@ static struct cpufreq_frequency_table clock_ratio[] = {
47 */ 49 */
48static int powernow_k6_get_cpu_multiplier(void) 50static int powernow_k6_get_cpu_multiplier(void)
49{ 51{
50 u64 invalue = 0; 52 u64 invalue = 0;
51 u32 msrval; 53 u32 msrval;
52 54
53 msrval = POWERNOW_IOPORT + 0x1; 55 msrval = POWERNOW_IOPORT + 0x1;
54 wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ 56 wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
@@ -68,12 +70,12 @@ static int powernow_k6_get_cpu_multiplier(void)
68 */ 70 */
69static void powernow_k6_set_state(unsigned int best_i) 71static void powernow_k6_set_state(unsigned int best_i)
70{ 72{
71 unsigned long outvalue = 0, invalue = 0; 73 unsigned long outvalue = 0, invalue = 0;
72 unsigned long msrval; 74 unsigned long msrval;
73 struct cpufreq_freqs freqs; 75 struct cpufreq_freqs freqs;
74 76
75 if (clock_ratio[best_i].index > max_multiplier) { 77 if (clock_ratio[best_i].index > max_multiplier) {
76 printk(KERN_ERR "cpufreq: invalid target frequency\n"); 78 printk(KERN_ERR PFX "invalid target frequency\n");
77 return; 79 return;
78 } 80 }
79 81
@@ -119,7 +121,8 @@ static int powernow_k6_verify(struct cpufreq_policy *policy)
119 * powernow_k6_setpolicy - sets a new CPUFreq policy 121 * powernow_k6_setpolicy - sets a new CPUFreq policy
120 * @policy: new policy 122 * @policy: new policy
121 * @target_freq: the target frequency 123 * @target_freq: the target frequency
122 * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) 124 * @relation: how that frequency relates to achieved frequency
125 * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
123 * 126 *
124 * sets a new CPUFreq policy 127 * sets a new CPUFreq policy
125 */ 128 */
@@ -127,9 +130,10 @@ static int powernow_k6_target(struct cpufreq_policy *policy,
127 unsigned int target_freq, 130 unsigned int target_freq,
128 unsigned int relation) 131 unsigned int relation)
129{ 132{
130 unsigned int newstate = 0; 133 unsigned int newstate = 0;
131 134
132 if (cpufreq_frequency_table_target(policy, &clock_ratio[0], target_freq, relation, &newstate)) 135 if (cpufreq_frequency_table_target(policy, &clock_ratio[0],
136 target_freq, relation, &newstate))
133 return -EINVAL; 137 return -EINVAL;
134 138
135 powernow_k6_set_state(newstate); 139 powernow_k6_set_state(newstate);
@@ -140,7 +144,7 @@ static int powernow_k6_target(struct cpufreq_policy *policy,
140 144
141static int powernow_k6_cpu_init(struct cpufreq_policy *policy) 145static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
142{ 146{
143 unsigned int i; 147 unsigned int i, f;
144 int result; 148 int result;
145 149
146 if (policy->cpu != 0) 150 if (policy->cpu != 0)
@@ -152,10 +156,11 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
152 156
153 /* table init */ 157 /* table init */
154 for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { 158 for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
155 if (clock_ratio[i].index > max_multiplier) 159 f = clock_ratio[i].index;
160 if (f > max_multiplier)
156 clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; 161 clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID;
157 else 162 else
158 clock_ratio[i].frequency = busfreq * clock_ratio[i].index; 163 clock_ratio[i].frequency = busfreq * f;
159 } 164 }
160 165
161 /* cpuinfo and default policy values */ 166 /* cpuinfo and default policy values */
@@ -185,7 +190,9 @@ static int powernow_k6_cpu_exit(struct cpufreq_policy *policy)
185 190
186static unsigned int powernow_k6_get(unsigned int cpu) 191static unsigned int powernow_k6_get(unsigned int cpu)
187{ 192{
188 return busfreq * powernow_k6_get_cpu_multiplier(); 193 unsigned int ret;
194 ret = (busfreq * powernow_k6_get_cpu_multiplier());
195 return ret;
189} 196}
190 197
191static struct freq_attr *powernow_k6_attr[] = { 198static struct freq_attr *powernow_k6_attr[] = {
@@ -221,7 +228,7 @@ static int __init powernow_k6_init(void)
221 return -ENODEV; 228 return -ENODEV;
222 229
223 if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) { 230 if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) {
224 printk("cpufreq: PowerNow IOPORT region already used.\n"); 231 printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n");
225 return -EIO; 232 return -EIO;
226 } 233 }
227 234
@@ -246,7 +253,8 @@ static void __exit powernow_k6_exit(void)
246} 253}
247 254
248 255
249MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>"); 256MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, "
257 "Dominik Brodowski <linux@brodo.de>");
250MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); 258MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
251MODULE_LICENSE("GPL"); 259MODULE_LICENSE("GPL");
252 260
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 1b446d79a8fd..3c28ccd49742 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -6,10 +6,12 @@
6 * Licensed under the terms of the GNU GPL License version 2. 6 * Licensed under the terms of the GNU GPL License version 2.
7 * Based upon datasheets & sample CPUs kindly provided by AMD. 7 * Based upon datasheets & sample CPUs kindly provided by AMD.
8 * 8 *
9 * Errata 5: Processor may fail to execute a FID/VID change in presence of interrupt. 9 * Errata 5:
10 * - We cli/sti on stepping A0 CPUs around the FID/VID transition. 10 * CPU may fail to execute a FID/VID change in presence of interrupt.
11 * Errata 15: Processors with half frequency multipliers may hang upon wakeup from disconnect. 11 * - We cli/sti on stepping A0 CPUs around the FID/VID transition.
12 * - We disable half multipliers if ACPI is used on A0 stepping CPUs. 12 * Errata 15:
13 * CPU with half frequency multipliers may hang upon wakeup from disconnect.
14 * - We disable half multipliers if ACPI is used on A0 stepping CPUs.
13 */ 15 */
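[Editor's note] A hedged sketch of how the Errata 5 workaround described above is applied around a FID/VID transition; change_FID() and change_VID() are the helpers defined later in this file, and the surrounding transition code is elided:

	if (have_a0 == 1)	/* A0 stepping: no interrupts during FID/VID change */
		local_irq_disable();

	change_FID(fid);
	change_VID(vid);

	if (have_a0 == 1)
		local_irq_enable();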
14 16
15#include <linux/kernel.h> 17#include <linux/kernel.h>
@@ -20,11 +22,11 @@
20#include <linux/slab.h> 22#include <linux/slab.h>
21#include <linux/string.h> 23#include <linux/string.h>
22#include <linux/dmi.h> 24#include <linux/dmi.h>
25#include <linux/timex.h>
26#include <linux/io.h>
23 27
28#include <asm/timer.h> /* Needed for recalibrate_cpu_khz() */
24#include <asm/msr.h> 29#include <asm/msr.h>
25#include <asm/timer.h>
26#include <asm/timex.h>
27#include <asm/io.h>
28#include <asm/system.h> 30#include <asm/system.h>
29 31
30#ifdef CONFIG_X86_POWERNOW_K7_ACPI 32#ifdef CONFIG_X86_POWERNOW_K7_ACPI
@@ -58,9 +60,9 @@ struct pst_s {
58union powernow_acpi_control_t { 60union powernow_acpi_control_t {
59 struct { 61 struct {
60 unsigned long fid:5, 62 unsigned long fid:5,
61 vid:5, 63 vid:5,
62 sgtc:20, 64 sgtc:20,
63 res1:2; 65 res1:2;
64 } bits; 66 } bits;
65 unsigned long val; 67 unsigned long val;
66}; 68};
@@ -94,14 +96,15 @@ static struct cpufreq_frequency_table *powernow_table;
94 96
95static unsigned int can_scale_bus; 97static unsigned int can_scale_bus;
96static unsigned int can_scale_vid; 98static unsigned int can_scale_vid;
97static unsigned int minimum_speed=-1; 99static unsigned int minimum_speed = -1;
98static unsigned int maximum_speed; 100static unsigned int maximum_speed;
99static unsigned int number_scales; 101static unsigned int number_scales;
100static unsigned int fsb; 102static unsigned int fsb;
101static unsigned int latency; 103static unsigned int latency;
102static char have_a0; 104static char have_a0;
103 105
104#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k7", msg) 106#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
107 "powernow-k7", msg)
105 108
106static int check_fsb(unsigned int fsbspeed) 109static int check_fsb(unsigned int fsbspeed)
107{ 110{
@@ -109,7 +112,7 @@ static int check_fsb(unsigned int fsbspeed)
109 unsigned int f = fsb / 1000; 112 unsigned int f = fsb / 1000;
110 113
111 delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed; 114 delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed;
112 return (delta < 5); 115 return delta < 5;
113} 116}
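[Editor's note] A worked example of the tolerance check in check_fsb() above (values purely illustrative):

	/* measured bus clock: fsb = 99400 kHz  ->  f = fsb / 1000 = 99 MHz
	 * BIOS PST entry:     fsbspeed = 100 MHz
	 * delta = 100 - 99 = 1  ->  1 < 5, so the PST entry is accepted */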
114 117
115static int check_powernow(void) 118static int check_powernow(void)
@@ -117,24 +120,26 @@ static int check_powernow(void)
117 struct cpuinfo_x86 *c = &cpu_data(0); 120 struct cpuinfo_x86 *c = &cpu_data(0);
118 unsigned int maxei, eax, ebx, ecx, edx; 121 unsigned int maxei, eax, ebx, ecx, edx;
119 122
120 if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) { 123 if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 6)) {
121#ifdef MODULE 124#ifdef MODULE
122 printk (KERN_INFO PFX "This module only works with AMD K7 CPUs\n"); 125 printk(KERN_INFO PFX "This module only works with "
126 "AMD K7 CPUs\n");
123#endif 127#endif
124 return 0; 128 return 0;
125 } 129 }
126 130
127 /* Get maximum capabilities */ 131 /* Get maximum capabilities */
128 maxei = cpuid_eax (0x80000000); 132 maxei = cpuid_eax(0x80000000);
129 if (maxei < 0x80000007) { /* Any powernow info ? */ 133 if (maxei < 0x80000007) { /* Any powernow info ? */
130#ifdef MODULE 134#ifdef MODULE
131 printk (KERN_INFO PFX "No powernow capabilities detected\n"); 135 printk(KERN_INFO PFX "No powernow capabilities detected\n");
132#endif 136#endif
133 return 0; 137 return 0;
134 } 138 }
135 139
136 if ((c->x86_model == 6) && (c->x86_mask == 0)) { 140 if ((c->x86_model == 6) && (c->x86_mask == 0)) {
137 printk (KERN_INFO PFX "K7 660[A0] core detected, enabling errata workarounds\n"); 141 printk(KERN_INFO PFX "K7 660[A0] core detected, "
142 "enabling errata workarounds\n");
138 have_a0 = 1; 143 have_a0 = 1;
139 } 144 }
140 145
@@ -144,37 +149,42 @@ static int check_powernow(void)
144 if (!(edx & (1 << 1 | 1 << 2))) 149 if (!(edx & (1 << 1 | 1 << 2)))
145 return 0; 150 return 0;
146 151
147 printk (KERN_INFO PFX "PowerNOW! Technology present. Can scale: "); 152 printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
148 153
149 if (edx & 1 << 1) { 154 if (edx & 1 << 1) {
150 printk ("frequency"); 155 printk("frequency");
151 can_scale_bus=1; 156 can_scale_bus = 1;
152 } 157 }
153 158
154 if ((edx & (1 << 1 | 1 << 2)) == 0x6) 159 if ((edx & (1 << 1 | 1 << 2)) == 0x6)
155 printk (" and "); 160 printk(" and ");
156 161
157 if (edx & 1 << 2) { 162 if (edx & 1 << 2) {
158 printk ("voltage"); 163 printk("voltage");
159 can_scale_vid=1; 164 can_scale_vid = 1;
160 } 165 }
161 166
162 printk (".\n"); 167 printk(".\n");
163 return 1; 168 return 1;
164} 169}
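[Editor's note] The capability probe above reduces to two feature bits in the extended CPUID leaf; a minimal sketch of the same check:

	unsigned int edx = cpuid_edx(0x80000007);  /* advanced power management leaf */

	if (edx & (1 << 1))	/* bit 1: FID (frequency) scaling supported */
		can_scale_bus = 1;
	if (edx & (1 << 2))	/* bit 2: VID (voltage) scaling supported */
		can_scale_vid = 1;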
165 170
171static void invalidate_entry(unsigned int entry)
172{
173 powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
174}
166 175
167static int get_ranges (unsigned char *pst) 176static int get_ranges(unsigned char *pst)
168{ 177{
169 unsigned int j; 178 unsigned int j;
170 unsigned int speed; 179 unsigned int speed;
171 u8 fid, vid; 180 u8 fid, vid;
172 181
173 powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL); 182 powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
183 (number_scales + 1)), GFP_KERNEL);
174 if (!powernow_table) 184 if (!powernow_table)
175 return -ENOMEM; 185 return -ENOMEM;
176 186
177 for (j=0 ; j < number_scales; j++) { 187 for (j = 0 ; j < number_scales; j++) {
178 fid = *pst++; 188 fid = *pst++;
179 189
180 powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10; 190 powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10;
@@ -182,10 +192,10 @@ static int get_ranges (unsigned char *pst)
182 192
183 speed = powernow_table[j].frequency; 193 speed = powernow_table[j].frequency;
184 194
185 if ((fid_codes[fid] % 10)==5) { 195 if ((fid_codes[fid] % 10) == 5) {
186#ifdef CONFIG_X86_POWERNOW_K7_ACPI 196#ifdef CONFIG_X86_POWERNOW_K7_ACPI
187 if (have_a0 == 1) 197 if (have_a0 == 1)
188 powernow_table[j].frequency = CPUFREQ_ENTRY_INVALID; 198 invalidate_entry(j);
189#endif 199#endif
190 } 200 }
191 201
@@ -197,7 +207,7 @@ static int get_ranges (unsigned char *pst)
197 vid = *pst++; 207 vid = *pst++;
198 powernow_table[j].index |= (vid << 8); /* upper 8 bits */ 208 powernow_table[j].index |= (vid << 8); /* upper 8 bits */
199 209
200 dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " 210 dprintk(" FID: 0x%x (%d.%dx [%dMHz]) "
201 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, 211 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
202 fid_codes[fid] % 10, speed/1000, vid, 212 fid_codes[fid] % 10, speed/1000, vid,
203 mobile_vid_table[vid]/1000, 213 mobile_vid_table[vid]/1000,
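[Editor's note] The table entries built in get_ranges() above pack both identifiers into the 16-bit .index field so the transition path later in this file can recover them; a sketch of the encoding and decoding (n is a hypothetical table index):

	powernow_table[j].index = fid;			/* bits 7..0:  frequency ID */
	powernow_table[j].index |= (vid << 8);		/* bits 15..8: voltage ID   */

	/* decoding at transition time: */
	u8 cur_fid = powernow_table[n].index & 0xff;
	u8 cur_vid = (powernow_table[n].index >> 8) & 0xff;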
@@ -214,13 +224,13 @@ static void change_FID(int fid)
214{ 224{
215 union msr_fidvidctl fidvidctl; 225 union msr_fidvidctl fidvidctl;
216 226
217 rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); 227 rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
218 if (fidvidctl.bits.FID != fid) { 228 if (fidvidctl.bits.FID != fid) {
219 fidvidctl.bits.SGTC = latency; 229 fidvidctl.bits.SGTC = latency;
220 fidvidctl.bits.FID = fid; 230 fidvidctl.bits.FID = fid;
221 fidvidctl.bits.VIDC = 0; 231 fidvidctl.bits.VIDC = 0;
222 fidvidctl.bits.FIDC = 1; 232 fidvidctl.bits.FIDC = 1;
223 wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); 233 wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
224 } 234 }
225} 235}
226 236
@@ -229,18 +239,18 @@ static void change_VID(int vid)
229{ 239{
230 union msr_fidvidctl fidvidctl; 240 union msr_fidvidctl fidvidctl;
231 241
232 rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); 242 rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
233 if (fidvidctl.bits.VID != vid) { 243 if (fidvidctl.bits.VID != vid) {
234 fidvidctl.bits.SGTC = latency; 244 fidvidctl.bits.SGTC = latency;
235 fidvidctl.bits.VID = vid; 245 fidvidctl.bits.VID = vid;
236 fidvidctl.bits.FIDC = 0; 246 fidvidctl.bits.FIDC = 0;
237 fidvidctl.bits.VIDC = 1; 247 fidvidctl.bits.VIDC = 1;
238 wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); 248 wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
239 } 249 }
240} 250}
241 251
242 252
243static void change_speed (unsigned int index) 253static void change_speed(unsigned int index)
244{ 254{
245 u8 fid, vid; 255 u8 fid, vid;
246 struct cpufreq_freqs freqs; 256 struct cpufreq_freqs freqs;
@@ -257,7 +267,7 @@ static void change_speed (unsigned int index)
257 267
258 freqs.cpu = 0; 268 freqs.cpu = 0;
259 269
260 rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); 270 rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
261 cfid = fidvidstatus.bits.CFID; 271 cfid = fidvidstatus.bits.CFID;
262 freqs.old = fsb * fid_codes[cfid] / 10; 272 freqs.old = fsb * fid_codes[cfid] / 10;
263 273
@@ -321,12 +331,14 @@ static int powernow_acpi_init(void)
321 goto err1; 331 goto err1;
322 } 332 }
323 333
324 if (acpi_processor_perf->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { 334 if (acpi_processor_perf->control_register.space_id !=
335 ACPI_ADR_SPACE_FIXED_HARDWARE) {
325 retval = -ENODEV; 336 retval = -ENODEV;
326 goto err2; 337 goto err2;
327 } 338 }
328 339
329 if (acpi_processor_perf->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { 340 if (acpi_processor_perf->status_register.space_id !=
341 ACPI_ADR_SPACE_FIXED_HARDWARE) {
330 retval = -ENODEV; 342 retval = -ENODEV;
331 goto err2; 343 goto err2;
332 } 344 }
@@ -338,7 +350,8 @@ static int powernow_acpi_init(void)
338 goto err2; 350 goto err2;
339 } 351 }
340 352
341 powernow_table = kzalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL); 353 powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
354 (number_scales + 1)), GFP_KERNEL);
342 if (!powernow_table) { 355 if (!powernow_table) {
343 retval = -ENOMEM; 356 retval = -ENOMEM;
344 goto err2; 357 goto err2;
@@ -352,7 +365,7 @@ static int powernow_acpi_init(void)
352 unsigned int speed, speed_mhz; 365 unsigned int speed, speed_mhz;
353 366
354 pc.val = (unsigned long) state->control; 367 pc.val = (unsigned long) state->control;
355 dprintk ("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", 368 dprintk("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
356 i, 369 i,
357 (u32) state->core_frequency, 370 (u32) state->core_frequency,
358 (u32) state->power, 371 (u32) state->power,
@@ -381,12 +394,12 @@ static int powernow_acpi_init(void)
381 if (speed % 1000 > 0) 394 if (speed % 1000 > 0)
382 speed_mhz++; 395 speed_mhz++;
383 396
384 if ((fid_codes[fid] % 10)==5) { 397 if ((fid_codes[fid] % 10) == 5) {
385 if (have_a0 == 1) 398 if (have_a0 == 1)
386 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; 399 invalidate_entry(i);
387 } 400 }
388 401
389 dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " 402 dprintk(" FID: 0x%x (%d.%dx [%dMHz]) "
390 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, 403 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
391 fid_codes[fid] % 10, speed_mhz, vid, 404 fid_codes[fid] % 10, speed_mhz, vid,
392 mobile_vid_table[vid]/1000, 405 mobile_vid_table[vid]/1000,
@@ -422,7 +435,8 @@ err1:
422err05: 435err05:
423 kfree(acpi_processor_perf); 436 kfree(acpi_processor_perf);
424err0: 437err0:
425 printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n"); 438 printk(KERN_WARNING PFX "ACPI perflib can not be used on "
439 "this platform\n");
426 acpi_processor_perf = NULL; 440 acpi_processor_perf = NULL;
427 return retval; 441 return retval;
428} 442}
@@ -435,7 +449,14 @@ static int powernow_acpi_init(void)
435} 449}
436#endif 450#endif
437 451
438static int powernow_decode_bios (int maxfid, int startvid) 452static void print_pst_entry(struct pst_s *pst, unsigned int j)
453{
454 dprintk("PST:%d (@%p)\n", j, pst);
455 dprintk(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n",
456 pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
457}
458
459static int powernow_decode_bios(int maxfid, int startvid)
439{ 460{
440 struct psb_s *psb; 461 struct psb_s *psb;
441 struct pst_s *pst; 462 struct pst_s *pst;
@@ -446,61 +467,67 @@ static int powernow_decode_bios (int maxfid, int startvid)
446 467
447 etuple = cpuid_eax(0x80000001); 468 etuple = cpuid_eax(0x80000001);
448 469
449 for (i=0xC0000; i < 0xffff0 ; i+=16) { 470 for (i = 0xC0000; i < 0xffff0 ; i += 16) {
450 471
451 p = phys_to_virt(i); 472 p = phys_to_virt(i);
452 473
453 if (memcmp(p, "AMDK7PNOW!", 10) == 0){ 474 if (memcmp(p, "AMDK7PNOW!", 10) == 0) {
454 dprintk ("Found PSB header at %p\n", p); 475 dprintk("Found PSB header at %p\n", p);
455 psb = (struct psb_s *) p; 476 psb = (struct psb_s *) p;
456 dprintk ("Table version: 0x%x\n", psb->tableversion); 477 dprintk("Table version: 0x%x\n", psb->tableversion);
457 if (psb->tableversion != 0x12) { 478 if (psb->tableversion != 0x12) {
458 printk (KERN_INFO PFX "Sorry, only v1.2 tables supported right now\n"); 479 printk(KERN_INFO PFX "Sorry, only v1.2 tables"
480 " supported right now\n");
459 return -ENODEV; 481 return -ENODEV;
460 } 482 }
461 483
462 dprintk ("Flags: 0x%x\n", psb->flags); 484 dprintk("Flags: 0x%x\n", psb->flags);
463 if ((psb->flags & 1)==0) { 485 if ((psb->flags & 1) == 0)
464 dprintk ("Mobile voltage regulator\n"); 486 dprintk("Mobile voltage regulator\n");
465 } else { 487 else
466 dprintk ("Desktop voltage regulator\n"); 488 dprintk("Desktop voltage regulator\n");
467 }
468 489
469 latency = psb->settlingtime; 490 latency = psb->settlingtime;
470 if (latency < 100) { 491 if (latency < 100) {
471 printk(KERN_INFO PFX "BIOS set settling time to %d microseconds. " 492 printk(KERN_INFO PFX "BIOS set settling time "
472 "Should be at least 100. Correcting.\n", latency); 493 "to %d microseconds. "
494 "Should be at least 100. "
495 "Correcting.\n", latency);
473 latency = 100; 496 latency = 100;
474 } 497 }
475 dprintk ("Settling Time: %d microseconds.\n", psb->settlingtime); 498 dprintk("Settling Time: %d microseconds.\n",
476 dprintk ("Has %d PST tables. (Only dumping ones relevant to this CPU).\n", psb->numpst); 499 psb->settlingtime);
500 dprintk("Has %d PST tables. (Only dumping ones "
501 "relevant to this CPU).\n",
502 psb->numpst);
477 503
478 p += sizeof (struct psb_s); 504 p += sizeof(struct psb_s);
479 505
480 pst = (struct pst_s *) p; 506 pst = (struct pst_s *) p;
481 507
482 for (j=0; j<psb->numpst; j++) { 508 for (j = 0; j < psb->numpst; j++) {
483 pst = (struct pst_s *) p; 509 pst = (struct pst_s *) p;
484 number_scales = pst->numpstates; 510 number_scales = pst->numpstates;
485 511
486 if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) && 512 if ((etuple == pst->cpuid) &&
487 (maxfid==pst->maxfid) && (startvid==pst->startvid)) 513 check_fsb(pst->fsbspeed) &&
488 { 514 (maxfid == pst->maxfid) &&
489 dprintk ("PST:%d (@%p)\n", j, pst); 515 (startvid == pst->startvid)) {
490 dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", 516 print_pst_entry(pst, j);
491 pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); 517 p = (char *)pst + sizeof(struct pst_s);
492 518 ret = get_ranges(p);
493 ret = get_ranges ((char *) pst + sizeof (struct pst_s));
494 return ret; 519 return ret;
495 } else { 520 } else {
496 unsigned int k; 521 unsigned int k;
497 p = (char *) pst + sizeof (struct pst_s); 522 p = (char *)pst + sizeof(struct pst_s);
498 for (k=0; k<number_scales; k++) 523 for (k = 0; k < number_scales; k++)
499 p+=2; 524 p += 2;
500 } 525 }
501 } 526 }
502 printk (KERN_INFO PFX "No PST tables match this cpuid (0x%x)\n", etuple); 527 printk(KERN_INFO PFX "No PST tables match this cpuid "
503 printk (KERN_INFO PFX "This is indicative of a broken BIOS.\n"); 528 "(0x%x)\n", etuple);
529 printk(KERN_INFO PFX "This is indicative of a broken "
530 "BIOS.\n");
504 531
505 return -EINVAL; 532 return -EINVAL;
506 } 533 }
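[Editor's note] For orientation, the BIOS scan driving the loop above amounts to searching the legacy BIOS window for the PSB signature on 16-byte boundaries; a condensed sketch:

	for (i = 0xC0000; i < 0xffff0; i += 16) {
		p = phys_to_virt(i);			/* BIOS area is always mapped */
		if (memcmp(p, "AMDK7PNOW!", 10) == 0) {
			psb = (struct psb_s *)p;	/* PSB header found */
			break;
		}
	}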
@@ -511,13 +538,14 @@ static int powernow_decode_bios (int maxfid, int startvid)
511} 538}
512 539
513 540
514static int powernow_target (struct cpufreq_policy *policy, 541static int powernow_target(struct cpufreq_policy *policy,
515 unsigned int target_freq, 542 unsigned int target_freq,
516 unsigned int relation) 543 unsigned int relation)
517{ 544{
518 unsigned int newstate; 545 unsigned int newstate;
519 546
520 if (cpufreq_frequency_table_target(policy, powernow_table, target_freq, relation, &newstate)) 547 if (cpufreq_frequency_table_target(policy, powernow_table, target_freq,
548 relation, &newstate))
521 return -EINVAL; 549 return -EINVAL;
522 550
523 change_speed(newstate); 551 change_speed(newstate);
@@ -526,7 +554,7 @@ static int powernow_target (struct cpufreq_policy *policy,
526} 554}
527 555
528 556
529static int powernow_verify (struct cpufreq_policy *policy) 557static int powernow_verify(struct cpufreq_policy *policy)
530{ 558{
531 return cpufreq_frequency_table_verify(policy, powernow_table); 559 return cpufreq_frequency_table_verify(policy, powernow_table);
532} 560}
@@ -566,18 +594,23 @@ static unsigned int powernow_get(unsigned int cpu)
566 594
567 if (cpu) 595 if (cpu)
568 return 0; 596 return 0;
569 rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); 597 rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
570 cfid = fidvidstatus.bits.CFID; 598 cfid = fidvidstatus.bits.CFID;
571 599
572 return (fsb * fid_codes[cfid] / 10); 600 return fsb * fid_codes[cfid] / 10;
573} 601}
574 602
575 603
576static int __init acer_cpufreq_pst(const struct dmi_system_id *d) 604static int __init acer_cpufreq_pst(const struct dmi_system_id *d)
577{ 605{
578 printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident); 606 printk(KERN_WARNING PFX
579 printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n"); 607 "%s laptop with broken PST tables in BIOS detected.\n",
580 printk(KERN_WARNING "cpufreq scaling has been disabled as a result of this.\n"); 608 d->ident);
609 printk(KERN_WARNING PFX
610 "You need to downgrade to 3A21 (09/09/2002), or try a newer "
611 "BIOS than 3A71 (01/20/2003)\n");
612 printk(KERN_WARNING PFX
613 "cpufreq scaling has been disabled as a result of this.\n");
581 return 0; 614 return 0;
582} 615}
583 616
@@ -598,7 +631,7 @@ static struct dmi_system_id __initdata powernow_dmi_table[] = {
598 { } 631 { }
599}; 632};
600 633
601static int __init powernow_cpu_init (struct cpufreq_policy *policy) 634static int __init powernow_cpu_init(struct cpufreq_policy *policy)
602{ 635{
603 union msr_fidvidstatus fidvidstatus; 636 union msr_fidvidstatus fidvidstatus;
604 int result; 637 int result;
@@ -606,7 +639,7 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
606 if (policy->cpu != 0) 639 if (policy->cpu != 0)
607 return -ENODEV; 640 return -ENODEV;
608 641
609 rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); 642 rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
610 643
611 recalibrate_cpu_khz(); 644 recalibrate_cpu_khz();
612 645
@@ -618,19 +651,21 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
618 dprintk("FSB: %3dMHz\n", fsb/1000); 651 dprintk("FSB: %3dMHz\n", fsb/1000);
619 652
620 if (dmi_check_system(powernow_dmi_table) || acpi_force) { 653 if (dmi_check_system(powernow_dmi_table) || acpi_force) {
621 printk (KERN_INFO PFX "PSB/PST known to be broken. Trying ACPI instead\n"); 654 printk(KERN_INFO PFX "PSB/PST known to be broken. "
655 "Trying ACPI instead\n");
622 result = powernow_acpi_init(); 656 result = powernow_acpi_init();
623 } else { 657 } else {
624 result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID); 658 result = powernow_decode_bios(fidvidstatus.bits.MFID,
659 fidvidstatus.bits.SVID);
625 if (result) { 660 if (result) {
626 printk (KERN_INFO PFX "Trying ACPI perflib\n"); 661 printk(KERN_INFO PFX "Trying ACPI perflib\n");
627 maximum_speed = 0; 662 maximum_speed = 0;
628 minimum_speed = -1; 663 minimum_speed = -1;
629 latency = 0; 664 latency = 0;
630 result = powernow_acpi_init(); 665 result = powernow_acpi_init();
631 if (result) { 666 if (result) {
632 printk (KERN_INFO PFX "ACPI and legacy methods failed\n"); 667 printk(KERN_INFO PFX
633 printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.html\n"); 668 "ACPI and legacy methods failed\n");
634 } 669 }
635 } else { 670 } else {
636 /* SGTC use the bus clock as timer */ 671 /* SGTC use the bus clock as timer */
@@ -642,10 +677,11 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
642 if (result) 677 if (result)
643 return result; 678 return result;
644 679
645 printk (KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", 680 printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
646 minimum_speed/1000, maximum_speed/1000); 681 minimum_speed/1000, maximum_speed/1000);
647 682
648 policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency); 683 policy->cpuinfo.transition_latency =
684 cpufreq_scale(2000000UL, fsb, latency);
649 685
650 policy->cur = powernow_get(0); 686 policy->cur = powernow_get(0);
651 687
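[Editor's note] The transition_latency assignment above relies on cpufreq_scale(); roughly, cpufreq_scale(old, div, mult) returns old * mult / div with 32-bit overflow handling, so the value works out to approximately:

	/* policy->cpuinfo.transition_latency (in ns, as the cpufreq core expects)
	 *     ~= 2,000,000 * latency / fsb */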
@@ -654,7 +690,8 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
654 return cpufreq_frequency_table_cpuinfo(policy, powernow_table); 690 return cpufreq_frequency_table_cpuinfo(policy, powernow_table);
655} 691}
656 692
657static int powernow_cpu_exit (struct cpufreq_policy *policy) { 693static int powernow_cpu_exit(struct cpufreq_policy *policy)
694{
658 cpufreq_frequency_table_put_attr(policy->cpu); 695 cpufreq_frequency_table_put_attr(policy->cpu);
659 696
660#ifdef CONFIG_X86_POWERNOW_K7_ACPI 697#ifdef CONFIG_X86_POWERNOW_K7_ACPI
@@ -669,7 +706,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) {
669 return 0; 706 return 0;
670} 707}
671 708
672static struct freq_attr* powernow_table_attr[] = { 709static struct freq_attr *powernow_table_attr[] = {
673 &cpufreq_freq_attr_scaling_available_freqs, 710 &cpufreq_freq_attr_scaling_available_freqs,
674 NULL, 711 NULL,
675}; 712};
@@ -685,15 +722,15 @@ static struct cpufreq_driver powernow_driver = {
685 .attr = powernow_table_attr, 722 .attr = powernow_table_attr,
686}; 723};
687 724
688static int __init powernow_init (void) 725static int __init powernow_init(void)
689{ 726{
690 if (check_powernow()==0) 727 if (check_powernow() == 0)
691 return -ENODEV; 728 return -ENODEV;
692 return cpufreq_register_driver(&powernow_driver); 729 return cpufreq_register_driver(&powernow_driver);
693} 730}
694 731
695 732
696static void __exit powernow_exit (void) 733static void __exit powernow_exit(void)
697{ 734{
698 cpufreq_unregister_driver(&powernow_driver); 735 cpufreq_unregister_driver(&powernow_driver);
699} 736}
@@ -701,9 +738,9 @@ static void __exit powernow_exit (void)
701module_param(acpi_force, int, 0444); 738module_param(acpi_force, int, 0444);
702MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); 739MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
703 740
704MODULE_AUTHOR ("Dave Jones <davej@redhat.com>"); 741MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
705MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); 742MODULE_DESCRIPTION("Powernow driver for AMD K7 processors.");
706MODULE_LICENSE ("GPL"); 743MODULE_LICENSE("GPL");
707 744
708late_initcall(powernow_init); 745late_initcall(powernow_init);
709module_exit(powernow_exit); 746module_exit(powernow_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index e8fd76f98883..4709ead2db52 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -33,16 +33,14 @@
33#include <linux/string.h> 33#include <linux/string.h>
34#include <linux/cpumask.h> 34#include <linux/cpumask.h>
35#include <linux/sched.h> /* for current / set_cpus_allowed() */ 35#include <linux/sched.h> /* for current / set_cpus_allowed() */
36#include <linux/io.h>
37#include <linux/delay.h>
36 38
37#include <asm/msr.h> 39#include <asm/msr.h>
38#include <asm/io.h>
39#include <asm/delay.h>
40 40
41#ifdef CONFIG_X86_POWERNOW_K8_ACPI
42#include <linux/acpi.h> 41#include <linux/acpi.h>
43#include <linux/mutex.h> 42#include <linux/mutex.h>
44#include <acpi/processor.h> 43#include <acpi/processor.h>
45#endif
46 44
47#define PFX "powernow-k8: " 45#define PFX "powernow-k8: "
48#define VERSION "version 2.20.00" 46#define VERSION "version 2.20.00"
@@ -74,7 +72,8 @@ static u32 find_khz_freq_from_fid(u32 fid)
74 return 1000 * find_freq_from_fid(fid); 72 return 1000 * find_freq_from_fid(fid);
75} 73}
76 74
77static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 pstate) 75static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data,
76 u32 pstate)
78{ 77{
79 return data[pstate].frequency; 78 return data[pstate].frequency;
80} 79}
@@ -189,7 +188,9 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
189 return 1; 188 return 1;
190 } 189 }
191 190
192 lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; 191 lo = fid;
192 lo |= (data->currvid << MSR_C_LO_VID_SHIFT);
193 lo |= MSR_C_LO_INIT_FID_VID;
193 194
194 dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", 195 dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
195 fid, lo, data->plllock * PLL_LOCK_CONVERSION); 196 fid, lo, data->plllock * PLL_LOCK_CONVERSION);
@@ -197,7 +198,9 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
197 do { 198 do {
198 wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); 199 wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
199 if (i++ > 100) { 200 if (i++ > 100) {
200 printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n"); 201 printk(KERN_ERR PFX
202 "Hardware error - pending bit very stuck - "
203 "no further pstate changes possible\n");
201 return 1; 204 return 1;
202 } 205 }
203 } while (query_current_values_with_pending_wait(data)); 206 } while (query_current_values_with_pending_wait(data));
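[Editor's note] A commented sketch of the control word assembled in write_new_fid() above, spelling out the role of each part; the high dword passed to wrmsr() carries the lock/stop-grant timing count:

	lo  = fid;					/* requested frequency ID       */
	lo |= data->currvid << MSR_C_LO_VID_SHIFT;	/* keep the current voltage ID  */
	lo |= MSR_C_LO_INIT_FID_VID;			/* "initiate change" bit        */

	wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);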
@@ -205,14 +208,16 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
205 count_off_irt(data); 208 count_off_irt(data);
206 209
207 if (savevid != data->currvid) { 210 if (savevid != data->currvid) {
208 printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n", 211 printk(KERN_ERR PFX
209 savevid, data->currvid); 212 "vid change on fid trans, old 0x%x, new 0x%x\n",
213 savevid, data->currvid);
210 return 1; 214 return 1;
211 } 215 }
212 216
213 if (fid != data->currfid) { 217 if (fid != data->currfid) {
214 printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid, 218 printk(KERN_ERR PFX
215 data->currfid); 219 "fid trans failed, fid 0x%x, curr 0x%x\n", fid,
220 data->currfid);
216 return 1; 221 return 1;
217 } 222 }
218 223
@@ -231,7 +236,9 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
231 return 1; 236 return 1;
232 } 237 }
233 238
234 lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; 239 lo = data->currfid;
240 lo |= (vid << MSR_C_LO_VID_SHIFT);
241 lo |= MSR_C_LO_INIT_FID_VID;
235 242
236 dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", 243 dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
237 vid, lo, STOP_GRANT_5NS); 244 vid, lo, STOP_GRANT_5NS);
@@ -239,20 +246,24 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
239 do { 246 do {
240 wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); 247 wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
241 if (i++ > 100) { 248 if (i++ > 100) {
242 printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); 249 printk(KERN_ERR PFX "internal error - pending bit "
250 "very stuck - no further pstate "
251 "changes possible\n");
243 return 1; 252 return 1;
244 } 253 }
245 } while (query_current_values_with_pending_wait(data)); 254 } while (query_current_values_with_pending_wait(data));
246 255
247 if (savefid != data->currfid) { 256 if (savefid != data->currfid) {
248 printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n", 257 printk(KERN_ERR PFX "fid changed on vid trans, old "
258 "0x%x new 0x%x\n",
249 savefid, data->currfid); 259 savefid, data->currfid);
250 return 1; 260 return 1;
251 } 261 }
252 262
253 if (vid != data->currvid) { 263 if (vid != data->currvid) {
254 printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid, 264 printk(KERN_ERR PFX "vid trans failed, vid 0x%x, "
255 data->currvid); 265 "curr 0x%x\n",
266 vid, data->currvid);
256 return 1; 267 return 1;
257 } 268 }
258 269
@@ -264,7 +275,8 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
264 * Decreasing vid codes represent increasing voltages: 275 * Decreasing vid codes represent increasing voltages:
265 * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off. 276 * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
266 */ 277 */
267static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step) 278static int decrease_vid_code_by_step(struct powernow_k8_data *data,
279 u32 reqvid, u32 step)
268{ 280{
269 if ((data->currvid - reqvid) > step) 281 if ((data->currvid - reqvid) > step)
270 reqvid = data->currvid - step; 282 reqvid = data->currvid - step;
@@ -286,7 +298,8 @@ static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
286} 298}
287 299
288/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */ 300/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
289static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid) 301static int transition_fid_vid(struct powernow_k8_data *data,
302 u32 reqfid, u32 reqvid)
290{ 303{
291 if (core_voltage_pre_transition(data, reqvid)) 304 if (core_voltage_pre_transition(data, reqvid))
292 return 1; 305 return 1;
@@ -301,7 +314,8 @@ static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 req
301 return 1; 314 return 1;
302 315
303 if ((reqfid != data->currfid) || (reqvid != data->currvid)) { 316 if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
304 printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n", 317 printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, "
318 "curr 0x%x 0x%x\n",
305 smp_processor_id(), 319 smp_processor_id(),
306 reqfid, reqvid, data->currfid, data->currvid); 320 reqfid, reqvid, data->currfid, data->currvid);
307 return 1; 321 return 1;
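[Editor's note] The transition_fid_vid() hunk above drives the documented three-phase sequence for Opteron/Athlon64 parts; a sketch of the flow with error handling elided (each phase is a helper in this file):

	if (core_voltage_pre_transition(data, reqvid))	/* phase 1: step vid toward target (+ rvo headroom) */
		return 1;
	if (core_frequency_transition(data, reqfid))	/* phase 2: step fid, one interval at a time        */
		return 1;
	if (core_voltage_post_transition(data, reqvid))	/* phase 3: settle on the final vid                 */
		return 1;
	return 0;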
@@ -314,13 +328,15 @@ static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 req
314} 328}
315 329
316/* Phase 1 - core voltage transition ... setup voltage */ 330/* Phase 1 - core voltage transition ... setup voltage */
317static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid) 331static int core_voltage_pre_transition(struct powernow_k8_data *data,
332 u32 reqvid)
318{ 333{
319 u32 rvosteps = data->rvo; 334 u32 rvosteps = data->rvo;
320 u32 savefid = data->currfid; 335 u32 savefid = data->currfid;
321 u32 maxvid, lo; 336 u32 maxvid, lo;
322 337
323 dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n", 338 dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
339 "reqvid 0x%x, rvo 0x%x\n",
324 smp_processor_id(), 340 smp_processor_id(),
325 data->currfid, data->currvid, reqvid, data->rvo); 341 data->currfid, data->currvid, reqvid, data->rvo);
326 342
@@ -343,7 +359,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid
343 } else { 359 } else {
344 dprintk("ph1: changing vid for rvo, req 0x%x\n", 360 dprintk("ph1: changing vid for rvo, req 0x%x\n",
345 data->currvid - 1); 361 data->currvid - 1);
346 if (decrease_vid_code_by_step(data, data->currvid - 1, 1)) 362 if (decrease_vid_code_by_step(data, data->currvid-1, 1))
347 return 1; 363 return 1;
348 rvosteps--; 364 rvosteps--;
349 } 365 }
@@ -353,7 +369,8 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid
353 return 1; 369 return 1;
354 370
355 if (savefid != data->currfid) { 371 if (savefid != data->currfid) {
356 printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid); 372 printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n",
373 data->currfid);
357 return 1; 374 return 1;
358 } 375 }
359 376
@@ -366,20 +383,24 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid
366/* Phase 2 - core frequency transition */ 383/* Phase 2 - core frequency transition */
367static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) 384static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
368{ 385{
369 u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid; 386 u32 vcoreqfid, vcocurrfid, vcofiddiff;
387 u32 fid_interval, savevid = data->currvid;
370 388
371 if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { 389 if ((reqfid < HI_FID_TABLE_BOTTOM) &&
372 printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n", 390 (data->currfid < HI_FID_TABLE_BOTTOM)) {
373 reqfid, data->currfid); 391 printk(KERN_ERR PFX "ph2: illegal lo-lo transition "
392 "0x%x 0x%x\n", reqfid, data->currfid);
374 return 1; 393 return 1;
375 } 394 }
376 395
377 if (data->currfid == reqfid) { 396 if (data->currfid == reqfid) {
378 printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid); 397 printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
398 data->currfid);
379 return 0; 399 return 0;
380 } 400 }
381 401
382 dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n", 402 dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
403 "reqfid 0x%x\n",
383 smp_processor_id(), 404 smp_processor_id(),
384 data->currfid, data->currvid, reqfid); 405 data->currfid, data->currvid, reqfid);
385 406
@@ -393,14 +414,14 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
393 414
394 if (reqfid > data->currfid) { 415 if (reqfid > data->currfid) {
395 if (data->currfid > LO_FID_TABLE_TOP) { 416 if (data->currfid > LO_FID_TABLE_TOP) {
396 if (write_new_fid(data, data->currfid + fid_interval)) { 417 if (write_new_fid(data,
418 data->currfid + fid_interval))
397 return 1; 419 return 1;
398 }
399 } else { 420 } else {
400 if (write_new_fid 421 if (write_new_fid
401 (data, 2 + convert_fid_to_vco_fid(data->currfid))) { 422 (data,
423 2 + convert_fid_to_vco_fid(data->currfid)))
402 return 1; 424 return 1;
403 }
404 } 425 }
405 } else { 426 } else {
406 if (write_new_fid(data, data->currfid - fid_interval)) 427 if (write_new_fid(data, data->currfid - fid_interval))
@@ -420,7 +441,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
420 441
421 if (data->currfid != reqfid) { 442 if (data->currfid != reqfid) {
422 printk(KERN_ERR PFX 443 printk(KERN_ERR PFX
423 "ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n", 444 "ph2: mismatch, failed fid transition, "
445 "curr 0x%x, req 0x%x\n",
424 data->currfid, reqfid); 446 data->currfid, reqfid);
425 return 1; 447 return 1;
426 } 448 }
@@ -438,7 +460,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
438} 460}
439 461
440/* Phase 3 - core voltage transition flow ... jump to the final vid. */ 462/* Phase 3 - core voltage transition flow ... jump to the final vid. */
441static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid) 463static int core_voltage_post_transition(struct powernow_k8_data *data,
464 u32 reqvid)
442{ 465{
443 u32 savefid = data->currfid; 466 u32 savefid = data->currfid;
444 u32 savereqvid = reqvid; 467 u32 savereqvid = reqvid;
@@ -460,7 +483,8 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi
460 483
461 if (data->currvid != reqvid) { 484 if (data->currvid != reqvid) {
462 printk(KERN_ERR PFX 485 printk(KERN_ERR PFX
463 "ph3: failed vid transition\n, req 0x%x, curr 0x%x", 486 "ph3: failed vid transition\n, "
487 "req 0x%x, curr 0x%x",
464 reqvid, data->currvid); 488 reqvid, data->currvid);
465 return 1; 489 return 1;
466 } 490 }
@@ -511,7 +535,8 @@ static int check_supported_cpu(unsigned int cpu)
511 if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { 535 if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
512 if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || 536 if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
513 ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { 537 ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
514 printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); 538 printk(KERN_INFO PFX
539 "Processor cpuid %x not supported\n", eax);
515 goto out; 540 goto out;
516 } 541 }
517 542
@@ -523,8 +548,10 @@ static int check_supported_cpu(unsigned int cpu)
523 } 548 }
524 549
525 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); 550 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
526 if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) { 551 if ((edx & P_STATE_TRANSITION_CAPABLE)
527 printk(KERN_INFO PFX "Power state transitions not supported\n"); 552 != P_STATE_TRANSITION_CAPABLE) {
553 printk(KERN_INFO PFX
554 "Power state transitions not supported\n");
528 goto out; 555 goto out;
529 } 556 }
530 } else { /* must be a HW Pstate capable processor */ 557 } else { /* must be a HW Pstate capable processor */
@@ -542,7 +569,8 @@ out:
542 return rc; 569 return rc;
543} 570}
544 571
545static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) 572static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
573 u8 maxvid)
546{ 574{
547 unsigned int j; 575 unsigned int j;
548 u8 lastfid = 0xff; 576 u8 lastfid = 0xff;
@@ -553,12 +581,14 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8
553 j, pst[j].vid); 581 j, pst[j].vid);
554 return -EINVAL; 582 return -EINVAL;
555 } 583 }
556 if (pst[j].vid < data->rvo) { /* vid + rvo >= 0 */ 584 if (pst[j].vid < data->rvo) {
585 /* vid + rvo >= 0 */
557 printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate" 586 printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate"
558 " %d\n", j); 587 " %d\n", j);
559 return -ENODEV; 588 return -ENODEV;
560 } 589 }
561 if (pst[j].vid < maxvid + data->rvo) { /* vid + rvo >= maxvid */ 590 if (pst[j].vid < maxvid + data->rvo) {
591 /* vid + rvo >= maxvid */
562 printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate" 592 printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate"
563 " %d\n", j); 593 " %d\n", j);
564 return -ENODEV; 594 return -ENODEV;
@@ -582,23 +612,31 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8
582 return -EINVAL; 612 return -EINVAL;
583 } 613 }
584 if (lastfid > LO_FID_TABLE_TOP) 614 if (lastfid > LO_FID_TABLE_TOP)
585 printk(KERN_INFO FW_BUG PFX "first fid not from lo freq table\n"); 615 printk(KERN_INFO FW_BUG PFX
616 "first fid not from lo freq table\n");
586 617
587 return 0; 618 return 0;
588} 619}
589 620
621static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry)
622{
623 data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
624}
625
590static void print_basics(struct powernow_k8_data *data) 626static void print_basics(struct powernow_k8_data *data)
591{ 627{
592 int j; 628 int j;
593 for (j = 0; j < data->numps; j++) { 629 for (j = 0; j < data->numps; j++) {
594 if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) { 630 if (data->powernow_table[j].frequency !=
631 CPUFREQ_ENTRY_INVALID) {
595 if (cpu_family == CPU_HW_PSTATE) { 632 if (cpu_family == CPU_HW_PSTATE) {
596 printk(KERN_INFO PFX " %d : pstate %d (%d MHz)\n", 633 printk(KERN_INFO PFX
597 j, 634 " %d : pstate %d (%d MHz)\n", j,
598 data->powernow_table[j].index, 635 data->powernow_table[j].index,
599 data->powernow_table[j].frequency/1000); 636 data->powernow_table[j].frequency/1000);
600 } else { 637 } else {
601 printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", 638 printk(KERN_INFO PFX
639 " %d : fid 0x%x (%d MHz), vid 0x%x\n",
602 j, 640 j,
603 data->powernow_table[j].index & 0xff, 641 data->powernow_table[j].index & 0xff,
604 data->powernow_table[j].frequency/1000, 642 data->powernow_table[j].frequency/1000,
@@ -607,20 +645,25 @@ static void print_basics(struct powernow_k8_data *data)
607 } 645 }
608 } 646 }
609 if (data->batps) 647 if (data->batps)
610 printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps); 648 printk(KERN_INFO PFX "Only %d pstates on battery\n",
649 data->batps);
611} 650}
612 651
613static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) 652static int fill_powernow_table(struct powernow_k8_data *data,
653 struct pst_s *pst, u8 maxvid)
614{ 654{
615 struct cpufreq_frequency_table *powernow_table; 655 struct cpufreq_frequency_table *powernow_table;
616 unsigned int j; 656 unsigned int j;
617 657
618 if (data->batps) { /* use ACPI support to get full speed on mains power */ 658 if (data->batps) {
619 printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range\n", data->batps); 659 /* use ACPI support to get full speed on mains power */
660 printk(KERN_WARNING PFX
661 "Only %d pstates usable (use ACPI driver for full "
662 "range\n", data->batps);
620 data->numps = data->batps; 663 data->numps = data->batps;
621 } 664 }
622 665
623 for ( j=1; j<data->numps; j++ ) { 666 for (j = 1; j < data->numps; j++) {
624 if (pst[j-1].fid >= pst[j].fid) { 667 if (pst[j-1].fid >= pst[j].fid) {
625 printk(KERN_ERR PFX "PST out of sequence\n"); 668 printk(KERN_ERR PFX "PST out of sequence\n");
626 return -EINVAL; 669 return -EINVAL;
@@ -643,9 +686,11 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst,
643 } 686 }
644 687
645 for (j = 0; j < data->numps; j++) { 688 for (j = 0; j < data->numps; j++) {
689 int freq;
646 powernow_table[j].index = pst[j].fid; /* lower 8 bits */ 690 powernow_table[j].index = pst[j].fid; /* lower 8 bits */
647 powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */ 691 powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
648 powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid); 692 freq = find_khz_freq_from_fid(pst[j].fid);
693 powernow_table[j].frequency = freq;
649 } 694 }
650 powernow_table[data->numps].frequency = CPUFREQ_TABLE_END; 695 powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
651 powernow_table[data->numps].index = 0; 696 powernow_table[data->numps].index = 0;
@@ -661,7 +706,8 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst,
661 print_basics(data); 706 print_basics(data);
662 707
663 for (j = 0; j < data->numps; j++) 708 for (j = 0; j < data->numps; j++)
664 if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid)) 709 if ((pst[j].fid == data->currfid) &&
710 (pst[j].vid == data->currvid))
665 return 0; 711 return 0;
666 712
667 dprintk("currfid/vid do not match PST, ignoring\n"); 713 dprintk("currfid/vid do not match PST, ignoring\n");
@@ -701,7 +747,8 @@ static int find_psb_table(struct powernow_k8_data *data)
701 } 747 }
702 748
703 data->vstable = psb->vstable; 749 data->vstable = psb->vstable;
704 dprintk("voltage stabilization time: %d(*20us)\n", data->vstable); 750 dprintk("voltage stabilization time: %d(*20us)\n",
751 data->vstable);
705 752
706 dprintk("flags2: 0x%x\n", psb->flags2); 753 dprintk("flags2: 0x%x\n", psb->flags2);
707 data->rvo = psb->flags2 & 3; 754 data->rvo = psb->flags2 & 3;
@@ -716,11 +763,12 @@ static int find_psb_table(struct powernow_k8_data *data)
716 763
717 dprintk("numpst: 0x%x\n", psb->num_tables); 764 dprintk("numpst: 0x%x\n", psb->num_tables);
718 cpst = psb->num_tables; 765 cpst = psb->num_tables;
719 if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0) ){ 766 if ((psb->cpuid == 0x00000fc0) ||
767 (psb->cpuid == 0x00000fe0)) {
720 thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); 768 thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
721 if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0) ) { 769 if ((thiscpuid == 0x00000fc0) ||
770 (thiscpuid == 0x00000fe0))
722 cpst = 1; 771 cpst = 1;
723 }
724 } 772 }
725 if (cpst != 1) { 773 if (cpst != 1) {
726 printk(KERN_ERR FW_BUG PFX "numpst must be 1\n"); 774 printk(KERN_ERR FW_BUG PFX "numpst must be 1\n");
@@ -735,7 +783,8 @@ static int find_psb_table(struct powernow_k8_data *data)
735 783
736 data->numps = psb->numps; 784 data->numps = psb->numps;
737 dprintk("numpstates: 0x%x\n", data->numps); 785 dprintk("numpstates: 0x%x\n", data->numps);
738 return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid); 786 return fill_powernow_table(data,
787 (struct pst_s *)(psb+1), maxvid);
739 } 788 }
740 /* 789 /*
741 * If you see this message, complain to BIOS manufacturer. If 790 * If you see this message, complain to BIOS manufacturer. If
@@ -748,28 +797,31 @@ static int find_psb_table(struct powernow_k8_data *data)
748 * BIOS and Kernel Developer's Guide, which is available on 797 * BIOS and Kernel Developer's Guide, which is available on
749 * www.amd.com 798 * www.amd.com
750 */ 799 */
751 printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n"); 800 printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n");
752 return -ENODEV; 801 return -ENODEV;
753} 802}
754 803
755#ifdef CONFIG_X86_POWERNOW_K8_ACPI 804static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
756static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) 805 unsigned int index)
757{ 806{
807 acpi_integer control;
808
758 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) 809 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
759 return; 810 return;
760 811
761 data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; 812 control = data->acpi_data.states[index].control;
762 data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; 813 data->irt = (control >> IRT_SHIFT) & IRT_MASK;
763 data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; 814 data->rvo = (control >> RVO_SHIFT) & RVO_MASK;
764 data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; 815 data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
765 data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); 816 data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK;
766 data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; 817 data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK);
767} 818 data->vstable = (control >> VST_SHIFT) & VST_MASK;
819}
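[Editor's note] For reference, the fields unpacked above come from bit-fields of the ACPI _PSS "control" value; their usual expansions (per the AMD BIOS and Kernel Developer's Guide cited elsewhere in this file) are noted below as background, not as part of this patch:

	/* irt     - isochronous relief time
	 * rvo     - ramp voltage offset
	 * exttype - extended interface type (extended fid/vid encoding in _PSS)
	 * plllock - PLL lock time
	 * vidmvs  - voltage step size, 1 << MVS field
	 * vstable - voltage stabilization time */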
768 819
769static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) 820static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
770{ 821{
771 struct cpufreq_frequency_table *powernow_table; 822 struct cpufreq_frequency_table *powernow_table;
772 int ret_val = -ENODEV; 823 int ret_val = -ENODEV;
824 acpi_integer space_id;
773 825
774 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { 826 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
775 dprintk("register performance failed: bad ACPI data\n"); 827 dprintk("register performance failed: bad ACPI data\n");
@@ -782,11 +834,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
782 goto err_out; 834 goto err_out;
783 } 835 }
784 836
785 if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || 837 space_id = data->acpi_data.control_register.space_id;
786 (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { 838 if ((space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
839 (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
787 dprintk("Invalid control/status registers (%x - %x)\n", 840 dprintk("Invalid control/status registers (%x - %x)\n",
788 data->acpi_data.control_register.space_id, 841 space_id,
789 data->acpi_data.status_register.space_id); 842 data->acpi_data.status_register.space_id);
790 goto err_out; 843 goto err_out;
791 } 844 }
792 845
@@ -805,7 +858,8 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
805 if (ret_val) 858 if (ret_val)
806 goto err_out_mem; 859 goto err_out_mem;
807 860
808 powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; 861 powernow_table[data->acpi_data.state_count].frequency =
862 CPUFREQ_TABLE_END;
809 powernow_table[data->acpi_data.state_count].index = 0; 863 powernow_table[data->acpi_data.state_count].index = 0;
810 data->powernow_table = powernow_table; 864 data->powernow_table = powernow_table;
811 865
@@ -833,13 +887,15 @@ err_out_mem:
833err_out: 887err_out:
834 acpi_processor_unregister_performance(&data->acpi_data, data->cpu); 888 acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
835 889
836 /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ 890 /* data->acpi_data.state_count informs us at ->exit()
891 * whether ACPI was used */
837 data->acpi_data.state_count = 0; 892 data->acpi_data.state_count = 0;
838 893
839 return ret_val; 894 return ret_val;
840} 895}
841 896
842static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) 897static int fill_powernow_table_pstate(struct powernow_k8_data *data,
898 struct cpufreq_frequency_table *powernow_table)
843{ 899{
844 int i; 900 int i;
845 u32 hi = 0, lo = 0; 901 u32 hi = 0, lo = 0;
@@ -851,84 +907,101 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
851 907
852 index = data->acpi_data.states[i].control & HW_PSTATE_MASK; 908 index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
853 if (index > data->max_hw_pstate) { 909 if (index > data->max_hw_pstate) {
854 printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); 910 printk(KERN_ERR PFX "invalid pstate %d - "
855 printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); 911 "bad value %d.\n", i, index);
856 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; 912 printk(KERN_ERR PFX "Please report to BIOS "
913 "manufacturer\n");
914 invalidate_entry(data, i);
857 continue; 915 continue;
858 } 916 }
859 rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); 917 rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
860 if (!(hi & HW_PSTATE_VALID_MASK)) { 918 if (!(hi & HW_PSTATE_VALID_MASK)) {
861 dprintk("invalid pstate %d, ignoring\n", index); 919 dprintk("invalid pstate %d, ignoring\n", index);
862 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; 920 invalidate_entry(data, i);
863 continue; 921 continue;
864 } 922 }
865 923
866 powernow_table[i].index = index; 924 powernow_table[i].index = index;
867 925
868 powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000; 926 powernow_table[i].frequency =
927 data->acpi_data.states[i].core_frequency * 1000;
869 } 928 }
870 return 0; 929 return 0;
871} 930}
872 931
873static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) 932static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
933 struct cpufreq_frequency_table *powernow_table)
874{ 934{
875 int i; 935 int i;
876 int cntlofreq = 0; 936 int cntlofreq = 0;
937
877 for (i = 0; i < data->acpi_data.state_count; i++) { 938 for (i = 0; i < data->acpi_data.state_count; i++) {
878 u32 fid; 939 u32 fid;
879 u32 vid; 940 u32 vid;
941 u32 freq, index;
942 acpi_integer status, control;
880 943
881 if (data->exttype) { 944 if (data->exttype) {
882 fid = data->acpi_data.states[i].status & EXT_FID_MASK; 945 status = data->acpi_data.states[i].status;
883 vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK; 946 fid = status & EXT_FID_MASK;
947 vid = (status >> VID_SHIFT) & EXT_VID_MASK;
884 } else { 948 } else {
885 fid = data->acpi_data.states[i].control & FID_MASK; 949 control = data->acpi_data.states[i].control;
886 vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; 950 fid = control & FID_MASK;
951 vid = (control >> VID_SHIFT) & VID_MASK;
887 } 952 }
888 953
889 dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); 954 dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
890 955
891 powernow_table[i].index = fid; /* lower 8 bits */ 956 index = fid | (vid<<8);
892 powernow_table[i].index |= (vid << 8); /* upper 8 bits */ 957 powernow_table[i].index = index;
893 powernow_table[i].frequency = find_khz_freq_from_fid(fid); 958
959 freq = find_khz_freq_from_fid(fid);
960 powernow_table[i].frequency = freq;
894 961
895 /* verify frequency is OK */ 962 /* verify frequency is OK */
896 if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) || 963 if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
897 (powernow_table[i].frequency < (MIN_FREQ * 1000))) { 964 dprintk("invalid freq %u kHz, ignoring\n", freq);
898 dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency); 965 invalidate_entry(data, i);
899 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
900 continue; 966 continue;
901 } 967 }
902 968
903 /* verify voltage is OK - BIOSs are using "off" to indicate invalid */ 969 /* verify voltage is OK -
970 * BIOSs are using "off" to indicate invalid */
904 if (vid == VID_OFF) { 971 if (vid == VID_OFF) {
905 dprintk("invalid vid %u, ignoring\n", vid); 972 dprintk("invalid vid %u, ignoring\n", vid);
906 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; 973 invalidate_entry(data, i);
907 continue; 974 continue;
908 } 975 }
909 976
910 /* verify only 1 entry from the lo frequency table */ 977 /* verify only 1 entry from the lo frequency table */
911 if (fid < HI_FID_TABLE_BOTTOM) { 978 if (fid < HI_FID_TABLE_BOTTOM) {
912 if (cntlofreq) { 979 if (cntlofreq) {
913 /* if both entries are the same, ignore this one ... */ 980 /* if both entries are the same,
914 if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || 981 * ignore this one ... */
915 (powernow_table[i].index != powernow_table[cntlofreq].index)) { 982 if ((freq != powernow_table[cntlofreq].frequency) ||
916 printk(KERN_ERR PFX "Too many lo freq table entries\n"); 983 (index != powernow_table[cntlofreq].index)) {
984 printk(KERN_ERR PFX
985 "Too many lo freq table "
986 "entries\n");
917 return 1; 987 return 1;
918 } 988 }
919 989
920 dprintk("double low frequency table entry, ignoring it.\n"); 990 dprintk("double low frequency table entry, "
921 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; 991 "ignoring it.\n");
992 invalidate_entry(data, i);
922 continue; 993 continue;
923 } else 994 } else
924 cntlofreq = i; 995 cntlofreq = i;
925 } 996 }
926 997
927 if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { 998 if (freq != (data->acpi_data.states[i].core_frequency * 1000)) {
928 printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", 999 printk(KERN_INFO PFX "invalid freq entries "
929 powernow_table[i].frequency, 1000 "%u kHz vs. %u kHz\n", freq,
930 (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); 1001 (unsigned int)
931 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; 1002 (data->acpi_data.states[i].core_frequency
1003 * 1000));
1004 invalidate_entry(data, i);
932 continue; 1005 continue;
933 } 1006 }
934 } 1007 }
@@ -938,7 +1011,8 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
938static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) 1011static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
939{ 1012{
940 if (data->acpi_data.state_count) 1013 if (data->acpi_data.state_count)
941 acpi_processor_unregister_performance(&data->acpi_data, data->cpu); 1014 acpi_processor_unregister_performance(&data->acpi_data,
1015 data->cpu);
942 free_cpumask_var(data->acpi_data.shared_cpu_map); 1016 free_cpumask_var(data->acpi_data.shared_cpu_map);
943} 1017}
944 1018
@@ -956,15 +1030,9 @@ static int get_transition_latency(struct powernow_k8_data *data)
956 return 1000 * max_latency; 1030 return 1000 * max_latency;
957} 1031}
958 1032
959#else
960static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
961static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
962static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
963static int get_transition_latency(struct powernow_k8_data *data) { return 0; }
964#endif /* CONFIG_X86_POWERNOW_K8_ACPI */
965
966/* Take a frequency, and issue the fid/vid transition command */ 1033/* Take a frequency, and issue the fid/vid transition command */
967static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index) 1034static int transition_frequency_fidvid(struct powernow_k8_data *data,
1035 unsigned int index)
968{ 1036{
969 u32 fid = 0; 1037 u32 fid = 0;
970 u32 vid = 0; 1038 u32 vid = 0;
@@ -992,7 +1060,8 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
992 return 0; 1060 return 0;
993 } 1061 }
994 1062
995 if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { 1063 if ((fid < HI_FID_TABLE_BOTTOM) &&
1064 (data->currfid < HI_FID_TABLE_BOTTOM)) {
996 printk(KERN_ERR PFX 1065 printk(KERN_ERR PFX
997 "ignoring illegal change in lo freq table-%x to 0x%x\n", 1066 "ignoring illegal change in lo freq table-%x to 0x%x\n",
998 data->currfid, fid); 1067 data->currfid, fid);
@@ -1020,7 +1089,8 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
1020} 1089}
1021 1090
1022/* Take a frequency, and issue the hardware pstate transition command */ 1091/* Take a frequency, and issue the hardware pstate transition command */
1023static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index) 1092static int transition_frequency_pstate(struct powernow_k8_data *data,
1093 unsigned int index)
1024{ 1094{
1025 u32 pstate = 0; 1095 u32 pstate = 0;
1026 int res, i; 1096 int res, i;
@@ -1032,7 +1102,8 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
1032 pstate = index & HW_PSTATE_MASK; 1102 pstate = index & HW_PSTATE_MASK;
1033 if (pstate > data->max_hw_pstate) 1103 if (pstate > data->max_hw_pstate)
1034 return 0; 1104 return 0;
1035 freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); 1105 freqs.old = find_khz_freq_from_pstate(data->powernow_table,
1106 data->currpstate);
1036 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); 1107 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
1037 1108
1038 for_each_cpu_mask_nr(i, *(data->available_cores)) { 1109 for_each_cpu_mask_nr(i, *(data->available_cores)) {
@@ -1051,7 +1122,8 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
1051} 1122}
1052 1123
1053/* Driver entry point to switch to the target frequency */ 1124/* Driver entry point to switch to the target frequency */
1054static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) 1125static int powernowk8_target(struct cpufreq_policy *pol,
1126 unsigned targfreq, unsigned relation)
1055{ 1127{
1056 cpumask_t oldmask; 1128 cpumask_t oldmask;
1057 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); 1129 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
@@ -1090,14 +1162,18 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
1090 dprintk("targ: curr fid 0x%x, vid 0x%x\n", 1162 dprintk("targ: curr fid 0x%x, vid 0x%x\n",
1091 data->currfid, data->currvid); 1163 data->currfid, data->currvid);
1092 1164
1093 if ((checkvid != data->currvid) || (checkfid != data->currfid)) { 1165 if ((checkvid != data->currvid) ||
1166 (checkfid != data->currfid)) {
1094 printk(KERN_INFO PFX 1167 printk(KERN_INFO PFX
1095 "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n", 1168 "error - out of sync, fix 0x%x 0x%x, "
1096 checkfid, data->currfid, checkvid, data->currvid); 1169 "vid 0x%x 0x%x\n",
1170 checkfid, data->currfid,
1171 checkvid, data->currvid);
1097 } 1172 }
1098 } 1173 }
1099 1174
1100 if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) 1175 if (cpufreq_frequency_table_target(pol, data->powernow_table,
1176 targfreq, relation, &newstate))
1101 goto err_out; 1177 goto err_out;
1102 1178
1103 mutex_lock(&fidvid_mutex); 1179 mutex_lock(&fidvid_mutex);
@@ -1117,7 +1193,8 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
1117 mutex_unlock(&fidvid_mutex); 1193 mutex_unlock(&fidvid_mutex);
1118 1194
1119 if (cpu_family == CPU_HW_PSTATE) 1195 if (cpu_family == CPU_HW_PSTATE)
1120 pol->cur = find_khz_freq_from_pstate(data->powernow_table, newstate); 1196 pol->cur = find_khz_freq_from_pstate(data->powernow_table,
1197 newstate);
1121 else 1198 else
1122 pol->cur = find_khz_freq_from_fid(data->currfid); 1199 pol->cur = find_khz_freq_from_fid(data->currfid);
1123 ret = 0; 1200 ret = 0;
@@ -1144,6 +1221,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1144 struct powernow_k8_data *data; 1221 struct powernow_k8_data *data;
1145 cpumask_t oldmask; 1222 cpumask_t oldmask;
1146 int rc; 1223 int rc;
1224 static int print_once;
1147 1225
1148 if (!cpu_online(pol->cpu)) 1226 if (!cpu_online(pol->cpu))
1149 return -ENODEV; 1227 return -ENODEV;
@@ -1166,33 +1244,31 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1166 * an UP version, and is deprecated by AMD. 1244 * an UP version, and is deprecated by AMD.
1167 */ 1245 */
1168 if (num_online_cpus() != 1) { 1246 if (num_online_cpus() != 1) {
1169#ifndef CONFIG_ACPI_PROCESSOR 1247 /*
1170 printk(KERN_ERR PFX "ACPI Processor support is required " 1248 * Replace this one with print_once as soon as such a
1171 "for SMP systems but is absent. Please load the " 1249 * thing gets introduced
1172 "ACPI Processor module before starting this " 1250 */
1173 "driver.\n"); 1251 if (!print_once) {
1174#else 1252 WARN_ONCE(1, KERN_ERR FW_BUG PFX "Your BIOS "
1175 printk(KERN_ERR FW_BUG PFX "Your BIOS does not provide" 1253 "does not provide ACPI _PSS objects "
1176 " ACPI _PSS objects in a way that Linux " 1254 "in a way that Linux understands. "
1177 "understands. Please report this to the Linux " 1255 "Please report this to the Linux ACPI"
1178 "ACPI maintainers and complain to your BIOS " 1256 " maintainers and complain to your "
1179 "vendor.\n"); 1257 "BIOS vendor.\n");
1180#endif 1258 print_once++;
1181 kfree(data); 1259 }
1182 return -ENODEV; 1260 goto err_out;
1183 } 1261 }
1184 if (pol->cpu != 0) { 1262 if (pol->cpu != 0) {
1185 printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " 1263 printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
1186 "CPU other than CPU0. Complain to your BIOS " 1264 "CPU other than CPU0. Complain to your BIOS "
1187 "vendor.\n"); 1265 "vendor.\n");
1188 kfree(data); 1266 goto err_out;
1189 return -ENODEV;
1190 } 1267 }
1191 rc = find_psb_table(data); 1268 rc = find_psb_table(data);
1192 if (rc) { 1269 if (rc)
1193 kfree(data); 1270 goto err_out;
1194 return -ENODEV; 1271
1195 }
1196 /* Take a crude guess here. 1272 /* Take a crude guess here.
1197 * That guess was in microseconds, so multiply with 1000 */ 1273 * That guess was in microseconds, so multiply with 1000 */
1198 pol->cpuinfo.transition_latency = ( 1274 pol->cpuinfo.transition_latency = (
@@ -1207,16 +1283,16 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1207 1283
1208 if (smp_processor_id() != pol->cpu) { 1284 if (smp_processor_id() != pol->cpu) {
1209 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); 1285 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
1210 goto err_out; 1286 goto err_out_unmask;
1211 } 1287 }
1212 1288
1213 if (pending_bit_stuck()) { 1289 if (pending_bit_stuck()) {
1214 printk(KERN_ERR PFX "failing init, change pending bit set\n"); 1290 printk(KERN_ERR PFX "failing init, change pending bit set\n");
1215 goto err_out; 1291 goto err_out_unmask;
1216 } 1292 }
1217 1293
1218 if (query_current_values_with_pending_wait(data)) 1294 if (query_current_values_with_pending_wait(data))
1219 goto err_out; 1295 goto err_out_unmask;
1220 1296
1221 if (cpu_family == CPU_OPTERON) 1297 if (cpu_family == CPU_OPTERON)
1222 fidvid_msr_init(); 1298 fidvid_msr_init();
@@ -1231,7 +1307,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1231 data->available_cores = pol->cpus; 1307 data->available_cores = pol->cpus;
1232 1308
1233 if (cpu_family == CPU_HW_PSTATE) 1309 if (cpu_family == CPU_HW_PSTATE)
1234 pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); 1310 pol->cur = find_khz_freq_from_pstate(data->powernow_table,
1311 data->currpstate);
1235 else 1312 else
1236 pol->cur = find_khz_freq_from_fid(data->currfid); 1313 pol->cur = find_khz_freq_from_fid(data->currfid);
1237 dprintk("policy current frequency %d kHz\n", pol->cur); 1314 dprintk("policy current frequency %d kHz\n", pol->cur);
@@ -1248,7 +1325,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1248 cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); 1325 cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
1249 1326
1250 if (cpu_family == CPU_HW_PSTATE) 1327 if (cpu_family == CPU_HW_PSTATE)
1251 dprintk("cpu_init done, current pstate 0x%x\n", data->currpstate); 1328 dprintk("cpu_init done, current pstate 0x%x\n",
1329 data->currpstate);
1252 else 1330 else
1253 dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", 1331 dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
1254 data->currfid, data->currvid); 1332 data->currfid, data->currvid);
@@ -1257,15 +1335,16 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1257 1335
1258 return 0; 1336 return 0;
1259 1337
1260err_out: 1338err_out_unmask:
1261 set_cpus_allowed_ptr(current, &oldmask); 1339 set_cpus_allowed_ptr(current, &oldmask);
1262 powernow_k8_cpu_exit_acpi(data); 1340 powernow_k8_cpu_exit_acpi(data);
1263 1341
1342err_out:
1264 kfree(data); 1343 kfree(data);
1265 return -ENODEV; 1344 return -ENODEV;
1266} 1345}
1267 1346
1268static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) 1347static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
1269{ 1348{
1270 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); 1349 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
1271 1350
@@ -1282,7 +1361,7 @@ static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol)
1282 return 0; 1361 return 0;
1283} 1362}
1284 1363
1285static unsigned int powernowk8_get (unsigned int cpu) 1364static unsigned int powernowk8_get(unsigned int cpu)
1286{ 1365{
1287 struct powernow_k8_data *data; 1366 struct powernow_k8_data *data;
1288 cpumask_t oldmask = current->cpus_allowed; 1367 cpumask_t oldmask = current->cpus_allowed;
@@ -1318,7 +1397,7 @@ out:
1318 return khz; 1397 return khz;
1319} 1398}
1320 1399
1321static struct freq_attr* powernow_k8_attr[] = { 1400static struct freq_attr *powernow_k8_attr[] = {
1322 &cpufreq_freq_attr_scaling_available_freqs, 1401 &cpufreq_freq_attr_scaling_available_freqs,
1323 NULL, 1402 NULL,
1324}; 1403};
@@ -1363,7 +1442,8 @@ static void __exit powernowk8_exit(void)
1363 cpufreq_unregister_driver(&cpufreq_amd64_driver); 1442 cpufreq_unregister_driver(&cpufreq_amd64_driver);
1364} 1443}
1365 1444
1366MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>"); 1445MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and "
1446 "Mark Langsdorf <mark.langsdorf@amd.com>");
1367MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); 1447MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
1368MODULE_LICENSE("GPL"); 1448MODULE_LICENSE("GPL");
1369 1449
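
The fill_powernow_table_fidvid() hunks near the top of this file replace every open-coded "powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID" assignment with a call to invalidate_entry(). The helper's definition is added by an earlier hunk of the same patch and does not appear in this excerpt; assuming the obvious implementation, it is a one-liner along these lines:

	/* Hypothetical sketch of the invalidate_entry() helper called in the
	 * hunks above; the real definition lives earlier in powernow-k8.c
	 * and may differ in detail. */
	static void invalidate_entry(struct powernow_k8_data *data,
			unsigned int entry)
	{
		data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
	}

Factoring that single assignment out is what lets each rejection branch above stay on one short line, in keeping with the 80-column cleanup the rest of this patch is doing.
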
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index 8ecc75b6c7c3..6c6698feade1 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -45,11 +45,10 @@ struct powernow_k8_data {
45 * frequency is in kHz */ 45 * frequency is in kHz */
46 struct cpufreq_frequency_table *powernow_table; 46 struct cpufreq_frequency_table *powernow_table;
47 47
48#ifdef CONFIG_X86_POWERNOW_K8_ACPI
49 /* the acpi table needs to be kept. it's only available if ACPI was 48 /* the acpi table needs to be kept. it's only available if ACPI was
50 * used to determine valid frequency/vid/fid states */ 49 * used to determine valid frequency/vid/fid states */
51 struct acpi_processor_performance acpi_data; 50 struct acpi_processor_performance acpi_data;
52#endif 51
53 /* we need to keep track of associated cores, but let cpufreq 52 /* we need to keep track of associated cores, but let cpufreq
54 * handle hotplug events - so just point at cpufreq pol->cpus 53 * handle hotplug events - so just point at cpufreq pol->cpus
55 * structure */ 54 * structure */
@@ -222,10 +221,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
222 221
223static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); 222static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);
224 223
225#ifdef CONFIG_X86_POWERNOW_K8_ACPI
226static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); 224static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
227static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); 225static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
228#endif
229 226
230#ifdef CONFIG_SMP 227#ifdef CONFIG_SMP
231static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) 228static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
index 42da9bd677d6..435a996a613a 100644
--- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
+++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
@@ -19,17 +19,19 @@
19 19
20#include <linux/delay.h> 20#include <linux/delay.h>
21#include <linux/cpufreq.h> 21#include <linux/cpufreq.h>
22#include <linux/timex.h>
23#include <linux/io.h>
22 24
23#include <asm/msr.h> 25#include <asm/msr.h>
24#include <asm/timex.h>
25#include <asm/io.h>
26 26
27#define MMCR_BASE 0xfffef000 /* The default base address */ 27#define MMCR_BASE 0xfffef000 /* The default base address */
28#define OFFS_CPUCTL 0x2 /* CPU Control Register */ 28#define OFFS_CPUCTL 0x2 /* CPU Control Register */
29 29
30static __u8 __iomem *cpuctl; 30static __u8 __iomem *cpuctl;
31 31
32#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "sc520_freq", msg) 32#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
33 "sc520_freq", msg)
34#define PFX "sc520_freq: "
33 35
34static struct cpufreq_frequency_table sc520_freq_table[] = { 36static struct cpufreq_frequency_table sc520_freq_table[] = {
35 {0x01, 100000}, 37 {0x01, 100000},
@@ -43,7 +45,8 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
43 45
44 switch (clockspeed_reg & 0x03) { 46 switch (clockspeed_reg & 0x03) {
45 default: 47 default:
46 printk(KERN_ERR "sc520_freq: error: cpuctl register has unexpected value %02x\n", clockspeed_reg); 48 printk(KERN_ERR PFX "error: cpuctl register has unexpected "
49 "value %02x\n", clockspeed_reg);
47 case 0x01: 50 case 0x01:
48 return 100000; 51 return 100000;
49 case 0x02: 52 case 0x02:
@@ -51,7 +54,7 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
51 } 54 }
52} 55}
53 56
54static void sc520_freq_set_cpu_state (unsigned int state) 57static void sc520_freq_set_cpu_state(unsigned int state)
55{ 58{
56 59
57 struct cpufreq_freqs freqs; 60 struct cpufreq_freqs freqs;
@@ -76,18 +79,19 @@ static void sc520_freq_set_cpu_state (unsigned int state)
76 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 79 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
77}; 80};
78 81
79static int sc520_freq_verify (struct cpufreq_policy *policy) 82static int sc520_freq_verify(struct cpufreq_policy *policy)
80{ 83{
81 return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]); 84 return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]);
82} 85}
83 86
84static int sc520_freq_target (struct cpufreq_policy *policy, 87static int sc520_freq_target(struct cpufreq_policy *policy,
85 unsigned int target_freq, 88 unsigned int target_freq,
86 unsigned int relation) 89 unsigned int relation)
87{ 90{
88 unsigned int newstate = 0; 91 unsigned int newstate = 0;
89 92
90 if (cpufreq_frequency_table_target(policy, sc520_freq_table, target_freq, relation, &newstate)) 93 if (cpufreq_frequency_table_target(policy, sc520_freq_table,
94 target_freq, relation, &newstate))
91 return -EINVAL; 95 return -EINVAL;
92 96
93 sc520_freq_set_cpu_state(newstate); 97 sc520_freq_set_cpu_state(newstate);
@@ -116,7 +120,7 @@ static int sc520_freq_cpu_init(struct cpufreq_policy *policy)
116 120
117 result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table); 121 result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table);
118 if (result) 122 if (result)
119 return (result); 123 return result;
120 124
121 cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu); 125 cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu);
122 126
@@ -131,7 +135,7 @@ static int sc520_freq_cpu_exit(struct cpufreq_policy *policy)
131} 135}
132 136
133 137
134static struct freq_attr* sc520_freq_attr[] = { 138static struct freq_attr *sc520_freq_attr[] = {
135 &cpufreq_freq_attr_scaling_available_freqs, 139 &cpufreq_freq_attr_scaling_available_freqs,
136 NULL, 140 NULL,
137}; 141};
@@ -155,13 +159,13 @@ static int __init sc520_freq_init(void)
155 int err; 159 int err;
156 160
157 /* Test if we have the right hardware */ 161 /* Test if we have the right hardware */
158 if(c->x86_vendor != X86_VENDOR_AMD || 162 if (c->x86_vendor != X86_VENDOR_AMD ||
159 c->x86 != 4 || c->x86_model != 9) { 163 c->x86 != 4 || c->x86_model != 9) {
160 dprintk("no Elan SC520 processor found!\n"); 164 dprintk("no Elan SC520 processor found!\n");
161 return -ENODEV; 165 return -ENODEV;
162 } 166 }
163 cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); 167 cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1);
164 if(!cpuctl) { 168 if (!cpuctl) {
165 printk(KERN_ERR "sc520_freq: error: failed to remap memory\n"); 169 printk(KERN_ERR "sc520_freq: error: failed to remap memory\n");
166 return -ENOMEM; 170 return -ENOMEM;
167 } 171 }
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 1f0ec83d343b..016c1a4fa3fc 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -39,7 +39,7 @@ static struct pci_dev *speedstep_chipset_dev;
39 39
40/* speedstep_processor 40/* speedstep_processor
41 */ 41 */
42static unsigned int speedstep_processor = 0; 42static unsigned int speedstep_processor;
43 43
44static u32 pmbase; 44static u32 pmbase;
45 45
@@ -54,7 +54,8 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
54}; 54};
55 55
56 56
57#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg) 57#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
58 "speedstep-ich", msg)
58 59
59 60
60/** 61/**
@@ -62,7 +63,7 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
62 * 63 *
63 * Returns: -ENODEV if no register could be found 64 * Returns: -ENODEV if no register could be found
64 */ 65 */
65static int speedstep_find_register (void) 66static int speedstep_find_register(void)
66{ 67{
67 if (!speedstep_chipset_dev) 68 if (!speedstep_chipset_dev)
68 return -ENODEV; 69 return -ENODEV;
@@ -90,7 +91,7 @@ static int speedstep_find_register (void)
90 * 91 *
91 * Tries to change the SpeedStep state. 92 * Tries to change the SpeedStep state.
92 */ 93 */
93static void speedstep_set_state (unsigned int state) 94static void speedstep_set_state(unsigned int state)
94{ 95{
95 u8 pm2_blk; 96 u8 pm2_blk;
96 u8 value; 97 u8 value;
@@ -133,11 +134,11 @@ static void speedstep_set_state (unsigned int state)
133 134
134 dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); 135 dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
135 136
136 if (state == (value & 0x1)) { 137 if (state == (value & 0x1))
137 dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000)); 138 dprintk("change to %u MHz succeeded\n",
138 } else { 139 speedstep_get_frequency(speedstep_processor) / 1000);
139 printk (KERN_ERR "cpufreq: change failed - I/O error\n"); 140 else
140 } 141 printk(KERN_ERR "cpufreq: change failed - I/O error\n");
141 142
142 return; 143 return;
143} 144}
@@ -149,7 +150,7 @@ static void speedstep_set_state (unsigned int state)
149 * Tries to activate the SpeedStep status and control registers. 150 * Tries to activate the SpeedStep status and control registers.
150 * Returns -EINVAL on an unsupported chipset, and zero on success. 151 * Returns -EINVAL on an unsupported chipset, and zero on success.
151 */ 152 */
152static int speedstep_activate (void) 153static int speedstep_activate(void)
153{ 154{
154 u16 value = 0; 155 u16 value = 0;
155 156
@@ -175,20 +176,18 @@ static int speedstep_activate (void)
175 * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected 176 * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected
176 * chipset, or zero on failure. 177 * chipset, or zero on failure.
177 */ 178 */
178static unsigned int speedstep_detect_chipset (void) 179static unsigned int speedstep_detect_chipset(void)
179{ 180{
180 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, 181 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
181 PCI_DEVICE_ID_INTEL_82801DB_12, 182 PCI_DEVICE_ID_INTEL_82801DB_12,
182 PCI_ANY_ID, 183 PCI_ANY_ID, PCI_ANY_ID,
183 PCI_ANY_ID,
184 NULL); 184 NULL);
185 if (speedstep_chipset_dev) 185 if (speedstep_chipset_dev)
186 return 4; /* 4-M */ 186 return 4; /* 4-M */
187 187
188 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, 188 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
189 PCI_DEVICE_ID_INTEL_82801CA_12, 189 PCI_DEVICE_ID_INTEL_82801CA_12,
190 PCI_ANY_ID, 190 PCI_ANY_ID, PCI_ANY_ID,
191 PCI_ANY_ID,
192 NULL); 191 NULL);
193 if (speedstep_chipset_dev) 192 if (speedstep_chipset_dev)
194 return 3; /* 3-M */ 193 return 3; /* 3-M */
@@ -196,8 +195,7 @@ static unsigned int speedstep_detect_chipset (void)
196 195
197 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, 196 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
198 PCI_DEVICE_ID_INTEL_82801BA_10, 197 PCI_DEVICE_ID_INTEL_82801BA_10,
199 PCI_ANY_ID, 198 PCI_ANY_ID, PCI_ANY_ID,
200 PCI_ANY_ID,
201 NULL); 199 NULL);
202 if (speedstep_chipset_dev) { 200 if (speedstep_chipset_dev) {
203 /* speedstep.c causes lockups on Dell Inspirons 8000 and 201 /* speedstep.c causes lockups on Dell Inspirons 8000 and
@@ -208,8 +206,7 @@ static unsigned int speedstep_detect_chipset (void)
208 206
209 hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL, 207 hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL,
210 PCI_DEVICE_ID_INTEL_82815_MC, 208 PCI_DEVICE_ID_INTEL_82815_MC,
211 PCI_ANY_ID, 209 PCI_ANY_ID, PCI_ANY_ID,
212 PCI_ANY_ID,
213 NULL); 210 NULL);
214 211
215 if (!hostbridge) 212 if (!hostbridge)
@@ -236,7 +233,7 @@ static unsigned int _speedstep_get(const struct cpumask *cpus)
236 233
237 cpus_allowed = current->cpus_allowed; 234 cpus_allowed = current->cpus_allowed;
238 set_cpus_allowed_ptr(current, cpus); 235 set_cpus_allowed_ptr(current, cpus);
239 speed = speedstep_get_processor_frequency(speedstep_processor); 236 speed = speedstep_get_frequency(speedstep_processor);
240 set_cpus_allowed_ptr(current, &cpus_allowed); 237 set_cpus_allowed_ptr(current, &cpus_allowed);
241 dprintk("detected %u kHz as current frequency\n", speed); 238 dprintk("detected %u kHz as current frequency\n", speed);
242 return speed; 239 return speed;
@@ -251,11 +248,12 @@ static unsigned int speedstep_get(unsigned int cpu)
251 * speedstep_target - set a new CPUFreq policy 248 * speedstep_target - set a new CPUFreq policy
252 * @policy: new policy 249 * @policy: new policy
253 * @target_freq: the target frequency 250 * @target_freq: the target frequency
254 * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) 251 * @relation: how that frequency relates to achieved frequency
252 * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
255 * 253 *
256 * Sets a new CPUFreq policy. 254 * Sets a new CPUFreq policy.
257 */ 255 */
258static int speedstep_target (struct cpufreq_policy *policy, 256static int speedstep_target(struct cpufreq_policy *policy,
259 unsigned int target_freq, 257 unsigned int target_freq,
260 unsigned int relation) 258 unsigned int relation)
261{ 259{
@@ -264,7 +262,8 @@ static int speedstep_target (struct cpufreq_policy *policy,
264 cpumask_t cpus_allowed; 262 cpumask_t cpus_allowed;
265 int i; 263 int i;
266 264
267 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) 265 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
266 target_freq, relation, &newstate))
268 return -EINVAL; 267 return -EINVAL;
269 268
270 freqs.old = _speedstep_get(policy->cpus); 269 freqs.old = _speedstep_get(policy->cpus);
@@ -308,7 +307,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
308 * Limit must be within speedstep_low_freq and speedstep_high_freq, with 307 * Limit must be within speedstep_low_freq and speedstep_high_freq, with
309 * at least one border included. 308 * at least one border included.
310 */ 309 */
311static int speedstep_verify (struct cpufreq_policy *policy) 310static int speedstep_verify(struct cpufreq_policy *policy)
312{ 311{
313 return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); 312 return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
314} 313}
@@ -344,7 +343,8 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
344 return -EIO; 343 return -EIO;
345 344
346 dprintk("currently at %s speed setting - %i MHz\n", 345 dprintk("currently at %s speed setting - %i MHz\n",
347 (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", 346 (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
347 ? "low" : "high",
348 (speed / 1000)); 348 (speed / 1000));
349 349
350 /* cpuinfo and default policy values */ 350 /* cpuinfo and default policy values */
@@ -352,9 +352,9 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
352 352
353 result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); 353 result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
354 if (result) 354 if (result)
355 return (result); 355 return result;
356 356
357 cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); 357 cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
358 358
359 return 0; 359 return 0;
360} 360}
@@ -366,7 +366,7 @@ static int speedstep_cpu_exit(struct cpufreq_policy *policy)
366 return 0; 366 return 0;
367} 367}
368 368
369static struct freq_attr* speedstep_attr[] = { 369static struct freq_attr *speedstep_attr[] = {
370 &cpufreq_freq_attr_scaling_available_freqs, 370 &cpufreq_freq_attr_scaling_available_freqs,
371 NULL, 371 NULL,
372}; 372};
@@ -396,13 +396,15 @@ static int __init speedstep_init(void)
396 /* detect processor */ 396 /* detect processor */
397 speedstep_processor = speedstep_detect_processor(); 397 speedstep_processor = speedstep_detect_processor();
398 if (!speedstep_processor) { 398 if (!speedstep_processor) {
399 dprintk("Intel(R) SpeedStep(TM) capable processor not found\n"); 399 dprintk("Intel(R) SpeedStep(TM) capable processor "
400 "not found\n");
400 return -ENODEV; 401 return -ENODEV;
401 } 402 }
402 403
403 /* detect chipset */ 404 /* detect chipset */
404 if (!speedstep_detect_chipset()) { 405 if (!speedstep_detect_chipset()) {
405 dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n"); 406 dprintk("Intel(R) SpeedStep(TM) for this chipset not "
407 "(yet) available.\n");
406 return -ENODEV; 408 return -ENODEV;
407 } 409 }
408 410
@@ -431,9 +433,11 @@ static void __exit speedstep_exit(void)
431} 433}
432 434
433 435
434MODULE_AUTHOR ("Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>"); 436MODULE_AUTHOR("Dave Jones <davej@redhat.com>, "
435MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges."); 437 "Dominik Brodowski <linux@brodo.de>");
436MODULE_LICENSE ("GPL"); 438MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets "
439 "with ICH-M southbridges.");
440MODULE_LICENSE("GPL");
437 441
438module_init(speedstep_init); 442module_init(speedstep_init);
439module_exit(speedstep_exit); 443module_exit(speedstep_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index cdac7d62369b..2e3c6862657b 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -16,12 +16,16 @@
16#include <linux/slab.h> 16#include <linux/slab.h>
17 17
18#include <asm/msr.h> 18#include <asm/msr.h>
19#include <asm/tsc.h>
19#include "speedstep-lib.h" 20#include "speedstep-lib.h"
20 21
21#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-lib", msg) 22#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
23 "speedstep-lib", msg)
24
25#define PFX "speedstep-lib: "
22 26
23#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK 27#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
24static int relaxed_check = 0; 28static int relaxed_check;
25#else 29#else
26#define relaxed_check 0 30#define relaxed_check 0
27#endif 31#endif
@@ -30,14 +34,14 @@ static int relaxed_check = 0;
30 * GET PROCESSOR CORE SPEED IN KHZ * 34 * GET PROCESSOR CORE SPEED IN KHZ *
31 *********************************************************************/ 35 *********************************************************************/
32 36
33static unsigned int pentium3_get_frequency (unsigned int processor) 37static unsigned int pentium3_get_frequency(unsigned int processor)
34{ 38{
35 /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ 39 /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
36 struct { 40 struct {
37 unsigned int ratio; /* Frequency Multiplier (x10) */ 41 unsigned int ratio; /* Frequency Multiplier (x10) */
38 u8 bitmap; /* power on configuration bits 42 u8 bitmap; /* power on configuration bits
39 [27, 25:22] (in MSR 0x2a) */ 43 [27, 25:22] (in MSR 0x2a) */
40 } msr_decode_mult [] = { 44 } msr_decode_mult[] = {
41 { 30, 0x01 }, 45 { 30, 0x01 },
42 { 35, 0x05 }, 46 { 35, 0x05 },
43 { 40, 0x02 }, 47 { 40, 0x02 },
@@ -52,7 +56,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor)
52 { 85, 0x26 }, 56 { 85, 0x26 },
53 { 90, 0x20 }, 57 { 90, 0x20 },
54 { 100, 0x2b }, 58 { 100, 0x2b },
55 { 0, 0xff } /* error or unknown value */ 59 { 0, 0xff } /* error or unknown value */
56 }; 60 };
57 61
58 /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */ 62 /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */
@@ -60,7 +64,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor)
60 unsigned int value; /* Front Side Bus speed in MHz */ 64 unsigned int value; /* Front Side Bus speed in MHz */
61 u8 bitmap; /* power on configuration bits [18: 19] 65 u8 bitmap; /* power on configuration bits [18: 19]
62 (in MSR 0x2a) */ 66 (in MSR 0x2a) */
63 } msr_decode_fsb [] = { 67 } msr_decode_fsb[] = {
64 { 66, 0x0 }, 68 { 66, 0x0 },
65 { 100, 0x2 }, 69 { 100, 0x2 },
66 { 133, 0x1 }, 70 { 133, 0x1 },
@@ -85,7 +89,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor)
85 } 89 }
86 90
87 /* decode the multiplier */ 91 /* decode the multiplier */
88 if (processor == SPEEDSTEP_PROCESSOR_PIII_C_EARLY) { 92 if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) {
89 dprintk("workaround for early PIIIs\n"); 93 dprintk("workaround for early PIIIs\n");
90 msr_lo &= 0x03c00000; 94 msr_lo &= 0x03c00000;
91 } else 95 } else
@@ -97,9 +101,10 @@ static unsigned int pentium3_get_frequency (unsigned int processor)
97 j++; 101 j++;
98 } 102 }
99 103
100 dprintk("speed is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100)); 104 dprintk("speed is %u\n",
105 (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100));
101 106
102 return (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100); 107 return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100;
103} 108}
104 109
105 110
@@ -112,20 +117,23 @@ static unsigned int pentiumM_get_frequency(void)
112 117
113 /* see table B-2 of 24547212.pdf */ 118 /* see table B-2 of 24547212.pdf */
114 if (msr_lo & 0x00040000) { 119 if (msr_lo & 0x00040000) {
115 printk(KERN_DEBUG "speedstep-lib: PM - invalid FSB: 0x%x 0x%x\n", msr_lo, msr_tmp); 120 printk(KERN_DEBUG PFX "PM - invalid FSB: 0x%x 0x%x\n",
121 msr_lo, msr_tmp);
116 return 0; 122 return 0;
117 } 123 }
118 124
119 msr_tmp = (msr_lo >> 22) & 0x1f; 125 msr_tmp = (msr_lo >> 22) & 0x1f;
120 dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000)); 126 dprintk("bits 22-26 are 0x%x, speed is %u\n",
127 msr_tmp, (msr_tmp * 100 * 1000));
121 128
122 return (msr_tmp * 100 * 1000); 129 return msr_tmp * 100 * 1000;
123} 130}
124 131
125static unsigned int pentium_core_get_frequency(void) 132static unsigned int pentium_core_get_frequency(void)
126{ 133{
127 u32 fsb = 0; 134 u32 fsb = 0;
128 u32 msr_lo, msr_tmp; 135 u32 msr_lo, msr_tmp;
136 int ret;
129 137
130 rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp); 138 rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp);
131 /* see table B-2 of 25366920.pdf */ 139 /* see table B-2 of 25366920.pdf */
@@ -153,12 +161,15 @@ static unsigned int pentium_core_get_frequency(void)
153 } 161 }
154 162
155 rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); 163 rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
156 dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); 164 dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n",
165 msr_lo, msr_tmp);
157 166
158 msr_tmp = (msr_lo >> 22) & 0x1f; 167 msr_tmp = (msr_lo >> 22) & 0x1f;
159 dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * fsb)); 168 dprintk("bits 22-26 are 0x%x, speed is %u\n",
169 msr_tmp, (msr_tmp * fsb));
160 170
161 return (msr_tmp * fsb); 171 ret = (msr_tmp * fsb);
172 return ret;
162} 173}
163 174
164 175
@@ -167,6 +178,16 @@ static unsigned int pentium4_get_frequency(void)
167 struct cpuinfo_x86 *c = &boot_cpu_data; 178 struct cpuinfo_x86 *c = &boot_cpu_data;
168 u32 msr_lo, msr_hi, mult; 179 u32 msr_lo, msr_hi, mult;
169 unsigned int fsb = 0; 180 unsigned int fsb = 0;
181 unsigned int ret;
182 u8 fsb_code;
183
184 /* Pentium 4 Model 0 and 1 do not have the Core Clock Frequency
185 * to System Bus Frequency Ratio Field in the Processor Frequency
186 * Configuration Register of the MSR. Therefore the current
187 * frequency cannot be calculated and has to be measured.
188 */
189 if (c->x86_model < 2)
190 return cpu_khz;
170 191
171 rdmsr(0x2c, msr_lo, msr_hi); 192 rdmsr(0x2c, msr_lo, msr_hi);
172 193
@@ -177,62 +198,61 @@ static unsigned int pentium4_get_frequency(void)
177 * revision #12 in Table B-1: MSRs in the Pentium 4 and 198 * revision #12 in Table B-1: MSRs in the Pentium 4 and
178 * Intel Xeon Processors, on page B-4 and B-5. 199 * Intel Xeon Processors, on page B-4 and B-5.
179 */ 200 */
180 if (c->x86_model < 2) 201 fsb_code = (msr_lo >> 16) & 0x7;
202 switch (fsb_code) {
203 case 0:
181 fsb = 100 * 1000; 204 fsb = 100 * 1000;
182 else { 205 break;
183 u8 fsb_code = (msr_lo >> 16) & 0x7; 206 case 1:
184 switch (fsb_code) { 207 fsb = 13333 * 10;
185 case 0: 208 break;
186 fsb = 100 * 1000; 209 case 2:
187 break; 210 fsb = 200 * 1000;
188 case 1: 211 break;
189 fsb = 13333 * 10;
190 break;
191 case 2:
192 fsb = 200 * 1000;
193 break;
194 }
195 } 212 }
196 213
197 if (!fsb) 214 if (!fsb)
198 printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to <linux@brodo.de>\n"); 215 printk(KERN_DEBUG PFX "couldn't detect FSB speed. "
216 "Please send an e-mail to <linux@brodo.de>\n");
199 217
200 /* Multiplier. */ 218 /* Multiplier. */
201 mult = msr_lo >> 24; 219 mult = msr_lo >> 24;
202 220
203 dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult)); 221 dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n",
222 fsb, mult, (fsb * mult));
204 223
205 return (fsb * mult); 224 ret = (fsb * mult);
225 return ret;
206} 226}
207 227
208 228
209unsigned int speedstep_get_processor_frequency(unsigned int processor) 229unsigned int speedstep_get_frequency(unsigned int processor)
210{ 230{
211 switch (processor) { 231 switch (processor) {
212 case SPEEDSTEP_PROCESSOR_PCORE: 232 case SPEEDSTEP_CPU_PCORE:
213 return pentium_core_get_frequency(); 233 return pentium_core_get_frequency();
214 case SPEEDSTEP_PROCESSOR_PM: 234 case SPEEDSTEP_CPU_PM:
215 return pentiumM_get_frequency(); 235 return pentiumM_get_frequency();
216 case SPEEDSTEP_PROCESSOR_P4D: 236 case SPEEDSTEP_CPU_P4D:
217 case SPEEDSTEP_PROCESSOR_P4M: 237 case SPEEDSTEP_CPU_P4M:
218 return pentium4_get_frequency(); 238 return pentium4_get_frequency();
219 case SPEEDSTEP_PROCESSOR_PIII_T: 239 case SPEEDSTEP_CPU_PIII_T:
220 case SPEEDSTEP_PROCESSOR_PIII_C: 240 case SPEEDSTEP_CPU_PIII_C:
221 case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: 241 case SPEEDSTEP_CPU_PIII_C_EARLY:
222 return pentium3_get_frequency(processor); 242 return pentium3_get_frequency(processor);
223 default: 243 default:
224 return 0; 244 return 0;
225 }; 245 };
226 return 0; 246 return 0;
227} 247}
228EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency); 248EXPORT_SYMBOL_GPL(speedstep_get_frequency);
229 249
230 250
231/********************************************************************* 251/*********************************************************************
232 * DETECT SPEEDSTEP-CAPABLE PROCESSOR * 252 * DETECT SPEEDSTEP-CAPABLE PROCESSOR *
233 *********************************************************************/ 253 *********************************************************************/
234 254
235unsigned int speedstep_detect_processor (void) 255unsigned int speedstep_detect_processor(void)
236{ 256{
237 struct cpuinfo_x86 *c = &cpu_data(0); 257 struct cpuinfo_x86 *c = &cpu_data(0);
238 u32 ebx, msr_lo, msr_hi; 258 u32 ebx, msr_lo, msr_hi;
@@ -261,7 +281,7 @@ unsigned int speedstep_detect_processor (void)
261 * sample has ebx = 0x0f, production has 0x0e. 281 * sample has ebx = 0x0f, production has 0x0e.
262 */ 282 */
263 if ((ebx == 0x0e) || (ebx == 0x0f)) 283 if ((ebx == 0x0e) || (ebx == 0x0f))
264 return SPEEDSTEP_PROCESSOR_P4M; 284 return SPEEDSTEP_CPU_P4M;
265 break; 285 break;
266 case 7: 286 case 7:
267 /* 287 /*
@@ -272,7 +292,7 @@ unsigned int speedstep_detect_processor (void)
272 * samples are only of B-stepping... 292 * samples are only of B-stepping...
273 */ 293 */
274 if (ebx == 0x0e) 294 if (ebx == 0x0e)
275 return SPEEDSTEP_PROCESSOR_P4M; 295 return SPEEDSTEP_CPU_P4M;
276 break; 296 break;
277 case 9: 297 case 9:
278 /* 298 /*
@@ -288,10 +308,13 @@ unsigned int speedstep_detect_processor (void)
288 * M-P4-Ms may have either ebx=0xe or 0xf [see above] 308 * M-P4-Ms may have either ebx=0xe or 0xf [see above]
289 * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf] 309 * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf]
290 * also, M-P4M HTs have ebx=0x8, too 310 * also, M-P4M HTs have ebx=0x8, too
291 * For now, they are distinguished by the model_id string 311 * For now, they are distinguished by the model_id
312 * string
292 */ 313 */
293 if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL)) 314 if ((ebx == 0x0e) ||
294 return SPEEDSTEP_PROCESSOR_P4M; 315 (strstr(c->x86_model_id,
316 "Mobile Intel(R) Pentium(R) 4") != NULL))
317 return SPEEDSTEP_CPU_P4M;
295 break; 318 break;
296 default: 319 default:
297 break; 320 break;
@@ -301,7 +324,8 @@ unsigned int speedstep_detect_processor (void)
301 324
302 switch (c->x86_model) { 325 switch (c->x86_model) {
303 case 0x0B: /* Intel PIII [Tualatin] */ 326 case 0x0B: /* Intel PIII [Tualatin] */
304 /* cpuid_ebx(1) is 0x04 for desktop PIII, 0x06 for mobile PIII-M */ 327 /* cpuid_ebx(1) is 0x04 for desktop PIII,
328 * 0x06 for mobile PIII-M */
305 ebx = cpuid_ebx(0x00000001); 329 ebx = cpuid_ebx(0x00000001);
306 dprintk("ebx is %x\n", ebx); 330 dprintk("ebx is %x\n", ebx);
307 331
@@ -313,14 +337,15 @@ unsigned int speedstep_detect_processor (void)
313 /* So far all PIII-M processors support SpeedStep. See 337 /* So far all PIII-M processors support SpeedStep. See
314 * Intel's 24540640.pdf of June 2003 338 * Intel's 24540640.pdf of June 2003
315 */ 339 */
316 return SPEEDSTEP_PROCESSOR_PIII_T; 340 return SPEEDSTEP_CPU_PIII_T;
317 341
318 case 0x08: /* Intel PIII [Coppermine] */ 342 case 0x08: /* Intel PIII [Coppermine] */
319 343
320 /* all mobile PIII Coppermines have FSB 100 MHz 344 /* all mobile PIII Coppermines have FSB 100 MHz
321 * ==> sort out a few desktop PIIIs. */ 345 * ==> sort out a few desktop PIIIs. */
322 rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi); 346 rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi);
323 dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi); 347 dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n",
348 msr_lo, msr_hi);
324 msr_lo &= 0x00c0000; 349 msr_lo &= 0x00c0000;
325 if (msr_lo != 0x0080000) 350 if (msr_lo != 0x0080000)
326 return 0; 351 return 0;
@@ -332,13 +357,15 @@ unsigned int speedstep_detect_processor (void)
332 * bit 56 or 57 is set 357 * bit 56 or 57 is set
333 */ 358 */
334 rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi); 359 rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi);
335 dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi); 360 dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n",
336 if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) { 361 msr_lo, msr_hi);
362 if ((msr_hi & (1<<18)) &&
363 (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
337 if (c->x86_mask == 0x01) { 364 if (c->x86_mask == 0x01) {
338 dprintk("early PIII version\n"); 365 dprintk("early PIII version\n");
339 return SPEEDSTEP_PROCESSOR_PIII_C_EARLY; 366 return SPEEDSTEP_CPU_PIII_C_EARLY;
340 } else 367 } else
341 return SPEEDSTEP_PROCESSOR_PIII_C; 368 return SPEEDSTEP_CPU_PIII_C;
342 } 369 }
343 370
344 default: 371 default:
@@ -369,7 +396,7 @@ unsigned int speedstep_get_freqs(unsigned int processor,
369 dprintk("trying to determine both speeds\n"); 396 dprintk("trying to determine both speeds\n");
370 397
371 /* get current speed */ 398 /* get current speed */
372 prev_speed = speedstep_get_processor_frequency(processor); 399 prev_speed = speedstep_get_frequency(processor);
373 if (!prev_speed) 400 if (!prev_speed)
374 return -EIO; 401 return -EIO;
375 402
@@ -379,7 +406,7 @@ unsigned int speedstep_get_freqs(unsigned int processor,
379 406
380 /* switch to low state */ 407 /* switch to low state */
381 set_state(SPEEDSTEP_LOW); 408 set_state(SPEEDSTEP_LOW);
382 *low_speed = speedstep_get_processor_frequency(processor); 409 *low_speed = speedstep_get_frequency(processor);
383 if (!*low_speed) { 410 if (!*low_speed) {
384 ret = -EIO; 411 ret = -EIO;
385 goto out; 412 goto out;
@@ -398,7 +425,7 @@ unsigned int speedstep_get_freqs(unsigned int processor,
398 if (transition_latency) 425 if (transition_latency)
399 do_gettimeofday(&tv2); 426 do_gettimeofday(&tv2);
400 427
401 *high_speed = speedstep_get_processor_frequency(processor); 428 *high_speed = speedstep_get_frequency(processor);
402 if (!*high_speed) { 429 if (!*high_speed) {
403 ret = -EIO; 430 ret = -EIO;
404 goto out; 431 goto out;
@@ -426,9 +453,12 @@ unsigned int speedstep_get_freqs(unsigned int processor,
426 /* check if the latency measurement is too high or too low 453 /* check if the latency measurement is too high or too low
427 * and set it to a safe value (500uSec) in that case 454 * and set it to a safe value (500uSec) in that case
428 */ 455 */
429 if (*transition_latency > 10000000 || *transition_latency < 50000) { 456 if (*transition_latency > 10000000 ||
430 printk (KERN_WARNING "speedstep: frequency transition measured seems out of " 457 *transition_latency < 50000) {
431 "range (%u nSec), falling back to a safe one of %u nSec.\n", 458 printk(KERN_WARNING PFX "frequency transition "
459 "measured seems out of range (%u "
460 "nSec), falling back to a safe one of"
461 "%u nSec.\n",
432 *transition_latency, 500000); 462 *transition_latency, 500000);
433 *transition_latency = 500000; 463 *transition_latency = 500000;
434 } 464 }
@@ -436,15 +466,16 @@ unsigned int speedstep_get_freqs(unsigned int processor,
436 466
437out: 467out:
438 local_irq_restore(flags); 468 local_irq_restore(flags);
439 return (ret); 469 return ret;
440} 470}
441EXPORT_SYMBOL_GPL(speedstep_get_freqs); 471EXPORT_SYMBOL_GPL(speedstep_get_freqs);
442 472
443#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK 473#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
444module_param(relaxed_check, int, 0444); 474module_param(relaxed_check, int, 0444);
445MODULE_PARM_DESC(relaxed_check, "Don't do all checks for speedstep capability."); 475MODULE_PARM_DESC(relaxed_check,
476 "Don't do all checks for speedstep capability.");
446#endif 477#endif
447 478
448MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>"); 479MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
449MODULE_DESCRIPTION ("Library for Intel SpeedStep 1 or 2 cpufreq drivers."); 480MODULE_DESCRIPTION("Library for Intel SpeedStep 1 or 2 cpufreq drivers.");
450MODULE_LICENSE ("GPL"); 481MODULE_LICENSE("GPL");
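
The pentium4_get_frequency() hunk above hoists the "model < 2" case into an early return (those CPUs lack the core-clock-to-bus ratio field, so the TSC-calibrated cpu_khz value is reported instead, which is what the new asm/tsc.h include at the top of the file provides) and flattens the previously nested FSB decode into a plain switch. Condensed into one function, the decode it performs is roughly the following; this is a sketch of the arithmetic, not the driver's literal code:

	/* Sketch: frequency in kHz from MSR 0x2c, per the hunk above.
	 * FSB code is in bits 18:16, the multiplier in bits 31:24. */
	static unsigned int p4_khz_from_msr(u32 msr_lo)
	{
		unsigned int fsb = 0;
		u8 fsb_code = (msr_lo >> 16) & 0x7;

		switch (fsb_code) {
		case 0:
			fsb = 100 * 1000;	/* 100 MHz bus */
			break;
		case 1:
			fsb = 13333 * 10;	/* 133 MHz bus */
			break;
		case 2:
			fsb = 200 * 1000;	/* 200 MHz bus */
			break;
		}

		return fsb * (msr_lo >> 24);	/* kHz = FSB(kHz) * multiplier */
	}
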
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
index b11bcc608cac..2b6c04e5a304 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
@@ -12,17 +12,17 @@
12 12
13/* processors */ 13/* processors */
14 14
15#define SPEEDSTEP_PROCESSOR_PIII_C_EARLY 0x00000001 /* Coppermine core */ 15#define SPEEDSTEP_CPU_PIII_C_EARLY 0x00000001 /* Coppermine core */
16#define SPEEDSTEP_PROCESSOR_PIII_C 0x00000002 /* Coppermine core */ 16#define SPEEDSTEP_CPU_PIII_C 0x00000002 /* Coppermine core */
17#define SPEEDSTEP_PROCESSOR_PIII_T 0x00000003 /* Tualatin core */ 17#define SPEEDSTEP_CPU_PIII_T 0x00000003 /* Tualatin core */
18#define SPEEDSTEP_PROCESSOR_P4M 0x00000004 /* P4-M */ 18#define SPEEDSTEP_CPU_P4M 0x00000004 /* P4-M */
19 19
20/* the following processors are not speedstep-capable and are not auto-detected 20/* the following processors are not speedstep-capable and are not auto-detected
21 * in speedstep_detect_processor(). However, their speed can be detected using 21 * in speedstep_detect_processor(). However, their speed can be detected using
22 * the speedstep_get_processor_frequency() call. */ 22 * the speedstep_get_frequency() call. */
23#define SPEEDSTEP_PROCESSOR_PM 0xFFFFFF03 /* Pentium M */ 23#define SPEEDSTEP_CPU_PM 0xFFFFFF03 /* Pentium M */
24#define SPEEDSTEP_PROCESSOR_P4D 0xFFFFFF04 /* desktop P4 */ 24#define SPEEDSTEP_CPU_P4D 0xFFFFFF04 /* desktop P4 */
25#define SPEEDSTEP_PROCESSOR_PCORE 0xFFFFFF05 /* Core */ 25#define SPEEDSTEP_CPU_PCORE 0xFFFFFF05 /* Core */
26 26
27/* speedstep states -- only two of them */ 27/* speedstep states -- only two of them */
28 28
@@ -34,7 +34,7 @@
34extern unsigned int speedstep_detect_processor (void); 34extern unsigned int speedstep_detect_processor (void);
35 35
36/* detect the current speed (in khz) of the processor */ 36/* detect the current speed (in khz) of the processor */
37extern unsigned int speedstep_get_processor_frequency(unsigned int processor); 37extern unsigned int speedstep_get_frequency(unsigned int processor);
38 38
39 39
40/* detect the low and high speeds of the processor. The callback 40/* detect the low and high speeds of the processor. The callback
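
Taken together, the header above and the speedstep-ich.c hunks earlier show how the renamed library API is consumed: detect the processor type once, then ask the library for the current frequency. A minimal caller, mirroring what the ICH driver does in its init path; a sketch under those assumptions, with error handling trimmed:

	/* Sketch of a consumer of the renamed speedstep-lib API; not part of
	 * this patch, names taken from the hunks above. */
	#include <linux/kernel.h>
	#include <linux/init.h>
	#include "speedstep-lib.h"

	static unsigned int speedstep_processor;

	static int __init example_init(void)
	{
		speedstep_processor = speedstep_detect_processor();
		if (!speedstep_processor)
			return -ENODEV;	/* not a SpeedStep-capable CPU */

		printk(KERN_INFO "current speed: %u kHz\n",
		       speedstep_get_frequency(speedstep_processor));
		return 0;
	}

The rename from SPEEDSTEP_PROCESSOR_* to SPEEDSTEP_CPU_* keeps the numeric values unchanged, so in-tree callers only need the textual substitution visible in the speedstep-ich.c and speedstep-smi.c hunks.
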
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index 8a85c93bd62a..befea088e4f5 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -19,8 +19,8 @@
19#include <linux/cpufreq.h> 19#include <linux/cpufreq.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/delay.h> 21#include <linux/delay.h>
22#include <linux/io.h>
22#include <asm/ist.h> 23#include <asm/ist.h>
23#include <asm/io.h>
24 24
25#include "speedstep-lib.h" 25#include "speedstep-lib.h"
26 26
@@ -30,12 +30,12 @@
30 * If user gives it, these are used. 30 * If user gives it, these are used.
31 * 31 *
32 */ 32 */
33static int smi_port = 0; 33static int smi_port;
34static int smi_cmd = 0; 34static int smi_cmd;
35static unsigned int smi_sig = 0; 35static unsigned int smi_sig;
36 36
37/* info about the processor */ 37/* info about the processor */
38static unsigned int speedstep_processor = 0; 38static unsigned int speedstep_processor;
39 39
40/* 40/*
41 * There are only two frequency states for each processor. Values 41 * There are only two frequency states for each processor. Values
@@ -56,12 +56,13 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
56 * of DMA activity going on? */ 56 * of DMA activity going on? */
57#define SMI_TRIES 5 57#define SMI_TRIES 5
58 58
59#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-smi", msg) 59#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
60 "speedstep-smi", msg)
60 61
61/** 62/**
62 * speedstep_smi_ownership 63 * speedstep_smi_ownership
63 */ 64 */
64static int speedstep_smi_ownership (void) 65static int speedstep_smi_ownership(void)
65{ 66{
66 u32 command, result, magic, dummy; 67 u32 command, result, magic, dummy;
67 u32 function = GET_SPEEDSTEP_OWNER; 68 u32 function = GET_SPEEDSTEP_OWNER;
@@ -70,16 +71,18 @@ static int speedstep_smi_ownership (void)
70 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); 71 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
71 magic = virt_to_phys(magic_data); 72 magic = virt_to_phys(magic_data);
72 73
73 dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port); 74 dprintk("trying to obtain ownership with command %x at port %x\n",
75 command, smi_port);
74 76
75 __asm__ __volatile__( 77 __asm__ __volatile__(
76 "push %%ebp\n" 78 "push %%ebp\n"
77 "out %%al, (%%dx)\n" 79 "out %%al, (%%dx)\n"
78 "pop %%ebp\n" 80 "pop %%ebp\n"
79 : "=D" (result), "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy), 81 : "=D" (result),
80 "=S" (dummy) 82 "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
83 "=S" (dummy)
81 : "a" (command), "b" (function), "c" (0), "d" (smi_port), 84 : "a" (command), "b" (function), "c" (0), "d" (smi_port),
82 "D" (0), "S" (magic) 85 "D" (0), "S" (magic)
83 : "memory" 86 : "memory"
84 ); 87 );
85 88
@@ -97,10 +100,10 @@ static int speedstep_smi_ownership (void)
97 * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing 100 * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing
98 * shows that the latter occurs if !(ist_info.event & 0xFFFF). 101 * shows that the latter occurs if !(ist_info.event & 0xFFFF).
99 */ 102 */
100static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) 103static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high)
101{ 104{
102 u32 command, result = 0, edi, high_mhz, low_mhz, dummy; 105 u32 command, result = 0, edi, high_mhz, low_mhz, dummy;
103 u32 state=0; 106 u32 state = 0;
104 u32 function = GET_SPEEDSTEP_FREQS; 107 u32 function = GET_SPEEDSTEP_FREQS;
105 108
106 if (!(ist_info.event & 0xFFFF)) { 109 if (!(ist_info.event & 0xFFFF)) {
@@ -110,17 +113,25 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
110 113
111 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); 114 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
112 115
113 dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port); 116 dprintk("trying to determine frequencies with command %x at port %x\n",
117 command, smi_port);
114 118
115 __asm__ __volatile__( 119 __asm__ __volatile__(
116 "push %%ebp\n" 120 "push %%ebp\n"
117 "out %%al, (%%dx)\n" 121 "out %%al, (%%dx)\n"
118 "pop %%ebp" 122 "pop %%ebp"
119 : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi), "=S" (dummy) 123 : "=a" (result),
120 : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0) 124 "=b" (high_mhz),
125 "=c" (low_mhz),
126 "=d" (state), "=D" (edi), "=S" (dummy)
127 : "a" (command),
128 "b" (function),
129 "c" (state),
130 "d" (smi_port), "S" (0), "D" (0)
121 ); 131 );
122 132
123 dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz); 133 dprintk("result %x, low_freq %u, high_freq %u\n",
134 result, low_mhz, high_mhz);
124 135
125 /* abort if results are obviously incorrect... */ 136 /* abort if results are obviously incorrect... */
126 if ((high_mhz + low_mhz) < 600) 137 if ((high_mhz + low_mhz) < 600)
@@ -137,26 +148,30 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
137 * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) 148 * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
138 * 149 *
139 */ 150 */
140static int speedstep_get_state (void) 151static int speedstep_get_state(void)
141{ 152{
142 u32 function=GET_SPEEDSTEP_STATE; 153 u32 function = GET_SPEEDSTEP_STATE;
143 u32 result, state, edi, command, dummy; 154 u32 result, state, edi, command, dummy;
144 155
145 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); 156 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
146 157
147 dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port); 158 dprintk("trying to determine current setting with command %x "
159 "at port %x\n", command, smi_port);
148 160
149 __asm__ __volatile__( 161 __asm__ __volatile__(
150 "push %%ebp\n" 162 "push %%ebp\n"
151 "out %%al, (%%dx)\n" 163 "out %%al, (%%dx)\n"
152 "pop %%ebp\n" 164 "pop %%ebp\n"
153 : "=a" (result), "=b" (state), "=D" (edi), "=c" (dummy), "=d" (dummy), "=S" (dummy) 165 : "=a" (result),
154 : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0), "D" (0) 166 "=b" (state), "=D" (edi),
167 "=c" (dummy), "=d" (dummy), "=S" (dummy)
168 : "a" (command), "b" (function), "c" (0),
169 "d" (smi_port), "S" (0), "D" (0)
155 ); 170 );
156 171
157 dprintk("state is %x, result is %x\n", state, result); 172 dprintk("state is %x, result is %x\n", state, result);
158 173
159 return (state & 1); 174 return state & 1;
160} 175}
161 176
162 177
@@ -165,11 +180,11 @@ static int speedstep_get_state (void)
165 * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) 180 * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
166 * 181 *
167 */ 182 */
168static void speedstep_set_state (unsigned int state) 183static void speedstep_set_state(unsigned int state)
169{ 184{
170 unsigned int result = 0, command, new_state, dummy; 185 unsigned int result = 0, command, new_state, dummy;
171 unsigned long flags; 186 unsigned long flags;
172 unsigned int function=SET_SPEEDSTEP_STATE; 187 unsigned int function = SET_SPEEDSTEP_STATE;
173 unsigned int retry = 0; 188 unsigned int retry = 0;
174 189
175 if (state > 0x1) 190 if (state > 0x1)
@@ -180,11 +195,14 @@ static void speedstep_set_state (unsigned int state)
180 195
181 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); 196 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
182 197
183 dprintk("trying to set frequency to state %u with command %x at port %x\n", state, command, smi_port); 198 dprintk("trying to set frequency to state %u "
199 "with command %x at port %x\n",
200 state, command, smi_port);
184 201
185 do { 202 do {
186 if (retry) { 203 if (retry) {
187 dprintk("retry %u, previous result %u, waiting...\n", retry, result); 204 dprintk("retry %u, previous result %u, waiting...\n",
205 retry, result);
188 mdelay(retry * 50); 206 mdelay(retry * 50);
189 } 207 }
190 retry++; 208 retry++;
@@ -192,20 +210,26 @@ static void speedstep_set_state (unsigned int state)
192 "push %%ebp\n" 210 "push %%ebp\n"
193 "out %%al, (%%dx)\n" 211 "out %%al, (%%dx)\n"
194 "pop %%ebp" 212 "pop %%ebp"
195 : "=b" (new_state), "=D" (result), "=c" (dummy), "=a" (dummy), 213 : "=b" (new_state), "=D" (result),
196 "=d" (dummy), "=S" (dummy) 214 "=c" (dummy), "=a" (dummy),
197 : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0) 215 "=d" (dummy), "=S" (dummy)
216 : "a" (command), "b" (function), "c" (state),
217 "d" (smi_port), "S" (0), "D" (0)
198 ); 218 );
199 } while ((new_state != state) && (retry <= SMI_TRIES)); 219 } while ((new_state != state) && (retry <= SMI_TRIES));
200 220
201 /* enable IRQs */ 221 /* enable IRQs */
202 local_irq_restore(flags); 222 local_irq_restore(flags);
203 223
204 if (new_state == state) { 224 if (new_state == state)
205 dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result); 225 dprintk("change to %u MHz succeeded after %u tries "
206 } else { 226 "with result %u\n",
207 printk(KERN_ERR "cpufreq: change to state %u failed with new_state %u and result %u\n", state, new_state, result); 227 (speedstep_freqs[new_state].frequency / 1000),
208 } 228 retry, result);
229 else
230 printk(KERN_ERR "cpufreq: change to state %u "
231 "failed with new_state %u and result %u\n",
232 state, new_state, result);
209 233
210 return; 234 return;
211} 235}
@@ -219,13 +243,14 @@ static void speedstep_set_state (unsigned int state)
219 * 243 *
220 * Sets a new CPUFreq policy/freq. 244 * Sets a new CPUFreq policy/freq.
221 */ 245 */
222static int speedstep_target (struct cpufreq_policy *policy, 246static int speedstep_target(struct cpufreq_policy *policy,
223 unsigned int target_freq, unsigned int relation) 247 unsigned int target_freq, unsigned int relation)
224{ 248{
225 unsigned int newstate = 0; 249 unsigned int newstate = 0;
226 struct cpufreq_freqs freqs; 250 struct cpufreq_freqs freqs;
227 251
228 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) 252 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
253 target_freq, relation, &newstate))
229 return -EINVAL; 254 return -EINVAL;
230 255
231 freqs.old = speedstep_freqs[speedstep_get_state()].frequency; 256 freqs.old = speedstep_freqs[speedstep_get_state()].frequency;
@@ -250,7 +275,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
250 * Limit must be within speedstep_low_freq and speedstep_high_freq, with 275 * Limit must be within speedstep_low_freq and speedstep_high_freq, with
251 * at least one border included. 276 * at least one border included.
252 */ 277 */
253static int speedstep_verify (struct cpufreq_policy *policy) 278static int speedstep_verify(struct cpufreq_policy *policy)
254{ 279{
255 return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); 280 return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
256} 281}
@@ -259,7 +284,8 @@ static int speedstep_verify (struct cpufreq_policy *policy)
259static int speedstep_cpu_init(struct cpufreq_policy *policy) 284static int speedstep_cpu_init(struct cpufreq_policy *policy)
260{ 285{
261 int result; 286 int result;
262 unsigned int speed,state; 287 unsigned int speed, state;
288 unsigned int *low, *high;
263 289
264 /* capability check */ 290 /* capability check */
265 if (policy->cpu != 0) 291 if (policy->cpu != 0)
@@ -272,19 +298,23 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
272 } 298 }
273 299
274 /* detect low and high frequency */ 300 /* detect low and high frequency */
275 result = speedstep_smi_get_freqs(&speedstep_freqs[SPEEDSTEP_LOW].frequency, 301 low = &speedstep_freqs[SPEEDSTEP_LOW].frequency;
276 &speedstep_freqs[SPEEDSTEP_HIGH].frequency); 302 high = &speedstep_freqs[SPEEDSTEP_HIGH].frequency;
303
304 result = speedstep_smi_get_freqs(low, high);
277 if (result) { 305 if (result) {
 278 		/* fall back to speedstep_lib.c detection mechanism: try both states out */ 306 		/* fall back to speedstep_lib.c detection mechanism:
279 dprintk("could not detect low and high frequencies by SMI call.\n"); 307 * try both states out */
308 dprintk("could not detect low and high frequencies "
309 "by SMI call.\n");
280 result = speedstep_get_freqs(speedstep_processor, 310 result = speedstep_get_freqs(speedstep_processor,
281 &speedstep_freqs[SPEEDSTEP_LOW].frequency, 311 low, high,
282 &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
283 NULL, 312 NULL,
284 &speedstep_set_state); 313 &speedstep_set_state);
285 314
286 if (result) { 315 if (result) {
287 dprintk("could not detect two different speeds -- aborting.\n"); 316 dprintk("could not detect two different speeds"
317 " -- aborting.\n");
288 return result; 318 return result;
289 } else 319 } else
290 dprintk("workaround worked.\n"); 320 dprintk("workaround worked.\n");
@@ -295,7 +325,8 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
295 speed = speedstep_freqs[state].frequency; 325 speed = speedstep_freqs[state].frequency;
296 326
297 dprintk("currently at %s speed setting - %i MHz\n", 327 dprintk("currently at %s speed setting - %i MHz\n",
298 (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", 328 (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
329 ? "low" : "high",
299 (speed / 1000)); 330 (speed / 1000));
300 331
301 /* cpuinfo and default policy values */ 332 /* cpuinfo and default policy values */
@@ -304,7 +335,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
304 335
305 result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); 336 result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
306 if (result) 337 if (result)
307 return (result); 338 return result;
308 339
309 cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); 340 cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
310 341
@@ -321,7 +352,7 @@ static unsigned int speedstep_get(unsigned int cpu)
321{ 352{
322 if (cpu) 353 if (cpu)
323 return -ENODEV; 354 return -ENODEV;
324 return speedstep_get_processor_frequency(speedstep_processor); 355 return speedstep_get_frequency(speedstep_processor);
325} 356}
326 357
327 358
@@ -335,7 +366,7 @@ static int speedstep_resume(struct cpufreq_policy *policy)
335 return result; 366 return result;
336} 367}
337 368
338static struct freq_attr* speedstep_attr[] = { 369static struct freq_attr *speedstep_attr[] = {
339 &cpufreq_freq_attr_scaling_available_freqs, 370 &cpufreq_freq_attr_scaling_available_freqs,
340 NULL, 371 NULL,
341}; 372};
@@ -364,21 +395,23 @@ static int __init speedstep_init(void)
364 speedstep_processor = speedstep_detect_processor(); 395 speedstep_processor = speedstep_detect_processor();
365 396
366 switch (speedstep_processor) { 397 switch (speedstep_processor) {
367 case SPEEDSTEP_PROCESSOR_PIII_T: 398 case SPEEDSTEP_CPU_PIII_T:
368 case SPEEDSTEP_PROCESSOR_PIII_C: 399 case SPEEDSTEP_CPU_PIII_C:
369 case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: 400 case SPEEDSTEP_CPU_PIII_C_EARLY:
370 break; 401 break;
371 default: 402 default:
372 speedstep_processor = 0; 403 speedstep_processor = 0;
373 } 404 }
374 405
375 if (!speedstep_processor) { 406 if (!speedstep_processor) {
376 dprintk ("No supported Intel CPU detected.\n"); 407 dprintk("No supported Intel CPU detected.\n");
377 return -ENODEV; 408 return -ENODEV;
378 } 409 }
379 410
380 dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n", 411 dprintk("signature:0x%.8lx, command:0x%.8lx, "
381 ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level); 412 "event:0x%.8lx, perf_level:0x%.8lx.\n",
413 ist_info.signature, ist_info.command,
414 ist_info.event, ist_info.perf_level);
382 415
383 /* Error if no IST-SMI BIOS or no PARM 416 /* Error if no IST-SMI BIOS or no PARM
384 sig= 'ISGE' aka 'Intel Speedstep Gate E' */ 417 sig= 'ISGE' aka 'Intel Speedstep Gate E' */
@@ -416,17 +449,20 @@ static void __exit speedstep_exit(void)
416 cpufreq_unregister_driver(&speedstep_driver); 449 cpufreq_unregister_driver(&speedstep_driver);
417} 450}
418 451
419module_param(smi_port, int, 0444); 452module_param(smi_port, int, 0444);
420module_param(smi_cmd, int, 0444); 453module_param(smi_cmd, int, 0444);
421module_param(smi_sig, uint, 0444); 454module_param(smi_sig, uint, 0444);
422 455
423MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value -- Intel's default setting is 0xb2"); 456MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value "
424MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value -- Intel's default setting is 0x82"); 457 "-- Intel's default setting is 0xb2");
425MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the SMI interface."); 458MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value "
459 "-- Intel's default setting is 0x82");
460MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the "
461 "SMI interface.");
426 462
427MODULE_AUTHOR ("Hiroshi Miura"); 463MODULE_AUTHOR("Hiroshi Miura");
428MODULE_DESCRIPTION ("Speedstep driver for IST applet SMI interface."); 464MODULE_DESCRIPTION("Speedstep driver for IST applet SMI interface.");
429MODULE_LICENSE ("GPL"); 465MODULE_LICENSE("GPL");
430 466
431module_init(speedstep_init); 467module_init(speedstep_init);
432module_exit(speedstep_exit); 468module_exit(speedstep_exit);
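For reference, the IST command word used throughout the driver above is built by splicing the BIOS signature into the upper 24 bits and the command byte into the low 8 bits, and the state returned by the SMI handler is reduced to its lowest bit. A minimal userspace sketch of just that bit arithmetic (the signature and raw state values below are made-up placeholders, not real BIOS data; 0x82 is the default command the module parameters above document):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t smi_sig = 0x47534943;	/* placeholder BIOS signature */
		uint32_t smi_cmd = 0x82;	/* Intel's documented default command */
		uint32_t command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
		uint32_t state   = 0xfffffffd;	/* placeholder raw value from %ebx */

		printf("command word: 0x%08x\n", command);
		printf("speedstep state: %u (0 = high, 1 = low)\n", state & 1);
		return 0;
	}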
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index ffd0f5ed071a..593171e967ef 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -61,23 +61,23 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
61 */ 61 */
62static unsigned char Cx86_dir0_msb __cpuinitdata = 0; 62static unsigned char Cx86_dir0_msb __cpuinitdata = 0;
63 63
64static char Cx86_model[][9] __cpuinitdata = { 64static const char __cpuinitconst Cx86_model[][9] = {
65 "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", 65 "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
66 "M II ", "Unknown" 66 "M II ", "Unknown"
67}; 67};
68static char Cx486_name[][5] __cpuinitdata = { 68static const char __cpuinitconst Cx486_name[][5] = {
69 "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", 69 "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
70 "SRx2", "DRx2" 70 "SRx2", "DRx2"
71}; 71};
72static char Cx486S_name[][4] __cpuinitdata = { 72static const char __cpuinitconst Cx486S_name[][4] = {
73 "S", "S2", "Se", "S2e" 73 "S", "S2", "Se", "S2e"
74}; 74};
75static char Cx486D_name[][4] __cpuinitdata = { 75static const char __cpuinitconst Cx486D_name[][4] = {
76 "DX", "DX2", "?", "?", "?", "DX4" 76 "DX", "DX2", "?", "?", "?", "DX4"
77}; 77};
78static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock"; 78static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock";
79static char cyrix_model_mult1[] __cpuinitdata = "12??43"; 79static const char __cpuinitconst cyrix_model_mult1[] = "12??43";
80static char cyrix_model_mult2[] __cpuinitdata = "12233445"; 80static const char __cpuinitconst cyrix_model_mult2[] = "12233445";
81 81
82/* 82/*
83 * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old 83 * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
@@ -435,7 +435,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
435 } 435 }
436} 436}
437 437
438static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { 438static const struct cpu_dev __cpuinitconst cyrix_cpu_dev = {
439 .c_vendor = "Cyrix", 439 .c_vendor = "Cyrix",
440 .c_ident = { "CyrixInstead" }, 440 .c_ident = { "CyrixInstead" },
441 .c_early_init = early_init_cyrix, 441 .c_early_init = early_init_cyrix,
@@ -446,7 +446,7 @@ static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
446 446
447cpu_dev_register(cyrix_cpu_dev); 447cpu_dev_register(cyrix_cpu_dev);
448 448
449static struct cpu_dev nsc_cpu_dev __cpuinitdata = { 449static const struct cpu_dev __cpuinitconst nsc_cpu_dev = {
450 .c_vendor = "NSC", 450 .c_vendor = "NSC",
451 .c_ident = { "Geode by NSC" }, 451 .c_ident = { "Geode by NSC" },
452 .c_init = init_nsc, 452 .c_init = init_nsc,
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 191117f1ad51..7437fa133c02 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -4,6 +4,7 @@
4#include <linux/string.h> 4#include <linux/string.h>
5#include <linux/bitops.h> 5#include <linux/bitops.h>
6#include <linux/smp.h> 6#include <linux/smp.h>
7#include <linux/sched.h>
7#include <linux/thread_info.h> 8#include <linux/thread_info.h>
8#include <linux/module.h> 9#include <linux/module.h>
9 10
@@ -54,13 +55,23 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
54 c->x86_cache_alignment = 128; 55 c->x86_cache_alignment = 128;
55#endif 56#endif
56 57
58 /* CPUID workaround for 0F33/0F34 CPU */
59 if (c->x86 == 0xF && c->x86_model == 0x3
60 && (c->x86_mask == 0x3 || c->x86_mask == 0x4))
61 c->x86_phys_bits = 36;
62
57 /* 63 /*
58 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate 64 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
59 * with P/T states and does not stop in deep C-states 65 * with P/T states and does not stop in deep C-states.
66 *
67 * It is also reliable across cores and sockets. (but not across
68 * cabinets - we turn it off in that case explicitly.)
60 */ 69 */
61 if (c->x86_power & (1 << 8)) { 70 if (c->x86_power & (1 << 8)) {
62 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 71 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
63 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); 72 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
73 set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
74 sched_clock_stable = 1;
64 } 75 }
65 76
66 /* 77 /*
@@ -410,7 +421,7 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i
410} 421}
411#endif 422#endif
412 423
413static struct cpu_dev intel_cpu_dev __cpuinitdata = { 424static const struct cpu_dev __cpuinitconst intel_cpu_dev = {
414 .c_vendor = "Intel", 425 .c_vendor = "Intel",
415 .c_ident = { "GenuineIntel" }, 426 .c_ident = { "GenuineIntel" },
416#ifdef CONFIG_X86_32 427#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 8e6ce2c146d6..483eda96e102 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -32,7 +32,7 @@ struct _cache_table
32}; 32};
33 33
34/* all the cache descriptor types we care about (no TLB or trace cache entries) */ 34/* all the cache descriptor types we care about (no TLB or trace cache entries) */
35static struct _cache_table cache_table[] __cpuinitdata = 35static const struct _cache_table __cpuinitconst cache_table[] =
36{ 36{
37 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ 37 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
38 { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ 38 { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
@@ -206,15 +206,15 @@ union l3_cache {
206 unsigned val; 206 unsigned val;
207}; 207};
208 208
209static unsigned short assocs[] __cpuinitdata = { 209static const unsigned short __cpuinitconst assocs[] = {
210 [1] = 1, [2] = 2, [4] = 4, [6] = 8, 210 [1] = 1, [2] = 2, [4] = 4, [6] = 8,
211 [8] = 16, [0xa] = 32, [0xb] = 48, 211 [8] = 16, [0xa] = 32, [0xb] = 48,
212 [0xc] = 64, 212 [0xc] = 64,
213 [0xf] = 0xffff // ?? 213 [0xf] = 0xffff // ??
214}; 214};
215 215
216static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; 216static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
217static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; 217static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
218 218
219static void __cpuinit 219static void __cpuinit
220amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, 220amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
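The assocs[] table above relies on C99 designated initializers: unnamed slots default to zero, so the raw CPUID associativity encoding can index the array directly. The same table, reproduced as a standalone program to show the idiom (the 0xf sentinel is kept from the original, where its meaning is marked "??"):

	#include <stdio.h>

	/* sparse lookup: index is the raw encoding, value is the decoded way count */
	static const unsigned short assocs[] = {
		[1] = 1, [2] = 2, [4] = 4, [6] = 8,
		[8] = 16, [0xa] = 32, [0xb] = 48,
		[0xc] = 64, [0xf] = 0xffff,	/* sentinel value from the original table */
	};

	int main(void)
	{
		unsigned int enc;

		for (enc = 0; enc < sizeof(assocs) / sizeof(assocs[0]); enc++)
			if (assocs[enc])
				printf("encoding 0x%x -> %u-way\n", enc, assocs[enc]);
		return 0;
	}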
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 1f429ee3477d..56dde9c4bc96 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -92,7 +92,8 @@ struct thresh_restart {
92}; 92};
93 93
94/* must be called with correct cpu affinity */ 94/* must be called with correct cpu affinity */
95static long threshold_restart_bank(void *_tr) 95/* Called via smp_call_function_single() */
96static void threshold_restart_bank(void *_tr)
96{ 97{
97 struct thresh_restart *tr = _tr; 98 struct thresh_restart *tr = _tr;
98 u32 mci_misc_hi, mci_misc_lo; 99 u32 mci_misc_hi, mci_misc_lo;
@@ -119,7 +120,6 @@ static long threshold_restart_bank(void *_tr)
119 120
120 mci_misc_hi |= MASK_COUNT_EN_HI; 121 mci_misc_hi |= MASK_COUNT_EN_HI;
121 wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); 122 wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
122 return 0;
123} 123}
124 124
125/* cpu init entry point, called from mce.c with preempt off */ 125/* cpu init entry point, called from mce.c with preempt off */
@@ -279,7 +279,7 @@ static ssize_t store_interrupt_enable(struct threshold_block *b,
279 tr.b = b; 279 tr.b = b;
280 tr.reset = 0; 280 tr.reset = 0;
281 tr.old_limit = 0; 281 tr.old_limit = 0;
282 work_on_cpu(b->cpu, threshold_restart_bank, &tr); 282 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
283 283
284 return end - buf; 284 return end - buf;
285} 285}
@@ -301,23 +301,32 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
301 tr.b = b; 301 tr.b = b;
302 tr.reset = 0; 302 tr.reset = 0;
303 303
304 work_on_cpu(b->cpu, threshold_restart_bank, &tr); 304 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
305 305
306 return end - buf; 306 return end - buf;
307} 307}
308 308
309static long local_error_count(void *_b) 309struct threshold_block_cross_cpu {
310 struct threshold_block *tb;
311 long retval;
312};
313
314static void local_error_count_handler(void *_tbcc)
310{ 315{
311 struct threshold_block *b = _b; 316 struct threshold_block_cross_cpu *tbcc = _tbcc;
317 struct threshold_block *b = tbcc->tb;
312 u32 low, high; 318 u32 low, high;
313 319
314 rdmsr(b->address, low, high); 320 rdmsr(b->address, low, high);
315 return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); 321 tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
316} 322}
317 323
318static ssize_t show_error_count(struct threshold_block *b, char *buf) 324static ssize_t show_error_count(struct threshold_block *b, char *buf)
319{ 325{
320 return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b)); 326 struct threshold_block_cross_cpu tbcc = { .tb = b, };
327
328 smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1);
329 return sprintf(buf, "%lx\n", tbcc.retval);
321} 330}
322 331
323static ssize_t store_error_count(struct threshold_block *b, 332static ssize_t store_error_count(struct threshold_block *b,
@@ -325,7 +334,7 @@ static ssize_t store_error_count(struct threshold_block *b,
325{ 334{
326 struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; 335 struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
327 336
328 work_on_cpu(b->cpu, threshold_restart_bank, &tr); 337 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
329 return 1; 338 return 1;
330} 339}
331 340
@@ -394,7 +403,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
394 if ((bank >= NR_BANKS) || (block >= NR_BLOCKS)) 403 if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
395 return 0; 404 return 0;
396 405
397 if (rdmsr_safe(address, &low, &high)) 406 if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
398 return 0; 407 return 0;
399 408
400 if (!(high & MASK_VALID_HI)) { 409 if (!(high & MASK_VALID_HI)) {
@@ -458,12 +467,11 @@ out_free:
458 return err; 467 return err;
459} 468}
460 469
461static __cpuinit long local_allocate_threshold_blocks(void *_bank) 470static __cpuinit long
471local_allocate_threshold_blocks(int cpu, unsigned int bank)
462{ 472{
463 unsigned int *bank = _bank; 473 return allocate_threshold_blocks(cpu, bank, 0,
464 474 MSR_IA32_MC0_MISC + bank * 4);
465 return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
466 MSR_IA32_MC0_MISC + *bank * 4);
467} 475}
468 476
469/* symlinks sibling shared banks to first core. first core owns dir/files. */ 477/* symlinks sibling shared banks to first core. first core owns dir/files. */
@@ -526,7 +534,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
526 534
527 per_cpu(threshold_banks, cpu)[bank] = b; 535 per_cpu(threshold_banks, cpu)[bank] = b;
528 536
529 err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank); 537 err = local_allocate_threshold_blocks(cpu, bank);
530 if (err) 538 if (err)
531 goto out_free; 539 goto out_free;
532 540
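The work_on_cpu() to smp_call_function_single() conversion above has to deal with the callback now returning void: the caller packs its arguments and a result slot into one struct, and the handler writes the answer back through the pointer. A hedged userspace sketch of that pattern, with a plain function call standing in for the cross-CPU invocation and invented field values:

	#include <stdio.h>

	#define THRESHOLD_MAX 0xFFF

	struct cross_call {
		unsigned int threshold_limit;	/* input */
		unsigned int raw_count;		/* input, stands in for the MSR read */
		long retval;			/* output, filled in by the handler */
	};

	/* void handler, like an smp_call_function_single() callback */
	static void error_count_handler(void *arg)
	{
		struct cross_call *cc = arg;

		cc->retval = (cc->raw_count & 0xFFF) -
			     (THRESHOLD_MAX - cc->threshold_limit);
	}

	int main(void)
	{
		struct cross_call cc = { .threshold_limit = 0x100, .raw_count = 0xF80 };

		error_count_handler(&cc);	/* the kernel runs this on another CPU */
		printf("error count: %ld\n", cc.retval);
		return 0;
	}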
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 96b2a85545aa..d6b72df89d69 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -270,7 +270,7 @@ void cmci_reenable(void)
270 cmci_discover(banks, 0); 270 cmci_discover(banks, 0);
271} 271}
272 272
273static __cpuinit void intel_init_cmci(void) 273static void intel_init_cmci(void)
274{ 274{
275 int banks; 275 int banks;
276 276
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile
index 191fc0533649..f4361b56f8e9 100644
--- a/arch/x86/kernel/cpu/mtrr/Makefile
+++ b/arch/x86/kernel/cpu/mtrr/Makefile
@@ -1,3 +1,3 @@
1obj-y := main.o if.o generic.o state.o 1obj-y := main.o if.o generic.o state.o cleanup.o
2obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o 2obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
3 3
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
new file mode 100644
index 000000000000..ce0fe4b5c04f
--- /dev/null
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -0,0 +1,1101 @@
1/* MTRR (Memory Type Range Register) cleanup
2
3 Copyright (C) 2009 Yinghai Lu
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public
7 License as published by the Free Software Foundation; either
8 version 2 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with this library; if not, write to the Free
17 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18*/
19
20#include <linux/module.h>
21#include <linux/init.h>
22#include <linux/pci.h>
23#include <linux/smp.h>
24#include <linux/cpu.h>
25#include <linux/mutex.h>
26#include <linux/sort.h>
27
28#include <asm/e820.h>
29#include <asm/mtrr.h>
30#include <asm/uaccess.h>
31#include <asm/processor.h>
32#include <asm/msr.h>
33#include <asm/kvm_para.h>
34#include "mtrr.h"
35
36/* should be related to MTRR_VAR_RANGES nums */
37#define RANGE_NUM 256
38
39struct res_range {
40 unsigned long start;
41 unsigned long end;
42};
43
44static int __init
45add_range(struct res_range *range, int nr_range, unsigned long start,
46 unsigned long end)
47{
48 /* out of slots */
49 if (nr_range >= RANGE_NUM)
50 return nr_range;
51
52 range[nr_range].start = start;
53 range[nr_range].end = end;
54
55 nr_range++;
56
57 return nr_range;
58}
59
60static int __init
61add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
62 unsigned long end)
63{
64 int i;
65
66 /* try to merge it with old one */
67 for (i = 0; i < nr_range; i++) {
68 unsigned long final_start, final_end;
69 unsigned long common_start, common_end;
70
71 if (!range[i].end)
72 continue;
73
74 common_start = max(range[i].start, start);
75 common_end = min(range[i].end, end);
76 if (common_start > common_end + 1)
77 continue;
78
79 final_start = min(range[i].start, start);
80 final_end = max(range[i].end, end);
81
82 range[i].start = final_start;
83 range[i].end = final_end;
84 return nr_range;
85 }
86
87 /* need to add that */
88 return add_range(range, nr_range, start, end);
89}
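The merge test in add_range_with_merge() treats two ranges as mergeable when they overlap or touch, i.e. max(start1, start2) <= min(end1, end2) + 1, and then keeps their union. A tiny standalone check of just that condition, on sample numbers:

	#include <stdio.h>

	#define MAX(a, b) ((a) > (b) ? (a) : (b))
	#define MIN(a, b) ((a) < (b) ? (a) : (b))

	static int mergeable(unsigned long s1, unsigned long e1,
			     unsigned long s2, unsigned long e2)
	{
		/* overlapping or directly adjacent ranges collapse into one */
		return MAX(s1, s2) <= MIN(e1, e2) + 1;
	}

	int main(void)
	{
		printf("[0,0x9f] + [0xa0,0xff]: %s\n",
		       mergeable(0x00, 0x9f, 0xa0, 0xff) ? "merge" : "keep separate");
		printf("[0,0x9f] + [0xc0,0xff]: %s\n",
		       mergeable(0x00, 0x9f, 0xc0, 0xff) ? "merge" : "keep separate");
		return 0;
	}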
90
91static void __init
92subtract_range(struct res_range *range, unsigned long start, unsigned long end)
93{
94 int i, j;
95
96 for (j = 0; j < RANGE_NUM; j++) {
97 if (!range[j].end)
98 continue;
99
100 if (start <= range[j].start && end >= range[j].end) {
101 range[j].start = 0;
102 range[j].end = 0;
103 continue;
104 }
105
106 if (start <= range[j].start && end < range[j].end &&
107 range[j].start < end + 1) {
108 range[j].start = end + 1;
109 continue;
110 }
111
112
113 if (start > range[j].start && end >= range[j].end &&
114 range[j].end > start - 1) {
115 range[j].end = start - 1;
116 continue;
117 }
118
119 if (start > range[j].start && end < range[j].end) {
120 /* find the new spare */
121 for (i = 0; i < RANGE_NUM; i++) {
122 if (range[i].end == 0)
123 break;
124 }
125 if (i < RANGE_NUM) {
126 range[i].end = range[j].end;
127 range[i].start = end + 1;
128 } else {
 129 				printk(KERN_ERR "run out of slots in ranges\n");
130 }
131 range[j].end = start - 1;
132 continue;
133 }
134 }
135}
136
137static int __init cmp_range(const void *x1, const void *x2)
138{
139 const struct res_range *r1 = x1;
140 const struct res_range *r2 = x2;
141 long start1, start2;
142
143 start1 = r1->start;
144 start2 = r2->start;
145
146 return start1 - start2;
147}
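cmp_range() is a plain comparator on the start address, handed to the kernel's sort() before the ranges are processed further. The same idea works with userspace qsort(); in the sketch below the subtraction is replaced by an overflow-safe comparison, otherwise it follows the code above:

	#include <stdio.h>
	#include <stdlib.h>

	struct res_range {
		unsigned long start;
		unsigned long end;
	};

	static int cmp_range(const void *x1, const void *x2)
	{
		const struct res_range *r1 = x1;
		const struct res_range *r2 = x2;

		/* sort by start address, ascending */
		return (r1->start > r2->start) - (r1->start < r2->start);
	}

	int main(void)
	{
		struct res_range range[] = {
			{ 0x100000, 0x3fffff }, { 0x0, 0x9ffff }, { 0x400000, 0x7fffff },
		};
		size_t i, n = sizeof(range) / sizeof(range[0]);

		qsort(range, n, sizeof(range[0]), cmp_range);
		for (i = 0; i < n; i++)
			printf("%016lx - %016lx\n", range[i].start, range[i].end);
		return 0;
	}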
148
149struct var_mtrr_range_state {
150 unsigned long base_pfn;
151 unsigned long size_pfn;
152 mtrr_type type;
153};
154
155static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
156static int __initdata debug_print;
157
158static int __init
159x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
160 unsigned long extra_remove_base,
161 unsigned long extra_remove_size)
162{
163 unsigned long base, size;
164 mtrr_type type;
165 int i;
166
167 for (i = 0; i < num_var_ranges; i++) {
168 type = range_state[i].type;
169 if (type != MTRR_TYPE_WRBACK)
170 continue;
171 base = range_state[i].base_pfn;
172 size = range_state[i].size_pfn;
173 nr_range = add_range_with_merge(range, nr_range, base,
174 base + size - 1);
175 }
176 if (debug_print) {
177 printk(KERN_DEBUG "After WB checking\n");
178 for (i = 0; i < nr_range; i++)
179 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
180 range[i].start, range[i].end + 1);
181 }
182
183 /* take out UC ranges */
184 for (i = 0; i < num_var_ranges; i++) {
185 type = range_state[i].type;
186 if (type != MTRR_TYPE_UNCACHABLE &&
187 type != MTRR_TYPE_WRPROT)
188 continue;
189 size = range_state[i].size_pfn;
190 if (!size)
191 continue;
192 base = range_state[i].base_pfn;
193 if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
194 (mtrr_state.enabled & 1)) {
195 /* Var MTRR contains UC entry below 1M? Skip it: */
196 printk(KERN_WARNING "WARNING: BIOS bug: VAR MTRR %d "
197 "contains strange UC entry under 1M, check "
198 "with your system vendor!\n", i);
199 if (base + size <= (1<<(20-PAGE_SHIFT)))
200 continue;
201 size -= (1<<(20-PAGE_SHIFT)) - base;
202 base = 1<<(20-PAGE_SHIFT);
203 }
204 subtract_range(range, base, base + size - 1);
205 }
206 if (extra_remove_size)
207 subtract_range(range, extra_remove_base,
208 extra_remove_base + extra_remove_size - 1);
209
210 /* get new range num */
211 nr_range = 0;
212 for (i = 0; i < RANGE_NUM; i++) {
213 if (!range[i].end)
214 continue;
215 nr_range++;
216 }
217 if (debug_print) {
218 printk(KERN_DEBUG "After UC checking\n");
219 for (i = 0; i < nr_range; i++)
220 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
221 range[i].start, range[i].end + 1);
222 }
223
224 /* sort the ranges */
225 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
226 if (debug_print) {
227 printk(KERN_DEBUG "After sorting\n");
228 for (i = 0; i < nr_range; i++)
229 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
230 range[i].start, range[i].end + 1);
231 }
232
 233 	/* clear the entries that are not used */
234 for (i = nr_range; i < RANGE_NUM; i++)
235 memset(&range[i], 0, sizeof(range[i]));
236
237 return nr_range;
238}
239
240static struct res_range __initdata range[RANGE_NUM];
241static int __initdata nr_range;
242
243#ifdef CONFIG_MTRR_SANITIZER
244
245static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
246{
247 unsigned long sum;
248 int i;
249
250 sum = 0;
251 for (i = 0; i < nr_range; i++)
252 sum += range[i].end + 1 - range[i].start;
253
254 return sum;
255}
256
257static int enable_mtrr_cleanup __initdata =
258 CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
259
260static int __init disable_mtrr_cleanup_setup(char *str)
261{
262 enable_mtrr_cleanup = 0;
263 return 0;
264}
265early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
266
267static int __init enable_mtrr_cleanup_setup(char *str)
268{
269 enable_mtrr_cleanup = 1;
270 return 0;
271}
272early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
273
274static int __init mtrr_cleanup_debug_setup(char *str)
275{
276 debug_print = 1;
277 return 0;
278}
279early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
280
281struct var_mtrr_state {
282 unsigned long range_startk;
283 unsigned long range_sizek;
284 unsigned long chunk_sizek;
285 unsigned long gran_sizek;
286 unsigned int reg;
287};
288
289static void __init
290set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
291 unsigned char type, unsigned int address_bits)
292{
293 u32 base_lo, base_hi, mask_lo, mask_hi;
294 u64 base, mask;
295
296 if (!sizek) {
297 fill_mtrr_var_range(reg, 0, 0, 0, 0);
298 return;
299 }
300
301 mask = (1ULL << address_bits) - 1;
302 mask &= ~((((u64)sizek) << 10) - 1);
303
304 base = ((u64)basek) << 10;
305
306 base |= type;
307 mask |= 0x800;
308
309 base_lo = base & ((1ULL<<32) - 1);
310 base_hi = base >> 32;
311
312 mask_lo = mask & ((1ULL<<32) - 1);
313 mask_hi = mask >> 32;
314
315 fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
316}
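set_var_mtrr() encodes a PHYSBASE/PHYSMASK register pair: the mask covers address_bits of physical address space minus the power-of-two size, the memory type sits in the low byte of the base, and bit 11 of the mask is the valid bit. A userspace sketch of just that encoding, using 36 physical address bits and a 1 GB write-back range as example inputs:

	#include <stdio.h>
	#include <stdint.h>

	#define MTRR_TYPE_WRBACK 6

	int main(void)
	{
		unsigned int address_bits = 36;		/* example CPU */
		uint64_t basek = 0;			/* base in KB */
		uint64_t sizek = 1024 * 1024;		/* 1 GB, in KB */
		uint64_t base, mask;

		mask = (1ULL << address_bits) - 1;
		mask &= ~((sizek << 10) - 1);		/* clear bits covered by the size */

		base = basek << 10;
		base |= MTRR_TYPE_WRBACK;		/* memory type in the low byte */
		mask |= 0x800;				/* valid bit */

		printf("PHYSBASE: lo=%08x hi=%08x\n",
		       (uint32_t)base, (uint32_t)(base >> 32));
		printf("PHYSMASK: lo=%08x hi=%08x\n",
		       (uint32_t)mask, (uint32_t)(mask >> 32));
		return 0;
	}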
317
318static void __init
319save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
320 unsigned char type)
321{
322 range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
323 range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
324 range_state[reg].type = type;
325}
326
327static void __init
328set_var_mtrr_all(unsigned int address_bits)
329{
330 unsigned long basek, sizek;
331 unsigned char type;
332 unsigned int reg;
333
334 for (reg = 0; reg < num_var_ranges; reg++) {
335 basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
336 sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
337 type = range_state[reg].type;
338
339 set_var_mtrr(reg, basek, sizek, type, address_bits);
340 }
341}
342
343static unsigned long to_size_factor(unsigned long sizek, char *factorp)
344{
345 char factor;
346 unsigned long base = sizek;
347
348 if (base & ((1<<10) - 1)) {
349 /* not MB alignment */
350 factor = 'K';
351 } else if (base & ((1<<20) - 1)) {
352 factor = 'M';
353 base >>= 10;
354 } else {
355 factor = 'G';
356 base >>= 20;
357 }
358
359 *factorp = factor;
360
361 return base;
362}
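to_size_factor() only scales a KB count down to MB or GB when the value is exactly aligned, so the debug output never loses precision. The same logic as a standalone program with a few sample inputs:

	#include <stdio.h>

	static unsigned long to_size_factor(unsigned long sizek, char *factorp)
	{
		unsigned long base = sizek;
		char factor;

		if (base & ((1 << 10) - 1)) {		/* not MB aligned: keep KB */
			factor = 'K';
		} else if (base & ((1 << 20) - 1)) {	/* not GB aligned: show MB */
			factor = 'M';
			base >>= 10;
		} else {				/* GB aligned */
			factor = 'G';
			base >>= 20;
		}
		*factorp = factor;
		return base;
	}

	int main(void)
	{
		unsigned long samples[] = { 64, 131072, 1048576, 1536 };
		char factor;
		unsigned int i;

		for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
			unsigned long v = to_size_factor(samples[i], &factor);
			printf("%lu KB -> %lu%cB\n", samples[i], v, factor);
		}
		return 0;
	}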
363
364static unsigned int __init
365range_to_mtrr(unsigned int reg, unsigned long range_startk,
366 unsigned long range_sizek, unsigned char type)
367{
368 if (!range_sizek || (reg >= num_var_ranges))
369 return reg;
370
371 while (range_sizek) {
372 unsigned long max_align, align;
373 unsigned long sizek;
374
375 /* Compute the maximum size I can make a range */
376 if (range_startk)
377 max_align = ffs(range_startk) - 1;
378 else
379 max_align = 32;
380 align = fls(range_sizek) - 1;
381 if (align > max_align)
382 align = max_align;
383
384 sizek = 1 << align;
385 if (debug_print) {
386 char start_factor = 'K', size_factor = 'K';
387 unsigned long start_base, size_base;
388
389 start_base = to_size_factor(range_startk,
390 &start_factor),
391 size_base = to_size_factor(sizek, &size_factor),
392
393 printk(KERN_DEBUG "Setting variable MTRR %d, "
394 "base: %ld%cB, range: %ld%cB, type %s\n",
395 reg, start_base, start_factor,
396 size_base, size_factor,
397 (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
398 ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")
399 );
400 }
401 save_var_mtrr(reg++, range_startk, sizek, type);
402 range_startk += sizek;
403 range_sizek -= sizek;
404 if (reg >= num_var_ranges)
405 break;
406 }
407 return reg;
408}
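range_to_mtrr() carves an arbitrary region into power-of-two MTRR chunks: each step takes the largest power of two that both keeps the base aligned (ffs on the start) and fits in what remains (fls on the size). A small sketch of that loop on example numbers, with GCC builtins standing in for the kernel's ffs()/fls():

	#include <stdio.h>

	static int my_ffs(unsigned long x) { return __builtin_ffsl(x); }
	static int my_fls(unsigned long x)
	{
		return x ? 8 * (int)sizeof(unsigned long) - __builtin_clzl(x) : 0;
	}

	int main(void)
	{
		unsigned long range_startk = 0x140000;	/* 1.25 GB, in KB (example) */
		unsigned long range_sizek  = 0xc0000;	/* 768 MB, in KB (example) */

		while (range_sizek) {
			unsigned long max_align, align, sizek;

			/* largest chunk that keeps the base aligned... */
			max_align = range_startk ? my_ffs(range_startk) - 1 : 32;
			/* ...and that still fits in what is left */
			align = my_fls(range_sizek) - 1;
			if (align > max_align)
				align = max_align;

			sizek = 1UL << align;
			printf("MTRR chunk: base %luM size %luM\n",
			       range_startk >> 10, sizek >> 10);
			range_startk += sizek;
			range_sizek  -= sizek;
		}
		return 0;
	}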
409
410static unsigned __init
411range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
412 unsigned long sizek)
413{
414 unsigned long hole_basek, hole_sizek;
415 unsigned long second_basek, second_sizek;
416 unsigned long range0_basek, range0_sizek;
417 unsigned long range_basek, range_sizek;
418 unsigned long chunk_sizek;
419 unsigned long gran_sizek;
420
421 hole_basek = 0;
422 hole_sizek = 0;
423 second_basek = 0;
424 second_sizek = 0;
425 chunk_sizek = state->chunk_sizek;
426 gran_sizek = state->gran_sizek;
427
 428 	/* align to gran size; prevent small blocks from using up MTRRs */
429 range_basek = ALIGN(state->range_startk, gran_sizek);
430 if ((range_basek > basek) && basek)
431 return second_sizek;
432 state->range_sizek -= (range_basek - state->range_startk);
433 range_sizek = ALIGN(state->range_sizek, gran_sizek);
434
435 while (range_sizek > state->range_sizek) {
436 range_sizek -= gran_sizek;
437 if (!range_sizek)
438 return 0;
439 }
440 state->range_sizek = range_sizek;
441
442 /* try to append some small hole */
443 range0_basek = state->range_startk;
444 range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
445
446 /* no increase */
447 if (range0_sizek == state->range_sizek) {
448 if (debug_print)
449 printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
450 range0_basek<<10,
451 (range0_basek + state->range_sizek)<<10);
452 state->reg = range_to_mtrr(state->reg, range0_basek,
453 state->range_sizek, MTRR_TYPE_WRBACK);
454 return 0;
455 }
456
457 /* only cut back, when it is not the last */
458 if (sizek) {
459 while (range0_basek + range0_sizek > (basek + sizek)) {
460 if (range0_sizek >= chunk_sizek)
461 range0_sizek -= chunk_sizek;
462 else
463 range0_sizek = 0;
464
465 if (!range0_sizek)
466 break;
467 }
468 }
469
470second_try:
471 range_basek = range0_basek + range0_sizek;
472
473 /* one hole in the middle */
474 if (range_basek > basek && range_basek <= (basek + sizek))
475 second_sizek = range_basek - basek;
476
477 if (range0_sizek > state->range_sizek) {
478
479 /* one hole in middle or at end */
480 hole_sizek = range0_sizek - state->range_sizek - second_sizek;
481
482 /* hole size should be less than half of range0 size */
483 if (hole_sizek >= (range0_sizek >> 1) &&
484 range0_sizek >= chunk_sizek) {
485 range0_sizek -= chunk_sizek;
486 second_sizek = 0;
487 hole_sizek = 0;
488
489 goto second_try;
490 }
491 }
492
493 if (range0_sizek) {
494 if (debug_print)
495 printk(KERN_DEBUG "range0: %016lx - %016lx\n",
496 range0_basek<<10,
497 (range0_basek + range0_sizek)<<10);
498 state->reg = range_to_mtrr(state->reg, range0_basek,
499 range0_sizek, MTRR_TYPE_WRBACK);
500 }
501
502 if (range0_sizek < state->range_sizek) {
503 /* need to handle left over */
504 range_sizek = state->range_sizek - range0_sizek;
505
506 if (debug_print)
507 printk(KERN_DEBUG "range: %016lx - %016lx\n",
508 range_basek<<10,
509 (range_basek + range_sizek)<<10);
510 state->reg = range_to_mtrr(state->reg, range_basek,
511 range_sizek, MTRR_TYPE_WRBACK);
512 }
513
514 if (hole_sizek) {
515 hole_basek = range_basek - hole_sizek - second_sizek;
516 if (debug_print)
517 printk(KERN_DEBUG "hole: %016lx - %016lx\n",
518 hole_basek<<10,
519 (hole_basek + hole_sizek)<<10);
520 state->reg = range_to_mtrr(state->reg, hole_basek,
521 hole_sizek, MTRR_TYPE_UNCACHABLE);
522 }
523
524 return second_sizek;
525}
526
527static void __init
528set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
529 unsigned long size_pfn)
530{
531 unsigned long basek, sizek;
532 unsigned long second_sizek = 0;
533
534 if (state->reg >= num_var_ranges)
535 return;
536
537 basek = base_pfn << (PAGE_SHIFT - 10);
538 sizek = size_pfn << (PAGE_SHIFT - 10);
539
540 /* See if I can merge with the last range */
541 if ((basek <= 1024) ||
542 (state->range_startk + state->range_sizek == basek)) {
543 unsigned long endk = basek + sizek;
544 state->range_sizek = endk - state->range_startk;
545 return;
546 }
547 /* Write the range mtrrs */
548 if (state->range_sizek != 0)
549 second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
550
551 /* Allocate an msr */
552 state->range_startk = basek + second_sizek;
553 state->range_sizek = sizek - second_sizek;
554}
555
 556/* minimum size of an mtrr block that can take a hole */
557static u64 mtrr_chunk_size __initdata = (256ULL<<20);
558
559static int __init parse_mtrr_chunk_size_opt(char *p)
560{
561 if (!p)
562 return -EINVAL;
563 mtrr_chunk_size = memparse(p, &p);
564 return 0;
565}
566early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
567
 568/* granularity of an mtrr block */
569static u64 mtrr_gran_size __initdata;
570
571static int __init parse_mtrr_gran_size_opt(char *p)
572{
573 if (!p)
574 return -EINVAL;
575 mtrr_gran_size = memparse(p, &p);
576 return 0;
577}
578early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
579
580static int nr_mtrr_spare_reg __initdata =
581 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
582
583static int __init parse_mtrr_spare_reg(char *arg)
584{
585 if (arg)
586 nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
587 return 0;
588}
589
590early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
591
592static int __init
593x86_setup_var_mtrrs(struct res_range *range, int nr_range,
594 u64 chunk_size, u64 gran_size)
595{
596 struct var_mtrr_state var_state;
597 int i;
598 int num_reg;
599
600 var_state.range_startk = 0;
601 var_state.range_sizek = 0;
602 var_state.reg = 0;
603 var_state.chunk_sizek = chunk_size >> 10;
604 var_state.gran_sizek = gran_size >> 10;
605
606 memset(range_state, 0, sizeof(range_state));
607
608 /* Write the range etc */
609 for (i = 0; i < nr_range; i++)
610 set_var_mtrr_range(&var_state, range[i].start,
611 range[i].end - range[i].start + 1);
612
613 /* Write the last range */
614 if (var_state.range_sizek != 0)
615 range_to_mtrr_with_hole(&var_state, 0, 0);
616
617 num_reg = var_state.reg;
618 /* Clear out the extra MTRR's */
619 while (var_state.reg < num_var_ranges) {
620 save_var_mtrr(var_state.reg, 0, 0, 0);
621 var_state.reg++;
622 }
623
624 return num_reg;
625}
626
627struct mtrr_cleanup_result {
628 unsigned long gran_sizek;
629 unsigned long chunk_sizek;
630 unsigned long lose_cover_sizek;
631 unsigned int num_reg;
632 int bad;
633};
634
635/*
636 * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
637 * chunk size: gran_size, ..., 2G
638 * so we need (1+16)*8
639 */
640#define NUM_RESULT 136
641#define PSHIFT (PAGE_SHIFT - 10)
642
643static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
644static unsigned long __initdata min_loss_pfn[RANGE_NUM];
645
646static void __init print_out_mtrr_range_state(void)
647{
648 int i;
649 char start_factor = 'K', size_factor = 'K';
650 unsigned long start_base, size_base;
651 mtrr_type type;
652
653 for (i = 0; i < num_var_ranges; i++) {
654
655 size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
656 if (!size_base)
657 continue;
658
659 size_base = to_size_factor(size_base, &size_factor),
660 start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
661 start_base = to_size_factor(start_base, &start_factor),
662 type = range_state[i].type;
663
664 printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
665 i, start_base, start_factor,
666 size_base, size_factor,
667 (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
668 ((type == MTRR_TYPE_WRPROT) ? "WP" :
669 ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
670 );
671 }
672}
673
674static int __init mtrr_need_cleanup(void)
675{
676 int i;
677 mtrr_type type;
678 unsigned long size;
679 /* extra one for all 0 */
680 int num[MTRR_NUM_TYPES + 1];
681
682 /* check entries number */
683 memset(num, 0, sizeof(num));
684 for (i = 0; i < num_var_ranges; i++) {
685 type = range_state[i].type;
686 size = range_state[i].size_pfn;
687 if (type >= MTRR_NUM_TYPES)
688 continue;
689 if (!size)
690 type = MTRR_NUM_TYPES;
691 if (type == MTRR_TYPE_WRPROT)
692 type = MTRR_TYPE_UNCACHABLE;
693 num[type]++;
694 }
695
696 /* check if we got UC entries */
697 if (!num[MTRR_TYPE_UNCACHABLE])
698 return 0;
699
700 /* check if we only had WB and UC */
701 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
702 num_var_ranges - num[MTRR_NUM_TYPES])
703 return 0;
704
705 return 1;
706}
707
708static unsigned long __initdata range_sums;
709static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
710 unsigned long extra_remove_base,
711 unsigned long extra_remove_size,
712 int i)
713{
714 int num_reg;
715 static struct res_range range_new[RANGE_NUM];
716 static int nr_range_new;
717 unsigned long range_sums_new;
718
719 /* convert ranges to var ranges state */
720 num_reg = x86_setup_var_mtrrs(range, nr_range,
721 chunk_size, gran_size);
722
723 /* we got new setting in range_state, check it */
724 memset(range_new, 0, sizeof(range_new));
725 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
726 extra_remove_base, extra_remove_size);
727 range_sums_new = sum_ranges(range_new, nr_range_new);
728
729 result[i].chunk_sizek = chunk_size >> 10;
730 result[i].gran_sizek = gran_size >> 10;
731 result[i].num_reg = num_reg;
732 if (range_sums < range_sums_new) {
733 result[i].lose_cover_sizek =
734 (range_sums_new - range_sums) << PSHIFT;
735 result[i].bad = 1;
736 } else
737 result[i].lose_cover_sizek =
738 (range_sums - range_sums_new) << PSHIFT;
739
740 /* double check it */
741 if (!result[i].bad && !result[i].lose_cover_sizek) {
742 if (nr_range_new != nr_range ||
743 memcmp(range, range_new, sizeof(range)))
744 result[i].bad = 1;
745 }
746
747 if (!result[i].bad && (range_sums - range_sums_new <
748 min_loss_pfn[num_reg])) {
749 min_loss_pfn[num_reg] =
750 range_sums - range_sums_new;
751 }
752}
753
754static void __init mtrr_print_out_one_result(int i)
755{
756 char gran_factor, chunk_factor, lose_factor;
757 unsigned long gran_base, chunk_base, lose_base;
758
759 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
760 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
761 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
762 printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
763 result[i].bad ? "*BAD*" : " ",
764 gran_base, gran_factor, chunk_base, chunk_factor);
765 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
766 result[i].num_reg, result[i].bad ? "-" : "",
767 lose_base, lose_factor);
768}
769
770static int __init mtrr_search_optimal_index(void)
771{
772 int i;
773 int num_reg_good;
774 int index_good;
775
776 if (nr_mtrr_spare_reg >= num_var_ranges)
777 nr_mtrr_spare_reg = num_var_ranges - 1;
778 num_reg_good = -1;
779 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
780 if (!min_loss_pfn[i])
781 num_reg_good = i;
782 }
783
784 index_good = -1;
785 if (num_reg_good != -1) {
786 for (i = 0; i < NUM_RESULT; i++) {
787 if (!result[i].bad &&
788 result[i].num_reg == num_reg_good &&
789 !result[i].lose_cover_sizek) {
790 index_good = i;
791 break;
792 }
793 }
794 }
795
796 return index_good;
797}
798
799
800int __init mtrr_cleanup(unsigned address_bits)
801{
802 unsigned long extra_remove_base, extra_remove_size;
803 unsigned long base, size, def, dummy;
804 mtrr_type type;
805 u64 chunk_size, gran_size;
806 int index_good;
807 int i;
808
809 if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
810 return 0;
811 rdmsr(MTRRdefType_MSR, def, dummy);
812 def &= 0xff;
813 if (def != MTRR_TYPE_UNCACHABLE)
814 return 0;
815
816 /* get it and store it aside */
817 memset(range_state, 0, sizeof(range_state));
818 for (i = 0; i < num_var_ranges; i++) {
819 mtrr_if->get(i, &base, &size, &type);
820 range_state[i].base_pfn = base;
821 range_state[i].size_pfn = size;
822 range_state[i].type = type;
823 }
824
 825 	/* check if we need to handle it and can handle it */
826 if (!mtrr_need_cleanup())
827 return 0;
828
829 /* print original var MTRRs at first, for debugging: */
830 printk(KERN_DEBUG "original variable MTRRs\n");
831 print_out_mtrr_range_state();
832
833 memset(range, 0, sizeof(range));
834 extra_remove_size = 0;
835 extra_remove_base = 1 << (32 - PAGE_SHIFT);
836 if (mtrr_tom2)
837 extra_remove_size =
838 (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
839 nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
840 extra_remove_size);
841 /*
 842 	 * [0, 1M) should always be covered by a var MTRR with WB,
 843 	 * and fixed MTRRs should take effect before var MTRRs for it
844 */
845 nr_range = add_range_with_merge(range, nr_range, 0,
846 (1ULL<<(20 - PAGE_SHIFT)) - 1);
847 /* sort the ranges */
848 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
849
850 range_sums = sum_ranges(range, nr_range);
 851 	printk(KERN_INFO "total RAM covered: %ldM\n",
852 range_sums >> (20 - PAGE_SHIFT));
853
854 if (mtrr_chunk_size && mtrr_gran_size) {
855 i = 0;
856 mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
857 extra_remove_base, extra_remove_size, i);
858
859 mtrr_print_out_one_result(i);
860
861 if (!result[i].bad) {
862 set_var_mtrr_all(address_bits);
863 printk(KERN_DEBUG "New variable MTRRs\n");
864 print_out_mtrr_range_state();
865 return 1;
866 }
867 printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
868 "will find optimal one\n");
869 }
870
871 i = 0;
872 memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
873 memset(result, 0, sizeof(result));
874 for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
875
876 for (chunk_size = gran_size; chunk_size < (1ULL<<32);
877 chunk_size <<= 1) {
878
879 if (i >= NUM_RESULT)
880 continue;
881
882 mtrr_calc_range_state(chunk_size, gran_size,
883 extra_remove_base, extra_remove_size, i);
884 if (debug_print) {
885 mtrr_print_out_one_result(i);
886 printk(KERN_INFO "\n");
887 }
888
889 i++;
890 }
891 }
892
893 /* try to find the optimal index */
894 index_good = mtrr_search_optimal_index();
895
896 if (index_good != -1) {
897 printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
898 i = index_good;
899 mtrr_print_out_one_result(i);
900
901 /* convert ranges to var ranges state */
902 chunk_size = result[i].chunk_sizek;
903 chunk_size <<= 10;
904 gran_size = result[i].gran_sizek;
905 gran_size <<= 10;
906 x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
907 set_var_mtrr_all(address_bits);
908 printk(KERN_DEBUG "New variable MTRRs\n");
909 print_out_mtrr_range_state();
910 return 1;
911 } else {
912 /* print out all */
913 for (i = 0; i < NUM_RESULT; i++)
914 mtrr_print_out_one_result(i);
915 }
916
917 printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
918 printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
919
920 return 0;
921}
922#else
923int __init mtrr_cleanup(unsigned address_bits)
924{
925 return 0;
926}
927#endif
928
929static int disable_mtrr_trim;
930
931static int __init disable_mtrr_trim_setup(char *str)
932{
933 disable_mtrr_trim = 1;
934 return 0;
935}
936early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
937
938/*
939 * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
940 * for memory >4GB. Check for that here.
941 * Note this won't check if the MTRRs < 4GB where the magic bit doesn't
942 * apply to are wrong, but so far we don't know of any such case in the wild.
943 */
944#define Tom2Enabled (1U << 21)
945#define Tom2ForceMemTypeWB (1U << 22)
946
947int __init amd_special_default_mtrr(void)
948{
949 u32 l, h;
950
951 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
952 return 0;
953 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
954 return 0;
955 /* In case some hypervisor doesn't pass SYSCFG through */
956 if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
957 return 0;
958 /*
959 * Memory between 4GB and top of mem is forced WB by this magic bit.
960 * Reserved before K8RevF, but should be zero there.
961 */
962 if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
963 (Tom2Enabled | Tom2ForceMemTypeWB))
964 return 1;
965 return 0;
966}
967
968static u64 __init real_trim_memory(unsigned long start_pfn,
969 unsigned long limit_pfn)
970{
971 u64 trim_start, trim_size;
972 trim_start = start_pfn;
973 trim_start <<= PAGE_SHIFT;
974 trim_size = limit_pfn;
975 trim_size <<= PAGE_SHIFT;
976 trim_size -= trim_start;
977
978 return e820_update_range(trim_start, trim_size, E820_RAM,
979 E820_RESERVED);
980}
981/**
982 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
983 * @end_pfn: ending page frame number
984 *
 985 * Some buggy BIOSes don't set up the MTRRs properly for systems with certain
 986 * memory configurations. This routine checks that the highest MTRR matches
 987 * the end of memory, to make sure that write-back MTRRs cover
 988 * all of the memory the kernel intends to use. If not, it'll trim any
 989 * memory off the end by adjusting end_pfn, removing it from the kernel's
 990 * allocation pools, and warning the user with an obnoxious message.
991 */
992int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
993{
994 unsigned long i, base, size, highest_pfn = 0, def, dummy;
995 mtrr_type type;
996 u64 total_trim_size;
997
998 /* extra one for all 0 */
999 int num[MTRR_NUM_TYPES + 1];
1000 /*
1001 * Make sure we only trim uncachable memory on machines that
1002 * support the Intel MTRR architecture:
1003 */
1004 if (!is_cpu(INTEL) || disable_mtrr_trim)
1005 return 0;
1006 rdmsr(MTRRdefType_MSR, def, dummy);
1007 def &= 0xff;
1008 if (def != MTRR_TYPE_UNCACHABLE)
1009 return 0;
1010
1011 /* get it and store it aside */
1012 memset(range_state, 0, sizeof(range_state));
1013 for (i = 0; i < num_var_ranges; i++) {
1014 mtrr_if->get(i, &base, &size, &type);
1015 range_state[i].base_pfn = base;
1016 range_state[i].size_pfn = size;
1017 range_state[i].type = type;
1018 }
1019
1020 /* Find highest cached pfn */
1021 for (i = 0; i < num_var_ranges; i++) {
1022 type = range_state[i].type;
1023 if (type != MTRR_TYPE_WRBACK)
1024 continue;
1025 base = range_state[i].base_pfn;
1026 size = range_state[i].size_pfn;
1027 if (highest_pfn < base + size)
1028 highest_pfn = base + size;
1029 }
1030
 1031 	/* kvm/qemu doesn't have MTRRs set up right; don't trim everything */
1032 if (!highest_pfn) {
1033 printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
1034 return 0;
1035 }
1036
1037 /* check entries number */
1038 memset(num, 0, sizeof(num));
1039 for (i = 0; i < num_var_ranges; i++) {
1040 type = range_state[i].type;
1041 if (type >= MTRR_NUM_TYPES)
1042 continue;
1043 size = range_state[i].size_pfn;
1044 if (!size)
1045 type = MTRR_NUM_TYPES;
1046 num[type]++;
1047 }
1048
1049 /* no entry for WB? */
1050 if (!num[MTRR_TYPE_WRBACK])
1051 return 0;
1052
1053 /* check if we only had WB and UC */
1054 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
1055 num_var_ranges - num[MTRR_NUM_TYPES])
1056 return 0;
1057
1058 memset(range, 0, sizeof(range));
1059 nr_range = 0;
1060 if (mtrr_tom2) {
1061 range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
1062 range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
1063 if (highest_pfn < range[nr_range].end + 1)
1064 highest_pfn = range[nr_range].end + 1;
1065 nr_range++;
1066 }
1067 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
1068
1069 total_trim_size = 0;
1070 /* check the head */
1071 if (range[0].start)
1072 total_trim_size += real_trim_memory(0, range[0].start);
1073 /* check the holes */
1074 for (i = 0; i < nr_range - 1; i++) {
1075 if (range[i].end + 1 < range[i+1].start)
1076 total_trim_size += real_trim_memory(range[i].end + 1,
1077 range[i+1].start);
1078 }
1079 /* check the top */
1080 i = nr_range - 1;
1081 if (range[i].end + 1 < end_pfn)
1082 total_trim_size += real_trim_memory(range[i].end + 1,
1083 end_pfn);
1084
1085 if (total_trim_size) {
1086 printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
1087 " all of memory, losing %lluMB of RAM.\n",
1088 total_trim_size >> 20);
1089
1090 if (!changed_by_mtrr_cleanup)
1091 WARN_ON(1);
1092
1093 printk(KERN_INFO "update e820 for mtrr\n");
1094 update_e820();
1095
1096 return 1;
1097 }
1098
1099 return 0;
1100}
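mtrr_trim_uncached_memory() walks the sorted WB ranges and trims three kinds of gaps out of the e820 map: anything before the first range, holes between consecutive ranges, and anything between the last range and end_pfn. A userspace sketch of that walk over made-up page-frame ranges (the real code calls real_trim_memory()/e820_update_range() where this prints):

	#include <stdio.h>

	struct res_range { unsigned long start, end; };	/* inclusive pfn range */

	static void trim(unsigned long start_pfn, unsigned long limit_pfn)
	{
		printf("would trim pfn %#lx - %#lx\n", start_pfn, limit_pfn - 1);
	}

	int main(void)
	{
		/* example WB-covered ranges, already sorted by start */
		struct res_range range[] = { { 0x10, 0x9ff }, { 0x1000, 0x3fff } };
		int i, nr_range = 2;
		unsigned long end_pfn = 0x8000;

		if (range[0].start)				/* head */
			trim(0, range[0].start);
		for (i = 0; i < nr_range - 1; i++)		/* holes */
			if (range[i].end + 1 < range[i + 1].start)
				trim(range[i].end + 1, range[i + 1].start);
		i = nr_range - 1;
		if (range[i].end + 1 < end_pfn)			/* top */
			trim(range[i].end + 1, end_pfn);
		return 0;
	}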
1101
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 0c0a455fe95c..37f28fc7cf95 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -33,13 +33,31 @@ u64 mtrr_tom2;
33struct mtrr_state_type mtrr_state = {}; 33struct mtrr_state_type mtrr_state = {};
34EXPORT_SYMBOL_GPL(mtrr_state); 34EXPORT_SYMBOL_GPL(mtrr_state);
35 35
36static int __initdata mtrr_show; 36/**
37static int __init mtrr_debug(char *opt) 37 * BIOS is expected to clear MtrrFixDramModEn bit, see for example
38 * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
39 * Opteron Processors" (26094 Rev. 3.30 February 2006), section
40 * "13.2.1.2 SYSCFG Register": "The MtrrFixDramModEn bit should be set
 41 * to 1 during BIOS initialization of the fixed MTRRs, then cleared to
42 * 0 for operation."
43 */
44static inline void k8_check_syscfg_dram_mod_en(void)
38{ 45{
39 mtrr_show = 1; 46 u32 lo, hi;
40 return 0; 47
48 if (!((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
49 (boot_cpu_data.x86 >= 0x0f)))
50 return;
51
52 rdmsr(MSR_K8_SYSCFG, lo, hi);
53 if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
54 printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
55 " not cleared by BIOS, clearing this bit\n",
56 smp_processor_id());
57 lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
58 mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi);
59 }
41} 60}
42early_param("mtrr.show", mtrr_debug);
43 61
44/* 62/*
45 * Returns the effective MTRR type for the region 63 * Returns the effective MTRR type for the region
@@ -174,6 +192,8 @@ get_fixed_ranges(mtrr_type * frs)
174 unsigned int *p = (unsigned int *) frs; 192 unsigned int *p = (unsigned int *) frs;
175 int i; 193 int i;
176 194
195 k8_check_syscfg_dram_mod_en();
196
177 rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]); 197 rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
178 198
179 for (i = 0; i < 2; i++) 199 for (i = 0; i < 2; i++)
@@ -188,18 +208,94 @@ void mtrr_save_fixed_ranges(void *info)
188 get_fixed_ranges(mtrr_state.fixed_ranges); 208 get_fixed_ranges(mtrr_state.fixed_ranges);
189} 209}
190 210
191static void print_fixed(unsigned base, unsigned step, const mtrr_type*types) 211static unsigned __initdata last_fixed_start;
212static unsigned __initdata last_fixed_end;
213static mtrr_type __initdata last_fixed_type;
214
215static void __init print_fixed_last(void)
216{
217 if (!last_fixed_end)
218 return;
219
220 printk(KERN_DEBUG " %05X-%05X %s\n", last_fixed_start,
221 last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type));
222
223 last_fixed_end = 0;
224}
225
226static void __init update_fixed_last(unsigned base, unsigned end,
227 mtrr_type type)
228{
229 last_fixed_start = base;
230 last_fixed_end = end;
231 last_fixed_type = type;
232}
233
234static void __init print_fixed(unsigned base, unsigned step,
235 const mtrr_type *types)
192{ 236{
193 unsigned i; 237 unsigned i;
194 238
195 for (i = 0; i < 8; ++i, ++types, base += step) 239 for (i = 0; i < 8; ++i, ++types, base += step) {
196 printk(KERN_INFO "MTRR %05X-%05X %s\n", 240 if (last_fixed_end == 0) {
197 base, base + step - 1, mtrr_attrib_to_str(*types)); 241 update_fixed_last(base, base + step, *types);
242 continue;
243 }
244 if (last_fixed_end == base && last_fixed_type == *types) {
245 last_fixed_end = base + step;
246 continue;
247 }
248 /* new segments: gap or different type */
249 print_fixed_last();
250 update_fixed_last(base, base + step, *types);
251 }
198} 252}
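The new print_fixed()/print_fixed_last() pair coalesces consecutive fixed-range slots of the same MTRR type into a single debug line instead of printing one line per slot. The run-length grouping it implements looks roughly like this in isolation (the slot types below are invented sample data, not real MTRR contents):

	#include <stdio.h>

	int main(void)
	{
		/* pretend types for eight 64K fixed-range slots at 0 (6 = WB, 0 = UC) */
		const int types[] = { 6, 6, 6, 6, 0, 0, 6, 6 };
		const char *name[] = { [0] = "uncachable", [6] = "write-back" };
		unsigned base = 0x00000, step = 0x10000;
		unsigned last_start = 0, last_end = 0;
		int last_type = -1;
		unsigned i;

		for (i = 0; i < 8; i++, base += step) {
			if (last_type >= 0 && base == last_end && types[i] == last_type) {
				last_end = base + step;	/* same type, extend the run */
				continue;
			}
			if (last_type >= 0)		/* flush the finished run */
				printf("  %05X-%05X %s\n",
				       last_start, last_end - 1, name[last_type]);
			last_start = base;
			last_end = base + step;
			last_type = types[i];
		}
		if (last_type >= 0)			/* tail, like print_fixed_last() */
			printf("  %05X-%05X %s\n",
			       last_start, last_end - 1, name[last_type]);
		return 0;
	}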
199 253
200static void prepare_set(void); 254static void prepare_set(void);
201static void post_set(void); 255static void post_set(void);
202 256
257static void __init print_mtrr_state(void)
258{
259 unsigned int i;
260 int high_width;
261
262 printk(KERN_DEBUG "MTRR default type: %s\n",
263 mtrr_attrib_to_str(mtrr_state.def_type));
264 if (mtrr_state.have_fixed) {
265 printk(KERN_DEBUG "MTRR fixed ranges %sabled:\n",
266 mtrr_state.enabled & 1 ? "en" : "dis");
267 print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
268 for (i = 0; i < 2; ++i)
269 print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8);
270 for (i = 0; i < 8; ++i)
271 print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8);
272
273 /* tail */
274 print_fixed_last();
275 }
276 printk(KERN_DEBUG "MTRR variable ranges %sabled:\n",
277 mtrr_state.enabled & 2 ? "en" : "dis");
278 high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4;
279 for (i = 0; i < num_var_ranges; ++i) {
280 if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
281 printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n",
282 i,
283 high_width,
284 mtrr_state.var_ranges[i].base_hi,
285 mtrr_state.var_ranges[i].base_lo >> 12,
286 high_width,
287 mtrr_state.var_ranges[i].mask_hi,
288 mtrr_state.var_ranges[i].mask_lo >> 12,
289 mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
290 else
291 printk(KERN_DEBUG " %u disabled\n", i);
292 }
293 if (mtrr_tom2) {
294 printk(KERN_DEBUG "TOM2: %016llx aka %lldM\n",
295 mtrr_tom2, mtrr_tom2>>20);
296 }
297}
298
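print_fixed() now coalesces neighbouring fixed-range slots that carry the same type and emits one KERN_DEBUG line per run, with print_fixed_last() flushing whichever run is still open at the end. A stand-alone sketch of the same coalescing over an array of (base, step, type) slots, using made-up sample data:

#include <stdio.h>

struct fixed_slot { unsigned base, step, type; };

static unsigned last_start, last_end, last_type;

static void print_last(void)
{
    if (!last_end)
        return;
    printf("  %05X-%05X type %u\n", last_start, last_end - 1, last_type);
    last_end = 0;
}

int main(void)
{
    /* sample slots below 0x80000; the first two share a type */
    struct fixed_slot slots[] = {
        { 0x00000, 0x10000, 6 }, { 0x10000, 0x10000, 6 },
        { 0x20000, 0x10000, 0 }, { 0x30000, 0x10000, 6 },
    };
    unsigned i;

    for (i = 0; i < sizeof(slots) / sizeof(slots[0]); i++) {
        struct fixed_slot *s = &slots[i];

        /* extend a matching run, otherwise flush and start a new one */
        if (last_end && last_end == s->base && last_type == s->type) {
            last_end = s->base + s->step;
            continue;
        }
        print_last();
        last_start = s->base;
        last_end = s->base + s->step;
        last_type = s->type;
    }
    print_last();    /* the tail, like print_fixed_last() after the loops */
    return 0;
}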
203/* Grab all of the MTRR state for this CPU into *state */ 299/* Grab all of the MTRR state for this CPU into *state */
204void __init get_mtrr_state(void) 300void __init get_mtrr_state(void)
205{ 301{
@@ -231,41 +327,9 @@ void __init get_mtrr_state(void)
231 mtrr_tom2 |= low; 327 mtrr_tom2 |= low;
232 mtrr_tom2 &= 0xffffff800000ULL; 328 mtrr_tom2 &= 0xffffff800000ULL;
233 } 329 }
234 if (mtrr_show) { 330
235 int high_width; 331 print_mtrr_state();
236 332
237 printk(KERN_INFO "MTRR default type: %s\n", mtrr_attrib_to_str(mtrr_state.def_type));
238 if (mtrr_state.have_fixed) {
239 printk(KERN_INFO "MTRR fixed ranges %sabled:\n",
240 mtrr_state.enabled & 1 ? "en" : "dis");
241 print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
242 for (i = 0; i < 2; ++i)
243 print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8);
244 for (i = 0; i < 8; ++i)
245 print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8);
246 }
247 printk(KERN_INFO "MTRR variable ranges %sabled:\n",
248 mtrr_state.enabled & 2 ? "en" : "dis");
249 high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4;
250 for (i = 0; i < num_var_ranges; ++i) {
251 if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
252 printk(KERN_INFO "MTRR %u base %0*X%05X000 mask %0*X%05X000 %s\n",
253 i,
254 high_width,
255 mtrr_state.var_ranges[i].base_hi,
256 mtrr_state.var_ranges[i].base_lo >> 12,
257 high_width,
258 mtrr_state.var_ranges[i].mask_hi,
259 mtrr_state.var_ranges[i].mask_lo >> 12,
260 mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
261 else
262 printk(KERN_INFO "MTRR %u disabled\n", i);
263 }
264 if (mtrr_tom2) {
265 printk(KERN_INFO "TOM2: %016llx aka %lldM\n",
266 mtrr_tom2, mtrr_tom2>>20);
267 }
268 }
269 mtrr_state_set = 1; 333 mtrr_state_set = 1;
270 334
271 /* PAT setup for BP. We need to go through sync steps here */ 335 /* PAT setup for BP. We need to go through sync steps here */
@@ -308,27 +372,10 @@ void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
308} 372}
309 373
310/** 374/**
311 * Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs
312 * see AMD publication no. 24593, chapter 3.2.1 for more information
313 */
314static inline void k8_enable_fixed_iorrs(void)
315{
316 unsigned lo, hi;
317
318 rdmsr(MSR_K8_SYSCFG, lo, hi);
319 mtrr_wrmsr(MSR_K8_SYSCFG, lo
320 | K8_MTRRFIXRANGE_DRAM_ENABLE
321 | K8_MTRRFIXRANGE_DRAM_MODIFY, hi);
322}
323
324/**
325 * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have 375 * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have
326 * @msr: MSR address of the MTRR which should be checked and updated 376 * @msr: MSR address of the MTRR which should be checked and updated
327 * @changed: pointer which indicates whether the MTRR needed to be changed 377 * @changed: pointer which indicates whether the MTRR needed to be changed
328 * @msrwords: pointer to the MSR values which the MSR should have 378 * @msrwords: pointer to the MSR values which the MSR should have
329 *
330 * If K8 extentions are wanted, update the K8 SYSCFG MSR also.
331 * See AMD publication no. 24593, chapter 7.8.1, page 233 for more information.
332 */ 379 */
333static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) 380static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
334{ 381{
@@ -337,10 +384,6 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
337 rdmsr(msr, lo, hi); 384 rdmsr(msr, lo, hi);
338 385
339 if (lo != msrwords[0] || hi != msrwords[1]) { 386 if (lo != msrwords[0] || hi != msrwords[1]) {
340 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
341 (boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) &&
342 ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
343 k8_enable_fixed_iorrs();
344 mtrr_wrmsr(msr, msrwords[0], msrwords[1]); 387 mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
345 *changed = true; 388 *changed = true;
346 } 389 }
@@ -376,22 +419,31 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
376{ 419{
377 unsigned int mask_lo, mask_hi, base_lo, base_hi; 420 unsigned int mask_lo, mask_hi, base_lo, base_hi;
378 unsigned int tmp, hi; 421 unsigned int tmp, hi;
422 int cpu;
423
424 /*
425 * get_mtrr doesn't need to update mtrr_state; it can also be called
426 * from any CPU, so print the result out directly.
427 */
428 cpu = get_cpu();
379 429
380 rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); 430 rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
431
381 if ((mask_lo & 0x800) == 0) { 432 if ((mask_lo & 0x800) == 0) {
382 /* Invalid (i.e. free) range */ 433 /* Invalid (i.e. free) range */
383 *base = 0; 434 *base = 0;
384 *size = 0; 435 *size = 0;
385 *type = 0; 436 *type = 0;
386 return; 437 goto out_put_cpu;
387 } 438 }
388 439
389 rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); 440 rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
390 441
391 /* Work out the shifted address mask. */ 442 /* Work out the shifted address mask: */
392 tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; 443 tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
393 mask_lo = size_or_mask | tmp; 444 mask_lo = size_or_mask | tmp;
394 /* Expand tmp with high bits to all 1s*/ 445
446 /* Expand tmp with high bits to all 1s: */
395 hi = fls(tmp); 447 hi = fls(tmp);
396 if (hi > 0) { 448 if (hi > 0) {
397 tmp |= ~((1<<(hi - 1)) - 1); 449 tmp |= ~((1<<(hi - 1)) - 1);
@@ -402,11 +454,19 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
402 } 454 }
403 } 455 }
404 456
405 /* This works correctly if size is a power of two, i.e. a 457 /*
406 contiguous range. */ 458 * This works correctly if size is a power of two, i.e. a
459 * contiguous range:
460 */
407 *size = -mask_lo; 461 *size = -mask_lo;
408 *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; 462 *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
409 *type = base_lo & 0xff; 463 *type = base_lo & 0xff;
464
465 printk(KERN_DEBUG " get_mtrr: cpu%d reg%02d base=%010lx size=%010lx %s\n",
466 cpu, reg, *base, *size,
467 mtrr_attrib_to_str(*type & 0xff));
468out_put_cpu:
469 put_cpu();
410} 470}
411 471
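The size recovery in generic_get_mtrr() relies on PhysMask describing a power-of-two region: once the mask is shifted into page units and the bits above the CPU's physical address width are filled in via size_or_mask, negating the result gives the region size in pages. A rough user-space sketch of that arithmetic, assuming a 36-bit physical address width and 4K pages:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PHYS_BITS  36    /* assumed physical address width */

int main(void)
{
    /* variable MTRR covering 2GB at base 0, write-back (type 6) */
    uint64_t phys_base = 0x000000000ULL | 6;
    uint64_t phys_mask = (~(0x80000000ULL - 1) & ((1ULL << PHYS_BITS) - 1))
                         | 0x800;    /* bit 11: mask valid */

    /* bits above the address width, expressed in page units */
    uint32_t size_or_mask = ~((1u << (PHYS_BITS - PAGE_SHIFT)) - 1);

    uint32_t mask_pfn = (uint32_t)(phys_mask >> PAGE_SHIFT);
    uint32_t size_pfn = -(size_or_mask | mask_pfn);
    uint64_t base_pfn = (phys_base & ~0xfffULL) >> PAGE_SHIFT;

    printf("base %#llx, size %uMB, type %llu\n",
           (unsigned long long)(base_pfn << PAGE_SHIFT),
           (unsigned)(size_pfn >> (20 - PAGE_SHIFT)),
           (unsigned long long)(phys_base & 0xff));
    return 0;
}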
412/** 472/**
@@ -419,6 +479,8 @@ static int set_fixed_ranges(mtrr_type * frs)
419 bool changed = false; 479 bool changed = false;
420 int block=-1, range; 480 int block=-1, range;
421 481
482 k8_check_syscfg_dram_mod_en();
483
422 while (fixed_range_blocks[++block].ranges) 484 while (fixed_range_blocks[++block].ranges)
423 for (range=0; range < fixed_range_blocks[block].ranges; range++) 485 for (range=0; range < fixed_range_blocks[block].ranges; range++)
424 set_fixed_range(fixed_range_blocks[block].base_msr + range, 486 set_fixed_range(fixed_range_blocks[block].base_msr + range,
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 236a401b8259..03cda01f57c7 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -574,7 +574,7 @@ struct mtrr_value {
574 unsigned long lsize; 574 unsigned long lsize;
575}; 575};
576 576
577static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES]; 577static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
578 578
579static int mtrr_save(struct sys_device * sysdev, pm_message_t state) 579static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
580{ 580{
@@ -582,9 +582,9 @@ static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
582 582
583 for (i = 0; i < num_var_ranges; i++) { 583 for (i = 0; i < num_var_ranges; i++) {
584 mtrr_if->get(i, 584 mtrr_if->get(i,
585 &mtrr_state[i].lbase, 585 &mtrr_value[i].lbase,
586 &mtrr_state[i].lsize, 586 &mtrr_value[i].lsize,
587 &mtrr_state[i].ltype); 587 &mtrr_value[i].ltype);
588 } 588 }
589 return 0; 589 return 0;
590} 590}
@@ -594,11 +594,11 @@ static int mtrr_restore(struct sys_device * sysdev)
594 int i; 594 int i;
595 595
596 for (i = 0; i < num_var_ranges; i++) { 596 for (i = 0; i < num_var_ranges; i++) {
597 if (mtrr_state[i].lsize) 597 if (mtrr_value[i].lsize)
598 set_mtrr(i, 598 set_mtrr(i,
599 mtrr_state[i].lbase, 599 mtrr_value[i].lbase,
600 mtrr_state[i].lsize, 600 mtrr_value[i].lsize,
601 mtrr_state[i].ltype); 601 mtrr_value[i].ltype);
602 } 602 }
603 return 0; 603 return 0;
604} 604}
@@ -610,1058 +610,7 @@ static struct sysdev_driver mtrr_sysdev_driver = {
610 .resume = mtrr_restore, 610 .resume = mtrr_restore,
611}; 611};
612 612
613/* should be related to MTRR_VAR_RANGES nums */ 613int __initdata changed_by_mtrr_cleanup;
614#define RANGE_NUM 256
615
616struct res_range {
617 unsigned long start;
618 unsigned long end;
619};
620
621static int __init
622add_range(struct res_range *range, int nr_range, unsigned long start,
623 unsigned long end)
624{
625 /* out of slots */
626 if (nr_range >= RANGE_NUM)
627 return nr_range;
628
629 range[nr_range].start = start;
630 range[nr_range].end = end;
631
632 nr_range++;
633
634 return nr_range;
635}
636
637static int __init
638add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
639 unsigned long end)
640{
641 int i;
642
643 /* try to merge it with old one */
644 for (i = 0; i < nr_range; i++) {
645 unsigned long final_start, final_end;
646 unsigned long common_start, common_end;
647
648 if (!range[i].end)
649 continue;
650
651 common_start = max(range[i].start, start);
652 common_end = min(range[i].end, end);
653 if (common_start > common_end + 1)
654 continue;
655
656 final_start = min(range[i].start, start);
657 final_end = max(range[i].end, end);
658
659 range[i].start = final_start;
660 range[i].end = final_end;
661 return nr_range;
662 }
663
664 /* need to add that */
665 return add_range(range, nr_range, start, end);
666}
667
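add_range_with_merge() treats its ranges as inclusive [start, end] pairs and folds a new range into an existing one whenever the two overlap or merely touch, which is why the rejection test above is common_start > common_end + 1 rather than a plain overlap check. A small runnable sketch of that merge rule on a scratch array:

#include <stdio.h>

struct range { unsigned long start, end; };    /* inclusive on both sides */

static int add_with_merge(struct range *r, int n, unsigned long s, unsigned long e)
{
    int i;

    for (i = 0; i < n; i++) {
        /* overlapping or adjacent: widen the existing entry */
        if (!(s > r[i].end + 1 || e + 1 < r[i].start)) {
            if (s < r[i].start)
                r[i].start = s;
            if (e > r[i].end)
                r[i].end = e;
            return n;
        }
    }
    r[n].start = s;
    r[n].end = e;
    return n + 1;
}

int main(void)
{
    struct range r[8] = { { 0, 0 } };
    int n = 0, i;

    n = add_with_merge(r, n, 0x000, 0x0ff);
    n = add_with_merge(r, n, 0x100, 0x1ff);    /* adjacent: merges */
    n = add_with_merge(r, n, 0x400, 0x4ff);    /* disjoint: new entry */

    for (i = 0; i < n; i++)
        printf("range %d: %#lx - %#lx\n", i, r[i].start, r[i].end);
    return 0;
}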
668static void __init
669subtract_range(struct res_range *range, unsigned long start, unsigned long end)
670{
671 int i, j;
672
673 for (j = 0; j < RANGE_NUM; j++) {
674 if (!range[j].end)
675 continue;
676
677 if (start <= range[j].start && end >= range[j].end) {
678 range[j].start = 0;
679 range[j].end = 0;
680 continue;
681 }
682
683 if (start <= range[j].start && end < range[j].end &&
684 range[j].start < end + 1) {
685 range[j].start = end + 1;
686 continue;
687 }
688
689
690 if (start > range[j].start && end >= range[j].end &&
691 range[j].end > start - 1) {
692 range[j].end = start - 1;
693 continue;
694 }
695
696 if (start > range[j].start && end < range[j].end) {
697 /* find the new spare */
698 for (i = 0; i < RANGE_NUM; i++) {
699 if (range[i].end == 0)
700 break;
701 }
702 if (i < RANGE_NUM) {
703 range[i].end = range[j].end;
704 range[i].start = end + 1;
705 } else {
706 printk(KERN_ERR "run of slot in ranges\n");
707 }
708 range[j].end = start - 1;
709 continue;
710 }
711 }
712}
713
714static int __init cmp_range(const void *x1, const void *x2)
715{
716 const struct res_range *r1 = x1;
717 const struct res_range *r2 = x2;
718 long start1, start2;
719
720 start1 = r1->start;
721 start2 = r2->start;
722
723 return start1 - start2;
724}
725
726struct var_mtrr_range_state {
727 unsigned long base_pfn;
728 unsigned long size_pfn;
729 mtrr_type type;
730};
731
732static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
733static int __initdata debug_print;
734
735static int __init
736x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
737 unsigned long extra_remove_base,
738 unsigned long extra_remove_size)
739{
740 unsigned long i, base, size;
741 mtrr_type type;
742
743 for (i = 0; i < num_var_ranges; i++) {
744 type = range_state[i].type;
745 if (type != MTRR_TYPE_WRBACK)
746 continue;
747 base = range_state[i].base_pfn;
748 size = range_state[i].size_pfn;
749 nr_range = add_range_with_merge(range, nr_range, base,
750 base + size - 1);
751 }
752 if (debug_print) {
753 printk(KERN_DEBUG "After WB checking\n");
754 for (i = 0; i < nr_range; i++)
755 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
756 range[i].start, range[i].end + 1);
757 }
758
759 /* take out UC ranges */
760 for (i = 0; i < num_var_ranges; i++) {
761 type = range_state[i].type;
762 if (type != MTRR_TYPE_UNCACHABLE &&
763 type != MTRR_TYPE_WRPROT)
764 continue;
765 size = range_state[i].size_pfn;
766 if (!size)
767 continue;
768 base = range_state[i].base_pfn;
769 subtract_range(range, base, base + size - 1);
770 }
771 if (extra_remove_size)
772 subtract_range(range, extra_remove_base,
773 extra_remove_base + extra_remove_size - 1);
774
775 /* get new range num */
776 nr_range = 0;
777 for (i = 0; i < RANGE_NUM; i++) {
778 if (!range[i].end)
779 continue;
780 nr_range++;
781 }
782 if (debug_print) {
783 printk(KERN_DEBUG "After UC checking\n");
784 for (i = 0; i < nr_range; i++)
785 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
786 range[i].start, range[i].end + 1);
787 }
788
789 /* sort the ranges */
790 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
791 if (debug_print) {
792 printk(KERN_DEBUG "After sorting\n");
793 for (i = 0; i < nr_range; i++)
794 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
795 range[i].start, range[i].end + 1);
796 }
797
798 /* clear those is not used */
799 for (i = nr_range; i < RANGE_NUM; i++)
800 memset(&range[i], 0, sizeof(range[i]));
801
802 return nr_range;
803}
804
805static struct res_range __initdata range[RANGE_NUM];
806static int __initdata nr_range;
807
808#ifdef CONFIG_MTRR_SANITIZER
809
810static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
811{
812 unsigned long sum;
813 int i;
814
815 sum = 0;
816 for (i = 0; i < nr_range; i++)
817 sum += range[i].end + 1 - range[i].start;
818
819 return sum;
820}
821
822static int enable_mtrr_cleanup __initdata =
823 CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
824
825static int __init disable_mtrr_cleanup_setup(char *str)
826{
827 enable_mtrr_cleanup = 0;
828 return 0;
829}
830early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
831
832static int __init enable_mtrr_cleanup_setup(char *str)
833{
834 enable_mtrr_cleanup = 1;
835 return 0;
836}
837early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
838
839static int __init mtrr_cleanup_debug_setup(char *str)
840{
841 debug_print = 1;
842 return 0;
843}
844early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
845
846struct var_mtrr_state {
847 unsigned long range_startk;
848 unsigned long range_sizek;
849 unsigned long chunk_sizek;
850 unsigned long gran_sizek;
851 unsigned int reg;
852};
853
854static void __init
855set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
856 unsigned char type, unsigned int address_bits)
857{
858 u32 base_lo, base_hi, mask_lo, mask_hi;
859 u64 base, mask;
860
861 if (!sizek) {
862 fill_mtrr_var_range(reg, 0, 0, 0, 0);
863 return;
864 }
865
866 mask = (1ULL << address_bits) - 1;
867 mask &= ~((((u64)sizek) << 10) - 1);
868
869 base = ((u64)basek) << 10;
870
871 base |= type;
872 mask |= 0x800;
873
874 base_lo = base & ((1ULL<<32) - 1);
875 base_hi = base >> 32;
876
877 mask_lo = mask & ((1ULL<<32) - 1);
878 mask_hi = mask >> 32;
879
880 fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
881}
882
883static void __init
884save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
885 unsigned char type)
886{
887 range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
888 range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
889 range_state[reg].type = type;
890}
891
892static void __init
893set_var_mtrr_all(unsigned int address_bits)
894{
895 unsigned long basek, sizek;
896 unsigned char type;
897 unsigned int reg;
898
899 for (reg = 0; reg < num_var_ranges; reg++) {
900 basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
901 sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
902 type = range_state[reg].type;
903
904 set_var_mtrr(reg, basek, sizek, type, address_bits);
905 }
906}
907
908static unsigned long to_size_factor(unsigned long sizek, char *factorp)
909{
910 char factor;
911 unsigned long base = sizek;
912
913 if (base & ((1<<10) - 1)) {
914 /* not MB alignment */
915 factor = 'K';
916 } else if (base & ((1<<20) - 1)){
917 factor = 'M';
918 base >>= 10;
919 } else {
920 factor = 'G';
921 base >>= 20;
922 }
923
924 *factorp = factor;
925
926 return base;
927}
928
929static unsigned int __init
930range_to_mtrr(unsigned int reg, unsigned long range_startk,
931 unsigned long range_sizek, unsigned char type)
932{
933 if (!range_sizek || (reg >= num_var_ranges))
934 return reg;
935
936 while (range_sizek) {
937 unsigned long max_align, align;
938 unsigned long sizek;
939
940 /* Compute the maximum size I can make a range */
941 if (range_startk)
942 max_align = ffs(range_startk) - 1;
943 else
944 max_align = 32;
945 align = fls(range_sizek) - 1;
946 if (align > max_align)
947 align = max_align;
948
949 sizek = 1 << align;
950 if (debug_print) {
951 char start_factor = 'K', size_factor = 'K';
952 unsigned long start_base, size_base;
953
954 start_base = to_size_factor(range_startk, &start_factor),
955 size_base = to_size_factor(sizek, &size_factor),
956
957 printk(KERN_DEBUG "Setting variable MTRR %d, "
958 "base: %ld%cB, range: %ld%cB, type %s\n",
959 reg, start_base, start_factor,
960 size_base, size_factor,
961 (type == MTRR_TYPE_UNCACHABLE)?"UC":
962 ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
963 );
964 }
965 save_var_mtrr(reg++, range_startk, sizek, type);
966 range_startk += sizek;
967 range_sizek -= sizek;
968 if (reg >= num_var_ranges)
969 break;
970 }
971 return reg;
972}
973
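range_to_mtrr() carves an arbitrary range into power-of-two pieces: each piece can be no larger than the alignment of its current start (the ffs() term) and no larger than the remaining size rounded down to a power of two (the fls() term). A short sketch of that carving for a sample 5MB range starting at 3MB, in KB units:

#include <stdio.h>
#include <strings.h>    /* ffs() */

/* highest set bit, 1-based, 0 for x == 0 (the kernel's fls()) */
static int fls_ul(unsigned long x)
{
    int r = 0;

    while (x) {
        x >>= 1;
        r++;
    }
    return r;
}

int main(void)
{
    unsigned long startk = 3072;    /* 3MB */
    unsigned long sizek = 5120;     /* 5MB */

    while (sizek) {
        int max_align = startk ? ffs(startk) - 1 : 32;
        int align = fls_ul(sizek) - 1;
        unsigned long chunk;

        if (align > max_align)
            align = max_align;
        chunk = 1UL << align;

        printf("MTRR chunk: base %luK size %luK\n", startk, chunk);
        startk += chunk;
        sizek -= chunk;
    }
    return 0;
}

With these inputs the range splits into a 1MB piece at 3MB followed by a 4MB piece at 4MB, which is exactly the kind of fragmentation the chunk/gran heuristics below try to keep in check.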
974static unsigned __init
975range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
976 unsigned long sizek)
977{
978 unsigned long hole_basek, hole_sizek;
979 unsigned long second_basek, second_sizek;
980 unsigned long range0_basek, range0_sizek;
981 unsigned long range_basek, range_sizek;
982 unsigned long chunk_sizek;
983 unsigned long gran_sizek;
984
985 hole_basek = 0;
986 hole_sizek = 0;
987 second_basek = 0;
988 second_sizek = 0;
989 chunk_sizek = state->chunk_sizek;
990 gran_sizek = state->gran_sizek;
991
992 /* align with gran size, prevent small block used up MTRRs */
993 range_basek = ALIGN(state->range_startk, gran_sizek);
994 if ((range_basek > basek) && basek)
995 return second_sizek;
996 state->range_sizek -= (range_basek - state->range_startk);
997 range_sizek = ALIGN(state->range_sizek, gran_sizek);
998
999 while (range_sizek > state->range_sizek) {
1000 range_sizek -= gran_sizek;
1001 if (!range_sizek)
1002 return 0;
1003 }
1004 state->range_sizek = range_sizek;
1005
1006 /* try to append some small hole */
1007 range0_basek = state->range_startk;
1008 range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
1009
1010 /* no increase */
1011 if (range0_sizek == state->range_sizek) {
1012 if (debug_print)
1013 printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
1014 range0_basek<<10,
1015 (range0_basek + state->range_sizek)<<10);
1016 state->reg = range_to_mtrr(state->reg, range0_basek,
1017 state->range_sizek, MTRR_TYPE_WRBACK);
1018 return 0;
1019 }
1020
1021 /* only cut back, when it is not the last */
1022 if (sizek) {
1023 while (range0_basek + range0_sizek > (basek + sizek)) {
1024 if (range0_sizek >= chunk_sizek)
1025 range0_sizek -= chunk_sizek;
1026 else
1027 range0_sizek = 0;
1028
1029 if (!range0_sizek)
1030 break;
1031 }
1032 }
1033
1034second_try:
1035 range_basek = range0_basek + range0_sizek;
1036
1037 /* one hole in the middle */
1038 if (range_basek > basek && range_basek <= (basek + sizek))
1039 second_sizek = range_basek - basek;
1040
1041 if (range0_sizek > state->range_sizek) {
1042
1043 /* one hole in middle or at end */
1044 hole_sizek = range0_sizek - state->range_sizek - second_sizek;
1045
1046 /* hole size should be less than half of range0 size */
1047 if (hole_sizek >= (range0_sizek >> 1) &&
1048 range0_sizek >= chunk_sizek) {
1049 range0_sizek -= chunk_sizek;
1050 second_sizek = 0;
1051 hole_sizek = 0;
1052
1053 goto second_try;
1054 }
1055 }
1056
1057 if (range0_sizek) {
1058 if (debug_print)
1059 printk(KERN_DEBUG "range0: %016lx - %016lx\n",
1060 range0_basek<<10,
1061 (range0_basek + range0_sizek)<<10);
1062 state->reg = range_to_mtrr(state->reg, range0_basek,
1063 range0_sizek, MTRR_TYPE_WRBACK);
1064 }
1065
1066 if (range0_sizek < state->range_sizek) {
1067 /* need to handle left over */
1068 range_sizek = state->range_sizek - range0_sizek;
1069
1070 if (debug_print)
1071 printk(KERN_DEBUG "range: %016lx - %016lx\n",
1072 range_basek<<10,
1073 (range_basek + range_sizek)<<10);
1074 state->reg = range_to_mtrr(state->reg, range_basek,
1075 range_sizek, MTRR_TYPE_WRBACK);
1076 }
1077
1078 if (hole_sizek) {
1079 hole_basek = range_basek - hole_sizek - second_sizek;
1080 if (debug_print)
1081 printk(KERN_DEBUG "hole: %016lx - %016lx\n",
1082 hole_basek<<10,
1083 (hole_basek + hole_sizek)<<10);
1084 state->reg = range_to_mtrr(state->reg, hole_basek,
1085 hole_sizek, MTRR_TYPE_UNCACHABLE);
1086 }
1087
1088 return second_sizek;
1089}
1090
1091static void __init
1092set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
1093 unsigned long size_pfn)
1094{
1095 unsigned long basek, sizek;
1096 unsigned long second_sizek = 0;
1097
1098 if (state->reg >= num_var_ranges)
1099 return;
1100
1101 basek = base_pfn << (PAGE_SHIFT - 10);
1102 sizek = size_pfn << (PAGE_SHIFT - 10);
1103
1104 /* See if I can merge with the last range */
1105 if ((basek <= 1024) ||
1106 (state->range_startk + state->range_sizek == basek)) {
1107 unsigned long endk = basek + sizek;
1108 state->range_sizek = endk - state->range_startk;
1109 return;
1110 }
1111 /* Write the range mtrrs */
1112 if (state->range_sizek != 0)
1113 second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
1114
1115 /* Allocate an msr */
1116 state->range_startk = basek + second_sizek;
1117 state->range_sizek = sizek - second_sizek;
1118}
1119
1120/* mininum size of mtrr block that can take hole */
1121static u64 mtrr_chunk_size __initdata = (256ULL<<20);
1122
1123static int __init parse_mtrr_chunk_size_opt(char *p)
1124{
1125 if (!p)
1126 return -EINVAL;
1127 mtrr_chunk_size = memparse(p, &p);
1128 return 0;
1129}
1130early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
1131
1132/* granity of mtrr of block */
1133static u64 mtrr_gran_size __initdata;
1134
1135static int __init parse_mtrr_gran_size_opt(char *p)
1136{
1137 if (!p)
1138 return -EINVAL;
1139 mtrr_gran_size = memparse(p, &p);
1140 return 0;
1141}
1142early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
1143
1144static int nr_mtrr_spare_reg __initdata =
1145 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
1146
1147static int __init parse_mtrr_spare_reg(char *arg)
1148{
1149 if (arg)
1150 nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
1151 return 0;
1152}
1153
1154early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
1155
1156static int __init
1157x86_setup_var_mtrrs(struct res_range *range, int nr_range,
1158 u64 chunk_size, u64 gran_size)
1159{
1160 struct var_mtrr_state var_state;
1161 int i;
1162 int num_reg;
1163
1164 var_state.range_startk = 0;
1165 var_state.range_sizek = 0;
1166 var_state.reg = 0;
1167 var_state.chunk_sizek = chunk_size >> 10;
1168 var_state.gran_sizek = gran_size >> 10;
1169
1170 memset(range_state, 0, sizeof(range_state));
1171
1172 /* Write the range etc */
1173 for (i = 0; i < nr_range; i++)
1174 set_var_mtrr_range(&var_state, range[i].start,
1175 range[i].end - range[i].start + 1);
1176
1177 /* Write the last range */
1178 if (var_state.range_sizek != 0)
1179 range_to_mtrr_with_hole(&var_state, 0, 0);
1180
1181 num_reg = var_state.reg;
1182 /* Clear out the extra MTRR's */
1183 while (var_state.reg < num_var_ranges) {
1184 save_var_mtrr(var_state.reg, 0, 0, 0);
1185 var_state.reg++;
1186 }
1187
1188 return num_reg;
1189}
1190
1191struct mtrr_cleanup_result {
1192 unsigned long gran_sizek;
1193 unsigned long chunk_sizek;
1194 unsigned long lose_cover_sizek;
1195 unsigned int num_reg;
1196 int bad;
1197};
1198
1199/*
1200 * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
1201 * chunk size: gran_size, ..., 2G
1202 * so we need (1+16)*8
1203 */
1204#define NUM_RESULT 136
1205#define PSHIFT (PAGE_SHIFT - 10)
1206
1207static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
1208static unsigned long __initdata min_loss_pfn[RANGE_NUM];
1209
1210static void __init print_out_mtrr_range_state(void)
1211{
1212 int i;
1213 char start_factor = 'K', size_factor = 'K';
1214 unsigned long start_base, size_base;
1215 mtrr_type type;
1216
1217 for (i = 0; i < num_var_ranges; i++) {
1218
1219 size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
1220 if (!size_base)
1221 continue;
1222
1223 size_base = to_size_factor(size_base, &size_factor),
1224 start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
1225 start_base = to_size_factor(start_base, &start_factor),
1226 type = range_state[i].type;
1227
1228 printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
1229 i, start_base, start_factor,
1230 size_base, size_factor,
1231 (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
1232 ((type == MTRR_TYPE_WRPROT) ? "WP" :
1233 ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
1234 );
1235 }
1236}
1237
1238static int __init mtrr_need_cleanup(void)
1239{
1240 int i;
1241 mtrr_type type;
1242 unsigned long size;
1243 /* extra one for all 0 */
1244 int num[MTRR_NUM_TYPES + 1];
1245
1246 /* check entries number */
1247 memset(num, 0, sizeof(num));
1248 for (i = 0; i < num_var_ranges; i++) {
1249 type = range_state[i].type;
1250 size = range_state[i].size_pfn;
1251 if (type >= MTRR_NUM_TYPES)
1252 continue;
1253 if (!size)
1254 type = MTRR_NUM_TYPES;
1255 if (type == MTRR_TYPE_WRPROT)
1256 type = MTRR_TYPE_UNCACHABLE;
1257 num[type]++;
1258 }
1259
1260 /* check if we got UC entries */
1261 if (!num[MTRR_TYPE_UNCACHABLE])
1262 return 0;
1263
1264 /* check if we only had WB and UC */
1265 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
1266 num_var_ranges - num[MTRR_NUM_TYPES])
1267 return 0;
1268
1269 return 1;
1270}
1271
1272static unsigned long __initdata range_sums;
1273static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
1274 unsigned long extra_remove_base,
1275 unsigned long extra_remove_size,
1276 int i)
1277{
1278 int num_reg;
1279 static struct res_range range_new[RANGE_NUM];
1280 static int nr_range_new;
1281 unsigned long range_sums_new;
1282
1283 /* convert ranges to var ranges state */
1284 num_reg = x86_setup_var_mtrrs(range, nr_range,
1285 chunk_size, gran_size);
1286
1287 /* we got new setting in range_state, check it */
1288 memset(range_new, 0, sizeof(range_new));
1289 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
1290 extra_remove_base, extra_remove_size);
1291 range_sums_new = sum_ranges(range_new, nr_range_new);
1292
1293 result[i].chunk_sizek = chunk_size >> 10;
1294 result[i].gran_sizek = gran_size >> 10;
1295 result[i].num_reg = num_reg;
1296 if (range_sums < range_sums_new) {
1297 result[i].lose_cover_sizek =
1298 (range_sums_new - range_sums) << PSHIFT;
1299 result[i].bad = 1;
1300 } else
1301 result[i].lose_cover_sizek =
1302 (range_sums - range_sums_new) << PSHIFT;
1303
1304 /* double check it */
1305 if (!result[i].bad && !result[i].lose_cover_sizek) {
1306 if (nr_range_new != nr_range ||
1307 memcmp(range, range_new, sizeof(range)))
1308 result[i].bad = 1;
1309 }
1310
1311 if (!result[i].bad && (range_sums - range_sums_new <
1312 min_loss_pfn[num_reg])) {
1313 min_loss_pfn[num_reg] =
1314 range_sums - range_sums_new;
1315 }
1316}
1317
1318static void __init mtrr_print_out_one_result(int i)
1319{
1320 char gran_factor, chunk_factor, lose_factor;
1321 unsigned long gran_base, chunk_base, lose_base;
1322
1323 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
1324 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
1325 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
1326 printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
1327 result[i].bad ? "*BAD*" : " ",
1328 gran_base, gran_factor, chunk_base, chunk_factor);
1329 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
1330 result[i].num_reg, result[i].bad ? "-" : "",
1331 lose_base, lose_factor);
1332}
1333
1334static int __init mtrr_search_optimal_index(void)
1335{
1336 int i;
1337 int num_reg_good;
1338 int index_good;
1339
1340 if (nr_mtrr_spare_reg >= num_var_ranges)
1341 nr_mtrr_spare_reg = num_var_ranges - 1;
1342 num_reg_good = -1;
1343 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
1344 if (!min_loss_pfn[i])
1345 num_reg_good = i;
1346 }
1347
1348 index_good = -1;
1349 if (num_reg_good != -1) {
1350 for (i = 0; i < NUM_RESULT; i++) {
1351 if (!result[i].bad &&
1352 result[i].num_reg == num_reg_good &&
1353 !result[i].lose_cover_sizek) {
1354 index_good = i;
1355 break;
1356 }
1357 }
1358 }
1359
1360 return index_good;
1361}
1362
1363
1364static int __init mtrr_cleanup(unsigned address_bits)
1365{
1366 unsigned long extra_remove_base, extra_remove_size;
1367 unsigned long base, size, def, dummy;
1368 mtrr_type type;
1369 u64 chunk_size, gran_size;
1370 int index_good;
1371 int i;
1372
1373 if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
1374 return 0;
1375 rdmsr(MTRRdefType_MSR, def, dummy);
1376 def &= 0xff;
1377 if (def != MTRR_TYPE_UNCACHABLE)
1378 return 0;
1379
1380 /* get it and store it aside */
1381 memset(range_state, 0, sizeof(range_state));
1382 for (i = 0; i < num_var_ranges; i++) {
1383 mtrr_if->get(i, &base, &size, &type);
1384 range_state[i].base_pfn = base;
1385 range_state[i].size_pfn = size;
1386 range_state[i].type = type;
1387 }
1388
1389 /* check if we need handle it and can handle it */
1390 if (!mtrr_need_cleanup())
1391 return 0;
1392
1393 /* print original var MTRRs at first, for debugging: */
1394 printk(KERN_DEBUG "original variable MTRRs\n");
1395 print_out_mtrr_range_state();
1396
1397 memset(range, 0, sizeof(range));
1398 extra_remove_size = 0;
1399 extra_remove_base = 1 << (32 - PAGE_SHIFT);
1400 if (mtrr_tom2)
1401 extra_remove_size =
1402 (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
1403 nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
1404 extra_remove_size);
1405 /*
1406 * [0, 1M) should always be coverred by var mtrr with WB
1407 * and fixed mtrrs should take effective before var mtrr for it
1408 */
1409 nr_range = add_range_with_merge(range, nr_range, 0,
1410 (1ULL<<(20 - PAGE_SHIFT)) - 1);
1411 /* sort the ranges */
1412 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
1413
1414 range_sums = sum_ranges(range, nr_range);
1415 printk(KERN_INFO "total RAM coverred: %ldM\n",
1416 range_sums >> (20 - PAGE_SHIFT));
1417
1418 if (mtrr_chunk_size && mtrr_gran_size) {
1419 i = 0;
1420 mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
1421 extra_remove_base, extra_remove_size, i);
1422
1423 mtrr_print_out_one_result(i);
1424
1425 if (!result[i].bad) {
1426 set_var_mtrr_all(address_bits);
1427 return 1;
1428 }
1429 printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
1430 "will find optimal one\n");
1431 }
1432
1433 i = 0;
1434 memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
1435 memset(result, 0, sizeof(result));
1436 for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
1437
1438 for (chunk_size = gran_size; chunk_size < (1ULL<<32);
1439 chunk_size <<= 1) {
1440
1441 if (i >= NUM_RESULT)
1442 continue;
1443
1444 mtrr_calc_range_state(chunk_size, gran_size,
1445 extra_remove_base, extra_remove_size, i);
1446 if (debug_print) {
1447 mtrr_print_out_one_result(i);
1448 printk(KERN_INFO "\n");
1449 }
1450
1451 i++;
1452 }
1453 }
1454
1455 /* try to find the optimal index */
1456 index_good = mtrr_search_optimal_index();
1457
1458 if (index_good != -1) {
1459 printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
1460 i = index_good;
1461 mtrr_print_out_one_result(i);
1462
1463 /* convert ranges to var ranges state */
1464 chunk_size = result[i].chunk_sizek;
1465 chunk_size <<= 10;
1466 gran_size = result[i].gran_sizek;
1467 gran_size <<= 10;
1468 x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
1469 set_var_mtrr_all(address_bits);
1470 printk(KERN_DEBUG "New variable MTRRs\n");
1471 print_out_mtrr_range_state();
1472 return 1;
1473 } else {
1474 /* print out all */
1475 for (i = 0; i < NUM_RESULT; i++)
1476 mtrr_print_out_one_result(i);
1477 }
1478
1479 printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
1480 printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
1481
1482 return 0;
1483}
1484#else
1485static int __init mtrr_cleanup(unsigned address_bits)
1486{
1487 return 0;
1488}
1489#endif
1490
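The search in mtrr_cleanup() walks gran_size from 64K up to 2G in powers of two and, for each gran_size, walks chunk_size from gran_size up to 2G, which gives the 136 combinations behind NUM_RESULT. A tiny sketch that simply counts what those loops enumerate:

#include <stdio.h>

int main(void)
{
    unsigned long long gran, chunk;
    int combos = 0;

    for (gran = 1ULL << 16; gran < (1ULL << 32); gran <<= 1)
        for (chunk = gran; chunk < (1ULL << 32); chunk <<= 1)
            combos++;

    printf("gran/chunk combinations: %d\n", combos);    /* prints 136 */
    return 0;
}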
1491static int __initdata changed_by_mtrr_cleanup;
1492
1493static int disable_mtrr_trim;
1494
1495static int __init disable_mtrr_trim_setup(char *str)
1496{
1497 disable_mtrr_trim = 1;
1498 return 0;
1499}
1500early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
1501
1502/*
1503 * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
1504 * for memory >4GB. Check for that here.
1505 * Note this won't check if the MTRRs < 4GB where the magic bit doesn't
1506 * apply to are wrong, but so far we don't know of any such case in the wild.
1507 */
1508#define Tom2Enabled (1U << 21)
1509#define Tom2ForceMemTypeWB (1U << 22)
1510
1511int __init amd_special_default_mtrr(void)
1512{
1513 u32 l, h;
1514
1515 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
1516 return 0;
1517 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
1518 return 0;
1519 /* In case some hypervisor doesn't pass SYSCFG through */
1520 if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
1521 return 0;
1522 /*
1523 * Memory between 4GB and top of mem is forced WB by this magic bit.
1524 * Reserved before K8RevF, but should be zero there.
1525 */
1526 if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
1527 (Tom2Enabled | Tom2ForceMemTypeWB))
1528 return 1;
1529 return 0;
1530}
1531
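amd_special_default_mtrr() reduces to a two-bit predicate: memory above 4GB is forced write-back only when both Tom2Enabled (bit 21) and Tom2ForceMemTypeWB (bit 22) are set in SYSCFG. A trivial sketch of that check on a register value obtained elsewhere:

#include <stdint.h>
#include <stdio.h>

#define TOM2_ENABLED           (1u << 21)
#define TOM2_FORCE_MEM_TYPE_WB (1u << 22)

static int tom2_forces_wb(uint32_t syscfg_lo)
{
    return (syscfg_lo & (TOM2_ENABLED | TOM2_FORCE_MEM_TYPE_WB)) ==
           (TOM2_ENABLED | TOM2_FORCE_MEM_TYPE_WB);
}

int main(void)
{
    printf("%d\n", tom2_forces_wb(TOM2_ENABLED));                          /* 0 */
    printf("%d\n", tom2_forces_wb(TOM2_ENABLED | TOM2_FORCE_MEM_TYPE_WB)); /* 1 */
    return 0;
}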
1532static u64 __init real_trim_memory(unsigned long start_pfn,
1533 unsigned long limit_pfn)
1534{
1535 u64 trim_start, trim_size;
1536 trim_start = start_pfn;
1537 trim_start <<= PAGE_SHIFT;
1538 trim_size = limit_pfn;
1539 trim_size <<= PAGE_SHIFT;
1540 trim_size -= trim_start;
1541
1542 return e820_update_range(trim_start, trim_size, E820_RAM,
1543 E820_RESERVED);
1544}
1545/**
1546 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
1547 * @end_pfn: ending page frame number
1548 *
1549 * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
1550 * memory configurations. This routine checks that the highest MTRR matches
1551 * the end of memory, to make sure the MTRRs having a write back type cover
1552 * all of the memory the kernel is intending to use. If not, it'll trim any
1553 * memory off the end by adjusting end_pfn, removing it from the kernel's
1554 * allocation pools, warning the user with an obnoxious message.
1555 */
1556int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1557{
1558 unsigned long i, base, size, highest_pfn = 0, def, dummy;
1559 mtrr_type type;
1560 u64 total_trim_size;
1561
1562 /* extra one for all 0 */
1563 int num[MTRR_NUM_TYPES + 1];
1564 /*
1565 * Make sure we only trim uncachable memory on machines that
1566 * support the Intel MTRR architecture:
1567 */
1568 if (!is_cpu(INTEL) || disable_mtrr_trim)
1569 return 0;
1570 rdmsr(MTRRdefType_MSR, def, dummy);
1571 def &= 0xff;
1572 if (def != MTRR_TYPE_UNCACHABLE)
1573 return 0;
1574
1575 /* get it and store it aside */
1576 memset(range_state, 0, sizeof(range_state));
1577 for (i = 0; i < num_var_ranges; i++) {
1578 mtrr_if->get(i, &base, &size, &type);
1579 range_state[i].base_pfn = base;
1580 range_state[i].size_pfn = size;
1581 range_state[i].type = type;
1582 }
1583
1584 /* Find highest cached pfn */
1585 for (i = 0; i < num_var_ranges; i++) {
1586 type = range_state[i].type;
1587 if (type != MTRR_TYPE_WRBACK)
1588 continue;
1589 base = range_state[i].base_pfn;
1590 size = range_state[i].size_pfn;
1591 if (highest_pfn < base + size)
1592 highest_pfn = base + size;
1593 }
1594
1595 /* kvm/qemu doesn't have mtrr set right, don't trim them all */
1596 if (!highest_pfn) {
1597 printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
1598 return 0;
1599 }
1600
1601 /* check entries number */
1602 memset(num, 0, sizeof(num));
1603 for (i = 0; i < num_var_ranges; i++) {
1604 type = range_state[i].type;
1605 if (type >= MTRR_NUM_TYPES)
1606 continue;
1607 size = range_state[i].size_pfn;
1608 if (!size)
1609 type = MTRR_NUM_TYPES;
1610 num[type]++;
1611 }
1612
1613 /* no entry for WB? */
1614 if (!num[MTRR_TYPE_WRBACK])
1615 return 0;
1616
1617 /* check if we only had WB and UC */
1618 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
1619 num_var_ranges - num[MTRR_NUM_TYPES])
1620 return 0;
1621
1622 memset(range, 0, sizeof(range));
1623 nr_range = 0;
1624 if (mtrr_tom2) {
1625 range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
1626 range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
1627 if (highest_pfn < range[nr_range].end + 1)
1628 highest_pfn = range[nr_range].end + 1;
1629 nr_range++;
1630 }
1631 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
1632
1633 total_trim_size = 0;
1634 /* check the head */
1635 if (range[0].start)
1636 total_trim_size += real_trim_memory(0, range[0].start);
1637 /* check the holes */
1638 for (i = 0; i < nr_range - 1; i++) {
1639 if (range[i].end + 1 < range[i+1].start)
1640 total_trim_size += real_trim_memory(range[i].end + 1,
1641 range[i+1].start);
1642 }
1643 /* check the top */
1644 i = nr_range - 1;
1645 if (range[i].end + 1 < end_pfn)
1646 total_trim_size += real_trim_memory(range[i].end + 1,
1647 end_pfn);
1648
1649 if (total_trim_size) {
1650 printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
1651 " all of memory, losing %lluMB of RAM.\n",
1652 total_trim_size >> 20);
1653
1654 if (!changed_by_mtrr_cleanup)
1655 WARN_ON(1);
1656
1657 printk(KERN_INFO "update e820 for mtrr\n");
1658 update_e820();
1659
1660 return 1;
1661 }
1662
1663 return 0;
1664}
1665 614
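mtrr_trim_uncached_memory() works on sorted, write-back-covered PFN ranges and reserves whatever RAM falls outside them: the head before the first range, the holes between consecutive ranges, and the top between the last range and end_pfn. A compact sketch of that accounting over made-up ranges:

#include <stdio.h>

struct pfn_range { unsigned long start, end; };    /* inclusive PFNs */

int main(void)
{
    /* WB-covered ranges, sorted; one hole between them, short of end_pfn */
    struct pfn_range r[] = { { 0x10, 0x0fff }, { 0x2000, 0x3fff } };
    int nr = 2, i;
    unsigned long end_pfn = 0x5000, trimmed = 0;

    if (r[0].start)                     /* head */
        trimmed += r[0].start;
    for (i = 0; i < nr - 1; i++)        /* holes */
        if (r[i].end + 1 < r[i + 1].start)
            trimmed += r[i + 1].start - (r[i].end + 1);
    if (r[nr - 1].end + 1 < end_pfn)    /* top */
        trimmed += end_pfn - (r[nr - 1].end + 1);

    printf("trimmed %lu pages (%luKB with 4K pages)\n", trimmed, trimmed * 4);
    return 0;
}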
1666/** 615/**
1667 * mtrr_bp_init - initialize mtrrs on the boot CPU 616 * mtrr_bp_init - initialize mtrrs on the boot CPU
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index ffd60409cc6d..77f67f7b347a 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -79,6 +79,7 @@ extern struct mtrr_ops * mtrr_if;
79 79
80extern unsigned int num_var_ranges; 80extern unsigned int num_var_ranges;
81extern u64 mtrr_tom2; 81extern u64 mtrr_tom2;
82extern struct mtrr_state_type mtrr_state;
82 83
83void mtrr_state_warn(void); 84void mtrr_state_warn(void);
84const char *mtrr_attrib_to_str(int x); 85const char *mtrr_attrib_to_str(int x);
@@ -88,3 +89,6 @@ void mtrr_wrmsr(unsigned, unsigned, unsigned);
88int amd_init_mtrr(void); 89int amd_init_mtrr(void);
89int cyrix_init_mtrr(void); 90int cyrix_init_mtrr(void);
90int centaur_init_mtrr(void); 91int centaur_init_mtrr(void);
92
93extern int changed_by_mtrr_cleanup;
94extern int mtrr_cleanup(unsigned address_bits);
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 52b3fefbd5af..bb62b3e5caad 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -98,7 +98,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
98#endif 98#endif
99} 99}
100 100
101static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { 101static const struct cpu_dev __cpuinitconst transmeta_cpu_dev = {
102 .c_vendor = "Transmeta", 102 .c_vendor = "Transmeta",
103 .c_ident = { "GenuineTMx86", "TransmetaCPU" }, 103 .c_ident = { "GenuineTMx86", "TransmetaCPU" },
104 .c_early_init = early_init_transmeta, 104 .c_early_init = early_init_transmeta,
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c
index e777f79e0960..fd2c37bf7acb 100644
--- a/arch/x86/kernel/cpu/umc.c
+++ b/arch/x86/kernel/cpu/umc.c
@@ -8,7 +8,7 @@
8 * so no special init takes place. 8 * so no special init takes place.
9 */ 9 */
10 10
11static struct cpu_dev umc_cpu_dev __cpuinitdata = { 11static const struct cpu_dev __cpuinitconst umc_cpu_dev = {
12 .c_vendor = "UMC", 12 .c_vendor = "UMC",
13 .c_ident = { "UMC UMC UMC" }, 13 .c_ident = { "UMC UMC UMC" },
14 .c_models = { 14 .c_models = {
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 87d103ded1c3..dd2130b0fb3e 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -10,6 +10,7 @@
10#include <linux/kdebug.h> 10#include <linux/kdebug.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/ptrace.h> 12#include <linux/ptrace.h>
13#include <linux/ftrace.h>
13#include <linux/kexec.h> 14#include <linux/kexec.h>
14#include <linux/bug.h> 15#include <linux/bug.h>
15#include <linux/nmi.h> 16#include <linux/nmi.h>
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 508bec1cee27..ef2c3563357d 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -110,19 +110,50 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type)
110/* 110/*
111 * Add a memory region to the kernel e820 map. 111 * Add a memory region to the kernel e820 map.
112 */ 112 */
113void __init e820_add_region(u64 start, u64 size, int type) 113static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
114 int type)
114{ 115{
115 int x = e820.nr_map; 116 int x = e820x->nr_map;
116 117
117 if (x == ARRAY_SIZE(e820.map)) { 118 if (x == ARRAY_SIZE(e820x->map)) {
118 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); 119 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
119 return; 120 return;
120 } 121 }
121 122
122 e820.map[x].addr = start; 123 e820x->map[x].addr = start;
123 e820.map[x].size = size; 124 e820x->map[x].size = size;
124 e820.map[x].type = type; 125 e820x->map[x].type = type;
125 e820.nr_map++; 126 e820x->nr_map++;
127}
128
129void __init e820_add_region(u64 start, u64 size, int type)
130{
131 __e820_add_region(&e820, start, size, type);
132}
133
134static void __init e820_print_type(u32 type)
135{
136 switch (type) {
137 case E820_RAM:
138 case E820_RESERVED_KERN:
139 printk(KERN_CONT "(usable)");
140 break;
141 case E820_RESERVED:
142 printk(KERN_CONT "(reserved)");
143 break;
144 case E820_ACPI:
145 printk(KERN_CONT "(ACPI data)");
146 break;
147 case E820_NVS:
148 printk(KERN_CONT "(ACPI NVS)");
149 break;
150 case E820_UNUSABLE:
151 printk(KERN_CONT "(unusable)");
152 break;
153 default:
154 printk(KERN_CONT "type %u", type);
155 break;
156 }
126} 157}
127 158
128void __init e820_print_map(char *who) 159void __init e820_print_map(char *who)
@@ -134,27 +165,8 @@ void __init e820_print_map(char *who)
134 (unsigned long long) e820.map[i].addr, 165 (unsigned long long) e820.map[i].addr,
135 (unsigned long long) 166 (unsigned long long)
136 (e820.map[i].addr + e820.map[i].size)); 167 (e820.map[i].addr + e820.map[i].size));
137 switch (e820.map[i].type) { 168 e820_print_type(e820.map[i].type);
138 case E820_RAM: 169 printk(KERN_CONT "\n");
139 case E820_RESERVED_KERN:
140 printk(KERN_CONT "(usable)\n");
141 break;
142 case E820_RESERVED:
143 printk(KERN_CONT "(reserved)\n");
144 break;
145 case E820_ACPI:
146 printk(KERN_CONT "(ACPI data)\n");
147 break;
148 case E820_NVS:
149 printk(KERN_CONT "(ACPI NVS)\n");
150 break;
151 case E820_UNUSABLE:
152 printk("(unusable)\n");
153 break;
154 default:
155 printk(KERN_CONT "type %u\n", e820.map[i].type);
156 break;
157 }
158 } 170 }
159} 171}
160 172
@@ -221,7 +233,7 @@ void __init e820_print_map(char *who)
221 */ 233 */
222 234
223int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, 235int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
224 int *pnr_map) 236 u32 *pnr_map)
225{ 237{
226 struct change_member { 238 struct change_member {
227 struct e820entry *pbios; /* pointer to original bios entry */ 239 struct e820entry *pbios; /* pointer to original bios entry */
@@ -417,11 +429,12 @@ static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
417 return __append_e820_map(biosmap, nr_map); 429 return __append_e820_map(biosmap, nr_map);
418} 430}
419 431
420static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, 432static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
421 u64 size, unsigned old_type, 433 u64 size, unsigned old_type,
422 unsigned new_type) 434 unsigned new_type)
423{ 435{
424 int i; 436 u64 end;
437 unsigned int i;
425 u64 real_updated_size = 0; 438 u64 real_updated_size = 0;
426 439
427 BUG_ON(old_type == new_type); 440 BUG_ON(old_type == new_type);
@@ -429,27 +442,55 @@ static u64 __init e820_update_range_map(struct e820map *e820x, u64 start,
429 if (size > (ULLONG_MAX - start)) 442 if (size > (ULLONG_MAX - start))
430 size = ULLONG_MAX - start; 443 size = ULLONG_MAX - start;
431 444
432 for (i = 0; i < e820.nr_map; i++) { 445 end = start + size;
446 printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ",
447 (unsigned long long) start,
448 (unsigned long long) end);
449 e820_print_type(old_type);
450 printk(KERN_CONT " ==> ");
451 e820_print_type(new_type);
452 printk(KERN_CONT "\n");
453
454 for (i = 0; i < e820x->nr_map; i++) {
433 struct e820entry *ei = &e820x->map[i]; 455 struct e820entry *ei = &e820x->map[i];
434 u64 final_start, final_end; 456 u64 final_start, final_end;
457 u64 ei_end;
458
435 if (ei->type != old_type) 459 if (ei->type != old_type)
436 continue; 460 continue;
437 /* totally covered? */ 461
438 if (ei->addr >= start && 462 ei_end = ei->addr + ei->size;
439 (ei->addr + ei->size) <= (start + size)) { 463 /* totally covered by new range? */
464 if (ei->addr >= start && ei_end <= end) {
440 ei->type = new_type; 465 ei->type = new_type;
441 real_updated_size += ei->size; 466 real_updated_size += ei->size;
442 continue; 467 continue;
443 } 468 }
469
470 /* new range is totally covered? */
471 if (ei->addr < start && ei_end > end) {
472 __e820_add_region(e820x, start, size, new_type);
473 __e820_add_region(e820x, end, ei_end - end, ei->type);
474 ei->size = start - ei->addr;
475 real_updated_size += size;
476 continue;
477 }
478
444 /* partially covered */ 479 /* partially covered */
445 final_start = max(start, ei->addr); 480 final_start = max(start, ei->addr);
446 final_end = min(start + size, ei->addr + ei->size); 481 final_end = min(end, ei_end);
447 if (final_start >= final_end) 482 if (final_start >= final_end)
448 continue; 483 continue;
449 e820_add_region(final_start, final_end - final_start, 484
450 new_type); 485 __e820_add_region(e820x, final_start, final_end - final_start,
486 new_type);
487
451 real_updated_size += final_end - final_start; 488 real_updated_size += final_end - final_start;
452 489
490 /*
491 * The leftover range could be either the head or the tail, so the
492 * size has to be updated first.
493 */
453 ei->size -= final_end - final_start; 494 ei->size -= final_end - final_start;
454 if (ei->addr < final_start) 495 if (ei->addr < final_start)
455 continue; 496 continue;
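The interesting new case in __e820_update_range() is a range that falls entirely inside an existing entry: the middle gets the new type, the tail is re-added with the old type, and the original entry is shrunk down to the head. A small sketch of that three-way split on a plain array, using illustrative type values of 1 for usable RAM and 2 for reserved:

#include <stdio.h>

struct entry { unsigned long long addr, size; int type; };

static struct entry map[8] = { { 0x0, 0x100000, 1 } };    /* one 1MB RAM entry */
static int nr_map = 1;

static void add_entry(unsigned long long a, unsigned long long s, int t)
{
    map[nr_map].addr = a;
    map[nr_map].size = s;
    map[nr_map].type = t;
    nr_map++;
}

int main(void)
{
    /* mark [0x40000, 0x60000) reserved inside the RAM entry */
    unsigned long long start = 0x40000, end = 0x60000;
    struct entry *ei = &map[0];
    unsigned long long ei_end = ei->addr + ei->size;
    int i;

    if (ei->addr < start && ei_end > end) {
        add_entry(start, end - start, 2);          /* middle, new type */
        add_entry(end, ei_end - end, ei->type);    /* tail, old type */
        ei->size = start - ei->addr;               /* head keeps the old type */
    }

    for (i = 0; i < nr_map; i++)
        printf("%#llx - %#llx type %d\n",
               map[i].addr, map[i].addr + map[i].size, map[i].type);
    return 0;
}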
@@ -461,13 +502,13 @@ static u64 __init e820_update_range_map(struct e820map *e820x, u64 start,
461u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, 502u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
462 unsigned new_type) 503 unsigned new_type)
463{ 504{
464 return e820_update_range_map(&e820, start, size, old_type, new_type); 505 return __e820_update_range(&e820, start, size, old_type, new_type);
465} 506}
466 507
467static u64 __init e820_update_range_saved(u64 start, u64 size, 508static u64 __init e820_update_range_saved(u64 start, u64 size,
468 unsigned old_type, unsigned new_type) 509 unsigned old_type, unsigned new_type)
469{ 510{
470 return e820_update_range_map(&e820_saved, start, size, old_type, 511 return __e820_update_range(&e820_saved, start, size, old_type,
471 new_type); 512 new_type);
472} 513}
473 514
@@ -511,7 +552,7 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
511 552
512void __init update_e820(void) 553void __init update_e820(void)
513{ 554{
514 int nr_map; 555 u32 nr_map;
515 556
516 nr_map = e820.nr_map; 557 nr_map = e820.nr_map;
517 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) 558 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
@@ -522,7 +563,7 @@ void __init update_e820(void)
522} 563}
523static void __init update_e820_saved(void) 564static void __init update_e820_saved(void)
524{ 565{
525 int nr_map; 566 u32 nr_map;
526 567
527 nr_map = e820_saved.nr_map; 568 nr_map = e820_saved.nr_map;
528 if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map)) 569 if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
@@ -1020,8 +1061,8 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
1020 continue; 1061 continue;
1021 return addr; 1062 return addr;
1022 } 1063 }
1023 return -1UL;
1024 1064
1065 return -1ULL;
1025} 1066}
1026 1067
1027/* 1068/*
@@ -1034,13 +1075,22 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
1034 u64 start; 1075 u64 start;
1035 1076
1036 start = startt; 1077 start = startt;
1037 while (size < sizet) 1078 while (size < sizet && (start + 1))
1038 start = find_e820_area_size(start, &size, align); 1079 start = find_e820_area_size(start, &size, align);
1039 1080
1040 if (size < sizet) 1081 if (size < sizet)
1041 return 0; 1082 return 0;
1042 1083
1084#ifdef CONFIG_X86_32
1085 if (start >= MAXMEM)
1086 return 0;
1087 if (start + size > MAXMEM)
1088 size = MAXMEM - start;
1089#endif
1090
1043 addr = round_down(start + size - sizet, align); 1091 addr = round_down(start + size - sizet, align);
1092 if (addr < start)
1093 return 0;
1044 e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); 1094 e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
1045 e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); 1095 e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
1046 printk(KERN_INFO "update e820 for early_reserve_e820\n"); 1096 printk(KERN_INFO "update e820 for early_reserve_e820\n");
@@ -1253,7 +1303,7 @@ early_param("memmap", parse_memmap_opt);
1253void __init finish_e820_parsing(void) 1303void __init finish_e820_parsing(void)
1254{ 1304{
1255 if (userdef) { 1305 if (userdef) {
1256 int nr = e820.nr_map; 1306 u32 nr = e820.nr_map;
1257 1307
1258 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) 1308 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
1259 early_panic("Invalid user supplied memory map"); 1309 early_panic("Invalid user supplied memory map");
@@ -1336,7 +1386,7 @@ void __init e820_reserve_resources_late(void)
1336char *__init default_machine_specific_memory_setup(void) 1386char *__init default_machine_specific_memory_setup(void)
1337{ 1387{
1338 char *who = "BIOS-e820"; 1388 char *who = "BIOS-e820";
1339 int new_nr; 1389 u32 new_nr;
1340 /* 1390 /*
1341 * Try to copy the BIOS-supplied E820-map. 1391 * Try to copy the BIOS-supplied E820-map.
1342 * 1392 *
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 639ad98238a2..335f049d110f 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -250,7 +250,7 @@ static int dbgp_wait_until_complete(void)
250 return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl); 250 return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
251} 251}
252 252
253static void dbgp_mdelay(int ms) 253static void __init dbgp_mdelay(int ms)
254{ 254{
255 int i; 255 int i;
256 256
@@ -311,7 +311,7 @@ static void dbgp_set_data(const void *buf, int size)
311 writel(hi, &ehci_debug->data47); 311 writel(hi, &ehci_debug->data47);
312} 312}
313 313
314static void dbgp_get_data(void *buf, int size) 314static void __init dbgp_get_data(void *buf, int size)
315{ 315{
316 unsigned char *bytes = buf; 316 unsigned char *bytes = buf;
317 u32 lo, hi; 317 u32 lo, hi;
@@ -355,7 +355,7 @@ static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
355 return ret; 355 return ret;
356} 356}
357 357
358static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, 358static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
359 int size) 359 int size)
360{ 360{
361 u32 pids, addr, ctrl; 361 u32 pids, addr, ctrl;
@@ -386,8 +386,8 @@ static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
386 return ret; 386 return ret;
387} 387}
388 388
389static int dbgp_control_msg(unsigned devnum, int requesttype, int request, 389static int __init dbgp_control_msg(unsigned devnum, int requesttype,
390 int value, int index, void *data, int size) 390 int request, int value, int index, void *data, int size)
391{ 391{
392 u32 pids, addr, ctrl; 392 u32 pids, addr, ctrl;
393 struct usb_ctrlrequest req; 393 struct usb_ctrlrequest req;
@@ -489,7 +489,7 @@ static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc)
489 return 0; 489 return 0;
490} 490}
491 491
492static int ehci_reset_port(int port) 492static int __init ehci_reset_port(int port)
493{ 493{
494 u32 portsc; 494 u32 portsc;
495 u32 delay_time, delay; 495 u32 delay_time, delay;
@@ -532,7 +532,7 @@ static int ehci_reset_port(int port)
532 return -EBUSY; 532 return -EBUSY;
533} 533}
534 534
535static int ehci_wait_for_port(int port) 535static int __init ehci_wait_for_port(int port)
536{ 536{
537 u32 status; 537 u32 status;
538 int ret, reps; 538 int ret, reps;
@@ -557,13 +557,13 @@ static inline void dbgp_printk(const char *fmt, ...) { }
557 557
558typedef void (*set_debug_port_t)(int port); 558typedef void (*set_debug_port_t)(int port);
559 559
560static void default_set_debug_port(int port) 560static void __init default_set_debug_port(int port)
561{ 561{
562} 562}
563 563
564static set_debug_port_t set_debug_port = default_set_debug_port; 564static set_debug_port_t __initdata set_debug_port = default_set_debug_port;
565 565
566static void nvidia_set_debug_port(int port) 566static void __init nvidia_set_debug_port(int port)
567{ 567{
568 u32 dword; 568 u32 dword;
569 dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 569 dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 899e8938e79f..c929add475c9 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -442,8 +442,7 @@ sysenter_past_esp:
442 442
443 GET_THREAD_INFO(%ebp) 443 GET_THREAD_INFO(%ebp)
444 444
445 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ 445 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
446 testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
447 jnz sysenter_audit 446 jnz sysenter_audit
448sysenter_do_call: 447sysenter_do_call:
449 cmpl $(nr_syscalls), %eax 448 cmpl $(nr_syscalls), %eax
@@ -454,7 +453,7 @@ sysenter_do_call:
454 DISABLE_INTERRUPTS(CLBR_ANY) 453 DISABLE_INTERRUPTS(CLBR_ANY)
455 TRACE_IRQS_OFF 454 TRACE_IRQS_OFF
456 movl TI_flags(%ebp), %ecx 455 movl TI_flags(%ebp), %ecx
457 testw $_TIF_ALLWORK_MASK, %cx 456 testl $_TIF_ALLWORK_MASK, %ecx
458 jne sysexit_audit 457 jne sysexit_audit
459sysenter_exit: 458sysenter_exit:
460/* if something modifies registers it must also disable sysexit */ 459/* if something modifies registers it must also disable sysexit */
@@ -468,7 +467,7 @@ sysenter_exit:
468 467
469#ifdef CONFIG_AUDITSYSCALL 468#ifdef CONFIG_AUDITSYSCALL
470sysenter_audit: 469sysenter_audit:
471 testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) 470 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
472 jnz syscall_trace_entry 471 jnz syscall_trace_entry
473 addl $4,%esp 472 addl $4,%esp
474 CFI_ADJUST_CFA_OFFSET -4 473 CFI_ADJUST_CFA_OFFSET -4
@@ -485,7 +484,7 @@ sysenter_audit:
485 jmp sysenter_do_call 484 jmp sysenter_do_call
486 485
487sysexit_audit: 486sysexit_audit:
488 testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx 487 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
489 jne syscall_exit_work 488 jne syscall_exit_work
490 TRACE_IRQS_ON 489 TRACE_IRQS_ON
491 ENABLE_INTERRUPTS(CLBR_ANY) 490 ENABLE_INTERRUPTS(CLBR_ANY)
@@ -498,7 +497,7 @@ sysexit_audit:
498 DISABLE_INTERRUPTS(CLBR_ANY) 497 DISABLE_INTERRUPTS(CLBR_ANY)
499 TRACE_IRQS_OFF 498 TRACE_IRQS_OFF
500 movl TI_flags(%ebp), %ecx 499 movl TI_flags(%ebp), %ecx
501 testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx 500 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
502 jne syscall_exit_work 501 jne syscall_exit_work
503 movl PT_EAX(%esp),%eax /* reload syscall return value */ 502 movl PT_EAX(%esp),%eax /* reload syscall return value */
504 jmp sysenter_exit 503 jmp sysenter_exit
@@ -523,8 +522,7 @@ ENTRY(system_call)
523 SAVE_ALL 522 SAVE_ALL
524 GET_THREAD_INFO(%ebp) 523 GET_THREAD_INFO(%ebp)
525 # system call tracing in operation / emulation 524 # system call tracing in operation / emulation
526 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ 525 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
527 testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
528 jnz syscall_trace_entry 526 jnz syscall_trace_entry
529 cmpl $(nr_syscalls), %eax 527 cmpl $(nr_syscalls), %eax
530 jae syscall_badsys 528 jae syscall_badsys
@@ -538,7 +536,7 @@ syscall_exit:
538 # between sampling and the iret 536 # between sampling and the iret
539 TRACE_IRQS_OFF 537 TRACE_IRQS_OFF
540 movl TI_flags(%ebp), %ecx 538 movl TI_flags(%ebp), %ecx
541 testw $_TIF_ALLWORK_MASK, %cx # current->work 539 testl $_TIF_ALLWORK_MASK, %ecx # current->work
542 jne syscall_exit_work 540 jne syscall_exit_work
543 541
544restore_all: 542restore_all:
@@ -673,7 +671,7 @@ END(syscall_trace_entry)
673 # perform syscall exit tracing 671 # perform syscall exit tracing
674 ALIGN 672 ALIGN
675syscall_exit_work: 673syscall_exit_work:
676 testb $_TIF_WORK_SYSCALL_EXIT, %cl 674 testl $_TIF_WORK_SYSCALL_EXIT, %ecx
677 jz work_pending 675 jz work_pending
678 TRACE_IRQS_ON 676 TRACE_IRQS_ON
679 ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call 677 ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7ba4621c0dfa..a331ec38af9e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -368,6 +368,7 @@ ENTRY(save_rest)
368END(save_rest) 368END(save_rest)
369 369
370/* save complete stack frame */ 370/* save complete stack frame */
371 .pushsection .kprobes.text, "ax"
371ENTRY(save_paranoid) 372ENTRY(save_paranoid)
372 XCPT_FRAME 1 RDI+8 373 XCPT_FRAME 1 RDI+8
373 cld 374 cld
@@ -396,6 +397,7 @@ ENTRY(save_paranoid)
3961: ret 3971: ret
397 CFI_ENDPROC 398 CFI_ENDPROC
398END(save_paranoid) 399END(save_paranoid)
400 .popsection
399 401
400/* 402/*
401 * A newly forked process directly context switches into this address. 403 * A newly forked process directly context switches into this address.
@@ -416,7 +418,6 @@ ENTRY(ret_from_fork)
416 418
417 GET_THREAD_INFO(%rcx) 419 GET_THREAD_INFO(%rcx)
418 420
419 CFI_REMEMBER_STATE
420 RESTORE_REST 421 RESTORE_REST
421 422
422 testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? 423 testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
@@ -428,7 +429,6 @@ ENTRY(ret_from_fork)
428 RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET 429 RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
429 jmp ret_from_sys_call # go to the SYSRET fastpath 430 jmp ret_from_sys_call # go to the SYSRET fastpath
430 431
431 CFI_RESTORE_STATE
432 CFI_ENDPROC 432 CFI_ENDPROC
433END(ret_from_fork) 433END(ret_from_fork)
434 434
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 231bdd3c5b1c..76f7141e0f91 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -389,79 +389,6 @@ void ftrace_nmi_exit(void)
389 389
390#endif /* !CONFIG_DYNAMIC_FTRACE */ 390#endif /* !CONFIG_DYNAMIC_FTRACE */
391 391
392/* Add a function return address to the trace stack on thread info.*/
393static int push_return_trace(unsigned long ret, unsigned long long time,
394 unsigned long func, int *depth)
395{
396 int index;
397
398 if (!current->ret_stack)
399 return -EBUSY;
400
401 /* The return trace stack is full */
402 if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
403 atomic_inc(&current->trace_overrun);
404 return -EBUSY;
405 }
406
407 index = ++current->curr_ret_stack;
408 barrier();
409 current->ret_stack[index].ret = ret;
410 current->ret_stack[index].func = func;
411 current->ret_stack[index].calltime = time;
412 *depth = index;
413
414 return 0;
415}
416
417/* Retrieve a function return address to the trace stack on thread info.*/
418static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
419{
420 int index;
421
422 index = current->curr_ret_stack;
423
424 if (unlikely(index < 0)) {
425 ftrace_graph_stop();
426 WARN_ON(1);
427 /* Might as well panic, otherwise we have no where to go */
428 *ret = (unsigned long)panic;
429 return;
430 }
431
432 *ret = current->ret_stack[index].ret;
433 trace->func = current->ret_stack[index].func;
434 trace->calltime = current->ret_stack[index].calltime;
435 trace->overrun = atomic_read(&current->trace_overrun);
436 trace->depth = index;
437 barrier();
438 current->curr_ret_stack--;
439
440}
441
442/*
443 * Send the trace to the ring-buffer.
444 * @return the original return address.
445 */
446unsigned long ftrace_return_to_handler(void)
447{
448 struct ftrace_graph_ret trace;
449 unsigned long ret;
450
451 pop_return_trace(&trace, &ret);
452 trace.rettime = cpu_clock(raw_smp_processor_id());
453 ftrace_graph_return(&trace);
454
455 if (unlikely(!ret)) {
456 ftrace_graph_stop();
457 WARN_ON(1);
458 /* Might as well panic. What else to do? */
459 ret = (unsigned long)panic;
460 }
461
462 return ret;
463}
464
465/* 392/*
466 * Hook the return address and push it in the stack of return addrs 393 * Hook the return address and push it in the stack of return addrs
467 * in current thread info. 394 * in current thread info.
@@ -521,7 +448,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
521 448
522 calltime = cpu_clock(raw_smp_processor_id()); 449 calltime = cpu_clock(raw_smp_processor_id());
523 450
524 if (push_return_trace(old, calltime, 451 if (ftrace_push_return_trace(old, calltime,
525 self_addr, &trace.depth) == -EBUSY) { 452 self_addr, &trace.depth) == -EBUSY) {
526 *parent = old; 453 *parent = old;
527 return; 454 return;
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index ac108d1fe182..3f8579f8d42c 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -18,7 +18,7 @@ void __init i386_start_kernel(void)
18{ 18{
19 reserve_trampoline_memory(); 19 reserve_trampoline_memory();
20 20
21 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); 21 reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
22 22
23#ifdef CONFIG_BLK_DEV_INITRD 23#ifdef CONFIG_BLK_DEV_INITRD
24 /* Reserve INITRD */ 24 /* Reserve INITRD */
@@ -29,9 +29,6 @@ void __init i386_start_kernel(void)
29 reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); 29 reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
30 } 30 }
31#endif 31#endif
32 reserve_early(init_pg_tables_start, init_pg_tables_end,
33 "INIT_PG_TABLE");
34
35 reserve_ebda_region(); 32 reserve_ebda_region();
36 33
37 /* 34 /*
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index f5b272247690..70eaa852c732 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -100,7 +100,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
100 100
101 reserve_trampoline_memory(); 101 reserve_trampoline_memory();
102 102
103 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); 103 reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
104 104
105#ifdef CONFIG_BLK_DEV_INITRD 105#ifdef CONFIG_BLK_DEV_INITRD
106 /* Reserve INITRD */ 106 /* Reserve INITRD */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index c32ca19d591a..30683883e0cd 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -38,42 +38,40 @@
38#define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id 38#define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id
39 39
40/* 40/*
41 * This is how much memory *in addition to the memory covered up to 41 * This is how much memory in addition to the memory covered up to
42 * and including _end* we need mapped initially. 42 * and including _end we need mapped initially.
43 * We need: 43 * We need:
44 * - one bit for each possible page, but only in low memory, which means 44 * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
45 * 2^32/4096/8 = 128K worst case (4G/4G split.) 45 * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
46 * - enough space to map all low memory, which means
47 * (2^32/4096) / 1024 pages (worst case, non PAE)
48 * (2^32/4096) / 512 + 4 pages (worst case for PAE)
49 * - a few pages for allocator use before the kernel pagetable has
50 * been set up
51 * 46 *
52 * Modulo rounding, each megabyte assigned here requires a kilobyte of 47 * Modulo rounding, each megabyte assigned here requires a kilobyte of
53 * memory, which is currently unreclaimed. 48 * memory, which is currently unreclaimed.
54 * 49 *
55 * This should be a multiple of a page. 50 * This should be a multiple of a page.
51 *
52 * KERNEL_IMAGE_SIZE should be greater than pa(_end)
 53 * and smaller than max_low_pfn, otherwise some page table entries will be wasted
56 */ 54 */
57LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
58
59/*
60 * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
61 * pagetables from above the 16MB DMA limit, so we'll have to set
62 * up pagetables 16MB more (worst-case):
63 */
64#ifdef CONFIG_DEBUG_PAGEALLOC
65LOW_PAGES = LOW_PAGES + 0x1000000
66#endif
67 55
68#if PTRS_PER_PMD > 1 56#if PTRS_PER_PMD > 1
69PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD 57#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
70#else 58#else
71PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD) 59#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
72#endif 60#endif
73BOOTBITMAP_SIZE = LOW_PAGES / 8
74ALLOCATOR_SLOP = 4
75 61
76INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm 62/* Enough space to fit pagetables for the low memory linear map */
63MAPPING_BEYOND_END = \
64 PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
65
66/*
67 * Worst-case size of the kernel mapping we need to make:
68 * the worst-case size of the kernel itself, plus the extra we need
69 * to map for the linear map.
70 */
71KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT
72
73INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm
74RESERVE_BRK(pagetables, INIT_MAP_SIZE)
77 75
78/* 76/*
79 * 32-bit kernel entrypoint; only used by the boot CPU. On entry, 77 * 32-bit kernel entrypoint; only used by the boot CPU. On entry,
@@ -166,10 +164,10 @@ num_subarch_entries = (. - subarch_entries) / 4
166 164
167/* 165/*
168 * Initialize page tables. This creates a PDE and a set of page 166 * Initialize page tables. This creates a PDE and a set of page
169 * tables, which are located immediately beyond _end. The variable 167 * tables, which are located immediately beyond __brk_base. The variable
170 * init_pg_tables_end is set up to point to the first "safe" location. 168 * _brk_end is set up to point to the first "safe" location.
171 * Mappings are created both at virtual address 0 (identity mapping) 169 * Mappings are created both at virtual address 0 (identity mapping)
172 * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END. 170 * and PAGE_OFFSET for up to _end.
173 * 171 *
174 * Note that the stack is not yet set up! 172 * Note that the stack is not yet set up!
175 */ 173 */
@@ -190,8 +188,7 @@ default_entry:
190 188
191 xorl %ebx,%ebx /* %ebx is kept at zero */ 189 xorl %ebx,%ebx /* %ebx is kept at zero */
192 190
193 movl $pa(pg0), %edi 191 movl $pa(__brk_base), %edi
194 movl %edi, pa(init_pg_tables_start)
195 movl $pa(swapper_pg_pmd), %edx 192 movl $pa(swapper_pg_pmd), %edx
196 movl $PTE_IDENT_ATTR, %eax 193 movl $PTE_IDENT_ATTR, %eax
19710: 19410:
@@ -209,14 +206,14 @@ default_entry:
209 loop 11b 206 loop 11b
210 207
211 /* 208 /*
212 * End condition: we must map up to and including INIT_MAP_BEYOND_END 209 * End condition: we must map up to the end + MAPPING_BEYOND_END.
213 * bytes beyond the end of our own page tables.
214 */ 210 */
215 leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp 211 movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
216 cmpl %ebp,%eax 212 cmpl %ebp,%eax
217 jb 10b 213 jb 10b
2181: 2141:
219 movl %edi,pa(init_pg_tables_end) 215 addl $__PAGE_OFFSET, %edi
216 movl %edi, pa(_brk_end)
220 shrl $12, %eax 217 shrl $12, %eax
221 movl %eax, pa(max_pfn_mapped) 218 movl %eax, pa(max_pfn_mapped)
222 219
@@ -227,8 +224,7 @@ default_entry:
227 224
228page_pde_offset = (__PAGE_OFFSET >> 20); 225page_pde_offset = (__PAGE_OFFSET >> 20);
229 226
230 movl $pa(pg0), %edi 227 movl $pa(__brk_base), %edi
231 movl %edi, pa(init_pg_tables_start)
232 movl $pa(swapper_pg_dir), %edx 228 movl $pa(swapper_pg_dir), %edx
233 movl $PTE_IDENT_ATTR, %eax 229 movl $PTE_IDENT_ATTR, %eax
23410: 23010:
@@ -242,14 +238,13 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
242 addl $0x1000,%eax 238 addl $0x1000,%eax
243 loop 11b 239 loop 11b
244 /* 240 /*
245 * End condition: we must map up to and including INIT_MAP_BEYOND_END 241 * End condition: we must map up to the end + MAPPING_BEYOND_END.
246 * bytes beyond the end of our own page tables; the +0x007 is
247 * the attribute bits
248 */ 242 */
249 leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp 243 movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
250 cmpl %ebp,%eax 244 cmpl %ebp,%eax
251 jb 10b 245 jb 10b
252 movl %edi,pa(init_pg_tables_end) 246 addl $__PAGE_OFFSET, %edi
247 movl %edi, pa(_brk_end)
253 shrl $12, %eax 248 shrl $12, %eax
254 movl %eax, pa(max_pfn_mapped) 249 movl %eax, pa(max_pfn_mapped)
255 250
@@ -636,6 +631,7 @@ swapper_pg_fixmap:
636 .fill 1024,4,0 631 .fill 1024,4,0
637ENTRY(empty_zero_page) 632ENTRY(empty_zero_page)
638 .fill 4096,1,0 633 .fill 4096,1,0
634
639/* 635/*
640 * This starts the data section. 636 * This starts the data section.
641 */ 637 */
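The PAGE_TABLE_SIZE()/MAPPING_BEYOND_END/INIT_MAP_SIZE arithmetic introduced above is easier to check outside of assembler-macro form. The stand-alone C sketch below reproduces the same calculation for assumed, illustrative constants (a 3G/1G split, a 512 MB KERNEL_IMAGE_SIZE, 4 KB pages, and the usual PTRS_PER_PMD/PTRS_PER_PGD values for PAE and non-PAE); none of these values are taken from the patch or any particular .config.

#include <stdio.h>

#define PAGE_SHIFT        12
#define PAGE_SIZE         (1UL << PAGE_SHIFT)
#define PAGE_OFFSET       0xC0000000UL              /* assumed 3G/1G split  */
#define KERNEL_IMAGE_SIZE (512UL * 1024 * 1024)     /* assumed 512 MB image */

/*
 * Mirrors PAGE_TABLE_SIZE() from the patch: pages / PTRS_PER_PGD for
 * non-PAE (PTRS_PER_PMD == 1, PTRS_PER_PGD == 1024), and
 * pages / PTRS_PER_PMD + PTRS_PER_PGD for PAE (512 and 4).
 */
static unsigned long page_table_size(unsigned long pages, int pae)
{
	return pae ? pages / 512 + 4 : pages / 1024;
}

int main(void)
{
	unsigned long lowmem_pages =
		(unsigned long)(((1ULL << 32) - PAGE_OFFSET) >> PAGE_SHIFT);

	for (int pae = 0; pae <= 1; pae++) {
		/* pagetable bytes needed to map the rest of lowmem */
		unsigned long mapping_beyond_end =
			page_table_size(lowmem_pages, pae) << PAGE_SHIFT;
		/* worst-case pages to map: kernel image plus that extra */
		unsigned long kernel_pages =
			(KERNEL_IMAGE_SIZE + mapping_beyond_end) >> PAGE_SHIFT;
		/* brk space reserved for the early pagetables themselves */
		unsigned long init_map_size =
			page_table_size(kernel_pages, pae) * PAGE_SIZE;

		printf("%-8s MAPPING_BEYOND_END=%lu KiB  INIT_MAP_SIZE=%lu KiB\n",
		       pae ? "PAE:" : "non-PAE:",
		       mapping_beyond_end >> 10, init_map_size >> 10);
	}
	return 0;
}

For these assumed values the sketch reports roughly 1 MB (non-PAE) or 2 MB (PAE) of extra mapping beyond _end, and about 512 KB or 1 MB of brk space reserved for the early page tables.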
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a00545fe5cdd..648b3a2a3a44 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -80,6 +80,7 @@ static inline void hpet_clear_mapping(void)
80 */ 80 */
81static int boot_hpet_disable; 81static int boot_hpet_disable;
82int hpet_force_user; 82int hpet_force_user;
83static int hpet_verbose;
83 84
84static int __init hpet_setup(char *str) 85static int __init hpet_setup(char *str)
85{ 86{
@@ -88,6 +89,8 @@ static int __init hpet_setup(char *str)
88 boot_hpet_disable = 1; 89 boot_hpet_disable = 1;
89 if (!strncmp("force", str, 5)) 90 if (!strncmp("force", str, 5))
90 hpet_force_user = 1; 91 hpet_force_user = 1;
92 if (!strncmp("verbose", str, 7))
93 hpet_verbose = 1;
91 } 94 }
92 return 1; 95 return 1;
93} 96}
@@ -119,6 +122,43 @@ int is_hpet_enabled(void)
119} 122}
120EXPORT_SYMBOL_GPL(is_hpet_enabled); 123EXPORT_SYMBOL_GPL(is_hpet_enabled);
121 124
125static void _hpet_print_config(const char *function, int line)
126{
127 u32 i, timers, l, h;
128 printk(KERN_INFO "hpet: %s(%d):\n", function, line);
129 l = hpet_readl(HPET_ID);
130 h = hpet_readl(HPET_PERIOD);
131 timers = ((l & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
132 printk(KERN_INFO "hpet: ID: 0x%x, PERIOD: 0x%x\n", l, h);
133 l = hpet_readl(HPET_CFG);
134 h = hpet_readl(HPET_STATUS);
135 printk(KERN_INFO "hpet: CFG: 0x%x, STATUS: 0x%x\n", l, h);
136 l = hpet_readl(HPET_COUNTER);
137 h = hpet_readl(HPET_COUNTER+4);
138 printk(KERN_INFO "hpet: COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h);
139
140 for (i = 0; i < timers; i++) {
141 l = hpet_readl(HPET_Tn_CFG(i));
142 h = hpet_readl(HPET_Tn_CFG(i)+4);
143 printk(KERN_INFO "hpet: T%d: CFG_l: 0x%x, CFG_h: 0x%x\n",
144 i, l, h);
145 l = hpet_readl(HPET_Tn_CMP(i));
146 h = hpet_readl(HPET_Tn_CMP(i)+4);
147 printk(KERN_INFO "hpet: T%d: CMP_l: 0x%x, CMP_h: 0x%x\n",
148 i, l, h);
149 l = hpet_readl(HPET_Tn_ROUTE(i));
150 h = hpet_readl(HPET_Tn_ROUTE(i)+4);
151 printk(KERN_INFO "hpet: T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n",
152 i, l, h);
153 }
154}
155
156#define hpet_print_config() \
157do { \
158 if (hpet_verbose) \
159 _hpet_print_config(__FUNCTION__, __LINE__); \
160} while (0)
161
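Usage note: _hpet_print_config() only runs when the user boots with hpet=verbose, which the hpet_setup() hunk above now parses. The small user-space sketch below, with invented names, shows the same runtime-gated debug-print pattern; __func__ is the standard C99 spelling of the GCC-specific __FUNCTION__ used in the patch.

#include <stdio.h>

static int debug_verbose;	/* would be set from a command-line option */

static void _dump_state(const char *function, int line)
{
	printf("dump requested from %s(%d)\n", function, line);
	/* ...read and print the hardware registers here... */
}

/*
 * do { } while (0) makes the macro expand to a single statement, so it
 * stays safe inside un-braced if/else bodies.
 */
#define dump_state()						\
do {								\
	if (debug_verbose)					\
		_dump_state(__func__, __LINE__);		\
} while (0)

int main(void)
{
	debug_verbose = 1;	/* e.g. the user asked for verbose output */
	dump_state();
	return 0;
}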
122/* 162/*
123 * When the hpet driver (/dev/hpet) is enabled, we need to reserve 163 * When the hpet driver (/dev/hpet) is enabled, we need to reserve
124 * timer 0 and timer 1 in case of RTC emulation. 164 * timer 0 and timer 1 in case of RTC emulation.
@@ -191,27 +231,37 @@ static struct clock_event_device hpet_clockevent = {
191 .rating = 50, 231 .rating = 50,
192}; 232};
193 233
194static void hpet_start_counter(void) 234static void hpet_stop_counter(void)
195{ 235{
196 unsigned long cfg = hpet_readl(HPET_CFG); 236 unsigned long cfg = hpet_readl(HPET_CFG);
197
198 cfg &= ~HPET_CFG_ENABLE; 237 cfg &= ~HPET_CFG_ENABLE;
199 hpet_writel(cfg, HPET_CFG); 238 hpet_writel(cfg, HPET_CFG);
200 hpet_writel(0, HPET_COUNTER); 239 hpet_writel(0, HPET_COUNTER);
201 hpet_writel(0, HPET_COUNTER + 4); 240 hpet_writel(0, HPET_COUNTER + 4);
241}
242
243static void hpet_start_counter(void)
244{
245 unsigned long cfg = hpet_readl(HPET_CFG);
202 cfg |= HPET_CFG_ENABLE; 246 cfg |= HPET_CFG_ENABLE;
203 hpet_writel(cfg, HPET_CFG); 247 hpet_writel(cfg, HPET_CFG);
204} 248}
205 249
250static void hpet_restart_counter(void)
251{
252 hpet_stop_counter();
253 hpet_start_counter();
254}
255
206static void hpet_resume_device(void) 256static void hpet_resume_device(void)
207{ 257{
208 force_hpet_resume(); 258 force_hpet_resume();
209} 259}
210 260
211static void hpet_restart_counter(void) 261static void hpet_resume_counter(void)
212{ 262{
213 hpet_resume_device(); 263 hpet_resume_device();
214 hpet_start_counter(); 264 hpet_restart_counter();
215} 265}
216 266
217static void hpet_enable_legacy_int(void) 267static void hpet_enable_legacy_int(void)
@@ -259,29 +309,23 @@ static int hpet_setup_msi_irq(unsigned int irq);
259static void hpet_set_mode(enum clock_event_mode mode, 309static void hpet_set_mode(enum clock_event_mode mode,
260 struct clock_event_device *evt, int timer) 310 struct clock_event_device *evt, int timer)
261{ 311{
262 unsigned long cfg, cmp, now; 312 unsigned long cfg;
263 uint64_t delta; 313 uint64_t delta;
264 314
265 switch (mode) { 315 switch (mode) {
266 case CLOCK_EVT_MODE_PERIODIC: 316 case CLOCK_EVT_MODE_PERIODIC:
317 hpet_stop_counter();
267 delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; 318 delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
268 delta >>= evt->shift; 319 delta >>= evt->shift;
269 now = hpet_readl(HPET_COUNTER);
270 cmp = now + (unsigned long) delta;
271 cfg = hpet_readl(HPET_Tn_CFG(timer)); 320 cfg = hpet_readl(HPET_Tn_CFG(timer));
272 /* Make sure we use edge triggered interrupts */ 321 /* Make sure we use edge triggered interrupts */
273 cfg &= ~HPET_TN_LEVEL; 322 cfg &= ~HPET_TN_LEVEL;
274 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | 323 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
275 HPET_TN_SETVAL | HPET_TN_32BIT; 324 HPET_TN_SETVAL | HPET_TN_32BIT;
276 hpet_writel(cfg, HPET_Tn_CFG(timer)); 325 hpet_writel(cfg, HPET_Tn_CFG(timer));
277 /*
278 * The first write after writing TN_SETVAL to the
279 * config register sets the counter value, the second
280 * write sets the period.
281 */
282 hpet_writel(cmp, HPET_Tn_CMP(timer));
283 udelay(1);
284 hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer)); 326 hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
327 hpet_start_counter();
328 hpet_print_config();
285 break; 329 break;
286 330
287 case CLOCK_EVT_MODE_ONESHOT: 331 case CLOCK_EVT_MODE_ONESHOT:
@@ -308,6 +352,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
308 irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); 352 irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
309 enable_irq(hdev->irq); 353 enable_irq(hdev->irq);
310 } 354 }
355 hpet_print_config();
311 break; 356 break;
312 } 357 }
313} 358}
@@ -526,6 +571,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
526 571
527 num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); 572 num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
528 num_timers++; /* Value read out starts from 0 */ 573 num_timers++; /* Value read out starts from 0 */
574 hpet_print_config();
529 575
530 hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL); 576 hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL);
531 if (!hpet_devs) 577 if (!hpet_devs)
@@ -695,7 +741,7 @@ static struct clocksource clocksource_hpet = {
695 .mask = HPET_MASK, 741 .mask = HPET_MASK,
696 .shift = HPET_SHIFT, 742 .shift = HPET_SHIFT,
697 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 743 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
698 .resume = hpet_restart_counter, 744 .resume = hpet_resume_counter,
699#ifdef CONFIG_X86_64 745#ifdef CONFIG_X86_64
700 .vread = vread_hpet, 746 .vread = vread_hpet,
701#endif 747#endif
@@ -707,7 +753,7 @@ static int hpet_clocksource_register(void)
707 cycle_t t1; 753 cycle_t t1;
708 754
709 /* Start the counter */ 755 /* Start the counter */
710 hpet_start_counter(); 756 hpet_restart_counter();
711 757
712 /* Verify whether hpet counter works */ 758 /* Verify whether hpet counter works */
713 t1 = read_hpet(); 759 t1 = read_hpet();
@@ -793,6 +839,7 @@ int __init hpet_enable(void)
793 * information and the number of channels 839 * information and the number of channels
794 */ 840 */
795 id = hpet_readl(HPET_ID); 841 id = hpet_readl(HPET_ID);
842 hpet_print_config();
796 843
797#ifdef CONFIG_HPET_EMULATE_RTC 844#ifdef CONFIG_HPET_EMULATE_RTC
798 /* 845 /*
@@ -845,6 +892,7 @@ static __init int hpet_late_init(void)
845 return -ENODEV; 892 return -ENODEV;
846 893
847 hpet_reserve_platform_timers(hpet_readl(HPET_ID)); 894 hpet_reserve_platform_timers(hpet_readl(HPET_ID));
895 hpet_print_config();
848 896
849 for_each_online_cpu(cpu) { 897 for_each_online_cpu(cpu) {
850 hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); 898 hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 10f92fb532f3..3475440baa54 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -3,17 +3,17 @@
3 * 3 *
4 */ 4 */
5#include <linux/clockchips.h> 5#include <linux/clockchips.h>
6#include <linux/init.h>
7#include <linux/interrupt.h> 6#include <linux/interrupt.h>
7#include <linux/spinlock.h>
8#include <linux/jiffies.h> 8#include <linux/jiffies.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/spinlock.h> 10#include <linux/delay.h>
11#include <linux/init.h>
12#include <linux/io.h>
11 13
12#include <asm/smp.h>
13#include <asm/delay.h>
14#include <asm/i8253.h> 14#include <asm/i8253.h>
15#include <asm/io.h>
16#include <asm/hpet.h> 15#include <asm/hpet.h>
16#include <asm/smp.h>
17 17
18DEFINE_SPINLOCK(i8253_lock); 18DEFINE_SPINLOCK(i8253_lock);
19EXPORT_SYMBOL(i8253_lock); 19EXPORT_SYMBOL(i8253_lock);
@@ -40,7 +40,7 @@ static void init_pit_timer(enum clock_event_mode mode,
40{ 40{
41 spin_lock(&i8253_lock); 41 spin_lock(&i8253_lock);
42 42
43 switch(mode) { 43 switch (mode) {
44 case CLOCK_EVT_MODE_PERIODIC: 44 case CLOCK_EVT_MODE_PERIODIC:
45 /* binary, mode 2, LSB/MSB, ch 0 */ 45 /* binary, mode 2, LSB/MSB, ch 0 */
46 outb_pit(0x34, PIT_MODE); 46 outb_pit(0x34, PIT_MODE);
@@ -95,7 +95,7 @@ static int pit_next_event(unsigned long delta, struct clock_event_device *evt)
95 * registered. This mechanism replaces the previous #ifdef LOCAL_APIC - 95 * registered. This mechanism replaces the previous #ifdef LOCAL_APIC -
96 * !using_apic_timer decisions in do_timer_interrupt_hook() 96 * !using_apic_timer decisions in do_timer_interrupt_hook()
97 */ 97 */
98static struct clock_event_device pit_clockevent = { 98static struct clock_event_device pit_ce = {
99 .name = "pit", 99 .name = "pit",
100 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, 100 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
101 .set_mode = init_pit_timer, 101 .set_mode = init_pit_timer,
@@ -114,15 +114,13 @@ void __init setup_pit_timer(void)
114 * Start pit with the boot cpu mask and make it global after the 114 * Start pit with the boot cpu mask and make it global after the
115 * IO_APIC has been initialized. 115 * IO_APIC has been initialized.
116 */ 116 */
117 pit_clockevent.cpumask = cpumask_of(smp_processor_id()); 117 pit_ce.cpumask = cpumask_of(smp_processor_id());
118 pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 118 pit_ce.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, pit_ce.shift);
119 pit_clockevent.shift); 119 pit_ce.max_delta_ns = clockevent_delta2ns(0x7FFF, &pit_ce);
120 pit_clockevent.max_delta_ns = 120 pit_ce.min_delta_ns = clockevent_delta2ns(0xF, &pit_ce);
121 clockevent_delta2ns(0x7FFF, &pit_clockevent); 121
122 pit_clockevent.min_delta_ns = 122 clockevents_register_device(&pit_ce);
123 clockevent_delta2ns(0xF, &pit_clockevent); 123 global_clock_event = &pit_ce;
124 clockevents_register_device(&pit_clockevent);
125 global_clock_event = &pit_clockevent;
126} 124}
127 125
128#ifndef CONFIG_X86_64 126#ifndef CONFIG_X86_64
@@ -133,11 +131,11 @@ void __init setup_pit_timer(void)
133 */ 131 */
134static cycle_t pit_read(void) 132static cycle_t pit_read(void)
135{ 133{
134 static int old_count;
135 static u32 old_jifs;
136 unsigned long flags; 136 unsigned long flags;
137 int count; 137 int count;
138 u32 jifs; 138 u32 jifs;
139 static int old_count;
140 static u32 old_jifs;
141 139
142 spin_lock_irqsave(&i8253_lock, flags); 140 spin_lock_irqsave(&i8253_lock, flags);
143 /* 141 /*
@@ -179,9 +177,9 @@ static cycle_t pit_read(void)
179 * Previous attempts to handle these cases intelligently were 177 * Previous attempts to handle these cases intelligently were
180 * buggy, so we just do the simple thing now. 178 * buggy, so we just do the simple thing now.
181 */ 179 */
182 if (count > old_count && jifs == old_jifs) { 180 if (count > old_count && jifs == old_jifs)
183 count = old_count; 181 count = old_count;
184 } 182
185 old_count = count; 183 old_count = count;
186 old_jifs = jifs; 184 old_jifs = jifs;
187 185
@@ -192,13 +190,13 @@ static cycle_t pit_read(void)
192 return (cycle_t)(jifs * LATCH) + count; 190 return (cycle_t)(jifs * LATCH) + count;
193} 191}
194 192
195static struct clocksource clocksource_pit = { 193static struct clocksource pit_cs = {
196 .name = "pit", 194 .name = "pit",
197 .rating = 110, 195 .rating = 110,
198 .read = pit_read, 196 .read = pit_read,
199 .mask = CLOCKSOURCE_MASK(32), 197 .mask = CLOCKSOURCE_MASK(32),
200 .mult = 0, 198 .mult = 0,
201 .shift = 20, 199 .shift = 20,
202}; 200};
203 201
204static void pit_disable_clocksource(void) 202static void pit_disable_clocksource(void)
@@ -206,9 +204,9 @@ static void pit_disable_clocksource(void)
206 /* 204 /*
207 * Use mult to check whether it is registered or not 205 * Use mult to check whether it is registered or not
208 */ 206 */
209 if (clocksource_pit.mult) { 207 if (pit_cs.mult) {
210 clocksource_unregister(&clocksource_pit); 208 clocksource_unregister(&pit_cs);
211 clocksource_pit.mult = 0; 209 pit_cs.mult = 0;
212 } 210 }
213} 211}
214 212
@@ -222,13 +220,13 @@ static int __init init_pit_clocksource(void)
222 * - when local APIC timer is active (PIT is switched off) 220 * - when local APIC timer is active (PIT is switched off)
223 */ 221 */
224 if (num_possible_cpus() > 1 || is_hpet_enabled() || 222 if (num_possible_cpus() > 1 || is_hpet_enabled() ||
225 pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC) 223 pit_ce.mode != CLOCK_EVT_MODE_PERIODIC)
226 return 0; 224 return 0;
227 225
228 clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 226 pit_cs.mult = clocksource_hz2mult(CLOCK_TICK_RATE, pit_cs.shift);
229 clocksource_pit.shift); 227
230 return clocksource_register(&clocksource_pit); 228 return clocksource_register(&pit_cs);
231} 229}
232arch_initcall(init_pit_clocksource); 230arch_initcall(init_pit_clocksource);
233 231
234#endif 232#endif /* !CONFIG_X86_64 */
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index 720d2607aacb..a979b5bd2fc0 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -7,10 +7,10 @@
7 */ 7 */
8#include <linux/kernel.h> 8#include <linux/kernel.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/init.h>
11#include <linux/delay.h> 10#include <linux/delay.h>
11#include <linux/init.h>
12#include <linux/dmi.h> 12#include <linux/dmi.h>
13#include <asm/io.h> 13#include <linux/io.h>
14 14
15int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE; 15int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE;
16 16
@@ -47,8 +47,7 @@ EXPORT_SYMBOL(native_io_delay);
47static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id) 47static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
48{ 48{
49 if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) { 49 if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) {
50 printk(KERN_NOTICE "%s: using 0xed I/O delay port\n", 50 pr_notice("%s: using 0xed I/O delay port\n", id->ident);
51 id->ident);
52 io_delay_type = CONFIG_IO_DELAY_TYPE_0XED; 51 io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
53 } 52 }
54 53
@@ -64,40 +63,40 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
64 .callback = dmi_io_delay_0xed_port, 63 .callback = dmi_io_delay_0xed_port,
65 .ident = "Compaq Presario V6000", 64 .ident = "Compaq Presario V6000",
66 .matches = { 65 .matches = {
67 DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), 66 DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
68 DMI_MATCH(DMI_BOARD_NAME, "30B7") 67 DMI_MATCH(DMI_BOARD_NAME, "30B7")
69 } 68 }
70 }, 69 },
71 { 70 {
72 .callback = dmi_io_delay_0xed_port, 71 .callback = dmi_io_delay_0xed_port,
73 .ident = "HP Pavilion dv9000z", 72 .ident = "HP Pavilion dv9000z",
74 .matches = { 73 .matches = {
75 DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), 74 DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
76 DMI_MATCH(DMI_BOARD_NAME, "30B9") 75 DMI_MATCH(DMI_BOARD_NAME, "30B9")
77 } 76 }
78 }, 77 },
79 { 78 {
80 .callback = dmi_io_delay_0xed_port, 79 .callback = dmi_io_delay_0xed_port,
81 .ident = "HP Pavilion dv6000", 80 .ident = "HP Pavilion dv6000",
82 .matches = { 81 .matches = {
83 DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), 82 DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
84 DMI_MATCH(DMI_BOARD_NAME, "30B8") 83 DMI_MATCH(DMI_BOARD_NAME, "30B8")
85 } 84 }
86 }, 85 },
87 { 86 {
88 .callback = dmi_io_delay_0xed_port, 87 .callback = dmi_io_delay_0xed_port,
89 .ident = "HP Pavilion tx1000", 88 .ident = "HP Pavilion tx1000",
90 .matches = { 89 .matches = {
91 DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), 90 DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
92 DMI_MATCH(DMI_BOARD_NAME, "30BF") 91 DMI_MATCH(DMI_BOARD_NAME, "30BF")
93 } 92 }
94 }, 93 },
95 { 94 {
96 .callback = dmi_io_delay_0xed_port, 95 .callback = dmi_io_delay_0xed_port,
97 .ident = "Presario F700", 96 .ident = "Presario F700",
98 .matches = { 97 .matches = {
99 DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), 98 DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
100 DMI_MATCH(DMI_BOARD_NAME, "30D3") 99 DMI_MATCH(DMI_BOARD_NAME, "30D3")
101 } 100 }
102 }, 101 },
103 { } 102 { }
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index b864341dcc45..3aaf7b9e3a8b 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -45,19 +45,24 @@ void ack_bad_irq(unsigned int irq)
45/* 45/*
46 * /proc/interrupts printing: 46 * /proc/interrupts printing:
47 */ 47 */
48static int show_other_interrupts(struct seq_file *p) 48static int show_other_interrupts(struct seq_file *p, int prec)
49{ 49{
50 int j; 50 int j;
51 51
52 seq_printf(p, "NMI: "); 52 seq_printf(p, "%*s: ", prec, "NMI");
53 for_each_online_cpu(j) 53 for_each_online_cpu(j)
54 seq_printf(p, "%10u ", irq_stats(j)->__nmi_count); 54 seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
55 seq_printf(p, " Non-maskable interrupts\n"); 55 seq_printf(p, " Non-maskable interrupts\n");
56#ifdef CONFIG_X86_LOCAL_APIC 56#ifdef CONFIG_X86_LOCAL_APIC
57 seq_printf(p, "LOC: "); 57 seq_printf(p, "%*s: ", prec, "LOC");
58 for_each_online_cpu(j) 58 for_each_online_cpu(j)
59 seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); 59 seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
60 seq_printf(p, " Local timer interrupts\n"); 60 seq_printf(p, " Local timer interrupts\n");
61
62 seq_printf(p, "%*s: ", prec, "SPU");
63 for_each_online_cpu(j)
64 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
65 seq_printf(p, " Spurious interrupts\n");
61#endif 66#endif
62 if (generic_interrupt_extension) { 67 if (generic_interrupt_extension) {
63 seq_printf(p, "PLT: "); 68 seq_printf(p, "PLT: ");
@@ -66,40 +71,34 @@ static int show_other_interrupts(struct seq_file *p)
66 seq_printf(p, " Platform interrupts\n"); 71 seq_printf(p, " Platform interrupts\n");
67 } 72 }
68#ifdef CONFIG_SMP 73#ifdef CONFIG_SMP
69 seq_printf(p, "RES: "); 74 seq_printf(p, "%*s: ", prec, "RES");
70 for_each_online_cpu(j) 75 for_each_online_cpu(j)
71 seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); 76 seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
72 seq_printf(p, " Rescheduling interrupts\n"); 77 seq_printf(p, " Rescheduling interrupts\n");
73 seq_printf(p, "CAL: "); 78 seq_printf(p, "%*s: ", prec, "CAL");
74 for_each_online_cpu(j) 79 for_each_online_cpu(j)
75 seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); 80 seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
76 seq_printf(p, " Function call interrupts\n"); 81 seq_printf(p, " Function call interrupts\n");
77 seq_printf(p, "TLB: "); 82 seq_printf(p, "%*s: ", prec, "TLB");
78 for_each_online_cpu(j) 83 for_each_online_cpu(j)
79 seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); 84 seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
80 seq_printf(p, " TLB shootdowns\n"); 85 seq_printf(p, " TLB shootdowns\n");
81#endif 86#endif
82#ifdef CONFIG_X86_MCE 87#ifdef CONFIG_X86_MCE
83 seq_printf(p, "TRM: "); 88 seq_printf(p, "%*s: ", prec, "TRM");
84 for_each_online_cpu(j) 89 for_each_online_cpu(j)
85 seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); 90 seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
86 seq_printf(p, " Thermal event interrupts\n"); 91 seq_printf(p, " Thermal event interrupts\n");
87# ifdef CONFIG_X86_64 92# ifdef CONFIG_X86_64
88 seq_printf(p, "THR: "); 93 seq_printf(p, "%*s: ", prec, "THR");
89 for_each_online_cpu(j) 94 for_each_online_cpu(j)
90 seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); 95 seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
91 seq_printf(p, " Threshold APIC interrupts\n"); 96 seq_printf(p, " Threshold APIC interrupts\n");
92# endif 97# endif
93#endif 98#endif
94#ifdef CONFIG_X86_LOCAL_APIC 99 seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
95 seq_printf(p, "SPU: ");
96 for_each_online_cpu(j)
97 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
98 seq_printf(p, " Spurious interrupts\n");
99#endif
100 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
101#if defined(CONFIG_X86_IO_APIC) 100#if defined(CONFIG_X86_IO_APIC)
102 seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); 101 seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
103#endif 102#endif
104 return 0; 103 return 0;
105} 104}
@@ -107,19 +106,22 @@ static int show_other_interrupts(struct seq_file *p)
107int show_interrupts(struct seq_file *p, void *v) 106int show_interrupts(struct seq_file *p, void *v)
108{ 107{
109 unsigned long flags, any_count = 0; 108 unsigned long flags, any_count = 0;
110 int i = *(loff_t *) v, j; 109 int i = *(loff_t *) v, j, prec;
111 struct irqaction *action; 110 struct irqaction *action;
112 struct irq_desc *desc; 111 struct irq_desc *desc;
113 112
114 if (i > nr_irqs) 113 if (i > nr_irqs)
115 return 0; 114 return 0;
116 115
116 for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
117 j *= 10;
118
117 if (i == nr_irqs) 119 if (i == nr_irqs)
118 return show_other_interrupts(p); 120 return show_other_interrupts(p, prec);
119 121
120 /* print header */ 122 /* print header */
121 if (i == 0) { 123 if (i == 0) {
122 seq_printf(p, " "); 124 seq_printf(p, "%*s", prec + 8, "");
123 for_each_online_cpu(j) 125 for_each_online_cpu(j)
124 seq_printf(p, "CPU%-8d", j); 126 seq_printf(p, "CPU%-8d", j);
125 seq_putc(p, '\n'); 127 seq_putc(p, '\n');
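The field-width loop added above starts prec at 3 and grows it by one for each decimal digit of nr_irqs beyond three, capped at 10, so the /proc/interrupts columns stay aligned on machines with many interrupt vectors. A stand-alone sketch with made-up IRQ counts:

#include <stdio.h>

/* Same width computation as the patch, lifted out for illustration. */
static int irq_column_width(int nr_irqs)
{
	int prec, j;

	for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
		j *= 10;
	return prec;
}

int main(void)
{
	int samples[] = { 16, 224, 1024, 65536 };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("nr_irqs=%-6d -> width %d\n",
		       samples[i], irq_column_width(samples[i]));
	return 0;	/* prints widths 3, 3, 4 and 5 */
}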
@@ -130,23 +132,15 @@ int show_interrupts(struct seq_file *p, void *v)
130 return 0; 132 return 0;
131 133
132 spin_lock_irqsave(&desc->lock, flags); 134 spin_lock_irqsave(&desc->lock, flags);
133#ifndef CONFIG_SMP
134 any_count = kstat_irqs(i);
135#else
136 for_each_online_cpu(j) 135 for_each_online_cpu(j)
137 any_count |= kstat_irqs_cpu(i, j); 136 any_count |= kstat_irqs_cpu(i, j);
138#endif
139 action = desc->action; 137 action = desc->action;
140 if (!action && !any_count) 138 if (!action && !any_count)
141 goto out; 139 goto out;
142 140
143 seq_printf(p, "%3d: ", i); 141 seq_printf(p, "%*d: ", prec, i);
144#ifndef CONFIG_SMP
145 seq_printf(p, "%10u ", kstat_irqs(i));
146#else
147 for_each_online_cpu(j) 142 for_each_online_cpu(j)
148 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); 143 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
149#endif
150 seq_printf(p, " %8s", desc->chip->name); 144 seq_printf(p, " %8s", desc->chip->name);
151 seq_printf(p, "-%-8s", desc->name); 145 seq_printf(p, "-%-8s", desc->name);
152 146
@@ -171,6 +165,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
171 165
172#ifdef CONFIG_X86_LOCAL_APIC 166#ifdef CONFIG_X86_LOCAL_APIC
173 sum += irq_stats(cpu)->apic_timer_irqs; 167 sum += irq_stats(cpu)->apic_timer_irqs;
168 sum += irq_stats(cpu)->irq_spurious_count;
174#endif 169#endif
175 if (generic_interrupt_extension) 170 if (generic_interrupt_extension)
176 sum += irq_stats(cpu)->generic_irqs; 171 sum += irq_stats(cpu)->generic_irqs;
@@ -185,9 +180,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
185 sum += irq_stats(cpu)->irq_threshold_count; 180 sum += irq_stats(cpu)->irq_threshold_count;
186#endif 181#endif
187#endif 182#endif
188#ifdef CONFIG_X86_LOCAL_APIC
189 sum += irq_stats(cpu)->irq_spurious_count;
190#endif
191 return sum; 183 return sum;
192} 184}
193 185
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index ff7d3b0124f1..e444357375ce 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -8,11 +8,11 @@
8 */ 8 */
9#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/uaccess.h> 10#include <linux/uaccess.h>
11#include <linux/stat.h> 11#include <linux/module.h>
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/stat.h>
13#include <linux/io.h> 14#include <linux/io.h>
14#include <linux/mm.h> 15#include <linux/mm.h>
15#include <linux/module.h>
16 16
17#include <asm/setup.h> 17#include <asm/setup.h>
18 18
@@ -26,9 +26,8 @@ struct setup_data_node {
26 u32 len; 26 u32 len;
27}; 27};
28 28
29static ssize_t 29static ssize_t setup_data_read(struct file *file, char __user *user_buf,
30setup_data_read(struct file *file, char __user *user_buf, size_t count, 30 size_t count, loff_t *ppos)
31 loff_t *ppos)
32{ 31{
33 struct setup_data_node *node = file->private_data; 32 struct setup_data_node *node = file->private_data;
34 unsigned long remain; 33 unsigned long remain;
@@ -39,20 +38,21 @@ setup_data_read(struct file *file, char __user *user_buf, size_t count,
39 38
40 if (pos < 0) 39 if (pos < 0)
41 return -EINVAL; 40 return -EINVAL;
41
42 if (pos >= node->len) 42 if (pos >= node->len)
43 return 0; 43 return 0;
44 44
45 if (count > node->len - pos) 45 if (count > node->len - pos)
46 count = node->len - pos; 46 count = node->len - pos;
47
47 pa = node->paddr + sizeof(struct setup_data) + pos; 48 pa = node->paddr + sizeof(struct setup_data) + pos;
48 pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT); 49 pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT);
49 if (PageHighMem(pg)) { 50 if (PageHighMem(pg)) {
50 p = ioremap_cache(pa, count); 51 p = ioremap_cache(pa, count);
51 if (!p) 52 if (!p)
52 return -ENXIO; 53 return -ENXIO;
53 } else { 54 } else
54 p = __va(pa); 55 p = __va(pa);
55 }
56 56
57 remain = copy_to_user(user_buf, p, count); 57 remain = copy_to_user(user_buf, p, count);
58 58
@@ -70,12 +70,13 @@ setup_data_read(struct file *file, char __user *user_buf, size_t count,
70static int setup_data_open(struct inode *inode, struct file *file) 70static int setup_data_open(struct inode *inode, struct file *file)
71{ 71{
72 file->private_data = inode->i_private; 72 file->private_data = inode->i_private;
73
73 return 0; 74 return 0;
74} 75}
75 76
76static const struct file_operations fops_setup_data = { 77static const struct file_operations fops_setup_data = {
77 .read = setup_data_read, 78 .read = setup_data_read,
78 .open = setup_data_open, 79 .open = setup_data_open,
79}; 80};
80 81
81static int __init 82static int __init
@@ -84,57 +85,50 @@ create_setup_data_node(struct dentry *parent, int no,
84{ 85{
85 struct dentry *d, *type, *data; 86 struct dentry *d, *type, *data;
86 char buf[16]; 87 char buf[16];
87 int error;
88 88
89 sprintf(buf, "%d", no); 89 sprintf(buf, "%d", no);
90 d = debugfs_create_dir(buf, parent); 90 d = debugfs_create_dir(buf, parent);
91 if (!d) { 91 if (!d)
92 error = -ENOMEM; 92 return -ENOMEM;
93 goto err_return; 93
94 }
95 type = debugfs_create_x32("type", S_IRUGO, d, &node->type); 94 type = debugfs_create_x32("type", S_IRUGO, d, &node->type);
96 if (!type) { 95 if (!type)
97 error = -ENOMEM;
98 goto err_dir; 96 goto err_dir;
99 } 97
100 data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data); 98 data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data);
101 if (!data) { 99 if (!data)
102 error = -ENOMEM;
103 goto err_type; 100 goto err_type;
104 } 101
105 return 0; 102 return 0;
106 103
107err_type: 104err_type:
108 debugfs_remove(type); 105 debugfs_remove(type);
109err_dir: 106err_dir:
110 debugfs_remove(d); 107 debugfs_remove(d);
111err_return: 108 return -ENOMEM;
112 return error;
113} 109}
114 110
115static int __init create_setup_data_nodes(struct dentry *parent) 111static int __init create_setup_data_nodes(struct dentry *parent)
116{ 112{
117 struct setup_data_node *node; 113 struct setup_data_node *node;
118 struct setup_data *data; 114 struct setup_data *data;
119 int error, no = 0; 115 int error = -ENOMEM;
120 struct dentry *d; 116 struct dentry *d;
121 struct page *pg; 117 struct page *pg;
122 u64 pa_data; 118 u64 pa_data;
119 int no = 0;
123 120
124 d = debugfs_create_dir("setup_data", parent); 121 d = debugfs_create_dir("setup_data", parent);
125 if (!d) { 122 if (!d)
126 error = -ENOMEM; 123 return -ENOMEM;
127 goto err_return;
128 }
129 124
130 pa_data = boot_params.hdr.setup_data; 125 pa_data = boot_params.hdr.setup_data;
131 126
132 while (pa_data) { 127 while (pa_data) {
133 node = kmalloc(sizeof(*node), GFP_KERNEL); 128 node = kmalloc(sizeof(*node), GFP_KERNEL);
134 if (!node) { 129 if (!node)
135 error = -ENOMEM;
136 goto err_dir; 130 goto err_dir;
137 } 131
138 pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT); 132 pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
139 if (PageHighMem(pg)) { 133 if (PageHighMem(pg)) {
140 data = ioremap_cache(pa_data, sizeof(*data)); 134 data = ioremap_cache(pa_data, sizeof(*data));
@@ -143,9 +137,8 @@ static int __init create_setup_data_nodes(struct dentry *parent)
143 error = -ENXIO; 137 error = -ENXIO;
144 goto err_dir; 138 goto err_dir;
145 } 139 }
146 } else { 140 } else
147 data = __va(pa_data); 141 data = __va(pa_data);
148 }
149 142
150 node->paddr = pa_data; 143 node->paddr = pa_data;
151 node->type = data->type; 144 node->type = data->type;
@@ -159,11 +152,11 @@ static int __init create_setup_data_nodes(struct dentry *parent)
159 goto err_dir; 152 goto err_dir;
160 no++; 153 no++;
161 } 154 }
155
162 return 0; 156 return 0;
163 157
164err_dir: 158err_dir:
165 debugfs_remove(d); 159 debugfs_remove(d);
166err_return:
167 return error; 160 return error;
168} 161}
169 162
@@ -175,28 +168,26 @@ static struct debugfs_blob_wrapper boot_params_blob = {
175static int __init boot_params_kdebugfs_init(void) 168static int __init boot_params_kdebugfs_init(void)
176{ 169{
177 struct dentry *dbp, *version, *data; 170 struct dentry *dbp, *version, *data;
178 int error; 171 int error = -ENOMEM;
179 172
180 dbp = debugfs_create_dir("boot_params", NULL); 173 dbp = debugfs_create_dir("boot_params", NULL);
181 if (!dbp) { 174 if (!dbp)
182 error = -ENOMEM; 175 return -ENOMEM;
183 goto err_return; 176
184 }
185 version = debugfs_create_x16("version", S_IRUGO, dbp, 177 version = debugfs_create_x16("version", S_IRUGO, dbp,
186 &boot_params.hdr.version); 178 &boot_params.hdr.version);
187 if (!version) { 179 if (!version)
188 error = -ENOMEM;
189 goto err_dir; 180 goto err_dir;
190 } 181
191 data = debugfs_create_blob("data", S_IRUGO, dbp, 182 data = debugfs_create_blob("data", S_IRUGO, dbp,
192 &boot_params_blob); 183 &boot_params_blob);
193 if (!data) { 184 if (!data)
194 error = -ENOMEM;
195 goto err_version; 185 goto err_version;
196 } 186
197 error = create_setup_data_nodes(dbp); 187 error = create_setup_data_nodes(dbp);
198 if (error) 188 if (error)
199 goto err_data; 189 goto err_data;
190
200 return 0; 191 return 0;
201 192
202err_data: 193err_data:
@@ -205,10 +196,9 @@ err_version:
205 debugfs_remove(version); 196 debugfs_remove(version);
206err_dir: 197err_dir:
207 debugfs_remove(dbp); 198 debugfs_remove(dbp);
208err_return:
209 return error; 199 return error;
210} 200}
211#endif 201#endif /* CONFIG_DEBUG_BOOT_PARAMS */
212 202
213static int __init arch_kdebugfs_init(void) 203static int __init arch_kdebugfs_init(void)
214{ 204{
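The kdebugfs cleanup above mostly converts per-branch "error = -ENOMEM" assignments into a single error code set up front plus goto labels that unwind in reverse order. A minimal stand-alone sketch of that pattern, using invented resources rather than debugfs calls:

#include <errno.h>
#include <stdlib.h>

static int setup(void)
{
	int error = -ENOMEM;	/* one error code covers every failure path */
	char *a, *b, *c;

	a = malloc(16);
	if (!a)			/* nothing to undo yet: return directly */
		return error;

	b = malloc(16);
	if (!b)
		goto err_a;

	c = malloc(16);
	if (!c)
		goto err_b;

	/* ...use a, b and c... */
	free(c);
	free(b);
	free(a);
	return 0;

err_b:				/* labels undo allocations in reverse order */
	free(b);
err_a:
	free(a);
	return error;
}

int main(void)
{
	return setup() ? 1 : 0;
}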
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e948b28a5a9a..55b94614e348 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -193,6 +193,9 @@ static int __kprobes can_boost(kprobe_opcode_t *opcodes)
193 kprobe_opcode_t opcode; 193 kprobe_opcode_t opcode;
194 kprobe_opcode_t *orig_opcodes = opcodes; 194 kprobe_opcode_t *orig_opcodes = opcodes;
195 195
196 if (search_exception_tables((unsigned long)opcodes))
197 return 0; /* Page fault may occur on this address. */
198
196retry: 199retry:
197 if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) 200 if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
198 return 0; 201 return 0;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 478bca986eca..33019ddb56b4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -138,12 +138,6 @@ static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
138 kvm_mmu_write(ptep, pte_val(pte)); 138 kvm_mmu_write(ptep, pte_val(pte));
139} 139}
140 140
141static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
142 pte_t *ptep, pte_t pte)
143{
144 kvm_mmu_write(ptep, pte_val(pte));
145}
146
147static void kvm_pte_clear(struct mm_struct *mm, 141static void kvm_pte_clear(struct mm_struct *mm,
148 unsigned long addr, pte_t *ptep) 142 unsigned long addr, pte_t *ptep)
149{ 143{
@@ -220,7 +214,6 @@ static void paravirt_ops_setup(void)
220#if PAGETABLE_LEVELS >= 3 214#if PAGETABLE_LEVELS >= 3
221#ifdef CONFIG_X86_PAE 215#ifdef CONFIG_X86_PAE
222 pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic; 216 pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
223 pv_mmu_ops.set_pte_present = kvm_set_pte_present;
224 pv_mmu_ops.pte_clear = kvm_pte_clear; 217 pv_mmu_ops.pte_clear = kvm_pte_clear;
225 pv_mmu_ops.pmd_clear = kvm_pmd_clear; 218 pv_mmu_ops.pmd_clear = kvm_pmd_clear;
226#endif 219#endif
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index 666e43df51f9..712d15fdc416 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -226,7 +226,7 @@ static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d)
226 return 0; 226 return 0;
227} 227}
228 228
229static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { 229static const struct dmi_system_id __cpuinitconst mmconf_dmi_table[] = {
230 { 230 {
231 .callback = set_check_enable_amd_mmconf, 231 .callback = set_check_enable_amd_mmconf,
232 .ident = "Sun Microsystems Machine", 232 .ident = "Sun Microsystems Machine",
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index e8192401da47..dce99dca6cf8 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -109,9 +109,6 @@ static void __init MP_bus_info(struct mpc_bus *m)
109 } else 109 } else
110 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); 110 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
111} 111}
112#endif
113
114#ifdef CONFIG_X86_IO_APIC
115 112
116static int bad_ioapic(unsigned long address) 113static int bad_ioapic(unsigned long address)
117{ 114{
@@ -224,8 +221,12 @@ static void __init MP_intsrc_info(struct mpc_intsrc *m)
224 if (++mp_irq_entries == MAX_IRQ_SOURCES) 221 if (++mp_irq_entries == MAX_IRQ_SOURCES)
225 panic("Max # of irq sources exceeded!!\n"); 222 panic("Max # of irq sources exceeded!!\n");
226} 223}
224#else /* CONFIG_X86_IO_APIC */
225static inline void __init MP_bus_info(struct mpc_bus *m) {}
226static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {}
227static inline void __init MP_intsrc_info(struct mpc_intsrc *m) {}
228#endif /* CONFIG_X86_IO_APIC */
227 229
228#endif
229 230
230static void __init MP_lintsrc_info(struct mpc_lintsrc *m) 231static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
231{ 232{
@@ -275,6 +276,20 @@ static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str)
275 return 1; 276 return 1;
276} 277}
277 278
279static void skip_entry(unsigned char **ptr, int *count, int size)
280{
281 *ptr += size;
282 *count += size;
283}
284
285static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt)
286{
287 printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"
288 "type %x\n", *mpt);
289 print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
290 1, mpc, mpc->length, 1);
291}
292
278static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) 293static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
279{ 294{
280 char str[16]; 295 char str[16];
@@ -310,61 +325,30 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
310 while (count < mpc->length) { 325 while (count < mpc->length) {
311 switch (*mpt) { 326 switch (*mpt) {
312 case MP_PROCESSOR: 327 case MP_PROCESSOR:
313 { 328 /* ACPI may have already provided this data */
314 struct mpc_cpu *m = (struct mpc_cpu *)mpt; 329 if (!acpi_lapic)
315 /* ACPI may have already provided this data */ 330 MP_processor_info((struct mpc_cpu *)mpt);
316 if (!acpi_lapic) 331 skip_entry(&mpt, &count, sizeof(struct mpc_cpu));
317 MP_processor_info(m); 332 break;
318 mpt += sizeof(*m);
319 count += sizeof(*m);
320 break;
321 }
322 case MP_BUS: 333 case MP_BUS:
323 { 334 MP_bus_info((struct mpc_bus *)mpt);
324 struct mpc_bus *m = (struct mpc_bus *)mpt; 335 skip_entry(&mpt, &count, sizeof(struct mpc_bus));
325#ifdef CONFIG_X86_IO_APIC 336 break;
326 MP_bus_info(m);
327#endif
328 mpt += sizeof(*m);
329 count += sizeof(*m);
330 break;
331 }
332 case MP_IOAPIC: 337 case MP_IOAPIC:
333 { 338 MP_ioapic_info((struct mpc_ioapic *)mpt);
334#ifdef CONFIG_X86_IO_APIC 339 skip_entry(&mpt, &count, sizeof(struct mpc_ioapic));
335 struct mpc_ioapic *m = (struct mpc_ioapic *)mpt; 340 break;
336 MP_ioapic_info(m);
337#endif
338 mpt += sizeof(struct mpc_ioapic);
339 count += sizeof(struct mpc_ioapic);
340 break;
341 }
342 case MP_INTSRC: 341 case MP_INTSRC:
343 { 342 MP_intsrc_info((struct mpc_intsrc *)mpt);
344#ifdef CONFIG_X86_IO_APIC 343 skip_entry(&mpt, &count, sizeof(struct mpc_intsrc));
345 struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; 344 break;
346
347 MP_intsrc_info(m);
348#endif
349 mpt += sizeof(struct mpc_intsrc);
350 count += sizeof(struct mpc_intsrc);
351 break;
352 }
353 case MP_LINTSRC: 345 case MP_LINTSRC:
354 { 346 MP_lintsrc_info((struct mpc_lintsrc *)mpt);
355 struct mpc_lintsrc *m = 347 skip_entry(&mpt, &count, sizeof(struct mpc_lintsrc));
356 (struct mpc_lintsrc *)mpt; 348 break;
357 MP_lintsrc_info(m);
358 mpt += sizeof(*m);
359 count += sizeof(*m);
360 break;
361 }
362 default: 349 default:
363 /* wrong mptable */ 350 /* wrong mptable */
364 printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); 351 smp_dump_mptable(mpc, mpt);
365 printk(KERN_ERR "type %x\n", *mpt);
366 print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
367 1, mpc, mpc->length, 1);
368 count = mpc->length; 352 count = mpc->length;
369 break; 353 break;
370 } 354 }
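The rewritten parser above replaces five near-identical case blocks with calls to the new skip_entry() helper, which advances both the cursor and the running byte count. A stand-alone sketch of the same walk over an invented packed record format (not the real MP table layout):

#include <stdio.h>
#include <string.h>

struct rec {
	unsigned char type;
	unsigned char len;	/* total record length, header included */
};

/* Same helper shape as the patch: advance cursor and byte count together. */
static void skip_entry(unsigned char **ptr, int *count, int size)
{
	*ptr += size;
	*count += size;
}

int main(void)
{
	/* Three fake records packed back to back: type, length, payload. */
	unsigned char table[] = { 1, 3, 'a', 2, 4, 'b', 'c', 1, 3, 'd' };
	unsigned char *mpt = table;
	int count = 0, length = sizeof(table);

	while (count < length) {
		struct rec r;

		memcpy(&r, mpt, sizeof(r));
		printf("record type %u, %u bytes\n", r.type, r.len);
		skip_entry(&mpt, &count, r.len);
	}
	return 0;
}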
@@ -571,6 +555,55 @@ static unsigned long __init get_mpc_size(unsigned long physptr)
571 return size; 555 return size;
572} 556}
573 557
558static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
559{
560 struct mpc_table *mpc;
561 unsigned long size;
562
563 size = get_mpc_size(mpf->physptr);
564 mpc = early_ioremap(mpf->physptr, size);
565 /*
566 * Read the physical hardware table. Anything here will
567 * override the defaults.
568 */
569 if (!smp_read_mpc(mpc, early)) {
570#ifdef CONFIG_X86_LOCAL_APIC
571 smp_found_config = 0;
572#endif
573 printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"
574 "... disabling SMP support. (tell your hw vendor)\n");
575 early_iounmap(mpc, size);
576 return -1;
577 }
578 early_iounmap(mpc, size);
579
580 if (early)
581 return -1;
582
583#ifdef CONFIG_X86_IO_APIC
584 /*
585 * If there are no explicit MP IRQ entries, then we are
586 * broken. We set up most of the low 16 IO-APIC pins to
587 * ISA defaults and hope it will work.
588 */
589 if (!mp_irq_entries) {
590 struct mpc_bus bus;
591
592 printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
593 "using default mptable. (tell your hw vendor)\n");
594
595 bus.type = MP_BUS;
596 bus.busid = 0;
597 memcpy(bus.bustype, "ISA ", 6);
598 MP_bus_info(&bus);
599
600 construct_default_ioirq_mptable(0);
601 }
602#endif
603
604 return 0;
605}
606
574/* 607/*
575 * Scan the memory blocks for an SMP configuration block. 608 * Scan the memory blocks for an SMP configuration block.
576 */ 609 */
@@ -624,51 +657,8 @@ static void __init __get_smp_config(unsigned int early)
624 construct_default_ISA_mptable(mpf->feature1); 657 construct_default_ISA_mptable(mpf->feature1);
625 658
626 } else if (mpf->physptr) { 659 } else if (mpf->physptr) {
627 struct mpc_table *mpc; 660 if (check_physptr(mpf, early))
628 unsigned long size;
629
630 size = get_mpc_size(mpf->physptr);
631 mpc = early_ioremap(mpf->physptr, size);
632 /*
633 * Read the physical hardware table. Anything here will
634 * override the defaults.
635 */
636 if (!smp_read_mpc(mpc, early)) {
637#ifdef CONFIG_X86_LOCAL_APIC
638 smp_found_config = 0;
639#endif
640 printk(KERN_ERR
641 "BIOS bug, MP table errors detected!...\n");
642 printk(KERN_ERR "... disabling SMP support. "
643 "(tell your hw vendor)\n");
644 early_iounmap(mpc, size);
645 return;
646 }
647 early_iounmap(mpc, size);
648
649 if (early)
650 return; 661 return;
651#ifdef CONFIG_X86_IO_APIC
652 /*
653 * If there are no explicit MP IRQ entries, then we are
654 * broken. We set up most of the low 16 IO-APIC pins to
655 * ISA defaults and hope it will work.
656 */
657 if (!mp_irq_entries) {
658 struct mpc_bus bus;
659
660 printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
661 "using default mptable. "
662 "(tell your hw vendor)\n");
663
664 bus.type = MP_BUS;
665 bus.busid = 0;
666 memcpy(bus.bustype, "ISA ", 6);
667 MP_bus_info(&bus);
668
669 construct_default_ioirq_mptable(0);
670 }
671#endif
672 } else 662 } else
673 BUG(); 663 BUG();
674 664
@@ -689,6 +679,31 @@ void __init get_smp_config(void)
689 __get_smp_config(0); 679 __get_smp_config(0);
690} 680}
691 681
682static void smp_reserve_bootmem(struct mpf_intel *mpf)
683{
684 unsigned long size = get_mpc_size(mpf->physptr);
685#ifdef CONFIG_X86_32
686 /*
 687 * We cannot access the MPC table to compute its size yet,
 688 * as only a few megabytes from the bottom of memory are mapped now.
 689 * The PC-9800's MPC table sits at the very end of physical
 690 * memory, so simply reserving PAGE_SIZE from mpf->physptr
 691 * yields a BUG() in reserve_bootmem.
 692 * We also need to make sure physptr is below max_low_pfn;
 693 * there is no need to reserve the area above max_low_pfn.
694 */
695 unsigned long end = max_low_pfn * PAGE_SIZE;
696
697 if (mpf->physptr < end) {
698 if (mpf->physptr + size > end)
699 size = end - mpf->physptr;
700 reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
701 }
702#else
703 reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
704#endif
705}
706
692static int __init smp_scan_config(unsigned long base, unsigned long length, 707static int __init smp_scan_config(unsigned long base, unsigned long length,
693 unsigned reserve) 708 unsigned reserve)
694{ 709{
@@ -717,35 +732,9 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
717 if (!reserve) 732 if (!reserve)
718 return 1; 733 return 1;
719 reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf), 734 reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf),
720 BOOTMEM_DEFAULT);
721 if (mpf->physptr) {
722 unsigned long size = get_mpc_size(mpf->physptr);
723#ifdef CONFIG_X86_32
724 /*
725 * We cannot access to MPC table to compute
726 * table size yet, as only few megabytes from
727 * the bottom is mapped now.
728 * PC-9800's MPC table places on the very last
729 * of physical memory; so that simply reserving
730 * PAGE_SIZE from mpf->physptr yields BUG()
731 * in reserve_bootmem.
732 * also need to make sure physptr is below than
733 * max_low_pfn
734 * we don't need reserve the area above max_low_pfn
735 */
736 unsigned long end = max_low_pfn * PAGE_SIZE;
737
738 if (mpf->physptr < end) {
739 if (mpf->physptr + size > end)
740 size = end - mpf->physptr;
741 reserve_bootmem_generic(mpf->physptr, size,
742 BOOTMEM_DEFAULT);
743 }
744#else
745 reserve_bootmem_generic(mpf->physptr, size,
746 BOOTMEM_DEFAULT); 735 BOOTMEM_DEFAULT);
747#endif 736 if (mpf->physptr)
748 } 737 smp_reserve_bootmem(mpf);
749 738
750 return 1; 739 return 1;
751 } 740 }
@@ -848,7 +837,57 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
848#define SPARE_SLOT_NUM 20 837#define SPARE_SLOT_NUM 20
849 838
850static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; 839static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
851#endif 840
841static void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
842{
843 int i;
844
845 apic_printk(APIC_VERBOSE, "OLD ");
846 print_MP_intsrc_info(m);
847
848 i = get_MP_intsrc_index(m);
849 if (i > 0) {
850 assign_to_mpc_intsrc(&mp_irqs[i], m);
851 apic_printk(APIC_VERBOSE, "NEW ");
852 print_mp_irq_info(&mp_irqs[i]);
853 return;
854 }
855 if (!i) {
856 /* legacy, do nothing */
857 return;
858 }
859 if (*nr_m_spare < SPARE_SLOT_NUM) {
860 /*
861 * not found (-1) or duplicated (-2) entries are invalid;
862 * remember this slot so it can be reused later
863 */
864 m_spare[*nr_m_spare] = m;
865 *nr_m_spare += 1;
866 }
867}
868#else /* CONFIG_X86_IO_APIC */
869static inline void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
870#endif /* CONFIG_X86_IO_APIC */
871
872static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length,
873 int count)
874{
875 if (!mpc_new_phys) {
876 pr_info("No spare slots, try to append...take your risk, "
877 "new mpc_length %x\n", count);
878 } else {
879 if (count <= mpc_new_length)
880 pr_info("No spare slots, try to append..., "
881 "new mpc_length %x\n", count);
882 else {
883 pr_err("mpc_new_length %lx is too small\n",
884 mpc_new_length);
885 return -1;
886 }
887 }
888
889 return 0;
890}
852 891
853static int __init replace_intsrc_all(struct mpc_table *mpc, 892static int __init replace_intsrc_all(struct mpc_table *mpc,
854 unsigned long mpc_new_phys, 893 unsigned long mpc_new_phys,
@@ -856,77 +895,33 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
856{ 895{
857#ifdef CONFIG_X86_IO_APIC 896#ifdef CONFIG_X86_IO_APIC
858 int i; 897 int i;
859 int nr_m_spare = 0;
860#endif 898#endif
861
862 int count = sizeof(*mpc); 899 int count = sizeof(*mpc);
900 int nr_m_spare = 0;
863 unsigned char *mpt = ((unsigned char *)mpc) + count; 901 unsigned char *mpt = ((unsigned char *)mpc) + count;
864 902
865 printk(KERN_INFO "mpc_length %x\n", mpc->length); 903 printk(KERN_INFO "mpc_length %x\n", mpc->length);
866 while (count < mpc->length) { 904 while (count < mpc->length) {
867 switch (*mpt) { 905 switch (*mpt) {
868 case MP_PROCESSOR: 906 case MP_PROCESSOR:
869 { 907 skip_entry(&mpt, &count, sizeof(struct mpc_cpu));
870 struct mpc_cpu *m = (struct mpc_cpu *)mpt; 908 break;
871 mpt += sizeof(*m);
872 count += sizeof(*m);
873 break;
874 }
875 case MP_BUS: 909 case MP_BUS:
876 { 910 skip_entry(&mpt, &count, sizeof(struct mpc_bus));
877 struct mpc_bus *m = (struct mpc_bus *)mpt; 911 break;
878 mpt += sizeof(*m);
879 count += sizeof(*m);
880 break;
881 }
882 case MP_IOAPIC: 912 case MP_IOAPIC:
883 { 913 skip_entry(&mpt, &count, sizeof(struct mpc_ioapic));
884 mpt += sizeof(struct mpc_ioapic); 914 break;
885 count += sizeof(struct mpc_ioapic);
886 break;
887 }
888 case MP_INTSRC: 915 case MP_INTSRC:
889 { 916 check_irq_src((struct mpc_intsrc *)mpt, &nr_m_spare);
890#ifdef CONFIG_X86_IO_APIC 917 skip_entry(&mpt, &count, sizeof(struct mpc_intsrc));
891 struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; 918 break;
892
893 printk(KERN_INFO "OLD ");
894 print_MP_intsrc_info(m);
895 i = get_MP_intsrc_index(m);
896 if (i > 0) {
897 assign_to_mpc_intsrc(&mp_irqs[i], m);
898 printk(KERN_INFO "NEW ");
899 print_mp_irq_info(&mp_irqs[i]);
900 } else if (!i) {
901 /* legacy, do nothing */
902 } else if (nr_m_spare < SPARE_SLOT_NUM) {
903 /*
904 * not found (-1), or duplicated (-2)
905 * are invalid entries,
906 * we need to use the slot later
907 */
908 m_spare[nr_m_spare] = m;
909 nr_m_spare++;
910 }
911#endif
912 mpt += sizeof(struct mpc_intsrc);
913 count += sizeof(struct mpc_intsrc);
914 break;
915 }
916 case MP_LINTSRC: 919 case MP_LINTSRC:
917 { 920 skip_entry(&mpt, &count, sizeof(struct mpc_lintsrc));
918 struct mpc_lintsrc *m = 921 break;
919 (struct mpc_lintsrc *)mpt;
920 mpt += sizeof(*m);
921 count += sizeof(*m);
922 break;
923 }
924 default: 922 default:
925 /* wrong mptable */ 923 /* wrong mptable */
926 printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); 924 smp_dump_mptable(mpc, mpt);
927 printk(KERN_ERR "type %x\n", *mpt);
928 print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
929 1, mpc, mpc->length, 1);
930 goto out; 925 goto out;
931 } 926 }
932 } 927 }
@@ -943,23 +938,15 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
943 continue; 938 continue;
944 939
945 if (nr_m_spare > 0) { 940 if (nr_m_spare > 0) {
946 printk(KERN_INFO "*NEW* found "); 941 apic_printk(APIC_VERBOSE, "*NEW* found\n");
947 nr_m_spare--; 942 nr_m_spare--;
948 assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]); 943 assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]);
949 m_spare[nr_m_spare] = NULL; 944 m_spare[nr_m_spare] = NULL;
950 } else { 945 } else {
951 struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; 946 struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
952 count += sizeof(struct mpc_intsrc); 947 count += sizeof(struct mpc_intsrc);
953 if (!mpc_new_phys) { 948 if (!check_slot(mpc_new_phys, mpc_new_length, count))
954 printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count); 949 goto out;
955 } else {
956 if (count <= mpc_new_length)
957 printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count);
958 else {
959 printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length);
960 goto out;
961 }
962 }
963 assign_to_mpc_intsrc(&mp_irqs[i], m); 950 assign_to_mpc_intsrc(&mp_irqs[i], m);
964 mpc->length = count; 951 mpc->length = count;
965 mpt += sizeof(struct mpc_intsrc); 952 mpt += sizeof(struct mpc_intsrc);
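The mpparse.c hunks above factor the previously open-coded MP-table handling into small helpers: check_physptr() and smp_reserve_bootmem() absorb the bodies of __get_smp_config() and smp_scan_config(), while the replace_intsrc_all() walk now goes through check_irq_src(), check_slot(), skip_entry() and smp_dump_mptable(). skip_entry() and smp_dump_mptable() are defined in a part of the patch not shown here; as a rough sketch under that assumption, skip_entry() plausibly just advances the cursor and byte count by one entry, mirroring the open-coded pattern it replaces:

/* Sketch only: plausible shape of the skip_entry() helper referenced above. */
static void skip_entry(unsigned char **ptr, int *count, int size)
{
	*ptr += size;	/* advance the walk cursor past this entry   */
	*count += size;	/* account for its bytes against mpc->length */
}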
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 63dd358d8ee1..8e45f4464880 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -470,7 +470,6 @@ struct pv_mmu_ops pv_mmu_ops = {
470#if PAGETABLE_LEVELS >= 3 470#if PAGETABLE_LEVELS >= 3
471#ifdef CONFIG_X86_PAE 471#ifdef CONFIG_X86_PAE
472 .set_pte_atomic = native_set_pte_atomic, 472 .set_pte_atomic = native_set_pte_atomic,
473 .set_pte_present = native_set_pte_present,
474 .pte_clear = native_pte_clear, 473 .pte_clear = native_pte_clear,
475 .pmd_clear = native_pmd_clear, 474 .pmd_clear = native_pmd_clear,
476#endif 475#endif
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index d28bbdc35e4e..755c21e906f3 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -380,8 +380,9 @@ static inline struct iommu_table *find_iommu_table(struct device *dev)
380 return tbl; 380 return tbl;
381} 381}
382 382
383static void calgary_unmap_sg(struct device *dev, 383static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
384 struct scatterlist *sglist, int nelems, int direction) 384 int nelems, enum dma_data_direction dir,
385 struct dma_attrs *attrs)
385{ 386{
386 struct iommu_table *tbl = find_iommu_table(dev); 387 struct iommu_table *tbl = find_iommu_table(dev);
387 struct scatterlist *s; 388 struct scatterlist *s;
@@ -404,7 +405,8 @@ static void calgary_unmap_sg(struct device *dev,
404} 405}
405 406
406static int calgary_map_sg(struct device *dev, struct scatterlist *sg, 407static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
407 int nelems, int direction) 408 int nelems, enum dma_data_direction dir,
409 struct dma_attrs *attrs)
408{ 410{
409 struct iommu_table *tbl = find_iommu_table(dev); 411 struct iommu_table *tbl = find_iommu_table(dev);
410 struct scatterlist *s; 412 struct scatterlist *s;
@@ -429,15 +431,14 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
429 s->dma_address = (entry << PAGE_SHIFT) | s->offset; 431 s->dma_address = (entry << PAGE_SHIFT) | s->offset;
430 432
431 /* insert into HW table */ 433 /* insert into HW table */
432 tce_build(tbl, entry, npages, vaddr & PAGE_MASK, 434 tce_build(tbl, entry, npages, vaddr & PAGE_MASK, dir);
433 direction);
434 435
435 s->dma_length = s->length; 436 s->dma_length = s->length;
436 } 437 }
437 438
438 return nelems; 439 return nelems;
439error: 440error:
440 calgary_unmap_sg(dev, sg, nelems, direction); 441 calgary_unmap_sg(dev, sg, nelems, dir, NULL);
441 for_each_sg(sg, s, nelems, i) { 442 for_each_sg(sg, s, nelems, i) {
442 sg->dma_address = bad_dma_address; 443 sg->dma_address = bad_dma_address;
443 sg->dma_length = 0; 444 sg->dma_length = 0;
@@ -445,10 +446,12 @@ error:
445 return 0; 446 return 0;
446} 447}
447 448
448static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, 449static dma_addr_t calgary_map_page(struct device *dev, struct page *page,
449 size_t size, int direction) 450 unsigned long offset, size_t size,
451 enum dma_data_direction dir,
452 struct dma_attrs *attrs)
450{ 453{
451 void *vaddr = phys_to_virt(paddr); 454 void *vaddr = page_address(page) + offset;
452 unsigned long uaddr; 455 unsigned long uaddr;
453 unsigned int npages; 456 unsigned int npages;
454 struct iommu_table *tbl = find_iommu_table(dev); 457 struct iommu_table *tbl = find_iommu_table(dev);
@@ -456,17 +459,18 @@ static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
456 uaddr = (unsigned long)vaddr; 459 uaddr = (unsigned long)vaddr;
457 npages = iommu_num_pages(uaddr, size, PAGE_SIZE); 460 npages = iommu_num_pages(uaddr, size, PAGE_SIZE);
458 461
459 return iommu_alloc(dev, tbl, vaddr, npages, direction); 462 return iommu_alloc(dev, tbl, vaddr, npages, dir);
460} 463}
461 464
462static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, 465static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr,
463 size_t size, int direction) 466 size_t size, enum dma_data_direction dir,
467 struct dma_attrs *attrs)
464{ 468{
465 struct iommu_table *tbl = find_iommu_table(dev); 469 struct iommu_table *tbl = find_iommu_table(dev);
466 unsigned int npages; 470 unsigned int npages;
467 471
468 npages = iommu_num_pages(dma_handle, size, PAGE_SIZE); 472 npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
469 iommu_free(tbl, dma_handle, npages); 473 iommu_free(tbl, dma_addr, npages);
470} 474}
471 475
472static void* calgary_alloc_coherent(struct device *dev, size_t size, 476static void* calgary_alloc_coherent(struct device *dev, size_t size,
@@ -515,13 +519,13 @@ static void calgary_free_coherent(struct device *dev, size_t size,
515 free_pages((unsigned long)vaddr, get_order(size)); 519 free_pages((unsigned long)vaddr, get_order(size));
516} 520}
517 521
518static struct dma_mapping_ops calgary_dma_ops = { 522static struct dma_map_ops calgary_dma_ops = {
519 .alloc_coherent = calgary_alloc_coherent, 523 .alloc_coherent = calgary_alloc_coherent,
520 .free_coherent = calgary_free_coherent, 524 .free_coherent = calgary_free_coherent,
521 .map_single = calgary_map_single,
522 .unmap_single = calgary_unmap_single,
523 .map_sg = calgary_map_sg, 525 .map_sg = calgary_map_sg,
524 .unmap_sg = calgary_unmap_sg, 526 .unmap_sg = calgary_unmap_sg,
527 .map_page = calgary_map_page,
528 .unmap_page = calgary_unmap_page,
525}; 529};
526 530
527static inline void __iomem * busno_to_bbar(unsigned char num) 531static inline void __iomem * busno_to_bbar(unsigned char num)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index b25428533141..c7c4776ff630 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -1,4 +1,5 @@
1#include <linux/dma-mapping.h> 1#include <linux/dma-mapping.h>
2#include <linux/dma-debug.h>
2#include <linux/dmar.h> 3#include <linux/dmar.h>
3#include <linux/bootmem.h> 4#include <linux/bootmem.h>
4#include <linux/pci.h> 5#include <linux/pci.h>
@@ -12,7 +13,7 @@
12 13
13static int forbid_dac __read_mostly; 14static int forbid_dac __read_mostly;
14 15
15struct dma_mapping_ops *dma_ops; 16struct dma_map_ops *dma_ops;
16EXPORT_SYMBOL(dma_ops); 17EXPORT_SYMBOL(dma_ops);
17 18
18static int iommu_sac_force __read_mostly; 19static int iommu_sac_force __read_mostly;
@@ -44,6 +45,9 @@ struct device x86_dma_fallback_dev = {
44}; 45};
45EXPORT_SYMBOL(x86_dma_fallback_dev); 46EXPORT_SYMBOL(x86_dma_fallback_dev);
46 47
48/* Number of entries preallocated for DMA-API debugging */
49#define PREALLOC_DMA_DEBUG_ENTRIES 32768
50
47int dma_set_mask(struct device *dev, u64 mask) 51int dma_set_mask(struct device *dev, u64 mask)
48{ 52{
49 if (!dev->dma_mask || !dma_supported(dev, mask)) 53 if (!dev->dma_mask || !dma_supported(dev, mask))
@@ -224,7 +228,7 @@ early_param("iommu", iommu_setup);
224 228
225int dma_supported(struct device *dev, u64 mask) 229int dma_supported(struct device *dev, u64 mask)
226{ 230{
227 struct dma_mapping_ops *ops = get_dma_ops(dev); 231 struct dma_map_ops *ops = get_dma_ops(dev);
228 232
229#ifdef CONFIG_PCI 233#ifdef CONFIG_PCI
230 if (mask > 0xffffffff && forbid_dac > 0) { 234 if (mask > 0xffffffff && forbid_dac > 0) {
@@ -265,6 +269,12 @@ EXPORT_SYMBOL(dma_supported);
265 269
266static int __init pci_iommu_init(void) 270static int __init pci_iommu_init(void)
267{ 271{
272 dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
273
274#ifdef CONFIG_PCI
275 dma_debug_add_bus(&pci_bus_type);
276#endif
277
268 calgary_iommu_init(); 278 calgary_iommu_init();
269 279
270 intel_iommu_init(); 280 intel_iommu_init();
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index d5768b1af080..b284b58c035c 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -255,10 +255,13 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
255} 255}
256 256
257/* Map a single area into the IOMMU */ 257/* Map a single area into the IOMMU */
258static dma_addr_t 258static dma_addr_t gart_map_page(struct device *dev, struct page *page,
259gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) 259 unsigned long offset, size_t size,
260 enum dma_data_direction dir,
261 struct dma_attrs *attrs)
260{ 262{
261 unsigned long bus; 263 unsigned long bus;
264 phys_addr_t paddr = page_to_phys(page) + offset;
262 265
263 if (!dev) 266 if (!dev)
264 dev = &x86_dma_fallback_dev; 267 dev = &x86_dma_fallback_dev;
@@ -275,8 +278,9 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
275/* 278/*
276 * Free a DMA mapping. 279 * Free a DMA mapping.
277 */ 280 */
278static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, 281static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
279 size_t size, int direction) 282 size_t size, enum dma_data_direction dir,
283 struct dma_attrs *attrs)
280{ 284{
281 unsigned long iommu_page; 285 unsigned long iommu_page;
282 int npages; 286 int npages;
@@ -298,8 +302,8 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
298/* 302/*
299 * Wrapper for pci_unmap_single working with scatterlists. 303 * Wrapper for pci_unmap_single working with scatterlists.
300 */ 304 */
301static void 305static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
302gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) 306 enum dma_data_direction dir, struct dma_attrs *attrs)
303{ 307{
304 struct scatterlist *s; 308 struct scatterlist *s;
305 int i; 309 int i;
@@ -307,7 +311,7 @@ gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
307 for_each_sg(sg, s, nents, i) { 311 for_each_sg(sg, s, nents, i) {
308 if (!s->dma_length || !s->length) 312 if (!s->dma_length || !s->length)
309 break; 313 break;
310 gart_unmap_single(dev, s->dma_address, s->dma_length, dir); 314 gart_unmap_page(dev, s->dma_address, s->dma_length, dir, NULL);
311 } 315 }
312} 316}
313 317
@@ -329,7 +333,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
329 addr = dma_map_area(dev, addr, s->length, dir, 0); 333 addr = dma_map_area(dev, addr, s->length, dir, 0);
330 if (addr == bad_dma_address) { 334 if (addr == bad_dma_address) {
331 if (i > 0) 335 if (i > 0)
332 gart_unmap_sg(dev, sg, i, dir); 336 gart_unmap_sg(dev, sg, i, dir, NULL);
333 nents = 0; 337 nents = 0;
334 sg[0].dma_length = 0; 338 sg[0].dma_length = 0;
335 break; 339 break;
@@ -400,8 +404,8 @@ dma_map_cont(struct device *dev, struct scatterlist *start, int nelems,
400 * DMA map all entries in a scatterlist. 404 * DMA map all entries in a scatterlist.
401 * Merge chunks that have page aligned sizes into a continuous mapping. 405 * Merge chunks that have page aligned sizes into a continuous mapping.
402 */ 406 */
403static int 407static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
404gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) 408 enum dma_data_direction dir, struct dma_attrs *attrs)
405{ 409{
406 struct scatterlist *s, *ps, *start_sg, *sgmap; 410 struct scatterlist *s, *ps, *start_sg, *sgmap;
407 int need = 0, nextneed, i, out, start; 411 int need = 0, nextneed, i, out, start;
@@ -468,7 +472,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
468 472
469error: 473error:
470 flush_gart(); 474 flush_gart();
471 gart_unmap_sg(dev, sg, out, dir); 475 gart_unmap_sg(dev, sg, out, dir, NULL);
472 476
473 /* When it was forced or merged try again in a dumb way */ 477 /* When it was forced or merged try again in a dumb way */
474 if (force_iommu || iommu_merge) { 478 if (force_iommu || iommu_merge) {
@@ -521,7 +525,7 @@ static void
521gart_free_coherent(struct device *dev, size_t size, void *vaddr, 525gart_free_coherent(struct device *dev, size_t size, void *vaddr,
522 dma_addr_t dma_addr) 526 dma_addr_t dma_addr)
523{ 527{
524 gart_unmap_single(dev, dma_addr, size, DMA_BIDIRECTIONAL); 528 gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
525 free_pages((unsigned long)vaddr, get_order(size)); 529 free_pages((unsigned long)vaddr, get_order(size));
526} 530}
527 531
@@ -707,11 +711,11 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
707 return -1; 711 return -1;
708} 712}
709 713
710static struct dma_mapping_ops gart_dma_ops = { 714static struct dma_map_ops gart_dma_ops = {
711 .map_single = gart_map_single,
712 .unmap_single = gart_unmap_single,
713 .map_sg = gart_map_sg, 715 .map_sg = gart_map_sg,
714 .unmap_sg = gart_unmap_sg, 716 .unmap_sg = gart_unmap_sg,
717 .map_page = gart_map_page,
718 .unmap_page = gart_unmap_page,
715 .alloc_coherent = gart_alloc_coherent, 719 .alloc_coherent = gart_alloc_coherent,
716 .free_coherent = gart_free_coherent, 720 .free_coherent = gart_free_coherent,
717}; 721};
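The calgary and GART conversions above (and the nommu and swiotlb ones that follow) all apply the same pattern: struct dma_mapping_ops, whose map_single()/unmap_single() callbacks took a phys_addr_t and an int direction, becomes the generic struct dma_map_ops, whose map_page()/unmap_page() callbacks take a struct page plus offset, an enum dma_data_direction and a struct dma_attrs pointer. A minimal sketch of that wrapping, assuming a hypothetical driver helper my_iommu_map_phys() that still operates on physical addresses:

/*
 * Sketch of the map_single -> map_page conversion used above.
 * my_iommu_map_phys() is a made-up stand-in for a driver's existing
 * physical-address based mapper, not a kernel API.
 */
static dma_addr_t my_map_page(struct device *dev, struct page *page,
			      unsigned long offset, size_t size,
			      enum dma_data_direction dir,
			      struct dma_attrs *attrs)
{
	phys_addr_t paddr = page_to_phys(page) + offset;

	return my_iommu_map_phys(dev, paddr, size, dir);
}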
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index c70ab5a5d4c8..c6d703b39326 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -1,14 +1,14 @@
1/* Fallback functions when the main IOMMU code is not compiled in. This 1/* Fallback functions when the main IOMMU code is not compiled in. This
2 code is roughly equivalent to i386. */ 2 code is roughly equivalent to i386. */
3#include <linux/mm.h>
4#include <linux/init.h>
5#include <linux/pci.h>
6#include <linux/string.h>
7#include <linux/dma-mapping.h> 3#include <linux/dma-mapping.h>
8#include <linux/scatterlist.h> 4#include <linux/scatterlist.h>
5#include <linux/string.h>
6#include <linux/init.h>
7#include <linux/pci.h>
8#include <linux/mm.h>
9 9
10#include <asm/iommu.h>
11#include <asm/processor.h> 10#include <asm/processor.h>
11#include <asm/iommu.h>
12#include <asm/dma.h> 12#include <asm/dma.h>
13 13
14static int 14static int
@@ -25,19 +25,19 @@ check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
25 return 1; 25 return 1;
26} 26}
27 27
28static dma_addr_t 28static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
29nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, 29 unsigned long offset, size_t size,
30 int direction) 30 enum dma_data_direction dir,
31 struct dma_attrs *attrs)
31{ 32{
32 dma_addr_t bus = paddr; 33 dma_addr_t bus = page_to_phys(page) + offset;
33 WARN_ON(size == 0); 34 WARN_ON(size == 0);
34 if (!check_addr("map_single", hwdev, bus, size)) 35 if (!check_addr("map_single", dev, bus, size))
35 return bad_dma_address; 36 return bad_dma_address;
36 flush_write_buffers(); 37 flush_write_buffers();
37 return bus; 38 return bus;
38} 39}
39 40
40
41/* Map a set of buffers described by scatterlist in streaming 41/* Map a set of buffers described by scatterlist in streaming
42 * mode for DMA. This is the scatter-gather version of the 42 * mode for DMA. This is the scatter-gather version of the
43 * above pci_map_single interface. Here the scatter gather list 43 * above pci_map_single interface. Here the scatter gather list
@@ -54,7 +54,8 @@ nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size,
54 * the same here. 54 * the same here.
55 */ 55 */
56static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, 56static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
57 int nents, int direction) 57 int nents, enum dma_data_direction dir,
58 struct dma_attrs *attrs)
58{ 59{
59 struct scatterlist *s; 60 struct scatterlist *s;
60 int i; 61 int i;
@@ -78,12 +79,12 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
78 free_pages((unsigned long)vaddr, get_order(size)); 79 free_pages((unsigned long)vaddr, get_order(size));
79} 80}
80 81
81struct dma_mapping_ops nommu_dma_ops = { 82struct dma_map_ops nommu_dma_ops = {
82 .alloc_coherent = dma_generic_alloc_coherent, 83 .alloc_coherent = dma_generic_alloc_coherent,
83 .free_coherent = nommu_free_coherent, 84 .free_coherent = nommu_free_coherent,
84 .map_single = nommu_map_single, 85 .map_sg = nommu_map_sg,
85 .map_sg = nommu_map_sg, 86 .map_page = nommu_map_page,
86 .is_phys = 1, 87 .is_phys = 1,
87}; 88};
88 89
89void __init no_iommu_init(void) 90void __init no_iommu_init(void)
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb.c
index d59c91747665..34f12e9996ed 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -33,18 +33,11 @@ phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
33 return baddr; 33 return baddr;
34} 34}
35 35
36int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) 36int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
37{ 37{
38 return 0; 38 return 0;
39} 39}
40 40
41static dma_addr_t
42swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
43 int direction)
44{
45 return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction);
46}
47
48static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, 41static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
49 dma_addr_t *dma_handle, gfp_t flags) 42 dma_addr_t *dma_handle, gfp_t flags)
50{ 43{
@@ -57,20 +50,20 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
57 return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); 50 return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
58} 51}
59 52
60struct dma_mapping_ops swiotlb_dma_ops = { 53struct dma_map_ops swiotlb_dma_ops = {
61 .mapping_error = swiotlb_dma_mapping_error, 54 .mapping_error = swiotlb_dma_mapping_error,
62 .alloc_coherent = x86_swiotlb_alloc_coherent, 55 .alloc_coherent = x86_swiotlb_alloc_coherent,
63 .free_coherent = swiotlb_free_coherent, 56 .free_coherent = swiotlb_free_coherent,
64 .map_single = swiotlb_map_single_phys,
65 .unmap_single = swiotlb_unmap_single,
66 .sync_single_for_cpu = swiotlb_sync_single_for_cpu, 57 .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
67 .sync_single_for_device = swiotlb_sync_single_for_device, 58 .sync_single_for_device = swiotlb_sync_single_for_device,
68 .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, 59 .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
69 .sync_single_range_for_device = swiotlb_sync_single_range_for_device, 60 .sync_single_range_for_device = swiotlb_sync_single_range_for_device,
70 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, 61 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
71 .sync_sg_for_device = swiotlb_sync_sg_for_device, 62 .sync_sg_for_device = swiotlb_sync_sg_for_device,
72 .map_sg = swiotlb_map_sg, 63 .map_sg = swiotlb_map_sg_attrs,
73 .unmap_sg = swiotlb_unmap_sg, 64 .unmap_sg = swiotlb_unmap_sg_attrs,
65 .map_page = swiotlb_map_page,
66 .unmap_page = swiotlb_unmap_page,
74 .dma_supported = NULL, 67 .dma_supported = NULL,
75}; 68};
76 69
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 78533a519d8f..25e28087a3ee 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -65,11 +65,11 @@ void exit_thread(void)
65{ 65{
66 struct task_struct *me = current; 66 struct task_struct *me = current;
67 struct thread_struct *t = &me->thread; 67 struct thread_struct *t = &me->thread;
68 unsigned long *bp = t->io_bitmap_ptr;
68 69
69 if (me->thread.io_bitmap_ptr) { 70 if (bp) {
70 struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); 71 struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
71 72
72 kfree(t->io_bitmap_ptr);
73 t->io_bitmap_ptr = NULL; 73 t->io_bitmap_ptr = NULL;
74 clear_thread_flag(TIF_IO_BITMAP); 74 clear_thread_flag(TIF_IO_BITMAP);
75 /* 75 /*
@@ -78,6 +78,7 @@ void exit_thread(void)
78 memset(tss->io_bitmap, 0xff, t->io_bitmap_max); 78 memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
79 t->io_bitmap_max = 0; 79 t->io_bitmap_max = 0;
80 put_cpu(); 80 put_cpu();
81 kfree(bp);
81 } 82 }
82 83
83 ds_exit_thread(current); 84 ds_exit_thread(current);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 3d9672e59c16..19378715f415 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -685,9 +685,8 @@ static int ptrace_bts_config(struct task_struct *child,
685 if (!cfg.signal) 685 if (!cfg.signal)
686 return -EINVAL; 686 return -EINVAL;
687 687
688 return -EOPNOTSUPP;
689
690 child->thread.bts_ovfl_signal = cfg.signal; 688 child->thread.bts_ovfl_signal = cfg.signal;
689 return -EOPNOTSUPP;
691 } 690 }
692 691
693 if ((cfg.flags & PTRACE_BTS_O_ALLOC) && 692 if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 309949e9e1c1..e95022e4f5d5 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -74,8 +74,7 @@ static void ich_force_hpet_resume(void)
74 if (!force_hpet_address) 74 if (!force_hpet_address)
75 return; 75 return;
76 76
77 if (rcba_base == NULL) 77 BUG_ON(rcba_base == NULL);
78 BUG();
79 78
80 /* read the Function Disable register, dword mode only */ 79 /* read the Function Disable register, dword mode only */
81 val = readl(rcba_base + 0x3404); 80 val = readl(rcba_base + 0x3404);
@@ -172,7 +171,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4,
172 ich_force_enable_hpet); 171 ich_force_enable_hpet);
173DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, 172DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7,
174 ich_force_enable_hpet); 173 ich_force_enable_hpet);
175 174DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3a16, /* ICH10 */
175 ich_force_enable_hpet);
176 176
177static struct pci_dev *cached_dev; 177static struct pci_dev *cached_dev;
178 178
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index dd6f2b71561b..5d465b207e72 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -1,14 +1,14 @@
1/* 1/*
2 * RTC related functions 2 * RTC related functions
3 */ 3 */
4#include <linux/platform_device.h>
5#include <linux/mc146818rtc.h>
4#include <linux/acpi.h> 6#include <linux/acpi.h>
5#include <linux/bcd.h> 7#include <linux/bcd.h>
6#include <linux/mc146818rtc.h>
7#include <linux/platform_device.h>
8#include <linux/pnp.h> 8#include <linux/pnp.h>
9 9
10#include <asm/time.h>
11#include <asm/vsyscall.h> 10#include <asm/vsyscall.h>
11#include <asm/time.h>
12 12
13#ifdef CONFIG_X86_32 13#ifdef CONFIG_X86_32
14/* 14/*
@@ -16,9 +16,9 @@
16 * register we are working with. It is required for NMI access to the 16 * register we are working with. It is required for NMI access to the
17 * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details. 17 * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details.
18 */ 18 */
19volatile unsigned long cmos_lock = 0; 19volatile unsigned long cmos_lock;
20EXPORT_SYMBOL(cmos_lock); 20EXPORT_SYMBOL(cmos_lock);
21#endif 21#endif /* CONFIG_X86_32 */
22 22
23/* For two digit years assume time is always after that */ 23/* For two digit years assume time is always after that */
24#define CMOS_YEARS_OFFS 2000 24#define CMOS_YEARS_OFFS 2000
@@ -38,9 +38,9 @@ EXPORT_SYMBOL(rtc_lock);
38 */ 38 */
39int mach_set_rtc_mmss(unsigned long nowtime) 39int mach_set_rtc_mmss(unsigned long nowtime)
40{ 40{
41 int retval = 0;
42 int real_seconds, real_minutes, cmos_minutes; 41 int real_seconds, real_minutes, cmos_minutes;
43 unsigned char save_control, save_freq_select; 42 unsigned char save_control, save_freq_select;
43 int retval = 0;
44 44
45 /* tell the clock it's being set */ 45 /* tell the clock it's being set */
46 save_control = CMOS_READ(RTC_CONTROL); 46 save_control = CMOS_READ(RTC_CONTROL);
@@ -72,8 +72,8 @@ int mach_set_rtc_mmss(unsigned long nowtime)
72 real_seconds = bin2bcd(real_seconds); 72 real_seconds = bin2bcd(real_seconds);
73 real_minutes = bin2bcd(real_minutes); 73 real_minutes = bin2bcd(real_minutes);
74 } 74 }
75 CMOS_WRITE(real_seconds,RTC_SECONDS); 75 CMOS_WRITE(real_seconds, RTC_SECONDS);
76 CMOS_WRITE(real_minutes,RTC_MINUTES); 76 CMOS_WRITE(real_minutes, RTC_MINUTES);
77 } else { 77 } else {
78 printk(KERN_WARNING 78 printk(KERN_WARNING
79 "set_rtc_mmss: can't update from %d to %d\n", 79 "set_rtc_mmss: can't update from %d to %d\n",
@@ -151,6 +151,7 @@ unsigned char rtc_cmos_read(unsigned char addr)
151 outb(addr, RTC_PORT(0)); 151 outb(addr, RTC_PORT(0));
152 val = inb(RTC_PORT(1)); 152 val = inb(RTC_PORT(1));
153 lock_cmos_suffix(addr); 153 lock_cmos_suffix(addr);
154
154 return val; 155 return val;
155} 156}
156EXPORT_SYMBOL(rtc_cmos_read); 157EXPORT_SYMBOL(rtc_cmos_read);
@@ -166,8 +167,8 @@ EXPORT_SYMBOL(rtc_cmos_write);
166 167
167static int set_rtc_mmss(unsigned long nowtime) 168static int set_rtc_mmss(unsigned long nowtime)
168{ 169{
169 int retval;
170 unsigned long flags; 170 unsigned long flags;
171 int retval;
171 172
172 spin_lock_irqsave(&rtc_lock, flags); 173 spin_lock_irqsave(&rtc_lock, flags);
173 retval = set_wallclock(nowtime); 174 retval = set_wallclock(nowtime);
@@ -242,6 +243,7 @@ static __init int add_rtc_cmos(void)
242 platform_device_register(&rtc_device); 243 platform_device_register(&rtc_device);
243 dev_info(&rtc_device.dev, 244 dev_info(&rtc_device.dev,
244 "registered platform RTC device (no PNP device found)\n"); 245 "registered platform RTC device (no PNP device found)\n");
246
245 return 0; 247 return 0;
246} 248}
247device_initcall(add_rtc_cmos); 249device_initcall(add_rtc_cmos);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f28c56e6bf94..a0d26237d7cf 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -112,8 +112,13 @@
112#define ARCH_SETUP 112#define ARCH_SETUP
113#endif 113#endif
114 114
115RESERVE_BRK(dmi_alloc, 65536);
116
115unsigned int boot_cpu_id __read_mostly; 117unsigned int boot_cpu_id __read_mostly;
116 118
119static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
120unsigned long _brk_end = (unsigned long)__brk_base;
121
117#ifdef CONFIG_X86_64 122#ifdef CONFIG_X86_64
118int default_cpu_present_to_apicid(int mps_cpu) 123int default_cpu_present_to_apicid(int mps_cpu)
119{ 124{
@@ -158,12 +163,6 @@ static struct resource bss_resource = {
158 163
159 164
160#ifdef CONFIG_X86_32 165#ifdef CONFIG_X86_32
161/* This value is set up by the early boot code to point to the value
162 immediately after the boot time page tables. It contains a *physical*
163 address, and must not be in the .bss segment! */
164unsigned long init_pg_tables_start __initdata = ~0UL;
165unsigned long init_pg_tables_end __initdata = ~0UL;
166
167static struct resource video_ram_resource = { 166static struct resource video_ram_resource = {
168 .name = "Video RAM area", 167 .name = "Video RAM area",
169 .start = 0xa0000, 168 .start = 0xa0000,
@@ -219,12 +218,6 @@ unsigned long mmu_cr4_features = X86_CR4_PAE;
219int bootloader_type; 218int bootloader_type;
220 219
221/* 220/*
222 * Early DMI memory
223 */
224int dmi_alloc_index;
225char dmi_alloc_data[DMI_MAX_DATA];
226
227/*
228 * Setup options 221 * Setup options
229 */ 222 */
230struct screen_info screen_info; 223struct screen_info screen_info;
@@ -269,6 +262,35 @@ static inline void copy_edd(void)
269} 262}
270#endif 263#endif
271 264
265void * __init extend_brk(size_t size, size_t align)
266{
267 size_t mask = align - 1;
268 void *ret;
269
270 BUG_ON(_brk_start == 0);
271 BUG_ON(align & mask);
272
273 _brk_end = (_brk_end + mask) & ~mask;
274 BUG_ON((char *)(_brk_end + size) > __brk_limit);
275
276 ret = (void *)_brk_end;
277 _brk_end += size;
278
279 memset(ret, 0, size);
280
281 return ret;
282}
283
284static void __init reserve_brk(void)
285{
286 if (_brk_end > _brk_start)
287 reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK");
288
289 /* Mark brk area as locked down and no longer taking any
290 new allocations */
291 _brk_start = 0;
292}
293
272#ifdef CONFIG_BLK_DEV_INITRD 294#ifdef CONFIG_BLK_DEV_INITRD
273 295
274#ifdef CONFIG_X86_32 296#ifdef CONFIG_X86_32
@@ -717,11 +739,7 @@ void __init setup_arch(char **cmdline_p)
717 init_mm.start_code = (unsigned long) _text; 739 init_mm.start_code = (unsigned long) _text;
718 init_mm.end_code = (unsigned long) _etext; 740 init_mm.end_code = (unsigned long) _etext;
719 init_mm.end_data = (unsigned long) _edata; 741 init_mm.end_data = (unsigned long) _edata;
720#ifdef CONFIG_X86_32 742 init_mm.brk = _brk_end;
721 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
722#else
723 init_mm.brk = (unsigned long) &_end;
724#endif
725 743
726 code_resource.start = virt_to_phys(_text); 744 code_resource.start = virt_to_phys(_text);
727 code_resource.end = virt_to_phys(_etext)-1; 745 code_resource.end = virt_to_phys(_etext)-1;
@@ -842,6 +860,8 @@ void __init setup_arch(char **cmdline_p)
842 setup_bios_corruption_check(); 860 setup_bios_corruption_check();
843#endif 861#endif
844 862
863 reserve_brk();
864
845 /* max_pfn_mapped is updated here */ 865 /* max_pfn_mapped is updated here */
846 max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); 866 max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
847 max_pfn_mapped = max_low_pfn_mapped; 867 max_pfn_mapped = max_low_pfn_mapped;
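Together with the vmlinux_32.lds.S change later in this diff, setup.c introduces an early brk-style allocator: RESERVE_BRK() sets aside build-time space in the new .brk section, extend_brk() hands out aligned, zeroed chunks during early boot, and reserve_brk() reserves the used range and closes the allocator (clearing _brk_start, so any later extend_brk() call trips the BUG_ON). A hedged usage sketch; the reservation name and sizes are purely illustrative:

/* Illustrative only: "early_buf" and the sizes below are made-up examples. */
RESERVE_BRK(early_buf, 16 * 1024);	/* reserve slop space in .brk at build time */

static void *early_buf;

static void __init early_buf_init(void)
{
	/* one zeroed, page-aligned page; must run before reserve_brk() */
	early_buf = extend_brk(PAGE_SIZE, PAGE_SIZE);
}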
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index efa615f2bf43..400331b50a53 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -233,8 +233,8 @@ proceed:
233 "%zu bytes\n", vm.addr, static_size); 233 "%zu bytes\n", vm.addr, static_size);
234 234
235 ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, 235 ret = pcpu_setup_first_chunk(pcpur_get_page, static_size,
236 PERCPU_FIRST_CHUNK_RESERVE, 236 PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
237 PMD_SIZE, dyn_size, vm.addr, NULL); 237 PMD_SIZE, vm.addr, NULL);
238 goto out_free_ar; 238 goto out_free_ar;
239 239
240enomem: 240enomem:
@@ -257,31 +257,13 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
257 * Embedding allocator 257 * Embedding allocator
258 * 258 *
259 * The first chunk is sized to just contain the static area plus 259 * The first chunk is sized to just contain the static area plus
260 * module and dynamic reserves, and allocated as a contiguous area 260 * module and dynamic reserves and embedded into linear physical
261 * using bootmem allocator and used as-is without being mapped into 261 * mapping so that it can use PMD mapping without additional TLB
262 * vmalloc area. This enables the first chunk to piggy back on the 262 * pressure.
263 * linear physical PMD mapping and doesn't add any additional pressure
264 * to TLB. Note that if the needed size is smaller than the minimum
265 * unit size, the leftover is returned to the bootmem allocator.
266 */ 263 */
267static void *pcpue_ptr __initdata;
268static size_t pcpue_size __initdata;
269static size_t pcpue_unit_size __initdata;
270
271static struct page * __init pcpue_get_page(unsigned int cpu, int pageno)
272{
273 size_t off = (size_t)pageno << PAGE_SHIFT;
274
275 if (off >= pcpue_size)
276 return NULL;
277
278 return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off);
279}
280
281static ssize_t __init setup_pcpu_embed(size_t static_size) 264static ssize_t __init setup_pcpu_embed(size_t static_size)
282{ 265{
283 unsigned int cpu; 266 size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
284 size_t dyn_size;
285 267
286 /* 268 /*
287 * If large page isn't supported, there's no benefit in doing 269 * If large page isn't supported, there's no benefit in doing
@@ -291,33 +273,8 @@ static ssize_t __init setup_pcpu_embed(size_t static_size)
291 if (!cpu_has_pse || pcpu_need_numa()) 273 if (!cpu_has_pse || pcpu_need_numa())
292 return -EINVAL; 274 return -EINVAL;
293 275
294 /* allocate and copy */ 276 return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
295 pcpue_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + 277 reserve - PERCPU_FIRST_CHUNK_RESERVE, -1);
296 PERCPU_DYNAMIC_RESERVE);
297 pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
298 dyn_size = pcpue_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
299
300 pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size,
301 PAGE_SIZE);
302 if (!pcpue_ptr)
303 return -ENOMEM;
304
305 for_each_possible_cpu(cpu) {
306 void *ptr = pcpue_ptr + cpu * pcpue_unit_size;
307
308 free_bootmem(__pa(ptr + pcpue_size),
309 pcpue_unit_size - pcpue_size);
310 memcpy(ptr, __per_cpu_load, static_size);
311 }
312
313 /* we're ready, commit */
314 pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n",
315 pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size);
316
317 return pcpu_setup_first_chunk(pcpue_get_page, static_size,
318 PERCPU_FIRST_CHUNK_RESERVE,
319 pcpue_unit_size, dyn_size,
320 pcpue_ptr, NULL);
321} 278}
322 279
323/* 280/*
@@ -375,8 +332,8 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
375 pcpu4k_nr_static_pages, static_size); 332 pcpu4k_nr_static_pages, static_size);
376 333
377 ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 334 ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size,
378 PERCPU_FIRST_CHUNK_RESERVE, -1, -1, NULL, 335 PERCPU_FIRST_CHUNK_RESERVE, -1,
379 pcpu4k_populate_pte); 336 -1, NULL, pcpu4k_populate_pte);
380 goto out_free_ar; 337 goto out_free_ar;
381 338
382enomem: 339enomem:
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index d2cc6428c587..dfcc74ab0ab6 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -211,31 +211,27 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
211{ 211{
212 /* Default to using normal stack */ 212 /* Default to using normal stack */
213 unsigned long sp = regs->sp; 213 unsigned long sp = regs->sp;
214 int onsigstack = on_sig_stack(sp);
214 215
215#ifdef CONFIG_X86_64 216#ifdef CONFIG_X86_64
216 /* redzone */ 217 /* redzone */
217 sp -= 128; 218 sp -= 128;
218#endif /* CONFIG_X86_64 */ 219#endif /* CONFIG_X86_64 */
219 220
220 /* 221 if (!onsigstack) {
221 * If we are on the alternate signal stack and would overflow it, don't. 222 /* This is the X/Open sanctioned signal stack switching. */
222 * Return an always-bogus address instead so we will die with SIGSEGV. 223 if (ka->sa.sa_flags & SA_ONSTACK) {
223 */ 224 if (sas_ss_flags(sp) == 0)
224 if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size))) 225 sp = current->sas_ss_sp + current->sas_ss_size;
225 return (void __user *) -1L; 226 } else {
226
227 /* This is the X/Open sanctioned signal stack switching. */
228 if (ka->sa.sa_flags & SA_ONSTACK) {
229 if (sas_ss_flags(sp) == 0)
230 sp = current->sas_ss_sp + current->sas_ss_size;
231 } else {
232#ifdef CONFIG_X86_32 227#ifdef CONFIG_X86_32
233 /* This is the legacy signal stack switching. */ 228 /* This is the legacy signal stack switching. */
234 if ((regs->ss & 0xffff) != __USER_DS && 229 if ((regs->ss & 0xffff) != __USER_DS &&
235 !(ka->sa.sa_flags & SA_RESTORER) && 230 !(ka->sa.sa_flags & SA_RESTORER) &&
236 ka->sa.sa_restorer) 231 ka->sa.sa_restorer)
237 sp = (unsigned long) ka->sa.sa_restorer; 232 sp = (unsigned long) ka->sa.sa_restorer;
238#endif /* CONFIG_X86_32 */ 233#endif /* CONFIG_X86_32 */
234 }
239 } 235 }
240 236
241 if (used_math()) { 237 if (used_math()) {
@@ -244,12 +240,22 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
244 sp = round_down(sp, 64); 240 sp = round_down(sp, 64);
245#endif /* CONFIG_X86_64 */ 241#endif /* CONFIG_X86_64 */
246 *fpstate = (void __user *)sp; 242 *fpstate = (void __user *)sp;
247
248 if (save_i387_xstate(*fpstate) < 0)
249 return (void __user *)-1L;
250 } 243 }
251 244
252 return (void __user *)align_sigframe(sp - frame_size); 245 sp = align_sigframe(sp - frame_size);
246
247 /*
248 * If we are on the alternate signal stack and would overflow it, don't.
249 * Return an always-bogus address instead so we will die with SIGSEGV.
250 */
251 if (onsigstack && !likely(on_sig_stack(sp)))
252 return (void __user *)-1L;
253
254 /* save i387 state */
255 if (used_math() && save_i387_xstate(*fpstate) < 0)
256 return (void __user *)-1L;
257
258 return (void __user *)sp;
253} 259}
254 260
255#ifdef CONFIG_X86_32 261#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 8afb69180c9b..deb5ebb32c3b 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -751,7 +751,7 @@ static int __init uv_bau_init(void)
751 int node; 751 int node;
752 int nblades; 752 int nblades;
753 int last_blade; 753 int last_blade;
754 int cur_cpu = 0; 754 int cur_cpu;
755 755
756 if (!is_uv_system()) 756 if (!is_uv_system())
757 return 0; 757 return 0;
@@ -765,6 +765,7 @@ static int __init uv_bau_init(void)
765 uv_mmask = (1UL << uv_hub_info->n_val) - 1; 765 uv_mmask = (1UL << uv_hub_info->n_val) - 1;
766 nblades = 0; 766 nblades = 0;
767 last_blade = -1; 767 last_blade = -1;
768 cur_cpu = 0;
768 for_each_online_node(node) { 769 for_each_online_node(node) {
769 blade = uv_node_to_blade_id(node); 770 blade = uv_node_to_blade_id(node);
770 if (blade == last_blade) 771 if (blade == last_blade)
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 0fcc95a354f7..7e4515957a1c 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -25,10 +25,10 @@
25 * 25 *
26 * Send feedback to <colpatch@us.ibm.com> 26 * Send feedback to <colpatch@us.ibm.com>
27 */ 27 */
28#include <linux/init.h>
29#include <linux/smp.h>
30#include <linux/nodemask.h> 28#include <linux/nodemask.h>
31#include <linux/mmzone.h> 29#include <linux/mmzone.h>
30#include <linux/init.h>
31#include <linux/smp.h>
32#include <asm/cpu.h> 32#include <asm/cpu.h>
33 33
34static DEFINE_PER_CPU(struct x86_cpu, cpu_devices); 34static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
@@ -47,6 +47,7 @@ int __ref arch_register_cpu(int num)
47 */ 47 */
48 if (num) 48 if (num)
49 per_cpu(cpu_devices, num).cpu.hotpluggable = 1; 49 per_cpu(cpu_devices, num).cpu.hotpluggable = 1;
50
50 return register_cpu(&per_cpu(cpu_devices, num).cpu, num); 51 return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
51} 52}
52EXPORT_SYMBOL(arch_register_cpu); 53EXPORT_SYMBOL(arch_register_cpu);
@@ -56,12 +57,13 @@ void arch_unregister_cpu(int num)
56 unregister_cpu(&per_cpu(cpu_devices, num).cpu); 57 unregister_cpu(&per_cpu(cpu_devices, num).cpu);
57} 58}
58EXPORT_SYMBOL(arch_unregister_cpu); 59EXPORT_SYMBOL(arch_unregister_cpu);
59#else 60#else /* CONFIG_HOTPLUG_CPU */
61
60static int __init arch_register_cpu(int num) 62static int __init arch_register_cpu(int num)
61{ 63{
62 return register_cpu(&per_cpu(cpu_devices, num).cpu, num); 64 return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
63} 65}
64#endif /*CONFIG_HOTPLUG_CPU*/ 66#endif /* CONFIG_HOTPLUG_CPU */
65 67
66static int __init topology_init(void) 68static int __init topology_init(void)
67{ 69{
@@ -70,11 +72,11 @@ static int __init topology_init(void)
70#ifdef CONFIG_NUMA 72#ifdef CONFIG_NUMA
71 for_each_online_node(i) 73 for_each_online_node(i)
72 register_one_node(i); 74 register_one_node(i);
73#endif /* CONFIG_NUMA */ 75#endif
74 76
75 for_each_present_cpu(i) 77 for_each_present_cpu(i)
76 arch_register_cpu(i); 78 arch_register_cpu(i);
79
77 return 0; 80 return 0;
78} 81}
79
80subsys_initcall(topology_init); 82subsys_initcall(topology_init);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 83d53ce5d4c4..7a567ebe6361 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -17,20 +17,21 @@
17#include <asm/delay.h> 17#include <asm/delay.h>
18#include <asm/hypervisor.h> 18#include <asm/hypervisor.h>
19 19
20unsigned int cpu_khz; /* TSC clocks / usec, not used here */ 20unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
21EXPORT_SYMBOL(cpu_khz); 21EXPORT_SYMBOL(cpu_khz);
22unsigned int tsc_khz; 22
23unsigned int __read_mostly tsc_khz;
23EXPORT_SYMBOL(tsc_khz); 24EXPORT_SYMBOL(tsc_khz);
24 25
25/* 26/*
26 * TSC can be unstable due to cpufreq or due to unsynced TSCs 27 * TSC can be unstable due to cpufreq or due to unsynced TSCs
27 */ 28 */
28static int tsc_unstable; 29static int __read_mostly tsc_unstable;
29 30
30/* native_sched_clock() is called before tsc_init(), so 31/* native_sched_clock() is called before tsc_init(), so
31 we must start with the TSC soft disabled to prevent 32 we must start with the TSC soft disabled to prevent
32 erroneous rdtsc usage on !cpu_has_tsc processors */ 33 erroneous rdtsc usage on !cpu_has_tsc processors */
33static int tsc_disabled = -1; 34static int __read_mostly tsc_disabled = -1;
34 35
35static int tsc_clocksource_reliable; 36static int tsc_clocksource_reliable;
36/* 37/*
@@ -273,30 +274,43 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
273 * use the TSC value at the transitions to calculate a pretty 274 * use the TSC value at the transitions to calculate a pretty
274 * good value for the TSC frequency. 275
275 */ 276 */
276static inline int pit_expect_msb(unsigned char val) 277static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
277{ 278{
278 int count = 0; 279 int count;
280 u64 tsc = 0;
279 281
280 for (count = 0; count < 50000; count++) { 282 for (count = 0; count < 50000; count++) {
281 /* Ignore LSB */ 283 /* Ignore LSB */
282 inb(0x42); 284 inb(0x42);
283 if (inb(0x42) != val) 285 if (inb(0x42) != val)
284 break; 286 break;
287 tsc = get_cycles();
285 } 288 }
286 return count > 50; 289 *deltap = get_cycles() - tsc;
290 *tscp = tsc;
291
292 /*
293 * We require _some_ success, but the quality control
294 * will be based on the error terms on the TSC values.
295 */
296 return count > 5;
287} 297}
288 298
289/* 299/*
290 * How many MSB values do we want to see? We aim for a 300 * How many MSB values do we want to see? We aim for
291 * 15ms calibration, which assuming a 2us counter read 301 * a maximum error rate of 500ppm (in practice the
292 * error should give us roughly 150 ppm precision for 302 * real error is much smaller), but refuse to spend
293 * the calibration. 303 * more than 25ms on it.
294 */ 304 */
295#define QUICK_PIT_MS 15 305#define MAX_QUICK_PIT_MS 25
296#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) 306#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
297 307
298static unsigned long quick_pit_calibrate(void) 308static unsigned long quick_pit_calibrate(void)
299{ 309{
310 int i;
311 u64 tsc, delta;
312 unsigned long d1, d2;
313
300 /* Set the Gate high, disable speaker */ 314 /* Set the Gate high, disable speaker */
301 outb((inb(0x61) & ~0x02) | 0x01, 0x61); 315 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
302 316
@@ -315,45 +329,52 @@ static unsigned long quick_pit_calibrate(void)
315 outb(0xff, 0x42); 329 outb(0xff, 0x42);
316 outb(0xff, 0x42); 330 outb(0xff, 0x42);
317 331
318 if (pit_expect_msb(0xff)) { 332 /*
319 int i; 333 * The PIT starts counting at the next edge, so we
320 u64 t1, t2, delta; 334 * need to delay for a microsecond. The easiest way
321 unsigned char expect = 0xfe; 335 * to do that is to just read back the 16-bit counter
322 336 * once from the PIT.
323 t1 = get_cycles(); 337 */
324 for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) { 338 inb(0x42);
325 if (!pit_expect_msb(expect)) 339 inb(0x42);
326 goto failed; 340
341 if (pit_expect_msb(0xff, &tsc, &d1)) {
342 for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
343 if (!pit_expect_msb(0xff-i, &delta, &d2))
344 break;
345
346 /*
347 * Iterate until the error is less than 500 ppm
348 */
349 delta -= tsc;
350 if (d1+d2 < delta >> 11)
351 goto success;
327 } 352 }
328 t2 = get_cycles();
329
330 /*
331 * Make sure we can rely on the second TSC timestamp:
332 */
333 if (!pit_expect_msb(expect))
334 goto failed;
335
336 /*
337 * Ok, if we get here, then we've seen the
338 * MSB of the PIT decrement QUICK_PIT_ITERATIONS
339 * times, and each MSB had many hits, so we never
340 * had any sudden jumps.
341 *
342 * As a result, we can depend on there not being
343 * any odd delays anywhere, and the TSC reads are
344 * reliable.
345 *
346 * kHz = ticks / time-in-seconds / 1000;
347 * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000
348 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000)
349 */
350 delta = (t2 - t1)*PIT_TICK_RATE;
351 do_div(delta, QUICK_PIT_ITERATIONS*256*1000);
352 printk("Fast TSC calibration using PIT\n");
353 return delta;
354 } 353 }
355failed: 354 printk("Fast TSC calibration failed\n");
356 return 0; 355 return 0;
356
357success:
358 /*
359 * Ok, if we get here, then we've seen the
360 * MSB of the PIT decrement 'i' times, and the
361 * error has shrunk to less than 500 ppm.
362 *
363 * As a result, we can depend on there not being
364 * any odd delays anywhere, and the TSC reads are
365 * reliable (within the error). We also adjust the
366 * delta to the middle of the error bars, just
367 * because it looks nicer.
368 *
369 * kHz = ticks / time-in-seconds / 1000;
370 * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000
371 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000)
372 */
373 delta += (long)(d2 - d1)/2;
374 delta *= PIT_TICK_RATE;
375 do_div(delta, i*256*1000);
376 printk("Fast TSC calibration using PIT\n");
377 return delta;
357} 378}
358 379
359/** 380/**
@@ -523,8 +544,6 @@ unsigned long native_calibrate_tsc(void)
523 return tsc_pit_min; 544 return tsc_pit_min;
524} 545}
525 546
526#ifdef CONFIG_X86_32
527/* Only called from the Powernow K7 cpu freq driver */
528int recalibrate_cpu_khz(void) 547int recalibrate_cpu_khz(void)
529{ 548{
530#ifndef CONFIG_SMP 549#ifndef CONFIG_SMP
@@ -546,7 +565,6 @@ int recalibrate_cpu_khz(void)
546 565
547EXPORT_SYMBOL(recalibrate_cpu_khz); 566EXPORT_SYMBOL(recalibrate_cpu_khz);
548 567
549#endif /* CONFIG_X86_32 */
550 568
551/* Accelerators for sched_clock() 569/* Accelerators for sched_clock()
552 * convert from cycles(64bits) => nanoseconds (64bits) 570 * convert from cycles(64bits) => nanoseconds (64bits)
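The rewritten quick_pit_calibrate() above keeps sampling PIT MSB periods until the two TSC read-error terms satisfy d1 + d2 < delta >> 11, i.e. the combined read uncertainty is below roughly 1/2048 (about 500 ppm) of the measured TSC delta, and then converts that delta to kHz exactly as the success-path comment states: kHz = (t2 - t1) * PIT_TICK_RATE / (i * 256 * 1000), one MSB period being 256 PIT ticks at PIT_TICK_RATE (1193182 Hz). A small sketch of that final conversion step; the helper name is made up:

/* Sketch of the kHz conversion done at the "success" label above. */
static unsigned long pit_delta_to_khz(u64 tsc_delta, long err_adjust, int periods)
{
	u64 delta = tsc_delta + err_adjust / 2;	/* centre within the error bars */

	delta *= PIT_TICK_RATE;			/* TSC ticks * PIT ticks/sec    */
	do_div(delta, periods * 256 * 1000);	/* / (PIT ticks elapsed * 1000) */
	return delta;				/* TSC frequency in kHz         */
}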
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 2cc4a90e2cb3..95deb9f2211e 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -395,11 +395,6 @@ static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval)
395 vmi_ops.update_pte(ptep, VMI_PAGE_PT); 395 vmi_ops.update_pte(ptep, VMI_PAGE_PT);
396} 396}
397 397
398static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
399{
400 vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1));
401}
402
403static void vmi_set_pud(pud_t *pudp, pud_t pudval) 398static void vmi_set_pud(pud_t *pudp, pud_t pudval)
404{ 399{
405 /* Um, eww */ 400 /* Um, eww */
@@ -750,7 +745,6 @@ static inline int __init activate_vmi(void)
750 pv_mmu_ops.set_pmd = vmi_set_pmd; 745 pv_mmu_ops.set_pmd = vmi_set_pmd;
751#ifdef CONFIG_X86_PAE 746#ifdef CONFIG_X86_PAE
752 pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic; 747 pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic;
753 pv_mmu_ops.set_pte_present = vmi_set_pte_present;
754 pv_mmu_ops.set_pud = vmi_set_pud; 748 pv_mmu_ops.set_pud = vmi_set_pud;
755 pv_mmu_ops.pte_clear = vmi_pte_clear; 749 pv_mmu_ops.pte_clear = vmi_pte_clear;
756 pv_mmu_ops.pmd_clear = vmi_pmd_clear; 750 pv_mmu_ops.pmd_clear = vmi_pmd_clear;
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 0d860963f268..62ad500d55f3 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -189,15 +189,24 @@ SECTIONS
189 *(.bss) 189 *(.bss)
190 . = ALIGN(4); 190 . = ALIGN(4);
191 __bss_stop = .; 191 __bss_stop = .;
192 _end = . ; 192 }
193 /* This is where the kernel creates the early boot page tables */ 193
194 .brk : AT(ADDR(.brk) - LOAD_OFFSET) {
194 . = ALIGN(PAGE_SIZE); 195 . = ALIGN(PAGE_SIZE);
195 pg0 = . ; 196 __brk_base = . ;
197 . += 64 * 1024 ; /* 64k alignment slop space */
198 *(.brk_reservation) /* areas brk users have reserved */
199 __brk_limit = . ;
200 }
201
202 .end : AT(ADDR(.end) - LOAD_OFFSET) {
203 _end = . ;
196 } 204 }
197 205
198 /* Sections to be discarded */ 206 /* Sections to be discarded */
199 /DISCARD/ : { 207 /DISCARD/ : {
200 *(.exitcall.exit) 208 *(.exitcall.exit)
209 *(.discard)
201 } 210 }
202 211
203 STABS_DEBUG 212 STABS_DEBUG
@@ -205,6 +214,12 @@ SECTIONS
205 DWARF_DEBUG 214 DWARF_DEBUG
206} 215}
207 216
217/*
218 * Build-time check on the image size:
219 */
220ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
221 "kernel image bigger than KERNEL_IMAGE_SIZE")
222
208#ifdef CONFIG_KEXEC 223#ifdef CONFIG_KEXEC
209/* Link time checks */ 224/* Link time checks */
210#include <asm/kexec.h> 225#include <asm/kexec.h>
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 5bf54e40c6ef..c8742507b030 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -29,8 +29,8 @@ SECTIONS
29{ 29{
30 . = __START_KERNEL; 30 . = __START_KERNEL;
31 phys_startup_64 = startup_64 - LOAD_OFFSET; 31 phys_startup_64 = startup_64 - LOAD_OFFSET;
32 _text = .; /* Text and read-only data */
33 .text : AT(ADDR(.text) - LOAD_OFFSET) { 32 .text : AT(ADDR(.text) - LOAD_OFFSET) {
33 _text = .; /* Text and read-only data */
34 /* First the code that has to be first for bootstrapping */ 34 /* First the code that has to be first for bootstrapping */
35 *(.text.head) 35 *(.text.head)
36 _stext = .; 36 _stext = .;
@@ -61,13 +61,13 @@ SECTIONS
61 .data : AT(ADDR(.data) - LOAD_OFFSET) { 61 .data : AT(ADDR(.data) - LOAD_OFFSET) {
62 DATA_DATA 62 DATA_DATA
63 CONSTRUCTORS 63 CONSTRUCTORS
64 _edata = .; /* End of data section */
64 } :data 65 } :data
65 66
66 _edata = .; /* End of data section */
67 67
68 . = ALIGN(PAGE_SIZE);
69 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
70 .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { 68 .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
69 . = ALIGN(PAGE_SIZE);
70 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
71 *(.data.cacheline_aligned) 71 *(.data.cacheline_aligned)
72 } 72 }
73 . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); 73 . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
@@ -125,29 +125,29 @@ SECTIONS
125#undef VVIRT_OFFSET 125#undef VVIRT_OFFSET
126#undef VVIRT 126#undef VVIRT
127 127
128 . = ALIGN(THREAD_SIZE); /* init_task */
129 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { 128 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
129 . = ALIGN(THREAD_SIZE); /* init_task */
130 *(.data.init_task) 130 *(.data.init_task)
131 }:data.init 131 }:data.init
132 132
133 . = ALIGN(PAGE_SIZE);
134 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { 133 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
134 . = ALIGN(PAGE_SIZE);
135 *(.data.page_aligned) 135 *(.data.page_aligned)
136 } 136 }
137 137
138 /* might get freed after init */
139 . = ALIGN(PAGE_SIZE);
140 __smp_alt_begin = .;
141 __smp_locks = .;
142 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { 138 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
139 /* might get freed after init */
140 . = ALIGN(PAGE_SIZE);
141 __smp_alt_begin = .;
142 __smp_locks = .;
143 *(.smp_locks) 143 *(.smp_locks)
144 __smp_locks_end = .;
145 . = ALIGN(PAGE_SIZE);
146 __smp_alt_end = .;
144 } 147 }
145 __smp_locks_end = .;
146 . = ALIGN(PAGE_SIZE);
147 __smp_alt_end = .;
148 148
149 . = ALIGN(PAGE_SIZE); /* Init code and data */ 149 . = ALIGN(PAGE_SIZE); /* Init code and data */
150 __init_begin = .; 150 __init_begin = .; /* paired with __init_end */
151 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { 151 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
152 _sinittext = .; 152 _sinittext = .;
153 INIT_TEXT 153 INIT_TEXT
@@ -159,40 +159,42 @@ SECTIONS
159 __initdata_end = .; 159 __initdata_end = .;
160 } 160 }
161 161
162 . = ALIGN(16); 162 .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
163 __setup_start = .; 163 . = ALIGN(16);
164 .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) } 164 __setup_start = .;
165 __setup_end = .; 165 *(.init.setup)
166 __initcall_start = .; 166 __setup_end = .;
167 }
167 .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { 168 .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
169 __initcall_start = .;
168 INITCALLS 170 INITCALLS
171 __initcall_end = .;
169 } 172 }
170 __initcall_end = .;
171 __con_initcall_start = .;
172 .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { 173 .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
174 __con_initcall_start = .;
173 *(.con_initcall.init) 175 *(.con_initcall.init)
176 __con_initcall_end = .;
174 } 177 }
175 __con_initcall_end = .;
176 __x86_cpu_dev_start = .;
177 .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { 178 .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
179 __x86_cpu_dev_start = .;
178 *(.x86_cpu_dev.init) 180 *(.x86_cpu_dev.init)
181 __x86_cpu_dev_end = .;
179 } 182 }
180 __x86_cpu_dev_end = .;
181 SECURITY_INIT 183 SECURITY_INIT
182 184
183 . = ALIGN(8); 185 . = ALIGN(8);
184 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { 186 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
185 __parainstructions = .; 187 __parainstructions = .;
186 *(.parainstructions) 188 *(.parainstructions)
187 __parainstructions_end = .; 189 __parainstructions_end = .;
188 } 190 }
189 191
190 . = ALIGN(8);
191 __alt_instructions = .;
192 .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { 192 .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
193 . = ALIGN(8);
194 __alt_instructions = .;
193 *(.altinstructions) 195 *(.altinstructions)
196 __alt_instructions_end = .;
194 } 197 }
195 __alt_instructions_end = .;
196 .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { 198 .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
197 *(.altinstr_replacement) 199 *(.altinstr_replacement)
198 } 200 }
@@ -207,9 +209,11 @@ SECTIONS
207 209
208#ifdef CONFIG_BLK_DEV_INITRD 210#ifdef CONFIG_BLK_DEV_INITRD
209 . = ALIGN(PAGE_SIZE); 211 . = ALIGN(PAGE_SIZE);
210 __initramfs_start = .; 212 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
211 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } 213 __initramfs_start = .;
212 __initramfs_end = .; 214 *(.init.ramfs)
215 __initramfs_end = .;
216 }
213#endif 217#endif
214 218
215#ifdef CONFIG_SMP 219#ifdef CONFIG_SMP
@@ -229,20 +233,29 @@ SECTIONS
229 . = ALIGN(PAGE_SIZE); 233 . = ALIGN(PAGE_SIZE);
230 __init_end = .; 234 __init_end = .;
231 235
232 . = ALIGN(PAGE_SIZE);
233 __nosave_begin = .;
234 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { 236 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
235 *(.data.nosave) 237 . = ALIGN(PAGE_SIZE);
238 __nosave_begin = .;
239 *(.data.nosave)
240 . = ALIGN(PAGE_SIZE);
241 __nosave_end = .;
236 } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */ 242 } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */
237 . = ALIGN(PAGE_SIZE);
238 __nosave_end = .;
239 243
240 __bss_start = .; /* BSS */
241 .bss : AT(ADDR(.bss) - LOAD_OFFSET) { 244 .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
245 . = ALIGN(PAGE_SIZE);
246 __bss_start = .; /* BSS */
242 *(.bss.page_aligned) 247 *(.bss.page_aligned)
243 *(.bss) 248 *(.bss)
244 } 249 __bss_stop = .;
245 __bss_stop = .; 250 }
251
252 .brk : AT(ADDR(.brk) - LOAD_OFFSET) {
253 . = ALIGN(PAGE_SIZE);
254 __brk_base = . ;
255 . += 64 * 1024 ; /* 64k alignment slop space */
256 *(.brk_reservation) /* areas brk users have reserved */
257 __brk_limit = . ;
258 }
246 259
247 _end = . ; 260 _end = . ;
248 261
@@ -250,6 +263,7 @@ SECTIONS
250 /DISCARD/ : { 263 /DISCARD/ : {
251 *(.exitcall.exit) 264 *(.exitcall.exit)
252 *(.eh_frame) 265 *(.eh_frame)
266 *(.discard)
253 } 267 }
254 268
255 STABS_DEBUG 269 STABS_DEBUG
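The 64-bit linker script is reworked in the same spirit: the location-counter alignments and the start/end marker symbols (_text, _edata, __bss_start, __brk_base and friends) now sit inside their output-section statements instead of between them, so each marker is unambiguously attached to its section, and a .brk block mirroring the 32-bit one is added before _end. C code keeps consuming these markers the usual way; a small sketch, using exactly the symbols defined above (the helper itself is illustrative):

/* Hedged sketch: consuming the linker-provided markers from C. */
extern char __brk_base[], __brk_limit[], _end[];

static unsigned long brk_bytes_total(void)
{
        /* size of the build-time slop the .brk section sets aside */
        return (unsigned long)(__brk_limit - __brk_base);
}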
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 74de562812cc..a1d804bcd483 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -22,7 +22,7 @@
22#include <asm/paravirt.h> 22#include <asm/paravirt.h>
23#include <asm/setup.h> 23#include <asm/setup.h>
24 24
25#ifdef CONFIG_PARAVIRT 25#if defined CONFIG_PCI && defined CONFIG_PARAVIRT
26/* 26/*
27 * Interrupt control on vSMPowered systems: 27 * Interrupt control on vSMPowered systems:
28 * ~AC is a shadow of IF. If IF is 'on' AC should be 'off' 28 * ~AC is a shadow of IF. If IF is 'on' AC should be 'off'
@@ -114,6 +114,7 @@ static void __init set_vsmp_pv_ops(void)
114} 114}
115#endif 115#endif
116 116
117#ifdef CONFIG_PCI
117static int is_vsmp = -1; 118static int is_vsmp = -1;
118 119
119static void __init detect_vsmp_box(void) 120static void __init detect_vsmp_box(void)
@@ -139,6 +140,15 @@ int is_vsmp_box(void)
139 } 140 }
140} 141}
141 142
143#else
144static void __init detect_vsmp_box(void)
145{
146}
147int is_vsmp_box(void)
148{
149 return 0;
150}
151#endif
142void __init vsmp_init(void) 152void __init vsmp_init(void)
143{ 153{
144 detect_vsmp_box(); 154 detect_vsmp_box();
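The vsmp_64.c change makes the vSMP interrupt-control pv-ops depend on both CONFIG_PCI and CONFIG_PARAVIRT, and adds !CONFIG_PCI stubs so detect_vsmp_box()/is_vsmp_box() still compile, simply reporting "not a vSMP box", when PCI support is configured out. The idiom in isolation (CONFIG_FOO and foo_present() are purely illustrative stand-ins):

/* Hedged, generic sketch of the stub idiom used above: one symbol for
 * all callers, with the Kconfig option picking the real or the trivial
 * implementation at compile time. */
#ifdef CONFIG_FOO
int foo_present(void);                  /* real probe lives elsewhere */
#else
static inline int foo_present(void) { return 0; }
#endif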
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b81125f0bdee..0a303c3ed11f 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -4,6 +4,10 @@
4config HAVE_KVM 4config HAVE_KVM
5 bool 5 bool
6 6
7config HAVE_KVM_IRQCHIP
8 bool
9 default y
10
7menuconfig VIRTUALIZATION 11menuconfig VIRTUALIZATION
8 bool "Virtualization" 12 bool "Virtualization"
9 depends on HAVE_KVM || X86 13 depends on HAVE_KVM || X86
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 72bd275a9b5c..c13bb92d3157 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -201,6 +201,9 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
201 if (!atomic_inc_and_test(&pt->pending)) 201 if (!atomic_inc_and_test(&pt->pending))
202 set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); 202 set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
203 203
204 if (!pt->reinject)
205 atomic_set(&pt->pending, 1);
206
204 if (vcpu0 && waitqueue_active(&vcpu0->wq)) 207 if (vcpu0 && waitqueue_active(&vcpu0->wq))
205 wake_up_interruptible(&vcpu0->wq); 208 wake_up_interruptible(&vcpu0->wq);
206 209
@@ -536,6 +539,16 @@ void kvm_pit_reset(struct kvm_pit *pit)
536 pit->pit_state.irq_ack = 1; 539 pit->pit_state.irq_ack = 1;
537} 540}
538 541
542static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
543{
544 struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
545
546 if (!mask) {
547 atomic_set(&pit->pit_state.pit_timer.pending, 0);
548 pit->pit_state.irq_ack = 1;
549 }
550}
551
539struct kvm_pit *kvm_create_pit(struct kvm *kvm) 552struct kvm_pit *kvm_create_pit(struct kvm *kvm)
540{ 553{
541 struct kvm_pit *pit; 554 struct kvm_pit *pit;
@@ -545,9 +558,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
545 if (!pit) 558 if (!pit)
546 return NULL; 559 return NULL;
547 560
548 mutex_lock(&kvm->lock);
549 pit->irq_source_id = kvm_request_irq_source_id(kvm); 561 pit->irq_source_id = kvm_request_irq_source_id(kvm);
550 mutex_unlock(&kvm->lock);
551 if (pit->irq_source_id < 0) { 562 if (pit->irq_source_id < 0) {
552 kfree(pit); 563 kfree(pit);
553 return NULL; 564 return NULL;
@@ -580,10 +591,14 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
580 pit_state->irq_ack_notifier.gsi = 0; 591 pit_state->irq_ack_notifier.gsi = 0;
581 pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq; 592 pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
582 kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); 593 kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
594 pit_state->pit_timer.reinject = true;
583 mutex_unlock(&pit->pit_state.lock); 595 mutex_unlock(&pit->pit_state.lock);
584 596
585 kvm_pit_reset(pit); 597 kvm_pit_reset(pit);
586 598
599 pit->mask_notifier.func = pit_mask_notifer;
600 kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
601
587 return pit; 602 return pit;
588} 603}
589 604
@@ -592,6 +607,8 @@ void kvm_free_pit(struct kvm *kvm)
592 struct hrtimer *timer; 607 struct hrtimer *timer;
593 608
594 if (kvm->arch.vpit) { 609 if (kvm->arch.vpit) {
610 kvm_unregister_irq_mask_notifier(kvm, 0,
611 &kvm->arch.vpit->mask_notifier);
595 mutex_lock(&kvm->arch.vpit->pit_state.lock); 612 mutex_lock(&kvm->arch.vpit->pit_state.lock);
596 timer = &kvm->arch.vpit->pit_state.pit_timer.timer; 613 timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
597 hrtimer_cancel(timer); 614 hrtimer_cancel(timer);
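Two behavioural changes for the in-kernel PIT are visible above: a reinject flag on the PIT timer (defaulting to true; when cleared, a missed tick is clamped to a single pending interrupt rather than being accumulated and re-injected), and a mask notifier so that masking or unmasking IRQ0 from user space clears stale pending state. A sketch of the notifier hookup as a device would use it; struct my_dev and my_mask_cb are illustrative stand-ins, while the registration call is the one the PIT code above uses:

/* Hedged sketch of the irq-mask-notifier pattern shown above. */
static void my_mask_cb(struct kvm_irq_mask_notifier *kimn, bool mask)
{
        struct my_dev *dev = container_of(kimn, struct my_dev, mask_notifier);

        if (!mask)              /* line unmasked: forget stale pending state */
                atomic_set(&dev->pending, 0);
}

static void my_dev_create(struct kvm *kvm, struct my_dev *dev)
{
        dev->mask_notifier.func = my_mask_cb;
        kvm_register_irq_mask_notifier(kvm, 0, &dev->mask_notifier);
}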
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 4178022b97aa..6acbe4b505d5 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -9,6 +9,7 @@ struct kvm_kpit_timer {
9 s64 period; /* unit: ns */ 9 s64 period; /* unit: ns */
10 s64 scheduled; 10 s64 scheduled;
11 atomic_t pending; 11 atomic_t pending;
12 bool reinject;
12}; 13};
13 14
14struct kvm_kpit_channel_state { 15struct kvm_kpit_channel_state {
@@ -45,6 +46,7 @@ struct kvm_pit {
45 struct kvm *kvm; 46 struct kvm *kvm;
46 struct kvm_kpit_state pit_state; 47 struct kvm_kpit_state pit_state;
47 int irq_source_id; 48 int irq_source_id;
49 struct kvm_irq_mask_notifier mask_notifier;
48}; 50};
49 51
50#define KVM_PIT_BASE_ADDRESS 0x40 52#define KVM_PIT_BASE_ADDRESS 0x40
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 179dcb0103fd..1ccb50c74f18 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -32,11 +32,13 @@
32#include <linux/kvm_host.h> 32#include <linux/kvm_host.h>
33 33
34static void pic_lock(struct kvm_pic *s) 34static void pic_lock(struct kvm_pic *s)
35 __acquires(&s->lock)
35{ 36{
36 spin_lock(&s->lock); 37 spin_lock(&s->lock);
37} 38}
38 39
39static void pic_unlock(struct kvm_pic *s) 40static void pic_unlock(struct kvm_pic *s)
41 __releases(&s->lock)
40{ 42{
41 struct kvm *kvm = s->kvm; 43 struct kvm *kvm = s->kvm;
42 unsigned acks = s->pending_acks; 44 unsigned acks = s->pending_acks;
@@ -49,7 +51,8 @@ static void pic_unlock(struct kvm_pic *s)
49 spin_unlock(&s->lock); 51 spin_unlock(&s->lock);
50 52
51 while (acks) { 53 while (acks) {
52 kvm_notify_acked_irq(kvm, __ffs(acks)); 54 kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)),
55 __ffs(acks));
53 acks &= acks - 1; 56 acks &= acks - 1;
54 } 57 }
55 58
@@ -76,12 +79,13 @@ void kvm_pic_clear_isr_ack(struct kvm *kvm)
76/* 79/*
77 * set irq level. If an edge is detected, then the IRR is set to 1 80 * set irq level. If an edge is detected, then the IRR is set to 1
78 */ 81 */
79static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level) 82static inline int pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
80{ 83{
81 int mask; 84 int mask, ret = 1;
82 mask = 1 << irq; 85 mask = 1 << irq;
83 if (s->elcr & mask) /* level triggered */ 86 if (s->elcr & mask) /* level triggered */
84 if (level) { 87 if (level) {
88 ret = !(s->irr & mask);
85 s->irr |= mask; 89 s->irr |= mask;
86 s->last_irr |= mask; 90 s->last_irr |= mask;
87 } else { 91 } else {
@@ -90,11 +94,15 @@ static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
90 } 94 }
91 else /* edge triggered */ 95 else /* edge triggered */
92 if (level) { 96 if (level) {
93 if ((s->last_irr & mask) == 0) 97 if ((s->last_irr & mask) == 0) {
98 ret = !(s->irr & mask);
94 s->irr |= mask; 99 s->irr |= mask;
100 }
95 s->last_irr |= mask; 101 s->last_irr |= mask;
96 } else 102 } else
97 s->last_irr &= ~mask; 103 s->last_irr &= ~mask;
104
105 return (s->imr & mask) ? -1 : ret;
98} 106}
99 107
100/* 108/*
@@ -171,16 +179,19 @@ void kvm_pic_update_irq(struct kvm_pic *s)
171 pic_unlock(s); 179 pic_unlock(s);
172} 180}
173 181
174void kvm_pic_set_irq(void *opaque, int irq, int level) 182int kvm_pic_set_irq(void *opaque, int irq, int level)
175{ 183{
176 struct kvm_pic *s = opaque; 184 struct kvm_pic *s = opaque;
185 int ret = -1;
177 186
178 pic_lock(s); 187 pic_lock(s);
179 if (irq >= 0 && irq < PIC_NUM_PINS) { 188 if (irq >= 0 && irq < PIC_NUM_PINS) {
180 pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); 189 ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
181 pic_update_irq(s); 190 pic_update_irq(s);
182 } 191 }
183 pic_unlock(s); 192 pic_unlock(s);
193
194 return ret;
184} 195}
185 196
186/* 197/*
@@ -232,7 +243,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
232 } 243 }
233 pic_update_irq(s); 244 pic_update_irq(s);
234 pic_unlock(s); 245 pic_unlock(s);
235 kvm_notify_acked_irq(kvm, irq); 246 kvm_notify_acked_irq(kvm, SELECT_PIC(irq), irq);
236 247
237 return intno; 248 return intno;
238} 249}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 82579ee538d0..9f593188129e 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -32,6 +32,8 @@
32#include "lapic.h" 32#include "lapic.h"
33 33
34#define PIC_NUM_PINS 16 34#define PIC_NUM_PINS 16
35#define SELECT_PIC(irq) \
36 ((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE)
35 37
36struct kvm; 38struct kvm;
37struct kvm_vcpu; 39struct kvm_vcpu;
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 8e5ee99551f6..ed66e4c078dc 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -18,7 +18,6 @@ static const u32 host_save_user_msrs[] = {
18}; 18};
19 19
20#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) 20#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
21#define NUM_DB_REGS 4
22 21
23struct kvm_vcpu; 22struct kvm_vcpu;
24 23
@@ -29,18 +28,23 @@ struct vcpu_svm {
29 struct svm_cpu_data *svm_data; 28 struct svm_cpu_data *svm_data;
30 uint64_t asid_generation; 29 uint64_t asid_generation;
31 30
32 unsigned long db_regs[NUM_DB_REGS];
33
34 u64 next_rip; 31 u64 next_rip;
35 32
36 u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; 33 u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
37 u64 host_gs_base; 34 u64 host_gs_base;
38 unsigned long host_cr2; 35 unsigned long host_cr2;
39 unsigned long host_db_regs[NUM_DB_REGS];
40 unsigned long host_dr6;
41 unsigned long host_dr7;
42 36
43 u32 *msrpm; 37 u32 *msrpm;
38 struct vmcb *hsave;
39 u64 hsave_msr;
40
41 u64 nested_vmcb;
42
43 /* These are the merged vectors */
44 u32 *nested_msrpm;
45
46 /* gpa pointers to the real vectors */
47 u64 nested_vmcb_msrpm;
44}; 48};
45 49
46#endif 50#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2d4477c71473..2a36f7f7c4c7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -145,11 +145,20 @@ struct kvm_rmap_desc {
145 struct kvm_rmap_desc *more; 145 struct kvm_rmap_desc *more;
146}; 146};
147 147
148struct kvm_shadow_walk { 148struct kvm_shadow_walk_iterator {
149 int (*entry)(struct kvm_shadow_walk *walk, struct kvm_vcpu *vcpu, 149 u64 addr;
150 u64 addr, u64 *spte, int level); 150 hpa_t shadow_addr;
151 int level;
152 u64 *sptep;
153 unsigned index;
151}; 154};
152 155
156#define for_each_shadow_entry(_vcpu, _addr, _walker) \
157 for (shadow_walk_init(&(_walker), _vcpu, _addr); \
158 shadow_walk_okay(&(_walker)); \
159 shadow_walk_next(&(_walker)))
160
161
153struct kvm_unsync_walk { 162struct kvm_unsync_walk {
154 int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); 163 int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
155}; 164};
@@ -343,7 +352,6 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
343 352
344 BUG_ON(!mc->nobjs); 353 BUG_ON(!mc->nobjs);
345 p = mc->objects[--mc->nobjs]; 354 p = mc->objects[--mc->nobjs];
346 memset(p, 0, size);
347 return p; 355 return p;
348} 356}
349 357
@@ -794,10 +802,8 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
794 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 802 set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
795 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 803 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
796 INIT_LIST_HEAD(&sp->oos_link); 804 INIT_LIST_HEAD(&sp->oos_link);
797 ASSERT(is_empty_shadow_page(sp->spt));
798 bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); 805 bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
799 sp->multimapped = 0; 806 sp->multimapped = 0;
800 sp->global = 1;
801 sp->parent_pte = parent_pte; 807 sp->parent_pte = parent_pte;
802 --vcpu->kvm->arch.n_free_mmu_pages; 808 --vcpu->kvm->arch.n_free_mmu_pages;
803 return sp; 809 return sp;
@@ -983,8 +989,8 @@ struct kvm_mmu_pages {
983 idx < 512; \ 989 idx < 512; \
984 idx = find_next_bit(bitmap, 512, idx+1)) 990 idx = find_next_bit(bitmap, 512, idx+1))
985 991
986int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, 992static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
987 int idx) 993 int idx)
988{ 994{
989 int i; 995 int i;
990 996
@@ -1059,7 +1065,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
1059 index = kvm_page_table_hashfn(gfn); 1065 index = kvm_page_table_hashfn(gfn);
1060 bucket = &kvm->arch.mmu_page_hash[index]; 1066 bucket = &kvm->arch.mmu_page_hash[index];
1061 hlist_for_each_entry(sp, node, bucket, hash_link) 1067 hlist_for_each_entry(sp, node, bucket, hash_link)
1062 if (sp->gfn == gfn && !sp->role.metaphysical 1068 if (sp->gfn == gfn && !sp->role.direct
1063 && !sp->role.invalid) { 1069 && !sp->role.invalid) {
1064 pgprintk("%s: found role %x\n", 1070 pgprintk("%s: found role %x\n",
1065 __func__, sp->role.word); 1071 __func__, sp->role.word);
@@ -1115,8 +1121,9 @@ struct mmu_page_path {
1115 i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ 1121 i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
1116 i = mmu_pages_next(&pvec, &parents, i)) 1122 i = mmu_pages_next(&pvec, &parents, i))
1117 1123
1118int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, 1124static int mmu_pages_next(struct kvm_mmu_pages *pvec,
1119 int i) 1125 struct mmu_page_path *parents,
1126 int i)
1120{ 1127{
1121 int n; 1128 int n;
1122 1129
@@ -1135,7 +1142,7 @@ int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
1135 return n; 1142 return n;
1136} 1143}
1137 1144
1138void mmu_pages_clear_parents(struct mmu_page_path *parents) 1145static void mmu_pages_clear_parents(struct mmu_page_path *parents)
1139{ 1146{
1140 struct kvm_mmu_page *sp; 1147 struct kvm_mmu_page *sp;
1141 unsigned int level = 0; 1148 unsigned int level = 0;
@@ -1193,7 +1200,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1193 gfn_t gfn, 1200 gfn_t gfn,
1194 gva_t gaddr, 1201 gva_t gaddr,
1195 unsigned level, 1202 unsigned level,
1196 int metaphysical, 1203 int direct,
1197 unsigned access, 1204 unsigned access,
1198 u64 *parent_pte) 1205 u64 *parent_pte)
1199{ 1206{
@@ -1204,10 +1211,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1204 struct kvm_mmu_page *sp; 1211 struct kvm_mmu_page *sp;
1205 struct hlist_node *node, *tmp; 1212 struct hlist_node *node, *tmp;
1206 1213
1207 role.word = 0; 1214 role = vcpu->arch.mmu.base_role;
1208 role.glevels = vcpu->arch.mmu.root_level;
1209 role.level = level; 1215 role.level = level;
1210 role.metaphysical = metaphysical; 1216 role.direct = direct;
1211 role.access = access; 1217 role.access = access;
1212 if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { 1218 if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
1213 quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); 1219 quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
@@ -1242,8 +1248,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1242 pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word); 1248 pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
1243 sp->gfn = gfn; 1249 sp->gfn = gfn;
1244 sp->role = role; 1250 sp->role = role;
1251 sp->global = role.cr4_pge;
1245 hlist_add_head(&sp->hash_link, bucket); 1252 hlist_add_head(&sp->hash_link, bucket);
1246 if (!metaphysical) { 1253 if (!direct) {
1247 if (rmap_write_protect(vcpu->kvm, gfn)) 1254 if (rmap_write_protect(vcpu->kvm, gfn))
1248 kvm_flush_remote_tlbs(vcpu->kvm); 1255 kvm_flush_remote_tlbs(vcpu->kvm);
1249 account_shadowed(vcpu->kvm, gfn); 1256 account_shadowed(vcpu->kvm, gfn);
@@ -1255,35 +1262,35 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1255 return sp; 1262 return sp;
1256} 1263}
1257 1264
1258static int walk_shadow(struct kvm_shadow_walk *walker, 1265static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
1259 struct kvm_vcpu *vcpu, u64 addr) 1266 struct kvm_vcpu *vcpu, u64 addr)
1260{ 1267{
1261 hpa_t shadow_addr; 1268 iterator->addr = addr;
1262 int level; 1269 iterator->shadow_addr = vcpu->arch.mmu.root_hpa;
1263 int r; 1270 iterator->level = vcpu->arch.mmu.shadow_root_level;
1264 u64 *sptep; 1271 if (iterator->level == PT32E_ROOT_LEVEL) {
1265 unsigned index; 1272 iterator->shadow_addr
1266 1273 = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
1267 shadow_addr = vcpu->arch.mmu.root_hpa; 1274 iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
1268 level = vcpu->arch.mmu.shadow_root_level; 1275 --iterator->level;
1269 if (level == PT32E_ROOT_LEVEL) { 1276 if (!iterator->shadow_addr)
1270 shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; 1277 iterator->level = 0;
1271 shadow_addr &= PT64_BASE_ADDR_MASK;
1272 if (!shadow_addr)
1273 return 1;
1274 --level;
1275 } 1278 }
1279}
1276 1280
1277 while (level >= PT_PAGE_TABLE_LEVEL) { 1281static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
1278 index = SHADOW_PT_INDEX(addr, level); 1282{
1279 sptep = ((u64 *)__va(shadow_addr)) + index; 1283 if (iterator->level < PT_PAGE_TABLE_LEVEL)
1280 r = walker->entry(walker, vcpu, addr, sptep, level); 1284 return false;
1281 if (r) 1285 iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level);
1282 return r; 1286 iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index;
1283 shadow_addr = *sptep & PT64_BASE_ADDR_MASK; 1287 return true;
1284 --level; 1288}
1285 } 1289
1286 return 0; 1290static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
1291{
1292 iterator->shadow_addr = *iterator->sptep & PT64_BASE_ADDR_MASK;
1293 --iterator->level;
1287} 1294}
1288 1295
1289static void kvm_mmu_page_unlink_children(struct kvm *kvm, 1296static void kvm_mmu_page_unlink_children(struct kvm *kvm,
@@ -1388,7 +1395,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1388 kvm_mmu_page_unlink_children(kvm, sp); 1395 kvm_mmu_page_unlink_children(kvm, sp);
1389 kvm_mmu_unlink_parents(kvm, sp); 1396 kvm_mmu_unlink_parents(kvm, sp);
1390 kvm_flush_remote_tlbs(kvm); 1397 kvm_flush_remote_tlbs(kvm);
1391 if (!sp->role.invalid && !sp->role.metaphysical) 1398 if (!sp->role.invalid && !sp->role.direct)
1392 unaccount_shadowed(kvm, sp->gfn); 1399 unaccount_shadowed(kvm, sp->gfn);
1393 if (sp->unsync) 1400 if (sp->unsync)
1394 kvm_unlink_unsync_page(kvm, sp); 1401 kvm_unlink_unsync_page(kvm, sp);
@@ -1451,7 +1458,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
1451 index = kvm_page_table_hashfn(gfn); 1458 index = kvm_page_table_hashfn(gfn);
1452 bucket = &kvm->arch.mmu_page_hash[index]; 1459 bucket = &kvm->arch.mmu_page_hash[index];
1453 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) 1460 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
1454 if (sp->gfn == gfn && !sp->role.metaphysical) { 1461 if (sp->gfn == gfn && !sp->role.direct) {
1455 pgprintk("%s: gfn %lx role %x\n", __func__, gfn, 1462 pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
1456 sp->role.word); 1463 sp->role.word);
1457 r = 1; 1464 r = 1;
@@ -1463,11 +1470,20 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
1463 1470
1464static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) 1471static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
1465{ 1472{
1473 unsigned index;
1474 struct hlist_head *bucket;
1466 struct kvm_mmu_page *sp; 1475 struct kvm_mmu_page *sp;
1476 struct hlist_node *node, *nn;
1467 1477
1468 while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) { 1478 index = kvm_page_table_hashfn(gfn);
1469 pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word); 1479 bucket = &kvm->arch.mmu_page_hash[index];
1470 kvm_mmu_zap_page(kvm, sp); 1480 hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) {
1481 if (sp->gfn == gfn && !sp->role.direct
1482 && !sp->role.invalid) {
1483 pgprintk("%s: zap %lx %x\n",
1484 __func__, gfn, sp->role.word);
1485 kvm_mmu_zap_page(kvm, sp);
1486 }
1471 } 1487 }
1472} 1488}
1473 1489
@@ -1622,7 +1638,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1622 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 1638 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
1623 /* don't unsync if pagetable is shadowed with multiple roles */ 1639 /* don't unsync if pagetable is shadowed with multiple roles */
1624 hlist_for_each_entry_safe(s, node, n, bucket, hash_link) { 1640 hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
1625 if (s->gfn != sp->gfn || s->role.metaphysical) 1641 if (s->gfn != sp->gfn || s->role.direct)
1626 continue; 1642 continue;
1627 if (s->role.word != sp->role.word) 1643 if (s->role.word != sp->role.word)
1628 return 1; 1644 return 1;
@@ -1669,8 +1685,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1669 u64 mt_mask = shadow_mt_mask; 1685 u64 mt_mask = shadow_mt_mask;
1670 struct kvm_mmu_page *sp = page_header(__pa(shadow_pte)); 1686 struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
1671 1687
1672 if (!(vcpu->arch.cr4 & X86_CR4_PGE))
1673 global = 0;
1674 if (!global && sp->global) { 1688 if (!global && sp->global) {
1675 sp->global = 0; 1689 sp->global = 0;
1676 if (sp->unsync) { 1690 if (sp->unsync) {
@@ -1777,12 +1791,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1777 pgprintk("hfn old %lx new %lx\n", 1791 pgprintk("hfn old %lx new %lx\n",
1778 spte_to_pfn(*shadow_pte), pfn); 1792 spte_to_pfn(*shadow_pte), pfn);
1779 rmap_remove(vcpu->kvm, shadow_pte); 1793 rmap_remove(vcpu->kvm, shadow_pte);
1780 } else { 1794 } else
1781 if (largepage) 1795 was_rmapped = 1;
1782 was_rmapped = is_large_pte(*shadow_pte);
1783 else
1784 was_rmapped = 1;
1785 }
1786 } 1796 }
1787 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, 1797 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
1788 dirty, largepage, global, gfn, pfn, speculative, true)) { 1798 dirty, largepage, global, gfn, pfn, speculative, true)) {
@@ -1820,67 +1830,42 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
1820{ 1830{
1821} 1831}
1822 1832
1823struct direct_shadow_walk { 1833static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
1824 struct kvm_shadow_walk walker; 1834 int largepage, gfn_t gfn, pfn_t pfn)
1825 pfn_t pfn;
1826 int write;
1827 int largepage;
1828 int pt_write;
1829};
1830
1831static int direct_map_entry(struct kvm_shadow_walk *_walk,
1832 struct kvm_vcpu *vcpu,
1833 u64 addr, u64 *sptep, int level)
1834{ 1835{
1835 struct direct_shadow_walk *walk = 1836 struct kvm_shadow_walk_iterator iterator;
1836 container_of(_walk, struct direct_shadow_walk, walker);
1837 struct kvm_mmu_page *sp; 1837 struct kvm_mmu_page *sp;
1838 int pt_write = 0;
1838 gfn_t pseudo_gfn; 1839 gfn_t pseudo_gfn;
1839 gfn_t gfn = addr >> PAGE_SHIFT;
1840
1841 if (level == PT_PAGE_TABLE_LEVEL
1842 || (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
1843 mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
1844 0, walk->write, 1, &walk->pt_write,
1845 walk->largepage, 0, gfn, walk->pfn, false);
1846 ++vcpu->stat.pf_fixed;
1847 return 1;
1848 }
1849 1840
1850 if (*sptep == shadow_trap_nonpresent_pte) { 1841 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
1851 pseudo_gfn = (addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT; 1842 if (iterator.level == PT_PAGE_TABLE_LEVEL
1852 sp = kvm_mmu_get_page(vcpu, pseudo_gfn, (gva_t)addr, level - 1, 1843 || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
1853 1, ACC_ALL, sptep); 1844 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
1854 if (!sp) { 1845 0, write, 1, &pt_write,
1855 pgprintk("nonpaging_map: ENOMEM\n"); 1846 largepage, 0, gfn, pfn, false);
1856 kvm_release_pfn_clean(walk->pfn); 1847 ++vcpu->stat.pf_fixed;
1857 return -ENOMEM; 1848 break;
1858 } 1849 }
1859 1850
1860 set_shadow_pte(sptep, 1851 if (*iterator.sptep == shadow_trap_nonpresent_pte) {
1861 __pa(sp->spt) 1852 pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
1862 | PT_PRESENT_MASK | PT_WRITABLE_MASK 1853 sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
1863 | shadow_user_mask | shadow_x_mask); 1854 iterator.level - 1,
1864 } 1855 1, ACC_ALL, iterator.sptep);
1865 return 0; 1856 if (!sp) {
1866} 1857 pgprintk("nonpaging_map: ENOMEM\n");
1858 kvm_release_pfn_clean(pfn);
1859 return -ENOMEM;
1860 }
1867 1861
1868static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, 1862 set_shadow_pte(iterator.sptep,
1869 int largepage, gfn_t gfn, pfn_t pfn) 1863 __pa(sp->spt)
1870{ 1864 | PT_PRESENT_MASK | PT_WRITABLE_MASK
1871 int r; 1865 | shadow_user_mask | shadow_x_mask);
1872 struct direct_shadow_walk walker = { 1866 }
1873 .walker = { .entry = direct_map_entry, }, 1867 }
1874 .pfn = pfn, 1868 return pt_write;
1875 .largepage = largepage,
1876 .write = write,
1877 .pt_write = 0,
1878 };
1879
1880 r = walk_shadow(&walker.walker, vcpu, gfn << PAGE_SHIFT);
1881 if (r < 0)
1882 return r;
1883 return walker.pt_write;
1884} 1869}
1885 1870
1886static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) 1871static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
@@ -1962,7 +1947,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1962 int i; 1947 int i;
1963 gfn_t root_gfn; 1948 gfn_t root_gfn;
1964 struct kvm_mmu_page *sp; 1949 struct kvm_mmu_page *sp;
1965 int metaphysical = 0; 1950 int direct = 0;
1966 1951
1967 root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT; 1952 root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
1968 1953
@@ -1971,18 +1956,18 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1971 1956
1972 ASSERT(!VALID_PAGE(root)); 1957 ASSERT(!VALID_PAGE(root));
1973 if (tdp_enabled) 1958 if (tdp_enabled)
1974 metaphysical = 1; 1959 direct = 1;
1975 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, 1960 sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
1976 PT64_ROOT_LEVEL, metaphysical, 1961 PT64_ROOT_LEVEL, direct,
1977 ACC_ALL, NULL); 1962 ACC_ALL, NULL);
1978 root = __pa(sp->spt); 1963 root = __pa(sp->spt);
1979 ++sp->root_count; 1964 ++sp->root_count;
1980 vcpu->arch.mmu.root_hpa = root; 1965 vcpu->arch.mmu.root_hpa = root;
1981 return; 1966 return;
1982 } 1967 }
1983 metaphysical = !is_paging(vcpu); 1968 direct = !is_paging(vcpu);
1984 if (tdp_enabled) 1969 if (tdp_enabled)
1985 metaphysical = 1; 1970 direct = 1;
1986 for (i = 0; i < 4; ++i) { 1971 for (i = 0; i < 4; ++i) {
1987 hpa_t root = vcpu->arch.mmu.pae_root[i]; 1972 hpa_t root = vcpu->arch.mmu.pae_root[i];
1988 1973
@@ -1996,7 +1981,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1996 } else if (vcpu->arch.mmu.root_level == 0) 1981 } else if (vcpu->arch.mmu.root_level == 0)
1997 root_gfn = 0; 1982 root_gfn = 0;
1998 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, 1983 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
1999 PT32_ROOT_LEVEL, metaphysical, 1984 PT32_ROOT_LEVEL, direct,
2000 ACC_ALL, NULL); 1985 ACC_ALL, NULL);
2001 root = __pa(sp->spt); 1986 root = __pa(sp->spt);
2002 ++sp->root_count; 1987 ++sp->root_count;
@@ -2251,17 +2236,23 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
2251 2236
2252static int init_kvm_softmmu(struct kvm_vcpu *vcpu) 2237static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
2253{ 2238{
2239 int r;
2240
2254 ASSERT(vcpu); 2241 ASSERT(vcpu);
2255 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); 2242 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
2256 2243
2257 if (!is_paging(vcpu)) 2244 if (!is_paging(vcpu))
2258 return nonpaging_init_context(vcpu); 2245 r = nonpaging_init_context(vcpu);
2259 else if (is_long_mode(vcpu)) 2246 else if (is_long_mode(vcpu))
2260 return paging64_init_context(vcpu); 2247 r = paging64_init_context(vcpu);
2261 else if (is_pae(vcpu)) 2248 else if (is_pae(vcpu))
2262 return paging32E_init_context(vcpu); 2249 r = paging32E_init_context(vcpu);
2263 else 2250 else
2264 return paging32_init_context(vcpu); 2251 r = paging32_init_context(vcpu);
2252
2253 vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level;
2254
2255 return r;
2265} 2256}
2266 2257
2267static int init_kvm_mmu(struct kvm_vcpu *vcpu) 2258static int init_kvm_mmu(struct kvm_vcpu *vcpu)
@@ -2492,7 +2483,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2492 index = kvm_page_table_hashfn(gfn); 2483 index = kvm_page_table_hashfn(gfn);
2493 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 2484 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
2494 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { 2485 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
2495 if (sp->gfn != gfn || sp->role.metaphysical || sp->role.invalid) 2486 if (sp->gfn != gfn || sp->role.direct || sp->role.invalid)
2496 continue; 2487 continue;
2497 pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; 2488 pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
2498 misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); 2489 misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
@@ -3130,7 +3121,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
3130 gfn_t gfn; 3121 gfn_t gfn;
3131 3122
3132 list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) { 3123 list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
3133 if (sp->role.metaphysical) 3124 if (sp->role.direct)
3134 continue; 3125 continue;
3135 3126
3136 gfn = unalias_gfn(vcpu->kvm, sp->gfn); 3127 gfn = unalias_gfn(vcpu->kvm, sp->gfn);
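The largest mmu.c change replaces the callback-driven walk_shadow()/kvm_shadow_walk machinery with an open-coded iterator: state that used to be bundled into per-caller walker structs (direct map, fetch, invlpg) now lives in a kvm_shadow_walk_iterator on the stack, and callers simply loop, avoiding the indirect calls. Alongside this, the page-role bit formerly called "metaphysical" is renamed "direct" throughout. The usage idiom, using exactly the macro and fields added above (the function itself is illustrative):

/* Hedged usage sketch of the iterator added above. */
static void shadow_walk_example(struct kvm_vcpu *vcpu, u64 addr)
{
        struct kvm_shadow_walk_iterator it;

        for_each_shadow_entry(vcpu, addr, it) {
                /* it.level, it.index and it.sptep describe the current
                 * shadow PTE on the path to 'addr'. */
                if (it.level == PT_PAGE_TABLE_LEVEL)
                        break;          /* reached the leaf */
        }
}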
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 258e5d56298e..eaab2145f62b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -54,7 +54,7 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
54static inline int is_long_mode(struct kvm_vcpu *vcpu) 54static inline int is_long_mode(struct kvm_vcpu *vcpu)
55{ 55{
56#ifdef CONFIG_X86_64 56#ifdef CONFIG_X86_64
57 return vcpu->arch.shadow_efer & EFER_LME; 57 return vcpu->arch.shadow_efer & EFER_LMA;
58#else 58#else
59 return 0; 59 return 0;
60#endif 60#endif
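The one-liner in mmu.h is a correctness fix: EFER.LME only records that the guest has asked for long mode, while EFER.LMA is set once paging is enabled as well, i.e. once 64-bit translation is actually live, so is_long_mode() must test LMA. Restated as a tiny illustrative helper:

/* Hedged restatement of the fix: test "long mode active", not merely
 * "long mode enabled". */
static inline int long_mode_is_active(u64 efer)
{
        return !!(efer & EFER_LMA);     /* EFER_LME alone is not enough */
}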
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 9fd78b6e17ad..6bd70206c561 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -25,7 +25,6 @@
25#if PTTYPE == 64 25#if PTTYPE == 64
26 #define pt_element_t u64 26 #define pt_element_t u64
27 #define guest_walker guest_walker64 27 #define guest_walker guest_walker64
28 #define shadow_walker shadow_walker64
29 #define FNAME(name) paging##64_##name 28 #define FNAME(name) paging##64_##name
30 #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK 29 #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
31 #define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK 30 #define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
@@ -42,7 +41,6 @@
42#elif PTTYPE == 32 41#elif PTTYPE == 32
43 #define pt_element_t u32 42 #define pt_element_t u32
44 #define guest_walker guest_walker32 43 #define guest_walker guest_walker32
45 #define shadow_walker shadow_walker32
46 #define FNAME(name) paging##32_##name 44 #define FNAME(name) paging##32_##name
47 #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK 45 #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
48 #define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK 46 #define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
@@ -73,18 +71,6 @@ struct guest_walker {
73 u32 error_code; 71 u32 error_code;
74}; 72};
75 73
76struct shadow_walker {
77 struct kvm_shadow_walk walker;
78 struct guest_walker *guest_walker;
79 int user_fault;
80 int write_fault;
81 int largepage;
82 int *ptwrite;
83 pfn_t pfn;
84 u64 *sptep;
85 gpa_t pte_gpa;
86};
87
88static gfn_t gpte_to_gfn(pt_element_t gpte) 74static gfn_t gpte_to_gfn(pt_element_t gpte)
89{ 75{
90 return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; 76 return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -283,91 +269,79 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
283/* 269/*
284 * Fetch a shadow pte for a specific level in the paging hierarchy. 270 * Fetch a shadow pte for a specific level in the paging hierarchy.
285 */ 271 */
286static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, 272static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
287 struct kvm_vcpu *vcpu, u64 addr, 273 struct guest_walker *gw,
288 u64 *sptep, int level) 274 int user_fault, int write_fault, int largepage,
275 int *ptwrite, pfn_t pfn)
289{ 276{
290 struct shadow_walker *sw =
291 container_of(_sw, struct shadow_walker, walker);
292 struct guest_walker *gw = sw->guest_walker;
293 unsigned access = gw->pt_access; 277 unsigned access = gw->pt_access;
294 struct kvm_mmu_page *shadow_page; 278 struct kvm_mmu_page *shadow_page;
295 u64 spte; 279 u64 spte, *sptep;
296 int metaphysical; 280 int direct;
297 gfn_t table_gfn; 281 gfn_t table_gfn;
298 int r; 282 int r;
283 int level;
299 pt_element_t curr_pte; 284 pt_element_t curr_pte;
285 struct kvm_shadow_walk_iterator iterator;
300 286
301 if (level == PT_PAGE_TABLE_LEVEL 287 if (!is_present_pte(gw->ptes[gw->level - 1]))
302 || (sw->largepage && level == PT_DIRECTORY_LEVEL)) { 288 return NULL;
303 mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
304 sw->user_fault, sw->write_fault,
305 gw->ptes[gw->level-1] & PT_DIRTY_MASK,
306 sw->ptwrite, sw->largepage,
307 gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
308 gw->gfn, sw->pfn, false);
309 sw->sptep = sptep;
310 return 1;
311 }
312 289
313 if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) 290 for_each_shadow_entry(vcpu, addr, iterator) {
314 return 0; 291 level = iterator.level;
292 sptep = iterator.sptep;
293 if (level == PT_PAGE_TABLE_LEVEL
294 || (largepage && level == PT_DIRECTORY_LEVEL)) {
295 mmu_set_spte(vcpu, sptep, access,
296 gw->pte_access & access,
297 user_fault, write_fault,
298 gw->ptes[gw->level-1] & PT_DIRTY_MASK,
299 ptwrite, largepage,
300 gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
301 gw->gfn, pfn, false);
302 break;
303 }
315 304
316 if (is_large_pte(*sptep)) { 305 if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
317 set_shadow_pte(sptep, shadow_trap_nonpresent_pte); 306 continue;
318 kvm_flush_remote_tlbs(vcpu->kvm);
319 rmap_remove(vcpu->kvm, sptep);
320 }
321 307
322 if (level == PT_DIRECTORY_LEVEL && gw->level == PT_DIRECTORY_LEVEL) { 308 if (is_large_pte(*sptep)) {
323 metaphysical = 1; 309 rmap_remove(vcpu->kvm, sptep);
324 if (!is_dirty_pte(gw->ptes[level - 1])) 310 set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
325 access &= ~ACC_WRITE_MASK; 311 kvm_flush_remote_tlbs(vcpu->kvm);
326 table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
327 } else {
328 metaphysical = 0;
329 table_gfn = gw->table_gfn[level - 2];
330 }
331 shadow_page = kvm_mmu_get_page(vcpu, table_gfn, (gva_t)addr, level-1,
332 metaphysical, access, sptep);
333 if (!metaphysical) {
334 r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
335 &curr_pte, sizeof(curr_pte));
336 if (r || curr_pte != gw->ptes[level - 2]) {
337 kvm_mmu_put_page(shadow_page, sptep);
338 kvm_release_pfn_clean(sw->pfn);
339 sw->sptep = NULL;
340 return 1;
341 } 312 }
342 }
343 313
344 spte = __pa(shadow_page->spt) | PT_PRESENT_MASK | PT_ACCESSED_MASK 314 if (level == PT_DIRECTORY_LEVEL
345 | PT_WRITABLE_MASK | PT_USER_MASK; 315 && gw->level == PT_DIRECTORY_LEVEL) {
346 *sptep = spte; 316 direct = 1;
347 return 0; 317 if (!is_dirty_pte(gw->ptes[level - 1]))
348} 318 access &= ~ACC_WRITE_MASK;
349 319 table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
350static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, 320 } else {
351 struct guest_walker *guest_walker, 321 direct = 0;
352 int user_fault, int write_fault, int largepage, 322 table_gfn = gw->table_gfn[level - 2];
353 int *ptwrite, pfn_t pfn) 323 }
354{ 324 shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
355 struct shadow_walker walker = { 325 direct, access, sptep);
356 .walker = { .entry = FNAME(shadow_walk_entry), }, 326 if (!direct) {
357 .guest_walker = guest_walker, 327 r = kvm_read_guest_atomic(vcpu->kvm,
358 .user_fault = user_fault, 328 gw->pte_gpa[level - 2],
359 .write_fault = write_fault, 329 &curr_pte, sizeof(curr_pte));
360 .largepage = largepage, 330 if (r || curr_pte != gw->ptes[level - 2]) {
361 .ptwrite = ptwrite, 331 kvm_mmu_put_page(shadow_page, sptep);
362 .pfn = pfn, 332 kvm_release_pfn_clean(pfn);
363 }; 333 sptep = NULL;
364 334 break;
365 if (!is_present_pte(guest_walker->ptes[guest_walker->level - 1])) 335 }
366 return NULL; 336 }
367 337
368 walk_shadow(&walker.walker, vcpu, addr); 338 spte = __pa(shadow_page->spt)
339 | PT_PRESENT_MASK | PT_ACCESSED_MASK
340 | PT_WRITABLE_MASK | PT_USER_MASK;
341 *sptep = spte;
342 }
369 343
370 return walker.sptep; 344 return sptep;
371} 345}
372 346
373/* 347/*
@@ -465,54 +439,56 @@ out_unlock:
465 return 0; 439 return 0;
466} 440}
467 441
468static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, 442static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
469 struct kvm_vcpu *vcpu, u64 addr,
470 u64 *sptep, int level)
471{ 443{
472 struct shadow_walker *sw = 444 struct kvm_shadow_walk_iterator iterator;
473 container_of(_sw, struct shadow_walker, walker); 445 pt_element_t gpte;
474 446 gpa_t pte_gpa = -1;
475 /* FIXME: properly handle invlpg on large guest pages */ 447 int level;
476 if (level == PT_PAGE_TABLE_LEVEL || 448 u64 *sptep;
477 ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) { 449 int need_flush = 0;
478 struct kvm_mmu_page *sp = page_header(__pa(sptep));
479 450
480 sw->pte_gpa = (sp->gfn << PAGE_SHIFT); 451 spin_lock(&vcpu->kvm->mmu_lock);
481 sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
482 452
483 if (is_shadow_present_pte(*sptep)) { 453 for_each_shadow_entry(vcpu, gva, iterator) {
484 rmap_remove(vcpu->kvm, sptep); 454 level = iterator.level;
485 if (is_large_pte(*sptep)) 455 sptep = iterator.sptep;
486 --vcpu->kvm->stat.lpages; 456
457 /* FIXME: properly handle invlpg on large guest pages */
458 if (level == PT_PAGE_TABLE_LEVEL ||
459 ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
460 struct kvm_mmu_page *sp = page_header(__pa(sptep));
461
462 pte_gpa = (sp->gfn << PAGE_SHIFT);
463 pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
464
465 if (is_shadow_present_pte(*sptep)) {
466 rmap_remove(vcpu->kvm, sptep);
467 if (is_large_pte(*sptep))
468 --vcpu->kvm->stat.lpages;
469 need_flush = 1;
470 }
471 set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
472 break;
487 } 473 }
488 set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
489 return 1;
490 }
491 if (!is_shadow_present_pte(*sptep))
492 return 1;
493 return 0;
494}
495 474
496static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) 475 if (!is_shadow_present_pte(*sptep))
497{ 476 break;
498 pt_element_t gpte; 477 }
499 struct shadow_walker walker = {
500 .walker = { .entry = FNAME(shadow_invlpg_entry), },
501 .pte_gpa = -1,
502 };
503 478
504 spin_lock(&vcpu->kvm->mmu_lock); 479 if (need_flush)
505 walk_shadow(&walker.walker, vcpu, gva); 480 kvm_flush_remote_tlbs(vcpu->kvm);
506 spin_unlock(&vcpu->kvm->mmu_lock); 481 spin_unlock(&vcpu->kvm->mmu_lock);
507 if (walker.pte_gpa == -1) 482
483 if (pte_gpa == -1)
508 return; 484 return;
509 if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte, 485 if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
510 sizeof(pt_element_t))) 486 sizeof(pt_element_t)))
511 return; 487 return;
512 if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) { 488 if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
513 if (mmu_topup_memory_caches(vcpu)) 489 if (mmu_topup_memory_caches(vcpu))
514 return; 490 return;
515 kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte, 491 kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
516 sizeof(pt_element_t), 0); 492 sizeof(pt_element_t), 0);
517 } 493 }
518} 494}
@@ -540,7 +516,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
540 pt_element_t pt[256 / sizeof(pt_element_t)]; 516 pt_element_t pt[256 / sizeof(pt_element_t)];
541 gpa_t pte_gpa; 517 gpa_t pte_gpa;
542 518
543 if (sp->role.metaphysical 519 if (sp->role.direct
544 || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) { 520 || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
545 nonpaging_prefetch_page(vcpu, sp); 521 nonpaging_prefetch_page(vcpu, sp);
546 return; 522 return;
@@ -619,7 +595,6 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
619 595
620#undef pt_element_t 596#undef pt_element_t
621#undef guest_walker 597#undef guest_walker
622#undef shadow_walker
623#undef FNAME 598#undef FNAME
624#undef PT_BASE_ADDR_MASK 599#undef PT_BASE_ADDR_MASK
625#undef PT_INDEX 600#undef PT_INDEX
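paging_tmpl.h follows the same conversion: the shadow_walker structs and per-level callbacks behind fetch() and invlpg() are folded into straight loops over for_each_shadow_entry(). One side effect worth noting in invlpg(): the remote TLB flush is no longer issued from inside the walk; a need_flush flag is set while entries are torn down under mmu_lock and a single kvm_flush_remote_tlbs() is done once, still under the lock. The pattern in isolation (the helper is illustrative, the calls are the ones used above):

/* Hedged sketch of the deferred-flush pattern used by the new invlpg:
 * tear the entry down under the lock, remember that a flush is owed,
 * flush remote TLBs once before dropping the lock. */
static void zap_spte_deferred_flush(struct kvm_vcpu *vcpu, u64 *sptep)
{
        int need_flush = 0;

        spin_lock(&vcpu->kvm->mmu_lock);
        if (is_shadow_present_pte(*sptep)) {
                rmap_remove(vcpu->kvm, sptep);
                need_flush = 1;
        }
        set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
        if (need_flush)
                kvm_flush_remote_tlbs(vcpu->kvm);
        spin_unlock(&vcpu->kvm->mmu_lock);
}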
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a9e769e4e251..1821c2078199 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -38,9 +38,6 @@ MODULE_LICENSE("GPL");
38#define IOPM_ALLOC_ORDER 2 38#define IOPM_ALLOC_ORDER 2
39#define MSRPM_ALLOC_ORDER 1 39#define MSRPM_ALLOC_ORDER 1
40 40
41#define DR7_GD_MASK (1 << 13)
42#define DR6_BD_MASK (1 << 13)
43
44#define SEG_TYPE_LDT 2 41#define SEG_TYPE_LDT 2
45#define SEG_TYPE_BUSY_TSS16 3 42#define SEG_TYPE_BUSY_TSS16 3
46 43
@@ -50,6 +47,15 @@ MODULE_LICENSE("GPL");
50 47
51#define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) 48#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
52 49
50/* Turn on to get debugging output*/
51/* #define NESTED_DEBUG */
52
53#ifdef NESTED_DEBUG
54#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
55#else
56#define nsvm_printk(fmt, args...) do {} while(0)
57#endif
58
53/* enable NPT for AMD64 and X86 with PAE */ 59/* enable NPT for AMD64 and X86 with PAE */
54#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 60#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
55static bool npt_enabled = true; 61static bool npt_enabled = true;
@@ -60,14 +66,29 @@ static int npt = 1;
60 66
61module_param(npt, int, S_IRUGO); 67module_param(npt, int, S_IRUGO);
62 68
69static int nested = 0;
70module_param(nested, int, S_IRUGO);
71
63static void kvm_reput_irq(struct vcpu_svm *svm); 72static void kvm_reput_irq(struct vcpu_svm *svm);
64static void svm_flush_tlb(struct kvm_vcpu *vcpu); 73static void svm_flush_tlb(struct kvm_vcpu *vcpu);
65 74
75static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
76static int nested_svm_vmexit(struct vcpu_svm *svm);
77static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
78 void *arg2, void *opaque);
79static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
80 bool has_error_code, u32 error_code);
81
66static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) 82static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
67{ 83{
68 return container_of(vcpu, struct vcpu_svm, vcpu); 84 return container_of(vcpu, struct vcpu_svm, vcpu);
69} 85}
70 86
87static inline bool is_nested(struct vcpu_svm *svm)
88{
89 return svm->nested_vmcb;
90}
91
71static unsigned long iopm_base; 92static unsigned long iopm_base;
72 93
73struct kvm_ldttss_desc { 94struct kvm_ldttss_desc {
@@ -157,32 +178,6 @@ static inline void kvm_write_cr2(unsigned long val)
157 asm volatile ("mov %0, %%cr2" :: "r" (val)); 178 asm volatile ("mov %0, %%cr2" :: "r" (val));
158} 179}
159 180
160static inline unsigned long read_dr6(void)
161{
162 unsigned long dr6;
163
164 asm volatile ("mov %%dr6, %0" : "=r" (dr6));
165 return dr6;
166}
167
168static inline void write_dr6(unsigned long val)
169{
170 asm volatile ("mov %0, %%dr6" :: "r" (val));
171}
172
173static inline unsigned long read_dr7(void)
174{
175 unsigned long dr7;
176
177 asm volatile ("mov %%dr7, %0" : "=r" (dr7));
178 return dr7;
179}
180
181static inline void write_dr7(unsigned long val)
182{
183 asm volatile ("mov %0, %%dr7" :: "r" (val));
184}
185
186static inline void force_new_asid(struct kvm_vcpu *vcpu) 181static inline void force_new_asid(struct kvm_vcpu *vcpu)
187{ 182{
188 to_svm(vcpu)->asid_generation--; 183 to_svm(vcpu)->asid_generation--;
@@ -198,7 +193,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
198 if (!npt_enabled && !(efer & EFER_LMA)) 193 if (!npt_enabled && !(efer & EFER_LMA))
199 efer &= ~EFER_LME; 194 efer &= ~EFER_LME;
200 195
201 to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; 196 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
202 vcpu->arch.shadow_efer = efer; 197 vcpu->arch.shadow_efer = efer;
203} 198}
204 199
@@ -207,6 +202,11 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
207{ 202{
208 struct vcpu_svm *svm = to_svm(vcpu); 203 struct vcpu_svm *svm = to_svm(vcpu);
209 204
205 /* If we are within a nested VM we'd better #VMEXIT and let the
206 guest handle the exception */
207 if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
208 return;
209
210 svm->vmcb->control.event_inj = nr 210 svm->vmcb->control.event_inj = nr
211 | SVM_EVTINJ_VALID 211 | SVM_EVTINJ_VALID
212 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) 212 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
@@ -242,7 +242,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
242 kvm_rip_write(vcpu, svm->next_rip); 242 kvm_rip_write(vcpu, svm->next_rip);
243 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; 243 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
244 244
245 vcpu->arch.interrupt_window_open = 1; 245 vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
246} 246}
247 247
248static int has_svm(void) 248static int has_svm(void)
@@ -250,7 +250,7 @@ static int has_svm(void)
250 const char *msg; 250 const char *msg;
251 251
252 if (!cpu_has_svm(&msg)) { 252 if (!cpu_has_svm(&msg)) {
253 printk(KERN_INFO "has_svn: %s\n", msg); 253 printk(KERN_INFO "has_svm: %s\n", msg);
254 return 0; 254 return 0;
255 } 255 }
256 256
@@ -292,7 +292,7 @@ static void svm_hardware_enable(void *garbage)
292 svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); 292 svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
293 293
294 rdmsrl(MSR_EFER, efer); 294 rdmsrl(MSR_EFER, efer);
295 wrmsrl(MSR_EFER, efer | MSR_EFER_SVME_MASK); 295 wrmsrl(MSR_EFER, efer | EFER_SVME);
296 296
297 wrmsrl(MSR_VM_HSAVE_PA, 297 wrmsrl(MSR_VM_HSAVE_PA,
298 page_to_pfn(svm_data->save_area) << PAGE_SHIFT); 298 page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
@@ -417,6 +417,14 @@ static __init int svm_hardware_setup(void)
417 if (boot_cpu_has(X86_FEATURE_NX)) 417 if (boot_cpu_has(X86_FEATURE_NX))
418 kvm_enable_efer_bits(EFER_NX); 418 kvm_enable_efer_bits(EFER_NX);
419 419
420 if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
421 kvm_enable_efer_bits(EFER_FFXSR);
422
423 if (nested) {
424 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
425 kvm_enable_efer_bits(EFER_SVME);
426 }
427
420 for_each_online_cpu(cpu) { 428 for_each_online_cpu(cpu) {
421 r = svm_cpu_init(cpu); 429 r = svm_cpu_init(cpu);
422 if (r) 430 if (r)
@@ -559,7 +567,7 @@ static void init_vmcb(struct vcpu_svm *svm)
559 init_sys_seg(&save->ldtr, SEG_TYPE_LDT); 567 init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
560 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); 568 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
561 569
562 save->efer = MSR_EFER_SVME_MASK; 570 save->efer = EFER_SVME;
563 save->dr6 = 0xffff0ff0; 571 save->dr6 = 0xffff0ff0;
564 save->dr7 = 0x400; 572 save->dr7 = 0x400;
565 save->rflags = 2; 573 save->rflags = 2;
@@ -591,6 +599,9 @@ static void init_vmcb(struct vcpu_svm *svm)
591 save->cr4 = 0; 599 save->cr4 = 0;
592 } 600 }
593 force_new_asid(&svm->vcpu); 601 force_new_asid(&svm->vcpu);
602
603 svm->nested_vmcb = 0;
604 svm->vcpu.arch.hflags = HF_GIF_MASK;
594} 605}
595 606
596static int svm_vcpu_reset(struct kvm_vcpu *vcpu) 607static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -615,6 +626,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
615 struct vcpu_svm *svm; 626 struct vcpu_svm *svm;
616 struct page *page; 627 struct page *page;
617 struct page *msrpm_pages; 628 struct page *msrpm_pages;
629 struct page *hsave_page;
630 struct page *nested_msrpm_pages;
618 int err; 631 int err;
619 632
620 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 633 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -637,14 +650,25 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
637 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); 650 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
638 if (!msrpm_pages) 651 if (!msrpm_pages)
639 goto uninit; 652 goto uninit;
653
654 nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
655 if (!nested_msrpm_pages)
656 goto uninit;
657
640 svm->msrpm = page_address(msrpm_pages); 658 svm->msrpm = page_address(msrpm_pages);
641 svm_vcpu_init_msrpm(svm->msrpm); 659 svm_vcpu_init_msrpm(svm->msrpm);
642 660
661 hsave_page = alloc_page(GFP_KERNEL);
662 if (!hsave_page)
663 goto uninit;
664 svm->hsave = page_address(hsave_page);
665
666 svm->nested_msrpm = page_address(nested_msrpm_pages);
667
643 svm->vmcb = page_address(page); 668 svm->vmcb = page_address(page);
644 clear_page(svm->vmcb); 669 clear_page(svm->vmcb);
645 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; 670 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
646 svm->asid_generation = 0; 671 svm->asid_generation = 0;
647 memset(svm->db_regs, 0, sizeof(svm->db_regs));
648 init_vmcb(svm); 672 init_vmcb(svm);
649 673
650 fx_init(&svm->vcpu); 674 fx_init(&svm->vcpu);
@@ -669,6 +693,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
669 693
670 __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); 694 __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
671 __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); 695 __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
696 __free_page(virt_to_page(svm->hsave));
697 __free_pages(virt_to_page(svm->nested_msrpm), MSRPM_ALLOC_ORDER);
672 kvm_vcpu_uninit(vcpu); 698 kvm_vcpu_uninit(vcpu);
673 kmem_cache_free(kvm_vcpu_cache, svm); 699 kmem_cache_free(kvm_vcpu_cache, svm);
674} 700}
@@ -718,6 +744,16 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
718 to_svm(vcpu)->vmcb->save.rflags = rflags; 744 to_svm(vcpu)->vmcb->save.rflags = rflags;
719} 745}
720 746
747static void svm_set_vintr(struct vcpu_svm *svm)
748{
749 svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
750}
751
752static void svm_clear_vintr(struct vcpu_svm *svm)
753{
754 svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
755}
756
721static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) 757static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
722{ 758{
723 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; 759 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
@@ -760,20 +796,37 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
760 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; 796 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
761 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; 797 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
762 798
763 /* 799 switch (seg) {
764 * SVM always stores 0 for the 'G' bit in the CS selector in 800 case VCPU_SREG_CS:
765 * the VMCB on a VMEXIT. This hurts cross-vendor migration: 801 /*
766 * Intel's VMENTRY has a check on the 'G' bit. 802 * SVM always stores 0 for the 'G' bit in the CS selector in
767 */ 803 * the VMCB on a VMEXIT. This hurts cross-vendor migration:
768 if (seg == VCPU_SREG_CS) 804 * Intel's VMENTRY has a check on the 'G' bit.
805 */
769 var->g = s->limit > 0xfffff; 806 var->g = s->limit > 0xfffff;
770 807 break;
771 /* 808 case VCPU_SREG_TR:
772 * Work around a bug where the busy flag in the tr selector 809 /*
773 * isn't exposed 810 * Work around a bug where the busy flag in the tr selector
774 */ 811 * isn't exposed
775 if (seg == VCPU_SREG_TR) 812 */
776 var->type |= 0x2; 813 var->type |= 0x2;
814 break;
815 case VCPU_SREG_DS:
816 case VCPU_SREG_ES:
817 case VCPU_SREG_FS:
818 case VCPU_SREG_GS:
819 /*
820 * The accessed bit must always be set in the segment
 821 * descriptor cache; although it can be cleared in the
822 * descriptor, the cached bit always remains at 1. Since
823 * Intel has a check on this, set it here to support
824 * cross-vendor migration.
825 */
826 if (!var->unusable)
827 var->type |= 0x1;
828 break;
829 }
777 830
778 var->unusable = !var->present; 831 var->unusable = !var->present;
779} 832}
@@ -905,9 +958,37 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
905 958
906} 959}
907 960
908static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) 961static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
909{ 962{
910 return -EOPNOTSUPP; 963 int old_debug = vcpu->guest_debug;
964 struct vcpu_svm *svm = to_svm(vcpu);
965
966 vcpu->guest_debug = dbg->control;
967
968 svm->vmcb->control.intercept_exceptions &=
969 ~((1 << DB_VECTOR) | (1 << BP_VECTOR));
970 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
971 if (vcpu->guest_debug &
972 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
973 svm->vmcb->control.intercept_exceptions |=
974 1 << DB_VECTOR;
975 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
976 svm->vmcb->control.intercept_exceptions |=
977 1 << BP_VECTOR;
978 } else
979 vcpu->guest_debug = 0;
980
981 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
982 svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
983 else
984 svm->vmcb->save.dr7 = vcpu->arch.dr7;
985
986 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
987 svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
988 else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
989 svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
990
991 return 0;
911} 992}
912 993
913static int svm_get_irq(struct kvm_vcpu *vcpu) 994static int svm_get_irq(struct kvm_vcpu *vcpu)
@@ -949,7 +1030,29 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
949 1030
950static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) 1031static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
951{ 1032{
952 unsigned long val = to_svm(vcpu)->db_regs[dr]; 1033 struct vcpu_svm *svm = to_svm(vcpu);
1034 unsigned long val;
1035
1036 switch (dr) {
1037 case 0 ... 3:
1038 val = vcpu->arch.db[dr];
1039 break;
1040 case 6:
1041 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1042 val = vcpu->arch.dr6;
1043 else
1044 val = svm->vmcb->save.dr6;
1045 break;
1046 case 7:
1047 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1048 val = vcpu->arch.dr7;
1049 else
1050 val = svm->vmcb->save.dr7;
1051 break;
1052 default:
1053 val = 0;
1054 }
1055
953 KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); 1056 KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
954 return val; 1057 return val;
955} 1058}
@@ -959,33 +1062,40 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
959{ 1062{
960 struct vcpu_svm *svm = to_svm(vcpu); 1063 struct vcpu_svm *svm = to_svm(vcpu);
961 1064
962 *exception = 0; 1065 KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)value, handler);
963 1066
964 if (svm->vmcb->save.dr7 & DR7_GD_MASK) { 1067 *exception = 0;
965 svm->vmcb->save.dr7 &= ~DR7_GD_MASK;
966 svm->vmcb->save.dr6 |= DR6_BD_MASK;
967 *exception = DB_VECTOR;
968 return;
969 }
970 1068
971 switch (dr) { 1069 switch (dr) {
972 case 0 ... 3: 1070 case 0 ... 3:
973 svm->db_regs[dr] = value; 1071 vcpu->arch.db[dr] = value;
1072 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1073 vcpu->arch.eff_db[dr] = value;
974 return; 1074 return;
975 case 4 ... 5: 1075 case 4 ... 5:
976 if (vcpu->arch.cr4 & X86_CR4_DE) { 1076 if (vcpu->arch.cr4 & X86_CR4_DE)
977 *exception = UD_VECTOR; 1077 *exception = UD_VECTOR;
1078 return;
1079 case 6:
1080 if (value & 0xffffffff00000000ULL) {
1081 *exception = GP_VECTOR;
978 return; 1082 return;
979 } 1083 }
980 case 7: { 1084 vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
981 if (value & ~((1ULL << 32) - 1)) { 1085 return;
1086 case 7:
1087 if (value & 0xffffffff00000000ULL) {
982 *exception = GP_VECTOR; 1088 *exception = GP_VECTOR;
983 return; 1089 return;
984 } 1090 }
985 svm->vmcb->save.dr7 = value; 1091 vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
1092 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
1093 svm->vmcb->save.dr7 = vcpu->arch.dr7;
1094 vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
1095 }
986 return; 1096 return;
987 }
988 default: 1097 default:
1098 /* FIXME: Possible case? */
989 printk(KERN_DEBUG "%s: unexpected dr %u\n", 1099 printk(KERN_DEBUG "%s: unexpected dr %u\n",
990 __func__, dr); 1100 __func__, dr);
991 *exception = UD_VECTOR; 1101 *exception = UD_VECTOR;
@@ -1031,6 +1141,27 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1031 return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); 1141 return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
1032} 1142}
1033 1143
1144static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1145{
1146 if (!(svm->vcpu.guest_debug &
1147 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
1148 kvm_queue_exception(&svm->vcpu, DB_VECTOR);
1149 return 1;
1150 }
1151 kvm_run->exit_reason = KVM_EXIT_DEBUG;
1152 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1153 kvm_run->debug.arch.exception = DB_VECTOR;
1154 return 0;
1155}
1156
1157static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1158{
1159 kvm_run->exit_reason = KVM_EXIT_DEBUG;
1160 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1161 kvm_run->debug.arch.exception = BP_VECTOR;
1162 return 0;
1163}
1164
1034static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1165static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1035{ 1166{
1036 int er; 1167 int er;
@@ -1080,7 +1211,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1080static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1211static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1081{ 1212{
1082 u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ 1213 u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
1083 int size, down, in, string, rep; 1214 int size, in, string;
1084 unsigned port; 1215 unsigned port;
1085 1216
1086 ++svm->vcpu.stat.io_exits; 1217 ++svm->vcpu.stat.io_exits;
@@ -1099,8 +1230,6 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1099 in = (io_info & SVM_IOIO_TYPE_MASK) != 0; 1230 in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
1100 port = io_info >> 16; 1231 port = io_info >> 16;
1101 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; 1232 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
1102 rep = (io_info & SVM_IOIO_REP_MASK) != 0;
1103 down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
1104 1233
1105 skip_emulated_instruction(&svm->vcpu); 1234 skip_emulated_instruction(&svm->vcpu);
1106 return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); 1235 return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
@@ -1139,6 +1268,567 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1139 return 1; 1268 return 1;
1140} 1269}
1141 1270
1271static int nested_svm_check_permissions(struct vcpu_svm *svm)
1272{
1273 if (!(svm->vcpu.arch.shadow_efer & EFER_SVME)
1274 || !is_paging(&svm->vcpu)) {
1275 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1276 return 1;
1277 }
1278
1279 if (svm->vmcb->save.cpl) {
1280 kvm_inject_gp(&svm->vcpu, 0);
1281 return 1;
1282 }
1283
1284 return 0;
1285}
1286
1287static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
1288 bool has_error_code, u32 error_code)
1289{
1290 if (is_nested(svm)) {
1291 svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
1292 svm->vmcb->control.exit_code_hi = 0;
1293 svm->vmcb->control.exit_info_1 = error_code;
1294 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
1295 if (nested_svm_exit_handled(svm, false)) {
1296 nsvm_printk("VMexit -> EXCP 0x%x\n", nr);
1297
1298 nested_svm_vmexit(svm);
1299 return 1;
1300 }
1301 }
1302
1303 return 0;
1304}
1305
1306static inline int nested_svm_intr(struct vcpu_svm *svm)
1307{
1308 if (is_nested(svm)) {
1309 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1310 return 0;
1311
1312 if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
1313 return 0;
1314
1315 svm->vmcb->control.exit_code = SVM_EXIT_INTR;
1316
1317 if (nested_svm_exit_handled(svm, false)) {
1318 nsvm_printk("VMexit -> INTR\n");
1319 nested_svm_vmexit(svm);
1320 return 1;
1321 }
1322 }
1323
1324 return 0;
1325}
1326
1327static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
1328{
1329 struct page *page;
1330
1331 down_read(&current->mm->mmap_sem);
1332 page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
1333 up_read(&current->mm->mmap_sem);
1334
1335 if (is_error_page(page)) {
1336 printk(KERN_INFO "%s: could not find page at 0x%llx\n",
1337 __func__, gpa);
1338 kvm_release_page_clean(page);
1339 kvm_inject_gp(&svm->vcpu, 0);
1340 return NULL;
1341 }
1342 return page;
1343}
1344
1345static int nested_svm_do(struct vcpu_svm *svm,
1346 u64 arg1_gpa, u64 arg2_gpa, void *opaque,
1347 int (*handler)(struct vcpu_svm *svm,
1348 void *arg1,
1349 void *arg2,
1350 void *opaque))
1351{
1352 struct page *arg1_page;
1353 struct page *arg2_page = NULL;
1354 void *arg1;
1355 void *arg2 = NULL;
1356 int retval;
1357
1358 arg1_page = nested_svm_get_page(svm, arg1_gpa);
 1359 if (arg1_page == NULL)
1360 return 1;
1361
1362 if (arg2_gpa) {
1363 arg2_page = nested_svm_get_page(svm, arg2_gpa);
 1364 if (arg2_page == NULL) {
1365 kvm_release_page_clean(arg1_page);
1366 return 1;
1367 }
1368 }
1369
1370 arg1 = kmap_atomic(arg1_page, KM_USER0);
1371 if (arg2_gpa)
1372 arg2 = kmap_atomic(arg2_page, KM_USER1);
1373
1374 retval = handler(svm, arg1, arg2, opaque);
1375
1376 kunmap_atomic(arg1, KM_USER0);
1377 if (arg2_gpa)
1378 kunmap_atomic(arg2, KM_USER1);
1379
1380 kvm_release_page_dirty(arg1_page);
1381 if (arg2_gpa)
1382 kvm_release_page_dirty(arg2_page);
1383
1384 return retval;
1385}
1386
1387static int nested_svm_exit_handled_real(struct vcpu_svm *svm,
1388 void *arg1,
1389 void *arg2,
1390 void *opaque)
1391{
1392 struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1393 bool kvm_overrides = *(bool *)opaque;
1394 u32 exit_code = svm->vmcb->control.exit_code;
1395
1396 if (kvm_overrides) {
1397 switch (exit_code) {
1398 case SVM_EXIT_INTR:
1399 case SVM_EXIT_NMI:
1400 return 0;
1401 /* For now we are always handling NPFs when using them */
1402 case SVM_EXIT_NPF:
1403 if (npt_enabled)
1404 return 0;
1405 break;
1406 /* When we're shadowing, trap PFs */
1407 case SVM_EXIT_EXCP_BASE + PF_VECTOR:
1408 if (!npt_enabled)
1409 return 0;
1410 break;
1411 default:
1412 break;
1413 }
1414 }
1415
1416 switch (exit_code) {
1417 case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
1418 u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
1419 if (nested_vmcb->control.intercept_cr_read & cr_bits)
1420 return 1;
1421 break;
1422 }
1423 case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
1424 u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
1425 if (nested_vmcb->control.intercept_cr_write & cr_bits)
1426 return 1;
1427 break;
1428 }
1429 case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
1430 u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
1431 if (nested_vmcb->control.intercept_dr_read & dr_bits)
1432 return 1;
1433 break;
1434 }
1435 case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
1436 u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
1437 if (nested_vmcb->control.intercept_dr_write & dr_bits)
1438 return 1;
1439 break;
1440 }
1441 case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
1442 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
1443 if (nested_vmcb->control.intercept_exceptions & excp_bits)
1444 return 1;
1445 break;
1446 }
1447 default: {
1448 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
1449 nsvm_printk("exit code: 0x%x\n", exit_code);
1450 if (nested_vmcb->control.intercept & exit_bits)
1451 return 1;
1452 }
1453 }
1454
1455 return 0;
1456}
1457
1458static int nested_svm_exit_handled_msr(struct vcpu_svm *svm,
1459 void *arg1, void *arg2,
1460 void *opaque)
1461{
1462 struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1463 u8 *msrpm = (u8 *)arg2;
1464 u32 t0, t1;
1465 u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
1466 u32 param = svm->vmcb->control.exit_info_1 & 1;
1467
1468 if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT)))
1469 return 0;
1470
 1471 switch (msr) {
1472 case 0 ... 0x1fff:
1473 t0 = (msr * 2) % 8;
1474 t1 = msr / 8;
1475 break;
1476 case 0xc0000000 ... 0xc0001fff:
1477 t0 = (8192 + msr - 0xc0000000) * 2;
1478 t1 = (t0 / 8);
1479 t0 %= 8;
1480 break;
1481 case 0xc0010000 ... 0xc0011fff:
1482 t0 = (16384 + msr - 0xc0010000) * 2;
1483 t1 = (t0 / 8);
1484 t0 %= 8;
1485 break;
1486 default:
1487 return 1;
1488 break;
1489 }
1490 if (msrpm[t1] & ((1 << param) << t0))
1491 return 1;
1492
1493 return 0;
1494}
1495
1496static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override)
1497{
1498 bool k = kvm_override;
1499
1500 switch (svm->vmcb->control.exit_code) {
1501 case SVM_EXIT_MSR:
1502 return nested_svm_do(svm, svm->nested_vmcb,
1503 svm->nested_vmcb_msrpm, NULL,
1504 nested_svm_exit_handled_msr);
1505 default: break;
1506 }
1507
1508 return nested_svm_do(svm, svm->nested_vmcb, 0, &k,
1509 nested_svm_exit_handled_real);
1510}
1511
1512static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
1513 void *arg2, void *opaque)
1514{
1515 struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1516 struct vmcb *hsave = svm->hsave;
1517 u64 nested_save[] = { nested_vmcb->save.cr0,
1518 nested_vmcb->save.cr3,
1519 nested_vmcb->save.cr4,
1520 nested_vmcb->save.efer,
1521 nested_vmcb->control.intercept_cr_read,
1522 nested_vmcb->control.intercept_cr_write,
1523 nested_vmcb->control.intercept_dr_read,
1524 nested_vmcb->control.intercept_dr_write,
1525 nested_vmcb->control.intercept_exceptions,
1526 nested_vmcb->control.intercept,
1527 nested_vmcb->control.msrpm_base_pa,
1528 nested_vmcb->control.iopm_base_pa,
1529 nested_vmcb->control.tsc_offset };
1530
1531 /* Give the current vmcb to the guest */
1532 memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb));
1533 nested_vmcb->save.cr0 = nested_save[0];
1534 if (!npt_enabled)
1535 nested_vmcb->save.cr3 = nested_save[1];
1536 nested_vmcb->save.cr4 = nested_save[2];
1537 nested_vmcb->save.efer = nested_save[3];
1538 nested_vmcb->control.intercept_cr_read = nested_save[4];
1539 nested_vmcb->control.intercept_cr_write = nested_save[5];
1540 nested_vmcb->control.intercept_dr_read = nested_save[6];
1541 nested_vmcb->control.intercept_dr_write = nested_save[7];
1542 nested_vmcb->control.intercept_exceptions = nested_save[8];
1543 nested_vmcb->control.intercept = nested_save[9];
1544 nested_vmcb->control.msrpm_base_pa = nested_save[10];
1545 nested_vmcb->control.iopm_base_pa = nested_save[11];
1546 nested_vmcb->control.tsc_offset = nested_save[12];
1547
1548 /* We always set V_INTR_MASKING and remember the old value in hflags */
1549 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1550 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
1551
1552 if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) &&
1553 (nested_vmcb->control.int_vector)) {
1554 nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n",
1555 nested_vmcb->control.int_vector);
1556 }
1557
1558 /* Restore the original control entries */
1559 svm->vmcb->control = hsave->control;
1560
1561 /* Kill any pending exceptions */
 1562 if (svm->vcpu.arch.exception.pending)
1563 nsvm_printk("WARNING: Pending Exception\n");
1564 svm->vcpu.arch.exception.pending = false;
1565
1566 /* Restore selected save entries */
1567 svm->vmcb->save.es = hsave->save.es;
1568 svm->vmcb->save.cs = hsave->save.cs;
1569 svm->vmcb->save.ss = hsave->save.ss;
1570 svm->vmcb->save.ds = hsave->save.ds;
1571 svm->vmcb->save.gdtr = hsave->save.gdtr;
1572 svm->vmcb->save.idtr = hsave->save.idtr;
1573 svm->vmcb->save.rflags = hsave->save.rflags;
1574 svm_set_efer(&svm->vcpu, hsave->save.efer);
1575 svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
1576 svm_set_cr4(&svm->vcpu, hsave->save.cr4);
1577 if (npt_enabled) {
1578 svm->vmcb->save.cr3 = hsave->save.cr3;
1579 svm->vcpu.arch.cr3 = hsave->save.cr3;
1580 } else {
1581 kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
1582 }
1583 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
1584 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
1585 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
1586 svm->vmcb->save.dr7 = 0;
1587 svm->vmcb->save.cpl = 0;
1588 svm->vmcb->control.exit_int_info = 0;
1589
1590 svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
1591 /* Exit nested SVM mode */
1592 svm->nested_vmcb = 0;
1593
1594 return 0;
1595}
1596
1597static int nested_svm_vmexit(struct vcpu_svm *svm)
1598{
1599 nsvm_printk("VMexit\n");
1600 if (nested_svm_do(svm, svm->nested_vmcb, 0,
1601 NULL, nested_svm_vmexit_real))
1602 return 1;
1603
1604 kvm_mmu_reset_context(&svm->vcpu);
1605 kvm_mmu_load(&svm->vcpu);
1606
1607 return 0;
1608}
1609
1610static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
1611 void *arg2, void *opaque)
1612{
1613 int i;
 1614 u32 *nested_msrpm = (u32 *)arg1;
 1615 for (i = 0; i < PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
1616 svm->nested_msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
1617 svm->vmcb->control.msrpm_base_pa = __pa(svm->nested_msrpm);
1618
1619 return 0;
1620}
1621
1622static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
1623 void *arg2, void *opaque)
1624{
1625 struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1626 struct vmcb *hsave = svm->hsave;
1627
1628 /* nested_vmcb is our indicator if nested SVM is activated */
1629 svm->nested_vmcb = svm->vmcb->save.rax;
1630
1631 /* Clear internal status */
1632 svm->vcpu.arch.exception.pending = false;
1633
1634 /* Save the old vmcb, so we don't need to pick what we save, but
1635 can restore everything when a VMEXIT occurs */
1636 memcpy(hsave, svm->vmcb, sizeof(struct vmcb));
1637 /* We need to remember the original CR3 in the SPT case */
1638 if (!npt_enabled)
1639 hsave->save.cr3 = svm->vcpu.arch.cr3;
1640 hsave->save.cr4 = svm->vcpu.arch.cr4;
1641 hsave->save.rip = svm->next_rip;
1642
1643 if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
1644 svm->vcpu.arch.hflags |= HF_HIF_MASK;
1645 else
1646 svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
1647
1648 /* Load the nested guest state */
1649 svm->vmcb->save.es = nested_vmcb->save.es;
1650 svm->vmcb->save.cs = nested_vmcb->save.cs;
1651 svm->vmcb->save.ss = nested_vmcb->save.ss;
1652 svm->vmcb->save.ds = nested_vmcb->save.ds;
1653 svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
1654 svm->vmcb->save.idtr = nested_vmcb->save.idtr;
1655 svm->vmcb->save.rflags = nested_vmcb->save.rflags;
1656 svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
1657 svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
1658 svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
1659 if (npt_enabled) {
1660 svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
1661 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
1662 } else {
1663 kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
1664 kvm_mmu_reset_context(&svm->vcpu);
1665 }
1666 svm->vmcb->save.cr2 = nested_vmcb->save.cr2;
1667 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
1668 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
1669 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
1670 /* In case we don't even reach vcpu_run, the fields are not updated */
1671 svm->vmcb->save.rax = nested_vmcb->save.rax;
1672 svm->vmcb->save.rsp = nested_vmcb->save.rsp;
1673 svm->vmcb->save.rip = nested_vmcb->save.rip;
1674 svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
1675 svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
1676 svm->vmcb->save.cpl = nested_vmcb->save.cpl;
1677
1678 /* We don't want a nested guest to be more powerful than the guest,
1679 so all intercepts are ORed */
1680 svm->vmcb->control.intercept_cr_read |=
1681 nested_vmcb->control.intercept_cr_read;
1682 svm->vmcb->control.intercept_cr_write |=
1683 nested_vmcb->control.intercept_cr_write;
1684 svm->vmcb->control.intercept_dr_read |=
1685 nested_vmcb->control.intercept_dr_read;
1686 svm->vmcb->control.intercept_dr_write |=
1687 nested_vmcb->control.intercept_dr_write;
1688 svm->vmcb->control.intercept_exceptions |=
1689 nested_vmcb->control.intercept_exceptions;
1690
1691 svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
1692
1693 svm->nested_vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
1694
1695 force_new_asid(&svm->vcpu);
1696 svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
1697 svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
1698 svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
1699 if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
1700 nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
1701 nested_vmcb->control.int_ctl);
1702 }
1703 if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
1704 svm->vcpu.arch.hflags |= HF_VINTR_MASK;
1705 else
1706 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
1707
1708 nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
1709 nested_vmcb->control.exit_int_info,
1710 nested_vmcb->control.int_state);
1711
1712 svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
1713 svm->vmcb->control.int_state = nested_vmcb->control.int_state;
1714 svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
1715 if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
1716 nsvm_printk("Injecting Event: 0x%x\n",
1717 nested_vmcb->control.event_inj);
1718 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
1719 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
1720
1721 svm->vcpu.arch.hflags |= HF_GIF_MASK;
1722
1723 return 0;
1724}
1725
1726static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
1727{
1728 to_vmcb->save.fs = from_vmcb->save.fs;
1729 to_vmcb->save.gs = from_vmcb->save.gs;
1730 to_vmcb->save.tr = from_vmcb->save.tr;
1731 to_vmcb->save.ldtr = from_vmcb->save.ldtr;
1732 to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
1733 to_vmcb->save.star = from_vmcb->save.star;
1734 to_vmcb->save.lstar = from_vmcb->save.lstar;
1735 to_vmcb->save.cstar = from_vmcb->save.cstar;
1736 to_vmcb->save.sfmask = from_vmcb->save.sfmask;
1737 to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
1738 to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
1739 to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
1740
1741 return 1;
1742}
1743
1744static int nested_svm_vmload(struct vcpu_svm *svm, void *nested_vmcb,
1745 void *arg2, void *opaque)
1746{
1747 return nested_svm_vmloadsave((struct vmcb *)nested_vmcb, svm->vmcb);
1748}
1749
1750static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
1751 void *arg2, void *opaque)
1752{
1753 return nested_svm_vmloadsave(svm->vmcb, (struct vmcb *)nested_vmcb);
1754}
1755
1756static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1757{
1758 if (nested_svm_check_permissions(svm))
1759 return 1;
1760
1761 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1762 skip_emulated_instruction(&svm->vcpu);
1763
1764 nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmload);
1765
1766 return 1;
1767}
1768
1769static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1770{
1771 if (nested_svm_check_permissions(svm))
1772 return 1;
1773
1774 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1775 skip_emulated_instruction(&svm->vcpu);
1776
1777 nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmsave);
1778
1779 return 1;
1780}
1781
1782static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1783{
1784 nsvm_printk("VMrun\n");
1785 if (nested_svm_check_permissions(svm))
1786 return 1;
1787
1788 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1789 skip_emulated_instruction(&svm->vcpu);
1790
1791 if (nested_svm_do(svm, svm->vmcb->save.rax, 0,
1792 NULL, nested_svm_vmrun))
1793 return 1;
1794
1795 if (nested_svm_do(svm, svm->nested_vmcb_msrpm, 0,
1796 NULL, nested_svm_vmrun_msrpm))
1797 return 1;
1798
1799 return 1;
1800}
1801
1802static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1803{
1804 if (nested_svm_check_permissions(svm))
1805 return 1;
1806
1807 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1808 skip_emulated_instruction(&svm->vcpu);
1809
1810 svm->vcpu.arch.hflags |= HF_GIF_MASK;
1811
1812 return 1;
1813}
1814
1815static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1816{
1817 if (nested_svm_check_permissions(svm))
1818 return 1;
1819
1820 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1821 skip_emulated_instruction(&svm->vcpu);
1822
1823 svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
1824
 1825 /* After a CLGI no interrupts should be delivered until the next STGI */
1826 svm_clear_vintr(svm);
1827 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
1828
1829 return 1;
1830}
1831
1142static int invalid_op_interception(struct vcpu_svm *svm, 1832static int invalid_op_interception(struct vcpu_svm *svm,
1143 struct kvm_run *kvm_run) 1833 struct kvm_run *kvm_run)
1144{ 1834{
@@ -1250,6 +1940,15 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
1250 case MSR_IA32_LASTINTTOIP: 1940 case MSR_IA32_LASTINTTOIP:
1251 *data = svm->vmcb->save.last_excp_to; 1941 *data = svm->vmcb->save.last_excp_to;
1252 break; 1942 break;
1943 case MSR_VM_HSAVE_PA:
1944 *data = svm->hsave_msr;
1945 break;
1946 case MSR_VM_CR:
1947 *data = 0;
1948 break;
1949 case MSR_IA32_UCODE_REV:
1950 *data = 0x01000065;
1951 break;
1253 default: 1952 default:
1254 return kvm_get_msr_common(vcpu, ecx, data); 1953 return kvm_get_msr_common(vcpu, ecx, data);
1255 } 1954 }
@@ -1344,6 +2043,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
1344 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data); 2043 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
1345 2044
1346 break; 2045 break;
2046 case MSR_VM_HSAVE_PA:
2047 svm->hsave_msr = data;
2048 break;
1347 default: 2049 default:
1348 return kvm_set_msr_common(vcpu, ecx, data); 2050 return kvm_set_msr_common(vcpu, ecx, data);
1349 } 2051 }
@@ -1380,7 +2082,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
1380{ 2082{
1381 KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler); 2083 KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
1382 2084
1383 svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR); 2085 svm_clear_vintr(svm);
1384 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; 2086 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
1385 /* 2087 /*
1386 * If the user space waits to inject interrupts, exit as soon as 2088 * If the user space waits to inject interrupts, exit as soon as
@@ -1417,6 +2119,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
1417 [SVM_EXIT_WRITE_DR3] = emulate_on_interception, 2119 [SVM_EXIT_WRITE_DR3] = emulate_on_interception,
1418 [SVM_EXIT_WRITE_DR5] = emulate_on_interception, 2120 [SVM_EXIT_WRITE_DR5] = emulate_on_interception,
1419 [SVM_EXIT_WRITE_DR7] = emulate_on_interception, 2121 [SVM_EXIT_WRITE_DR7] = emulate_on_interception,
2122 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
2123 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
1420 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, 2124 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
1421 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, 2125 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
1422 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, 2126 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
@@ -1436,12 +2140,12 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
1436 [SVM_EXIT_MSR] = msr_interception, 2140 [SVM_EXIT_MSR] = msr_interception,
1437 [SVM_EXIT_TASK_SWITCH] = task_switch_interception, 2141 [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
1438 [SVM_EXIT_SHUTDOWN] = shutdown_interception, 2142 [SVM_EXIT_SHUTDOWN] = shutdown_interception,
1439 [SVM_EXIT_VMRUN] = invalid_op_interception, 2143 [SVM_EXIT_VMRUN] = vmrun_interception,
1440 [SVM_EXIT_VMMCALL] = vmmcall_interception, 2144 [SVM_EXIT_VMMCALL] = vmmcall_interception,
1441 [SVM_EXIT_VMLOAD] = invalid_op_interception, 2145 [SVM_EXIT_VMLOAD] = vmload_interception,
1442 [SVM_EXIT_VMSAVE] = invalid_op_interception, 2146 [SVM_EXIT_VMSAVE] = vmsave_interception,
1443 [SVM_EXIT_STGI] = invalid_op_interception, 2147 [SVM_EXIT_STGI] = stgi_interception,
1444 [SVM_EXIT_CLGI] = invalid_op_interception, 2148 [SVM_EXIT_CLGI] = clgi_interception,
1445 [SVM_EXIT_SKINIT] = invalid_op_interception, 2149 [SVM_EXIT_SKINIT] = invalid_op_interception,
1446 [SVM_EXIT_WBINVD] = emulate_on_interception, 2150 [SVM_EXIT_WBINVD] = emulate_on_interception,
1447 [SVM_EXIT_MONITOR] = invalid_op_interception, 2151 [SVM_EXIT_MONITOR] = invalid_op_interception,
@@ -1457,6 +2161,17 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1457 KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip, 2161 KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
1458 (u32)((u64)svm->vmcb->save.rip >> 32), entryexit); 2162 (u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
1459 2163
2164 if (is_nested(svm)) {
2165 nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
2166 exit_code, svm->vmcb->control.exit_info_1,
2167 svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
2168 if (nested_svm_exit_handled(svm, true)) {
2169 nested_svm_vmexit(svm);
2170 nsvm_printk("-> #VMEXIT\n");
2171 return 1;
2172 }
2173 }
2174
1460 if (npt_enabled) { 2175 if (npt_enabled) {
1461 int mmu_reload = 0; 2176 int mmu_reload = 0;
1462 if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { 2177 if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
@@ -1544,6 +2259,8 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
1544{ 2259{
1545 struct vcpu_svm *svm = to_svm(vcpu); 2260 struct vcpu_svm *svm = to_svm(vcpu);
1546 2261
2262 nested_svm_intr(svm);
2263
1547 svm_inject_irq(svm, irq); 2264 svm_inject_irq(svm, irq);
1548} 2265}
1549 2266
@@ -1589,11 +2306,17 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
1589 if (!kvm_cpu_has_interrupt(vcpu)) 2306 if (!kvm_cpu_has_interrupt(vcpu))
1590 goto out; 2307 goto out;
1591 2308
2309 if (nested_svm_intr(svm))
2310 goto out;
2311
2312 if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
2313 goto out;
2314
1592 if (!(vmcb->save.rflags & X86_EFLAGS_IF) || 2315 if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
1593 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || 2316 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
1594 (vmcb->control.event_inj & SVM_EVTINJ_VALID)) { 2317 (vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
1595 /* unable to deliver irq, set pending irq */ 2318 /* unable to deliver irq, set pending irq */
1596 vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR); 2319 svm_set_vintr(svm);
1597 svm_inject_irq(svm, 0x0); 2320 svm_inject_irq(svm, 0x0);
1598 goto out; 2321 goto out;
1599 } 2322 }
@@ -1615,7 +2338,8 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
1615 } 2338 }
1616 2339
1617 svm->vcpu.arch.interrupt_window_open = 2340 svm->vcpu.arch.interrupt_window_open =
1618 !(control->int_state & SVM_INTERRUPT_SHADOW_MASK); 2341 !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
2342 (svm->vcpu.arch.hflags & HF_GIF_MASK);
1619} 2343}
1620 2344
1621static void svm_do_inject_vector(struct vcpu_svm *svm) 2345static void svm_do_inject_vector(struct vcpu_svm *svm)
@@ -1637,9 +2361,13 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
1637 struct vcpu_svm *svm = to_svm(vcpu); 2361 struct vcpu_svm *svm = to_svm(vcpu);
1638 struct vmcb_control_area *control = &svm->vmcb->control; 2362 struct vmcb_control_area *control = &svm->vmcb->control;
1639 2363
2364 if (nested_svm_intr(svm))
2365 return;
2366
1640 svm->vcpu.arch.interrupt_window_open = 2367 svm->vcpu.arch.interrupt_window_open =
1641 (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && 2368 (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
1642 (svm->vmcb->save.rflags & X86_EFLAGS_IF)); 2369 (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
2370 (svm->vcpu.arch.hflags & HF_GIF_MASK));
1643 2371
1644 if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary) 2372 if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
1645 /* 2373 /*
@@ -1652,9 +2380,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
1652 */ 2380 */
1653 if (!svm->vcpu.arch.interrupt_window_open && 2381 if (!svm->vcpu.arch.interrupt_window_open &&
1654 (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window)) 2382 (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
1655 control->intercept |= 1ULL << INTERCEPT_VINTR; 2383 svm_set_vintr(svm);
1656 else 2384 else
1657 control->intercept &= ~(1ULL << INTERCEPT_VINTR); 2385 svm_clear_vintr(svm);
1658} 2386}
1659 2387
1660static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) 2388static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -1662,22 +2390,6 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
1662 return 0; 2390 return 0;
1663} 2391}
1664 2392
1665static void save_db_regs(unsigned long *db_regs)
1666{
1667 asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0]));
1668 asm volatile ("mov %%dr1, %0" : "=r"(db_regs[1]));
1669 asm volatile ("mov %%dr2, %0" : "=r"(db_regs[2]));
1670 asm volatile ("mov %%dr3, %0" : "=r"(db_regs[3]));
1671}
1672
1673static void load_db_regs(unsigned long *db_regs)
1674{
1675 asm volatile ("mov %0, %%dr0" : : "r"(db_regs[0]));
1676 asm volatile ("mov %0, %%dr1" : : "r"(db_regs[1]));
1677 asm volatile ("mov %0, %%dr2" : : "r"(db_regs[2]));
1678 asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3]));
1679}
1680
1681static void svm_flush_tlb(struct kvm_vcpu *vcpu) 2393static void svm_flush_tlb(struct kvm_vcpu *vcpu)
1682{ 2394{
1683 force_new_asid(vcpu); 2395 force_new_asid(vcpu);
@@ -1736,19 +2448,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1736 gs_selector = kvm_read_gs(); 2448 gs_selector = kvm_read_gs();
1737 ldt_selector = kvm_read_ldt(); 2449 ldt_selector = kvm_read_ldt();
1738 svm->host_cr2 = kvm_read_cr2(); 2450 svm->host_cr2 = kvm_read_cr2();
1739 svm->host_dr6 = read_dr6(); 2451 if (!is_nested(svm))
1740 svm->host_dr7 = read_dr7(); 2452 svm->vmcb->save.cr2 = vcpu->arch.cr2;
1741 svm->vmcb->save.cr2 = vcpu->arch.cr2;
1742 /* required for live migration with NPT */ 2453 /* required for live migration with NPT */
1743 if (npt_enabled) 2454 if (npt_enabled)
1744 svm->vmcb->save.cr3 = vcpu->arch.cr3; 2455 svm->vmcb->save.cr3 = vcpu->arch.cr3;
1745 2456
1746 if (svm->vmcb->save.dr7 & 0xff) {
1747 write_dr7(0);
1748 save_db_regs(svm->host_db_regs);
1749 load_db_regs(svm->db_regs);
1750 }
1751
1752 clgi(); 2457 clgi();
1753 2458
1754 local_irq_enable(); 2459 local_irq_enable();
@@ -1824,16 +2529,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1824#endif 2529#endif
1825 ); 2530 );
1826 2531
1827 if ((svm->vmcb->save.dr7 & 0xff))
1828 load_db_regs(svm->host_db_regs);
1829
1830 vcpu->arch.cr2 = svm->vmcb->save.cr2; 2532 vcpu->arch.cr2 = svm->vmcb->save.cr2;
1831 vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; 2533 vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
1832 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; 2534 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
1833 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; 2535 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
1834 2536
1835 write_dr6(svm->host_dr6);
1836 write_dr7(svm->host_dr7);
1837 kvm_write_cr2(svm->host_cr2); 2537 kvm_write_cr2(svm->host_cr2);
1838 2538
1839 kvm_load_fs(fs_selector); 2539 kvm_load_fs(fs_selector);
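
The nested_svm_vmrun_msrpm() handler in the svm.c changes above merges KVM's own MSR permission bitmap for the vcpu with the one the guest hypervisor supplies for its nested guest, so an MSR access is intercepted whenever either map requests it and the nested guest can never be given more direct MSR access than KVM itself allows. A minimal standalone sketch of that merge, assuming two equally sized u32 bitmaps; the names merge_msrpm, host_map and nested_map and the explicit word count are illustrative, not taken from the patch:

#include <stddef.h>
#include <stdint.h>

/*
 * OR two MSR permission bitmaps word by word: the result intercepts an
 * MSR whenever either input map does, so the guest-supplied map can only
 * add intercepts on top of the host's, never remove them.
 */
static void merge_msrpm(uint32_t *dst, const uint32_t *host_map,
			const uint32_t *nested_map, size_t words)
{
	size_t i;

	for (i = 0; i < words; i++)
		dst[i] = host_map[i] | nested_map[i];
}

In the patch itself the destination is svm->nested_msrpm and the loop runs over PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4 words, i.e. every 32-bit word of the permission map.
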
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7611af576829..bb481330716f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -91,6 +91,7 @@ struct vcpu_vmx {
91 } rmode; 91 } rmode;
92 int vpid; 92 int vpid;
93 bool emulation_required; 93 bool emulation_required;
94 enum emulation_result invalid_state_emulation_result;
94 95
95 /* Support for vnmi-less CPUs */ 96 /* Support for vnmi-less CPUs */
96 int soft_vnmi_blocked; 97 int soft_vnmi_blocked;
@@ -189,21 +190,21 @@ static inline int is_page_fault(u32 intr_info)
189{ 190{
190 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 191 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
191 INTR_INFO_VALID_MASK)) == 192 INTR_INFO_VALID_MASK)) ==
192 (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); 193 (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
193} 194}
194 195
195static inline int is_no_device(u32 intr_info) 196static inline int is_no_device(u32 intr_info)
196{ 197{
197 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 198 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
198 INTR_INFO_VALID_MASK)) == 199 INTR_INFO_VALID_MASK)) ==
199 (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); 200 (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
200} 201}
201 202
202static inline int is_invalid_opcode(u32 intr_info) 203static inline int is_invalid_opcode(u32 intr_info)
203{ 204{
204 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 205 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
205 INTR_INFO_VALID_MASK)) == 206 INTR_INFO_VALID_MASK)) ==
206 (INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); 207 (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
207} 208}
208 209
209static inline int is_external_interrupt(u32 intr_info) 210static inline int is_external_interrupt(u32 intr_info)
@@ -480,8 +481,13 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
480 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR); 481 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
481 if (!vcpu->fpu_active) 482 if (!vcpu->fpu_active)
482 eb |= 1u << NM_VECTOR; 483 eb |= 1u << NM_VECTOR;
483 if (vcpu->guest_debug.enabled) 484 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
484 eb |= 1u << DB_VECTOR; 485 if (vcpu->guest_debug &
486 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
487 eb |= 1u << DB_VECTOR;
488 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
489 eb |= 1u << BP_VECTOR;
490 }
485 if (vcpu->arch.rmode.active) 491 if (vcpu->arch.rmode.active)
486 eb = ~0; 492 eb = ~0;
487 if (vm_need_ept()) 493 if (vm_need_ept())
@@ -747,29 +753,33 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
747 bool has_error_code, u32 error_code) 753 bool has_error_code, u32 error_code)
748{ 754{
749 struct vcpu_vmx *vmx = to_vmx(vcpu); 755 struct vcpu_vmx *vmx = to_vmx(vcpu);
756 u32 intr_info = nr | INTR_INFO_VALID_MASK;
750 757
751 if (has_error_code) 758 if (has_error_code) {
752 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); 759 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
760 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
761 }
753 762
754 if (vcpu->arch.rmode.active) { 763 if (vcpu->arch.rmode.active) {
755 vmx->rmode.irq.pending = true; 764 vmx->rmode.irq.pending = true;
756 vmx->rmode.irq.vector = nr; 765 vmx->rmode.irq.vector = nr;
757 vmx->rmode.irq.rip = kvm_rip_read(vcpu); 766 vmx->rmode.irq.rip = kvm_rip_read(vcpu);
758 if (nr == BP_VECTOR) 767 if (nr == BP_VECTOR || nr == OF_VECTOR)
759 vmx->rmode.irq.rip++; 768 vmx->rmode.irq.rip++;
760 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 769 intr_info |= INTR_TYPE_SOFT_INTR;
761 nr | INTR_TYPE_SOFT_INTR 770 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
762 | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
763 | INTR_INFO_VALID_MASK);
764 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); 771 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
765 kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); 772 kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
766 return; 773 return;
767 } 774 }
768 775
769 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 776 if (nr == BP_VECTOR || nr == OF_VECTOR) {
770 nr | INTR_TYPE_EXCEPTION 777 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
771 | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) 778 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
772 | INTR_INFO_VALID_MASK); 779 } else
780 intr_info |= INTR_TYPE_HARD_EXCEPTION;
781
782 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
773} 783}
774 784
775static bool vmx_exception_injected(struct kvm_vcpu *vcpu) 785static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
@@ -856,11 +866,8 @@ static u64 guest_read_tsc(void)
856 * writes 'guest_tsc' into guest's timestamp counter "register" 866 * writes 'guest_tsc' into guest's timestamp counter "register"
857 * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc 867 * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc
858 */ 868 */
859static void guest_write_tsc(u64 guest_tsc) 869static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
860{ 870{
861 u64 host_tsc;
862
863 rdtscll(host_tsc);
864 vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc); 871 vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc);
865} 872}
866 873
@@ -925,14 +932,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
925{ 932{
926 struct vcpu_vmx *vmx = to_vmx(vcpu); 933 struct vcpu_vmx *vmx = to_vmx(vcpu);
927 struct kvm_msr_entry *msr; 934 struct kvm_msr_entry *msr;
935 u64 host_tsc;
928 int ret = 0; 936 int ret = 0;
929 937
930 switch (msr_index) { 938 switch (msr_index) {
931#ifdef CONFIG_X86_64
932 case MSR_EFER: 939 case MSR_EFER:
933 vmx_load_host_state(vmx); 940 vmx_load_host_state(vmx);
934 ret = kvm_set_msr_common(vcpu, msr_index, data); 941 ret = kvm_set_msr_common(vcpu, msr_index, data);
935 break; 942 break;
943#ifdef CONFIG_X86_64
936 case MSR_FS_BASE: 944 case MSR_FS_BASE:
937 vmcs_writel(GUEST_FS_BASE, data); 945 vmcs_writel(GUEST_FS_BASE, data);
938 break; 946 break;
@@ -950,7 +958,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
950 vmcs_writel(GUEST_SYSENTER_ESP, data); 958 vmcs_writel(GUEST_SYSENTER_ESP, data);
951 break; 959 break;
952 case MSR_IA32_TIME_STAMP_COUNTER: 960 case MSR_IA32_TIME_STAMP_COUNTER:
953 guest_write_tsc(data); 961 rdtscll(host_tsc);
962 guest_write_tsc(data, host_tsc);
954 break; 963 break;
955 case MSR_P6_PERFCTR0: 964 case MSR_P6_PERFCTR0:
956 case MSR_P6_PERFCTR1: 965 case MSR_P6_PERFCTR1:
@@ -999,40 +1008,28 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
999 } 1008 }
1000} 1009}
1001 1010
1002static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) 1011static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
1003{ 1012{
1004 unsigned long dr7 = 0x400; 1013 int old_debug = vcpu->guest_debug;
1005 int old_singlestep; 1014 unsigned long flags;
1006
1007 old_singlestep = vcpu->guest_debug.singlestep;
1008
1009 vcpu->guest_debug.enabled = dbg->enabled;
1010 if (vcpu->guest_debug.enabled) {
1011 int i;
1012 1015
1013 dr7 |= 0x200; /* exact */ 1016 vcpu->guest_debug = dbg->control;
1014 for (i = 0; i < 4; ++i) { 1017 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
1015 if (!dbg->breakpoints[i].enabled) 1018 vcpu->guest_debug = 0;
1016 continue;
1017 vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
1018 dr7 |= 2 << (i*2); /* global enable */
1019 dr7 |= 0 << (i*4+16); /* execution breakpoint */
1020 }
1021 1019
1022 vcpu->guest_debug.singlestep = dbg->singlestep; 1020 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1023 } else 1021 vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
1024 vcpu->guest_debug.singlestep = 0; 1022 else
1025 1023 vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
1026 if (old_singlestep && !vcpu->guest_debug.singlestep) {
1027 unsigned long flags;
1028 1024
1029 flags = vmcs_readl(GUEST_RFLAGS); 1025 flags = vmcs_readl(GUEST_RFLAGS);
1026 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
1027 flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
1028 else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
1030 flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); 1029 flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
1031 vmcs_writel(GUEST_RFLAGS, flags); 1030 vmcs_writel(GUEST_RFLAGS, flags);
1032 }
1033 1031
1034 update_exception_bitmap(vcpu); 1032 update_exception_bitmap(vcpu);
1035 vmcs_writel(GUEST_DR7, dr7);
1036 1033
1037 return 0; 1034 return 0;
1038} 1035}
@@ -1433,6 +1430,29 @@ continue_rmode:
1433 init_rmode(vcpu->kvm); 1430 init_rmode(vcpu->kvm);
1434} 1431}
1435 1432
1433static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
1434{
1435 struct vcpu_vmx *vmx = to_vmx(vcpu);
1436 struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
1437
1438 vcpu->arch.shadow_efer = efer;
1439 if (!msr)
1440 return;
1441 if (efer & EFER_LMA) {
1442 vmcs_write32(VM_ENTRY_CONTROLS,
1443 vmcs_read32(VM_ENTRY_CONTROLS) |
1444 VM_ENTRY_IA32E_MODE);
1445 msr->data = efer;
1446 } else {
1447 vmcs_write32(VM_ENTRY_CONTROLS,
1448 vmcs_read32(VM_ENTRY_CONTROLS) &
1449 ~VM_ENTRY_IA32E_MODE);
1450
1451 msr->data = efer & ~EFER_LME;
1452 }
1453 setup_msrs(vmx);
1454}
1455
1436#ifdef CONFIG_X86_64 1456#ifdef CONFIG_X86_64
1437 1457
1438static void enter_lmode(struct kvm_vcpu *vcpu) 1458static void enter_lmode(struct kvm_vcpu *vcpu)
@@ -1447,13 +1467,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
1447 (guest_tr_ar & ~AR_TYPE_MASK) 1467 (guest_tr_ar & ~AR_TYPE_MASK)
1448 | AR_TYPE_BUSY_64_TSS); 1468 | AR_TYPE_BUSY_64_TSS);
1449 } 1469 }
1450
1451 vcpu->arch.shadow_efer |= EFER_LMA; 1470 vcpu->arch.shadow_efer |= EFER_LMA;
1452 1471 vmx_set_efer(vcpu, vcpu->arch.shadow_efer);
1453 find_msr_entry(to_vmx(vcpu), MSR_EFER)->data |= EFER_LMA | EFER_LME;
1454 vmcs_write32(VM_ENTRY_CONTROLS,
1455 vmcs_read32(VM_ENTRY_CONTROLS)
1456 | VM_ENTRY_IA32E_MODE);
1457} 1472}
1458 1473
1459static void exit_lmode(struct kvm_vcpu *vcpu) 1474static void exit_lmode(struct kvm_vcpu *vcpu)
@@ -1612,30 +1627,6 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1612 vmcs_writel(GUEST_CR4, hw_cr4); 1627 vmcs_writel(GUEST_CR4, hw_cr4);
1613} 1628}
1614 1629
1615static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
1616{
1617 struct vcpu_vmx *vmx = to_vmx(vcpu);
1618 struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
1619
1620 vcpu->arch.shadow_efer = efer;
1621 if (!msr)
1622 return;
1623 if (efer & EFER_LMA) {
1624 vmcs_write32(VM_ENTRY_CONTROLS,
1625 vmcs_read32(VM_ENTRY_CONTROLS) |
1626 VM_ENTRY_IA32E_MODE);
1627 msr->data = efer;
1628
1629 } else {
1630 vmcs_write32(VM_ENTRY_CONTROLS,
1631 vmcs_read32(VM_ENTRY_CONTROLS) &
1632 ~VM_ENTRY_IA32E_MODE);
1633
1634 msr->data = efer & ~EFER_LME;
1635 }
1636 setup_msrs(vmx);
1637}
1638
1639static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) 1630static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
1640{ 1631{
1641 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 1632 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -1653,7 +1644,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
1653 var->limit = vmcs_read32(sf->limit); 1644 var->limit = vmcs_read32(sf->limit);
1654 var->selector = vmcs_read16(sf->selector); 1645 var->selector = vmcs_read16(sf->selector);
1655 ar = vmcs_read32(sf->ar_bytes); 1646 ar = vmcs_read32(sf->ar_bytes);
1656 if (ar & AR_UNUSABLE_MASK) 1647 if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
1657 ar = 0; 1648 ar = 0;
1658 var->type = ar & 15; 1649 var->type = ar & 15;
1659 var->s = (ar >> 4) & 1; 1650 var->s = (ar >> 4) & 1;
@@ -1788,14 +1779,16 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
1788 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); 1779 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
1789 cs_rpl = cs.selector & SELECTOR_RPL_MASK; 1780 cs_rpl = cs.selector & SELECTOR_RPL_MASK;
1790 1781
1782 if (cs.unusable)
1783 return false;
1791 if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK)) 1784 if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK))
1792 return false; 1785 return false;
1793 if (!cs.s) 1786 if (!cs.s)
1794 return false; 1787 return false;
1795 if (!(~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK))) { 1788 if (cs.type & AR_TYPE_WRITEABLE_MASK) {
1796 if (cs.dpl > cs_rpl) 1789 if (cs.dpl > cs_rpl)
1797 return false; 1790 return false;
1798 } else if (cs.type & AR_TYPE_CODE_MASK) { 1791 } else {
1799 if (cs.dpl != cs_rpl) 1792 if (cs.dpl != cs_rpl)
1800 return false; 1793 return false;
1801 } 1794 }
@@ -1814,7 +1807,9 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu)
1814 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); 1807 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
1815 ss_rpl = ss.selector & SELECTOR_RPL_MASK; 1808 ss_rpl = ss.selector & SELECTOR_RPL_MASK;
1816 1809
1817 if ((ss.type != 3) || (ss.type != 7)) 1810 if (ss.unusable)
1811 return true;
1812 if (ss.type != 3 && ss.type != 7)
1818 return false; 1813 return false;
1819 if (!ss.s) 1814 if (!ss.s)
1820 return false; 1815 return false;
@@ -1834,6 +1829,8 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
1834 vmx_get_segment(vcpu, &var, seg); 1829 vmx_get_segment(vcpu, &var, seg);
1835 rpl = var.selector & SELECTOR_RPL_MASK; 1830 rpl = var.selector & SELECTOR_RPL_MASK;
1836 1831
1832 if (var.unusable)
1833 return true;
1837 if (!var.s) 1834 if (!var.s)
1838 return false; 1835 return false;
1839 if (!var.present) 1836 if (!var.present)
@@ -1855,9 +1852,11 @@ static bool tr_valid(struct kvm_vcpu *vcpu)
1855 1852
1856 vmx_get_segment(vcpu, &tr, VCPU_SREG_TR); 1853 vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
1857 1854
1855 if (tr.unusable)
1856 return false;
1858 if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */ 1857 if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */
1859 return false; 1858 return false;
1860 if ((tr.type != 3) || (tr.type != 11)) /* TODO: Check if guest is in IA32e mode */ 1859 if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
1861 return false; 1860 return false;
1862 if (!tr.present) 1861 if (!tr.present)
1863 return false; 1862 return false;
@@ -1871,6 +1870,8 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu)
1871 1870
1872 vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR); 1871 vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
1873 1872
1873 if (ldtr.unusable)
1874 return true;
1874 if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */ 1875 if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */
1875 return false; 1876 return false;
1876 if (ldtr.type != 2) 1877 if (ldtr.type != 2)
@@ -2112,7 +2113,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2112{ 2113{
2113 u32 host_sysenter_cs, msr_low, msr_high; 2114 u32 host_sysenter_cs, msr_low, msr_high;
2114 u32 junk; 2115 u32 junk;
2115 u64 host_pat; 2116 u64 host_pat, tsc_this, tsc_base;
2116 unsigned long a; 2117 unsigned long a;
2117 struct descriptor_table dt; 2118 struct descriptor_table dt;
2118 int i; 2119 int i;
@@ -2240,6 +2241,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2240 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); 2241 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
2241 vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); 2242 vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
2242 2243
2244 tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
2245 rdtscll(tsc_this);
2246 if (tsc_this < vmx->vcpu.kvm->arch.vm_init_tsc)
2247 tsc_base = tsc_this;
2248
2249 guest_write_tsc(0, tsc_base);
2243 2250
2244 return 0; 2251 return 0;
2245} 2252}
@@ -2319,7 +2326,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2319 kvm_rip_write(vcpu, 0); 2326 kvm_rip_write(vcpu, 0);
2320 kvm_register_write(vcpu, VCPU_REGS_RSP, 0); 2327 kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
2321 2328
2322 /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
2323 vmcs_writel(GUEST_DR7, 0x400); 2329 vmcs_writel(GUEST_DR7, 0x400);
2324 2330
2325 vmcs_writel(GUEST_GDTR_BASE, 0); 2331 vmcs_writel(GUEST_GDTR_BASE, 0);
@@ -2332,8 +2338,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2332 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); 2338 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
2333 vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); 2339 vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
2334 2340
2335 guest_write_tsc(0);
2336
2337 /* Special registers */ 2341 /* Special registers */
2338 vmcs_write64(GUEST_IA32_DEBUGCTL, 0); 2342 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
2339 2343
@@ -2486,6 +2490,11 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
2486{ 2490{
2487 vmx_update_window_states(vcpu); 2491 vmx_update_window_states(vcpu);
2488 2492
2493 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
2494 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
2495 GUEST_INTR_STATE_STI |
2496 GUEST_INTR_STATE_MOV_SS);
2497
2489 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { 2498 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
2490 if (vcpu->arch.interrupt.pending) { 2499 if (vcpu->arch.interrupt.pending) {
2491 enable_nmi_window(vcpu); 2500 enable_nmi_window(vcpu);
@@ -2536,24 +2545,6 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
2536 return 0; 2545 return 0;
2537} 2546}
2538 2547
2539static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
2540{
2541 struct kvm_guest_debug *dbg = &vcpu->guest_debug;
2542
2543 set_debugreg(dbg->bp[0], 0);
2544 set_debugreg(dbg->bp[1], 1);
2545 set_debugreg(dbg->bp[2], 2);
2546 set_debugreg(dbg->bp[3], 3);
2547
2548 if (dbg->singlestep) {
2549 unsigned long flags;
2550
2551 flags = vmcs_readl(GUEST_RFLAGS);
2552 flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
2553 vmcs_writel(GUEST_RFLAGS, flags);
2554 }
2555}
2556
2557static int handle_rmode_exception(struct kvm_vcpu *vcpu, 2548static int handle_rmode_exception(struct kvm_vcpu *vcpu,
2558 int vec, u32 err_code) 2549 int vec, u32 err_code)
2559{ 2550{
@@ -2570,9 +2561,17 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
2570 * the required debugging infrastructure rework. 2561 * the required debugging infrastructure rework.
2571 */ 2562 */
2572 switch (vec) { 2563 switch (vec) {
2573 case DE_VECTOR:
2574 case DB_VECTOR: 2564 case DB_VECTOR:
2565 if (vcpu->guest_debug &
2566 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
2567 return 0;
2568 kvm_queue_exception(vcpu, vec);
2569 return 1;
2575 case BP_VECTOR: 2570 case BP_VECTOR:
2571 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
2572 return 0;
2573 /* fall through */
2574 case DE_VECTOR:
2576 case OF_VECTOR: 2575 case OF_VECTOR:
2577 case BR_VECTOR: 2576 case BR_VECTOR:
2578 case UD_VECTOR: 2577 case UD_VECTOR:
@@ -2589,8 +2588,8 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
2589static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2588static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2590{ 2589{
2591 struct vcpu_vmx *vmx = to_vmx(vcpu); 2590 struct vcpu_vmx *vmx = to_vmx(vcpu);
2592 u32 intr_info, error_code; 2591 u32 intr_info, ex_no, error_code;
2593 unsigned long cr2, rip; 2592 unsigned long cr2, rip, dr6;
2594 u32 vect_info; 2593 u32 vect_info;
2595 enum emulation_result er; 2594 enum emulation_result er;
2596 2595
@@ -2649,14 +2648,30 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2649 return 1; 2648 return 1;
2650 } 2649 }
2651 2650
2652 if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == 2651 ex_no = intr_info & INTR_INFO_VECTOR_MASK;
2653 (INTR_TYPE_EXCEPTION | 1)) { 2652 switch (ex_no) {
2653 case DB_VECTOR:
2654 dr6 = vmcs_readl(EXIT_QUALIFICATION);
2655 if (!(vcpu->guest_debug &
2656 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
2657 vcpu->arch.dr6 = dr6 | DR6_FIXED_1;
2658 kvm_queue_exception(vcpu, DB_VECTOR);
2659 return 1;
2660 }
2661 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
2662 kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
2663 /* fall through */
2664 case BP_VECTOR:
2654 kvm_run->exit_reason = KVM_EXIT_DEBUG; 2665 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2655 return 0; 2666 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
2667 kvm_run->debug.arch.exception = ex_no;
2668 break;
2669 default:
2670 kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
2671 kvm_run->ex.exception = ex_no;
2672 kvm_run->ex.error_code = error_code;
2673 break;
2656 } 2674 }
2657 kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
2658 kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK;
2659 kvm_run->ex.error_code = error_code;
2660 return 0; 2675 return 0;
2661} 2676}
2662 2677
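The new handle_exception() routing above follows one rule: #DB exits to host userspace (KVM_EXIT_DEBUG) only when KVM_GUESTDBG_SINGLESTEP or KVM_GUESTDBG_USE_HW_BP is set, #BP only when KVM_GUESTDBG_USE_SW_BP is set; otherwise the exception is queued back into the guest, with DR6 refreshed from the exit qualification in the #DB case. A rough stand-alone illustration of that decision, not from the patch; the GDBG_* values are placeholders for the real <linux/kvm.h> flags:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Placeholder flag values, for illustration only. */
    #define GDBG_SINGLESTEP (1u << 1)
    #define GDBG_USE_HW_BP  (1u << 2)
    #define GDBG_USE_SW_BP  (1u << 3)

    /* true: exit to host userspace; false: reflect the exception into the guest */
    static bool debug_exit_to_user(uint32_t guest_debug, int vector)
    {
        if (vector == 1)    /* #DB */
            return guest_debug & (GDBG_SINGLESTEP | GDBG_USE_HW_BP);
        if (vector == 3)    /* #BP */
            return guest_debug & GDBG_USE_SW_BP;
        return false;
    }

    int main(void)
    {
        printf("#DB, single-stepping: %d\n", debug_exit_to_user(GDBG_SINGLESTEP, 1));
        printf("#BP, no sw breakpoints: %d\n", debug_exit_to_user(0, 3));
        return 0;
    }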
@@ -2677,7 +2692,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2677static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2692static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2678{ 2693{
2679 unsigned long exit_qualification; 2694 unsigned long exit_qualification;
2680 int size, down, in, string, rep; 2695 int size, in, string;
2681 unsigned port; 2696 unsigned port;
2682 2697
2683 ++vcpu->stat.io_exits; 2698 ++vcpu->stat.io_exits;
@@ -2693,8 +2708,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2693 2708
2694 size = (exit_qualification & 7) + 1; 2709 size = (exit_qualification & 7) + 1;
2695 in = (exit_qualification & 8) != 0; 2710 in = (exit_qualification & 8) != 0;
2696 down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
2697 rep = (exit_qualification & 32) != 0;
2698 port = exit_qualification >> 16; 2711 port = exit_qualification >> 16;
2699 2712
2700 skip_emulated_instruction(vcpu); 2713 skip_emulated_instruction(vcpu);
@@ -2795,21 +2808,44 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2795 unsigned long val; 2808 unsigned long val;
2796 int dr, reg; 2809 int dr, reg;
2797 2810
2798 /* 2811 dr = vmcs_readl(GUEST_DR7);
2799 * FIXME: this code assumes the host is debugging the guest. 2812 if (dr & DR7_GD) {
2800 * need to deal with guest debugging itself too. 2813 /*
2801 */ 2814 * As the vm-exit takes precedence over the debug trap, we
2815 * need to emulate the latter, either for the host or the
2816 * guest debugging itself.
2817 */
2818 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
2819 kvm_run->debug.arch.dr6 = vcpu->arch.dr6;
2820 kvm_run->debug.arch.dr7 = dr;
2821 kvm_run->debug.arch.pc =
2822 vmcs_readl(GUEST_CS_BASE) +
2823 vmcs_readl(GUEST_RIP);
2824 kvm_run->debug.arch.exception = DB_VECTOR;
2825 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2826 return 0;
2827 } else {
2828 vcpu->arch.dr7 &= ~DR7_GD;
2829 vcpu->arch.dr6 |= DR6_BD;
2830 vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
2831 kvm_queue_exception(vcpu, DB_VECTOR);
2832 return 1;
2833 }
2834 }
2835
2802 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 2836 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
2803 dr = exit_qualification & 7; 2837 dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
2804 reg = (exit_qualification >> 8) & 15; 2838 reg = DEBUG_REG_ACCESS_REG(exit_qualification);
2805 if (exit_qualification & 16) { 2839 if (exit_qualification & TYPE_MOV_FROM_DR) {
2806 /* mov from dr */
2807 switch (dr) { 2840 switch (dr) {
2841 case 0 ... 3:
2842 val = vcpu->arch.db[dr];
2843 break;
2808 case 6: 2844 case 6:
2809 val = 0xffff0ff0; 2845 val = vcpu->arch.dr6;
2810 break; 2846 break;
2811 case 7: 2847 case 7:
2812 val = 0x400; 2848 val = vcpu->arch.dr7;
2813 break; 2849 break;
2814 default: 2850 default:
2815 val = 0; 2851 val = 0;
@@ -2817,7 +2853,38 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2817 kvm_register_write(vcpu, reg, val); 2853 kvm_register_write(vcpu, reg, val);
2818 KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); 2854 KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
2819 } else { 2855 } else {
2820 /* mov to dr */ 2856 val = vcpu->arch.regs[reg];
2857 switch (dr) {
2858 case 0 ... 3:
2859 vcpu->arch.db[dr] = val;
2860 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
2861 vcpu->arch.eff_db[dr] = val;
2862 break;
2863 case 4 ... 5:
2864 if (vcpu->arch.cr4 & X86_CR4_DE)
2865 kvm_queue_exception(vcpu, UD_VECTOR);
2866 break;
2867 case 6:
2868 if (val & 0xffffffff00000000ULL) {
2869 kvm_queue_exception(vcpu, GP_VECTOR);
2870 break;
2871 }
2872 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
2873 break;
2874 case 7:
2875 if (val & 0xffffffff00000000ULL) {
2876 kvm_queue_exception(vcpu, GP_VECTOR);
2877 break;
2878 }
2879 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
2880 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
2881 vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
2882 vcpu->arch.switch_db_regs =
2883 (val & DR7_BP_EN_MASK);
2884 }
2885 break;
2886 }
2887 KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler);
2821 } 2888 }
2822 skip_emulated_instruction(vcpu); 2889 skip_emulated_instruction(vcpu);
2823 return 1; 2890 return 1;
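The MOV-to-DR emulation above mirrors the architectural checks: DR4/DR5 raise #UD when CR4.DE is set, DR6/DR7 writes with any of the upper 32 bits set raise #GP, and the fixed bits of DR6/DR7 are forced back on. A simplified stand-alone sketch of that validation, not from the patch; the FIXED masks below are the architectural reset patterns, used here only for illustration:

    #include <stdint.h>

    enum dr_fault { DR_OK, DR_UD, DR_GP };

    #define DR6_FIXED   0xffff0ff0ull   /* architecturally-one bits of DR6 */
    #define DR7_FIXED   0x00000400ull   /* architecturally-one bits of DR7 */

    static enum dr_fault mov_to_dr(int dr, uint64_t val, int cr4_de,
                                   uint64_t db[4], uint64_t *dr6, uint64_t *dr7)
    {
        switch (dr) {
        case 0 ... 3:                       /* plain breakpoint address registers */
            db[dr] = val;
            return DR_OK;
        case 4: case 5:                     /* aliases of DR6/DR7 unless CR4.DE */
            return cr4_de ? DR_UD : DR_OK;
        case 6:
            if (val >> 32)
                return DR_GP;               /* upper half must be zero */
            *dr6 = val | DR6_FIXED;
            return DR_OK;
        case 7:
            if (val >> 32)
                return DR_GP;
            *dr7 = val | DR7_FIXED;
            return DR_OK;
        }
        return DR_GP;
    }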
@@ -2968,17 +3035,25 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2968 } 3035 }
2969 tss_selector = exit_qualification; 3036 tss_selector = exit_qualification;
2970 3037
2971 return kvm_task_switch(vcpu, tss_selector, reason); 3038 if (!kvm_task_switch(vcpu, tss_selector, reason))
3039 return 0;
3040
3041 /* clear all local breakpoint enable flags */
3042 vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55);
3043
3044 /*
3045 * TODO: What about debug traps on tss switch?
3046 * Are we supposed to inject them and update dr6?
3047 */
3048
3049 return 1;
2972} 3050}
2973 3051
2974static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3052static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2975{ 3053{
2976 u64 exit_qualification; 3054 u64 exit_qualification;
2977 enum emulation_result er;
2978 gpa_t gpa; 3055 gpa_t gpa;
2979 unsigned long hva;
2980 int gla_validity; 3056 int gla_validity;
2981 int r;
2982 3057
2983 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); 3058 exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
2984 3059
@@ -3001,32 +3076,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3001 } 3076 }
3002 3077
3003 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); 3078 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
3004 hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT); 3079 return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
3005 if (!kvm_is_error_hva(hva)) {
3006 r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
3007 if (r < 0) {
3008 printk(KERN_ERR "EPT: Not enough memory!\n");
3009 return -ENOMEM;
3010 }
3011 return 1;
3012 } else {
3013 /* must be MMIO */
3014 er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
3015
3016 if (er == EMULATE_FAIL) {
3017 printk(KERN_ERR
3018 "EPT: Fail to handle EPT violation vmexit!er is %d\n",
3019 er);
3020 printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
3021 (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
3022 (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
3023 printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
3024 (long unsigned int)exit_qualification);
3025 return -ENOTSUPP;
3026 } else if (er == EMULATE_DO_MMIO)
3027 return 0;
3028 }
3029 return 1;
3030} 3080}
3031 3081
3032static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3082static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -3046,7 +3096,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
3046 struct kvm_run *kvm_run) 3096 struct kvm_run *kvm_run)
3047{ 3097{
3048 struct vcpu_vmx *vmx = to_vmx(vcpu); 3098 struct vcpu_vmx *vmx = to_vmx(vcpu);
3049 int err; 3099 enum emulation_result err = EMULATE_DONE;
3050 3100
3051 preempt_enable(); 3101 preempt_enable();
3052 local_irq_enable(); 3102 local_irq_enable();
@@ -3071,10 +3121,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
3071 local_irq_disable(); 3121 local_irq_disable();
3072 preempt_disable(); 3122 preempt_disable();
3073 3123
3074 /* Guest state should be valid now except if we need to 3124 vmx->invalid_state_emulation_result = err;
3075 * emulate an MMIO */
3076 if (guest_state_valid(vcpu))
3077 vmx->emulation_required = 0;
3078} 3125}
3079 3126
3080/* 3127/*
@@ -3123,8 +3170,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3123 3170
3124 /* If we need to emulate an MMIO from handle_invalid_guest_state 3171 /* If we need to emulate an MMIO from handle_invalid_guest_state
3125 * we just return 0 */ 3172 * we just return 0 */
3126 if (vmx->emulation_required && emulate_invalid_guest_state) 3173 if (vmx->emulation_required && emulate_invalid_guest_state) {
3127 return 0; 3174 if (guest_state_valid(vcpu))
3175 vmx->emulation_required = 0;
3176 return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO;
3177 }
3128 3178
3129 /* Access CR3 don't cause VMExit in paging mode, so we need 3179 /* Access CR3 don't cause VMExit in paging mode, so we need
3130 * to sync with guest real CR3. */ 3180 * to sync with guest real CR3. */
@@ -3238,7 +3288,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3238 vmx->vcpu.arch.nmi_injected = false; 3288 vmx->vcpu.arch.nmi_injected = false;
3239 } 3289 }
3240 kvm_clear_exception_queue(&vmx->vcpu); 3290 kvm_clear_exception_queue(&vmx->vcpu);
3241 if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) { 3291 if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION ||
3292 type == INTR_TYPE_SOFT_EXCEPTION)) {
3242 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { 3293 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
3243 error = vmcs_read32(IDT_VECTORING_ERROR_CODE); 3294 error = vmcs_read32(IDT_VECTORING_ERROR_CODE);
3244 kvm_queue_exception_e(&vmx->vcpu, vector, error); 3295 kvm_queue_exception_e(&vmx->vcpu, vector, error);
@@ -3259,6 +3310,11 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
3259 3310
3260 vmx_update_window_states(vcpu); 3311 vmx_update_window_states(vcpu);
3261 3312
3313 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
3314 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
3315 GUEST_INTR_STATE_STI |
3316 GUEST_INTR_STATE_MOV_SS);
3317
3262 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { 3318 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
3263 if (vcpu->arch.interrupt.pending) { 3319 if (vcpu->arch.interrupt.pending) {
3264 enable_nmi_window(vcpu); 3320 enable_nmi_window(vcpu);
@@ -3347,6 +3403,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3347 */ 3403 */
3348 vmcs_writel(HOST_CR0, read_cr0()); 3404 vmcs_writel(HOST_CR0, read_cr0());
3349 3405
3406 set_debugreg(vcpu->arch.dr6, 6);
3407
3350 asm( 3408 asm(
3351 /* Store host registers */ 3409 /* Store host registers */
3352 "push %%"R"dx; push %%"R"bp;" 3410 "push %%"R"dx; push %%"R"bp;"
@@ -3441,6 +3499,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3441 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); 3499 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
3442 vcpu->arch.regs_dirty = 0; 3500 vcpu->arch.regs_dirty = 0;
3443 3501
3502 get_debugreg(vcpu->arch.dr6, 6);
3503
3444 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); 3504 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
3445 if (vmx->rmode.irq.pending) 3505 if (vmx->rmode.irq.pending)
3446 fixup_rmode_irq(vmx); 3506 fixup_rmode_irq(vmx);
@@ -3595,7 +3655,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
3595 .vcpu_put = vmx_vcpu_put, 3655 .vcpu_put = vmx_vcpu_put,
3596 3656
3597 .set_guest_debug = set_guest_debug, 3657 .set_guest_debug = set_guest_debug,
3598 .guest_debug_pre = kvm_guest_debug_pre,
3599 .get_msr = vmx_get_msr, 3658 .get_msr = vmx_get_msr,
3600 .set_msr = vmx_set_msr, 3659 .set_msr = vmx_set_msr,
3601 .get_segment_base = vmx_get_segment_base, 3660 .get_segment_base = vmx_get_segment_base,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 758b7a155ae9..8ca100a9ecac 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -36,6 +36,7 @@
36#include <linux/highmem.h> 36#include <linux/highmem.h>
37#include <linux/iommu.h> 37#include <linux/iommu.h>
38#include <linux/intel-iommu.h> 38#include <linux/intel-iommu.h>
39#include <linux/cpufreq.h>
39 40
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41#include <asm/msr.h> 42#include <asm/msr.h>
@@ -69,6 +70,8 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
69 70
70static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 71static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
71 struct kvm_cpuid_entry2 __user *entries); 72 struct kvm_cpuid_entry2 __user *entries);
73struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
74 u32 function, u32 index);
72 75
73struct kvm_x86_ops *kvm_x86_ops; 76struct kvm_x86_ops *kvm_x86_ops;
74EXPORT_SYMBOL_GPL(kvm_x86_ops); 77EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -173,6 +176,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
173 u32 error_code) 176 u32 error_code)
174{ 177{
175 ++vcpu->stat.pf_guest; 178 ++vcpu->stat.pf_guest;
179
176 if (vcpu->arch.exception.pending) { 180 if (vcpu->arch.exception.pending) {
177 if (vcpu->arch.exception.nr == PF_VECTOR) { 181 if (vcpu->arch.exception.nr == PF_VECTOR) {
178 printk(KERN_DEBUG "kvm: inject_page_fault:" 182 printk(KERN_DEBUG "kvm: inject_page_fault:"
@@ -361,6 +365,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
361 } 365 }
362 kvm_x86_ops->set_cr4(vcpu, cr4); 366 kvm_x86_ops->set_cr4(vcpu, cr4);
363 vcpu->arch.cr4 = cr4; 367 vcpu->arch.cr4 = cr4;
368 vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled;
364 kvm_mmu_sync_global(vcpu); 369 kvm_mmu_sync_global(vcpu);
365 kvm_mmu_reset_context(vcpu); 370 kvm_mmu_reset_context(vcpu);
366} 371}
@@ -442,6 +447,11 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
442} 447}
443EXPORT_SYMBOL_GPL(kvm_get_cr8); 448EXPORT_SYMBOL_GPL(kvm_get_cr8);
444 449
450static inline u32 bit(int bitno)
451{
452 return 1 << (bitno & 31);
453}
454
445/* 455/*
446 * List of msr numbers which we expose to userspace through KVM_GET_MSRS 456 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
447 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. 457 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -456,7 +466,7 @@ static u32 msrs_to_save[] = {
456 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 466 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
457#endif 467#endif
458 MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 468 MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
459 MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT 469 MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
460}; 470};
461 471
462static unsigned num_msrs_to_save; 472static unsigned num_msrs_to_save;
@@ -481,6 +491,28 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
481 return; 491 return;
482 } 492 }
483 493
494 if (efer & EFER_FFXSR) {
495 struct kvm_cpuid_entry2 *feat;
496
497 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
498 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
499 printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
500 kvm_inject_gp(vcpu, 0);
501 return;
502 }
503 }
504
505 if (efer & EFER_SVME) {
506 struct kvm_cpuid_entry2 *feat;
507
508 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
509 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
510 printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
511 kvm_inject_gp(vcpu, 0);
512 return;
513 }
514 }
515
484 kvm_x86_ops->set_efer(vcpu, efer); 516 kvm_x86_ops->set_efer(vcpu, efer);
485 517
486 efer &= ~EFER_LMA; 518 efer &= ~EFER_LMA;
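The bit() helper introduced above (and used in these EFER checks) relies on the kernel's X86_FEATURE_* encoding of "CPUID word index * 32 + bit position": masking the constant with 31 leaves just the bit position inside the 32-bit register the caller already picked (edx or ecx of leaf 0x80000001 here). A tiny illustration with a made-up feature number:

    #include <stdint.h>
    #include <stdio.h>

    static inline uint32_t bit(int bitno)
    {
        return 1u << (bitno & 31);      /* position within its 32-bit CPUID word */
    }

    int main(void)
    {
        int fake_feature = 6 * 32 + 2;  /* hypothetical feature: word 6, bit 2 */
        printf("mask: 0x%x\n", bit(fake_feature));  /* prints 0x4 */
        return 0;
    }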
@@ -586,6 +618,8 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *
586 hv_clock->tsc_to_system_mul); 618 hv_clock->tsc_to_system_mul);
587} 619}
588 620
621static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
622
589static void kvm_write_guest_time(struct kvm_vcpu *v) 623static void kvm_write_guest_time(struct kvm_vcpu *v)
590{ 624{
591 struct timespec ts; 625 struct timespec ts;
@@ -596,9 +630,9 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
596 if ((!vcpu->time_page)) 630 if ((!vcpu->time_page))
597 return; 631 return;
598 632
599 if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) { 633 if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) {
600 kvm_set_time_scale(tsc_khz, &vcpu->hv_clock); 634 kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock);
601 vcpu->hv_clock_tsc_khz = tsc_khz; 635 vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz);
602 } 636 }
603 637
604 /* Keep irq disabled to prevent changes to the clock */ 638 /* Keep irq disabled to prevent changes to the clock */
@@ -629,6 +663,16 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
629 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); 663 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
630} 664}
631 665
666static int kvm_request_guest_time_update(struct kvm_vcpu *v)
667{
668 struct kvm_vcpu_arch *vcpu = &v->arch;
669
670 if (!vcpu->time_page)
671 return 0;
672 set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
673 return 1;
674}
675
632static bool msr_mtrr_valid(unsigned msr) 676static bool msr_mtrr_valid(unsigned msr)
633{ 677{
634 switch (msr) { 678 switch (msr) {
@@ -722,6 +766,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
722 break; 766 break;
723 case MSR_IA32_UCODE_REV: 767 case MSR_IA32_UCODE_REV:
724 case MSR_IA32_UCODE_WRITE: 768 case MSR_IA32_UCODE_WRITE:
769 case MSR_VM_HSAVE_PA:
725 break; 770 break;
726 case 0x200 ... 0x2ff: 771 case 0x200 ... 0x2ff:
727 return set_msr_mtrr(vcpu, msr, data); 772 return set_msr_mtrr(vcpu, msr, data);
@@ -758,7 +803,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
758 vcpu->arch.time_page = NULL; 803 vcpu->arch.time_page = NULL;
759 } 804 }
760 805
761 kvm_write_guest_time(vcpu); 806 kvm_request_guest_time_update(vcpu);
762 break; 807 break;
763 } 808 }
764 default: 809 default:
@@ -843,6 +888,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
843 case MSR_IA32_LASTBRANCHTOIP: 888 case MSR_IA32_LASTBRANCHTOIP:
844 case MSR_IA32_LASTINTFROMIP: 889 case MSR_IA32_LASTINTFROMIP:
845 case MSR_IA32_LASTINTTOIP: 890 case MSR_IA32_LASTINTTOIP:
891 case MSR_VM_HSAVE_PA:
846 data = 0; 892 data = 0;
847 break; 893 break;
848 case MSR_MTRRcap: 894 case MSR_MTRRcap:
@@ -967,10 +1013,13 @@ int kvm_dev_ioctl_check_extension(long ext)
967 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 1013 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
968 case KVM_CAP_SET_TSS_ADDR: 1014 case KVM_CAP_SET_TSS_ADDR:
969 case KVM_CAP_EXT_CPUID: 1015 case KVM_CAP_EXT_CPUID:
1016 case KVM_CAP_CLOCKSOURCE:
970 case KVM_CAP_PIT: 1017 case KVM_CAP_PIT:
971 case KVM_CAP_NOP_IO_DELAY: 1018 case KVM_CAP_NOP_IO_DELAY:
972 case KVM_CAP_MP_STATE: 1019 case KVM_CAP_MP_STATE:
973 case KVM_CAP_SYNC_MMU: 1020 case KVM_CAP_SYNC_MMU:
1021 case KVM_CAP_REINJECT_CONTROL:
1022 case KVM_CAP_IRQ_INJECT_STATUS:
974 r = 1; 1023 r = 1;
975 break; 1024 break;
976 case KVM_CAP_COALESCED_MMIO: 1025 case KVM_CAP_COALESCED_MMIO:
@@ -991,9 +1040,6 @@ int kvm_dev_ioctl_check_extension(long ext)
991 case KVM_CAP_IOMMU: 1040 case KVM_CAP_IOMMU:
992 r = iommu_found(); 1041 r = iommu_found();
993 break; 1042 break;
994 case KVM_CAP_CLOCKSOURCE:
995 r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
996 break;
997 default: 1043 default:
998 r = 0; 1044 r = 0;
999 break; 1045 break;
@@ -1044,7 +1090,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
1044 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 1090 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
1045 goto out; 1091 goto out;
1046 r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, 1092 r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
1047 cpuid_arg->entries); 1093 cpuid_arg->entries);
1048 if (r) 1094 if (r)
1049 goto out; 1095 goto out;
1050 1096
@@ -1064,7 +1110,7 @@ out:
1064void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1110void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1065{ 1111{
1066 kvm_x86_ops->vcpu_load(vcpu, cpu); 1112 kvm_x86_ops->vcpu_load(vcpu, cpu);
1067 kvm_write_guest_time(vcpu); 1113 kvm_request_guest_time_update(vcpu);
1068} 1114}
1069 1115
1070void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 1116void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1142,8 +1188,8 @@ out:
1142} 1188}
1143 1189
1144static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, 1190static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
1145 struct kvm_cpuid2 *cpuid, 1191 struct kvm_cpuid2 *cpuid,
1146 struct kvm_cpuid_entry2 __user *entries) 1192 struct kvm_cpuid_entry2 __user *entries)
1147{ 1193{
1148 int r; 1194 int r;
1149 1195
@@ -1162,8 +1208,8 @@ out:
1162} 1208}
1163 1209
1164static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, 1210static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
1165 struct kvm_cpuid2 *cpuid, 1211 struct kvm_cpuid2 *cpuid,
1166 struct kvm_cpuid_entry2 __user *entries) 1212 struct kvm_cpuid_entry2 __user *entries)
1167{ 1213{
1168 int r; 1214 int r;
1169 1215
@@ -1172,7 +1218,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
1172 goto out; 1218 goto out;
1173 r = -EFAULT; 1219 r = -EFAULT;
1174 if (copy_to_user(entries, &vcpu->arch.cpuid_entries, 1220 if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
1175 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) 1221 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
1176 goto out; 1222 goto out;
1177 return 0; 1223 return 0;
1178 1224
@@ -1181,18 +1227,13 @@ out:
1181 return r; 1227 return r;
1182} 1228}
1183 1229
1184static inline u32 bit(int bitno)
1185{
1186 return 1 << (bitno & 31);
1187}
1188
1189static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, 1230static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1190 u32 index) 1231 u32 index)
1191{ 1232{
1192 entry->function = function; 1233 entry->function = function;
1193 entry->index = index; 1234 entry->index = index;
1194 cpuid_count(entry->function, entry->index, 1235 cpuid_count(entry->function, entry->index,
1195 &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); 1236 &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
1196 entry->flags = 0; 1237 entry->flags = 0;
1197} 1238}
1198 1239
@@ -1222,15 +1263,17 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1222#ifdef CONFIG_X86_64 1263#ifdef CONFIG_X86_64
1223 bit(X86_FEATURE_LM) | 1264 bit(X86_FEATURE_LM) |
1224#endif 1265#endif
1266 bit(X86_FEATURE_FXSR_OPT) |
1225 bit(X86_FEATURE_MMXEXT) | 1267 bit(X86_FEATURE_MMXEXT) |
1226 bit(X86_FEATURE_3DNOWEXT) | 1268 bit(X86_FEATURE_3DNOWEXT) |
1227 bit(X86_FEATURE_3DNOW); 1269 bit(X86_FEATURE_3DNOW);
1228 const u32 kvm_supported_word3_x86_features = 1270 const u32 kvm_supported_word3_x86_features =
1229 bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16); 1271 bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
1230 const u32 kvm_supported_word6_x86_features = 1272 const u32 kvm_supported_word6_x86_features =
1231 bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY); 1273 bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) |
1274 bit(X86_FEATURE_SVM);
1232 1275
1233 /* all func 2 cpuid_count() should be called on the same cpu */ 1276 /* all calls to cpuid_count() should be made on the same cpu */
1234 get_cpu(); 1277 get_cpu();
1235 do_cpuid_1_ent(entry, function, index); 1278 do_cpuid_1_ent(entry, function, index);
1236 ++*nent; 1279 ++*nent;
@@ -1304,7 +1347,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1304} 1347}
1305 1348
1306static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 1349static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
1307 struct kvm_cpuid_entry2 __user *entries) 1350 struct kvm_cpuid_entry2 __user *entries)
1308{ 1351{
1309 struct kvm_cpuid_entry2 *cpuid_entries; 1352 struct kvm_cpuid_entry2 *cpuid_entries;
1310 int limit, nent = 0, r = -E2BIG; 1353 int limit, nent = 0, r = -E2BIG;
@@ -1321,7 +1364,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
1321 limit = cpuid_entries[0].eax; 1364 limit = cpuid_entries[0].eax;
1322 for (func = 1; func <= limit && nent < cpuid->nent; ++func) 1365 for (func = 1; func <= limit && nent < cpuid->nent; ++func)
1323 do_cpuid_ent(&cpuid_entries[nent], func, 0, 1366 do_cpuid_ent(&cpuid_entries[nent], func, 0,
1324 &nent, cpuid->nent); 1367 &nent, cpuid->nent);
1325 r = -E2BIG; 1368 r = -E2BIG;
1326 if (nent >= cpuid->nent) 1369 if (nent >= cpuid->nent)
1327 goto out_free; 1370 goto out_free;
@@ -1330,10 +1373,10 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
1330 limit = cpuid_entries[nent - 1].eax; 1373 limit = cpuid_entries[nent - 1].eax;
1331 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) 1374 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
1332 do_cpuid_ent(&cpuid_entries[nent], func, 0, 1375 do_cpuid_ent(&cpuid_entries[nent], func, 0,
1333 &nent, cpuid->nent); 1376 &nent, cpuid->nent);
1334 r = -EFAULT; 1377 r = -EFAULT;
1335 if (copy_to_user(entries, cpuid_entries, 1378 if (copy_to_user(entries, cpuid_entries,
1336 nent * sizeof(struct kvm_cpuid_entry2))) 1379 nent * sizeof(struct kvm_cpuid_entry2)))
1337 goto out_free; 1380 goto out_free;
1338 cpuid->nent = nent; 1381 cpuid->nent = nent;
1339 r = 0; 1382 r = 0;
@@ -1477,7 +1520,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1477 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 1520 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
1478 goto out; 1521 goto out;
1479 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid, 1522 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
1480 cpuid_arg->entries); 1523 cpuid_arg->entries);
1481 if (r) 1524 if (r)
1482 goto out; 1525 goto out;
1483 break; 1526 break;
@@ -1490,7 +1533,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1490 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 1533 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
1491 goto out; 1534 goto out;
1492 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid, 1535 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
1493 cpuid_arg->entries); 1536 cpuid_arg->entries);
1494 if (r) 1537 if (r)
1495 goto out; 1538 goto out;
1496 r = -EFAULT; 1539 r = -EFAULT;
@@ -1710,6 +1753,15 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
1710 return r; 1753 return r;
1711} 1754}
1712 1755
1756static int kvm_vm_ioctl_reinject(struct kvm *kvm,
1757 struct kvm_reinject_control *control)
1758{
1759 if (!kvm->arch.vpit)
1760 return -ENXIO;
1761 kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
1762 return 0;
1763}
1764
1713/* 1765/*
1714 * Get (and clear) the dirty memory log for a memory slot. 1766 * Get (and clear) the dirty memory log for a memory slot.
1715 */ 1767 */
@@ -1807,13 +1859,26 @@ long kvm_arch_vm_ioctl(struct file *filp,
1807 } 1859 }
1808 } else 1860 } else
1809 goto out; 1861 goto out;
1862 r = kvm_setup_default_irq_routing(kvm);
1863 if (r) {
1864 kfree(kvm->arch.vpic);
1865 kfree(kvm->arch.vioapic);
1866 goto out;
1867 }
1810 break; 1868 break;
1811 case KVM_CREATE_PIT: 1869 case KVM_CREATE_PIT:
1870 mutex_lock(&kvm->lock);
1871 r = -EEXIST;
1872 if (kvm->arch.vpit)
1873 goto create_pit_unlock;
1812 r = -ENOMEM; 1874 r = -ENOMEM;
1813 kvm->arch.vpit = kvm_create_pit(kvm); 1875 kvm->arch.vpit = kvm_create_pit(kvm);
1814 if (kvm->arch.vpit) 1876 if (kvm->arch.vpit)
1815 r = 0; 1877 r = 0;
1878 create_pit_unlock:
1879 mutex_unlock(&kvm->lock);
1816 break; 1880 break;
1881 case KVM_IRQ_LINE_STATUS:
1817 case KVM_IRQ_LINE: { 1882 case KVM_IRQ_LINE: {
1818 struct kvm_irq_level irq_event; 1883 struct kvm_irq_level irq_event;
1819 1884
@@ -1821,10 +1886,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
1821 if (copy_from_user(&irq_event, argp, sizeof irq_event)) 1886 if (copy_from_user(&irq_event, argp, sizeof irq_event))
1822 goto out; 1887 goto out;
1823 if (irqchip_in_kernel(kvm)) { 1888 if (irqchip_in_kernel(kvm)) {
1889 __s32 status;
1824 mutex_lock(&kvm->lock); 1890 mutex_lock(&kvm->lock);
1825 kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1891 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
1826 irq_event.irq, irq_event.level); 1892 irq_event.irq, irq_event.level);
1827 mutex_unlock(&kvm->lock); 1893 mutex_unlock(&kvm->lock);
1894 if (ioctl == KVM_IRQ_LINE_STATUS) {
1895 irq_event.status = status;
1896 if (copy_to_user(argp, &irq_event,
1897 sizeof irq_event))
1898 goto out;
1899 }
1828 r = 0; 1900 r = 0;
1829 } 1901 }
1830 break; 1902 break;
@@ -1907,6 +1979,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
1907 r = 0; 1979 r = 0;
1908 break; 1980 break;
1909 } 1981 }
1982 case KVM_REINJECT_CONTROL: {
1983 struct kvm_reinject_control control;
1984 r = -EFAULT;
1985 if (copy_from_user(&control, argp, sizeof(control)))
1986 goto out;
1987 r = kvm_vm_ioctl_reinject(kvm, &control);
1988 if (r)
1989 goto out;
1990 r = 0;
1991 break;
1992 }
1910 default: 1993 default:
1911 ; 1994 ;
1912 } 1995 }
@@ -1960,10 +2043,38 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
1960 return dev; 2043 return dev;
1961} 2044}
1962 2045
1963int emulator_read_std(unsigned long addr, 2046static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
1964 void *val, 2047 struct kvm_vcpu *vcpu)
1965 unsigned int bytes, 2048{
1966 struct kvm_vcpu *vcpu) 2049 void *data = val;
2050 int r = X86EMUL_CONTINUE;
2051
2052 while (bytes) {
2053 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
2054 unsigned offset = addr & (PAGE_SIZE-1);
2055 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
2056 int ret;
2057
2058 if (gpa == UNMAPPED_GVA) {
2059 r = X86EMUL_PROPAGATE_FAULT;
2060 goto out;
2061 }
2062 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
2063 if (ret < 0) {
2064 r = X86EMUL_UNHANDLEABLE;
2065 goto out;
2066 }
2067
2068 bytes -= toread;
2069 data += toread;
2070 addr += toread;
2071 }
2072out:
2073 return r;
2074}
2075
2076static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
2077 struct kvm_vcpu *vcpu)
1967{ 2078{
1968 void *data = val; 2079 void *data = val;
1969 int r = X86EMUL_CONTINUE; 2080 int r = X86EMUL_CONTINUE;
@@ -1971,27 +2082,27 @@ int emulator_read_std(unsigned long addr,
1971 while (bytes) { 2082 while (bytes) {
1972 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 2083 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1973 unsigned offset = addr & (PAGE_SIZE-1); 2084 unsigned offset = addr & (PAGE_SIZE-1);
1974 unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset); 2085 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
1975 int ret; 2086 int ret;
1976 2087
1977 if (gpa == UNMAPPED_GVA) { 2088 if (gpa == UNMAPPED_GVA) {
1978 r = X86EMUL_PROPAGATE_FAULT; 2089 r = X86EMUL_PROPAGATE_FAULT;
1979 goto out; 2090 goto out;
1980 } 2091 }
1981 ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy); 2092 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
1982 if (ret < 0) { 2093 if (ret < 0) {
1983 r = X86EMUL_UNHANDLEABLE; 2094 r = X86EMUL_UNHANDLEABLE;
1984 goto out; 2095 goto out;
1985 } 2096 }
1986 2097
1987 bytes -= tocopy; 2098 bytes -= towrite;
1988 data += tocopy; 2099 data += towrite;
1989 addr += tocopy; 2100 addr += towrite;
1990 } 2101 }
1991out: 2102out:
1992 return r; 2103 return r;
1993} 2104}
1994EXPORT_SYMBOL_GPL(emulator_read_std); 2105
1995 2106
1996static int emulator_read_emulated(unsigned long addr, 2107static int emulator_read_emulated(unsigned long addr,
1997 void *val, 2108 void *val,
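kvm_read_guest_virt()/kvm_write_guest_virt() above iterate page by page because a guest-virtual buffer may straddle page boundaries and each page can map to an unrelated guest-physical frame; every pass translates the current address and copies at most up to the next page boundary. The chunking arithmetic, shown as a stand-alone sketch with the translation replaced by a flat array (gva_to_gpa and the real copy helpers are stubbed out):

    #include <stdint.h>
    #include <string.h>

    #define PG_SIZE 4096u

    static uint8_t fake_guest_mem[8 * PG_SIZE];   /* stand-in for guest memory */

    static int read_guest_virt(uint64_t addr, void *val, unsigned int bytes)
    {
        uint8_t *data = val;

        while (bytes) {
            unsigned int offset = addr & (PG_SIZE - 1);
            unsigned int chunk  = PG_SIZE - offset;

            if (chunk > bytes)
                chunk = bytes;                    /* never cross a page boundary */
            if (addr + chunk > sizeof(fake_guest_mem))
                return -1;                        /* real code: UNMAPPED_GVA fault */

            memcpy(data, &fake_guest_mem[addr], chunk);
            bytes -= chunk;
            data  += chunk;
            addr  += chunk;
        }
        return 0;
    }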
@@ -2013,8 +2124,8 @@ static int emulator_read_emulated(unsigned long addr,
2013 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 2124 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
2014 goto mmio; 2125 goto mmio;
2015 2126
2016 if (emulator_read_std(addr, val, bytes, vcpu) 2127 if (kvm_read_guest_virt(addr, val, bytes, vcpu)
2017 == X86EMUL_CONTINUE) 2128 == X86EMUL_CONTINUE)
2018 return X86EMUL_CONTINUE; 2129 return X86EMUL_CONTINUE;
2019 if (gpa == UNMAPPED_GVA) 2130 if (gpa == UNMAPPED_GVA)
2020 return X86EMUL_PROPAGATE_FAULT; 2131 return X86EMUL_PROPAGATE_FAULT;
@@ -2217,7 +2328,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
2217 2328
2218 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); 2329 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
2219 2330
2220 emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu); 2331 kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu);
2221 2332
2222 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", 2333 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
2223 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); 2334 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
@@ -2225,7 +2336,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
2225EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 2336EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
2226 2337
2227static struct x86_emulate_ops emulate_ops = { 2338static struct x86_emulate_ops emulate_ops = {
2228 .read_std = emulator_read_std, 2339 .read_std = kvm_read_guest_virt,
2229 .read_emulated = emulator_read_emulated, 2340 .read_emulated = emulator_read_emulated,
2230 .write_emulated = emulator_write_emulated, 2341 .write_emulated = emulator_write_emulated,
2231 .cmpxchg_emulated = emulator_cmpxchg_emulated, 2342 .cmpxchg_emulated = emulator_cmpxchg_emulated,
@@ -2327,40 +2438,19 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
2327} 2438}
2328EXPORT_SYMBOL_GPL(emulate_instruction); 2439EXPORT_SYMBOL_GPL(emulate_instruction);
2329 2440
2330static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
2331{
2332 int i;
2333
2334 for (i = 0; i < ARRAY_SIZE(vcpu->arch.pio.guest_pages); ++i)
2335 if (vcpu->arch.pio.guest_pages[i]) {
2336 kvm_release_page_dirty(vcpu->arch.pio.guest_pages[i]);
2337 vcpu->arch.pio.guest_pages[i] = NULL;
2338 }
2339}
2340
2341static int pio_copy_data(struct kvm_vcpu *vcpu) 2441static int pio_copy_data(struct kvm_vcpu *vcpu)
2342{ 2442{
2343 void *p = vcpu->arch.pio_data; 2443 void *p = vcpu->arch.pio_data;
2344 void *q; 2444 gva_t q = vcpu->arch.pio.guest_gva;
2345 unsigned bytes; 2445 unsigned bytes;
2346 int nr_pages = vcpu->arch.pio.guest_pages[1] ? 2 : 1; 2446 int ret;
2347 2447
2348 q = vmap(vcpu->arch.pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
2349 PAGE_KERNEL);
2350 if (!q) {
2351 free_pio_guest_pages(vcpu);
2352 return -ENOMEM;
2353 }
2354 q += vcpu->arch.pio.guest_page_offset;
2355 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; 2448 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
2356 if (vcpu->arch.pio.in) 2449 if (vcpu->arch.pio.in)
2357 memcpy(q, p, bytes); 2450 ret = kvm_write_guest_virt(q, p, bytes, vcpu);
2358 else 2451 else
2359 memcpy(p, q, bytes); 2452 ret = kvm_read_guest_virt(q, p, bytes, vcpu);
2360 q -= vcpu->arch.pio.guest_page_offset; 2453 return ret;
2361 vunmap(q);
2362 free_pio_guest_pages(vcpu);
2363 return 0;
2364} 2454}
2365 2455
2366int complete_pio(struct kvm_vcpu *vcpu) 2456int complete_pio(struct kvm_vcpu *vcpu)
@@ -2471,7 +2561,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2471 vcpu->arch.pio.in = in; 2561 vcpu->arch.pio.in = in;
2472 vcpu->arch.pio.string = 0; 2562 vcpu->arch.pio.string = 0;
2473 vcpu->arch.pio.down = 0; 2563 vcpu->arch.pio.down = 0;
2474 vcpu->arch.pio.guest_page_offset = 0;
2475 vcpu->arch.pio.rep = 0; 2564 vcpu->arch.pio.rep = 0;
2476 2565
2477 if (vcpu->run->io.direction == KVM_EXIT_IO_IN) 2566 if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2499,9 +2588,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2499 gva_t address, int rep, unsigned port) 2588 gva_t address, int rep, unsigned port)
2500{ 2589{
2501 unsigned now, in_page; 2590 unsigned now, in_page;
2502 int i, ret = 0; 2591 int ret = 0;
2503 int nr_pages = 1;
2504 struct page *page;
2505 struct kvm_io_device *pio_dev; 2592 struct kvm_io_device *pio_dev;
2506 2593
2507 vcpu->run->exit_reason = KVM_EXIT_IO; 2594 vcpu->run->exit_reason = KVM_EXIT_IO;
@@ -2513,7 +2600,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2513 vcpu->arch.pio.in = in; 2600 vcpu->arch.pio.in = in;
2514 vcpu->arch.pio.string = 1; 2601 vcpu->arch.pio.string = 1;
2515 vcpu->arch.pio.down = down; 2602 vcpu->arch.pio.down = down;
2516 vcpu->arch.pio.guest_page_offset = offset_in_page(address);
2517 vcpu->arch.pio.rep = rep; 2603 vcpu->arch.pio.rep = rep;
2518 2604
2519 if (vcpu->run->io.direction == KVM_EXIT_IO_IN) 2605 if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2533,15 +2619,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2533 else 2619 else
2534 in_page = offset_in_page(address) + size; 2620 in_page = offset_in_page(address) + size;
2535 now = min(count, (unsigned long)in_page / size); 2621 now = min(count, (unsigned long)in_page / size);
2536 if (!now) { 2622 if (!now)
2537 /*
2538 * String I/O straddles page boundary. Pin two guest pages
2539 * so that we satisfy atomicity constraints. Do just one
2540 * transaction to avoid complexity.
2541 */
2542 nr_pages = 2;
2543 now = 1; 2623 now = 1;
2544 }
2545 if (down) { 2624 if (down) {
2546 /* 2625 /*
2547 * String I/O in reverse. Yuck. Kill the guest, fix later. 2626 * String I/O in reverse. Yuck. Kill the guest, fix later.
@@ -2556,15 +2635,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2556 if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count) 2635 if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
2557 kvm_x86_ops->skip_emulated_instruction(vcpu); 2636 kvm_x86_ops->skip_emulated_instruction(vcpu);
2558 2637
2559 for (i = 0; i < nr_pages; ++i) { 2638 vcpu->arch.pio.guest_gva = address;
2560 page = gva_to_page(vcpu, address + i * PAGE_SIZE);
2561 vcpu->arch.pio.guest_pages[i] = page;
2562 if (!page) {
2563 kvm_inject_gp(vcpu, 0);
2564 free_pio_guest_pages(vcpu);
2565 return 1;
2566 }
2567 }
2568 2639
2569 pio_dev = vcpu_find_pio_dev(vcpu, port, 2640 pio_dev = vcpu_find_pio_dev(vcpu, port,
2570 vcpu->arch.pio.cur_count, 2641 vcpu->arch.pio.cur_count,
@@ -2572,7 +2643,11 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2572 if (!vcpu->arch.pio.in) { 2643 if (!vcpu->arch.pio.in) {
2573 /* string PIO write */ 2644 /* string PIO write */
2574 ret = pio_copy_data(vcpu); 2645 ret = pio_copy_data(vcpu);
2575 if (ret >= 0 && pio_dev) { 2646 if (ret == X86EMUL_PROPAGATE_FAULT) {
2647 kvm_inject_gp(vcpu, 0);
2648 return 1;
2649 }
2650 if (ret == 0 && pio_dev) {
2576 pio_string_write(pio_dev, vcpu); 2651 pio_string_write(pio_dev, vcpu);
2577 complete_pio(vcpu); 2652 complete_pio(vcpu);
2578 if (vcpu->arch.pio.count == 0) 2653 if (vcpu->arch.pio.count == 0)
@@ -2587,9 +2662,72 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2587} 2662}
2588EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); 2663EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
2589 2664
2665static void bounce_off(void *info)
2666{
2667 /* nothing */
2668}
2669
2670static unsigned int ref_freq;
2671static unsigned long tsc_khz_ref;
2672
2673static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
2674 void *data)
2675{
2676 struct cpufreq_freqs *freq = data;
2677 struct kvm *kvm;
2678 struct kvm_vcpu *vcpu;
2679 int i, send_ipi = 0;
2680
2681 if (!ref_freq)
2682 ref_freq = freq->old;
2683
2684 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
2685 return 0;
2686 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
2687 return 0;
2688 per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
2689
2690 spin_lock(&kvm_lock);
2691 list_for_each_entry(kvm, &vm_list, vm_list) {
2692 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
2693 vcpu = kvm->vcpus[i];
2694 if (!vcpu)
2695 continue;
2696 if (vcpu->cpu != freq->cpu)
2697 continue;
2698 if (!kvm_request_guest_time_update(vcpu))
2699 continue;
2700 if (vcpu->cpu != smp_processor_id())
2701 send_ipi++;
2702 }
2703 }
2704 spin_unlock(&kvm_lock);
2705
2706 if (freq->old < freq->new && send_ipi) {
2707 /*
2708 * We upscale the frequency. Must make sure the guest
2709 * doesn't see old kvmclock values while running with
2710 * the new frequency, otherwise we risk the guest seeing
2711 * time go backwards.
2712 *
2713 * In case we update the frequency for another cpu
2714 * (which might be in guest context) send an interrupt
2715 * to kick the cpu out of guest context. Next time
2716 * guest context is entered kvmclock will be updated,
2717 * so the guest will not see stale values.
2718 */
2719 smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
2720 }
2721 return 0;
2722}
2723
2724static struct notifier_block kvmclock_cpufreq_notifier_block = {
2725 .notifier_call = kvmclock_cpufreq_notifier
2726};
2727
2590int kvm_arch_init(void *opaque) 2728int kvm_arch_init(void *opaque)
2591{ 2729{
2592 int r; 2730 int r, cpu;
2593 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; 2731 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
2594 2732
2595 if (kvm_x86_ops) { 2733 if (kvm_x86_ops) {
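The notifier above re-derives the per-cpu TSC rate with cpufreq_scale(tsc_khz_ref, ref_freq, freq->new), which is just a proportional rescale against the reference frequency captured at the first transition. A back-of-the-envelope version of that arithmetic (the real cpufreq_scale() works with precomputed factors; this only shows the idea):

    #include <stdint.h>
    #include <stdio.h>

    /* new_tsc_khz = tsc_khz_ref * new_cpu_khz / ref_cpu_khz */
    static unsigned long scale_tsc_khz(unsigned long tsc_khz_ref,
                                       unsigned int ref_khz, unsigned int new_khz)
    {
        return (unsigned long)(((uint64_t)tsc_khz_ref * new_khz) / ref_khz);
    }

    int main(void)
    {
        /* TSC calibrated at 2400000 kHz while the core ran at 2.4 GHz; after a
         * cpufreq transition to 1.6 GHz the per-cpu value becomes 1600000 kHz. */
        printf("%lu kHz\n", scale_tsc_khz(2400000, 2400000, 1600000));
        return 0;
    }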
@@ -2620,6 +2758,15 @@ int kvm_arch_init(void *opaque)
2620 kvm_mmu_set_base_ptes(PT_PRESENT_MASK); 2758 kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
2621 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, 2759 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
2622 PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); 2760 PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
2761
2762 for_each_possible_cpu(cpu)
2763 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
2764 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
2765 tsc_khz_ref = tsc_khz;
2766 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
2767 CPUFREQ_TRANSITION_NOTIFIER);
2768 }
2769
2623 return 0; 2770 return 0;
2624 2771
2625out: 2772out:
@@ -2827,25 +2974,20 @@ static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
2827 if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index) 2974 if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
2828 return 0; 2975 return 0;
2829 if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) && 2976 if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
2830 !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) 2977 !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
2831 return 0; 2978 return 0;
2832 return 1; 2979 return 1;
2833} 2980}
2834 2981
2835void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) 2982struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
2983 u32 function, u32 index)
2836{ 2984{
2837 int i; 2985 int i;
2838 u32 function, index; 2986 struct kvm_cpuid_entry2 *best = NULL;
2839 struct kvm_cpuid_entry2 *e, *best;
2840 2987
2841 function = kvm_register_read(vcpu, VCPU_REGS_RAX);
2842 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
2843 kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
2844 kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
2845 kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
2846 kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
2847 best = NULL;
2848 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { 2988 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
2989 struct kvm_cpuid_entry2 *e;
2990
2849 e = &vcpu->arch.cpuid_entries[i]; 2991 e = &vcpu->arch.cpuid_entries[i];
2850 if (is_matching_cpuid_entry(e, function, index)) { 2992 if (is_matching_cpuid_entry(e, function, index)) {
2851 if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) 2993 if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
@@ -2860,6 +3002,21 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
2860 if (!best || e->function > best->function) 3002 if (!best || e->function > best->function)
2861 best = e; 3003 best = e;
2862 } 3004 }
3005 return best;
3006}
3007
3008void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
3009{
3010 u32 function, index;
3011 struct kvm_cpuid_entry2 *best;
3012
3013 function = kvm_register_read(vcpu, VCPU_REGS_RAX);
3014 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
3015 kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
3016 kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
3017 kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
3018 kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
3019 best = kvm_find_cpuid_entry(vcpu, function, index);
2863 if (best) { 3020 if (best) {
2864 kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); 3021 kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
2865 kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); 3022 kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
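kvm_find_cpuid_entry() keeps the selection rule the old open-coded loop used: an exact (function, index) match is returned immediately, otherwise the entry with the highest function number seen so far is kept as the fallback (stateful-function handling omitted here). A compact sketch of that rule over a plain array, not the kernel structure:

    #include <stddef.h>
    #include <stdint.h>

    struct cpuid_ent { uint32_t function, index, eax, ebx, ecx, edx; };

    static struct cpuid_ent *find_entry(struct cpuid_ent *e, size_t n,
                                        uint32_t function, uint32_t index)
    {
        struct cpuid_ent *best = NULL;
        size_t i;

        for (i = 0; i < n; i++) {
            if (e[i].function == function && e[i].index == index)
                return &e[i];               /* exact match wins immediately */
            if (!best || e[i].function > best->function)
                best = &e[i];               /* fallback: highest function seen */
        }
        return best;
    }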
@@ -2945,6 +3102,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2945 if (vcpu->requests) { 3102 if (vcpu->requests) {
2946 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) 3103 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
2947 __kvm_migrate_timers(vcpu); 3104 __kvm_migrate_timers(vcpu);
3105 if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
3106 kvm_write_guest_time(vcpu);
2948 if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) 3107 if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
2949 kvm_mmu_sync_roots(vcpu); 3108 kvm_mmu_sync_roots(vcpu);
2950 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) 3109 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
@@ -2979,9 +3138,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2979 goto out; 3138 goto out;
2980 } 3139 }
2981 3140
2982 if (vcpu->guest_debug.enabled)
2983 kvm_x86_ops->guest_debug_pre(vcpu);
2984
2985 vcpu->guest_mode = 1; 3141 vcpu->guest_mode = 1;
2986 /* 3142 /*
2987 * Make sure that guest_mode assignment won't happen after 3143 * Make sure that guest_mode assignment won't happen after
@@ -3002,10 +3158,34 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3002 3158
3003 kvm_guest_enter(); 3159 kvm_guest_enter();
3004 3160
3161 get_debugreg(vcpu->arch.host_dr6, 6);
3162 get_debugreg(vcpu->arch.host_dr7, 7);
3163 if (unlikely(vcpu->arch.switch_db_regs)) {
3164 get_debugreg(vcpu->arch.host_db[0], 0);
3165 get_debugreg(vcpu->arch.host_db[1], 1);
3166 get_debugreg(vcpu->arch.host_db[2], 2);
3167 get_debugreg(vcpu->arch.host_db[3], 3);
3168
3169 set_debugreg(0, 7);
3170 set_debugreg(vcpu->arch.eff_db[0], 0);
3171 set_debugreg(vcpu->arch.eff_db[1], 1);
3172 set_debugreg(vcpu->arch.eff_db[2], 2);
3173 set_debugreg(vcpu->arch.eff_db[3], 3);
3174 }
3005 3175
3006 KVMTRACE_0D(VMENTRY, vcpu, entryexit); 3176 KVMTRACE_0D(VMENTRY, vcpu, entryexit);
3007 kvm_x86_ops->run(vcpu, kvm_run); 3177 kvm_x86_ops->run(vcpu, kvm_run);
3008 3178
3179 if (unlikely(vcpu->arch.switch_db_regs)) {
3180 set_debugreg(0, 7);
3181 set_debugreg(vcpu->arch.host_db[0], 0);
3182 set_debugreg(vcpu->arch.host_db[1], 1);
3183 set_debugreg(vcpu->arch.host_db[2], 2);
3184 set_debugreg(vcpu->arch.host_db[3], 3);
3185 }
3186 set_debugreg(vcpu->arch.host_dr6, 6);
3187 set_debugreg(vcpu->arch.host_dr7, 7);
3188
3009 vcpu->guest_mode = 0; 3189 vcpu->guest_mode = 0;
3010 local_irq_enable(); 3190 local_irq_enable();
3011 3191
@@ -3192,7 +3372,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3192 /* 3372 /*
3193 * Don't leak debug flags in case they were set for guest debugging 3373 * Don't leak debug flags in case they were set for guest debugging
3194 */ 3374 */
3195 if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep) 3375 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
3196 regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); 3376 regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
3197 3377
3198 vcpu_put(vcpu); 3378 vcpu_put(vcpu);
@@ -3811,15 +3991,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3811 return 0; 3991 return 0;
3812} 3992}
3813 3993
3814int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, 3994int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3815 struct kvm_debug_guest *dbg) 3995 struct kvm_guest_debug *dbg)
3816{ 3996{
3817 int r; 3997 int i, r;
3818 3998
3819 vcpu_load(vcpu); 3999 vcpu_load(vcpu);
3820 4000
4001 if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
4002 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
4003 for (i = 0; i < KVM_NR_DB_REGS; ++i)
4004 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
4005 vcpu->arch.switch_db_regs =
4006 (dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
4007 } else {
4008 for (i = 0; i < KVM_NR_DB_REGS; i++)
4009 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
4010 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
4011 }
4012
3821 r = kvm_x86_ops->set_guest_debug(vcpu, dbg); 4013 r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
3822 4014
4015 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
4016 kvm_queue_exception(vcpu, DB_VECTOR);
4017 else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
4018 kvm_queue_exception(vcpu, BP_VECTOR);
4019
3823 vcpu_put(vcpu); 4020 vcpu_put(vcpu);
3824 4021
3825 return r; 4022 return r;
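From userspace this code is reached through the vcpu ioctl introduced by this series (KVM_SET_GUEST_DEBUG with the new struct kvm_guest_debug). A hedged sketch of how a debugger might arm hardware breakpoint 0; the DR7 value is the architectural encoding for "local enable, execute, length 1", not something defined by the patch:

    #include <linux/kvm.h>
    #include <string.h>
    #include <sys/ioctl.h>

    /* vcpu_fd: an already-open KVM vcpu file descriptor */
    static int set_hw_breakpoint(int vcpu_fd, unsigned long addr)
    {
        struct kvm_guest_debug dbg;

        memset(&dbg, 0, sizeof(dbg));
        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        dbg.arch.debugreg[0] = addr;    /* breakpoint address goes into DR0 */
        dbg.arch.debugreg[7] = 0x1;     /* DR7.L0: enable bp 0, execute, len 1 */

        return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
    }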
@@ -4007,6 +4204,11 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
4007 vcpu->arch.nmi_pending = false; 4204 vcpu->arch.nmi_pending = false;
4008 vcpu->arch.nmi_injected = false; 4205 vcpu->arch.nmi_injected = false;
4009 4206
4207 vcpu->arch.switch_db_regs = 0;
4208 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
4209 vcpu->arch.dr6 = DR6_FIXED_1;
4210 vcpu->arch.dr7 = DR7_FIXED_1;
4211
4010 return kvm_x86_ops->vcpu_reset(vcpu); 4212 return kvm_x86_ops->vcpu_reset(vcpu);
4011} 4213}
4012 4214
@@ -4100,6 +4302,8 @@ struct kvm *kvm_arch_create_vm(void)
4100 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ 4302 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
4101 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); 4303 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
4102 4304
4305 rdtscll(kvm->arch.vm_init_tsc);
4306
4103 return kvm; 4307 return kvm;
4104} 4308}
4105 4309
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index d174db7a3370..ca91749d2083 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -178,7 +178,7 @@ static u32 opcode_table[256] = {
178 0, ImplicitOps | Stack, 0, 0, 178 0, ImplicitOps | Stack, 0, 0,
179 ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, 179 ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
180 /* 0xC8 - 0xCF */ 180 /* 0xC8 - 0xCF */
181 0, 0, 0, 0, 0, 0, 0, 0, 181 0, 0, 0, ImplicitOps | Stack, 0, 0, 0, 0,
182 /* 0xD0 - 0xD7 */ 182 /* 0xD0 - 0xD7 */
183 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, 183 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
184 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, 184 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
@@ -1136,18 +1136,19 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
1136} 1136}
1137 1137
1138static int emulate_pop(struct x86_emulate_ctxt *ctxt, 1138static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1139 struct x86_emulate_ops *ops) 1139 struct x86_emulate_ops *ops,
1140 void *dest, int len)
1140{ 1141{
1141 struct decode_cache *c = &ctxt->decode; 1142 struct decode_cache *c = &ctxt->decode;
1142 int rc; 1143 int rc;
1143 1144
1144 rc = ops->read_emulated(register_address(c, ss_base(ctxt), 1145 rc = ops->read_emulated(register_address(c, ss_base(ctxt),
1145 c->regs[VCPU_REGS_RSP]), 1146 c->regs[VCPU_REGS_RSP]),
1146 &c->src.val, c->src.bytes, ctxt->vcpu); 1147 dest, len, ctxt->vcpu);
1147 if (rc != 0) 1148 if (rc != 0)
1148 return rc; 1149 return rc;
1149 1150
1150 register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes); 1151 register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
1151 return rc; 1152 return rc;
1152} 1153}
1153 1154
@@ -1157,11 +1158,9 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
1157 struct decode_cache *c = &ctxt->decode; 1158 struct decode_cache *c = &ctxt->decode;
1158 int rc; 1159 int rc;
1159 1160
1160 c->src.bytes = c->dst.bytes; 1161 rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
1161 rc = emulate_pop(ctxt, ops);
1162 if (rc != 0) 1162 if (rc != 0)
1163 return rc; 1163 return rc;
1164 c->dst.val = c->src.val;
1165 return 0; 1164 return 0;
1166} 1165}
1167 1166
@@ -1279,6 +1278,25 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1279 return 0; 1278 return 0;
1280} 1279}
1281 1280
1281static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
1282 struct x86_emulate_ops *ops)
1283{
1284 struct decode_cache *c = &ctxt->decode;
1285 int rc;
1286 unsigned long cs;
1287
1288 rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
1289 if (rc)
1290 return rc;
1291 if (c->op_bytes == 4)
1292 c->eip = (u32)c->eip;
1293 rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
1294 if (rc)
1295 return rc;
1296 rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS);
1297 return rc;
1298}
1299
1282static inline int writeback(struct x86_emulate_ctxt *ctxt, 1300static inline int writeback(struct x86_emulate_ctxt *ctxt,
1283 struct x86_emulate_ops *ops) 1301 struct x86_emulate_ops *ops)
1284{ 1302{
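emulate_ret_far() above is two ordered pops: first the return offset (truncated to 32 bits when the operand size is 4), then the CS selector handed to kvm_load_segment_descriptor(). A toy model of that stack discipline, not from the patch:

    #include <stdint.h>
    #include <stdio.h>

    struct toy_stack { uint64_t slot[16]; int sp; };

    static uint64_t pop(struct toy_stack *s, int op_bytes)
    {
        uint64_t v = s->slot[s->sp++];
        return op_bytes == 4 ? (uint32_t)v : v;   /* truncate like the emulator */
    }

    int main(void)
    {
        /* stack layout left by a far call with 32-bit operands: [offset][cs] */
        struct toy_stack s = { .slot = { 0x1234, 0x0008 }, .sp = 0 };
        uint64_t eip = pop(&s, 4);
        uint16_t cs  = (uint16_t)pop(&s, 4);

        printf("ret to %04x:%08llx\n", cs, (unsigned long long)eip);
        return 0;
    }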
@@ -1467,11 +1485,9 @@ special_insn:
1467 break; 1485 break;
1468 case 0x58 ... 0x5f: /* pop reg */ 1486 case 0x58 ... 0x5f: /* pop reg */
1469 pop_instruction: 1487 pop_instruction:
1470 c->src.bytes = c->op_bytes; 1488 rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
1471 rc = emulate_pop(ctxt, ops);
1472 if (rc != 0) 1489 if (rc != 0)
1473 goto done; 1490 goto done;
1474 c->dst.val = c->src.val;
1475 break; 1491 break;
1476 case 0x63: /* movsxd */ 1492 case 0x63: /* movsxd */
1477 if (ctxt->mode != X86EMUL_MODE_PROT64) 1493 if (ctxt->mode != X86EMUL_MODE_PROT64)
@@ -1738,6 +1754,11 @@ special_insn:
1738 mov: 1754 mov:
1739 c->dst.val = c->src.val; 1755 c->dst.val = c->src.val;
1740 break; 1756 break;
1757 case 0xcb: /* ret far */
1758 rc = emulate_ret_far(ctxt, ops);
1759 if (rc)
1760 goto done;
1761 break;
1741 case 0xd0 ... 0xd1: /* Grp2 */ 1762 case 0xd0 ... 0xd1: /* Grp2 */
1742 c->src.val = 1; 1763 c->src.val = 1;
1743 emulate_grp2(ctxt); 1764 emulate_grp2(ctxt);
@@ -1908,11 +1929,16 @@ twobyte_insn:
1908 c->dst.type = OP_NONE; 1929 c->dst.type = OP_NONE;
1909 break; 1930 break;
1910 case 3: /* lidt/vmmcall */ 1931 case 3: /* lidt/vmmcall */
1911 if (c->modrm_mod == 3 && c->modrm_rm == 1) { 1932 if (c->modrm_mod == 3) {
1912 rc = kvm_fix_hypercall(ctxt->vcpu); 1933 switch (c->modrm_rm) {
1913 if (rc) 1934 case 1:
1914 goto done; 1935 rc = kvm_fix_hypercall(ctxt->vcpu);
1915 kvm_emulate_hypercall(ctxt->vcpu); 1936 if (rc)
1937 goto done;
1938 break;
1939 default:
1940 goto cannot_emulate;
1941 }
1916 } else { 1942 } else {
1917 rc = read_descriptor(ctxt, ops, c->src.ptr, 1943 rc = read_descriptor(ctxt, ops, c->src.ptr,
1918 &size, &address, 1944 &size, &address,
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 9fe4ddaa8f6f..90e44a10e68a 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1058,14 +1058,6 @@ __init void lguest_init(void)
1058 * lguest_init() where the rest of the fairly chaotic boot setup 1058 * lguest_init() where the rest of the fairly chaotic boot setup
1059 * occurs. */ 1059 * occurs. */
1060 1060
1061 /* The native boot code sets up initial page tables immediately after
1062 * the kernel itself, and sets init_pg_tables_end so they're not
1063 * clobbered. The Launcher places our initial pagetables somewhere at
1064 * the top of our physical memory, so we don't need extra space: set
1065 * init_pg_tables_end to the end of the kernel. */
1066 init_pg_tables_start = __pa(pg0);
1067 init_pg_tables_end = __pa(pg0);
1068
1069 /* As described in head_32.S, we map the first 128M of memory. */ 1061 /* As described in head_32.S, we map the first 128M of memory. */
1070 max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; 1062 max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
1071 1063
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index c22981fa2f3a..ad5441ed1b57 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,30 +1,38 @@
1/* Copyright 2002 Andi Kleen */ 1/* Copyright 2002 Andi Kleen */
2 2
3#include <linux/linkage.h> 3#include <linux/linkage.h>
4#include <asm/dwarf2.h> 4
5#include <asm/cpufeature.h> 5#include <asm/cpufeature.h>
6#include <asm/dwarf2.h>
6 7
7/* 8/*
8 * memcpy - Copy a memory block. 9 * memcpy - Copy a memory block.
9 * 10 *
10 * Input: 11 * Input:
11 * rdi destination 12 * rdi destination
12 * rsi source 13 * rsi source
13 * rdx count 14 * rdx count
14 * 15 *
15 * Output: 16 * Output:
16 * rax original destination 17 * rax original destination
17 */ 18 */
18 19
20/*
21 * memcpy_c() - fast string ops (REP MOVSQ) based variant.
22 *
23 * Calls to this get patched into the kernel image via the
24 * alternative instructions framework:
25 */
19 ALIGN 26 ALIGN
20memcpy_c: 27memcpy_c:
21 CFI_STARTPROC 28 CFI_STARTPROC
22 movq %rdi,%rax 29 movq %rdi, %rax
23 movl %edx,%ecx 30
24 shrl $3,%ecx 31 movl %edx, %ecx
25 andl $7,%edx 32 shrl $3, %ecx
33 andl $7, %edx
26 rep movsq 34 rep movsq
27 movl %edx,%ecx 35 movl %edx, %ecx
28 rep movsb 36 rep movsb
29 ret 37 ret
30 CFI_ENDPROC 38 CFI_ENDPROC
@@ -33,99 +41,110 @@ ENDPROC(memcpy_c)
33ENTRY(__memcpy) 41ENTRY(__memcpy)
34ENTRY(memcpy) 42ENTRY(memcpy)
35 CFI_STARTPROC 43 CFI_STARTPROC
36 pushq %rbx
37 CFI_ADJUST_CFA_OFFSET 8
38 CFI_REL_OFFSET rbx, 0
39 movq %rdi,%rax
40 44
41 movl %edx,%ecx 45 /*
42 shrl $6,%ecx 46 * Put the number of full 64-byte blocks into %ecx.
47 * Tail portion is handled at the end:
48 */
49 movq %rdi, %rax
50 movl %edx, %ecx
51 shrl $6, %ecx
43 jz .Lhandle_tail 52 jz .Lhandle_tail
44 53
45 .p2align 4 54 .p2align 4
46.Lloop_64: 55.Lloop_64:
56 /*
57 * We decrement the loop index here - and the zero-flag is
58 * checked at the end of the loop (instructions inbetween do
59 * not change the zero flag):
60 */
47 decl %ecx 61 decl %ecx
48 62
49 movq (%rsi),%r11 63 /*
50 movq 8(%rsi),%r8 64 * Move in blocks of 4x16 bytes:
65 */
66 movq 0*8(%rsi), %r11
67 movq 1*8(%rsi), %r8
68 movq %r11, 0*8(%rdi)
69 movq %r8, 1*8(%rdi)
51 70
52 movq %r11,(%rdi) 71 movq 2*8(%rsi), %r9
53 movq %r8,1*8(%rdi) 72 movq 3*8(%rsi), %r10
73 movq %r9, 2*8(%rdi)
74 movq %r10, 3*8(%rdi)
54 75
55 movq 2*8(%rsi),%r9 76 movq 4*8(%rsi), %r11
56 movq 3*8(%rsi),%r10 77 movq 5*8(%rsi), %r8
78 movq %r11, 4*8(%rdi)
79 movq %r8, 5*8(%rdi)
57 80
58 movq %r9,2*8(%rdi) 81 movq 6*8(%rsi), %r9
59 movq %r10,3*8(%rdi) 82 movq 7*8(%rsi), %r10
83 movq %r9, 6*8(%rdi)
84 movq %r10, 7*8(%rdi)
60 85
61 movq 4*8(%rsi),%r11 86 leaq 64(%rsi), %rsi
62 movq 5*8(%rsi),%r8 87 leaq 64(%rdi), %rdi
63 88
64 movq %r11,4*8(%rdi)
65 movq %r8,5*8(%rdi)
66
67 movq 6*8(%rsi),%r9
68 movq 7*8(%rsi),%r10
69
70 movq %r9,6*8(%rdi)
71 movq %r10,7*8(%rdi)
72
73 leaq 64(%rsi),%rsi
74 leaq 64(%rdi),%rdi
75 jnz .Lloop_64 89 jnz .Lloop_64
76 90
77.Lhandle_tail: 91.Lhandle_tail:
78 movl %edx,%ecx 92 movl %edx, %ecx
79 andl $63,%ecx 93 andl $63, %ecx
80 shrl $3,%ecx 94 shrl $3, %ecx
81 jz .Lhandle_7 95 jz .Lhandle_7
96
82 .p2align 4 97 .p2align 4
83.Lloop_8: 98.Lloop_8:
84 decl %ecx 99 decl %ecx
85 movq (%rsi),%r8 100 movq (%rsi), %r8
86 movq %r8,(%rdi) 101 movq %r8, (%rdi)
87 leaq 8(%rdi),%rdi 102 leaq 8(%rdi), %rdi
88 leaq 8(%rsi),%rsi 103 leaq 8(%rsi), %rsi
89 jnz .Lloop_8 104 jnz .Lloop_8
90 105
91.Lhandle_7: 106.Lhandle_7:
92 movl %edx,%ecx 107 movl %edx, %ecx
93 andl $7,%ecx 108 andl $7, %ecx
94 jz .Lende 109 jz .Lend
110
95 .p2align 4 111 .p2align 4
96.Lloop_1: 112.Lloop_1:
97 movb (%rsi),%r8b 113 movb (%rsi), %r8b
98 movb %r8b,(%rdi) 114 movb %r8b, (%rdi)
99 incq %rdi 115 incq %rdi
100 incq %rsi 116 incq %rsi
101 decl %ecx 117 decl %ecx
102 jnz .Lloop_1 118 jnz .Lloop_1
103 119
104.Lende: 120.Lend:
105 popq %rbx
106 CFI_ADJUST_CFA_OFFSET -8
107 CFI_RESTORE rbx
108 ret 121 ret
109.Lfinal:
110 CFI_ENDPROC 122 CFI_ENDPROC
111ENDPROC(memcpy) 123ENDPROC(memcpy)
112ENDPROC(__memcpy) 124ENDPROC(__memcpy)
113 125
114 /* Some CPUs run faster using the string copy instructions. 126 /*
115 It is also a lot simpler. Use this when possible */ 127 * Some CPUs run faster using the string copy instructions.
128 * It is also a lot simpler. Use this when possible:
129 */
116 130
117 .section .altinstr_replacement,"ax" 131 .section .altinstr_replacement, "ax"
1181: .byte 0xeb /* jmp <disp8> */ 1321: .byte 0xeb /* jmp <disp8> */
119 .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */ 133 .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
1202: 1342:
121 .previous 135 .previous
122 .section .altinstructions,"a" 136
137 .section .altinstructions, "a"
123 .align 8 138 .align 8
124 .quad memcpy 139 .quad memcpy
125 .quad 1b 140 .quad 1b
126 .byte X86_FEATURE_REP_GOOD 141 .byte X86_FEATURE_REP_GOOD
127 /* Replace only beginning, memcpy is used to apply alternatives, so it 142
128 * is silly to overwrite itself with nops - reboot is only outcome... */ 143 /*
144 * Replace only beginning, memcpy is used to apply alternatives,
145 * so it is silly to overwrite itself with nops - reboot is the
146 * only outcome...
147 */
129 .byte 2b - 1b 148 .byte 2b - 1b
130 .byte 2b - 1b 149 .byte 2b - 1b
131 .previous 150 .previous
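
The reworked copy loop above moves 64-byte blocks as four pairs of quadword loads and stores, then falls through to an 8-byte loop and a final byte loop for the tail. A rough C rendering of that control flow, only to illustrate the block/tail split (the real routine is the hand-tuned assembly above):

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* C illustration of the 64-byte block loop plus 8-byte and 1-byte tails. */
static void *toy_memcpy(void *dst, const void *src, size_t len)
{
        unsigned char *d = dst;
        const unsigned char *s = src;
        size_t blocks = len >> 6;       /* number of full 64-byte blocks */

        while (blocks--) {
                uint64_t tmp[8];

                memcpy(tmp, s, 64);     /* stands in for the 8 paired movq loads/stores */
                memcpy(d, tmp, 64);
                s += 64;
                d += 64;
        }

        len &= 63;
        while (len >= 8) {              /* .Lloop_8: one quadword at a time */
                memcpy(d, s, 8);
                s += 8;
                d += 8;
                len -= 8;
        }
        while (len--)                   /* .Lloop_1: byte tail */
                *d++ = *s++;

        return dst;
}

int main(void)
{
        char src[100], dst[100];

        for (int i = 0; i < 100; i++)
                src[i] = (char)i;
        toy_memcpy(dst, src, sizeof(src));
        printf("%s\n", memcmp(dst, src, sizeof(src)) == 0 ? "match" : "mismatch");
        return 0;
}
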
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index d11745334a67..522db5e3d0bf 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -121,22 +121,13 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
121 pagefault_enable(); 121 pagefault_enable();
122} 122}
123 123
124/* This is the same as kmap_atomic() but can map memory that doesn't 124/*
125 * This is the same as kmap_atomic() but can map memory that doesn't
125 * have a struct page associated with it. 126 * have a struct page associated with it.
126 */ 127 */
127void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) 128void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
128{ 129{
129 enum fixed_addresses idx; 130 return kmap_atomic_prot_pfn(pfn, type, kmap_prot);
130 unsigned long vaddr;
131
132 pagefault_disable();
133
134 idx = type + KM_TYPE_NR*smp_processor_id();
135 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
136 set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
137 arch_flush_lazy_mmu_mode();
138
139 return (void*) vaddr;
140} 131}
141EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ 132EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */
142 133
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 15219e0d1243..fd3da1dda1c9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -94,9 +94,9 @@ struct map_range {
94#define NR_RANGE_MR 5 94#define NR_RANGE_MR 5
95#endif 95#endif
96 96
97static int save_mr(struct map_range *mr, int nr_range, 97static int __meminit save_mr(struct map_range *mr, int nr_range,
98 unsigned long start_pfn, unsigned long end_pfn, 98 unsigned long start_pfn, unsigned long end_pfn,
99 unsigned long page_size_mask) 99 unsigned long page_size_mask)
100{ 100{
101 if (start_pfn < end_pfn) { 101 if (start_pfn < end_pfn) {
102 if (nr_range >= NR_RANGE_MR) 102 if (nr_range >= NR_RANGE_MR)
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 04102d42ff42..699c9b2895ae 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -31,16 +31,27 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size)
31} 31}
32EXPORT_SYMBOL_GPL(is_io_mapping_possible); 32EXPORT_SYMBOL_GPL(is_io_mapping_possible);
33 33
34/* Map 'pfn' using fixed map 'type' and protections 'prot' 34void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
35 */
36void *
37iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
38{ 35{
39 enum fixed_addresses idx; 36 enum fixed_addresses idx;
40 unsigned long vaddr; 37 unsigned long vaddr;
41 38
42 pagefault_disable(); 39 pagefault_disable();
43 40
41 idx = type + KM_TYPE_NR * smp_processor_id();
42 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
43 set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
44 arch_flush_lazy_mmu_mode();
45
46 return (void *)vaddr;
47}
48
49/*
50 * Map 'pfn' using fixed map 'type' and protections 'prot'
51 */
52void *
53iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
54{
44 /* 55 /*
45 * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. 56 * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
46 * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the 57 * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the
@@ -50,12 +61,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
50 if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) 61 if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC))
51 prot = PAGE_KERNEL_UC_MINUS; 62 prot = PAGE_KERNEL_UC_MINUS;
52 63
53 idx = type + KM_TYPE_NR*smp_processor_id(); 64 return kmap_atomic_prot_pfn(pfn, type, prot);
54 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
55 set_pte(kmap_pte-idx, pfn_pte(pfn, prot));
56 arch_flush_lazy_mmu_mode();
57
58 return (void*) vaddr;
59} 65}
60EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); 66EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
61 67
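
Taken together with the highmem_32.c hunk, this moves the fixmap slot setup into a single worker, kmap_atomic_prot_pfn(), and leaves kmap_atomic_pfn() and iomap_atomic_prot_pfn() as thin wrappers that only choose the protection bits. A small sketch of that one-worker, many-wrappers shape; the names, constants, and the simplified PAT fixup below are stand-ins, not kernel code:

#include <stdio.h>

/* Illustrative stand-ins for kernel types and constants; not the real kmap API. */
typedef unsigned long pgprot_val_t;

#define PROT_DEFAULT    0x1UL   /* plays the role of kmap_prot */
#define PROT_UC_MINUS   0x2UL   /* plays the role of PAGE_KERNEL_UC_MINUS */

/* The single worker: all slot/PTE bookkeeping would live here. */
static void *map_pfn_prot(unsigned long pfn, pgprot_val_t prot)
{
        printf("map pfn %lu with prot %#lx\n", pfn, prot);
        return (void *)(pfn << 12);     /* placeholder address, never dereferenced */
}

/* Wrapper 1: default protection, like kmap_atomic_pfn(). */
static void *map_pfn(unsigned long pfn)
{
        return map_pfn_prot(pfn, PROT_DEFAULT);
}

/* Wrapper 2: caller-chosen protection with a fixup, like iomap_atomic_prot_pfn()
 * (simplified: the kernel only demotes when the caller asked for WC). */
static void *map_pfn_io(unsigned long pfn, pgprot_val_t prot, int pat_enabled)
{
        if (!pat_enabled)
                prot = PROT_UC_MINUS;
        return map_pfn_prot(pfn, prot);
}

int main(void)
{
        map_pfn(42);
        map_pfn_io(43, 0x4UL, 0);
        return 0;
}
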
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index aca924a30ee6..0dfa09d69e80 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -22,13 +22,17 @@
22#include <asm/pgalloc.h> 22#include <asm/pgalloc.h>
23#include <asm/pat.h> 23#include <asm/pat.h>
24 24
25#ifdef CONFIG_X86_64 25static inline int phys_addr_valid(resource_size_t addr)
26
27static inline int phys_addr_valid(unsigned long addr)
28{ 26{
29 return addr < (1UL << boot_cpu_data.x86_phys_bits); 27#ifdef CONFIG_PHYS_ADDR_T_64BIT
28 return !(addr >> boot_cpu_data.x86_phys_bits);
29#else
30 return 1;
31#endif
30} 32}
31 33
34#ifdef CONFIG_X86_64
35
32unsigned long __phys_addr(unsigned long x) 36unsigned long __phys_addr(unsigned long x)
33{ 37{
34 if (x >= __START_KERNEL_map) { 38 if (x >= __START_KERNEL_map) {
@@ -65,11 +69,6 @@ EXPORT_SYMBOL(__virt_addr_valid);
65 69
66#else 70#else
67 71
68static inline int phys_addr_valid(unsigned long addr)
69{
70 return 1;
71}
72
73#ifdef CONFIG_DEBUG_VIRTUAL 72#ifdef CONFIG_DEBUG_VIRTUAL
74unsigned long __phys_addr(unsigned long x) 73unsigned long __phys_addr(unsigned long x)
75{ 74{
@@ -517,7 +516,7 @@ void __init early_ioremap_init(void)
517 printk(KERN_INFO "early_ioremap_init()\n"); 516 printk(KERN_INFO "early_ioremap_init()\n");
518 517
519 for (i = 0; i < FIX_BTMAPS_SLOTS; i++) 518 for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
520 slot_virt[i] = fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); 519 slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i);
521 520
522 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); 521 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
523 memset(bm_pte, 0, sizeof(bm_pte)); 522 memset(bm_pte, 0, sizeof(bm_pte));
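
phys_addr_valid() is now shared by both word sizes and tests the address with a right shift, which avoids forming 1UL << x86_phys_bits when unsigned long is narrower than the physical address width. A standalone version of the check, using a made-up 36-bit width:

#include <stdint.h>
#include <stdio.h>

#define PHYS_BITS 36    /* stand-in for boot_cpu_data.x86_phys_bits */

/* Valid iff no bit above the CPU's physical address width is set. */
static int phys_addr_valid(uint64_t addr)
{
        return !(addr >> PHYS_BITS);
}

int main(void)
{
        printf("%d\n", phys_addr_valid(0x0fffffffffull));      /* 1: fits in 36 bits */
        printf("%d\n", phys_addr_valid(0x1000000000ull));      /* 0: bit 36 set */
        return 0;
}
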
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 6a518dd08a36..4f115e00486b 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -310,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
310 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); 310 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
311 311
312 if (!ctx->active) { 312 if (!ctx->active) {
313 pr_warning("kmmio: spurious debug trap on CPU %d.\n", 313 pr_debug("kmmio: spurious debug trap on CPU %d.\n",
314 smp_processor_id()); 314 smp_processor_id());
315 goto out; 315 goto out;
316 } 316 }
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 9c4294986af7..d71e1b636ce6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -16,6 +16,7 @@
16#include <asm/processor.h> 16#include <asm/processor.h>
17#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
18#include <asm/sections.h> 18#include <asm/sections.h>
19#include <asm/setup.h>
19#include <asm/uaccess.h> 20#include <asm/uaccess.h>
20#include <asm/pgalloc.h> 21#include <asm/pgalloc.h>
21#include <asm/proto.h> 22#include <asm/proto.h>
@@ -33,6 +34,7 @@ struct cpa_data {
33 unsigned long pfn; 34 unsigned long pfn;
34 unsigned force_split : 1; 35 unsigned force_split : 1;
35 int curpage; 36 int curpage;
37 struct page **pages;
36}; 38};
37 39
38/* 40/*
@@ -45,6 +47,7 @@ static DEFINE_SPINLOCK(cpa_lock);
45 47
46#define CPA_FLUSHTLB 1 48#define CPA_FLUSHTLB 1
47#define CPA_ARRAY 2 49#define CPA_ARRAY 2
50#define CPA_PAGES_ARRAY 4
48 51
49#ifdef CONFIG_PROC_FS 52#ifdef CONFIG_PROC_FS
50static unsigned long direct_pages_count[PG_LEVEL_NUM]; 53static unsigned long direct_pages_count[PG_LEVEL_NUM];
@@ -95,7 +98,7 @@ static inline unsigned long highmap_start_pfn(void)
95 98
96static inline unsigned long highmap_end_pfn(void) 99static inline unsigned long highmap_end_pfn(void)
97{ 100{
98 return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; 101 return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
99} 102}
100 103
101#endif 104#endif
@@ -201,10 +204,10 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
201 } 204 }
202} 205}
203 206
204static void cpa_flush_array(unsigned long *start, int numpages, int cache) 207static void cpa_flush_array(unsigned long *start, int numpages, int cache,
208 int in_flags, struct page **pages)
205{ 209{
206 unsigned int i, level; 210 unsigned int i, level;
207 unsigned long *addr;
208 211
209 BUG_ON(irqs_disabled()); 212 BUG_ON(irqs_disabled());
210 213
@@ -225,14 +228,22 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache)
225 * will cause all other CPUs to flush the same 228 * will cause all other CPUs to flush the same
226 * cachelines: 229 * cachelines:
227 */ 230 */
228 for (i = 0, addr = start; i < numpages; i++, addr++) { 231 for (i = 0; i < numpages; i++) {
229 pte_t *pte = lookup_address(*addr, &level); 232 unsigned long addr;
233 pte_t *pte;
234
235 if (in_flags & CPA_PAGES_ARRAY)
236 addr = (unsigned long)page_address(pages[i]);
237 else
238 addr = start[i];
239
240 pte = lookup_address(addr, &level);
230 241
231 /* 242 /*
232 * Only flush present addresses: 243 * Only flush present addresses:
233 */ 244 */
234 if (pte && (pte_val(*pte) & _PAGE_PRESENT)) 245 if (pte && (pte_val(*pte) & _PAGE_PRESENT))
235 clflush_cache_range((void *) *addr, PAGE_SIZE); 246 clflush_cache_range((void *)addr, PAGE_SIZE);
236 } 247 }
237} 248}
238 249
@@ -584,7 +595,9 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
584 unsigned int level; 595 unsigned int level;
585 pte_t *kpte, old_pte; 596 pte_t *kpte, old_pte;
586 597
587 if (cpa->flags & CPA_ARRAY) 598 if (cpa->flags & CPA_PAGES_ARRAY)
599 address = (unsigned long)page_address(cpa->pages[cpa->curpage]);
600 else if (cpa->flags & CPA_ARRAY)
588 address = cpa->vaddr[cpa->curpage]; 601 address = cpa->vaddr[cpa->curpage];
589 else 602 else
590 address = *cpa->vaddr; 603 address = *cpa->vaddr;
@@ -687,7 +700,9 @@ static int cpa_process_alias(struct cpa_data *cpa)
687 * No need to redo, when the primary call touched the direct 700 * No need to redo, when the primary call touched the direct
688 * mapping already: 701 * mapping already:
689 */ 702 */
690 if (cpa->flags & CPA_ARRAY) 703 if (cpa->flags & CPA_PAGES_ARRAY)
704 vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]);
705 else if (cpa->flags & CPA_ARRAY)
691 vaddr = cpa->vaddr[cpa->curpage]; 706 vaddr = cpa->vaddr[cpa->curpage];
692 else 707 else
693 vaddr = *cpa->vaddr; 708 vaddr = *cpa->vaddr;
@@ -698,7 +713,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
698 alias_cpa = *cpa; 713 alias_cpa = *cpa;
699 temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); 714 temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
700 alias_cpa.vaddr = &temp_cpa_vaddr; 715 alias_cpa.vaddr = &temp_cpa_vaddr;
701 alias_cpa.flags &= ~CPA_ARRAY; 716 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
702 717
703 718
704 ret = __change_page_attr_set_clr(&alias_cpa, 0); 719 ret = __change_page_attr_set_clr(&alias_cpa, 0);
@@ -711,7 +726,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
711 * No need to redo, when the primary call touched the high 726 * No need to redo, when the primary call touched the high
712 * mapping already: 727 * mapping already:
713 */ 728 */
714 if (within(vaddr, (unsigned long) _text, (unsigned long) _end)) 729 if (within(vaddr, (unsigned long) _text, _brk_end))
715 return 0; 730 return 0;
716 731
717 /* 732 /*
@@ -724,7 +739,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
724 alias_cpa = *cpa; 739 alias_cpa = *cpa;
725 temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; 740 temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
726 alias_cpa.vaddr = &temp_cpa_vaddr; 741 alias_cpa.vaddr = &temp_cpa_vaddr;
727 alias_cpa.flags &= ~CPA_ARRAY; 742 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
728 743
729 /* 744 /*
730 * The high mapping range is imprecise, so ignore the return value. 745 * The high mapping range is imprecise, so ignore the return value.
@@ -745,7 +760,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
745 */ 760 */
746 cpa->numpages = numpages; 761 cpa->numpages = numpages;
747 /* for array changes, we can't use large page */ 762 /* for array changes, we can't use large page */
748 if (cpa->flags & CPA_ARRAY) 763 if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
749 cpa->numpages = 1; 764 cpa->numpages = 1;
750 765
751 if (!debug_pagealloc) 766 if (!debug_pagealloc)
@@ -769,7 +784,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
769 */ 784 */
770 BUG_ON(cpa->numpages > numpages); 785 BUG_ON(cpa->numpages > numpages);
771 numpages -= cpa->numpages; 786 numpages -= cpa->numpages;
772 if (cpa->flags & CPA_ARRAY) 787 if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
773 cpa->curpage++; 788 cpa->curpage++;
774 else 789 else
775 *cpa->vaddr += cpa->numpages * PAGE_SIZE; 790 *cpa->vaddr += cpa->numpages * PAGE_SIZE;
@@ -786,7 +801,8 @@ static inline int cache_attr(pgprot_t attr)
786 801
787static int change_page_attr_set_clr(unsigned long *addr, int numpages, 802static int change_page_attr_set_clr(unsigned long *addr, int numpages,
788 pgprot_t mask_set, pgprot_t mask_clr, 803 pgprot_t mask_set, pgprot_t mask_clr,
789 int force_split, int array) 804 int force_split, int in_flag,
805 struct page **pages)
790{ 806{
791 struct cpa_data cpa; 807 struct cpa_data cpa;
792 int ret, cache, checkalias; 808 int ret, cache, checkalias;
@@ -801,15 +817,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
801 return 0; 817 return 0;
802 818
803 /* Ensure we are PAGE_SIZE aligned */ 819 /* Ensure we are PAGE_SIZE aligned */
804 if (!array) { 820 if (in_flag & CPA_ARRAY) {
805 if (*addr & ~PAGE_MASK) {
806 *addr &= PAGE_MASK;
807 /*
808 * People should not be passing in unaligned addresses:
809 */
810 WARN_ON_ONCE(1);
811 }
812 } else {
813 int i; 821 int i;
814 for (i = 0; i < numpages; i++) { 822 for (i = 0; i < numpages; i++) {
815 if (addr[i] & ~PAGE_MASK) { 823 if (addr[i] & ~PAGE_MASK) {
@@ -817,6 +825,18 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
817 WARN_ON_ONCE(1); 825 WARN_ON_ONCE(1);
818 } 826 }
819 } 827 }
828 } else if (!(in_flag & CPA_PAGES_ARRAY)) {
829 /*
830 * in_flag of CPA_PAGES_ARRAY implies it is aligned.
831 * No need to cehck in that case
832 */
833 if (*addr & ~PAGE_MASK) {
834 *addr &= PAGE_MASK;
835 /*
836 * People should not be passing in unaligned addresses:
837 */
838 WARN_ON_ONCE(1);
839 }
820 } 840 }
821 841
822 /* Must avoid aliasing mappings in the highmem code */ 842 /* Must avoid aliasing mappings in the highmem code */
@@ -832,6 +852,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
832 arch_flush_lazy_mmu_mode(); 852 arch_flush_lazy_mmu_mode();
833 853
834 cpa.vaddr = addr; 854 cpa.vaddr = addr;
855 cpa.pages = pages;
835 cpa.numpages = numpages; 856 cpa.numpages = numpages;
836 cpa.mask_set = mask_set; 857 cpa.mask_set = mask_set;
837 cpa.mask_clr = mask_clr; 858 cpa.mask_clr = mask_clr;
@@ -839,8 +860,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
839 cpa.curpage = 0; 860 cpa.curpage = 0;
840 cpa.force_split = force_split; 861 cpa.force_split = force_split;
841 862
842 if (array) 863 if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
843 cpa.flags |= CPA_ARRAY; 864 cpa.flags |= in_flag;
844 865
845 /* No alias checking for _NX bit modifications */ 866 /* No alias checking for _NX bit modifications */
846 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; 867 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
@@ -866,9 +887,10 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 866 * wbinvd): 887 * wbinvd):
867 */ 888 */
868 if (!ret && cpu_has_clflush) { 889 if (!ret && cpu_has_clflush) {
869 if (cpa.flags & CPA_ARRAY) 890 if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
870 cpa_flush_array(addr, numpages, cache); 891 cpa_flush_array(addr, numpages, cache,
871 else 892 cpa.flags, pages);
893 } else
872 cpa_flush_range(*addr, numpages, cache); 894 cpa_flush_range(*addr, numpages, cache);
873 } else 895 } else
874 cpa_flush_all(cache); 896 cpa_flush_all(cache);
@@ -888,14 +910,28 @@ static inline int change_page_attr_set(unsigned long *addr, int numpages,
888 pgprot_t mask, int array) 910 pgprot_t mask, int array)
889{ 911{
890 return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, 912 return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
891 array); 913 (array ? CPA_ARRAY : 0), NULL);
892} 914}
893 915
894static inline int change_page_attr_clear(unsigned long *addr, int numpages, 916static inline int change_page_attr_clear(unsigned long *addr, int numpages,
895 pgprot_t mask, int array) 917 pgprot_t mask, int array)
896{ 918{
897 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, 919 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
898 array); 920 (array ? CPA_ARRAY : 0), NULL);
921}
922
923static inline int cpa_set_pages_array(struct page **pages, int numpages,
924 pgprot_t mask)
925{
926 return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0,
927 CPA_PAGES_ARRAY, pages);
928}
929
930static inline int cpa_clear_pages_array(struct page **pages, int numpages,
931 pgprot_t mask)
932{
933 return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0,
934 CPA_PAGES_ARRAY, pages);
899} 935}
900 936
901int _set_memory_uc(unsigned long addr, int numpages) 937int _set_memory_uc(unsigned long addr, int numpages)
@@ -1043,7 +1079,7 @@ int set_memory_np(unsigned long addr, int numpages)
1043int set_memory_4k(unsigned long addr, int numpages) 1079int set_memory_4k(unsigned long addr, int numpages)
1044{ 1080{
1045 return change_page_attr_set_clr(&addr, numpages, __pgprot(0), 1081 return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
1046 __pgprot(0), 1, 0); 1082 __pgprot(0), 1, 0, NULL);
1047} 1083}
1048 1084
1049int set_pages_uc(struct page *page, int numpages) 1085int set_pages_uc(struct page *page, int numpages)
@@ -1054,6 +1090,35 @@ int set_pages_uc(struct page *page, int numpages)
1054} 1090}
1055EXPORT_SYMBOL(set_pages_uc); 1091EXPORT_SYMBOL(set_pages_uc);
1056 1092
1093int set_pages_array_uc(struct page **pages, int addrinarray)
1094{
1095 unsigned long start;
1096 unsigned long end;
1097 int i;
1098 int free_idx;
1099
1100 for (i = 0; i < addrinarray; i++) {
1101 start = (unsigned long)page_address(pages[i]);
1102 end = start + PAGE_SIZE;
1103 if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
1104 goto err_out;
1105 }
1106
1107 if (cpa_set_pages_array(pages, addrinarray,
1108 __pgprot(_PAGE_CACHE_UC_MINUS)) == 0) {
1109 return 0; /* Success */
1110 }
1111err_out:
1112 free_idx = i;
1113 for (i = 0; i < free_idx; i++) {
1114 start = (unsigned long)page_address(pages[i]);
1115 end = start + PAGE_SIZE;
1116 free_memtype(start, end);
1117 }
1118 return -EINVAL;
1119}
1120EXPORT_SYMBOL(set_pages_array_uc);
1121
1057int set_pages_wb(struct page *page, int numpages) 1122int set_pages_wb(struct page *page, int numpages)
1058{ 1123{
1059 unsigned long addr = (unsigned long)page_address(page); 1124 unsigned long addr = (unsigned long)page_address(page);
@@ -1062,6 +1127,26 @@ int set_pages_wb(struct page *page, int numpages)
1062} 1127}
1063EXPORT_SYMBOL(set_pages_wb); 1128EXPORT_SYMBOL(set_pages_wb);
1064 1129
1130int set_pages_array_wb(struct page **pages, int addrinarray)
1131{
1132 int retval;
1133 unsigned long start;
1134 unsigned long end;
1135 int i;
1136
1137 retval = cpa_clear_pages_array(pages, addrinarray,
1138 __pgprot(_PAGE_CACHE_MASK));
1139
1140 for (i = 0; i < addrinarray; i++) {
1141 start = (unsigned long)page_address(pages[i]);
1142 end = start + PAGE_SIZE;
1143 free_memtype(start, end);
1144 }
1145
1146 return retval;
1147}
1148EXPORT_SYMBOL(set_pages_array_wb);
1149
1065int set_pages_x(struct page *page, int numpages) 1150int set_pages_x(struct page *page, int numpages)
1066{ 1151{
1067 unsigned long addr = (unsigned long)page_address(page); 1152 unsigned long addr = (unsigned long)page_address(page);
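
set_pages_array_uc() above reserves a memtype for every page and, if any reservation fails, walks back over the pages already reserved and frees them before returning an error. The same reserve-all-or-roll-back pattern in a self-contained form; reserve()/release() below are stubs standing in for reserve_memtype()/free_memtype():

#include <stdio.h>

#define NPAGES 4

/* Stubs standing in for reserve_memtype()/free_memtype(). */
static int reserve(int page)
{
        if (page == 2)
                return -1;      /* simulate a conflict on the third page */
        printf("reserved page %d\n", page);
        return 0;
}

static void release(int page)
{
        printf("released page %d\n", page);
}

/* Reserve every page, or undo the ones already done and report failure. */
static int reserve_all(int npages)
{
        int i, done;

        for (i = 0; i < npages; i++) {
                if (reserve(i))
                        goto err_out;
        }
        return 0;

err_out:
        done = i;               /* pages [0, done) were reserved successfully */
        for (i = 0; i < done; i++)
                release(i);
        return -1;
}

int main(void)
{
        printf("reserve_all: %d\n", reserve_all(NPAGES));
        return 0;
}
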
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 2ed37158012d..640339ee4fb2 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -677,10 +677,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
677 is_ram = pat_pagerange_is_ram(paddr, paddr + size); 677 is_ram = pat_pagerange_is_ram(paddr, paddr + size);
678 678
679 /* 679 /*
680 * reserve_pfn_range() doesn't support RAM pages. 680 * reserve_pfn_range() doesn't support RAM pages. Maintain the current
681 * behavior with RAM pages by returning success.
681 */ 682 */
682 if (is_ram != 0) 683 if (is_ram != 0)
683 return -EINVAL; 684 return 0;
684 685
685 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); 686 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
686 if (ret) 687 if (ret)
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index f2e477c91c1b..46c8834aedc0 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -50,7 +50,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
50 } 50 }
51 pte = pte_offset_kernel(pmd, vaddr); 51 pte = pte_offset_kernel(pmd, vaddr);
52 if (pte_val(pteval)) 52 if (pte_val(pteval))
53 set_pte_present(&init_mm, vaddr, pte, pteval); 53 set_pte_at(&init_mm, vaddr, pte, pteval);
54 else 54 else
55 pte_clear(&init_mm, vaddr, pte); 55 pte_clear(&init_mm, vaddr, pte);
56 56
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index a654d59e4483..821e97017e95 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -187,11 +187,6 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
187 cpumask, cpumask_of(smp_processor_id())); 187 cpumask, cpumask_of(smp_processor_id()));
188 188
189 /* 189 /*
190 * Make the above memory operations globally visible before
191 * sending the IPI.
192 */
193 smp_mb();
194 /*
195 * We have to send the IPI only to 190 * We have to send the IPI only to
196 * CPUs affected. 191 * CPUs affected.
197 */ 192 */
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 82d22fc601ae..8c362b96b644 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -90,7 +90,7 @@ static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d)
90 return 0; 90 return 0;
91} 91}
92 92
93static struct dmi_system_id can_skip_pciprobe_dmi_table[] __devinitdata = { 93static const struct dmi_system_id can_skip_pciprobe_dmi_table[] __devinitconst = {
94/* 94/*
95 * Systems where PCI IO resource ISA alignment can be skipped 95 * Systems where PCI IO resource ISA alignment can be skipped
96 * when the ISA enable bit in the bridge control is not set 96 * when the ISA enable bit in the bridge control is not set
@@ -183,7 +183,7 @@ static int __devinit assign_all_busses(const struct dmi_system_id *d)
183} 183}
184#endif 184#endif
185 185
186static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { 186static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = {
187#ifdef __i386__ 187#ifdef __i386__
188/* 188/*
189 * Laptops which need pci=assign-busses to see Cardbus cards 189 * Laptops which need pci=assign-busses to see Cardbus cards
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 7d388d5cf548..9c49919e4d1c 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -356,7 +356,7 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev)
356DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); 356DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video);
357 357
358 358
359static struct dmi_system_id __devinitdata msi_k8t_dmi_table[] = { 359static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = {
360 { 360 {
361 .ident = "MSI-K8T-Neo2Fir", 361 .ident = "MSI-K8T-Neo2Fir",
362 .matches = { 362 .matches = {
@@ -413,7 +413,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,
413 */ 413 */
414static u16 toshiba_line_size; 414static u16 toshiba_line_size;
415 415
416static struct dmi_system_id __devinitdata toshiba_ohci1394_dmi_table[] = { 416static const struct dmi_system_id __devinitconst toshiba_ohci1394_dmi_table[] = {
417 { 417 {
418 .ident = "Toshiba PS5 based laptop", 418 .ident = "Toshiba PS5 based laptop",
419 .matches = { 419 .matches = {
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 5ead808dd70c..f234a37bd428 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -319,6 +319,9 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
319 return -EINVAL; 319 return -EINVAL;
320 } 320 }
321 flags = new_flags; 321 flags = new_flags;
322 vma->vm_page_prot = __pgprot(
323 (pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK) |
324 flags);
322 } 325 }
323 326
324 if (((vma->vm_pgoff < max_low_pfn_mapped) || 327 if (((vma->vm_pgoff < max_low_pfn_mapped) ||
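
The pci_mmap_page_range() hunk keeps vma->vm_page_prot consistent with the cache-type flags that were just validated, by clearing the old cache bits and OR-ing in the new ones. A tiny sketch of that clear-then-set update, with invented mask and flag values:

#include <stdio.h>

#define CACHE_MASK      0x018UL /* stand-in for _PAGE_CACHE_MASK */
#define CACHE_UC_MINUS  0x010UL /* stand-in for one cache type */

int main(void)
{
        unsigned long prot = 0x067UL;   /* pretend page protection bits */
        unsigned long flags = CACHE_UC_MINUS;

        /* Clear the old cache-type bits, then install the new ones. */
        prot = (prot & ~CACHE_MASK) | flags;
        printf("prot=%#lx\n", prot);
        return 0;
}
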
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index cb6afa4ec95c..db3802fb7b84 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1723,9 +1723,9 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
1723{ 1723{
1724 pmd_t *kernel_pmd; 1724 pmd_t *kernel_pmd;
1725 1725
1726 init_pg_tables_start = __pa(pgd); 1726 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
1727 init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; 1727 xen_start_info->nr_pt_frames * PAGE_SIZE +
1728 max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); 1728 512*1024);
1729 1729
1730 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); 1730 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
1731 memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); 1731 memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
@@ -1870,7 +1870,6 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
1870 1870
1871#ifdef CONFIG_X86_PAE 1871#ifdef CONFIG_X86_PAE
1872 .set_pte_atomic = xen_set_pte_atomic, 1872 .set_pte_atomic = xen_set_pte_atomic,
1873 .set_pte_present = xen_set_pte_at,
1874 .pte_clear = xen_pte_clear, 1873 .pte_clear = xen_pte_clear,
1875 .pmd_clear = xen_pmd_clear, 1874 .pmd_clear = xen_pmd_clear,
1876#endif /* CONFIG_X86_PAE */ 1875#endif /* CONFIG_X86_PAE */
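
With init_pg_tables_start/end gone, the Xen path derives max_pfn_mapped directly from the physical base of the boot page tables, the number of page-table frames, and 512 KB of slack. A worked version of that arithmetic with invented example values:

#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define PFN_DOWN(x)     ((x) >> PAGE_SHIFT)

int main(void)
{
        /* Invented example values: physical base of the boot page tables
         * and the number of page-table frames the domain builder set up. */
        unsigned long pt_base_phys = 0x01000000UL;      /* 16 MB */
        unsigned long nr_pt_frames = 8;

        unsigned long max_pfn_mapped =
                PFN_DOWN(pt_base_phys + nr_pt_frames * PAGE_SIZE + 512 * 1024);

        printf("max_pfn_mapped = %lu\n", max_pfn_mapped);
        return 0;
}
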