diff options
Diffstat (limited to 'arch/x86')
200 files changed, 6263 insertions, 4684 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d1430ef6b4f9..fc20fdc0f7f2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -24,6 +24,7 @@ config X86 | |||
24 | select HAVE_UNSTABLE_SCHED_CLOCK | 24 | select HAVE_UNSTABLE_SCHED_CLOCK |
25 | select HAVE_IDE | 25 | select HAVE_IDE |
26 | select HAVE_OPROFILE | 26 | select HAVE_OPROFILE |
27 | select HAVE_PERF_COUNTERS if (!M386 && !M486) | ||
27 | select HAVE_IOREMAP_PROT | 28 | select HAVE_IOREMAP_PROT |
28 | select HAVE_KPROBES | 29 | select HAVE_KPROBES |
29 | select ARCH_WANT_OPTIONAL_GPIOLIB | 30 | select ARCH_WANT_OPTIONAL_GPIOLIB |
@@ -37,7 +38,7 @@ config X86 | |||
37 | select HAVE_FUNCTION_GRAPH_FP_TEST | 38 | select HAVE_FUNCTION_GRAPH_FP_TEST |
38 | select HAVE_FUNCTION_TRACE_MCOUNT_TEST | 39 | select HAVE_FUNCTION_TRACE_MCOUNT_TEST |
39 | select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE | 40 | select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE |
40 | select HAVE_FTRACE_SYSCALLS | 41 | select HAVE_SYSCALL_TRACEPOINTS |
41 | select HAVE_KVM | 42 | select HAVE_KVM |
42 | select HAVE_ARCH_KGDB | 43 | select HAVE_ARCH_KGDB |
43 | select HAVE_ARCH_TRACEHOOK | 44 | select HAVE_ARCH_TRACEHOOK |
@@ -585,7 +586,6 @@ config GART_IOMMU | |||
585 | bool "GART IOMMU support" if EMBEDDED | 586 | bool "GART IOMMU support" if EMBEDDED |
586 | default y | 587 | default y |
587 | select SWIOTLB | 588 | select SWIOTLB |
588 | select AGP | ||
589 | depends on X86_64 && PCI | 589 | depends on X86_64 && PCI |
590 | ---help--- | 590 | ---help--- |
591 | Support for full DMA access of devices with 32bit memory access only | 591 | Support for full DMA access of devices with 32bit memory access only |
@@ -742,7 +742,6 @@ config X86_UP_IOAPIC | |||
742 | config X86_LOCAL_APIC | 742 | config X86_LOCAL_APIC |
743 | def_bool y | 743 | def_bool y |
744 | depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC | 744 | depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC |
745 | select HAVE_PERF_COUNTERS if (!M386 && !M486) | ||
746 | 745 | ||
747 | config X86_IO_APIC | 746 | config X86_IO_APIC |
748 | def_bool y | 747 | def_bool y |
@@ -1913,25 +1912,26 @@ config DMAR_DEFAULT_ON | |||
1913 | recommended you say N here while the DMAR code remains | 1912 | recommended you say N here while the DMAR code remains |
1914 | experimental. | 1913 | experimental. |
1915 | 1914 | ||
1916 | config DMAR_GFX_WA | 1915 | config DMAR_BROKEN_GFX_WA |
1917 | def_bool y | 1916 | def_bool n |
1918 | prompt "Support for Graphics workaround" | 1917 | prompt "Workaround broken graphics drivers (going away soon)" |
1919 | depends on DMAR | 1918 | depends on DMAR |
1920 | ---help--- | 1919 | ---help--- |
1921 | Current Graphics drivers tend to use physical address | 1920 | Current Graphics drivers tend to use physical address |
1922 | for DMA and avoid using DMA APIs. Setting this config | 1921 | for DMA and avoid using DMA APIs. Setting this config |
1923 | option permits the IOMMU driver to set a unity map for | 1922 | option permits the IOMMU driver to set a unity map for |
1924 | all the OS-visible memory. Hence the driver can continue | 1923 | all the OS-visible memory. Hence the driver can continue |
1925 | to use physical addresses for DMA. | 1924 | to use physical addresses for DMA, at least until this |
1925 | option is removed in the 2.6.32 kernel. | ||
1926 | 1926 | ||
1927 | config DMAR_FLOPPY_WA | 1927 | config DMAR_FLOPPY_WA |
1928 | def_bool y | 1928 | def_bool y |
1929 | depends on DMAR | 1929 | depends on DMAR |
1930 | ---help--- | 1930 | ---help--- |
1931 | Floppy disk drivers are know to bypass DMA API calls | 1931 | Floppy disk drivers are known to bypass DMA API calls |
1932 | thereby failing to work when IOMMU is enabled. This | 1932 | thereby failing to work when IOMMU is enabled. This |
1933 | workaround will setup a 1:1 mapping for the first | 1933 | workaround will setup a 1:1 mapping for the first |
1934 | 16M to make floppy (an ISA device) work. | 1934 | 16MiB to make floppy (an ISA device) work. |
1935 | 1935 | ||
1936 | config INTR_REMAP | 1936 | config INTR_REMAP |
1937 | bool "Support for Interrupt Remapping (EXPERIMENTAL)" | 1937 | bool "Support for Interrupt Remapping (EXPERIMENTAL)" |
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 8130334329c0..527519b8a9f9 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu | |||
@@ -262,6 +262,15 @@ config MCORE2 | |||
262 | family in /proc/cpuinfo. Newer ones have 6 and older ones 15 | 262 | family in /proc/cpuinfo. Newer ones have 6 and older ones 15 |
263 | (not a typo) | 263 | (not a typo) |
264 | 264 | ||
265 | config MATOM | ||
266 | bool "Intel Atom" | ||
267 | ---help--- | ||
268 | |||
269 | Select this for the Intel Atom platform. Intel Atom CPUs have an | ||
270 | in-order pipelining architecture and thus can benefit from | ||
271 | accordingly optimized code. Use a recent GCC with specific Atom | ||
272 | support in order to fully benefit from selecting this option. | ||
273 | |||
265 | config GENERIC_CPU | 274 | config GENERIC_CPU |
266 | bool "Generic-x86-64" | 275 | bool "Generic-x86-64" |
267 | depends on X86_64 | 276 | depends on X86_64 |
@@ -295,7 +304,7 @@ config X86_CPU | |||
295 | config X86_L1_CACHE_BYTES | 304 | config X86_L1_CACHE_BYTES |
296 | int | 305 | int |
297 | default "128" if MPSC | 306 | default "128" if MPSC |
298 | default "64" if GENERIC_CPU || MK8 || MCORE2 || X86_32 | 307 | default "64" if GENERIC_CPU || MK8 || MCORE2 || MATOM || X86_32 |
299 | 308 | ||
300 | config X86_INTERNODE_CACHE_BYTES | 309 | config X86_INTERNODE_CACHE_BYTES |
301 | int | 310 | int |
@@ -310,7 +319,7 @@ config X86_L1_CACHE_SHIFT | |||
310 | default "7" if MPENTIUM4 || MPSC | 319 | default "7" if MPENTIUM4 || MPSC |
311 | default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 | 320 | default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 |
312 | default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX | 321 | default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX |
313 | default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU | 322 | default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU |
314 | 323 | ||
315 | config X86_XADD | 324 | config X86_XADD |
316 | def_bool y | 325 | def_bool y |
@@ -359,7 +368,7 @@ config X86_INTEL_USERCOPY | |||
359 | 368 | ||
360 | config X86_USE_PPRO_CHECKSUM | 369 | config X86_USE_PPRO_CHECKSUM |
361 | def_bool y | 370 | def_bool y |
362 | depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 | 371 | depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM |
363 | 372 | ||
364 | config X86_USE_3DNOW | 373 | config X86_USE_3DNOW |
365 | def_bool y | 374 | def_bool y |
@@ -387,7 +396,7 @@ config X86_P6_NOP | |||
387 | 396 | ||
388 | config X86_TSC | 397 | config X86_TSC |
389 | def_bool y | 398 | def_bool y |
390 | depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 | 399 | depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64 |
391 | 400 | ||
392 | config X86_CMPXCHG64 | 401 | config X86_CMPXCHG64 |
393 | def_bool y | 402 | def_bool y |
@@ -397,7 +406,7 @@ config X86_CMPXCHG64 | |||
397 | # generates cmov. | 406 | # generates cmov. |
398 | config X86_CMOV | 407 | config X86_CMOV |
399 | def_bool y | 408 | def_bool y |
400 | depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64) | 409 | depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM) |
401 | 410 | ||
402 | config X86_MINIMUM_CPU_FAMILY | 411 | config X86_MINIMUM_CPU_FAMILY |
403 | int | 412 | int |
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1b68659c41b4..7983c420eaf2 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile | |||
@@ -32,8 +32,8 @@ ifeq ($(CONFIG_X86_32),y) | |||
32 | 32 | ||
33 | # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use | 33 | # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use |
34 | # a lot more stack due to the lack of sharing of stacklots: | 34 | # a lot more stack due to the lack of sharing of stacklots: |
35 | KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \ | 35 | KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0400, \ |
36 | echo $(call cc-option,-fno-unit-at-a-time); fi ;) | 36 | $(call cc-option,-fno-unit-at-a-time)) |
37 | 37 | ||
38 | # CPU-specific tuning. Anything which can be shared with UML should go here. | 38 | # CPU-specific tuning. Anything which can be shared with UML should go here. |
39 | include $(srctree)/arch/x86/Makefile_32.cpu | 39 | include $(srctree)/arch/x86/Makefile_32.cpu |
@@ -55,6 +55,8 @@ else | |||
55 | 55 | ||
56 | cflags-$(CONFIG_MCORE2) += \ | 56 | cflags-$(CONFIG_MCORE2) += \ |
57 | $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) | 57 | $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) |
58 | cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \ | ||
59 | $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) | ||
58 | cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) | 60 | cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) |
59 | KBUILD_CFLAGS += $(cflags-y) | 61 | KBUILD_CFLAGS += $(cflags-y) |
60 | 62 | ||
@@ -72,7 +74,7 @@ endif | |||
72 | 74 | ||
73 | ifdef CONFIG_CC_STACKPROTECTOR | 75 | ifdef CONFIG_CC_STACKPROTECTOR |
74 | cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh | 76 | cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh |
75 | ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC)),y) | 77 | ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y) |
76 | stackp-y := -fstack-protector | 78 | stackp-y := -fstack-protector |
77 | stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all | 79 | stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all |
78 | KBUILD_CFLAGS += $(stackp-y) | 80 | KBUILD_CFLAGS += $(stackp-y) |
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 80177ec052f0..30e9a264f69d 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu | |||
@@ -33,6 +33,8 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-f | |||
33 | cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) | 33 | cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) |
34 | cflags-$(CONFIG_MVIAC7) += -march=i686 | 34 | cflags-$(CONFIG_MVIAC7) += -march=i686 |
35 | cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) | 35 | cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) |
36 | cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \ | ||
37 | $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) | ||
36 | 38 | ||
37 | # AMD Elan support | 39 | # AMD Elan support |
38 | cflags-$(CONFIG_X86_ELAN) += -march=i486 | 40 | cflags-$(CONFIG_X86_ELAN) += -march=i486 |
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index e2ff504b4ddc..f8ed0658404c 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile | |||
@@ -4,7 +4,7 @@ | |||
4 | # create a compressed vmlinux image from the original vmlinux | 4 | # create a compressed vmlinux image from the original vmlinux |
5 | # | 5 | # |
6 | 6 | ||
7 | targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma head_$(BITS).o misc.o piggy.o | 7 | targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma head_$(BITS).o misc.o piggy.o |
8 | 8 | ||
9 | KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 | 9 | KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 |
10 | KBUILD_CFLAGS += -fno-strict-aliasing -fPIC | 10 | KBUILD_CFLAGS += -fno-strict-aliasing -fPIC |
diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c index d660be492363..49e0c18833e0 100644 --- a/arch/x86/boot/video-bios.c +++ b/arch/x86/boot/video-bios.c | |||
@@ -37,14 +37,13 @@ static int set_bios_mode(u8 mode) | |||
37 | ireg.al = mode; /* AH=0x00 Set Video Mode */ | 37 | ireg.al = mode; /* AH=0x00 Set Video Mode */ |
38 | intcall(0x10, &ireg, NULL); | 38 | intcall(0x10, &ireg, NULL); |
39 | 39 | ||
40 | |||
41 | ireg.ah = 0x0f; /* Get Current Video Mode */ | 40 | ireg.ah = 0x0f; /* Get Current Video Mode */ |
42 | intcall(0x10, &ireg, &oreg); | 41 | intcall(0x10, &ireg, &oreg); |
43 | 42 | ||
44 | do_restore = 1; /* Assume video contents were lost */ | 43 | do_restore = 1; /* Assume video contents were lost */ |
45 | 44 | ||
46 | /* Not all BIOSes are clean with the top bit */ | 45 | /* Not all BIOSes are clean with the top bit */ |
47 | new_mode = ireg.al & 0x7f; | 46 | new_mode = oreg.al & 0x7f; |
48 | 47 | ||
49 | if (new_mode == mode) | 48 | if (new_mode == mode) |
50 | return 0; /* Mode change OK */ | 49 | return 0; /* Mode change OK */ |
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index d7ef26ba4540..11e8c6eb80a1 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c | |||
@@ -44,7 +44,7 @@ static int vesa_probe(void) | |||
44 | ireg.di = (size_t)&vginfo; | 44 | ireg.di = (size_t)&vginfo; |
45 | intcall(0x10, &ireg, &oreg); | 45 | intcall(0x10, &ireg, &oreg); |
46 | 46 | ||
47 | if (ireg.ax != 0x004f || | 47 | if (oreg.ax != 0x004f || |
48 | vginfo.signature != VESA_MAGIC || | 48 | vginfo.signature != VESA_MAGIC || |
49 | vginfo.version < 0x0102) | 49 | vginfo.version < 0x0102) |
50 | return 0; /* Not present */ | 50 | return 0; /* Not present */ |
@@ -68,7 +68,7 @@ static int vesa_probe(void) | |||
68 | ireg.di = (size_t)&vminfo; | 68 | ireg.di = (size_t)&vminfo; |
69 | intcall(0x10, &ireg, &oreg); | 69 | intcall(0x10, &ireg, &oreg); |
70 | 70 | ||
71 | if (ireg.ax != 0x004f) | 71 | if (oreg.ax != 0x004f) |
72 | continue; | 72 | continue; |
73 | 73 | ||
74 | if ((vminfo.mode_attr & 0x15) == 0x05) { | 74 | if ((vminfo.mode_attr & 0x15) == 0x05) { |
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index edb992ebef92..d28fad19654a 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig | |||
@@ -2355,7 +2355,7 @@ CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y | |||
2355 | CONFIG_HAVE_DYNAMIC_FTRACE=y | 2355 | CONFIG_HAVE_DYNAMIC_FTRACE=y |
2356 | CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y | 2356 | CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y |
2357 | CONFIG_HAVE_HW_BRANCH_TRACER=y | 2357 | CONFIG_HAVE_HW_BRANCH_TRACER=y |
2358 | CONFIG_HAVE_FTRACE_SYSCALLS=y | 2358 | CONFIG_HAVE_SYSCALL_TRACEPOINTS=y |
2359 | CONFIG_RING_BUFFER=y | 2359 | CONFIG_RING_BUFFER=y |
2360 | CONFIG_TRACING=y | 2360 | CONFIG_TRACING=y |
2361 | CONFIG_TRACING_SUPPORT=y | 2361 | CONFIG_TRACING_SUPPORT=y |
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index cee1dd2e69b2..6c86acd847a4 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig | |||
@@ -2329,7 +2329,7 @@ CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y | |||
2329 | CONFIG_HAVE_DYNAMIC_FTRACE=y | 2329 | CONFIG_HAVE_DYNAMIC_FTRACE=y |
2330 | CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y | 2330 | CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y |
2331 | CONFIG_HAVE_HW_BRANCH_TRACER=y | 2331 | CONFIG_HAVE_HW_BRANCH_TRACER=y |
2332 | CONFIG_HAVE_FTRACE_SYSCALLS=y | 2332 | CONFIG_HAVE_SYSCALL_TRACEPOINTS=y |
2333 | CONFIG_RING_BUFFER=y | 2333 | CONFIG_RING_BUFFER=y |
2334 | CONFIG_TRACING=y | 2334 | CONFIG_TRACING=y |
2335 | CONFIG_TRACING_SUPPORT=y | 2335 | CONFIG_TRACING_SUPPORT=y |
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index c580c5ec1cad..585edebe12cf 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -59,13 +59,6 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, | |||
59 | asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, | 59 | asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, |
60 | const u8 *in, unsigned int len, u8 *iv); | 60 | const u8 *in, unsigned int len, u8 *iv); |
61 | 61 | ||
62 | static inline int kernel_fpu_using(void) | ||
63 | { | ||
64 | if (in_interrupt() && !(read_cr0() & X86_CR0_TS)) | ||
65 | return 1; | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) | 62 | static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) |
70 | { | 63 | { |
71 | unsigned long addr = (unsigned long)raw_ctx; | 64 | unsigned long addr = (unsigned long)raw_ctx; |
@@ -89,7 +82,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, | |||
89 | return -EINVAL; | 82 | return -EINVAL; |
90 | } | 83 | } |
91 | 84 | ||
92 | if (kernel_fpu_using()) | 85 | if (irq_fpu_usable()) |
93 | err = crypto_aes_expand_key(ctx, in_key, key_len); | 86 | err = crypto_aes_expand_key(ctx, in_key, key_len); |
94 | else { | 87 | else { |
95 | kernel_fpu_begin(); | 88 | kernel_fpu_begin(); |
@@ -110,7 +103,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | |||
110 | { | 103 | { |
111 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); | 104 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); |
112 | 105 | ||
113 | if (kernel_fpu_using()) | 106 | if (irq_fpu_usable()) |
114 | crypto_aes_encrypt_x86(ctx, dst, src); | 107 | crypto_aes_encrypt_x86(ctx, dst, src); |
115 | else { | 108 | else { |
116 | kernel_fpu_begin(); | 109 | kernel_fpu_begin(); |
@@ -123,7 +116,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | |||
123 | { | 116 | { |
124 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); | 117 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); |
125 | 118 | ||
126 | if (kernel_fpu_using()) | 119 | if (irq_fpu_usable()) |
127 | crypto_aes_decrypt_x86(ctx, dst, src); | 120 | crypto_aes_decrypt_x86(ctx, dst, src); |
128 | else { | 121 | else { |
129 | kernel_fpu_begin(); | 122 | kernel_fpu_begin(); |
@@ -349,7 +342,7 @@ static int ablk_encrypt(struct ablkcipher_request *req) | |||
349 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | 342 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); |
350 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); | 343 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); |
351 | 344 | ||
352 | if (kernel_fpu_using()) { | 345 | if (irq_fpu_usable()) { |
353 | struct ablkcipher_request *cryptd_req = | 346 | struct ablkcipher_request *cryptd_req = |
354 | ablkcipher_request_ctx(req); | 347 | ablkcipher_request_ctx(req); |
355 | memcpy(cryptd_req, req, sizeof(*req)); | 348 | memcpy(cryptd_req, req, sizeof(*req)); |
@@ -370,7 +363,7 @@ static int ablk_decrypt(struct ablkcipher_request *req) | |||
370 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | 363 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); |
371 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); | 364 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); |
372 | 365 | ||
373 | if (kernel_fpu_using()) { | 366 | if (irq_fpu_usable()) { |
374 | struct ablkcipher_request *cryptd_req = | 367 | struct ablkcipher_request *cryptd_req = |
375 | ablkcipher_request_ctx(req); | 368 | ablkcipher_request_ctx(req); |
376 | memcpy(cryptd_req, req, sizeof(*req)); | 369 | memcpy(cryptd_req, req, sizeof(*req)); |
@@ -636,7 +629,7 @@ static int __init aesni_init(void) | |||
636 | int err; | 629 | int err; |
637 | 630 | ||
638 | if (!cpu_has_aes) { | 631 | if (!cpu_has_aes) { |
639 | printk(KERN_ERR "Intel AES-NI instructions are not detected.\n"); | 632 | printk(KERN_INFO "Intel AES-NI instructions are not detected.\n"); |
640 | return -ENODEV; | 633 | return -ENODEV; |
641 | } | 634 | } |
642 | if ((err = crypto_register_alg(&aesni_alg))) | 635 | if ((err = crypto_register_alg(&aesni_alg))) |
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index e590261ba059..ba331bfd1112 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -537,7 +537,7 @@ ia32_sys_call_table: | |||
537 | .quad sys_mkdir | 537 | .quad sys_mkdir |
538 | .quad sys_rmdir /* 40 */ | 538 | .quad sys_rmdir /* 40 */ |
539 | .quad sys_dup | 539 | .quad sys_dup |
540 | .quad sys32_pipe | 540 | .quad sys_pipe |
541 | .quad compat_sys_times | 541 | .quad compat_sys_times |
542 | .quad quiet_ni_syscall /* old prof syscall holder */ | 542 | .quad quiet_ni_syscall /* old prof syscall holder */ |
543 | .quad sys_brk /* 45 */ | 543 | .quad sys_brk /* 45 */ |
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 085a8c35f149..9f5527198825 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c | |||
@@ -189,20 +189,6 @@ asmlinkage long sys32_mprotect(unsigned long start, size_t len, | |||
189 | return sys_mprotect(start, len, prot); | 189 | return sys_mprotect(start, len, prot); |
190 | } | 190 | } |
191 | 191 | ||
192 | asmlinkage long sys32_pipe(int __user *fd) | ||
193 | { | ||
194 | int retval; | ||
195 | int fds[2]; | ||
196 | |||
197 | retval = do_pipe_flags(fds, 0); | ||
198 | if (retval) | ||
199 | goto out; | ||
200 | if (copy_to_user(fd, fds, sizeof(fds))) | ||
201 | retval = -EFAULT; | ||
202 | out: | ||
203 | return retval; | ||
204 | } | ||
205 | |||
206 | asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act, | 192 | asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act, |
207 | struct sigaction32 __user *oact, | 193 | struct sigaction32 __user *oact, |
208 | unsigned int sigsetsize) | 194 | unsigned int sigsetsize) |
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 1a37bcdc8606..c240efc74e00 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h | |||
@@ -73,8 +73,6 @@ static inline void alternatives_smp_module_del(struct module *mod) {} | |||
73 | static inline void alternatives_smp_switch(int smp) {} | 73 | static inline void alternatives_smp_switch(int smp) {} |
74 | #endif /* CONFIG_SMP */ | 74 | #endif /* CONFIG_SMP */ |
75 | 75 | ||
76 | const unsigned char *const *find_nop_table(void); | ||
77 | |||
78 | /* alternative assembly primitive: */ | 76 | /* alternative assembly primitive: */ |
79 | #define ALTERNATIVE(oldinstr, newinstr, feature) \ | 77 | #define ALTERNATIVE(oldinstr, newinstr, feature) \ |
80 | \ | 78 | \ |
@@ -144,8 +142,6 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, | |||
144 | #define __parainstructions_end NULL | 142 | #define __parainstructions_end NULL |
145 | #endif | 143 | #endif |
146 | 144 | ||
147 | extern void add_nops(void *insns, unsigned int len); | ||
148 | |||
149 | /* | 145 | /* |
150 | * Clear and restore the kernel write-protection flag on the local CPU. | 146 | * Clear and restore the kernel write-protection flag on the local CPU. |
151 | * Allows the kernel to edit read-only pages. | 147 | * Allows the kernel to edit read-only pages. |
@@ -161,10 +157,7 @@ extern void add_nops(void *insns, unsigned int len); | |||
161 | * Intel's errata. | 157 | * Intel's errata. |
162 | * On the local CPU you need to be protected again NMI or MCE handlers seeing an | 158 | * On the local CPU you need to be protected again NMI or MCE handlers seeing an |
163 | * inconsistent instruction while you patch. | 159 | * inconsistent instruction while you patch. |
164 | * The _early version expects the memory to already be RW. | ||
165 | */ | 160 | */ |
166 | |||
167 | extern void *text_poke(void *addr, const void *opcode, size_t len); | 161 | extern void *text_poke(void *addr, const void *opcode, size_t len); |
168 | extern void *text_poke_early(void *addr, const void *opcode, size_t len); | ||
169 | 162 | ||
170 | #endif /* _ASM_X86_ALTERNATIVE_H */ | 163 | #endif /* _ASM_X86_ALTERNATIVE_H */ |
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index bdf96f119f06..ac95995b7bad 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h | |||
@@ -25,6 +25,7 @@ | |||
25 | #ifdef CONFIG_AMD_IOMMU | 25 | #ifdef CONFIG_AMD_IOMMU |
26 | extern int amd_iommu_init(void); | 26 | extern int amd_iommu_init(void); |
27 | extern int amd_iommu_init_dma_ops(void); | 27 | extern int amd_iommu_init_dma_ops(void); |
28 | extern int amd_iommu_init_passthrough(void); | ||
28 | extern void amd_iommu_detect(void); | 29 | extern void amd_iommu_detect(void); |
29 | extern irqreturn_t amd_iommu_int_handler(int irq, void *data); | 30 | extern irqreturn_t amd_iommu_int_handler(int irq, void *data); |
30 | extern void amd_iommu_flush_all_domains(void); | 31 | extern void amd_iommu_flush_all_domains(void); |
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 0c878caaa0a2..2a2cc7a78a81 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h | |||
@@ -143,22 +143,29 @@ | |||
143 | #define EVT_BUFFER_SIZE 8192 /* 512 entries */ | 143 | #define EVT_BUFFER_SIZE 8192 /* 512 entries */ |
144 | #define EVT_LEN_MASK (0x9ULL << 56) | 144 | #define EVT_LEN_MASK (0x9ULL << 56) |
145 | 145 | ||
146 | #define PAGE_MODE_NONE 0x00 | ||
146 | #define PAGE_MODE_1_LEVEL 0x01 | 147 | #define PAGE_MODE_1_LEVEL 0x01 |
147 | #define PAGE_MODE_2_LEVEL 0x02 | 148 | #define PAGE_MODE_2_LEVEL 0x02 |
148 | #define PAGE_MODE_3_LEVEL 0x03 | 149 | #define PAGE_MODE_3_LEVEL 0x03 |
149 | 150 | #define PAGE_MODE_4_LEVEL 0x04 | |
150 | #define IOMMU_PDE_NL_0 0x000ULL | 151 | #define PAGE_MODE_5_LEVEL 0x05 |
151 | #define IOMMU_PDE_NL_1 0x200ULL | 152 | #define PAGE_MODE_6_LEVEL 0x06 |
152 | #define IOMMU_PDE_NL_2 0x400ULL | 153 | |
153 | #define IOMMU_PDE_NL_3 0x600ULL | 154 | #define PM_LEVEL_SHIFT(x) (12 + ((x) * 9)) |
154 | 155 | #define PM_LEVEL_SIZE(x) (((x) < 6) ? \ | |
155 | #define IOMMU_PTE_L2_INDEX(address) (((address) >> 30) & 0x1ffULL) | 156 | ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \ |
156 | #define IOMMU_PTE_L1_INDEX(address) (((address) >> 21) & 0x1ffULL) | 157 | (0xffffffffffffffffULL)) |
157 | #define IOMMU_PTE_L0_INDEX(address) (((address) >> 12) & 0x1ffULL) | 158 | #define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL) |
158 | 159 | #define PM_LEVEL_ENC(x) (((x) << 9) & 0xe00ULL) | |
159 | #define IOMMU_MAP_SIZE_L1 (1ULL << 21) | 160 | #define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \ |
160 | #define IOMMU_MAP_SIZE_L2 (1ULL << 30) | 161 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) |
161 | #define IOMMU_MAP_SIZE_L3 (1ULL << 39) | 162 | #define PM_PTE_LEVEL(pte) (((pte) >> 9) & 0x7ULL) |
163 | |||
164 | #define PM_MAP_4k 0 | ||
165 | #define PM_ADDR_MASK 0x000ffffffffff000ULL | ||
166 | #define PM_MAP_MASK(lvl) (PM_ADDR_MASK & \ | ||
167 | (~((1ULL << (12 + ((lvl) * 9))) - 1))) | ||
168 | #define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr)) | ||
162 | 169 | ||
163 | #define IOMMU_PTE_P (1ULL << 0) | 170 | #define IOMMU_PTE_P (1ULL << 0) |
164 | #define IOMMU_PTE_TV (1ULL << 1) | 171 | #define IOMMU_PTE_TV (1ULL << 1) |
@@ -167,11 +174,6 @@ | |||
167 | #define IOMMU_PTE_IR (1ULL << 61) | 174 | #define IOMMU_PTE_IR (1ULL << 61) |
168 | #define IOMMU_PTE_IW (1ULL << 62) | 175 | #define IOMMU_PTE_IW (1ULL << 62) |
169 | 176 | ||
170 | #define IOMMU_L1_PDE(address) \ | ||
171 | ((address) | IOMMU_PDE_NL_1 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) | ||
172 | #define IOMMU_L2_PDE(address) \ | ||
173 | ((address) | IOMMU_PDE_NL_2 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) | ||
174 | |||
175 | #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) | 177 | #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) |
176 | #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) | 178 | #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) |
177 | #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) | 179 | #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) |
@@ -194,11 +196,14 @@ | |||
194 | #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ | 196 | #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ |
195 | #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops | 197 | #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops |
196 | domain for an IOMMU */ | 198 | domain for an IOMMU */ |
199 | #define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page | ||
200 | translation */ | ||
201 | |||
197 | extern bool amd_iommu_dump; | 202 | extern bool amd_iommu_dump; |
198 | #define DUMP_printk(format, arg...) \ | 203 | #define DUMP_printk(format, arg...) \ |
199 | do { \ | 204 | do { \ |
200 | if (amd_iommu_dump) \ | 205 | if (amd_iommu_dump) \ |
201 | printk(KERN_INFO "AMD IOMMU: " format, ## arg); \ | 206 | printk(KERN_INFO "AMD-Vi: " format, ## arg); \ |
202 | } while(0); | 207 | } while(0); |
203 | 208 | ||
204 | /* | 209 | /* |
@@ -226,6 +231,7 @@ struct protection_domain { | |||
226 | int mode; /* paging mode (0-6 levels) */ | 231 | int mode; /* paging mode (0-6 levels) */ |
227 | u64 *pt_root; /* page table root pointer */ | 232 | u64 *pt_root; /* page table root pointer */ |
228 | unsigned long flags; /* flags to find out type of domain */ | 233 | unsigned long flags; /* flags to find out type of domain */ |
234 | bool updated; /* complete domain flush required */ | ||
229 | unsigned dev_cnt; /* devices assigned to this domain */ | 235 | unsigned dev_cnt; /* devices assigned to this domain */ |
230 | void *priv; /* private data */ | 236 | void *priv; /* private data */ |
231 | }; | 237 | }; |
@@ -337,6 +343,9 @@ struct amd_iommu { | |||
337 | /* if one, we need to send a completion wait command */ | 343 | /* if one, we need to send a completion wait command */ |
338 | bool need_sync; | 344 | bool need_sync; |
339 | 345 | ||
346 | /* becomes true if a command buffer reset is running */ | ||
347 | bool reset_in_progress; | ||
348 | |||
340 | /* default dma_ops domain for that IOMMU */ | 349 | /* default dma_ops domain for that IOMMU */ |
341 | struct dma_ops_domain *default_dom; | 350 | struct dma_ops_domain *default_dom; |
342 | }; | 351 | }; |
@@ -457,4 +466,7 @@ static inline void amd_iommu_stats_init(void) { } | |||
457 | 466 | ||
458 | #endif /* CONFIG_AMD_IOMMU_STATS */ | 467 | #endif /* CONFIG_AMD_IOMMU_STATS */ |
459 | 468 | ||
469 | /* some function prototypes */ | ||
470 | extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); | ||
471 | |||
460 | #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ | 472 | #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ |
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index bb7d47925847..586b7adb8e53 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -183,6 +183,10 @@ static inline int x2apic_enabled(void) | |||
183 | } | 183 | } |
184 | 184 | ||
185 | #define x2apic_supported() (cpu_has_x2apic) | 185 | #define x2apic_supported() (cpu_has_x2apic) |
186 | static inline void x2apic_force_phys(void) | ||
187 | { | ||
188 | x2apic_phys = 1; | ||
189 | } | ||
186 | #else | 190 | #else |
187 | static inline void check_x2apic(void) | 191 | static inline void check_x2apic(void) |
188 | { | 192 | { |
@@ -194,6 +198,9 @@ static inline int x2apic_enabled(void) | |||
194 | { | 198 | { |
195 | return 0; | 199 | return 0; |
196 | } | 200 | } |
201 | static inline void x2apic_force_phys(void) | ||
202 | { | ||
203 | } | ||
197 | 204 | ||
198 | #define x2apic_preenabled 0 | 205 | #define x2apic_preenabled 0 |
199 | #define x2apic_supported() 0 | 206 | #define x2apic_supported() 0 |
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 7ddb36ab933b..7386bfa4f4bc 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h | |||
@@ -8,7 +8,8 @@ | |||
8 | * Ingo Molnar <mingo@redhat.com>, 1999, 2000 | 8 | * Ingo Molnar <mingo@redhat.com>, 1999, 2000 |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #define APIC_DEFAULT_PHYS_BASE 0xfee00000 | 11 | #define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000 |
12 | #define APIC_DEFAULT_PHYS_BASE 0xfee00000 | ||
12 | 13 | ||
13 | #define APIC_ID 0x20 | 14 | #define APIC_ID 0x20 |
14 | 15 | ||
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 56be78f582f0..b3ed1e1460ff 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h | |||
@@ -3,7 +3,7 @@ | |||
3 | 3 | ||
4 | #ifdef __ASSEMBLY__ | 4 | #ifdef __ASSEMBLY__ |
5 | # define __ASM_FORM(x) x | 5 | # define __ASM_FORM(x) x |
6 | # define __ASM_EX_SEC .section __ex_table | 6 | # define __ASM_EX_SEC .section __ex_table, "a" |
7 | #else | 7 | #else |
8 | # define __ASM_FORM(x) " " #x " " | 8 | # define __ASM_FORM(x) " " #x " " |
9 | # define __ASM_EX_SEC " .section __ex_table,\"a\"\n" | 9 | # define __ASM_EX_SEC " .section __ex_table,\"a\"\n" |
@@ -38,10 +38,18 @@ | |||
38 | #define _ASM_DI __ASM_REG(di) | 38 | #define _ASM_DI __ASM_REG(di) |
39 | 39 | ||
40 | /* Exception table entry */ | 40 | /* Exception table entry */ |
41 | #ifdef __ASSEMBLY__ | ||
42 | # define _ASM_EXTABLE(from,to) \ | ||
43 | __ASM_EX_SEC ; \ | ||
44 | _ASM_ALIGN ; \ | ||
45 | _ASM_PTR from , to ; \ | ||
46 | .previous | ||
47 | #else | ||
41 | # define _ASM_EXTABLE(from,to) \ | 48 | # define _ASM_EXTABLE(from,to) \ |
42 | __ASM_EX_SEC \ | 49 | __ASM_EX_SEC \ |
43 | _ASM_ALIGN "\n" \ | 50 | _ASM_ALIGN "\n" \ |
44 | _ASM_PTR #from "," #to "\n" \ | 51 | _ASM_PTR #from "," #to "\n" \ |
45 | " .previous\n" | 52 | " .previous\n" |
53 | #endif | ||
46 | 54 | ||
47 | #endif /* _ASM_X86_ASM_H */ | 55 | #endif /* _ASM_X86_ASM_H */ |
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index 2503d4e64c2a..dc5a667ff791 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h | |||
@@ -19,7 +19,10 @@ | |||
19 | * | 19 | * |
20 | * Atomically reads the value of @v. | 20 | * Atomically reads the value of @v. |
21 | */ | 21 | */ |
22 | #define atomic_read(v) ((v)->counter) | 22 | static inline int atomic_read(const atomic_t *v) |
23 | { | ||
24 | return v->counter; | ||
25 | } | ||
23 | 26 | ||
24 | /** | 27 | /** |
25 | * atomic_set - set atomic variable | 28 | * atomic_set - set atomic variable |
@@ -28,7 +31,10 @@ | |||
28 | * | 31 | * |
29 | * Atomically sets the value of @v to @i. | 32 | * Atomically sets the value of @v to @i. |
30 | */ | 33 | */ |
31 | #define atomic_set(v, i) (((v)->counter) = (i)) | 34 | static inline void atomic_set(atomic_t *v, int i) |
35 | { | ||
36 | v->counter = i; | ||
37 | } | ||
32 | 38 | ||
33 | /** | 39 | /** |
34 | * atomic_add - add integer to atomic variable | 40 | * atomic_add - add integer to atomic variable |
@@ -200,8 +206,15 @@ static inline int atomic_sub_return(int i, atomic_t *v) | |||
200 | return atomic_add_return(-i, v); | 206 | return atomic_add_return(-i, v); |
201 | } | 207 | } |
202 | 208 | ||
203 | #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) | 209 | static inline int atomic_cmpxchg(atomic_t *v, int old, int new) |
204 | #define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) | 210 | { |
211 | return cmpxchg(&v->counter, old, new); | ||
212 | } | ||
213 | |||
214 | static inline int atomic_xchg(atomic_t *v, int new) | ||
215 | { | ||
216 | return xchg(&v->counter, new); | ||
217 | } | ||
205 | 218 | ||
206 | /** | 219 | /** |
207 | * atomic_add_unless - add unless the number is already a given value | 220 | * atomic_add_unless - add unless the number is already a given value |
@@ -250,45 +263,12 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) | |||
250 | /* An 64bit atomic type */ | 263 | /* An 64bit atomic type */ |
251 | 264 | ||
252 | typedef struct { | 265 | typedef struct { |
253 | unsigned long long counter; | 266 | u64 __aligned(8) counter; |
254 | } atomic64_t; | 267 | } atomic64_t; |
255 | 268 | ||
256 | #define ATOMIC64_INIT(val) { (val) } | 269 | #define ATOMIC64_INIT(val) { (val) } |
257 | 270 | ||
258 | /** | 271 | extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val); |
259 | * atomic64_read - read atomic64 variable | ||
260 | * @ptr: pointer of type atomic64_t | ||
261 | * | ||
262 | * Atomically reads the value of @v. | ||
263 | * Doesn't imply a read memory barrier. | ||
264 | */ | ||
265 | #define __atomic64_read(ptr) ((ptr)->counter) | ||
266 | |||
267 | static inline unsigned long long | ||
268 | cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new) | ||
269 | { | ||
270 | asm volatile( | ||
271 | |||
272 | LOCK_PREFIX "cmpxchg8b (%[ptr])\n" | ||
273 | |||
274 | : "=A" (old) | ||
275 | |||
276 | : [ptr] "D" (ptr), | ||
277 | "A" (old), | ||
278 | "b" (ll_low(new)), | ||
279 | "c" (ll_high(new)) | ||
280 | |||
281 | : "memory"); | ||
282 | |||
283 | return old; | ||
284 | } | ||
285 | |||
286 | static inline unsigned long long | ||
287 | atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val, | ||
288 | unsigned long long new_val) | ||
289 | { | ||
290 | return cmpxchg8b(&ptr->counter, old_val, new_val); | ||
291 | } | ||
292 | 272 | ||
293 | /** | 273 | /** |
294 | * atomic64_xchg - xchg atomic64 variable | 274 | * atomic64_xchg - xchg atomic64 variable |
@@ -298,18 +278,7 @@ atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val, | |||
298 | * Atomically xchgs the value of @ptr to @new_val and returns | 278 | * Atomically xchgs the value of @ptr to @new_val and returns |
299 | * the old value. | 279 | * the old value. |
300 | */ | 280 | */ |
301 | 281 | extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val); | |
302 | static inline unsigned long long | ||
303 | atomic64_xchg(atomic64_t *ptr, unsigned long long new_val) | ||
304 | { | ||
305 | unsigned long long old_val; | ||
306 | |||
307 | do { | ||
308 | old_val = atomic_read(ptr); | ||
309 | } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); | ||
310 | |||
311 | return old_val; | ||
312 | } | ||
313 | 282 | ||
314 | /** | 283 | /** |
315 | * atomic64_set - set atomic64 variable | 284 | * atomic64_set - set atomic64 variable |
@@ -318,10 +287,7 @@ atomic64_xchg(atomic64_t *ptr, unsigned long long new_val) | |||
318 | * | 287 | * |
319 | * Atomically sets the value of @ptr to @new_val. | 288 | * Atomically sets the value of @ptr to @new_val. |
320 | */ | 289 | */ |
321 | static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val) | 290 | extern void atomic64_set(atomic64_t *ptr, u64 new_val); |
322 | { | ||
323 | atomic64_xchg(ptr, new_val); | ||
324 | } | ||
325 | 291 | ||
326 | /** | 292 | /** |
327 | * atomic64_read - read atomic64 variable | 293 | * atomic64_read - read atomic64 variable |
@@ -329,17 +295,30 @@ static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val) | |||
329 | * | 295 | * |
330 | * Atomically reads the value of @ptr and returns it. | 296 | * Atomically reads the value of @ptr and returns it. |
331 | */ | 297 | */ |
332 | static inline unsigned long long atomic64_read(atomic64_t *ptr) | 298 | static inline u64 atomic64_read(atomic64_t *ptr) |
333 | { | 299 | { |
334 | unsigned long long curr_val; | 300 | u64 res; |
335 | 301 | ||
336 | do { | 302 | /* |
337 | curr_val = __atomic64_read(ptr); | 303 | * Note, we inline this atomic64_t primitive because |
338 | } while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val); | 304 | * it only clobbers EAX/EDX and leaves the others |
339 | 305 | * untouched. We also (somewhat subtly) rely on the | |
340 | return curr_val; | 306 | * fact that cmpxchg8b returns the current 64-bit value |
307 | * of the memory location we are touching: | ||
308 | */ | ||
309 | asm volatile( | ||
310 | "mov %%ebx, %%eax\n\t" | ||
311 | "mov %%ecx, %%edx\n\t" | ||
312 | LOCK_PREFIX "cmpxchg8b %1\n" | ||
313 | : "=&A" (res) | ||
314 | : "m" (*ptr) | ||
315 | ); | ||
316 | |||
317 | return res; | ||
341 | } | 318 | } |
342 | 319 | ||
320 | extern u64 atomic64_read(atomic64_t *ptr); | ||
321 | |||
343 | /** | 322 | /** |
344 | * atomic64_add_return - add and return | 323 | * atomic64_add_return - add and return |
345 | * @delta: integer value to add | 324 | * @delta: integer value to add |
@@ -347,34 +326,14 @@ static inline unsigned long long atomic64_read(atomic64_t *ptr) | |||
347 | * | 326 | * |
348 | * Atomically adds @delta to @ptr and returns @delta + *@ptr | 327 | * Atomically adds @delta to @ptr and returns @delta + *@ptr |
349 | */ | 328 | */ |
350 | static inline unsigned long long | 329 | extern u64 atomic64_add_return(u64 delta, atomic64_t *ptr); |
351 | atomic64_add_return(unsigned long long delta, atomic64_t *ptr) | ||
352 | { | ||
353 | unsigned long long old_val, new_val; | ||
354 | |||
355 | do { | ||
356 | old_val = atomic_read(ptr); | ||
357 | new_val = old_val + delta; | ||
358 | |||
359 | } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); | ||
360 | |||
361 | return new_val; | ||
362 | } | ||
363 | |||
364 | static inline long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr) | ||
365 | { | ||
366 | return atomic64_add_return(-delta, ptr); | ||
367 | } | ||
368 | 330 | ||
369 | static inline long atomic64_inc_return(atomic64_t *ptr) | 331 | /* |
370 | { | 332 | * Other variants with different arithmetic operators: |
371 | return atomic64_add_return(1, ptr); | 333 | */ |
372 | } | 334 | extern u64 atomic64_sub_return(u64 delta, atomic64_t *ptr); |
373 | 335 | extern u64 atomic64_inc_return(atomic64_t *ptr); | |
374 | static inline long atomic64_dec_return(atomic64_t *ptr) | 336 | extern u64 atomic64_dec_return(atomic64_t *ptr); |
375 | { | ||
376 | return atomic64_sub_return(1, ptr); | ||
377 | } | ||
378 | 337 | ||
379 | /** | 338 | /** |
380 | * atomic64_add - add integer to atomic64 variable | 339 | * atomic64_add - add integer to atomic64 variable |
@@ -383,10 +342,7 @@ static inline long atomic64_dec_return(atomic64_t *ptr) | |||
383 | * | 342 | * |
384 | * Atomically adds @delta to @ptr. | 343 | * Atomically adds @delta to @ptr. |
385 | */ | 344 | */ |
386 | static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr) | 345 | extern void atomic64_add(u64 delta, atomic64_t *ptr); |
387 | { | ||
388 | atomic64_add_return(delta, ptr); | ||
389 | } | ||
390 | 346 | ||
391 | /** | 347 | /** |
392 | * atomic64_sub - subtract the atomic64 variable | 348 | * atomic64_sub - subtract the atomic64 variable |
@@ -395,10 +351,7 @@ static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr) | |||
395 | * | 351 | * |
396 | * Atomically subtracts @delta from @ptr. | 352 | * Atomically subtracts @delta from @ptr. |
397 | */ | 353 | */ |
398 | static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr) | 354 | extern void atomic64_sub(u64 delta, atomic64_t *ptr); |
399 | { | ||
400 | atomic64_add(-delta, ptr); | ||
401 | } | ||
402 | 355 | ||
403 | /** | 356 | /** |
404 | * atomic64_sub_and_test - subtract value from variable and test result | 357 | * atomic64_sub_and_test - subtract value from variable and test result |
@@ -409,13 +362,7 @@ static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr) | |||
409 | * true if the result is zero, or false for all | 362 | * true if the result is zero, or false for all |
410 | * other cases. | 363 | * other cases. |
411 | */ | 364 | */ |
412 | static inline int | 365 | extern int atomic64_sub_and_test(u64 delta, atomic64_t *ptr); |
413 | atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr) | ||
414 | { | ||
415 | unsigned long long old_val = atomic64_sub_return(delta, ptr); | ||
416 | |||
417 | return old_val == 0; | ||
418 | } | ||
419 | 366 | ||
420 | /** | 367 | /** |
421 | * atomic64_inc - increment atomic64 variable | 368 | * atomic64_inc - increment atomic64 variable |
@@ -423,10 +370,7 @@ atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr) | |||
423 | * | 370 | * |
424 | * Atomically increments @ptr by 1. | 371 | * Atomically increments @ptr by 1. |
425 | */ | 372 | */ |
426 | static inline void atomic64_inc(atomic64_t *ptr) | 373 | extern void atomic64_inc(atomic64_t *ptr); |
427 | { | ||
428 | atomic64_add(1, ptr); | ||
429 | } | ||
430 | 374 | ||
431 | /** | 375 | /** |
432 | * atomic64_dec - decrement atomic64 variable | 376 | * atomic64_dec - decrement atomic64 variable |
@@ -434,10 +378,7 @@ static inline void atomic64_inc(atomic64_t *ptr) | |||
434 | * | 378 | * |
435 | * Atomically decrements @ptr by 1. | 379 | * Atomically decrements @ptr by 1. |
436 | */ | 380 | */ |
437 | static inline void atomic64_dec(atomic64_t *ptr) | 381 | extern void atomic64_dec(atomic64_t *ptr); |
438 | { | ||
439 | atomic64_sub(1, ptr); | ||
440 | } | ||
441 | 382 | ||
442 | /** | 383 | /** |
443 | * atomic64_dec_and_test - decrement and test | 384 | * atomic64_dec_and_test - decrement and test |
@@ -447,10 +388,7 @@ static inline void atomic64_dec(atomic64_t *ptr) | |||
447 | * returns true if the result is 0, or false for all other | 388 | * returns true if the result is 0, or false for all other |
448 | * cases. | 389 | * cases. |
449 | */ | 390 | */ |
450 | static inline int atomic64_dec_and_test(atomic64_t *ptr) | 391 | extern int atomic64_dec_and_test(atomic64_t *ptr); |
451 | { | ||
452 | return atomic64_sub_and_test(1, ptr); | ||
453 | } | ||
454 | 392 | ||
455 | /** | 393 | /** |
456 | * atomic64_inc_and_test - increment and test | 394 | * atomic64_inc_and_test - increment and test |
@@ -460,10 +398,7 @@ static inline int atomic64_dec_and_test(atomic64_t *ptr) | |||
460 | * and returns true if the result is zero, or false for all | 398 | * and returns true if the result is zero, or false for all |
461 | * other cases. | 399 | * other cases. |
462 | */ | 400 | */ |
463 | static inline int atomic64_inc_and_test(atomic64_t *ptr) | 401 | extern int atomic64_inc_and_test(atomic64_t *ptr); |
464 | { | ||
465 | return atomic64_sub_and_test(-1, ptr); | ||
466 | } | ||
467 | 402 | ||
468 | /** | 403 | /** |
469 | * atomic64_add_negative - add and test if negative | 404 | * atomic64_add_negative - add and test if negative |
@@ -474,13 +409,7 @@ static inline int atomic64_inc_and_test(atomic64_t *ptr) | |||
474 | * if the result is negative, or false when | 409 | * if the result is negative, or false when |
475 | * result is greater than or equal to zero. | 410 | * result is greater than or equal to zero. |
476 | */ | 411 | */ |
477 | static inline int | 412 | extern int atomic64_add_negative(u64 delta, atomic64_t *ptr); |
478 | atomic64_add_negative(unsigned long long delta, atomic64_t *ptr) | ||
479 | { | ||
480 | long long old_val = atomic64_add_return(delta, ptr); | ||
481 | |||
482 | return old_val < 0; | ||
483 | } | ||
484 | 413 | ||
485 | #include <asm-generic/atomic-long.h> | 414 | #include <asm-generic/atomic-long.h> |
486 | #endif /* _ASM_X86_ATOMIC_32_H */ | 415 | #endif /* _ASM_X86_ATOMIC_32_H */ |
diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h index 0d6360220007..d605dc268e79 100644 --- a/arch/x86/include/asm/atomic_64.h +++ b/arch/x86/include/asm/atomic_64.h | |||
@@ -18,7 +18,10 @@ | |||
18 | * | 18 | * |
19 | * Atomically reads the value of @v. | 19 | * Atomically reads the value of @v. |
20 | */ | 20 | */ |
21 | #define atomic_read(v) ((v)->counter) | 21 | static inline int atomic_read(const atomic_t *v) |
22 | { | ||
23 | return v->counter; | ||
24 | } | ||
22 | 25 | ||
23 | /** | 26 | /** |
24 | * atomic_set - set atomic variable | 27 | * atomic_set - set atomic variable |
@@ -27,7 +30,10 @@ | |||
27 | * | 30 | * |
28 | * Atomically sets the value of @v to @i. | 31 | * Atomically sets the value of @v to @i. |
29 | */ | 32 | */ |
30 | #define atomic_set(v, i) (((v)->counter) = (i)) | 33 | static inline void atomic_set(atomic_t *v, int i) |
34 | { | ||
35 | v->counter = i; | ||
36 | } | ||
31 | 37 | ||
32 | /** | 38 | /** |
33 | * atomic_add - add integer to atomic variable | 39 | * atomic_add - add integer to atomic variable |
@@ -192,7 +198,10 @@ static inline int atomic_sub_return(int i, atomic_t *v) | |||
192 | * Atomically reads the value of @v. | 198 | * Atomically reads the value of @v. |
193 | * Doesn't imply a read memory barrier. | 199 | * Doesn't imply a read memory barrier. |
194 | */ | 200 | */ |
195 | #define atomic64_read(v) ((v)->counter) | 201 | static inline long atomic64_read(const atomic64_t *v) |
202 | { | ||
203 | return v->counter; | ||
204 | } | ||
196 | 205 | ||
197 | /** | 206 | /** |
198 | * atomic64_set - set atomic64 variable | 207 | * atomic64_set - set atomic64 variable |
@@ -201,7 +210,10 @@ static inline int atomic_sub_return(int i, atomic_t *v) | |||
201 | * | 210 | * |
202 | * Atomically sets the value of @v to @i. | 211 | * Atomically sets the value of @v to @i. |
203 | */ | 212 | */ |
204 | #define atomic64_set(v, i) (((v)->counter) = (i)) | 213 | static inline void atomic64_set(atomic64_t *v, long i) |
214 | { | ||
215 | v->counter = i; | ||
216 | } | ||
205 | 217 | ||
206 | /** | 218 | /** |
207 | * atomic64_add - add integer to atomic64 variable | 219 | * atomic64_add - add integer to atomic64 variable |
@@ -355,11 +367,25 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) | |||
355 | #define atomic64_inc_return(v) (atomic64_add_return(1, (v))) | 367 | #define atomic64_inc_return(v) (atomic64_add_return(1, (v))) |
356 | #define atomic64_dec_return(v) (atomic64_sub_return(1, (v))) | 368 | #define atomic64_dec_return(v) (atomic64_sub_return(1, (v))) |
357 | 369 | ||
358 | #define atomic64_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) | 370 | static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new) |
359 | #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) | 371 | { |
372 | return cmpxchg(&v->counter, old, new); | ||
373 | } | ||
374 | |||
375 | static inline long atomic64_xchg(atomic64_t *v, long new) | ||
376 | { | ||
377 | return xchg(&v->counter, new); | ||
378 | } | ||
360 | 379 | ||
361 | #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) | 380 | static inline long atomic_cmpxchg(atomic_t *v, int old, int new) |
362 | #define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) | 381 | { |
382 | return cmpxchg(&v->counter, old, new); | ||
383 | } | ||
384 | |||
385 | static inline long atomic_xchg(atomic_t *v, int new) | ||
386 | { | ||
387 | return xchg(&v->counter, new); | ||
388 | } | ||
363 | 389 | ||
364 | /** | 390 | /** |
365 | * atomic_add_unless - add unless the number is a given value | 391 | * atomic_add_unless - add unless the number is a given value |
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 418e632d4a80..7a1065958ba9 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h | |||
@@ -8,7 +8,7 @@ | |||
8 | 8 | ||
9 | #ifdef __KERNEL__ | 9 | #ifdef __KERNEL__ |
10 | 10 | ||
11 | #include <asm/page_types.h> | 11 | #include <asm/pgtable_types.h> |
12 | 12 | ||
13 | /* Physical address where kernel should be loaded. */ | 13 | /* Physical address where kernel should be loaded. */ |
14 | #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ | 14 | #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ |
@@ -16,10 +16,10 @@ | |||
16 | & ~(CONFIG_PHYSICAL_ALIGN - 1)) | 16 | & ~(CONFIG_PHYSICAL_ALIGN - 1)) |
17 | 17 | ||
18 | /* Minimum kernel alignment, as a power of two */ | 18 | /* Minimum kernel alignment, as a power of two */ |
19 | #ifdef CONFIG_x86_64 | 19 | #ifdef CONFIG_X86_64 |
20 | #define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT | 20 | #define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT |
21 | #else | 21 | #else |
22 | #define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT+1) | 22 | #define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_ORDER) |
23 | #endif | 23 | #endif |
24 | #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) | 24 | #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) |
25 | 25 | ||
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 4a28d22d4793..847fee6493a2 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -95,6 +95,7 @@ | |||
95 | #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ | 95 | #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ |
96 | #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ | 96 | #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ |
97 | #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ | 97 | #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ |
98 | #define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */ | ||
98 | 99 | ||
99 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ | 100 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ |
100 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ | 101 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ |
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index c68c361697e1..4d447b732d82 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h | |||
@@ -11,7 +11,7 @@ DECLARE_PER_CPU(struct task_struct *, current_task); | |||
11 | 11 | ||
12 | static __always_inline struct task_struct *get_current(void) | 12 | static __always_inline struct task_struct *get_current(void) |
13 | { | 13 | { |
14 | return percpu_read(current_task); | 14 | return percpu_read_stable(current_task); |
15 | } | 15 | } |
16 | 16 | ||
17 | #define current get_current() | 17 | #define current get_current() |
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index c993e9e0fed4..e8de2f6f5ca5 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h | |||
@@ -291,11 +291,24 @@ static inline unsigned long get_desc_base(const struct desc_struct *desc) | |||
291 | return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24); | 291 | return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24); |
292 | } | 292 | } |
293 | 293 | ||
294 | static inline void set_desc_base(struct desc_struct *desc, unsigned long base) | ||
295 | { | ||
296 | desc->base0 = base & 0xffff; | ||
297 | desc->base1 = (base >> 16) & 0xff; | ||
298 | desc->base2 = (base >> 24) & 0xff; | ||
299 | } | ||
300 | |||
294 | static inline unsigned long get_desc_limit(const struct desc_struct *desc) | 301 | static inline unsigned long get_desc_limit(const struct desc_struct *desc) |
295 | { | 302 | { |
296 | return desc->limit0 | (desc->limit << 16); | 303 | return desc->limit0 | (desc->limit << 16); |
297 | } | 304 | } |
298 | 305 | ||
306 | static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) | ||
307 | { | ||
308 | desc->limit0 = limit & 0xffff; | ||
309 | desc->limit = (limit >> 16) & 0xf; | ||
310 | } | ||
311 | |||
299 | static inline void _set_gate(int gate, unsigned type, void *addr, | 312 | static inline void _set_gate(int gate, unsigned type, void *addr, |
300 | unsigned dpl, unsigned ist, unsigned seg) | 313 | unsigned dpl, unsigned ist, unsigned seg) |
301 | { | 314 | { |
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index a6adefa28b94..9d6684849fd9 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h | |||
@@ -34,6 +34,12 @@ struct desc_struct { | |||
34 | }; | 34 | }; |
35 | } __attribute__((packed)); | 35 | } __attribute__((packed)); |
36 | 36 | ||
37 | #define GDT_ENTRY_INIT(flags, base, limit) { { { \ | ||
38 | .a = ((limit) & 0xffff) | (((base) & 0xffff) << 16), \ | ||
39 | .b = (((base) & 0xff0000) >> 16) | (((flags) & 0xf0ff) << 8) | \ | ||
40 | ((limit) & 0xf0000) | ((base) & 0xff000000), \ | ||
41 | } } } | ||
42 | |||
37 | enum { | 43 | enum { |
38 | GATE_INTERRUPT = 0xE, | 44 | GATE_INTERRUPT = 0xE, |
39 | GATE_TRAP = 0xF, | 45 | GATE_TRAP = 0xF, |
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 1c3f9435f1c9..0ee770d23d0e 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h | |||
@@ -55,6 +55,24 @@ extern int dma_set_mask(struct device *dev, u64 mask); | |||
55 | extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, | 55 | extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, |
56 | dma_addr_t *dma_addr, gfp_t flag); | 56 | dma_addr_t *dma_addr, gfp_t flag); |
57 | 57 | ||
58 | static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) | ||
59 | { | ||
60 | if (!dev->dma_mask) | ||
61 | return 0; | ||
62 | |||
63 | return addr + size <= *dev->dma_mask; | ||
64 | } | ||
65 | |||
66 | static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) | ||
67 | { | ||
68 | return paddr; | ||
69 | } | ||
70 | |||
71 | static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) | ||
72 | { | ||
73 | return daddr; | ||
74 | } | ||
75 | |||
58 | static inline void | 76 | static inline void |
59 | dma_cache_sync(struct device *dev, void *vaddr, size_t size, | 77 | dma_cache_sync(struct device *dev, void *vaddr, size_t size, |
60 | enum dma_data_direction dir) | 78 | enum dma_data_direction dir) |
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 3afc5e87cfdd..ae6253ab9029 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h | |||
@@ -87,9 +87,25 @@ | |||
87 | CFI_RESTORE \reg | 87 | CFI_RESTORE \reg |
88 | .endm | 88 | .endm |
89 | #else /*!CONFIG_X86_64*/ | 89 | #else /*!CONFIG_X86_64*/ |
90 | .macro pushl_cfi reg | ||
91 | pushl \reg | ||
92 | CFI_ADJUST_CFA_OFFSET 4 | ||
93 | .endm | ||
90 | 94 | ||
91 | /* 32bit defenitions are missed yet */ | 95 | .macro popl_cfi reg |
96 | popl \reg | ||
97 | CFI_ADJUST_CFA_OFFSET -4 | ||
98 | .endm | ||
92 | 99 | ||
100 | .macro movl_cfi reg offset=0 | ||
101 | movl %\reg, \offset(%esp) | ||
102 | CFI_REL_OFFSET \reg, \offset | ||
103 | .endm | ||
104 | |||
105 | .macro movl_cfi_restore offset reg | ||
106 | movl \offset(%esp), %\reg | ||
107 | CFI_RESTORE \reg | ||
108 | .endm | ||
93 | #endif /*!CONFIG_X86_64*/ | 109 | #endif /*!CONFIG_X86_64*/ |
94 | #endif /*__ASSEMBLY__*/ | 110 | #endif /*__ASSEMBLY__*/ |
95 | 111 | ||
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index edc90f23e708..8406ed7f9926 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h | |||
@@ -33,7 +33,7 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); | |||
33 | #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ | 33 | #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ |
34 | efi_call_virt(f, a1, a2, a3, a4, a5, a6) | 34 | efi_call_virt(f, a1, a2, a3, a4, a5, a6) |
35 | 35 | ||
36 | #define efi_ioremap(addr, size) ioremap_cache(addr, size) | 36 | #define efi_ioremap(addr, size, type) ioremap_cache(addr, size) |
37 | 37 | ||
38 | #else /* !CONFIG_X86_32 */ | 38 | #else /* !CONFIG_X86_32 */ |
39 | 39 | ||
@@ -84,7 +84,8 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, | |||
84 | efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | 84 | efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ |
85 | (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) | 85 | (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) |
86 | 86 | ||
87 | extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size); | 87 | extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, |
88 | u32 type); | ||
88 | 89 | ||
89 | #endif /* CONFIG_X86_32 */ | 90 | #endif /* CONFIG_X86_32 */ |
90 | 91 | ||
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 2d81af3974a0..7b2d71df39a6 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h | |||
@@ -111,12 +111,9 @@ enum fixed_addresses { | |||
111 | #ifdef CONFIG_PARAVIRT | 111 | #ifdef CONFIG_PARAVIRT |
112 | FIX_PARAVIRT_BOOTMAP, | 112 | FIX_PARAVIRT_BOOTMAP, |
113 | #endif | 113 | #endif |
114 | FIX_TEXT_POKE0, /* reserve 2 pages for text_poke() */ | 114 | FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */ |
115 | FIX_TEXT_POKE1, | 115 | FIX_TEXT_POKE0, /* first page is last, because allocation is backward */ |
116 | __end_of_permanent_fixed_addresses, | 116 | __end_of_permanent_fixed_addresses, |
117 | #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT | ||
118 | FIX_OHCI1394_BASE, | ||
119 | #endif | ||
120 | /* | 117 | /* |
121 | * 256 temporary boot-time mappings, used by early_ioremap(), | 118 | * 256 temporary boot-time mappings, used by early_ioremap(), |
122 | * before ioremap() is functional. | 119 | * before ioremap() is functional. |
@@ -129,6 +126,9 @@ enum fixed_addresses { | |||
129 | FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - | 126 | FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - |
130 | (__end_of_permanent_fixed_addresses & 255), | 127 | (__end_of_permanent_fixed_addresses & 255), |
131 | FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, | 128 | FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, |
129 | #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT | ||
130 | FIX_OHCI1394_BASE, | ||
131 | #endif | ||
132 | #ifdef CONFIG_X86_32 | 132 | #ifdef CONFIG_X86_32 |
133 | FIX_WP_TEST, | 133 | FIX_WP_TEST, |
134 | #endif | 134 | #endif |
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index bd2c6511c887..db24c2278be0 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h | |||
@@ -28,13 +28,6 @@ | |||
28 | 28 | ||
29 | #endif | 29 | #endif |
30 | 30 | ||
31 | /* FIXME: I don't want to stay hardcoded */ | ||
32 | #ifdef CONFIG_X86_64 | ||
33 | # define FTRACE_SYSCALL_MAX 296 | ||
34 | #else | ||
35 | # define FTRACE_SYSCALL_MAX 333 | ||
36 | #endif | ||
37 | |||
38 | #ifdef CONFIG_FUNCTION_TRACER | 31 | #ifdef CONFIG_FUNCTION_TRACER |
39 | #define MCOUNT_ADDR ((long)(mcount)) | 32 | #define MCOUNT_ADDR ((long)(mcount)) |
40 | #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ | 33 | #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ |
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 175adf58dd4f..0b20bbb758f2 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
@@ -26,6 +26,7 @@ extern void fpu_init(void); | |||
26 | extern void mxcsr_feature_mask_init(void); | 26 | extern void mxcsr_feature_mask_init(void); |
27 | extern int init_fpu(struct task_struct *child); | 27 | extern int init_fpu(struct task_struct *child); |
28 | extern asmlinkage void math_state_restore(void); | 28 | extern asmlinkage void math_state_restore(void); |
29 | extern void __math_state_restore(void); | ||
29 | extern void init_thread_xstate(void); | 30 | extern void init_thread_xstate(void); |
30 | extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); | 31 | extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); |
31 | 32 | ||
@@ -301,6 +302,14 @@ static inline void kernel_fpu_end(void) | |||
301 | preempt_enable(); | 302 | preempt_enable(); |
302 | } | 303 | } |
303 | 304 | ||
305 | static inline bool irq_fpu_usable(void) | ||
306 | { | ||
307 | struct pt_regs *regs; | ||
308 | |||
309 | return !in_interrupt() || !(regs = get_irq_regs()) || \ | ||
310 | user_mode(regs) || (read_cr0() & X86_CR0_TS); | ||
311 | } | ||
312 | |||
304 | /* | 313 | /* |
305 | * Some instructions like VIA's padlock instructions generate a spurious | 314 | * Some instructions like VIA's padlock instructions generate a spurious |
306 | * DNA fault but don't modify SSE registers. And these instructions | 315 | * DNA fault but don't modify SSE registers. And these instructions |
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index daf866ed0612..85232d32fcb8 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h | |||
@@ -150,17 +150,17 @@ extern int timer_through_8259; | |||
150 | #define io_apic_assign_pci_irqs \ | 150 | #define io_apic_assign_pci_irqs \ |
151 | (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) | 151 | (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) |
152 | 152 | ||
153 | #ifdef CONFIG_ACPI | 153 | extern u8 io_apic_unique_id(u8 id); |
154 | extern int io_apic_get_unique_id(int ioapic, int apic_id); | 154 | extern int io_apic_get_unique_id(int ioapic, int apic_id); |
155 | extern int io_apic_get_version(int ioapic); | 155 | extern int io_apic_get_version(int ioapic); |
156 | extern int io_apic_get_redir_entries(int ioapic); | 156 | extern int io_apic_get_redir_entries(int ioapic); |
157 | #endif /* CONFIG_ACPI */ | ||
158 | 157 | ||
159 | struct io_apic_irq_attr; | 158 | struct io_apic_irq_attr; |
160 | extern int io_apic_set_pci_routing(struct device *dev, int irq, | 159 | extern int io_apic_set_pci_routing(struct device *dev, int irq, |
161 | struct io_apic_irq_attr *irq_attr); | 160 | struct io_apic_irq_attr *irq_attr); |
162 | extern int (*ioapic_renumber_irq)(int ioapic, int irq); | 161 | extern int (*ioapic_renumber_irq)(int ioapic, int irq); |
163 | extern void ioapic_init_mappings(void); | 162 | extern void ioapic_init_mappings(void); |
163 | extern void ioapic_insert_resources(void); | ||
164 | 164 | ||
165 | extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); | 165 | extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); |
166 | extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); | 166 | extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); |
@@ -176,10 +176,21 @@ extern int setup_ioapic_entry(int apic, int irq, | |||
176 | int polarity, int vector, int pin); | 176 | int polarity, int vector, int pin); |
177 | extern void ioapic_write_entry(int apic, int pin, | 177 | extern void ioapic_write_entry(int apic, int pin, |
178 | struct IO_APIC_route_entry e); | 178 | struct IO_APIC_route_entry e); |
179 | |||
180 | struct mp_ioapic_gsi{ | ||
181 | int gsi_base; | ||
182 | int gsi_end; | ||
183 | }; | ||
184 | extern struct mp_ioapic_gsi mp_gsi_routing[]; | ||
185 | int mp_find_ioapic(int gsi); | ||
186 | int mp_find_ioapic_pin(int ioapic, int gsi); | ||
187 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); | ||
188 | |||
179 | #else /* !CONFIG_X86_IO_APIC */ | 189 | #else /* !CONFIG_X86_IO_APIC */ |
180 | #define io_apic_assign_pci_irqs 0 | 190 | #define io_apic_assign_pci_irqs 0 |
181 | static const int timer_through_8259 = 0; | 191 | static const int timer_through_8259 = 0; |
182 | static inline void ioapic_init_mappings(void) { } | 192 | static inline void ioapic_init_mappings(void) { } |
193 | static inline void ioapic_insert_resources(void) { } | ||
183 | 194 | ||
184 | static inline void probe_nr_irqs_gsi(void) { } | 195 | static inline void probe_nr_irqs_gsi(void) { } |
185 | #endif | 196 | #endif |
diff --git a/arch/x86/include/asm/ioctls.h b/arch/x86/include/asm/ioctls.h index 0d5b23b7b06e..ec34c760665e 100644 --- a/arch/x86/include/asm/ioctls.h +++ b/arch/x86/include/asm/ioctls.h | |||
@@ -1,94 +1 @@ | |||
1 | #ifndef _ASM_X86_IOCTLS_H | #include <asm-generic/ioctls.h> | |
2 | #define _ASM_X86_IOCTLS_H | ||
3 | |||
4 | #include <asm/ioctl.h> | ||
5 | |||
6 | /* 0x54 is just a magic number to make these relatively unique ('T') */ | ||
7 | |||
8 | #define TCGETS 0x5401 | ||
9 | #define TCSETS 0x5402 /* Clashes with SNDCTL_TMR_START sound ioctl */ | ||
10 | #define TCSETSW 0x5403 | ||
11 | #define TCSETSF 0x5404 | ||
12 | #define TCGETA 0x5405 | ||
13 | #define TCSETA 0x5406 | ||
14 | #define TCSETAW 0x5407 | ||
15 | #define TCSETAF 0x5408 | ||
16 | #define TCSBRK 0x5409 | ||
17 | #define TCXONC 0x540A | ||
18 | #define TCFLSH 0x540B | ||
19 | #define TIOCEXCL 0x540C | ||
20 | #define TIOCNXCL 0x540D | ||
21 | #define TIOCSCTTY 0x540E | ||
22 | #define TIOCGPGRP 0x540F | ||
23 | #define TIOCSPGRP 0x5410 | ||
24 | #define TIOCOUTQ 0x5411 | ||
25 | #define TIOCSTI 0x5412 | ||
26 | #define TIOCGWINSZ 0x5413 | ||
27 | #define TIOCSWINSZ 0x5414 | ||
28 | #define TIOCMGET 0x5415 | ||
29 | #define TIOCMBIS 0x5416 | ||
30 | #define TIOCMBIC 0x5417 | ||
31 | #define TIOCMSET 0x5418 | ||
32 | #define TIOCGSOFTCAR 0x5419 | ||
33 | #define TIOCSSOFTCAR 0x541A | ||
34 | #define FIONREAD 0x541B | ||
35 | #define TIOCINQ FIONREAD | ||
36 | #define TIOCLINUX 0x541C | ||
37 | #define TIOCCONS 0x541D | ||
38 | #define TIOCGSERIAL 0x541E | ||
39 | #define TIOCSSERIAL 0x541F | ||
40 | #define TIOCPKT 0x5420 | ||
41 | #define FIONBIO 0x5421 | ||
42 | #define TIOCNOTTY 0x5422 | ||
43 | #define TIOCSETD 0x5423 | ||
44 | #define TIOCGETD 0x5424 | ||
45 | #define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ | ||
46 | /* #define TIOCTTYGSTRUCT 0x5426 - Former debugging-only ioctl */ | ||
47 | #define TIOCSBRK 0x5427 /* BSD compatibility */ | ||
48 | #define TIOCCBRK 0x5428 /* BSD compatibility */ | ||
49 | #define TIOCGSID 0x5429 /* Return the session ID of FD */ | ||
50 | #define TCGETS2 _IOR('T', 0x2A, struct termios2) | ||
51 | #define TCSETS2 _IOW('T', 0x2B, struct termios2) | ||
52 | #define TCSETSW2 _IOW('T', 0x2C, struct termios2) | ||
53 | #define TCSETSF2 _IOW('T', 0x2D, struct termios2) | ||
54 | #define TIOCGRS485 0x542E | ||
55 | #define TIOCSRS485 0x542F | ||
56 | #define TIOCGPTN _IOR('T', 0x30, unsigned int) | ||
57 | /* Get Pty Number (of pty-mux device) */ | ||
58 | #define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */ | ||
59 | #define TCGETX 0x5432 /* SYS5 TCGETX compatibility */ | ||
60 | #define TCSETX 0x5433 | ||
61 | #define TCSETXF 0x5434 | ||
62 | #define TCSETXW 0x5435 | ||
63 | |||
64 | #define FIONCLEX 0x5450 | ||
65 | #define FIOCLEX 0x5451 | ||
66 | #define FIOASYNC 0x5452 | ||
67 | #define TIOCSERCONFIG 0x5453 | ||
68 | #define TIOCSERGWILD 0x5454 | ||
69 | #define TIOCSERSWILD 0x5455 | ||
70 | #define TIOCGLCKTRMIOS 0x5456 | ||
71 | #define TIOCSLCKTRMIOS 0x5457 | ||
72 | #define TIOCSERGSTRUCT 0x5458 /* For debugging only */ | ||
73 | #define TIOCSERGETLSR 0x5459 /* Get line status register */ | ||
74 | #define TIOCSERGETMULTI 0x545A /* Get multiport config */ | ||
75 | #define TIOCSERSETMULTI 0x545B /* Set multiport config */ | ||
76 | |||
77 | #define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ | ||
78 | #define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ | ||
79 | #define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ | ||
80 | #define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ | ||
81 | #define FIOQSIZE 0x5460 | ||
82 | |||
83 | /* Used for packet mode */ | ||
84 | #define TIOCPKT_DATA 0 | ||
85 | #define TIOCPKT_FLUSHREAD 1 | ||
86 | #define TIOCPKT_FLUSHWRITE 2 | ||
87 | #define TIOCPKT_STOP 4 | ||
88 | #define TIOCPKT_START 8 | ||
89 | #define TIOCPKT_NOSTOP 16 | ||
90 | #define TIOCPKT_DOSTOP 32 | ||
91 | |||
92 | #define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ | ||
93 | |||
94 | #endif /* _ASM_X86_IOCTLS_H */ | ||
diff --git a/arch/x86/include/asm/ipcbuf.h b/arch/x86/include/asm/ipcbuf.h index ee678fd51594..84c7e51cb6d0 100644 --- a/arch/x86/include/asm/ipcbuf.h +++ b/arch/x86/include/asm/ipcbuf.h | |||
@@ -1,28 +1 @@ | |||
1 | #ifndef _ASM_X86_IPCBUF_H | #include <asm-generic/ipcbuf.h> | |
2 | #define _ASM_X86_IPCBUF_H | ||
3 | |||
4 | /* | ||
5 | * The ipc64_perm structure for x86 architecture. | ||
6 | * Note extra padding because this structure is passed back and forth | ||
7 | * between kernel and user space. | ||
8 | * | ||
9 | * Pad space is left for: | ||
10 | * - 32-bit mode_t and seq | ||
11 | * - 2 miscellaneous 32-bit values | ||
12 | */ | ||
13 | |||
14 | struct ipc64_perm { | ||
15 | __kernel_key_t key; | ||
16 | __kernel_uid32_t uid; | ||
17 | __kernel_gid32_t gid; | ||
18 | __kernel_uid32_t cuid; | ||
19 | __kernel_gid32_t cgid; | ||
20 | __kernel_mode_t mode; | ||
21 | unsigned short __pad1; | ||
22 | unsigned short seq; | ||
23 | unsigned short __pad2; | ||
24 | unsigned long __unused1; | ||
25 | unsigned long __unused2; | ||
26 | }; | ||
27 | |||
28 | #endif /* _ASM_X86_IPCBUF_H */ | ||
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 2bdab21f0898..9e2b952f810a 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h | |||
@@ -12,9 +12,14 @@ static inline unsigned long native_save_fl(void) | |||
12 | { | 12 | { |
13 | unsigned long flags; | 13 | unsigned long flags; |
14 | 14 | ||
15 | /* | ||
16 | * "=rm" is safe here, because "pop" adjusts the stack before | ||
17 | * it evaluates its effective address -- this is part of the | ||
18 | * documented behavior of the "pop" instruction. | ||
19 | */ | ||
15 | asm volatile("# __raw_save_flags\n\t" | 20 | asm volatile("# __raw_save_flags\n\t" |
16 | "pushf ; pop %0" | 21 | "pushf ; pop %0" |
17 | : "=g" (flags) | 22 | : "=rm" (flags) |
18 | : /* no input */ | 23 | : /* no input */ |
19 | : "memory"); | 24 | : "memory"); |
20 | 25 | ||
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index 313389cd50d2..0d97deba1e35 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h | |||
@@ -17,8 +17,7 @@ | |||
17 | /* Pages for switcher itself, then two pages per cpu */ | 17 | /* Pages for switcher itself, then two pages per cpu */ |
18 | #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids) | 18 | #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids) |
19 | 19 | ||
20 | /* We map at -4M (-2M when PAE is activated) for ease of mapping | 20 | /* We map at -4M (-2M for PAE) for ease of mapping (one PTE page). */ |
21 | * into the guest (one PTE page). */ | ||
22 | #ifdef CONFIG_X86_PAE | 21 | #ifdef CONFIG_X86_PAE |
23 | #define SWITCHER_ADDR 0xFFE00000 | 22 | #define SWITCHER_ADDR 0xFFE00000 |
24 | #else | 23 | #else |
@@ -91,8 +90,9 @@ static inline void lguest_set_ts(void) | |||
91 | } | 90 | } |
92 | 91 | ||
93 | /* Full 4G segment descriptors, suitable for CS and DS. */ | 92 | /* Full 4G segment descriptors, suitable for CS and DS. */ |
94 | #define FULL_EXEC_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9b00} } }) | 93 | #define FULL_EXEC_SEGMENT \ |
95 | #define FULL_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9300} } }) | 94 | ((struct desc_struct)GDT_ENTRY_INIT(0xc09b, 0, 0xfffff)) |
95 | #define FULL_SEGMENT ((struct desc_struct)GDT_ENTRY_INIT(0xc093, 0, 0xfffff)) | ||
96 | 96 | ||
97 | #endif /* __ASSEMBLY__ */ | 97 | #endif /* __ASSEMBLY__ */ |
98 | 98 | ||
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index d31c4a684078..ba0eed8aa1a6 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h | |||
@@ -30,27 +30,27 @@ | |||
30 | #include <asm/hw_irq.h> | 30 | #include <asm/hw_irq.h> |
31 | #include <asm/kvm_para.h> | 31 | #include <asm/kvm_para.h> |
32 | 32 | ||
33 | /*G:031 But first, how does our Guest contact the Host to ask for privileged | 33 | /*G:030 |
34 | * But first, how does our Guest contact the Host to ask for privileged | ||
34 | * operations? There are two ways: the direct way is to make a "hypercall", | 35 | * operations? There are two ways: the direct way is to make a "hypercall", |
35 | * to make requests of the Host Itself. | 36 | * to make requests of the Host Itself. |
36 | * | 37 | * |
37 | * We use the KVM hypercall mechanism. Seventeen hypercalls are | 38 | * We use the KVM hypercall mechanism, though completely different hypercall |
38 | * available: the hypercall number is put in the %eax register, and the | 39 | * numbers. Seventeen hypercalls are available: the hypercall number is put in |
39 | * arguments (when required) are placed in %ebx, %ecx, %edx and %esi. | 40 | * the %eax register, and the arguments (when required) are placed in %ebx, |
40 | * If a return value makes sense, it's returned in %eax. | 41 | * %ecx, %edx and %esi. If a return value makes sense, it's returned in %eax. |
41 | * | 42 | * |
42 | * Grossly invalid calls result in Sudden Death at the hands of the vengeful | 43 | * Grossly invalid calls result in Sudden Death at the hands of the vengeful |
43 | * Host, rather than returning failure. This reflects Winston Churchill's | 44 | * Host, rather than returning failure. This reflects Winston Churchill's |
44 | * definition of a gentleman: "someone who is only rude intentionally". */ | 45 | * definition of a gentleman: "someone who is only rude intentionally". |
45 | /*:*/ | 46 | :*/ |
46 | 47 | ||
47 | /* Can't use our min() macro here: needs to be a constant */ | 48 | /* Can't use our min() macro here: needs to be a constant */ |
48 | #define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) | 49 | #define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) |
49 | 50 | ||
50 | #define LHCALL_RING_SIZE 64 | 51 | #define LHCALL_RING_SIZE 64 |
51 | struct hcall_args { | 52 | struct hcall_args { |
52 | /* These map directly onto eax, ebx, ecx, edx and esi | 53 | /* These map directly onto eax/ebx/ecx/edx/esi in struct lguest_regs */ |
53 | * in struct lguest_regs */ | ||
54 | unsigned long arg0, arg1, arg2, arg3, arg4; | 54 | unsigned long arg0, arg1, arg2, arg3, arg4; |
55 | }; | 55 | }; |
56 | 56 | ||
diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h index 751af2550ed9..593e51d4643f 100644 --- a/arch/x86/include/asm/mman.h +++ b/arch/x86/include/asm/mman.h | |||
@@ -1,20 +1,8 @@ | |||
1 | #ifndef _ASM_X86_MMAN_H | 1 | #ifndef _ASM_X86_MMAN_H |
2 | #define _ASM_X86_MMAN_H | 2 | #define _ASM_X86_MMAN_H |
3 | 3 | ||
4 | #include <asm-generic/mman-common.h> | ||
5 | |||
6 | #define MAP_32BIT 0x40 /* only give out 32bit addresses */ | 4 | #define MAP_32BIT 0x40 /* only give out 32bit addresses */ |
7 | 5 | ||
8 | #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ | 6 | #include <asm-generic/mman.h> |
9 | #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ | ||
10 | #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ | ||
11 | #define MAP_LOCKED 0x2000 /* pages are locked */ | ||
12 | #define MAP_NORESERVE 0x4000 /* don't check for reservations */ | ||
13 | #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ | ||
14 | #define MAP_NONBLOCK 0x10000 /* do not block on IO */ | ||
15 | #define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ | ||
16 | |||
17 | #define MCL_CURRENT 1 /* lock all current mappings */ | ||
18 | #define MCL_FUTURE 2 /* lock all future mappings */ | ||
19 | 7 | ||
20 | #endif /* _ASM_X86_MMAN_H */ | 8 | #endif /* _ASM_X86_MMAN_H */ |
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 47d62743c4d5..3e2ce58a31a3 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h | |||
@@ -1,18 +1,7 @@ | |||
1 | #ifndef _ASM_X86_MODULE_H | 1 | #ifndef _ASM_X86_MODULE_H |
2 | #define _ASM_X86_MODULE_H | 2 | #define _ASM_X86_MODULE_H |
3 | 3 | ||
4 | /* x86_32/64 are simple */ | 4 | #include <asm-generic/module.h> |
5 | struct mod_arch_specific {}; | ||
6 | |||
7 | #ifdef CONFIG_X86_32 | ||
8 | # define Elf_Shdr Elf32_Shdr | ||
9 | # define Elf_Sym Elf32_Sym | ||
10 | # define Elf_Ehdr Elf32_Ehdr | ||
11 | #else | ||
12 | # define Elf_Shdr Elf64_Shdr | ||
13 | # define Elf_Sym Elf64_Sym | ||
14 | # define Elf_Ehdr Elf64_Ehdr | ||
15 | #endif | ||
16 | 5 | ||
17 | #ifdef CONFIG_X86_64 | 6 | #ifdef CONFIG_X86_64 |
18 | /* X86_64 does not define MODULE_PROC_FAMILY */ | 7 | /* X86_64 does not define MODULE_PROC_FAMILY */ |
@@ -28,6 +17,8 @@ struct mod_arch_specific {}; | |||
28 | #define MODULE_PROC_FAMILY "586MMX " | 17 | #define MODULE_PROC_FAMILY "586MMX " |
29 | #elif defined CONFIG_MCORE2 | 18 | #elif defined CONFIG_MCORE2 |
30 | #define MODULE_PROC_FAMILY "CORE2 " | 19 | #define MODULE_PROC_FAMILY "CORE2 " |
20 | #elif defined CONFIG_MATOM | ||
21 | #define MODULE_PROC_FAMILY "ATOM " | ||
31 | #elif defined CONFIG_M686 | 22 | #elif defined CONFIG_M686 |
32 | #define MODULE_PROC_FAMILY "686 " | 23 | #define MODULE_PROC_FAMILY "686 " |
33 | #elif defined CONFIG_MPENTIUMII | 24 | #elif defined CONFIG_MPENTIUMII |
diff --git a/arch/x86/include/asm/msgbuf.h b/arch/x86/include/asm/msgbuf.h index 7e4e9481f51c..809134c644a6 100644 --- a/arch/x86/include/asm/msgbuf.h +++ b/arch/x86/include/asm/msgbuf.h | |||
@@ -1,39 +1 @@ | |||
1 | #ifndef _ASM_X86_MSGBUF_H | #include <asm-generic/msgbuf.h> | |
2 | #define _ASM_X86_MSGBUF_H | ||
3 | |||
4 | /* | ||
5 | * The msqid64_ds structure for i386 architecture. | ||
6 | * Note extra padding because this structure is passed back and forth | ||
7 | * between kernel and user space. | ||
8 | * | ||
9 | * Pad space on i386 is left for: | ||
10 | * - 64-bit time_t to solve y2038 problem | ||
11 | * - 2 miscellaneous 32-bit values | ||
12 | * | ||
13 | * Pad space on x8664 is left for: | ||
14 | * - 2 miscellaneous 64-bit values | ||
15 | */ | ||
16 | struct msqid64_ds { | ||
17 | struct ipc64_perm msg_perm; | ||
18 | __kernel_time_t msg_stime; /* last msgsnd time */ | ||
19 | #ifdef __i386__ | ||
20 | unsigned long __unused1; | ||
21 | #endif | ||
22 | __kernel_time_t msg_rtime; /* last msgrcv time */ | ||
23 | #ifdef __i386__ | ||
24 | unsigned long __unused2; | ||
25 | #endif | ||
26 | __kernel_time_t msg_ctime; /* last change time */ | ||
27 | #ifdef __i386__ | ||
28 | unsigned long __unused3; | ||
29 | #endif | ||
30 | unsigned long msg_cbytes; /* current number of bytes on queue */ | ||
31 | unsigned long msg_qnum; /* number of messages in queue */ | ||
32 | unsigned long msg_qbytes; /* max number of bytes on queue */ | ||
33 | __kernel_pid_t msg_lspid; /* pid of last msgsnd */ | ||
34 | __kernel_pid_t msg_lrpid; /* last receive pid */ | ||
35 | unsigned long __unused4; | ||
36 | unsigned long __unused5; | ||
37 | }; | ||
38 | |||
39 | #endif /* _ASM_X86_MSGBUF_H */ | ||
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1692fb5050e3..6be7fc254b59 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -246,10 +246,6 @@ | |||
246 | #define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << 38) | 246 | #define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << 38) |
247 | #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << 39) | 247 | #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << 39) |
248 | 248 | ||
249 | /* Intel Model 6 */ | ||
250 | #define MSR_P6_EVNTSEL0 0x00000186 | ||
251 | #define MSR_P6_EVNTSEL1 0x00000187 | ||
252 | |||
253 | /* P4/Xeon+ specific */ | 249 | /* P4/Xeon+ specific */ |
254 | #define MSR_IA32_MCG_EAX 0x00000180 | 250 | #define MSR_IA32_MCG_EAX 0x00000180 |
255 | #define MSR_IA32_MCG_EBX 0x00000181 | 251 | #define MSR_IA32_MCG_EBX 0x00000181 |
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 48ad9d29484a..7e2b6ba962ff 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h | |||
@@ -3,10 +3,16 @@ | |||
3 | 3 | ||
4 | #include <asm/msr-index.h> | 4 | #include <asm/msr-index.h> |
5 | 5 | ||
6 | #ifdef __KERNEL__ | ||
7 | #ifndef __ASSEMBLY__ | 6 | #ifndef __ASSEMBLY__ |
8 | 7 | ||
9 | #include <linux/types.h> | 8 | #include <linux/types.h> |
9 | #include <linux/ioctl.h> | ||
10 | |||
11 | #define X86_IOC_RDMSR_REGS _IOWR('c', 0xA0, __u32[8]) | ||
12 | #define X86_IOC_WRMSR_REGS _IOWR('c', 0xA1, __u32[8]) | ||
13 | |||
14 | #ifdef __KERNEL__ | ||
15 | |||
10 | #include <asm/asm.h> | 16 | #include <asm/asm.h> |
11 | #include <asm/errno.h> | 17 | #include <asm/errno.h> |
12 | #include <asm/cpumask.h> | 18 | #include <asm/cpumask.h> |
@@ -67,23 +73,7 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, | |||
67 | ".previous\n\t" | 73 | ".previous\n\t" |
68 | _ASM_EXTABLE(2b, 3b) | 74 | _ASM_EXTABLE(2b, 3b) |
69 | : [err] "=r" (*err), EAX_EDX_RET(val, low, high) | 75 | : [err] "=r" (*err), EAX_EDX_RET(val, low, high) |
70 | : "c" (msr), [fault] "i" (-EFAULT)); | 76 | : "c" (msr), [fault] "i" (-EIO)); |
71 | return EAX_EDX_VAL(val, low, high); | ||
72 | } | ||
73 | |||
74 | static inline unsigned long long native_read_msr_amd_safe(unsigned int msr, | ||
75 | int *err) | ||
76 | { | ||
77 | DECLARE_ARGS(val, low, high); | ||
78 | |||
79 | asm volatile("2: rdmsr ; xor %0,%0\n" | ||
80 | "1:\n\t" | ||
81 | ".section .fixup,\"ax\"\n\t" | ||
82 | "3: mov %3,%0 ; jmp 1b\n\t" | ||
83 | ".previous\n\t" | ||
84 | _ASM_EXTABLE(2b, 3b) | ||
85 | : "=r" (*err), EAX_EDX_RET(val, low, high) | ||
86 | : "c" (msr), "D" (0x9c5a203a), "i" (-EFAULT)); | ||
87 | return EAX_EDX_VAL(val, low, high); | 77 | return EAX_EDX_VAL(val, low, high); |
88 | } | 78 | } |
89 | 79 | ||
@@ -106,13 +96,16 @@ notrace static inline int native_write_msr_safe(unsigned int msr, | |||
106 | _ASM_EXTABLE(2b, 3b) | 96 | _ASM_EXTABLE(2b, 3b) |
107 | : [err] "=a" (err) | 97 | : [err] "=a" (err) |
108 | : "c" (msr), "0" (low), "d" (high), | 98 | : "c" (msr), "0" (low), "d" (high), |
109 | [fault] "i" (-EFAULT) | 99 | [fault] "i" (-EIO) |
110 | : "memory"); | 100 | : "memory"); |
111 | return err; | 101 | return err; |
112 | } | 102 | } |
113 | 103 | ||
114 | extern unsigned long long native_read_tsc(void); | 104 | extern unsigned long long native_read_tsc(void); |
115 | 105 | ||
106 | extern int native_rdmsr_safe_regs(u32 regs[8]); | ||
107 | extern int native_wrmsr_safe_regs(u32 regs[8]); | ||
108 | |||
116 | static __always_inline unsigned long long __native_read_tsc(void) | 109 | static __always_inline unsigned long long __native_read_tsc(void) |
117 | { | 110 | { |
118 | DECLARE_ARGS(val, low, high); | 111 | DECLARE_ARGS(val, low, high); |
@@ -181,14 +174,44 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) | |||
181 | *p = native_read_msr_safe(msr, &err); | 174 | *p = native_read_msr_safe(msr, &err); |
182 | return err; | 175 | return err; |
183 | } | 176 | } |
177 | |||
184 | static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) | 178 | static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) |
185 | { | 179 | { |
180 | u32 gprs[8] = { 0 }; | ||
186 | int err; | 181 | int err; |
187 | 182 | ||
188 | *p = native_read_msr_amd_safe(msr, &err); | 183 | gprs[1] = msr; |
184 | gprs[7] = 0x9c5a203a; | ||
185 | |||
186 | err = native_rdmsr_safe_regs(gprs); | ||
187 | |||
188 | *p = gprs[0] | ((u64)gprs[2] << 32); | ||
189 | |||
189 | return err; | 190 | return err; |
190 | } | 191 | } |
191 | 192 | ||
193 | static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) | ||
194 | { | ||
195 | u32 gprs[8] = { 0 }; | ||
196 | |||
197 | gprs[0] = (u32)val; | ||
198 | gprs[1] = msr; | ||
199 | gprs[2] = val >> 32; | ||
200 | gprs[7] = 0x9c5a203a; | ||
201 | |||
202 | return native_wrmsr_safe_regs(gprs); | ||
203 | } | ||
204 | |||
205 | static inline int rdmsr_safe_regs(u32 regs[8]) | ||
206 | { | ||
207 | return native_rdmsr_safe_regs(regs); | ||
208 | } | ||
209 | |||
210 | static inline int wrmsr_safe_regs(u32 regs[8]) | ||
211 | { | ||
212 | return native_wrmsr_safe_regs(regs); | ||
213 | } | ||
214 | |||
192 | #define rdtscl(low) \ | 215 | #define rdtscl(low) \ |
193 | ((low) = (u32)__native_read_tsc()) | 216 | ((low) = (u32)__native_read_tsc()) |
194 | 217 | ||
@@ -228,6 +251,8 @@ void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); | |||
228 | void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); | 251 | void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); |
229 | int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); | 252 | int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); |
230 | int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); | 253 | int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); |
254 | int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); | ||
255 | int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); | ||
231 | #else /* CONFIG_SMP */ | 256 | #else /* CONFIG_SMP */ |
232 | static inline int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) | 257 | static inline int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) |
233 | { | 258 | { |
@@ -258,7 +283,15 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | |||
258 | { | 283 | { |
259 | return wrmsr_safe(msr_no, l, h); | 284 | return wrmsr_safe(msr_no, l, h); |
260 | } | 285 | } |
286 | static inline int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) | ||
287 | { | ||
288 | return rdmsr_safe_regs(regs); | ||
289 | } | ||
290 | static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) | ||
291 | { | ||
292 | return wrmsr_safe_regs(regs); | ||
293 | } | ||
261 | #endif /* CONFIG_SMP */ | 294 | #endif /* CONFIG_SMP */ |
262 | #endif /* __ASSEMBLY__ */ | ||
263 | #endif /* __KERNEL__ */ | 295 | #endif /* __KERNEL__ */ |
296 | #endif /* __ASSEMBLY__ */ | ||
264 | #endif /* _ASM_X86_MSR_H */ | 297 | #endif /* _ASM_X86_MSR_H */ |
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index c97264409934..e63cf7d441e1 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h | |||
@@ -45,8 +45,8 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, | |||
45 | void __user *, size_t *, loff_t *); | 45 | void __user *, size_t *, loff_t *); |
46 | extern int unknown_nmi_panic; | 46 | extern int unknown_nmi_panic; |
47 | 47 | ||
48 | void __trigger_all_cpu_backtrace(void); | 48 | void arch_trigger_all_cpu_backtrace(void); |
49 | #define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() | 49 | #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace |
50 | 50 | ||
51 | static inline void localise_nmi_watchdog(void) | 51 | static inline void localise_nmi_watchdog(void) |
52 | { | 52 | { |
@@ -72,7 +72,6 @@ void lapic_watchdog_stop(void); | |||
72 | int lapic_watchdog_init(unsigned nmi_hz); | 72 | int lapic_watchdog_init(unsigned nmi_hz); |
73 | int lapic_wd_event(unsigned nmi_hz); | 73 | int lapic_wd_event(unsigned nmi_hz); |
74 | unsigned lapic_adjust_nmi_hz(unsigned hz); | 74 | unsigned lapic_adjust_nmi_hz(unsigned hz); |
75 | int lapic_watchdog_ok(void); | ||
76 | void disable_lapic_nmi_watchdog(void); | 75 | void disable_lapic_nmi_watchdog(void); |
77 | void enable_lapic_nmi_watchdog(void); | 76 | void enable_lapic_nmi_watchdog(void); |
78 | void stop_nmi(void); | 77 | void stop_nmi(void); |
diff --git a/arch/x86/include/asm/param.h b/arch/x86/include/asm/param.h index 6f0d0422f4ca..965d45427975 100644 --- a/arch/x86/include/asm/param.h +++ b/arch/x86/include/asm/param.h | |||
@@ -1,22 +1 @@ | |||
1 | #ifndef _ASM_X86_PARAM_H | #include <asm-generic/param.h> | |
2 | #define _ASM_X86_PARAM_H | ||
3 | |||
4 | #ifdef __KERNEL__ | ||
5 | # define HZ CONFIG_HZ /* Internal kernel timer frequency */ | ||
6 | # define USER_HZ 100 /* some user interfaces are */ | ||
7 | # define CLOCKS_PER_SEC (USER_HZ) /* in "ticks" like times() */ | ||
8 | #endif | ||
9 | |||
10 | #ifndef HZ | ||
11 | #define HZ 100 | ||
12 | #endif | ||
13 | |||
14 | #define EXEC_PAGESIZE 4096 | ||
15 | |||
16 | #ifndef NOGROUP | ||
17 | #define NOGROUP (-1) | ||
18 | #endif | ||
19 | |||
20 | #define MAXHOSTNAMELEN 64 /* max length of hostname */ | ||
21 | |||
22 | #endif /* _ASM_X86_PARAM_H */ | ||
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 4fb37c8a0832..40d6586af25b 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -7,689 +7,11 @@ | |||
7 | #include <asm/pgtable_types.h> | 7 | #include <asm/pgtable_types.h> |
8 | #include <asm/asm.h> | 8 | #include <asm/asm.h> |
9 | 9 | ||
10 | /* Bitmask of what can be clobbered: usually at least eax. */ | 10 | #include <asm/paravirt_types.h> |
11 | #define CLBR_NONE 0 | ||
12 | #define CLBR_EAX (1 << 0) | ||
13 | #define CLBR_ECX (1 << 1) | ||
14 | #define CLBR_EDX (1 << 2) | ||
15 | #define CLBR_EDI (1 << 3) | ||
16 | |||
17 | #ifdef CONFIG_X86_32 | ||
18 | /* CLBR_ANY should match all regs platform has. For i386, that's just it */ | ||
19 | #define CLBR_ANY ((1 << 4) - 1) | ||
20 | |||
21 | #define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) | ||
22 | #define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) | ||
23 | #define CLBR_SCRATCH (0) | ||
24 | #else | ||
25 | #define CLBR_RAX CLBR_EAX | ||
26 | #define CLBR_RCX CLBR_ECX | ||
27 | #define CLBR_RDX CLBR_EDX | ||
28 | #define CLBR_RDI CLBR_EDI | ||
29 | #define CLBR_RSI (1 << 4) | ||
30 | #define CLBR_R8 (1 << 5) | ||
31 | #define CLBR_R9 (1 << 6) | ||
32 | #define CLBR_R10 (1 << 7) | ||
33 | #define CLBR_R11 (1 << 8) | ||
34 | |||
35 | #define CLBR_ANY ((1 << 9) - 1) | ||
36 | |||
37 | #define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ | ||
38 | CLBR_RCX | CLBR_R8 | CLBR_R9) | ||
39 | #define CLBR_RET_REG (CLBR_RAX) | ||
40 | #define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) | ||
41 | |||
42 | #include <asm/desc_defs.h> | ||
43 | #endif /* X86_64 */ | ||
44 | |||
45 | #define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) | ||
46 | 11 | ||
47 | #ifndef __ASSEMBLY__ | 12 | #ifndef __ASSEMBLY__ |
48 | #include <linux/types.h> | 13 | #include <linux/types.h> |
49 | #include <linux/cpumask.h> | 14 | #include <linux/cpumask.h> |
50 | #include <asm/kmap_types.h> | ||
51 | #include <asm/desc_defs.h> | ||
52 | |||
53 | struct page; | ||
54 | struct thread_struct; | ||
55 | struct desc_ptr; | ||
56 | struct tss_struct; | ||
57 | struct mm_struct; | ||
58 | struct desc_struct; | ||
59 | struct task_struct; | ||
60 | |||
61 | /* | ||
62 | * Wrapper type for pointers to code which uses the non-standard | ||
63 | * calling convention. See PV_CALL_SAVE_REGS_THUNK below. | ||
64 | */ | ||
65 | struct paravirt_callee_save { | ||
66 | void *func; | ||
67 | }; | ||
68 | |||
69 | /* general info */ | ||
70 | struct pv_info { | ||
71 | unsigned int kernel_rpl; | ||
72 | int shared_kernel_pmd; | ||
73 | int paravirt_enabled; | ||
74 | const char *name; | ||
75 | }; | ||
76 | |||
77 | struct pv_init_ops { | ||
78 | /* | ||
79 | * Patch may replace one of the defined code sequences with | ||
80 | * arbitrary code, subject to the same register constraints. | ||
81 | * This generally means the code is not free to clobber any | ||
82 | * registers other than EAX. The patch function should return | ||
83 | * the number of bytes of code generated, as we nop pad the | ||
84 | * rest in generic code. | ||
85 | */ | ||
86 | unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, | ||
87 | unsigned long addr, unsigned len); | ||
88 | |||
89 | /* Basic arch-specific setup */ | ||
90 | void (*arch_setup)(void); | ||
91 | char *(*memory_setup)(void); | ||
92 | void (*post_allocator_init)(void); | ||
93 | |||
94 | /* Print a banner to identify the environment */ | ||
95 | void (*banner)(void); | ||
96 | }; | ||
97 | |||
98 | |||
99 | struct pv_lazy_ops { | ||
100 | /* Set deferred update mode, used for batching operations. */ | ||
101 | void (*enter)(void); | ||
102 | void (*leave)(void); | ||
103 | }; | ||
104 | |||
105 | struct pv_time_ops { | ||
106 | void (*time_init)(void); | ||
107 | |||
108 | /* Set and set time of day */ | ||
109 | unsigned long (*get_wallclock)(void); | ||
110 | int (*set_wallclock)(unsigned long); | ||
111 | |||
112 | unsigned long long (*sched_clock)(void); | ||
113 | unsigned long (*get_tsc_khz)(void); | ||
114 | }; | ||
115 | |||
116 | struct pv_cpu_ops { | ||
117 | /* hooks for various privileged instructions */ | ||
118 | unsigned long (*get_debugreg)(int regno); | ||
119 | void (*set_debugreg)(int regno, unsigned long value); | ||
120 | |||
121 | void (*clts)(void); | ||
122 | |||
123 | unsigned long (*read_cr0)(void); | ||
124 | void (*write_cr0)(unsigned long); | ||
125 | |||
126 | unsigned long (*read_cr4_safe)(void); | ||
127 | unsigned long (*read_cr4)(void); | ||
128 | void (*write_cr4)(unsigned long); | ||
129 | |||
130 | #ifdef CONFIG_X86_64 | ||
131 | unsigned long (*read_cr8)(void); | ||
132 | void (*write_cr8)(unsigned long); | ||
133 | #endif | ||
134 | |||
135 | /* Segment descriptor handling */ | ||
136 | void (*load_tr_desc)(void); | ||
137 | void (*load_gdt)(const struct desc_ptr *); | ||
138 | void (*load_idt)(const struct desc_ptr *); | ||
139 | void (*store_gdt)(struct desc_ptr *); | ||
140 | void (*store_idt)(struct desc_ptr *); | ||
141 | void (*set_ldt)(const void *desc, unsigned entries); | ||
142 | unsigned long (*store_tr)(void); | ||
143 | void (*load_tls)(struct thread_struct *t, unsigned int cpu); | ||
144 | #ifdef CONFIG_X86_64 | ||
145 | void (*load_gs_index)(unsigned int idx); | ||
146 | #endif | ||
147 | void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, | ||
148 | const void *desc); | ||
149 | void (*write_gdt_entry)(struct desc_struct *, | ||
150 | int entrynum, const void *desc, int size); | ||
151 | void (*write_idt_entry)(gate_desc *, | ||
152 | int entrynum, const gate_desc *gate); | ||
153 | void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); | ||
154 | void (*free_ldt)(struct desc_struct *ldt, unsigned entries); | ||
155 | |||
156 | void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); | ||
157 | |||
158 | void (*set_iopl_mask)(unsigned mask); | ||
159 | |||
160 | void (*wbinvd)(void); | ||
161 | void (*io_delay)(void); | ||
162 | |||
163 | /* cpuid emulation, mostly so that caps bits can be disabled */ | ||
164 | void (*cpuid)(unsigned int *eax, unsigned int *ebx, | ||
165 | unsigned int *ecx, unsigned int *edx); | ||
166 | |||
167 | /* MSR, PMC and TSR operations. | ||
168 | err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ | ||
169 | u64 (*read_msr_amd)(unsigned int msr, int *err); | ||
170 | u64 (*read_msr)(unsigned int msr, int *err); | ||
171 | int (*write_msr)(unsigned int msr, unsigned low, unsigned high); | ||
172 | |||
173 | u64 (*read_tsc)(void); | ||
174 | u64 (*read_pmc)(int counter); | ||
175 | unsigned long long (*read_tscp)(unsigned int *aux); | ||
176 | |||
177 | /* | ||
178 | * Atomically enable interrupts and return to userspace. This | ||
179 | * is only ever used to return to 32-bit processes; in a | ||
180 | * 64-bit kernel, it's used for 32-on-64 compat processes, but | ||
181 | * never native 64-bit processes. (Jump, not call.) | ||
182 | */ | ||
183 | void (*irq_enable_sysexit)(void); | ||
184 | |||
185 | /* | ||
186 | * Switch to usermode gs and return to 64-bit usermode using | ||
187 | * sysret. Only used in 64-bit kernels to return to 64-bit | ||
188 | * processes. Usermode register state, including %rsp, must | ||
189 | * already be restored. | ||
190 | */ | ||
191 | void (*usergs_sysret64)(void); | ||
192 | |||
193 | /* | ||
194 | * Switch to usermode gs and return to 32-bit usermode using | ||
195 | * sysret. Used to return to 32-on-64 compat processes. | ||
196 | * Other usermode register state, including %esp, must already | ||
197 | * be restored. | ||
198 | */ | ||
199 | void (*usergs_sysret32)(void); | ||
200 | |||
201 | /* Normal iret. Jump to this with the standard iret stack | ||
202 | frame set up. */ | ||
203 | void (*iret)(void); | ||
204 | |||
205 | void (*swapgs)(void); | ||
206 | |||
207 | void (*start_context_switch)(struct task_struct *prev); | ||
208 | void (*end_context_switch)(struct task_struct *next); | ||
209 | }; | ||
210 | |||
211 | struct pv_irq_ops { | ||
212 | void (*init_IRQ)(void); | ||
213 | |||
214 | /* | ||
215 | * Get/set interrupt state. save_fl and restore_fl are only | ||
216 | * expected to use X86_EFLAGS_IF; all other bits | ||
217 | * returned from save_fl are undefined, and may be ignored by | ||
218 | * restore_fl. | ||
219 | * | ||
220 | * NOTE: These functions callers expect the callee to preserve | ||
221 | * more registers than the standard C calling convention. | ||
222 | */ | ||
223 | struct paravirt_callee_save save_fl; | ||
224 | struct paravirt_callee_save restore_fl; | ||
225 | struct paravirt_callee_save irq_disable; | ||
226 | struct paravirt_callee_save irq_enable; | ||
227 | |||
228 | void (*safe_halt)(void); | ||
229 | void (*halt)(void); | ||
230 | |||
231 | #ifdef CONFIG_X86_64 | ||
232 | void (*adjust_exception_frame)(void); | ||
233 | #endif | ||
234 | }; | ||
235 | |||
236 | struct pv_apic_ops { | ||
237 | #ifdef CONFIG_X86_LOCAL_APIC | ||
238 | void (*setup_boot_clock)(void); | ||
239 | void (*setup_secondary_clock)(void); | ||
240 | |||
241 | void (*startup_ipi_hook)(int phys_apicid, | ||
242 | unsigned long start_eip, | ||
243 | unsigned long start_esp); | ||
244 | #endif | ||
245 | }; | ||
246 | |||
247 | struct pv_mmu_ops { | ||
248 | /* | ||
249 | * Called before/after init_mm pagetable setup. setup_start | ||
250 | * may reset %cr3, and may pre-install parts of the pagetable; | ||
251 | * pagetable setup is expected to preserve any existing | ||
252 | * mapping. | ||
253 | */ | ||
254 | void (*pagetable_setup_start)(pgd_t *pgd_base); | ||
255 | void (*pagetable_setup_done)(pgd_t *pgd_base); | ||
256 | |||
257 | unsigned long (*read_cr2)(void); | ||
258 | void (*write_cr2)(unsigned long); | ||
259 | |||
260 | unsigned long (*read_cr3)(void); | ||
261 | void (*write_cr3)(unsigned long); | ||
262 | |||
263 | /* | ||
264 | * Hooks for intercepting the creation/use/destruction of an | ||
265 | * mm_struct. | ||
266 | */ | ||
267 | void (*activate_mm)(struct mm_struct *prev, | ||
268 | struct mm_struct *next); | ||
269 | void (*dup_mmap)(struct mm_struct *oldmm, | ||
270 | struct mm_struct *mm); | ||
271 | void (*exit_mmap)(struct mm_struct *mm); | ||
272 | |||
273 | |||
274 | /* TLB operations */ | ||
275 | void (*flush_tlb_user)(void); | ||
276 | void (*flush_tlb_kernel)(void); | ||
277 | void (*flush_tlb_single)(unsigned long addr); | ||
278 | void (*flush_tlb_others)(const struct cpumask *cpus, | ||
279 | struct mm_struct *mm, | ||
280 | unsigned long va); | ||
281 | |||
282 | /* Hooks for allocating and freeing a pagetable top-level */ | ||
283 | int (*pgd_alloc)(struct mm_struct *mm); | ||
284 | void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd); | ||
285 | |||
286 | /* | ||
287 | * Hooks for allocating/releasing pagetable pages when they're | ||
288 | * attached to a pagetable | ||
289 | */ | ||
290 | void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); | ||
291 | void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); | ||
292 | void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count); | ||
293 | void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); | ||
294 | void (*release_pte)(unsigned long pfn); | ||
295 | void (*release_pmd)(unsigned long pfn); | ||
296 | void (*release_pud)(unsigned long pfn); | ||
297 | |||
298 | /* Pagetable manipulation functions */ | ||
299 | void (*set_pte)(pte_t *ptep, pte_t pteval); | ||
300 | void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, | ||
301 | pte_t *ptep, pte_t pteval); | ||
302 | void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); | ||
303 | void (*pte_update)(struct mm_struct *mm, unsigned long addr, | ||
304 | pte_t *ptep); | ||
305 | void (*pte_update_defer)(struct mm_struct *mm, | ||
306 | unsigned long addr, pte_t *ptep); | ||
307 | |||
308 | pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, | ||
309 | pte_t *ptep); | ||
310 | void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, | ||
311 | pte_t *ptep, pte_t pte); | ||
312 | |||
313 | struct paravirt_callee_save pte_val; | ||
314 | struct paravirt_callee_save make_pte; | ||
315 | |||
316 | struct paravirt_callee_save pgd_val; | ||
317 | struct paravirt_callee_save make_pgd; | ||
318 | |||
319 | #if PAGETABLE_LEVELS >= 3 | ||
320 | #ifdef CONFIG_X86_PAE | ||
321 | void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); | ||
322 | void (*pte_clear)(struct mm_struct *mm, unsigned long addr, | ||
323 | pte_t *ptep); | ||
324 | void (*pmd_clear)(pmd_t *pmdp); | ||
325 | |||
326 | #endif /* CONFIG_X86_PAE */ | ||
327 | |||
328 | void (*set_pud)(pud_t *pudp, pud_t pudval); | ||
329 | |||
330 | struct paravirt_callee_save pmd_val; | ||
331 | struct paravirt_callee_save make_pmd; | ||
332 | |||
333 | #if PAGETABLE_LEVELS == 4 | ||
334 | struct paravirt_callee_save pud_val; | ||
335 | struct paravirt_callee_save make_pud; | ||
336 | |||
337 | void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); | ||
338 | #endif /* PAGETABLE_LEVELS == 4 */ | ||
339 | #endif /* PAGETABLE_LEVELS >= 3 */ | ||
340 | |||
341 | #ifdef CONFIG_HIGHPTE | ||
342 | void *(*kmap_atomic_pte)(struct page *page, enum km_type type); | ||
343 | #endif | ||
344 | |||
345 | struct pv_lazy_ops lazy_mode; | ||
346 | |||
347 | /* dom0 ops */ | ||
348 | |||
349 | /* Sometimes the physical address is a pfn, and sometimes its | ||
350 | an mfn. We can tell which is which from the index. */ | ||
351 | void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, | ||
352 | phys_addr_t phys, pgprot_t flags); | ||
353 | }; | ||
354 | |||
355 | struct raw_spinlock; | ||
356 | struct pv_lock_ops { | ||
357 | int (*spin_is_locked)(struct raw_spinlock *lock); | ||
358 | int (*spin_is_contended)(struct raw_spinlock *lock); | ||
359 | void (*spin_lock)(struct raw_spinlock *lock); | ||
360 | void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags); | ||
361 | int (*spin_trylock)(struct raw_spinlock *lock); | ||
362 | void (*spin_unlock)(struct raw_spinlock *lock); | ||
363 | }; | ||
364 | |||
365 | /* This contains all the paravirt structures: we get a convenient | ||
366 | * number for each function using the offset which we use to indicate | ||
367 | * what to patch. */ | ||
368 | struct paravirt_patch_template { | ||
369 | struct pv_init_ops pv_init_ops; | ||
370 | struct pv_time_ops pv_time_ops; | ||
371 | struct pv_cpu_ops pv_cpu_ops; | ||
372 | struct pv_irq_ops pv_irq_ops; | ||
373 | struct pv_apic_ops pv_apic_ops; | ||
374 | struct pv_mmu_ops pv_mmu_ops; | ||
375 | struct pv_lock_ops pv_lock_ops; | ||
376 | }; | ||
377 | |||
378 | extern struct pv_info pv_info; | ||
379 | extern struct pv_init_ops pv_init_ops; | ||
380 | extern struct pv_time_ops pv_time_ops; | ||
381 | extern struct pv_cpu_ops pv_cpu_ops; | ||
382 | extern struct pv_irq_ops pv_irq_ops; | ||
383 | extern struct pv_apic_ops pv_apic_ops; | ||
384 | extern struct pv_mmu_ops pv_mmu_ops; | ||
385 | extern struct pv_lock_ops pv_lock_ops; | ||
386 | |||
387 | #define PARAVIRT_PATCH(x) \ | ||
388 | (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) | ||
389 | |||
390 | #define paravirt_type(op) \ | ||
391 | [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ | ||
392 | [paravirt_opptr] "i" (&(op)) | ||
393 | #define paravirt_clobber(clobber) \ | ||
394 | [paravirt_clobber] "i" (clobber) | ||
395 | |||
396 | /* | ||
397 | * Generate some code, and mark it as patchable by the | ||
398 | * apply_paravirt() alternate instruction patcher. | ||
399 | */ | ||
400 | #define _paravirt_alt(insn_string, type, clobber) \ | ||
401 | "771:\n\t" insn_string "\n" "772:\n" \ | ||
402 | ".pushsection .parainstructions,\"a\"\n" \ | ||
403 | _ASM_ALIGN "\n" \ | ||
404 | _ASM_PTR " 771b\n" \ | ||
405 | " .byte " type "\n" \ | ||
406 | " .byte 772b-771b\n" \ | ||
407 | " .short " clobber "\n" \ | ||
408 | ".popsection\n" | ||
409 | |||
410 | /* Generate patchable code, with the default asm parameters. */ | ||
411 | #define paravirt_alt(insn_string) \ | ||
412 | _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") | ||
413 | |||
414 | /* Simple instruction patching code. */ | ||
415 | #define DEF_NATIVE(ops, name, code) \ | ||
416 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ | ||
417 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") | ||
418 | |||
419 | unsigned paravirt_patch_nop(void); | ||
420 | unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); | ||
421 | unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); | ||
422 | unsigned paravirt_patch_ignore(unsigned len); | ||
423 | unsigned paravirt_patch_call(void *insnbuf, | ||
424 | const void *target, u16 tgt_clobbers, | ||
425 | unsigned long addr, u16 site_clobbers, | ||
426 | unsigned len); | ||
427 | unsigned paravirt_patch_jmp(void *insnbuf, const void *target, | ||
428 | unsigned long addr, unsigned len); | ||
429 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, | ||
430 | unsigned long addr, unsigned len); | ||
431 | |||
432 | unsigned paravirt_patch_insns(void *insnbuf, unsigned len, | ||
433 | const char *start, const char *end); | ||
434 | |||
435 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | ||
436 | unsigned long addr, unsigned len); | ||
437 | |||
438 | int paravirt_disable_iospace(void); | ||
439 | |||
440 | /* | ||
441 | * This generates an indirect call based on the operation type number. | ||
442 | * The type number, computed in PARAVIRT_PATCH, is derived from the | ||
443 | * offset into the paravirt_patch_template structure, and can therefore be | ||
444 | * freely converted back into a structure offset. | ||
445 | */ | ||
446 | #define PARAVIRT_CALL "call *%c[paravirt_opptr];" | ||
447 | |||
448 | /* | ||
449 | * These macros are intended to wrap calls through one of the paravirt | ||
450 | * ops structs, so that they can be later identified and patched at | ||
451 | * runtime. | ||
452 | * | ||
453 | * Normally, a call to a pv_op function is a simple indirect call: | ||
454 | * (pv_op_struct.operations)(args...). | ||
455 | * | ||
456 | * Unfortunately, this is a relatively slow operation for modern CPUs, | ||
457 | * because it cannot necessarily determine what the destination | ||
458 | * address is. In this case, the address is a runtime constant, so at | ||
459 | * the very least we can patch the call to e a simple direct call, or | ||
460 | * ideally, patch an inline implementation into the callsite. (Direct | ||
461 | * calls are essentially free, because the call and return addresses | ||
462 | * are completely predictable.) | ||
463 | * | ||
464 | * For i386, these macros rely on the standard gcc "regparm(3)" calling | ||
465 | * convention, in which the first three arguments are placed in %eax, | ||
466 | * %edx, %ecx (in that order), and the remaining arguments are placed | ||
467 | * on the stack. All caller-save registers (eax,edx,ecx) are expected | ||
468 | * to be modified (either clobbered or used for return values). | ||
469 | * X86_64, on the other hand, already specifies a register-based calling | ||
470 | * conventions, returning at %rax, with parameteres going on %rdi, %rsi, | ||
471 | * %rdx, and %rcx. Note that for this reason, x86_64 does not need any | ||
472 | * special handling for dealing with 4 arguments, unlike i386. | ||
473 | * However, x86_64 also have to clobber all caller saved registers, which | ||
474 | * unfortunately, are quite a bit (r8 - r11) | ||
475 | * | ||
476 | * The call instruction itself is marked by placing its start address | ||
477 | * and size into the .parainstructions section, so that | ||
478 | * apply_paravirt() in arch/i386/kernel/alternative.c can do the | ||
479 | * appropriate patching under the control of the backend pv_init_ops | ||
480 | * implementation. | ||
481 | * | ||
482 | * Unfortunately there's no way to get gcc to generate the args setup | ||
483 | * for the call, and then allow the call itself to be generated by an | ||
484 | * inline asm. Because of this, we must do the complete arg setup and | ||
485 | * return value handling from within these macros. This is fairly | ||
486 | * cumbersome. | ||
487 | * | ||
488 | * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. | ||
489 | * It could be extended to more arguments, but there would be little | ||
490 | * to be gained from that. For each number of arguments, there are | ||
491 | * the two VCALL and CALL variants for void and non-void functions. | ||
492 | * | ||
493 | * When there is a return value, the invoker of the macro must specify | ||
494 | * the return type. The macro then uses sizeof() on that type to | ||
495 | * determine whether its a 32 or 64 bit value, and places the return | ||
496 | * in the right register(s) (just %eax for 32-bit, and %edx:%eax for | ||
497 | * 64-bit). For x86_64 machines, it just returns at %rax regardless of | ||
498 | * the return value size. | ||
499 | * | ||
500 | * 64-bit arguments are passed as a pair of adjacent 32-bit arguments | ||
501 | * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments | ||
502 | * in low,high order | ||
503 | * | ||
504 | * Small structures are passed and returned in registers. The macro | ||
505 | * calling convention can't directly deal with this, so the wrapper | ||
506 | * functions must do this. | ||
507 | * | ||
508 | * These PVOP_* macros are only defined within this header. This | ||
509 | * means that all uses must be wrapped in inline functions. This also | ||
510 | * makes sure the incoming and outgoing types are always correct. | ||
511 | */ | ||
512 | #ifdef CONFIG_X86_32 | ||
513 | #define PVOP_VCALL_ARGS \ | ||
514 | unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx | ||
515 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS | ||
516 | |||
517 | #define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) | ||
518 | #define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) | ||
519 | #define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) | ||
520 | |||
521 | #define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ | ||
522 | "=c" (__ecx) | ||
523 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS | ||
524 | |||
525 | #define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx) | ||
526 | #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS | ||
527 | |||
528 | #define EXTRA_CLOBBERS | ||
529 | #define VEXTRA_CLOBBERS | ||
530 | #else /* CONFIG_X86_64 */ | ||
531 | #define PVOP_VCALL_ARGS \ | ||
532 | unsigned long __edi = __edi, __esi = __esi, \ | ||
533 | __edx = __edx, __ecx = __ecx | ||
534 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax | ||
535 | |||
536 | #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) | ||
537 | #define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) | ||
538 | #define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) | ||
539 | #define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x)) | ||
540 | |||
541 | #define PVOP_VCALL_CLOBBERS "=D" (__edi), \ | ||
542 | "=S" (__esi), "=d" (__edx), \ | ||
543 | "=c" (__ecx) | ||
544 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) | ||
545 | |||
546 | #define PVOP_VCALLEE_CLOBBERS "=a" (__eax) | ||
547 | #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS | ||
548 | |||
549 | #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" | ||
550 | #define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" | ||
551 | #endif /* CONFIG_X86_32 */ | ||
552 | |||
553 | #ifdef CONFIG_PARAVIRT_DEBUG | ||
554 | #define PVOP_TEST_NULL(op) BUG_ON(op == NULL) | ||
555 | #else | ||
556 | #define PVOP_TEST_NULL(op) ((void)op) | ||
557 | #endif | ||
558 | |||
559 | #define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ | ||
560 | pre, post, ...) \ | ||
561 | ({ \ | ||
562 | rettype __ret; \ | ||
563 | PVOP_CALL_ARGS; \ | ||
564 | PVOP_TEST_NULL(op); \ | ||
565 | /* This is 32-bit specific, but is okay in 64-bit */ \ | ||
566 | /* since this condition will never hold */ \ | ||
567 | if (sizeof(rettype) > sizeof(unsigned long)) { \ | ||
568 | asm volatile(pre \ | ||
569 | paravirt_alt(PARAVIRT_CALL) \ | ||
570 | post \ | ||
571 | : call_clbr \ | ||
572 | : paravirt_type(op), \ | ||
573 | paravirt_clobber(clbr), \ | ||
574 | ##__VA_ARGS__ \ | ||
575 | : "memory", "cc" extra_clbr); \ | ||
576 | __ret = (rettype)((((u64)__edx) << 32) | __eax); \ | ||
577 | } else { \ | ||
578 | asm volatile(pre \ | ||
579 | paravirt_alt(PARAVIRT_CALL) \ | ||
580 | post \ | ||
581 | : call_clbr \ | ||
582 | : paravirt_type(op), \ | ||
583 | paravirt_clobber(clbr), \ | ||
584 | ##__VA_ARGS__ \ | ||
585 | : "memory", "cc" extra_clbr); \ | ||
586 | __ret = (rettype)__eax; \ | ||
587 | } \ | ||
588 | __ret; \ | ||
589 | }) | ||
590 | |||
591 | #define __PVOP_CALL(rettype, op, pre, post, ...) \ | ||
592 | ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \ | ||
593 | EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__) | ||
594 | |||
595 | #define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \ | ||
596 | ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ | ||
597 | PVOP_CALLEE_CLOBBERS, , \ | ||
598 | pre, post, ##__VA_ARGS__) | ||
599 | |||
600 | |||
601 | #define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \ | ||
602 | ({ \ | ||
603 | PVOP_VCALL_ARGS; \ | ||
604 | PVOP_TEST_NULL(op); \ | ||
605 | asm volatile(pre \ | ||
606 | paravirt_alt(PARAVIRT_CALL) \ | ||
607 | post \ | ||
608 | : call_clbr \ | ||
609 | : paravirt_type(op), \ | ||
610 | paravirt_clobber(clbr), \ | ||
611 | ##__VA_ARGS__ \ | ||
612 | : "memory", "cc" extra_clbr); \ | ||
613 | }) | ||
614 | |||
615 | #define __PVOP_VCALL(op, pre, post, ...) \ | ||
616 | ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ | ||
617 | VEXTRA_CLOBBERS, \ | ||
618 | pre, post, ##__VA_ARGS__) | ||
619 | |||
620 | #define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ | ||
621 | ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ | ||
622 | PVOP_VCALLEE_CLOBBERS, , \ | ||
623 | pre, post, ##__VA_ARGS__) | ||
624 | |||
625 | |||
626 | |||
627 | #define PVOP_CALL0(rettype, op) \ | ||
628 | __PVOP_CALL(rettype, op, "", "") | ||
629 | #define PVOP_VCALL0(op) \ | ||
630 | __PVOP_VCALL(op, "", "") | ||
631 | |||
632 | #define PVOP_CALLEE0(rettype, op) \ | ||
633 | __PVOP_CALLEESAVE(rettype, op, "", "") | ||
634 | #define PVOP_VCALLEE0(op) \ | ||
635 | __PVOP_VCALLEESAVE(op, "", "") | ||
636 | |||
637 | |||
638 | #define PVOP_CALL1(rettype, op, arg1) \ | ||
639 | __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) | ||
640 | #define PVOP_VCALL1(op, arg1) \ | ||
641 | __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1)) | ||
642 | |||
643 | #define PVOP_CALLEE1(rettype, op, arg1) \ | ||
644 | __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) | ||
645 | #define PVOP_VCALLEE1(op, arg1) \ | ||
646 | __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1)) | ||
647 | |||
648 | |||
649 | #define PVOP_CALL2(rettype, op, arg1, arg2) \ | ||
650 | __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
651 | PVOP_CALL_ARG2(arg2)) | ||
652 | #define PVOP_VCALL2(op, arg1, arg2) \ | ||
653 | __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
654 | PVOP_CALL_ARG2(arg2)) | ||
655 | |||
656 | #define PVOP_CALLEE2(rettype, op, arg1, arg2) \ | ||
657 | __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
658 | PVOP_CALL_ARG2(arg2)) | ||
659 | #define PVOP_VCALLEE2(op, arg1, arg2) \ | ||
660 | __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
661 | PVOP_CALL_ARG2(arg2)) | ||
662 | |||
663 | |||
664 | #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ | ||
665 | __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
666 | PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) | ||
667 | #define PVOP_VCALL3(op, arg1, arg2, arg3) \ | ||
668 | __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
669 | PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) | ||
670 | |||
671 | /* This is the only difference in x86_64. We can make it much simpler */ | ||
672 | #ifdef CONFIG_X86_32 | ||
673 | #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ | ||
674 | __PVOP_CALL(rettype, op, \ | ||
675 | "push %[_arg4];", "lea 4(%%esp),%%esp;", \ | ||
676 | PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ | ||
677 | PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4))) | ||
678 | #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ | ||
679 | __PVOP_VCALL(op, \ | ||
680 | "push %[_arg4];", "lea 4(%%esp),%%esp;", \ | ||
681 | "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ | ||
682 | "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) | ||
683 | #else | ||
684 | #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ | ||
685 | __PVOP_CALL(rettype, op, "", "", \ | ||
686 | PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ | ||
687 | PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) | ||
688 | #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ | ||
689 | __PVOP_VCALL(op, "", "", \ | ||
690 | PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ | ||
691 | PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) | ||
692 | #endif | ||
693 | 15 | ||
694 | static inline int paravirt_enabled(void) | 16 | static inline int paravirt_enabled(void) |
695 | { | 17 | { |
@@ -820,15 +142,22 @@ static inline u64 paravirt_read_msr(unsigned msr, int *err) | |||
820 | { | 142 | { |
821 | return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); | 143 | return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); |
822 | } | 144 | } |
823 | static inline u64 paravirt_read_msr_amd(unsigned msr, int *err) | 145 | |
146 | static inline int paravirt_rdmsr_regs(u32 *regs) | ||
824 | { | 147 | { |
825 | return PVOP_CALL2(u64, pv_cpu_ops.read_msr_amd, msr, err); | 148 | return PVOP_CALL1(int, pv_cpu_ops.rdmsr_regs, regs); |
826 | } | 149 | } |
150 | |||
827 | static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) | 151 | static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) |
828 | { | 152 | { |
829 | return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); | 153 | return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); |
830 | } | 154 | } |
831 | 155 | ||
156 | static inline int paravirt_wrmsr_regs(u32 *regs) | ||
157 | { | ||
158 | return PVOP_CALL1(int, pv_cpu_ops.wrmsr_regs, regs); | ||
159 | } | ||
160 | |||
832 | /* These should all do BUG_ON(_err), but our headers are too tangled. */ | 161 | /* These should all do BUG_ON(_err), but our headers are too tangled. */ |
833 | #define rdmsr(msr, val1, val2) \ | 162 | #define rdmsr(msr, val1, val2) \ |
834 | do { \ | 163 | do { \ |
@@ -862,6 +191,9 @@ do { \ | |||
862 | _err; \ | 191 | _err; \ |
863 | }) | 192 | }) |
864 | 193 | ||
194 | #define rdmsr_safe_regs(regs) paravirt_rdmsr_regs(regs) | ||
195 | #define wrmsr_safe_regs(regs) paravirt_wrmsr_regs(regs) | ||
196 | |||
865 | static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) | 197 | static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) |
866 | { | 198 | { |
867 | int err; | 199 | int err; |
@@ -871,12 +203,31 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) | |||
871 | } | 203 | } |
872 | static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) | 204 | static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) |
873 | { | 205 | { |
206 | u32 gprs[8] = { 0 }; | ||
874 | int err; | 207 | int err; |
875 | 208 | ||
876 | *p = paravirt_read_msr_amd(msr, &err); | 209 | gprs[1] = msr; |
210 | gprs[7] = 0x9c5a203a; | ||
211 | |||
212 | err = paravirt_rdmsr_regs(gprs); | ||
213 | |||
214 | *p = gprs[0] | ((u64)gprs[2] << 32); | ||
215 | |||
877 | return err; | 216 | return err; |
878 | } | 217 | } |
879 | 218 | ||
219 | static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) | ||
220 | { | ||
221 | u32 gprs[8] = { 0 }; | ||
222 | |||
223 | gprs[0] = (u32)val; | ||
224 | gprs[1] = msr; | ||
225 | gprs[2] = val >> 32; | ||
226 | gprs[7] = 0x9c5a203a; | ||
227 | |||
228 | return paravirt_wrmsr_regs(gprs); | ||
229 | } | ||
230 | |||
880 | static inline u64 paravirt_read_tsc(void) | 231 | static inline u64 paravirt_read_tsc(void) |
881 | { | 232 | { |
882 | return PVOP_CALL0(u64, pv_cpu_ops.read_tsc); | 233 | return PVOP_CALL0(u64, pv_cpu_ops.read_tsc); |
@@ -1393,20 +744,6 @@ static inline void pmd_clear(pmd_t *pmdp) | |||
1393 | } | 744 | } |
1394 | #endif /* CONFIG_X86_PAE */ | 745 | #endif /* CONFIG_X86_PAE */ |
1395 | 746 | ||
1396 | /* Lazy mode for batching updates / context switch */ | ||
1397 | enum paravirt_lazy_mode { | ||
1398 | PARAVIRT_LAZY_NONE, | ||
1399 | PARAVIRT_LAZY_MMU, | ||
1400 | PARAVIRT_LAZY_CPU, | ||
1401 | }; | ||
1402 | |||
1403 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void); | ||
1404 | void paravirt_start_context_switch(struct task_struct *prev); | ||
1405 | void paravirt_end_context_switch(struct task_struct *next); | ||
1406 | |||
1407 | void paravirt_enter_lazy_mmu(void); | ||
1408 | void paravirt_leave_lazy_mmu(void); | ||
1409 | |||
1410 | #define __HAVE_ARCH_START_CONTEXT_SWITCH | 747 | #define __HAVE_ARCH_START_CONTEXT_SWITCH |
1411 | static inline void arch_start_context_switch(struct task_struct *prev) | 748 | static inline void arch_start_context_switch(struct task_struct *prev) |
1412 | { | 749 | { |
@@ -1437,12 +774,6 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, | |||
1437 | pv_mmu_ops.set_fixmap(idx, phys, flags); | 774 | pv_mmu_ops.set_fixmap(idx, phys, flags); |
1438 | } | 775 | } |
1439 | 776 | ||
1440 | void _paravirt_nop(void); | ||
1441 | u32 _paravirt_ident_32(u32); | ||
1442 | u64 _paravirt_ident_64(u64); | ||
1443 | |||
1444 | #define paravirt_nop ((void *)_paravirt_nop) | ||
1445 | |||
1446 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) | 777 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) |
1447 | 778 | ||
1448 | static inline int __raw_spin_is_locked(struct raw_spinlock *lock) | 779 | static inline int __raw_spin_is_locked(struct raw_spinlock *lock) |
@@ -1479,17 +810,6 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) | |||
1479 | 810 | ||
1480 | #endif | 811 | #endif |
1481 | 812 | ||
1482 | /* These all sit in the .parainstructions section to tell us what to patch. */ | ||
1483 | struct paravirt_patch_site { | ||
1484 | u8 *instr; /* original instructions */ | ||
1485 | u8 instrtype; /* type of this instruction */ | ||
1486 | u8 len; /* length of original instruction */ | ||
1487 | u16 clobbers; /* what registers you may clobber */ | ||
1488 | }; | ||
1489 | |||
1490 | extern struct paravirt_patch_site __parainstructions[], | ||
1491 | __parainstructions_end[]; | ||
1492 | |||
1493 | #ifdef CONFIG_X86_32 | 813 | #ifdef CONFIG_X86_32 |
1494 | #define PV_SAVE_REGS "pushl %ecx; pushl %edx;" | 814 | #define PV_SAVE_REGS "pushl %ecx; pushl %edx;" |
1495 | #define PV_RESTORE_REGS "popl %edx; popl %ecx;" | 815 | #define PV_RESTORE_REGS "popl %edx; popl %ecx;" |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h new file mode 100644 index 000000000000..25402d0006e7 --- /dev/null +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -0,0 +1,721 @@ | |||
1 | #ifndef _ASM_X86_PARAVIRT_TYPES_H | ||
2 | #define _ASM_X86_PARAVIRT_TYPES_H | ||
3 | |||
4 | /* Bitmask of what can be clobbered: usually at least eax. */ | ||
5 | #define CLBR_NONE 0 | ||
6 | #define CLBR_EAX (1 << 0) | ||
7 | #define CLBR_ECX (1 << 1) | ||
8 | #define CLBR_EDX (1 << 2) | ||
9 | #define CLBR_EDI (1 << 3) | ||
10 | |||
11 | #ifdef CONFIG_X86_32 | ||
12 | /* CLBR_ANY should match all regs platform has. For i386, that's just it */ | ||
13 | #define CLBR_ANY ((1 << 4) - 1) | ||
14 | |||
15 | #define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) | ||
16 | #define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) | ||
17 | #define CLBR_SCRATCH (0) | ||
18 | #else | ||
19 | #define CLBR_RAX CLBR_EAX | ||
20 | #define CLBR_RCX CLBR_ECX | ||
21 | #define CLBR_RDX CLBR_EDX | ||
22 | #define CLBR_RDI CLBR_EDI | ||
23 | #define CLBR_RSI (1 << 4) | ||
24 | #define CLBR_R8 (1 << 5) | ||
25 | #define CLBR_R9 (1 << 6) | ||
26 | #define CLBR_R10 (1 << 7) | ||
27 | #define CLBR_R11 (1 << 8) | ||
28 | |||
29 | #define CLBR_ANY ((1 << 9) - 1) | ||
30 | |||
31 | #define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ | ||
32 | CLBR_RCX | CLBR_R8 | CLBR_R9) | ||
33 | #define CLBR_RET_REG (CLBR_RAX) | ||
34 | #define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) | ||
35 | |||
36 | #endif /* X86_64 */ | ||
37 | |||
38 | #define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) | ||
39 | |||
40 | #ifndef __ASSEMBLY__ | ||
41 | |||
42 | #include <asm/desc_defs.h> | ||
43 | #include <asm/kmap_types.h> | ||
44 | |||
45 | struct page; | ||
46 | struct thread_struct; | ||
47 | struct desc_ptr; | ||
48 | struct tss_struct; | ||
49 | struct mm_struct; | ||
50 | struct desc_struct; | ||
51 | struct task_struct; | ||
52 | struct cpumask; | ||
53 | |||
54 | /* | ||
55 | * Wrapper type for pointers to code which uses the non-standard | ||
56 | * calling convention. See PV_CALL_SAVE_REGS_THUNK below. | ||
57 | */ | ||
58 | struct paravirt_callee_save { | ||
59 | void *func; | ||
60 | }; | ||
61 | |||
62 | /* general info */ | ||
63 | struct pv_info { | ||
64 | unsigned int kernel_rpl; | ||
65 | int shared_kernel_pmd; | ||
66 | int paravirt_enabled; | ||
67 | const char *name; | ||
68 | }; | ||
69 | |||
70 | struct pv_init_ops { | ||
71 | /* | ||
72 | * Patch may replace one of the defined code sequences with | ||
73 | * arbitrary code, subject to the same register constraints. | ||
74 | * This generally means the code is not free to clobber any | ||
75 | * registers other than EAX. The patch function should return | ||
76 | * the number of bytes of code generated, as we nop pad the | ||
77 | * rest in generic code. | ||
78 | */ | ||
79 | unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, | ||
80 | unsigned long addr, unsigned len); | ||
81 | |||
82 | /* Basic arch-specific setup */ | ||
83 | void (*arch_setup)(void); | ||
84 | char *(*memory_setup)(void); | ||
85 | void (*post_allocator_init)(void); | ||
86 | |||
87 | /* Print a banner to identify the environment */ | ||
88 | void (*banner)(void); | ||
89 | }; | ||
90 | |||
91 | |||
92 | struct pv_lazy_ops { | ||
93 | /* Set deferred update mode, used for batching operations. */ | ||
94 | void (*enter)(void); | ||
95 | void (*leave)(void); | ||
96 | }; | ||
97 | |||
98 | struct pv_time_ops { | ||
99 | void (*time_init)(void); | ||
100 | |||
101 | /* Get and set time of day */ | ||
102 | unsigned long (*get_wallclock)(void); | ||
103 | int (*set_wallclock)(unsigned long); | ||
104 | |||
105 | unsigned long long (*sched_clock)(void); | ||
106 | unsigned long (*get_tsc_khz)(void); | ||
107 | }; | ||
108 | |||
109 | struct pv_cpu_ops { | ||
110 | /* hooks for various privileged instructions */ | ||
111 | unsigned long (*get_debugreg)(int regno); | ||
112 | void (*set_debugreg)(int regno, unsigned long value); | ||
113 | |||
114 | void (*clts)(void); | ||
115 | |||
116 | unsigned long (*read_cr0)(void); | ||
117 | void (*write_cr0)(unsigned long); | ||
118 | |||
119 | unsigned long (*read_cr4_safe)(void); | ||
120 | unsigned long (*read_cr4)(void); | ||
121 | void (*write_cr4)(unsigned long); | ||
122 | |||
123 | #ifdef CONFIG_X86_64 | ||
124 | unsigned long (*read_cr8)(void); | ||
125 | void (*write_cr8)(unsigned long); | ||
126 | #endif | ||
127 | |||
128 | /* Segment descriptor handling */ | ||
129 | void (*load_tr_desc)(void); | ||
130 | void (*load_gdt)(const struct desc_ptr *); | ||
131 | void (*load_idt)(const struct desc_ptr *); | ||
132 | void (*store_gdt)(struct desc_ptr *); | ||
133 | void (*store_idt)(struct desc_ptr *); | ||
134 | void (*set_ldt)(const void *desc, unsigned entries); | ||
135 | unsigned long (*store_tr)(void); | ||
136 | void (*load_tls)(struct thread_struct *t, unsigned int cpu); | ||
137 | #ifdef CONFIG_X86_64 | ||
138 | void (*load_gs_index)(unsigned int idx); | ||
139 | #endif | ||
140 | void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, | ||
141 | const void *desc); | ||
142 | void (*write_gdt_entry)(struct desc_struct *, | ||
143 | int entrynum, const void *desc, int size); | ||
144 | void (*write_idt_entry)(gate_desc *, | ||
145 | int entrynum, const gate_desc *gate); | ||
146 | void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); | ||
147 | void (*free_ldt)(struct desc_struct *ldt, unsigned entries); | ||
148 | |||
149 | void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); | ||
150 | |||
151 | void (*set_iopl_mask)(unsigned mask); | ||
152 | |||
153 | void (*wbinvd)(void); | ||
154 | void (*io_delay)(void); | ||
155 | |||
156 | /* cpuid emulation, mostly so that caps bits can be disabled */ | ||
157 | void (*cpuid)(unsigned int *eax, unsigned int *ebx, | ||
158 | unsigned int *ecx, unsigned int *edx); | ||
159 | |||
160 | /* MSR, PMC and TSC operations. | ||
161 | err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ | ||
162 | u64 (*read_msr)(unsigned int msr, int *err); | ||
163 | int (*rdmsr_regs)(u32 *regs); | ||
164 | int (*write_msr)(unsigned int msr, unsigned low, unsigned high); | ||
165 | int (*wrmsr_regs)(u32 *regs); | ||
166 | |||
167 | u64 (*read_tsc)(void); | ||
168 | u64 (*read_pmc)(int counter); | ||
169 | unsigned long long (*read_tscp)(unsigned int *aux); | ||
170 | |||
171 | /* | ||
172 | * Atomically enable interrupts and return to userspace. This | ||
173 | * is only ever used to return to 32-bit processes; in a | ||
174 | * 64-bit kernel, it's used for 32-on-64 compat processes, but | ||
175 | * never native 64-bit processes. (Jump, not call.) | ||
176 | */ | ||
177 | void (*irq_enable_sysexit)(void); | ||
178 | |||
179 | /* | ||
180 | * Switch to usermode gs and return to 64-bit usermode using | ||
181 | * sysret. Only used in 64-bit kernels to return to 64-bit | ||
182 | * processes. Usermode register state, including %rsp, must | ||
183 | * already be restored. | ||
184 | */ | ||
185 | void (*usergs_sysret64)(void); | ||
186 | |||
187 | /* | ||
188 | * Switch to usermode gs and return to 32-bit usermode using | ||
189 | * sysret. Used to return to 32-on-64 compat processes. | ||
190 | * Other usermode register state, including %esp, must already | ||
191 | * be restored. | ||
192 | */ | ||
193 | void (*usergs_sysret32)(void); | ||
194 | |||
195 | /* Normal iret. Jump to this with the standard iret stack | ||
196 | frame set up. */ | ||
197 | void (*iret)(void); | ||
198 | |||
199 | void (*swapgs)(void); | ||
200 | |||
201 | void (*start_context_switch)(struct task_struct *prev); | ||
202 | void (*end_context_switch)(struct task_struct *next); | ||
203 | }; | ||
204 | |||
205 | struct pv_irq_ops { | ||
206 | void (*init_IRQ)(void); | ||
207 | |||
208 | /* | ||
209 | * Get/set interrupt state. save_fl and restore_fl are only | ||
210 | * expected to use X86_EFLAGS_IF; all other bits | ||
211 | * returned from save_fl are undefined, and may be ignored by | ||
212 | * restore_fl. | ||
213 | * | ||
214 | * NOTE: These functions' callers expect the callee to preserve | ||
215 | * more registers than the standard C calling convention. | ||
216 | */ | ||
217 | struct paravirt_callee_save save_fl; | ||
218 | struct paravirt_callee_save restore_fl; | ||
219 | struct paravirt_callee_save irq_disable; | ||
220 | struct paravirt_callee_save irq_enable; | ||
221 | |||
222 | void (*safe_halt)(void); | ||
223 | void (*halt)(void); | ||
224 | |||
225 | #ifdef CONFIG_X86_64 | ||
226 | void (*adjust_exception_frame)(void); | ||
227 | #endif | ||
228 | }; | ||
229 | |||
230 | struct pv_apic_ops { | ||
231 | #ifdef CONFIG_X86_LOCAL_APIC | ||
232 | void (*setup_boot_clock)(void); | ||
233 | void (*setup_secondary_clock)(void); | ||
234 | |||
235 | void (*startup_ipi_hook)(int phys_apicid, | ||
236 | unsigned long start_eip, | ||
237 | unsigned long start_esp); | ||
238 | #endif | ||
239 | }; | ||
240 | |||
241 | struct pv_mmu_ops { | ||
242 | /* | ||
243 | * Called before/after init_mm pagetable setup. setup_start | ||
244 | * may reset %cr3, and may pre-install parts of the pagetable; | ||
245 | * pagetable setup is expected to preserve any existing | ||
246 | * mapping. | ||
247 | */ | ||
248 | void (*pagetable_setup_start)(pgd_t *pgd_base); | ||
249 | void (*pagetable_setup_done)(pgd_t *pgd_base); | ||
250 | |||
251 | unsigned long (*read_cr2)(void); | ||
252 | void (*write_cr2)(unsigned long); | ||
253 | |||
254 | unsigned long (*read_cr3)(void); | ||
255 | void (*write_cr3)(unsigned long); | ||
256 | |||
257 | /* | ||
258 | * Hooks for intercepting the creation/use/destruction of an | ||
259 | * mm_struct. | ||
260 | */ | ||
261 | void (*activate_mm)(struct mm_struct *prev, | ||
262 | struct mm_struct *next); | ||
263 | void (*dup_mmap)(struct mm_struct *oldmm, | ||
264 | struct mm_struct *mm); | ||
265 | void (*exit_mmap)(struct mm_struct *mm); | ||
266 | |||
267 | |||
268 | /* TLB operations */ | ||
269 | void (*flush_tlb_user)(void); | ||
270 | void (*flush_tlb_kernel)(void); | ||
271 | void (*flush_tlb_single)(unsigned long addr); | ||
272 | void (*flush_tlb_others)(const struct cpumask *cpus, | ||
273 | struct mm_struct *mm, | ||
274 | unsigned long va); | ||
275 | |||
276 | /* Hooks for allocating and freeing a pagetable top-level */ | ||
277 | int (*pgd_alloc)(struct mm_struct *mm); | ||
278 | void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd); | ||
279 | |||
280 | /* | ||
281 | * Hooks for allocating/releasing pagetable pages when they're | ||
282 | * attached to a pagetable | ||
283 | */ | ||
284 | void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); | ||
285 | void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); | ||
286 | void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count); | ||
287 | void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); | ||
288 | void (*release_pte)(unsigned long pfn); | ||
289 | void (*release_pmd)(unsigned long pfn); | ||
290 | void (*release_pud)(unsigned long pfn); | ||
291 | |||
292 | /* Pagetable manipulation functions */ | ||
293 | void (*set_pte)(pte_t *ptep, pte_t pteval); | ||
294 | void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, | ||
295 | pte_t *ptep, pte_t pteval); | ||
296 | void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); | ||
297 | void (*pte_update)(struct mm_struct *mm, unsigned long addr, | ||
298 | pte_t *ptep); | ||
299 | void (*pte_update_defer)(struct mm_struct *mm, | ||
300 | unsigned long addr, pte_t *ptep); | ||
301 | |||
302 | pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, | ||
303 | pte_t *ptep); | ||
304 | void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, | ||
305 | pte_t *ptep, pte_t pte); | ||
306 | |||
307 | struct paravirt_callee_save pte_val; | ||
308 | struct paravirt_callee_save make_pte; | ||
309 | |||
310 | struct paravirt_callee_save pgd_val; | ||
311 | struct paravirt_callee_save make_pgd; | ||
312 | |||
313 | #if PAGETABLE_LEVELS >= 3 | ||
314 | #ifdef CONFIG_X86_PAE | ||
315 | void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); | ||
316 | void (*pte_clear)(struct mm_struct *mm, unsigned long addr, | ||
317 | pte_t *ptep); | ||
318 | void (*pmd_clear)(pmd_t *pmdp); | ||
319 | |||
320 | #endif /* CONFIG_X86_PAE */ | ||
321 | |||
322 | void (*set_pud)(pud_t *pudp, pud_t pudval); | ||
323 | |||
324 | struct paravirt_callee_save pmd_val; | ||
325 | struct paravirt_callee_save make_pmd; | ||
326 | |||
327 | #if PAGETABLE_LEVELS == 4 | ||
328 | struct paravirt_callee_save pud_val; | ||
329 | struct paravirt_callee_save make_pud; | ||
330 | |||
331 | void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); | ||
332 | #endif /* PAGETABLE_LEVELS == 4 */ | ||
333 | #endif /* PAGETABLE_LEVELS >= 3 */ | ||
334 | |||
335 | #ifdef CONFIG_HIGHPTE | ||
336 | void *(*kmap_atomic_pte)(struct page *page, enum km_type type); | ||
337 | #endif | ||
338 | |||
339 | struct pv_lazy_ops lazy_mode; | ||
340 | |||
341 | /* dom0 ops */ | ||
342 | |||
343 | /* Sometimes the physical address is a pfn, and sometimes it's | ||
344 | an mfn. We can tell which is which from the index. */ | ||
345 | void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, | ||
346 | phys_addr_t phys, pgprot_t flags); | ||
347 | }; | ||
348 | |||
349 | struct raw_spinlock; | ||
350 | struct pv_lock_ops { | ||
351 | int (*spin_is_locked)(struct raw_spinlock *lock); | ||
352 | int (*spin_is_contended)(struct raw_spinlock *lock); | ||
353 | void (*spin_lock)(struct raw_spinlock *lock); | ||
354 | void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags); | ||
355 | int (*spin_trylock)(struct raw_spinlock *lock); | ||
356 | void (*spin_unlock)(struct raw_spinlock *lock); | ||
357 | }; | ||
358 | |||
359 | /* This contains all the paravirt structures: we get a convenient | ||
360 | * number for each function using the offset which we use to indicate | ||
361 | * what to patch. */ | ||
362 | struct paravirt_patch_template { | ||
363 | struct pv_init_ops pv_init_ops; | ||
364 | struct pv_time_ops pv_time_ops; | ||
365 | struct pv_cpu_ops pv_cpu_ops; | ||
366 | struct pv_irq_ops pv_irq_ops; | ||
367 | struct pv_apic_ops pv_apic_ops; | ||
368 | struct pv_mmu_ops pv_mmu_ops; | ||
369 | struct pv_lock_ops pv_lock_ops; | ||
370 | }; | ||
371 | |||
372 | extern struct pv_info pv_info; | ||
373 | extern struct pv_init_ops pv_init_ops; | ||
374 | extern struct pv_time_ops pv_time_ops; | ||
375 | extern struct pv_cpu_ops pv_cpu_ops; | ||
376 | extern struct pv_irq_ops pv_irq_ops; | ||
377 | extern struct pv_apic_ops pv_apic_ops; | ||
378 | extern struct pv_mmu_ops pv_mmu_ops; | ||
379 | extern struct pv_lock_ops pv_lock_ops; | ||
380 | |||
381 | #define PARAVIRT_PATCH(x) \ | ||
382 | (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) | ||
383 | |||
384 | #define paravirt_type(op) \ | ||
385 | [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ | ||
386 | [paravirt_opptr] "i" (&(op)) | ||
387 | #define paravirt_clobber(clobber) \ | ||
388 | [paravirt_clobber] "i" (clobber) | ||
389 | |||
390 | /* | ||
391 | * Generate some code, and mark it as patchable by the | ||
392 | * apply_paravirt() alternate instruction patcher. | ||
393 | */ | ||
394 | #define _paravirt_alt(insn_string, type, clobber) \ | ||
395 | "771:\n\t" insn_string "\n" "772:\n" \ | ||
396 | ".pushsection .parainstructions,\"a\"\n" \ | ||
397 | _ASM_ALIGN "\n" \ | ||
398 | _ASM_PTR " 771b\n" \ | ||
399 | " .byte " type "\n" \ | ||
400 | " .byte 772b-771b\n" \ | ||
401 | " .short " clobber "\n" \ | ||
402 | ".popsection\n" | ||
403 | |||
404 | /* Generate patchable code, with the default asm parameters. */ | ||
405 | #define paravirt_alt(insn_string) \ | ||
406 | _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") | ||
407 | |||
408 | /* Simple instruction patching code. */ | ||
409 | #define DEF_NATIVE(ops, name, code) \ | ||
410 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ | ||
411 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") | ||
412 | |||
413 | unsigned paravirt_patch_nop(void); | ||
414 | unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); | ||
415 | unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); | ||
416 | unsigned paravirt_patch_ignore(unsigned len); | ||
417 | unsigned paravirt_patch_call(void *insnbuf, | ||
418 | const void *target, u16 tgt_clobbers, | ||
419 | unsigned long addr, u16 site_clobbers, | ||
420 | unsigned len); | ||
421 | unsigned paravirt_patch_jmp(void *insnbuf, const void *target, | ||
422 | unsigned long addr, unsigned len); | ||
423 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, | ||
424 | unsigned long addr, unsigned len); | ||
425 | |||
426 | unsigned paravirt_patch_insns(void *insnbuf, unsigned len, | ||
427 | const char *start, const char *end); | ||
428 | |||
429 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | ||
430 | unsigned long addr, unsigned len); | ||
431 | |||
432 | int paravirt_disable_iospace(void); | ||
433 | |||
434 | /* | ||
435 | * This generates an indirect call based on the operation type number. | ||
436 | * The type number, computed in PARAVIRT_PATCH, is derived from the | ||
437 | * offset into the paravirt_patch_template structure, and can therefore be | ||
438 | * freely converted back into a structure offset. | ||
439 | */ | ||
440 | #define PARAVIRT_CALL "call *%c[paravirt_opptr];" | ||
441 | |||
442 | /* | ||
443 | * These macros are intended to wrap calls through one of the paravirt | ||
444 | * ops structs, so that they can be later identified and patched at | ||
445 | * runtime. | ||
446 | * | ||
447 | * Normally, a call to a pv_op function is a simple indirect call: | ||
448 | * (pv_op_struct.operations)(args...). | ||
449 | * | ||
450 | * Unfortunately, this is a relatively slow operation for modern CPUs, | ||
451 | * because it cannot necessarily determine what the destination | ||
452 | * address is. In this case, the address is a runtime constant, so at | ||
453 | * the very least we can patch the call to be a simple direct call, or | ||
454 | * ideally, patch an inline implementation into the callsite. (Direct | ||
455 | * calls are essentially free, because the call and return addresses | ||
456 | * are completely predictable.) | ||
457 | * | ||
458 | * For i386, these macros rely on the standard gcc "regparm(3)" calling | ||
459 | * convention, in which the first three arguments are placed in %eax, | ||
460 | * %edx, %ecx (in that order), and the remaining arguments are placed | ||
461 | * on the stack. All caller-save registers (eax,edx,ecx) are expected | ||
462 | * to be modified (either clobbered or used for return values). | ||
463 | * X86_64, on the other hand, already specifies a register-based calling | ||
464 | * convention, returning in %rax, with parameters going in %rdi, %rsi, | ||
465 | * %rdx, and %rcx. Note that for this reason, x86_64 does not need any | ||
466 | * special handling for dealing with 4 arguments, unlike i386. | ||
467 | * However, x86_64 also has to clobber all caller-saved registers, of which | ||
468 | * there are unfortunately quite a few (r8 - r11). | ||
469 | * | ||
470 | * The call instruction itself is marked by placing its start address | ||
471 | * and size into the .parainstructions section, so that | ||
472 | * apply_paravirt() in arch/x86/kernel/alternative.c can do the | ||
473 | * appropriate patching under the control of the backend pv_init_ops | ||
474 | * implementation. | ||
475 | * | ||
476 | * Unfortunately there's no way to get gcc to generate the args setup | ||
477 | * for the call, and then allow the call itself to be generated by an | ||
478 | * inline asm. Because of this, we must do the complete arg setup and | ||
479 | * return value handling from within these macros. This is fairly | ||
480 | * cumbersome. | ||
481 | * | ||
482 | * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. | ||
483 | * It could be extended to more arguments, but there would be little | ||
484 | * to be gained from that. For each number of arguments, there are | ||
485 | * the two VCALL and CALL variants for void and non-void functions. | ||
486 | * | ||
487 | * When there is a return value, the invoker of the macro must specify | ||
488 | * the return type. The macro then uses sizeof() on that type to | ||
489 | * determine whether it's a 32 or 64 bit value, and places the return | ||
490 | * in the right register(s) (just %eax for 32-bit, and %edx:%eax for | ||
491 | * 64-bit). For x86_64 machines, it just returns at %rax regardless of | ||
492 | * the return value size. | ||
493 | * | ||
494 | * 64-bit arguments: i386 passes a 64-bit argument as a pair of | ||
495 | * adjacent 32-bit arguments, | ||
496 | * in low,high order. | ||
497 | * | ||
498 | * Small structures are passed and returned in registers. The macro | ||
499 | * calling convention can't directly deal with this, so the wrapper | ||
500 | * functions must do this. | ||
501 | * | ||
502 | * These PVOP_* macros are only defined within this header. This | ||
503 | * means that all uses must be wrapped in inline functions. This also | ||
504 | * makes sure the incoming and outgoing types are always correct. | ||
505 | */ | ||
506 | #ifdef CONFIG_X86_32 | ||
507 | #define PVOP_VCALL_ARGS \ | ||
508 | unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx | ||
509 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS | ||
510 | |||
511 | #define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) | ||
512 | #define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) | ||
513 | #define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) | ||
514 | |||
515 | #define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ | ||
516 | "=c" (__ecx) | ||
517 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS | ||
518 | |||
519 | #define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx) | ||
520 | #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS | ||
521 | |||
522 | #define EXTRA_CLOBBERS | ||
523 | #define VEXTRA_CLOBBERS | ||
524 | #else /* CONFIG_X86_64 */ | ||
525 | #define PVOP_VCALL_ARGS \ | ||
526 | unsigned long __edi = __edi, __esi = __esi, \ | ||
527 | __edx = __edx, __ecx = __ecx | ||
528 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax | ||
529 | |||
530 | #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) | ||
531 | #define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) | ||
532 | #define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) | ||
533 | #define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x)) | ||
534 | |||
535 | #define PVOP_VCALL_CLOBBERS "=D" (__edi), \ | ||
536 | "=S" (__esi), "=d" (__edx), \ | ||
537 | "=c" (__ecx) | ||
538 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) | ||
539 | |||
540 | #define PVOP_VCALLEE_CLOBBERS "=a" (__eax) | ||
541 | #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS | ||
542 | |||
543 | #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" | ||
544 | #define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" | ||
545 | #endif /* CONFIG_X86_32 */ | ||
546 | |||
547 | #ifdef CONFIG_PARAVIRT_DEBUG | ||
548 | #define PVOP_TEST_NULL(op) BUG_ON(op == NULL) | ||
549 | #else | ||
550 | #define PVOP_TEST_NULL(op) ((void)op) | ||
551 | #endif | ||
552 | |||
553 | #define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ | ||
554 | pre, post, ...) \ | ||
555 | ({ \ | ||
556 | rettype __ret; \ | ||
557 | PVOP_CALL_ARGS; \ | ||
558 | PVOP_TEST_NULL(op); \ | ||
559 | /* This is 32-bit specific, but is okay in 64-bit */ \ | ||
560 | /* since this condition will never hold */ \ | ||
561 | if (sizeof(rettype) > sizeof(unsigned long)) { \ | ||
562 | asm volatile(pre \ | ||
563 | paravirt_alt(PARAVIRT_CALL) \ | ||
564 | post \ | ||
565 | : call_clbr \ | ||
566 | : paravirt_type(op), \ | ||
567 | paravirt_clobber(clbr), \ | ||
568 | ##__VA_ARGS__ \ | ||
569 | : "memory", "cc" extra_clbr); \ | ||
570 | __ret = (rettype)((((u64)__edx) << 32) | __eax); \ | ||
571 | } else { \ | ||
572 | asm volatile(pre \ | ||
573 | paravirt_alt(PARAVIRT_CALL) \ | ||
574 | post \ | ||
575 | : call_clbr \ | ||
576 | : paravirt_type(op), \ | ||
577 | paravirt_clobber(clbr), \ | ||
578 | ##__VA_ARGS__ \ | ||
579 | : "memory", "cc" extra_clbr); \ | ||
580 | __ret = (rettype)__eax; \ | ||
581 | } \ | ||
582 | __ret; \ | ||
583 | }) | ||
584 | |||
585 | #define __PVOP_CALL(rettype, op, pre, post, ...) \ | ||
586 | ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \ | ||
587 | EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__) | ||
588 | |||
589 | #define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \ | ||
590 | ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ | ||
591 | PVOP_CALLEE_CLOBBERS, , \ | ||
592 | pre, post, ##__VA_ARGS__) | ||
593 | |||
594 | |||
595 | #define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \ | ||
596 | ({ \ | ||
597 | PVOP_VCALL_ARGS; \ | ||
598 | PVOP_TEST_NULL(op); \ | ||
599 | asm volatile(pre \ | ||
600 | paravirt_alt(PARAVIRT_CALL) \ | ||
601 | post \ | ||
602 | : call_clbr \ | ||
603 | : paravirt_type(op), \ | ||
604 | paravirt_clobber(clbr), \ | ||
605 | ##__VA_ARGS__ \ | ||
606 | : "memory", "cc" extra_clbr); \ | ||
607 | }) | ||
608 | |||
609 | #define __PVOP_VCALL(op, pre, post, ...) \ | ||
610 | ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ | ||
611 | VEXTRA_CLOBBERS, \ | ||
612 | pre, post, ##__VA_ARGS__) | ||
613 | |||
614 | #define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ | ||
615 | ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ | ||
616 | PVOP_VCALLEE_CLOBBERS, , \ | ||
617 | pre, post, ##__VA_ARGS__) | ||
618 | |||
619 | |||
620 | |||
621 | #define PVOP_CALL0(rettype, op) \ | ||
622 | __PVOP_CALL(rettype, op, "", "") | ||
623 | #define PVOP_VCALL0(op) \ | ||
624 | __PVOP_VCALL(op, "", "") | ||
625 | |||
626 | #define PVOP_CALLEE0(rettype, op) \ | ||
627 | __PVOP_CALLEESAVE(rettype, op, "", "") | ||
628 | #define PVOP_VCALLEE0(op) \ | ||
629 | __PVOP_VCALLEESAVE(op, "", "") | ||
630 | |||
631 | |||
632 | #define PVOP_CALL1(rettype, op, arg1) \ | ||
633 | __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) | ||
634 | #define PVOP_VCALL1(op, arg1) \ | ||
635 | __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1)) | ||
636 | |||
637 | #define PVOP_CALLEE1(rettype, op, arg1) \ | ||
638 | __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) | ||
639 | #define PVOP_VCALLEE1(op, arg1) \ | ||
640 | __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1)) | ||
641 | |||
642 | |||
643 | #define PVOP_CALL2(rettype, op, arg1, arg2) \ | ||
644 | __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
645 | PVOP_CALL_ARG2(arg2)) | ||
646 | #define PVOP_VCALL2(op, arg1, arg2) \ | ||
647 | __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
648 | PVOP_CALL_ARG2(arg2)) | ||
649 | |||
650 | #define PVOP_CALLEE2(rettype, op, arg1, arg2) \ | ||
651 | __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
652 | PVOP_CALL_ARG2(arg2)) | ||
653 | #define PVOP_VCALLEE2(op, arg1, arg2) \ | ||
654 | __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
655 | PVOP_CALL_ARG2(arg2)) | ||
656 | |||
657 | |||
658 | #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ | ||
659 | __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
660 | PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) | ||
661 | #define PVOP_VCALL3(op, arg1, arg2, arg3) \ | ||
662 | __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
663 | PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) | ||
664 | |||
665 | /* This is the only difference in x86_64. We can make it much simpler */ | ||
666 | #ifdef CONFIG_X86_32 | ||
667 | #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ | ||
668 | __PVOP_CALL(rettype, op, \ | ||
669 | "push %[_arg4];", "lea 4(%%esp),%%esp;", \ | ||
670 | PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ | ||
671 | PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4))) | ||
672 | #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ | ||
673 | __PVOP_VCALL(op, \ | ||
674 | "push %[_arg4];", "lea 4(%%esp),%%esp;", \ | ||
675 | "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ | ||
676 | "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) | ||
677 | #else | ||
678 | #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ | ||
679 | __PVOP_CALL(rettype, op, "", "", \ | ||
680 | PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ | ||
681 | PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) | ||
682 | #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ | ||
683 | __PVOP_VCALL(op, "", "", \ | ||
684 | PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ | ||
685 | PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) | ||
686 | #endif | ||
687 | |||
688 | /* Lazy mode for batching updates / context switch */ | ||
689 | enum paravirt_lazy_mode { | ||
690 | PARAVIRT_LAZY_NONE, | ||
691 | PARAVIRT_LAZY_MMU, | ||
692 | PARAVIRT_LAZY_CPU, | ||
693 | }; | ||
694 | |||
695 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void); | ||
696 | void paravirt_start_context_switch(struct task_struct *prev); | ||
697 | void paravirt_end_context_switch(struct task_struct *next); | ||
698 | |||
699 | void paravirt_enter_lazy_mmu(void); | ||
700 | void paravirt_leave_lazy_mmu(void); | ||
701 | |||
702 | void _paravirt_nop(void); | ||
703 | u32 _paravirt_ident_32(u32); | ||
704 | u64 _paravirt_ident_64(u64); | ||
705 | |||
706 | #define paravirt_nop ((void *)_paravirt_nop) | ||
707 | |||
708 | /* These all sit in the .parainstructions section to tell us what to patch. */ | ||
709 | struct paravirt_patch_site { | ||
710 | u8 *instr; /* original instructions */ | ||
711 | u8 instrtype; /* type of this instruction */ | ||
712 | u8 len; /* length of original instruction */ | ||
713 | u16 clobbers; /* what registers you may clobber */ | ||
714 | }; | ||
715 | |||
716 | extern struct paravirt_patch_site __parainstructions[], | ||
717 | __parainstructions_end[]; | ||
718 | |||
719 | #endif /* __ASSEMBLY__ */ | ||
720 | |||
721 | #endif /* _ASM_X86_PARAVIRT_TYPES_H */ | ||
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 927958d13c19..1ff685ca221c 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h | |||
@@ -91,7 +91,7 @@ extern void pci_iommu_alloc(void); | |||
91 | 91 | ||
92 | #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) | 92 | #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) |
93 | 93 | ||
94 | #if defined(CONFIG_X86_64) || defined(CONFIG_DMA_API_DEBUG) | 94 | #if defined(CONFIG_X86_64) || defined(CONFIG_DMAR) || defined(CONFIG_DMA_API_DEBUG) |
95 | 95 | ||
96 | #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ | 96 | #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ |
97 | dma_addr_t ADDR_NAME; | 97 | dma_addr_t ADDR_NAME; |
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 02ecb30982a3..04eacefcfd26 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -42,13 +42,14 @@ | |||
42 | 42 | ||
43 | #else /* ...!ASSEMBLY */ | 43 | #else /* ...!ASSEMBLY */ |
44 | 44 | ||
45 | #include <linux/kernel.h> | ||
45 | #include <linux/stringify.h> | 46 | #include <linux/stringify.h> |
46 | 47 | ||
47 | #ifdef CONFIG_SMP | 48 | #ifdef CONFIG_SMP |
48 | #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x | 49 | #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x |
49 | #define __my_cpu_offset percpu_read(this_cpu_off) | 50 | #define __my_cpu_offset percpu_read(this_cpu_off) |
50 | #else | 51 | #else |
51 | #define __percpu_arg(x) "%" #x | 52 | #define __percpu_arg(x) "%P" #x |
52 | #endif | 53 | #endif |
53 | 54 | ||
54 | /* | 55 | /* |
@@ -103,36 +104,48 @@ do { \ | |||
103 | } \ | 104 | } \ |
104 | } while (0) | 105 | } while (0) |
105 | 106 | ||
106 | #define percpu_from_op(op, var) \ | 107 | #define percpu_from_op(op, var, constraint) \ |
107 | ({ \ | 108 | ({ \ |
108 | typeof(var) ret__; \ | 109 | typeof(var) ret__; \ |
109 | switch (sizeof(var)) { \ | 110 | switch (sizeof(var)) { \ |
110 | case 1: \ | 111 | case 1: \ |
111 | asm(op "b "__percpu_arg(1)",%0" \ | 112 | asm(op "b "__percpu_arg(1)",%0" \ |
112 | : "=q" (ret__) \ | 113 | : "=q" (ret__) \ |
113 | : "m" (var)); \ | 114 | : constraint); \ |
114 | break; \ | 115 | break; \ |
115 | case 2: \ | 116 | case 2: \ |
116 | asm(op "w "__percpu_arg(1)",%0" \ | 117 | asm(op "w "__percpu_arg(1)",%0" \ |
117 | : "=r" (ret__) \ | 118 | : "=r" (ret__) \ |
118 | : "m" (var)); \ | 119 | : constraint); \ |
119 | break; \ | 120 | break; \ |
120 | case 4: \ | 121 | case 4: \ |
121 | asm(op "l "__percpu_arg(1)",%0" \ | 122 | asm(op "l "__percpu_arg(1)",%0" \ |
122 | : "=r" (ret__) \ | 123 | : "=r" (ret__) \ |
123 | : "m" (var)); \ | 124 | : constraint); \ |
124 | break; \ | 125 | break; \ |
125 | case 8: \ | 126 | case 8: \ |
126 | asm(op "q "__percpu_arg(1)",%0" \ | 127 | asm(op "q "__percpu_arg(1)",%0" \ |
127 | : "=r" (ret__) \ | 128 | : "=r" (ret__) \ |
128 | : "m" (var)); \ | 129 | : constraint); \ |
129 | break; \ | 130 | break; \ |
130 | default: __bad_percpu_size(); \ | 131 | default: __bad_percpu_size(); \ |
131 | } \ | 132 | } \ |
132 | ret__; \ | 133 | ret__; \ |
133 | }) | 134 | }) |
134 | 135 | ||
135 | #define percpu_read(var) percpu_from_op("mov", per_cpu__##var) | 136 | /* |
137 | * percpu_read() makes gcc load the percpu variable every time it is | ||
138 | * accessed while percpu_read_stable() allows the value to be cached. | ||
139 | * percpu_read_stable() is more efficient and can be used if its value | ||
140 | * is guaranteed to be valid across cpus. The current users include | ||
141 | * get_current() and get_thread_info() both of which are actually | ||
142 | * per-thread variables implemented as per-cpu variables and thus | ||
143 | * stable for the duration of the respective task. | ||
144 | */ | ||
145 | #define percpu_read(var) percpu_from_op("mov", per_cpu__##var, \ | ||
146 | "m" (per_cpu__##var)) | ||
147 | #define percpu_read_stable(var) percpu_from_op("mov", per_cpu__##var, \ | ||
148 | "p" (&per_cpu__##var)) | ||
136 | #define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) | 149 | #define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) |
137 | #define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) | 150 | #define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) |
138 | #define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) | 151 | #define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) |
@@ -155,6 +168,15 @@ do { \ | |||
155 | /* We can use this directly for local CPU (faster). */ | 168 | /* We can use this directly for local CPU (faster). */ |
156 | DECLARE_PER_CPU(unsigned long, this_cpu_off); | 169 | DECLARE_PER_CPU(unsigned long, this_cpu_off); |
157 | 170 | ||
171 | #ifdef CONFIG_NEED_MULTIPLE_NODES | ||
172 | void *pcpu_lpage_remapped(void *kaddr); | ||
173 | #else | ||
174 | static inline void *pcpu_lpage_remapped(void *kaddr) | ||
175 | { | ||
176 | return NULL; | ||
177 | } | ||
178 | #endif | ||
179 | |||
158 | #endif /* !__ASSEMBLY__ */ | 180 | #endif /* !__ASSEMBLY__ */ |
159 | 181 | ||
160 | #ifdef CONFIG_SMP | 182 | #ifdef CONFIG_SMP |
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 5fb33e160ea0..e7b7c938ae27 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h | |||
@@ -84,9 +84,22 @@ union cpuid10_edx { | |||
84 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b | 84 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b |
85 | #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) | 85 | #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) |
86 | 86 | ||
87 | /* | ||
88 | * We model BTS tracing as another fixed-mode PMC. | ||
89 | * | ||
90 | * We choose a value in the middle of the fixed counter range, since lower | ||
91 | * values are used by actual fixed counters and higher values are used | ||
92 | * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. | ||
93 | */ | ||
94 | #define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) | ||
95 | |||
96 | |||
87 | #ifdef CONFIG_PERF_COUNTERS | 97 | #ifdef CONFIG_PERF_COUNTERS |
88 | extern void init_hw_perf_counters(void); | 98 | extern void init_hw_perf_counters(void); |
89 | extern void perf_counters_lapic_init(void); | 99 | extern void perf_counters_lapic_init(void); |
100 | |||
101 | #define PERF_COUNTER_INDEX_OFFSET 0 | ||
102 | |||
90 | #else | 103 | #else |
91 | static inline void init_hw_perf_counters(void) { } | 104 | static inline void init_hw_perf_counters(void) { } |
92 | static inline void perf_counters_lapic_init(void) { } | 105 | static inline void perf_counters_lapic_init(void) { } |
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index dd14c54ac718..0e8c2a0fd922 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h | |||
@@ -46,7 +46,13 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte) | |||
46 | __free_page(pte); | 46 | __free_page(pte); |
47 | } | 47 | } |
48 | 48 | ||
49 | extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte); | 49 | extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte); |
50 | |||
51 | static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, | ||
52 | unsigned long address) | ||
53 | { | ||
54 | ___pte_free_tlb(tlb, pte); | ||
55 | } | ||
50 | 56 | ||
51 | static inline void pmd_populate_kernel(struct mm_struct *mm, | 57 | static inline void pmd_populate_kernel(struct mm_struct *mm, |
52 | pmd_t *pmd, pte_t *pte) | 58 | pmd_t *pmd, pte_t *pte) |
@@ -78,7 +84,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) | |||
78 | free_page((unsigned long)pmd); | 84 | free_page((unsigned long)pmd); |
79 | } | 85 | } |
80 | 86 | ||
81 | extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); | 87 | extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); |
88 | |||
89 | static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, | ||
90 | unsigned long adddress) | ||
91 | { | ||
92 | ___pmd_free_tlb(tlb, pmd); | ||
93 | } | ||
82 | 94 | ||
83 | #ifdef CONFIG_X86_PAE | 95 | #ifdef CONFIG_X86_PAE |
84 | extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd); | 96 | extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd); |
@@ -108,7 +120,14 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) | |||
108 | free_page((unsigned long)pud); | 120 | free_page((unsigned long)pud); |
109 | } | 121 | } |
110 | 122 | ||
111 | extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud); | 123 | extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud); |
124 | |||
125 | static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, | ||
126 | unsigned long address) | ||
127 | { | ||
128 | ___pud_free_tlb(tlb, pud); | ||
129 | } | ||
130 | |||
112 | #endif /* PAGETABLE_LEVELS > 3 */ | 131 | #endif /* PAGETABLE_LEVELS > 3 */ |
113 | #endif /* PAGETABLE_LEVELS > 2 */ | 132 | #endif /* PAGETABLE_LEVELS > 2 */ |
114 | 133 | ||
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 3cc06e3fceb8..4c5b51fdc788 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _ASM_X86_PGTABLE_H | 2 | #define _ASM_X86_PGTABLE_H |
3 | 3 | ||
4 | #include <asm/page.h> | 4 | #include <asm/page.h> |
5 | #include <asm/e820.h> | ||
5 | 6 | ||
6 | #include <asm/pgtable_types.h> | 7 | #include <asm/pgtable_types.h> |
7 | 8 | ||
@@ -134,6 +135,11 @@ static inline unsigned long pte_pfn(pte_t pte) | |||
134 | return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; | 135 | return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; |
135 | } | 136 | } |
136 | 137 | ||
138 | static inline unsigned long pmd_pfn(pmd_t pmd) | ||
139 | { | ||
140 | return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; | ||
141 | } | ||
142 | |||
137 | #define pte_page(pte) pfn_to_page(pte_pfn(pte)) | 143 | #define pte_page(pte) pfn_to_page(pte_pfn(pte)) |
138 | 144 | ||
139 | static inline int pmd_large(pmd_t pte) | 145 | static inline int pmd_large(pmd_t pte) |
@@ -269,10 +275,17 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) | |||
269 | 275 | ||
270 | #define canon_pgprot(p) __pgprot(massage_pgprot(p)) | 276 | #define canon_pgprot(p) __pgprot(massage_pgprot(p)) |
271 | 277 | ||
272 | static inline int is_new_memtype_allowed(unsigned long flags, | 278 | static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, |
273 | unsigned long new_flags) | 279 | unsigned long flags, |
280 | unsigned long new_flags) | ||
274 | { | 281 | { |
275 | /* | 282 | /* |
283 | * PAT type is always WB for ISA. So no need to check. | ||
284 | */ | ||
285 | if (is_ISA_range(paddr, paddr + size - 1)) | ||
286 | return 1; | ||
287 | |||
288 | /* | ||
276 | * Certain new memtypes are not allowed with certain | 289 | * Certain new memtypes are not allowed with certain |
277 | * requested memtype: | 290 | * requested memtype: |
278 | * - request is uncached, return cannot be write-back | 291 | * - request is uncached, return cannot be write-back |
@@ -351,7 +364,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) | |||
351 | * this macro returns the index of the entry in the pmd page which would | 364 | * this macro returns the index of the entry in the pmd page which would |
352 | * control the given virtual address | 365 | * control the given virtual address |
353 | */ | 366 | */ |
354 | static inline unsigned pmd_index(unsigned long address) | 367 | static inline unsigned long pmd_index(unsigned long address) |
355 | { | 368 | { |
356 | return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); | 369 | return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); |
357 | } | 370 | } |
@@ -371,7 +384,7 @@ static inline unsigned pmd_index(unsigned long address) | |||
371 | * this function returns the index of the entry in the pte page which would | 384 | * this function returns the index of the entry in the pte page which would |
372 | * control the given virtual address | 385 | * control the given virtual address |
373 | */ | 386 | */ |
374 | static inline unsigned pte_index(unsigned long address) | 387 | static inline unsigned long pte_index(unsigned long address) |
375 | { | 388 | { |
376 | return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); | 389 | return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); |
377 | } | 390 | } |
@@ -422,11 +435,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) | |||
422 | return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); | 435 | return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); |
423 | } | 436 | } |
424 | 437 | ||
425 | static inline unsigned long pmd_pfn(pmd_t pmd) | ||
426 | { | ||
427 | return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; | ||
428 | } | ||
429 | |||
430 | static inline int pud_large(pud_t pud) | 438 | static inline int pud_large(pud_t pud) |
431 | { | 439 | { |
432 | return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == | 440 | return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == |
@@ -462,7 +470,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) | |||
462 | #define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) | 470 | #define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) |
463 | 471 | ||
464 | /* to find an entry in a page-table-directory. */ | 472 | /* to find an entry in a page-table-directory. */ |
465 | static inline unsigned pud_index(unsigned long address) | 473 | static inline unsigned long pud_index(unsigned long address) |
466 | { | 474 | { |
467 | return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); | 475 | return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); |
468 | } | 476 | } |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c7768269b1cf..e08ea043e085 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -403,7 +403,17 @@ extern unsigned long kernel_eflags; | |||
403 | extern asmlinkage void ignore_sysret(void); | 403 | extern asmlinkage void ignore_sysret(void); |
404 | #else /* X86_64 */ | 404 | #else /* X86_64 */ |
405 | #ifdef CONFIG_CC_STACKPROTECTOR | 405 | #ifdef CONFIG_CC_STACKPROTECTOR |
406 | DECLARE_PER_CPU(unsigned long, stack_canary); | 406 | /* |
407 | * Make sure stack canary segment base is cached-aligned: | ||
408 | * "For Intel Atom processors, avoid non zero segment base address | ||
409 | * that is not aligned to cache line boundary at all cost." | ||
410 | * (Optim Ref Manual Assembly/Compiler Coding Rule 15.) | ||
411 | */ | ||
412 | struct stack_canary { | ||
413 | char __pad[20]; /* canary at %gs:20 */ | ||
414 | unsigned long canary; | ||
415 | }; | ||
416 | DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); | ||
407 | #endif | 417 | #endif |
408 | #endif /* X86_64 */ | 418 | #endif /* X86_64 */ |
409 | 419 | ||
@@ -703,13 +713,23 @@ static inline void cpu_relax(void) | |||
703 | rep_nop(); | 713 | rep_nop(); |
704 | } | 714 | } |
705 | 715 | ||
706 | /* Stop speculative execution: */ | 716 | /* Stop speculative execution and prefetching of modified code. */ |
707 | static inline void sync_core(void) | 717 | static inline void sync_core(void) |
708 | { | 718 | { |
709 | int tmp; | 719 | int tmp; |
710 | 720 | ||
711 | asm volatile("cpuid" : "=a" (tmp) : "0" (1) | 721 | #if defined(CONFIG_M386) || defined(CONFIG_M486) |
712 | : "ebx", "ecx", "edx", "memory"); | 722 | if (boot_cpu_data.x86 < 5) |
723 | /* There is no speculative execution. | ||
724 | * jmp is a barrier to prefetching. */ | ||
725 | asm volatile("jmp 1f\n1:\n" ::: "memory"); | ||
726 | else | ||
727 | #endif | ||
728 | /* cpuid is a barrier to speculative execution. | ||
729 | * Prefetched instructions are automatically | ||
730 | * invalidated when modified. */ | ||
731 | asm volatile("cpuid" : "=a" (tmp) : "0" (1) | ||
732 | : "ebx", "ecx", "edx", "memory"); | ||
713 | } | 733 | } |
714 | 734 | ||
715 | static inline void __monitor(const void *eax, unsigned long ecx, | 735 | static inline void __monitor(const void *eax, unsigned long ecx, |
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 49fb3ecf3bb3..621f56d73121 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h | |||
@@ -22,7 +22,14 @@ extern int reboot_force; | |||
22 | 22 | ||
23 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); | 23 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); |
24 | 24 | ||
25 | #define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1)) | 25 | /* |
26 | #define round_down(x, y) ((x) & ~((y) - 1)) | 26 | * This looks more complex than it should be. But we need to |
27 | * get the type for the ~ right in round_down (it needs to be | ||
28 | * as wide as the result!), and we want to evaluate the macro | ||
29 | * arguments just once each. | ||
30 | */ | ||
31 | #define __round_mask(x,y) ((__typeof__(x))((y)-1)) | ||
32 | #define round_up(x,y) ((((x)-1) | __round_mask(x,y))+1) | ||
33 | #define round_down(x,y) ((x) & ~__round_mask(x,y)) | ||
27 | 34 | ||
28 | #endif /* _ASM_X86_PROTO_H */ | 35 | #endif /* _ASM_X86_PROTO_H */ |
diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h index 263d397d2eef..75af592677ec 100644 --- a/arch/x86/include/asm/scatterlist.h +++ b/arch/x86/include/asm/scatterlist.h | |||
@@ -1,33 +1,8 @@ | |||
1 | #ifndef _ASM_X86_SCATTERLIST_H | 1 | #ifndef _ASM_X86_SCATTERLIST_H |
2 | #define _ASM_X86_SCATTERLIST_H | 2 | #define _ASM_X86_SCATTERLIST_H |
3 | 3 | ||
4 | #include <asm/types.h> | ||
5 | |||
6 | struct scatterlist { | ||
7 | #ifdef CONFIG_DEBUG_SG | ||
8 | unsigned long sg_magic; | ||
9 | #endif | ||
10 | unsigned long page_link; | ||
11 | unsigned int offset; | ||
12 | unsigned int length; | ||
13 | dma_addr_t dma_address; | ||
14 | unsigned int dma_length; | ||
15 | }; | ||
16 | |||
17 | #define ARCH_HAS_SG_CHAIN | ||
18 | #define ISA_DMA_THRESHOLD (0x00ffffff) | 4 | #define ISA_DMA_THRESHOLD (0x00ffffff) |
19 | 5 | ||
20 | /* | 6 | #include <asm-generic/scatterlist.h> |
21 | * These macros should be used after a pci_map_sg call has been done | ||
22 | * to get bus addresses of each of the SG entries and their lengths. | ||
23 | * You should only work with the number of sg entries pci_map_sg | ||
24 | * returns. | ||
25 | */ | ||
26 | #define sg_dma_address(sg) ((sg)->dma_address) | ||
27 | #ifdef CONFIG_X86_32 | ||
28 | # define sg_dma_len(sg) ((sg)->length) | ||
29 | #else | ||
30 | # define sg_dma_len(sg) ((sg)->dma_length) | ||
31 | #endif | ||
32 | 7 | ||
33 | #endif /* _ASM_X86_SCATTERLIST_H */ | 8 | #endif /* _ASM_X86_SCATTERLIST_H */ |
diff --git a/arch/x86/include/asm/shmbuf.h b/arch/x86/include/asm/shmbuf.h index b51413b74971..83c05fc2de38 100644 --- a/arch/x86/include/asm/shmbuf.h +++ b/arch/x86/include/asm/shmbuf.h | |||
@@ -1,51 +1 @@ | |||
1 | #ifndef _ASM_X86_SHMBUF_H | #include <asm-generic/shmbuf.h> | |
2 | #define _ASM_X86_SHMBUF_H | ||
3 | |||
4 | /* | ||
5 | * The shmid64_ds structure for x86 architecture. | ||
6 | * Note extra padding because this structure is passed back and forth | ||
7 | * between kernel and user space. | ||
8 | * | ||
9 | * Pad space on 32 bit is left for: | ||
10 | * - 64-bit time_t to solve y2038 problem | ||
11 | * - 2 miscellaneous 32-bit values | ||
12 | * | ||
13 | * Pad space on 64 bit is left for: | ||
14 | * - 2 miscellaneous 64-bit values | ||
15 | */ | ||
16 | |||
17 | struct shmid64_ds { | ||
18 | struct ipc64_perm shm_perm; /* operation perms */ | ||
19 | size_t shm_segsz; /* size of segment (bytes) */ | ||
20 | __kernel_time_t shm_atime; /* last attach time */ | ||
21 | #ifdef __i386__ | ||
22 | unsigned long __unused1; | ||
23 | #endif | ||
24 | __kernel_time_t shm_dtime; /* last detach time */ | ||
25 | #ifdef __i386__ | ||
26 | unsigned long __unused2; | ||
27 | #endif | ||
28 | __kernel_time_t shm_ctime; /* last change time */ | ||
29 | #ifdef __i386__ | ||
30 | unsigned long __unused3; | ||
31 | #endif | ||
32 | __kernel_pid_t shm_cpid; /* pid of creator */ | ||
33 | __kernel_pid_t shm_lpid; /* pid of last operator */ | ||
34 | unsigned long shm_nattch; /* no. of current attaches */ | ||
35 | unsigned long __unused4; | ||
36 | unsigned long __unused5; | ||
37 | }; | ||
38 | |||
39 | struct shminfo64 { | ||
40 | unsigned long shmmax; | ||
41 | unsigned long shmmin; | ||
42 | unsigned long shmmni; | ||
43 | unsigned long shmseg; | ||
44 | unsigned long shmall; | ||
45 | unsigned long __unused1; | ||
46 | unsigned long __unused2; | ||
47 | unsigned long __unused3; | ||
48 | unsigned long __unused4; | ||
49 | }; | ||
50 | |||
51 | #endif /* _ASM_X86_SHMBUF_H */ | ||
diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h index ca8bf2cd0ba9..6b71384b9d8b 100644 --- a/arch/x86/include/asm/socket.h +++ b/arch/x86/include/asm/socket.h | |||
@@ -1,60 +1 @@ | |||
1 | #ifndef _ASM_X86_SOCKET_H | #include <asm-generic/socket.h> | |
2 | #define _ASM_X86_SOCKET_H | ||
3 | |||
4 | #include <asm/sockios.h> | ||
5 | |||
6 | /* For setsockopt(2) */ | ||
7 | #define SOL_SOCKET 1 | ||
8 | |||
9 | #define SO_DEBUG 1 | ||
10 | #define SO_REUSEADDR 2 | ||
11 | #define SO_TYPE 3 | ||
12 | #define SO_ERROR 4 | ||
13 | #define SO_DONTROUTE 5 | ||
14 | #define SO_BROADCAST 6 | ||
15 | #define SO_SNDBUF 7 | ||
16 | #define SO_RCVBUF 8 | ||
17 | #define SO_SNDBUFFORCE 32 | ||
18 | #define SO_RCVBUFFORCE 33 | ||
19 | #define SO_KEEPALIVE 9 | ||
20 | #define SO_OOBINLINE 10 | ||
21 | #define SO_NO_CHECK 11 | ||
22 | #define SO_PRIORITY 12 | ||
23 | #define SO_LINGER 13 | ||
24 | #define SO_BSDCOMPAT 14 | ||
25 | /* To add :#define SO_REUSEPORT 15 */ | ||
26 | #define SO_PASSCRED 16 | ||
27 | #define SO_PEERCRED 17 | ||
28 | #define SO_RCVLOWAT 18 | ||
29 | #define SO_SNDLOWAT 19 | ||
30 | #define SO_RCVTIMEO 20 | ||
31 | #define SO_SNDTIMEO 21 | ||
32 | |||
33 | /* Security levels - as per NRL IPv6 - don't actually do anything */ | ||
34 | #define SO_SECURITY_AUTHENTICATION 22 | ||
35 | #define SO_SECURITY_ENCRYPTION_TRANSPORT 23 | ||
36 | #define SO_SECURITY_ENCRYPTION_NETWORK 24 | ||
37 | |||
38 | #define SO_BINDTODEVICE 25 | ||
39 | |||
40 | /* Socket filtering */ | ||
41 | #define SO_ATTACH_FILTER 26 | ||
42 | #define SO_DETACH_FILTER 27 | ||
43 | |||
44 | #define SO_PEERNAME 28 | ||
45 | #define SO_TIMESTAMP 29 | ||
46 | #define SCM_TIMESTAMP SO_TIMESTAMP | ||
47 | |||
48 | #define SO_ACCEPTCONN 30 | ||
49 | |||
50 | #define SO_PEERSEC 31 | ||
51 | #define SO_PASSSEC 34 | ||
52 | #define SO_TIMESTAMPNS 35 | ||
53 | #define SCM_TIMESTAMPNS SO_TIMESTAMPNS | ||
54 | |||
55 | #define SO_MARK 36 | ||
56 | |||
57 | #define SO_TIMESTAMPING 37 | ||
58 | #define SCM_TIMESTAMPING SO_TIMESTAMPING | ||
59 | |||
60 | #endif /* _ASM_X86_SOCKET_H */ | ||
diff --git a/arch/x86/include/asm/sockios.h b/arch/x86/include/asm/sockios.h index 49cc72b5d3c9..def6d4746ee7 100644 --- a/arch/x86/include/asm/sockios.h +++ b/arch/x86/include/asm/sockios.h | |||
@@ -1,13 +1 @@ | |||
1 | #ifndef _ASM_X86_SOCKIOS_H | #include <asm-generic/sockios.h> | |
2 | #define _ASM_X86_SOCKIOS_H | ||
3 | |||
4 | /* Socket-level I/O control calls. */ | ||
5 | #define FIOSETOWN 0x8901 | ||
6 | #define SIOCSPGRP 0x8902 | ||
7 | #define FIOGETOWN 0x8903 | ||
8 | #define SIOCGPGRP 0x8904 | ||
9 | #define SIOCATMARK 0x8905 | ||
10 | #define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ | ||
11 | #define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ | ||
12 | |||
13 | #endif /* _ASM_X86_SOCKIOS_H */ | ||
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index b7e5db876399..4e77853321db 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h | |||
@@ -302,4 +302,8 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw) | |||
302 | #define _raw_read_relax(lock) cpu_relax() | 302 | #define _raw_read_relax(lock) cpu_relax() |
303 | #define _raw_write_relax(lock) cpu_relax() | 303 | #define _raw_write_relax(lock) cpu_relax() |
304 | 304 | ||
305 | /* The {read|write|spin}_lock() on x86 are full memory barriers. */ | ||
306 | static inline void smp_mb__after_lock(void) { } | ||
307 | #define ARCH_HAS_SMP_MB_AFTER_LOCK | ||
308 | |||
305 | #endif /* _ASM_X86_SPINLOCK_H */ | 309 | #endif /* _ASM_X86_SPINLOCK_H */ |
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index c2d742c6e15f..157517763565 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h | |||
@@ -48,7 +48,7 @@ | |||
48 | * head_32 for boot CPU and setup_per_cpu_areas() for others. | 48 | * head_32 for boot CPU and setup_per_cpu_areas() for others. |
49 | */ | 49 | */ |
50 | #define GDT_STACK_CANARY_INIT \ | 50 | #define GDT_STACK_CANARY_INIT \ |
51 | [GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } }, | 51 | [GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18), |
52 | 52 | ||
53 | /* | 53 | /* |
54 | * Initialize the stackprotector canary value. | 54 | * Initialize the stackprotector canary value. |
@@ -78,21 +78,19 @@ static __always_inline void boot_init_stack_canary(void) | |||
78 | #ifdef CONFIG_X86_64 | 78 | #ifdef CONFIG_X86_64 |
79 | percpu_write(irq_stack_union.stack_canary, canary); | 79 | percpu_write(irq_stack_union.stack_canary, canary); |
80 | #else | 80 | #else |
81 | percpu_write(stack_canary, canary); | 81 | percpu_write(stack_canary.canary, canary); |
82 | #endif | 82 | #endif |
83 | } | 83 | } |
84 | 84 | ||
85 | static inline void setup_stack_canary_segment(int cpu) | 85 | static inline void setup_stack_canary_segment(int cpu) |
86 | { | 86 | { |
87 | #ifdef CONFIG_X86_32 | 87 | #ifdef CONFIG_X86_32 |
88 | unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20; | 88 | unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu); |
89 | struct desc_struct *gdt_table = get_cpu_gdt_table(cpu); | 89 | struct desc_struct *gdt_table = get_cpu_gdt_table(cpu); |
90 | struct desc_struct desc; | 90 | struct desc_struct desc; |
91 | 91 | ||
92 | desc = gdt_table[GDT_ENTRY_STACK_CANARY]; | 92 | desc = gdt_table[GDT_ENTRY_STACK_CANARY]; |
93 | desc.base0 = canary & 0xffff; | 93 | set_desc_base(&desc, canary); |
94 | desc.base1 = (canary >> 16) & 0xff; | ||
95 | desc.base2 = (canary >> 24) & 0xff; | ||
96 | write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S); | 94 | write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S); |
97 | #endif | 95 | #endif |
98 | } | 96 | } |
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index f517944b2b17..cf86a5e73815 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h | |||
@@ -3,6 +3,8 @@ | |||
3 | 3 | ||
4 | extern int kstack_depth_to_print; | 4 | extern int kstack_depth_to_print; |
5 | 5 | ||
6 | int x86_is_stack_id(int id, char *name); | ||
7 | |||
6 | /* Generic stack tracer with callbacks */ | 8 | /* Generic stack tracer with callbacks */ |
7 | 9 | ||
8 | struct stacktrace_ops { | 10 | struct stacktrace_ops { |
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 643c59b4bc6e..f08f97374892 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h | |||
@@ -31,7 +31,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
31 | "movl %P[task_canary](%[next]), %%ebx\n\t" \ | 31 | "movl %P[task_canary](%[next]), %%ebx\n\t" \ |
32 | "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" | 32 | "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" |
33 | #define __switch_canary_oparam \ | 33 | #define __switch_canary_oparam \ |
34 | , [stack_canary] "=m" (per_cpu_var(stack_canary)) | 34 | , [stack_canary] "=m" (per_cpu_var(stack_canary.canary)) |
35 | #define __switch_canary_iparam \ | 35 | #define __switch_canary_iparam \ |
36 | , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) | 36 | , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) |
37 | #else /* CC_STACKPROTECTOR */ | 37 | #else /* CC_STACKPROTECTOR */ |
@@ -150,33 +150,6 @@ do { \ | |||
150 | #endif | 150 | #endif |
151 | 151 | ||
152 | #ifdef __KERNEL__ | 152 | #ifdef __KERNEL__ |
153 | #define _set_base(addr, base) do { unsigned long __pr; \ | ||
154 | __asm__ __volatile__ ("movw %%dx,%1\n\t" \ | ||
155 | "rorl $16,%%edx\n\t" \ | ||
156 | "movb %%dl,%2\n\t" \ | ||
157 | "movb %%dh,%3" \ | ||
158 | :"=&d" (__pr) \ | ||
159 | :"m" (*((addr)+2)), \ | ||
160 | "m" (*((addr)+4)), \ | ||
161 | "m" (*((addr)+7)), \ | ||
162 | "0" (base) \ | ||
163 | ); } while (0) | ||
164 | |||
165 | #define _set_limit(addr, limit) do { unsigned long __lr; \ | ||
166 | __asm__ __volatile__ ("movw %%dx,%1\n\t" \ | ||
167 | "rorl $16,%%edx\n\t" \ | ||
168 | "movb %2,%%dh\n\t" \ | ||
169 | "andb $0xf0,%%dh\n\t" \ | ||
170 | "orb %%dh,%%dl\n\t" \ | ||
171 | "movb %%dl,%2" \ | ||
172 | :"=&d" (__lr) \ | ||
173 | :"m" (*(addr)), \ | ||
174 | "m" (*((addr)+6)), \ | ||
175 | "0" (limit) \ | ||
176 | ); } while (0) | ||
177 | |||
178 | #define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base)) | ||
179 | #define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1)) | ||
180 | 153 | ||
181 | extern void native_load_gs_index(unsigned); | 154 | extern void native_load_gs_index(unsigned); |
182 | 155 | ||
diff --git a/arch/x86/include/asm/termbits.h b/arch/x86/include/asm/termbits.h index af1b70ea440f..3935b106de79 100644 --- a/arch/x86/include/asm/termbits.h +++ b/arch/x86/include/asm/termbits.h | |||
@@ -1,198 +1 @@ | |||
1 | #ifndef _ASM_X86_TERMBITS_H | #include <asm-generic/termbits.h> | |
2 | #define _ASM_X86_TERMBITS_H | ||
3 | |||
4 | #include <linux/posix_types.h> | ||
5 | |||
6 | typedef unsigned char cc_t; | ||
7 | typedef unsigned int speed_t; | ||
8 | typedef unsigned int tcflag_t; | ||
9 | |||
10 | #define NCCS 19 | ||
11 | struct termios { | ||
12 | tcflag_t c_iflag; /* input mode flags */ | ||
13 | tcflag_t c_oflag; /* output mode flags */ | ||
14 | tcflag_t c_cflag; /* control mode flags */ | ||
15 | tcflag_t c_lflag; /* local mode flags */ | ||
16 | cc_t c_line; /* line discipline */ | ||
17 | cc_t c_cc[NCCS]; /* control characters */ | ||
18 | }; | ||
19 | |||
20 | struct termios2 { | ||
21 | tcflag_t c_iflag; /* input mode flags */ | ||
22 | tcflag_t c_oflag; /* output mode flags */ | ||
23 | tcflag_t c_cflag; /* control mode flags */ | ||
24 | tcflag_t c_lflag; /* local mode flags */ | ||
25 | cc_t c_line; /* line discipline */ | ||
26 | cc_t c_cc[NCCS]; /* control characters */ | ||
27 | speed_t c_ispeed; /* input speed */ | ||
28 | speed_t c_ospeed; /* output speed */ | ||
29 | }; | ||
30 | |||
31 | struct ktermios { | ||
32 | tcflag_t c_iflag; /* input mode flags */ | ||
33 | tcflag_t c_oflag; /* output mode flags */ | ||
34 | tcflag_t c_cflag; /* control mode flags */ | ||
35 | tcflag_t c_lflag; /* local mode flags */ | ||
36 | cc_t c_line; /* line discipline */ | ||
37 | cc_t c_cc[NCCS]; /* control characters */ | ||
38 | speed_t c_ispeed; /* input speed */ | ||
39 | speed_t c_ospeed; /* output speed */ | ||
40 | }; | ||
41 | |||
42 | /* c_cc characters */ | ||
43 | #define VINTR 0 | ||
44 | #define VQUIT 1 | ||
45 | #define VERASE 2 | ||
46 | #define VKILL 3 | ||
47 | #define VEOF 4 | ||
48 | #define VTIME 5 | ||
49 | #define VMIN 6 | ||
50 | #define VSWTC 7 | ||
51 | #define VSTART 8 | ||
52 | #define VSTOP 9 | ||
53 | #define VSUSP 10 | ||
54 | #define VEOL 11 | ||
55 | #define VREPRINT 12 | ||
56 | #define VDISCARD 13 | ||
57 | #define VWERASE 14 | ||
58 | #define VLNEXT 15 | ||
59 | #define VEOL2 16 | ||
60 | |||
61 | /* c_iflag bits */ | ||
62 | #define IGNBRK 0000001 | ||
63 | #define BRKINT 0000002 | ||
64 | #define IGNPAR 0000004 | ||
65 | #define PARMRK 0000010 | ||
66 | #define INPCK 0000020 | ||
67 | #define ISTRIP 0000040 | ||
68 | #define INLCR 0000100 | ||
69 | #define IGNCR 0000200 | ||
70 | #define ICRNL 0000400 | ||
71 | #define IUCLC 0001000 | ||
72 | #define IXON 0002000 | ||
73 | #define IXANY 0004000 | ||
74 | #define IXOFF 0010000 | ||
75 | #define IMAXBEL 0020000 | ||
76 | #define IUTF8 0040000 | ||
77 | |||
78 | /* c_oflag bits */ | ||
79 | #define OPOST 0000001 | ||
80 | #define OLCUC 0000002 | ||
81 | #define ONLCR 0000004 | ||
82 | #define OCRNL 0000010 | ||
83 | #define ONOCR 0000020 | ||
84 | #define ONLRET 0000040 | ||
85 | #define OFILL 0000100 | ||
86 | #define OFDEL 0000200 | ||
87 | #define NLDLY 0000400 | ||
88 | #define NL0 0000000 | ||
89 | #define NL1 0000400 | ||
90 | #define CRDLY 0003000 | ||
91 | #define CR0 0000000 | ||
92 | #define CR1 0001000 | ||
93 | #define CR2 0002000 | ||
94 | #define CR3 0003000 | ||
95 | #define TABDLY 0014000 | ||
96 | #define TAB0 0000000 | ||
97 | #define TAB1 0004000 | ||
98 | #define TAB2 0010000 | ||
99 | #define TAB3 0014000 | ||
100 | #define XTABS 0014000 | ||
101 | #define BSDLY 0020000 | ||
102 | #define BS0 0000000 | ||
103 | #define BS1 0020000 | ||
104 | #define VTDLY 0040000 | ||
105 | #define VT0 0000000 | ||
106 | #define VT1 0040000 | ||
107 | #define FFDLY 0100000 | ||
108 | #define FF0 0000000 | ||
109 | #define FF1 0100000 | ||
110 | |||
111 | /* c_cflag bit meaning */ | ||
112 | #define CBAUD 0010017 | ||
113 | #define B0 0000000 /* hang up */ | ||
114 | #define B50 0000001 | ||
115 | #define B75 0000002 | ||
116 | #define B110 0000003 | ||
117 | #define B134 0000004 | ||
118 | #define B150 0000005 | ||
119 | #define B200 0000006 | ||
120 | #define B300 0000007 | ||
121 | #define B600 0000010 | ||
122 | #define B1200 0000011 | ||
123 | #define B1800 0000012 | ||
124 | #define B2400 0000013 | ||
125 | #define B4800 0000014 | ||
126 | #define B9600 0000015 | ||
127 | #define B19200 0000016 | ||
128 | #define B38400 0000017 | ||
129 | #define EXTA B19200 | ||
130 | #define EXTB B38400 | ||
131 | #define CSIZE 0000060 | ||
132 | #define CS5 0000000 | ||
133 | #define CS6 0000020 | ||
134 | #define CS7 0000040 | ||
135 | #define CS8 0000060 | ||
136 | #define CSTOPB 0000100 | ||
137 | #define CREAD 0000200 | ||
138 | #define PARENB 0000400 | ||
139 | #define PARODD 0001000 | ||
140 | #define HUPCL 0002000 | ||
141 | #define CLOCAL 0004000 | ||
142 | #define CBAUDEX 0010000 | ||
143 | #define BOTHER 0010000 /* non standard rate */ | ||
144 | #define B57600 0010001 | ||
145 | #define B115200 0010002 | ||
146 | #define B230400 0010003 | ||
147 | #define B460800 0010004 | ||
148 | #define B500000 0010005 | ||
149 | #define B576000 0010006 | ||
150 | #define B921600 0010007 | ||
151 | #define B1000000 0010010 | ||
152 | #define B1152000 0010011 | ||
153 | #define B1500000 0010012 | ||
154 | #define B2000000 0010013 | ||
155 | #define B2500000 0010014 | ||
156 | #define B3000000 0010015 | ||
157 | #define B3500000 0010016 | ||
158 | #define B4000000 0010017 | ||
159 | #define CIBAUD 002003600000 /* input baud rate */ | ||
160 | #define CMSPAR 010000000000 /* mark or space (stick) parity */ | ||
161 | #define CRTSCTS 020000000000 /* flow control */ | ||
162 | |||
163 | #define IBSHIFT 16 /* Shift from CBAUD to CIBAUD */ | ||
164 | |||
165 | /* c_lflag bits */ | ||
166 | #define ISIG 0000001 | ||
167 | #define ICANON 0000002 | ||
168 | #define XCASE 0000004 | ||
169 | #define ECHO 0000010 | ||
170 | #define ECHOE 0000020 | ||
171 | #define ECHOK 0000040 | ||
172 | #define ECHONL 0000100 | ||
173 | #define NOFLSH 0000200 | ||
174 | #define TOSTOP 0000400 | ||
175 | #define ECHOCTL 0001000 | ||
176 | #define ECHOPRT 0002000 | ||
177 | #define ECHOKE 0004000 | ||
178 | #define FLUSHO 0010000 | ||
179 | #define PENDIN 0040000 | ||
180 | #define IEXTEN 0100000 | ||
181 | |||
182 | /* tcflow() and TCXONC use these */ | ||
183 | #define TCOOFF 0 | ||
184 | #define TCOON 1 | ||
185 | #define TCIOFF 2 | ||
186 | #define TCION 3 | ||
187 | |||
188 | /* tcflush() and TCFLSH use these */ | ||
189 | #define TCIFLUSH 0 | ||
190 | #define TCOFLUSH 1 | ||
191 | #define TCIOFLUSH 2 | ||
192 | |||
193 | /* tcsetattr uses these */ | ||
194 | #define TCSANOW 0 | ||
195 | #define TCSADRAIN 1 | ||
196 | #define TCSAFLUSH 2 | ||
197 | |||
198 | #endif /* _ASM_X86_TERMBITS_H */ | ||
diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/asm/termios.h index c4ee8056baca..280d78a9d966 100644 --- a/arch/x86/include/asm/termios.h +++ b/arch/x86/include/asm/termios.h | |||
@@ -1,114 +1 @@ | |||
1 | #ifndef _ASM_X86_TERMIOS_H | #include <asm-generic/termios.h> | |
2 | #define _ASM_X86_TERMIOS_H | ||
3 | |||
4 | #include <asm/termbits.h> | ||
5 | #include <asm/ioctls.h> | ||
6 | |||
7 | struct winsize { | ||
8 | unsigned short ws_row; | ||
9 | unsigned short ws_col; | ||
10 | unsigned short ws_xpixel; | ||
11 | unsigned short ws_ypixel; | ||
12 | }; | ||
13 | |||
14 | #define NCC 8 | ||
15 | struct termio { | ||
16 | unsigned short c_iflag; /* input mode flags */ | ||
17 | unsigned short c_oflag; /* output mode flags */ | ||
18 | unsigned short c_cflag; /* control mode flags */ | ||
19 | unsigned short c_lflag; /* local mode flags */ | ||
20 | unsigned char c_line; /* line discipline */ | ||
21 | unsigned char c_cc[NCC]; /* control characters */ | ||
22 | }; | ||
23 | |||
24 | /* modem lines */ | ||
25 | #define TIOCM_LE 0x001 | ||
26 | #define TIOCM_DTR 0x002 | ||
27 | #define TIOCM_RTS 0x004 | ||
28 | #define TIOCM_ST 0x008 | ||
29 | #define TIOCM_SR 0x010 | ||
30 | #define TIOCM_CTS 0x020 | ||
31 | #define TIOCM_CAR 0x040 | ||
32 | #define TIOCM_RNG 0x080 | ||
33 | #define TIOCM_DSR 0x100 | ||
34 | #define TIOCM_CD TIOCM_CAR | ||
35 | #define TIOCM_RI TIOCM_RNG | ||
36 | #define TIOCM_OUT1 0x2000 | ||
37 | #define TIOCM_OUT2 0x4000 | ||
38 | #define TIOCM_LOOP 0x8000 | ||
39 | |||
40 | /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ | ||
41 | |||
42 | #ifdef __KERNEL__ | ||
43 | |||
44 | #include <asm/uaccess.h> | ||
45 | |||
46 | /* intr=^C quit=^\ erase=del kill=^U | ||
47 | eof=^D vtime=\0 vmin=\1 sxtc=\0 | ||
48 | start=^Q stop=^S susp=^Z eol=\0 | ||
49 | reprint=^R discard=^U werase=^W lnext=^V | ||
50 | eol2=\0 | ||
51 | */ | ||
52 | #define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" | ||
53 | |||
54 | /* | ||
55 | * Translate a "termio" structure into a "termios". Ugh. | ||
56 | */ | ||
57 | #define SET_LOW_TERMIOS_BITS(termios, termio, x) { \ | ||
58 | unsigned short __tmp; \ | ||
59 | get_user(__tmp,&(termio)->x); \ | ||
60 | *(unsigned short *) &(termios)->x = __tmp; \ | ||
61 | } | ||
62 | |||
63 | static inline int user_termio_to_kernel_termios(struct ktermios *termios, | ||
64 | struct termio __user *termio) | ||
65 | { | ||
66 | SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); | ||
67 | SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); | ||
68 | SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); | ||
69 | SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); | ||
70 | get_user(termios->c_line, &termio->c_line); | ||
71 | return copy_from_user(termios->c_cc, termio->c_cc, NCC); | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Translate a "termios" structure into a "termio". Ugh. | ||
76 | */ | ||
77 | static inline int kernel_termios_to_user_termio(struct termio __user *termio, | ||
78 | struct ktermios *termios) | ||
79 | { | ||
80 | put_user((termios)->c_iflag, &(termio)->c_iflag); | ||
81 | put_user((termios)->c_oflag, &(termio)->c_oflag); | ||
82 | put_user((termios)->c_cflag, &(termio)->c_cflag); | ||
83 | put_user((termios)->c_lflag, &(termio)->c_lflag); | ||
84 | put_user((termios)->c_line, &(termio)->c_line); | ||
85 | return copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); | ||
86 | } | ||
87 | |||
88 | static inline int user_termios_to_kernel_termios(struct ktermios *k, | ||
89 | struct termios2 __user *u) | ||
90 | { | ||
91 | return copy_from_user(k, u, sizeof(struct termios2)); | ||
92 | } | ||
93 | |||
94 | static inline int kernel_termios_to_user_termios(struct termios2 __user *u, | ||
95 | struct ktermios *k) | ||
96 | { | ||
97 | return copy_to_user(u, k, sizeof(struct termios2)); | ||
98 | } | ||
99 | |||
100 | static inline int user_termios_to_kernel_termios_1(struct ktermios *k, | ||
101 | struct termios __user *u) | ||
102 | { | ||
103 | return copy_from_user(k, u, sizeof(struct termios)); | ||
104 | } | ||
105 | |||
106 | static inline int kernel_termios_to_user_termios_1(struct termios __user *u, | ||
107 | struct ktermios *k) | ||
108 | { | ||
109 | return copy_to_user(u, k, sizeof(struct termios)); | ||
110 | } | ||
111 | |||
112 | #endif /* __KERNEL__ */ | ||
113 | |||
114 | #endif /* _ASM_X86_TERMIOS_H */ | ||
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index b0783520988b..d27d0a2fec4c 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -49,7 +49,7 @@ struct thread_info { | |||
49 | .exec_domain = &default_exec_domain, \ | 49 | .exec_domain = &default_exec_domain, \ |
50 | .flags = 0, \ | 50 | .flags = 0, \ |
51 | .cpu = 0, \ | 51 | .cpu = 0, \ |
52 | .preempt_count = 1, \ | 52 | .preempt_count = INIT_PREEMPT_COUNT, \ |
53 | .addr_limit = KERNEL_DS, \ | 53 | .addr_limit = KERNEL_DS, \ |
54 | .restart_block = { \ | 54 | .restart_block = { \ |
55 | .fn = do_no_restart_syscall, \ | 55 | .fn = do_no_restart_syscall, \ |
@@ -95,7 +95,7 @@ struct thread_info { | |||
95 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ | 95 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ |
96 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ | 96 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ |
97 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ | 97 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ |
98 | #define TIF_SYSCALL_FTRACE 28 /* for ftrace syscall instrumentation */ | 98 | #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ |
99 | 99 | ||
100 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | 100 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) |
101 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | 101 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) |
@@ -118,17 +118,17 @@ struct thread_info { | |||
118 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) | 118 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) |
119 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) | 119 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) |
120 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) | 120 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) |
121 | #define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE) | 121 | #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) |
122 | 122 | ||
123 | /* work to do in syscall_trace_enter() */ | 123 | /* work to do in syscall_trace_enter() */ |
124 | #define _TIF_WORK_SYSCALL_ENTRY \ | 124 | #define _TIF_WORK_SYSCALL_ENTRY \ |
125 | (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_FTRACE | \ | 125 | (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ |
126 | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP) | 126 | _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT) |
127 | 127 | ||
128 | /* work to do in syscall_trace_leave() */ | 128 | /* work to do in syscall_trace_leave() */ |
129 | #define _TIF_WORK_SYSCALL_EXIT \ | 129 | #define _TIF_WORK_SYSCALL_EXIT \ |
130 | (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ | 130 | (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ |
131 | _TIF_SYSCALL_FTRACE) | 131 | _TIF_SYSCALL_TRACEPOINT) |
132 | 132 | ||
133 | /* work to do on interrupt/exception return */ | 133 | /* work to do on interrupt/exception return */ |
134 | #define _TIF_WORK_MASK \ | 134 | #define _TIF_WORK_MASK \ |
@@ -137,7 +137,8 @@ struct thread_info { | |||
137 | _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU)) | 137 | _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU)) |
138 | 138 | ||
139 | /* work to do on any return to user space */ | 139 | /* work to do on any return to user space */ |
140 | #define _TIF_ALLWORK_MASK ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_FTRACE) | 140 | #define _TIF_ALLWORK_MASK \ |
141 | ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT) | ||
141 | 142 | ||
142 | /* Only used for 64 bit */ | 143 | /* Only used for 64 bit */ |
143 | #define _TIF_DO_NOTIFY_MASK \ | 144 | #define _TIF_DO_NOTIFY_MASK \ |
@@ -213,7 +214,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack); | |||
213 | static inline struct thread_info *current_thread_info(void) | 214 | static inline struct thread_info *current_thread_info(void) |
214 | { | 215 | { |
215 | struct thread_info *ti; | 216 | struct thread_info *ti; |
216 | ti = (void *)(percpu_read(kernel_stack) + | 217 | ti = (void *)(percpu_read_stable(kernel_stack) + |
217 | KERNEL_STACK_OFFSET - THREAD_SIZE); | 218 | KERNEL_STACK_OFFSET - THREAD_SIZE); |
218 | return ti; | 219 | return ti; |
219 | } | 220 | } |
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 066ef590d7e0..26d06e052a18 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
@@ -129,25 +129,34 @@ extern unsigned long node_remap_size[]; | |||
129 | #endif | 129 | #endif |
130 | 130 | ||
131 | /* sched_domains SD_NODE_INIT for NUMA machines */ | 131 | /* sched_domains SD_NODE_INIT for NUMA machines */ |
132 | #define SD_NODE_INIT (struct sched_domain) { \ | 132 | #define SD_NODE_INIT (struct sched_domain) { \ |
133 | .min_interval = 8, \ | 133 | .min_interval = 8, \ |
134 | .max_interval = 32, \ | 134 | .max_interval = 32, \ |
135 | .busy_factor = 32, \ | 135 | .busy_factor = 32, \ |
136 | .imbalance_pct = 125, \ | 136 | .imbalance_pct = 125, \ |
137 | .cache_nice_tries = SD_CACHE_NICE_TRIES, \ | 137 | .cache_nice_tries = SD_CACHE_NICE_TRIES, \ |
138 | .busy_idx = 3, \ | 138 | .busy_idx = 3, \ |
139 | .idle_idx = SD_IDLE_IDX, \ | 139 | .idle_idx = SD_IDLE_IDX, \ |
140 | .newidle_idx = SD_NEWIDLE_IDX, \ | 140 | .newidle_idx = SD_NEWIDLE_IDX, \ |
141 | .wake_idx = 1, \ | 141 | .wake_idx = 1, \ |
142 | .forkexec_idx = SD_FORKEXEC_IDX, \ | 142 | .forkexec_idx = SD_FORKEXEC_IDX, \ |
143 | .flags = SD_LOAD_BALANCE \ | 143 | \ |
144 | | SD_BALANCE_EXEC \ | 144 | .flags = 1*SD_LOAD_BALANCE \ |
145 | | SD_BALANCE_FORK \ | 145 | | 1*SD_BALANCE_NEWIDLE \ |
146 | | SD_WAKE_AFFINE \ | 146 | | 1*SD_BALANCE_EXEC \ |
147 | | SD_WAKE_BALANCE \ | 147 | | 1*SD_BALANCE_FORK \ |
148 | | SD_SERIALIZE, \ | 148 | | 0*SD_WAKE_IDLE \ |
149 | .last_balance = jiffies, \ | 149 | | 1*SD_WAKE_AFFINE \ |
150 | .balance_interval = 1, \ | 150 | | 1*SD_WAKE_BALANCE \ |
151 | | 0*SD_SHARE_CPUPOWER \ | ||
152 | | 0*SD_POWERSAVINGS_BALANCE \ | ||
153 | | 0*SD_SHARE_PKG_RESOURCES \ | ||
154 | | 1*SD_SERIALIZE \ | ||
155 | | 1*SD_WAKE_IDLE_FAR \ | ||
156 | | 0*SD_PREFER_SIBLING \ | ||
157 | , \ | ||
158 | .last_balance = jiffies, \ | ||
159 | .balance_interval = 1, \ | ||
151 | } | 160 | } |
152 | 161 | ||
153 | #ifdef CONFIG_X86_64_ACPI_NUMA | 162 | #ifdef CONFIG_X86_64_ACPI_NUMA |
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index bfd74c032fca..4da91ad69e0d 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -81,9 +81,7 @@ extern int panic_on_unrecovered_nmi; | |||
81 | 81 | ||
82 | void math_error(void __user *); | 82 | void math_error(void __user *); |
83 | void math_emulate(struct math_emu_info *); | 83 | void math_emulate(struct math_emu_info *); |
84 | #ifdef CONFIG_X86_32 | 84 | #ifndef CONFIG_X86_32 |
85 | unsigned long patch_espfix_desc(unsigned long, unsigned long); | ||
86 | #else | ||
87 | asmlinkage void smp_thermal_interrupt(void); | 85 | asmlinkage void smp_thermal_interrupt(void); |
88 | asmlinkage void mce_threshold_interrupt(void); | 86 | asmlinkage void mce_threshold_interrupt(void); |
89 | #endif | 87 | #endif |
diff --git a/arch/x86/include/asm/types.h b/arch/x86/include/asm/types.h index 09b97745772f..df1da20f4534 100644 --- a/arch/x86/include/asm/types.h +++ b/arch/x86/include/asm/types.h | |||
@@ -1,19 +1,11 @@ | |||
1 | #ifndef _ASM_X86_TYPES_H | 1 | #ifndef _ASM_X86_TYPES_H |
2 | #define _ASM_X86_TYPES_H | 2 | #define _ASM_X86_TYPES_H |
3 | 3 | ||
4 | #include <asm-generic/int-ll64.h> | 4 | #define dma_addr_t dma_addr_t |
5 | 5 | ||
6 | #ifndef __ASSEMBLY__ | 6 | #include <asm-generic/types.h> |
7 | |||
8 | typedef unsigned short umode_t; | ||
9 | 7 | ||
10 | #endif /* __ASSEMBLY__ */ | ||
11 | |||
12 | /* | ||
13 | * These aren't exported outside the kernel to avoid name space clashes | ||
14 | */ | ||
15 | #ifdef __KERNEL__ | 8 | #ifdef __KERNEL__ |
16 | |||
17 | #ifndef __ASSEMBLY__ | 9 | #ifndef __ASSEMBLY__ |
18 | 10 | ||
19 | typedef u64 dma64_addr_t; | 11 | typedef u64 dma64_addr_t; |
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 20e6a795e160..d2c6c930b491 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -212,9 +212,9 @@ extern int __get_user_bad(void); | |||
212 | : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") | 212 | : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") |
213 | #else | 213 | #else |
214 | #define __put_user_asm_u64(x, ptr, retval, errret) \ | 214 | #define __put_user_asm_u64(x, ptr, retval, errret) \ |
215 | __put_user_asm(x, ptr, retval, "q", "", "Zr", errret) | 215 | __put_user_asm(x, ptr, retval, "q", "", "er", errret) |
216 | #define __put_user_asm_ex_u64(x, addr) \ | 216 | #define __put_user_asm_ex_u64(x, addr) \ |
217 | __put_user_asm_ex(x, addr, "q", "", "Zr") | 217 | __put_user_asm_ex(x, addr, "q", "", "er") |
218 | #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) | 218 | #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) |
219 | #endif | 219 | #endif |
220 | 220 | ||
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 8cc687326eb8..db24b215fc50 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h | |||
@@ -88,11 +88,11 @@ int __copy_to_user(void __user *dst, const void *src, unsigned size) | |||
88 | ret, "l", "k", "ir", 4); | 88 | ret, "l", "k", "ir", 4); |
89 | return ret; | 89 | return ret; |
90 | case 8:__put_user_asm(*(u64 *)src, (u64 __user *)dst, | 90 | case 8:__put_user_asm(*(u64 *)src, (u64 __user *)dst, |
91 | ret, "q", "", "ir", 8); | 91 | ret, "q", "", "er", 8); |
92 | return ret; | 92 | return ret; |
93 | case 10: | 93 | case 10: |
94 | __put_user_asm(*(u64 *)src, (u64 __user *)dst, | 94 | __put_user_asm(*(u64 *)src, (u64 __user *)dst, |
95 | ret, "q", "", "ir", 10); | 95 | ret, "q", "", "er", 10); |
96 | if (unlikely(ret)) | 96 | if (unlikely(ret)) |
97 | return ret; | 97 | return ret; |
98 | asm("":::"memory"); | 98 | asm("":::"memory"); |
@@ -101,12 +101,12 @@ int __copy_to_user(void __user *dst, const void *src, unsigned size) | |||
101 | return ret; | 101 | return ret; |
102 | case 16: | 102 | case 16: |
103 | __put_user_asm(*(u64 *)src, (u64 __user *)dst, | 103 | __put_user_asm(*(u64 *)src, (u64 __user *)dst, |
104 | ret, "q", "", "ir", 16); | 104 | ret, "q", "", "er", 16); |
105 | if (unlikely(ret)) | 105 | if (unlikely(ret)) |
106 | return ret; | 106 | return ret; |
107 | asm("":::"memory"); | 107 | asm("":::"memory"); |
108 | __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst, | 108 | __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst, |
109 | ret, "q", "", "ir", 8); | 109 | ret, "q", "", "er", 8); |
110 | return ret; | 110 | return ret; |
111 | default: | 111 | default: |
112 | return copy_user_generic((__force void *)dst, src, size); | 112 | return copy_user_generic((__force void *)dst, src, size); |
@@ -157,7 +157,7 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) | |||
157 | ret, "q", "", "=r", 8); | 157 | ret, "q", "", "=r", 8); |
158 | if (likely(!ret)) | 158 | if (likely(!ret)) |
159 | __put_user_asm(tmp, (u64 __user *)dst, | 159 | __put_user_asm(tmp, (u64 __user *)dst, |
160 | ret, "q", "", "ir", 8); | 160 | ret, "q", "", "er", 8); |
161 | return ret; | 161 | return ret; |
162 | } | 162 | } |
163 | default: | 163 | default: |
diff --git a/arch/x86/include/asm/ucontext.h b/arch/x86/include/asm/ucontext.h index 87324cf439d9..b7c29c8017f2 100644 --- a/arch/x86/include/asm/ucontext.h +++ b/arch/x86/include/asm/ucontext.h | |||
@@ -7,12 +7,6 @@ | |||
7 | * sigcontext struct (uc_mcontext). | 7 | * sigcontext struct (uc_mcontext). |
8 | */ | 8 | */ |
9 | 9 | ||
10 | struct ucontext { | 10 | #include <asm-generic/ucontext.h> |
11 | unsigned long uc_flags; | ||
12 | struct ucontext *uc_link; | ||
13 | stack_t uc_stack; | ||
14 | struct sigcontext uc_mcontext; | ||
15 | sigset_t uc_sigmask; /* mask last for extensibility */ | ||
16 | }; | ||
17 | 11 | ||
18 | #endif /* _ASM_X86_UCONTEXT_H */ | 12 | #endif /* _ASM_X86_UCONTEXT_H */ |
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 732a30706153..8deaada61bc8 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h | |||
@@ -345,6 +345,8 @@ | |||
345 | 345 | ||
346 | #ifdef __KERNEL__ | 346 | #ifdef __KERNEL__ |
347 | 347 | ||
348 | #define NR_syscalls 337 | ||
349 | |||
348 | #define __ARCH_WANT_IPC_PARSE_VERSION | 350 | #define __ARCH_WANT_IPC_PARSE_VERSION |
349 | #define __ARCH_WANT_OLD_READDIR | 351 | #define __ARCH_WANT_OLD_READDIR |
350 | #define __ARCH_WANT_OLD_STAT | 352 | #define __ARCH_WANT_OLD_STAT |
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 900e1617e672..b9f3c60de5f7 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
@@ -688,6 +688,12 @@ __SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) | |||
688 | #endif /* __NO_STUBS */ | 688 | #endif /* __NO_STUBS */ |
689 | 689 | ||
690 | #ifdef __KERNEL__ | 690 | #ifdef __KERNEL__ |
691 | |||
692 | #ifndef COMPILE_OFFSETS | ||
693 | #include <asm/asm-offsets.h> | ||
694 | #define NR_syscalls (__NR_syscall_max + 1) | ||
695 | #endif | ||
696 | |||
691 | /* | 697 | /* |
692 | * "Conditional" syscalls | 698 | * "Conditional" syscalls |
693 | * | 699 | * |
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index bddd44f2f0ab..80e2984f521c 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h | |||
@@ -133,7 +133,7 @@ struct bau_msg_payload { | |||
133 | * see table 4.2.3.0.1 in broacast_assist spec. | 133 | * see table 4.2.3.0.1 in broacast_assist spec. |
134 | */ | 134 | */ |
135 | struct bau_msg_header { | 135 | struct bau_msg_header { |
136 | unsigned int dest_subnodeid:6; /* must be zero */ | 136 | unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ |
137 | /* bits 5:0 */ | 137 | /* bits 5:0 */ |
138 | unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */ | 138 | unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */ |
139 | /* bits 20:6 */ /* first bit in node_map */ | 139 | /* bits 20:6 */ /* first bit in node_map */ |
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 341070f7ad5c..77a68505419a 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h | |||
@@ -175,7 +175,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); | |||
175 | #define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT)) | 175 | #define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT)) |
176 | 176 | ||
177 | #define UV_GLOBAL_MMR64_PNODE_BITS(p) \ | 177 | #define UV_GLOBAL_MMR64_PNODE_BITS(p) \ |
178 | ((unsigned long)(UV_PNODE_TO_GNODE(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT) | 178 | (((unsigned long)(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT) |
179 | 179 | ||
180 | #define UV_APIC_PNODE_SHIFT 6 | 180 | #define UV_APIC_PNODE_SHIFT 6 |
181 | 181 | ||
@@ -327,6 +327,7 @@ struct uv_blade_info { | |||
327 | unsigned short nr_possible_cpus; | 327 | unsigned short nr_possible_cpus; |
328 | unsigned short nr_online_cpus; | 328 | unsigned short nr_online_cpus; |
329 | unsigned short pnode; | 329 | unsigned short pnode; |
330 | short memory_nid; | ||
330 | }; | 331 | }; |
331 | extern struct uv_blade_info *uv_blade_info; | 332 | extern struct uv_blade_info *uv_blade_info; |
332 | extern short *uv_node_to_blade; | 333 | extern short *uv_node_to_blade; |
@@ -363,6 +364,12 @@ static inline int uv_blade_to_pnode(int bid) | |||
363 | return uv_blade_info[bid].pnode; | 364 | return uv_blade_info[bid].pnode; |
364 | } | 365 | } |
365 | 366 | ||
367 | /* Nid of memory node on blade. -1 if no blade-local memory */ | ||
368 | static inline int uv_blade_to_memory_nid(int bid) | ||
369 | { | ||
370 | return uv_blade_info[bid].memory_nid; | ||
371 | } | ||
372 | |||
366 | /* Determine the number of possible cpus on a blade */ | 373 | /* Determine the number of possible cpus on a blade */ |
367 | static inline int uv_blade_nr_possible_cpus(int bid) | 374 | static inline int uv_blade_nr_possible_cpus(int bid) |
368 | { | 375 | { |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 6c327b852e23..430d5b24af7b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -26,6 +26,8 @@ CFLAGS_tsc.o := $(nostackp) | |||
26 | CFLAGS_paravirt.o := $(nostackp) | 26 | CFLAGS_paravirt.o := $(nostackp) |
27 | GCOV_PROFILE_vsyscall_64.o := n | 27 | GCOV_PROFILE_vsyscall_64.o := n |
28 | GCOV_PROFILE_hpet.o := n | 28 | GCOV_PROFILE_hpet.o := n |
29 | GCOV_PROFILE_tsc.o := n | ||
30 | GCOV_PROFILE_paravirt.o := n | ||
29 | 31 | ||
30 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o | 32 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o |
31 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o | 33 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 6b8ca3a0285d..67e929b89875 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -833,106 +833,6 @@ static int __init acpi_parse_madt_lapic_entries(void) | |||
833 | extern int es7000_plat; | 833 | extern int es7000_plat; |
834 | #endif | 834 | #endif |
835 | 835 | ||
836 | static struct { | ||
837 | int gsi_base; | ||
838 | int gsi_end; | ||
839 | } mp_ioapic_routing[MAX_IO_APICS]; | ||
840 | |||
841 | int mp_find_ioapic(int gsi) | ||
842 | { | ||
843 | int i = 0; | ||
844 | |||
845 | /* Find the IOAPIC that manages this GSI. */ | ||
846 | for (i = 0; i < nr_ioapics; i++) { | ||
847 | if ((gsi >= mp_ioapic_routing[i].gsi_base) | ||
848 | && (gsi <= mp_ioapic_routing[i].gsi_end)) | ||
849 | return i; | ||
850 | } | ||
851 | |||
852 | printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); | ||
853 | return -1; | ||
854 | } | ||
855 | |||
856 | int mp_find_ioapic_pin(int ioapic, int gsi) | ||
857 | { | ||
858 | if (WARN_ON(ioapic == -1)) | ||
859 | return -1; | ||
860 | if (WARN_ON(gsi > mp_ioapic_routing[ioapic].gsi_end)) | ||
861 | return -1; | ||
862 | |||
863 | return gsi - mp_ioapic_routing[ioapic].gsi_base; | ||
864 | } | ||
865 | |||
866 | static u8 __init uniq_ioapic_id(u8 id) | ||
867 | { | ||
868 | #ifdef CONFIG_X86_32 | ||
869 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && | ||
870 | !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) | ||
871 | return io_apic_get_unique_id(nr_ioapics, id); | ||
872 | else | ||
873 | return id; | ||
874 | #else | ||
875 | int i; | ||
876 | DECLARE_BITMAP(used, 256); | ||
877 | bitmap_zero(used, 256); | ||
878 | for (i = 0; i < nr_ioapics; i++) { | ||
879 | struct mpc_ioapic *ia = &mp_ioapics[i]; | ||
880 | __set_bit(ia->apicid, used); | ||
881 | } | ||
882 | if (!test_bit(id, used)) | ||
883 | return id; | ||
884 | return find_first_zero_bit(used, 256); | ||
885 | #endif | ||
886 | } | ||
887 | |||
888 | static int bad_ioapic(unsigned long address) | ||
889 | { | ||
890 | if (nr_ioapics >= MAX_IO_APICS) { | ||
891 | printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " | ||
892 | "(found %d)\n", MAX_IO_APICS, nr_ioapics); | ||
893 | panic("Recompile kernel with bigger MAX_IO_APICS!\n"); | ||
894 | } | ||
895 | if (!address) { | ||
896 | printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" | ||
897 | " found in table, skipping!\n"); | ||
898 | return 1; | ||
899 | } | ||
900 | return 0; | ||
901 | } | ||
902 | |||
903 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | ||
904 | { | ||
905 | int idx = 0; | ||
906 | |||
907 | if (bad_ioapic(address)) | ||
908 | return; | ||
909 | |||
910 | idx = nr_ioapics; | ||
911 | |||
912 | mp_ioapics[idx].type = MP_IOAPIC; | ||
913 | mp_ioapics[idx].flags = MPC_APIC_USABLE; | ||
914 | mp_ioapics[idx].apicaddr = address; | ||
915 | |||
916 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); | ||
917 | mp_ioapics[idx].apicid = uniq_ioapic_id(id); | ||
918 | mp_ioapics[idx].apicver = io_apic_get_version(idx); | ||
919 | |||
920 | /* | ||
921 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups | ||
922 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). | ||
923 | */ | ||
924 | mp_ioapic_routing[idx].gsi_base = gsi_base; | ||
925 | mp_ioapic_routing[idx].gsi_end = gsi_base + | ||
926 | io_apic_get_redir_entries(idx); | ||
927 | |||
928 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " | ||
929 | "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, | ||
930 | mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, | ||
931 | mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); | ||
932 | |||
933 | nr_ioapics++; | ||
934 | } | ||
935 | |||
936 | int __init acpi_probe_gsi(void) | 836 | int __init acpi_probe_gsi(void) |
937 | { | 837 | { |
938 | int idx; | 838 | int idx; |
@@ -947,7 +847,7 @@ int __init acpi_probe_gsi(void) | |||
947 | 847 | ||
948 | max_gsi = 0; | 848 | max_gsi = 0; |
949 | for (idx = 0; idx < nr_ioapics; idx++) { | 849 | for (idx = 0; idx < nr_ioapics; idx++) { |
950 | gsi = mp_ioapic_routing[idx].gsi_end; | 850 | gsi = mp_gsi_routing[idx].gsi_end; |
951 | 851 | ||
952 | if (gsi > max_gsi) | 852 | if (gsi > max_gsi) |
953 | max_gsi = gsi; | 853 | max_gsi = gsi; |
@@ -1179,9 +1079,8 @@ static int __init acpi_parse_madt_ioapic_entries(void) | |||
1179 | * If MPS is present, it will handle them, | 1079 | * If MPS is present, it will handle them, |
1180 | * otherwise the system will stay in PIC mode | 1080 | * otherwise the system will stay in PIC mode |
1181 | */ | 1081 | */ |
1182 | if (acpi_disabled || acpi_noirq) { | 1082 | if (acpi_disabled || acpi_noirq) |
1183 | return -ENODEV; | 1083 | return -ENODEV; |
1184 | } | ||
1185 | 1084 | ||
1186 | if (!cpu_has_apic) | 1085 | if (!cpu_has_apic) |
1187 | return -ENODEV; | 1086 | return -ENODEV; |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index f57658702571..de7353c0ce9c 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -2,6 +2,7 @@ | |||
2 | #include <linux/sched.h> | 2 | #include <linux/sched.h> |
3 | #include <linux/mutex.h> | 3 | #include <linux/mutex.h> |
4 | #include <linux/list.h> | 4 | #include <linux/list.h> |
5 | #include <linux/stringify.h> | ||
5 | #include <linux/kprobes.h> | 6 | #include <linux/kprobes.h> |
6 | #include <linux/mm.h> | 7 | #include <linux/mm.h> |
7 | #include <linux/vmalloc.h> | 8 | #include <linux/vmalloc.h> |
@@ -32,7 +33,7 @@ __setup("smp-alt-boot", bootonly); | |||
32 | #define smp_alt_once 1 | 33 | #define smp_alt_once 1 |
33 | #endif | 34 | #endif |
34 | 35 | ||
35 | static int debug_alternative; | 36 | static int __initdata_or_module debug_alternative; |
36 | 37 | ||
37 | static int __init debug_alt(char *str) | 38 | static int __init debug_alt(char *str) |
38 | { | 39 | { |
@@ -51,7 +52,7 @@ static int __init setup_noreplace_smp(char *str) | |||
51 | __setup("noreplace-smp", setup_noreplace_smp); | 52 | __setup("noreplace-smp", setup_noreplace_smp); |
52 | 53 | ||
53 | #ifdef CONFIG_PARAVIRT | 54 | #ifdef CONFIG_PARAVIRT |
54 | static int noreplace_paravirt = 0; | 55 | static int __initdata_or_module noreplace_paravirt = 0; |
55 | 56 | ||
56 | static int __init setup_noreplace_paravirt(char *str) | 57 | static int __init setup_noreplace_paravirt(char *str) |
57 | { | 58 | { |
@@ -64,16 +65,17 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt); | |||
64 | #define DPRINTK(fmt, args...) if (debug_alternative) \ | 65 | #define DPRINTK(fmt, args...) if (debug_alternative) \ |
65 | printk(KERN_DEBUG fmt, args) | 66 | printk(KERN_DEBUG fmt, args) |
66 | 67 | ||
67 | #ifdef GENERIC_NOP1 | 68 | #if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64) |
68 | /* Use inline assembly to define this because the nops are defined | 69 | /* Use inline assembly to define this because the nops are defined |
69 | as inline assembly strings in the include files and we cannot | 70 | as inline assembly strings in the include files and we cannot |
70 | get them easily into strings. */ | 71 | get them easily into strings. */ |
71 | asm("\t.section .rodata, \"a\"\nintelnops: " | 72 | asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nintelnops: " |
72 | GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 | 73 | GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 |
73 | GENERIC_NOP7 GENERIC_NOP8 | 74 | GENERIC_NOP7 GENERIC_NOP8 |
74 | "\t.previous"); | 75 | "\t.previous"); |
75 | extern const unsigned char intelnops[]; | 76 | extern const unsigned char intelnops[]; |
76 | static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { | 77 | static const unsigned char *const __initconst_or_module |
78 | intel_nops[ASM_NOP_MAX+1] = { | ||
77 | NULL, | 79 | NULL, |
78 | intelnops, | 80 | intelnops, |
79 | intelnops + 1, | 81 | intelnops + 1, |
@@ -87,12 +89,13 @@ static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { | |||
87 | #endif | 89 | #endif |
88 | 90 | ||
89 | #ifdef K8_NOP1 | 91 | #ifdef K8_NOP1 |
90 | asm("\t.section .rodata, \"a\"\nk8nops: " | 92 | asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk8nops: " |
91 | K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 | 93 | K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 |
92 | K8_NOP7 K8_NOP8 | 94 | K8_NOP7 K8_NOP8 |
93 | "\t.previous"); | 95 | "\t.previous"); |
94 | extern const unsigned char k8nops[]; | 96 | extern const unsigned char k8nops[]; |
95 | static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { | 97 | static const unsigned char *const __initconst_or_module |
98 | k8_nops[ASM_NOP_MAX+1] = { | ||
96 | NULL, | 99 | NULL, |
97 | k8nops, | 100 | k8nops, |
98 | k8nops + 1, | 101 | k8nops + 1, |
@@ -105,13 +108,14 @@ static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { | |||
105 | }; | 108 | }; |
106 | #endif | 109 | #endif |
107 | 110 | ||
108 | #ifdef K7_NOP1 | 111 | #if defined(K7_NOP1) && !defined(CONFIG_X86_64) |
109 | asm("\t.section .rodata, \"a\"\nk7nops: " | 112 | asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk7nops: " |
110 | K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 | 113 | K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 |
111 | K7_NOP7 K7_NOP8 | 114 | K7_NOP7 K7_NOP8 |
112 | "\t.previous"); | 115 | "\t.previous"); |
113 | extern const unsigned char k7nops[]; | 116 | extern const unsigned char k7nops[]; |
114 | static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { | 117 | static const unsigned char *const __initconst_or_module |
118 | k7_nops[ASM_NOP_MAX+1] = { | ||
115 | NULL, | 119 | NULL, |
116 | k7nops, | 120 | k7nops, |
117 | k7nops + 1, | 121 | k7nops + 1, |
@@ -125,12 +129,13 @@ static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { | |||
125 | #endif | 129 | #endif |
126 | 130 | ||
127 | #ifdef P6_NOP1 | 131 | #ifdef P6_NOP1 |
128 | asm("\t.section .rodata, \"a\"\np6nops: " | 132 | asm("\t" __stringify(__INITRODATA_OR_MODULE) "\np6nops: " |
129 | P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 | 133 | P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 |
130 | P6_NOP7 P6_NOP8 | 134 | P6_NOP7 P6_NOP8 |
131 | "\t.previous"); | 135 | "\t.previous"); |
132 | extern const unsigned char p6nops[]; | 136 | extern const unsigned char p6nops[]; |
133 | static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { | 137 | static const unsigned char *const __initconst_or_module |
138 | p6_nops[ASM_NOP_MAX+1] = { | ||
134 | NULL, | 139 | NULL, |
135 | p6nops, | 140 | p6nops, |
136 | p6nops + 1, | 141 | p6nops + 1, |
@@ -146,7 +151,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { | |||
146 | #ifdef CONFIG_X86_64 | 151 | #ifdef CONFIG_X86_64 |
147 | 152 | ||
148 | extern char __vsyscall_0; | 153 | extern char __vsyscall_0; |
149 | const unsigned char *const *find_nop_table(void) | 154 | static const unsigned char *const *__init_or_module find_nop_table(void) |
150 | { | 155 | { |
151 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && | 156 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && |
152 | boot_cpu_has(X86_FEATURE_NOPL)) | 157 | boot_cpu_has(X86_FEATURE_NOPL)) |
@@ -157,7 +162,7 @@ const unsigned char *const *find_nop_table(void) | |||
157 | 162 | ||
158 | #else /* CONFIG_X86_64 */ | 163 | #else /* CONFIG_X86_64 */ |
159 | 164 | ||
160 | const unsigned char *const *find_nop_table(void) | 165 | static const unsigned char *const *__init_or_module find_nop_table(void) |
161 | { | 166 | { |
162 | if (boot_cpu_has(X86_FEATURE_K8)) | 167 | if (boot_cpu_has(X86_FEATURE_K8)) |
163 | return k8_nops; | 168 | return k8_nops; |
@@ -172,7 +177,7 @@ const unsigned char *const *find_nop_table(void) | |||
172 | #endif /* CONFIG_X86_64 */ | 177 | #endif /* CONFIG_X86_64 */ |
173 | 178 | ||
174 | /* Use this to add nops to a buffer, then text_poke the whole buffer. */ | 179 | /* Use this to add nops to a buffer, then text_poke the whole buffer. */ |
175 | void add_nops(void *insns, unsigned int len) | 180 | static void __init_or_module add_nops(void *insns, unsigned int len) |
176 | { | 181 | { |
177 | const unsigned char *const *noptable = find_nop_table(); | 182 | const unsigned char *const *noptable = find_nop_table(); |
178 | 183 | ||
@@ -185,10 +190,10 @@ void add_nops(void *insns, unsigned int len) | |||
185 | len -= noplen; | 190 | len -= noplen; |
186 | } | 191 | } |
187 | } | 192 | } |
188 | EXPORT_SYMBOL_GPL(add_nops); | ||
189 | 193 | ||
190 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | 194 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; |
191 | extern u8 *__smp_locks[], *__smp_locks_end[]; | 195 | extern u8 *__smp_locks[], *__smp_locks_end[]; |
196 | static void *text_poke_early(void *addr, const void *opcode, size_t len); | ||
192 | 197 | ||
193 | /* Replace instructions with better alternatives for this CPU type. | 198 | /* Replace instructions with better alternatives for this CPU type. |
194 | This runs before SMP is initialized to avoid SMP problems with | 199 | This runs before SMP is initialized to avoid SMP problems with |
@@ -196,7 +201,8 @@ extern u8 *__smp_locks[], *__smp_locks_end[]; | |||
196 | APs have less capabilities than the boot processor are not handled. | 201 | APs have less capabilities than the boot processor are not handled. |
197 | Tough. Make sure you disable such features by hand. */ | 202 | Tough. Make sure you disable such features by hand. */ |
198 | 203 | ||
199 | void apply_alternatives(struct alt_instr *start, struct alt_instr *end) | 204 | void __init_or_module apply_alternatives(struct alt_instr *start, |
205 | struct alt_instr *end) | ||
200 | { | 206 | { |
201 | struct alt_instr *a; | 207 | struct alt_instr *a; |
202 | char insnbuf[MAX_PATCH_LEN]; | 208 | char insnbuf[MAX_PATCH_LEN]; |
@@ -279,9 +285,10 @@ static LIST_HEAD(smp_alt_modules); | |||
279 | static DEFINE_MUTEX(smp_alt); | 285 | static DEFINE_MUTEX(smp_alt); |
280 | static int smp_mode = 1; /* protected by smp_alt */ | 286 | static int smp_mode = 1; /* protected by smp_alt */ |
281 | 287 | ||
282 | void alternatives_smp_module_add(struct module *mod, char *name, | 288 | void __init_or_module alternatives_smp_module_add(struct module *mod, |
283 | void *locks, void *locks_end, | 289 | char *name, |
284 | void *text, void *text_end) | 290 | void *locks, void *locks_end, |
291 | void *text, void *text_end) | ||
285 | { | 292 | { |
286 | struct smp_alt_module *smp; | 293 | struct smp_alt_module *smp; |
287 | 294 | ||
@@ -317,7 +324,7 @@ void alternatives_smp_module_add(struct module *mod, char *name, | |||
317 | mutex_unlock(&smp_alt); | 324 | mutex_unlock(&smp_alt); |
318 | } | 325 | } |
319 | 326 | ||
320 | void alternatives_smp_module_del(struct module *mod) | 327 | void __init_or_module alternatives_smp_module_del(struct module *mod) |
321 | { | 328 | { |
322 | struct smp_alt_module *item; | 329 | struct smp_alt_module *item; |
323 | 330 | ||
@@ -386,8 +393,8 @@ void alternatives_smp_switch(int smp) | |||
386 | #endif | 393 | #endif |
387 | 394 | ||
388 | #ifdef CONFIG_PARAVIRT | 395 | #ifdef CONFIG_PARAVIRT |
389 | void apply_paravirt(struct paravirt_patch_site *start, | 396 | void __init_or_module apply_paravirt(struct paravirt_patch_site *start, |
390 | struct paravirt_patch_site *end) | 397 | struct paravirt_patch_site *end) |
391 | { | 398 | { |
392 | struct paravirt_patch_site *p; | 399 | struct paravirt_patch_site *p; |
393 | char insnbuf[MAX_PATCH_LEN]; | 400 | char insnbuf[MAX_PATCH_LEN]; |
@@ -485,13 +492,14 @@ void __init alternative_instructions(void) | |||
485 | * instructions. And on the local CPU you need to be protected again NMI or MCE | 492 | * instructions. And on the local CPU you need to be protected again NMI or MCE |
486 | * handlers seeing an inconsistent instruction while you patch. | 493 | * handlers seeing an inconsistent instruction while you patch. |
487 | */ | 494 | */ |
488 | void *text_poke_early(void *addr, const void *opcode, size_t len) | 495 | static void *__init_or_module text_poke_early(void *addr, const void *opcode, |
496 | size_t len) | ||
489 | { | 497 | { |
490 | unsigned long flags; | 498 | unsigned long flags; |
491 | local_irq_save(flags); | 499 | local_irq_save(flags); |
492 | memcpy(addr, opcode, len); | 500 | memcpy(addr, opcode, len); |
493 | local_irq_restore(flags); | ||
494 | sync_core(); | 501 | sync_core(); |
502 | local_irq_restore(flags); | ||
495 | /* Could also do a CLFLUSH here to speed up CPU recovery; but | 503 | /* Could also do a CLFLUSH here to speed up CPU recovery; but |
496 | that causes hangs on some VIA CPUs. */ | 504 | that causes hangs on some VIA CPUs. */ |
497 | return addr; | 505 | return addr; |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 9372f0406ad4..98f230f6a28d 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -41,9 +41,13 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); | |||
41 | static LIST_HEAD(iommu_pd_list); | 41 | static LIST_HEAD(iommu_pd_list); |
42 | static DEFINE_SPINLOCK(iommu_pd_list_lock); | 42 | static DEFINE_SPINLOCK(iommu_pd_list_lock); |
43 | 43 | ||
44 | #ifdef CONFIG_IOMMU_API | 44 | /* |
45 | * Domain for untranslated devices - only allocated | ||
46 | * if iommu=pt passed on kernel cmd line. | ||
47 | */ | ||
48 | static struct protection_domain *pt_domain; | ||
49 | |||
45 | static struct iommu_ops amd_iommu_ops; | 50 | static struct iommu_ops amd_iommu_ops; |
46 | #endif | ||
47 | 51 | ||
48 | /* | 52 | /* |
49 | * general struct to manage commands send to an IOMMU | 53 | * general struct to manage commands send to an IOMMU |
@@ -55,16 +59,16 @@ struct iommu_cmd { | |||
55 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | 59 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, |
56 | struct unity_map_entry *e); | 60 | struct unity_map_entry *e); |
57 | static struct dma_ops_domain *find_protection_domain(u16 devid); | 61 | static struct dma_ops_domain *find_protection_domain(u16 devid); |
58 | static u64* alloc_pte(struct protection_domain *dom, | 62 | static u64 *alloc_pte(struct protection_domain *domain, |
59 | unsigned long address, u64 | 63 | unsigned long address, int end_lvl, |
60 | **pte_page, gfp_t gfp); | 64 | u64 **pte_page, gfp_t gfp); |
61 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | 65 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, |
62 | unsigned long start_page, | 66 | unsigned long start_page, |
63 | unsigned int pages); | 67 | unsigned int pages); |
64 | 68 | static void reset_iommu_command_buffer(struct amd_iommu *iommu); | |
65 | #ifndef BUS_NOTIFY_UNBOUND_DRIVER | 69 | static u64 *fetch_pte(struct protection_domain *domain, |
66 | #define BUS_NOTIFY_UNBOUND_DRIVER 0x0005 | 70 | unsigned long address, int map_size); |
67 | #endif | 71 | static void update_domain(struct protection_domain *domain); |
68 | 72 | ||
69 | #ifdef CONFIG_AMD_IOMMU_STATS | 73 | #ifdef CONFIG_AMD_IOMMU_STATS |
70 | 74 | ||
@@ -138,7 +142,25 @@ static int iommu_has_npcache(struct amd_iommu *iommu) | |||
138 | * | 142 | * |
139 | ****************************************************************************/ | 143 | ****************************************************************************/ |
140 | 144 | ||
141 | static void iommu_print_event(void *__evt) | 145 | static void dump_dte_entry(u16 devid) |
146 | { | ||
147 | int i; | ||
148 | |||
149 | for (i = 0; i < 8; ++i) | ||
150 | pr_err("AMD-Vi: DTE[%d]: %08x\n", i, | ||
151 | amd_iommu_dev_table[devid].data[i]); | ||
152 | } | ||
153 | |||
154 | static void dump_command(unsigned long phys_addr) | ||
155 | { | ||
156 | struct iommu_cmd *cmd = phys_to_virt(phys_addr); | ||
157 | int i; | ||
158 | |||
159 | for (i = 0; i < 4; ++i) | ||
160 | pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); | ||
161 | } | ||
162 | |||
163 | static void iommu_print_event(struct amd_iommu *iommu, void *__evt) | ||
142 | { | 164 | { |
143 | u32 *event = __evt; | 165 | u32 *event = __evt; |
144 | int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; | 166 | int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; |
@@ -147,7 +169,7 @@ static void iommu_print_event(void *__evt) | |||
147 | int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; | 169 | int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; |
148 | u64 address = (u64)(((u64)event[3]) << 32) | event[2]; | 170 | u64 address = (u64)(((u64)event[3]) << 32) | event[2]; |
149 | 171 | ||
150 | printk(KERN_ERR "AMD IOMMU: Event logged ["); | 172 | printk(KERN_ERR "AMD-Vi: Event logged ["); |
151 | 173 | ||
152 | switch (type) { | 174 | switch (type) { |
153 | case EVENT_TYPE_ILL_DEV: | 175 | case EVENT_TYPE_ILL_DEV: |
@@ -155,6 +177,7 @@ static void iommu_print_event(void *__evt) | |||
155 | "address=0x%016llx flags=0x%04x]\n", | 177 | "address=0x%016llx flags=0x%04x]\n", |
156 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | 178 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), |
157 | address, flags); | 179 | address, flags); |
180 | dump_dte_entry(devid); | ||
158 | break; | 181 | break; |
159 | case EVENT_TYPE_IO_FAULT: | 182 | case EVENT_TYPE_IO_FAULT: |
160 | printk("IO_PAGE_FAULT device=%02x:%02x.%x " | 183 | printk("IO_PAGE_FAULT device=%02x:%02x.%x " |
@@ -176,6 +199,8 @@ static void iommu_print_event(void *__evt) | |||
176 | break; | 199 | break; |
177 | case EVENT_TYPE_ILL_CMD: | 200 | case EVENT_TYPE_ILL_CMD: |
178 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); | 201 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); |
202 | reset_iommu_command_buffer(iommu); | ||
203 | dump_command(address); | ||
179 | break; | 204 | break; |
180 | case EVENT_TYPE_CMD_HARD_ERR: | 205 | case EVENT_TYPE_CMD_HARD_ERR: |
181 | printk("COMMAND_HARDWARE_ERROR address=0x%016llx " | 206 | printk("COMMAND_HARDWARE_ERROR address=0x%016llx " |
@@ -209,7 +234,7 @@ static void iommu_poll_events(struct amd_iommu *iommu) | |||
209 | tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); | 234 | tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); |
210 | 235 | ||
211 | while (head != tail) { | 236 | while (head != tail) { |
212 | iommu_print_event(iommu->evt_buf + head); | 237 | iommu_print_event(iommu, iommu->evt_buf + head); |
213 | head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; | 238 | head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; |
214 | } | 239 | } |
215 | 240 | ||
@@ -296,8 +321,11 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu) | |||
296 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; | 321 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; |
297 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); | 322 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); |
298 | 323 | ||
299 | if (unlikely(i == EXIT_LOOP_COUNT)) | 324 | if (unlikely(i == EXIT_LOOP_COUNT)) { |
300 | panic("AMD IOMMU: Completion wait loop failed\n"); | 325 | spin_unlock(&iommu->lock); |
326 | reset_iommu_command_buffer(iommu); | ||
327 | spin_lock(&iommu->lock); | ||
328 | } | ||
301 | } | 329 | } |
302 | 330 | ||
303 | /* | 331 | /* |
@@ -445,47 +473,78 @@ static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) | |||
445 | } | 473 | } |
446 | 474 | ||
447 | /* | 475 | /* |
476 | * This function flushes one domain on one IOMMU | ||
477 | */ | ||
478 | static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid) | ||
479 | { | ||
480 | struct iommu_cmd cmd; | ||
481 | unsigned long flags; | ||
482 | |||
483 | __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | ||
484 | domid, 1, 1); | ||
485 | |||
486 | spin_lock_irqsave(&iommu->lock, flags); | ||
487 | __iommu_queue_command(iommu, &cmd); | ||
488 | __iommu_completion_wait(iommu); | ||
489 | __iommu_wait_for_completion(iommu); | ||
490 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
491 | } | ||
492 | |||
493 | static void flush_all_domains_on_iommu(struct amd_iommu *iommu) | ||
494 | { | ||
495 | int i; | ||
496 | |||
497 | for (i = 1; i < MAX_DOMAIN_ID; ++i) { | ||
498 | if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) | ||
499 | continue; | ||
500 | flush_domain_on_iommu(iommu, i); | ||
501 | } | ||
502 | |||
503 | } | ||
504 | |||
505 | /* | ||
448 | * This function is used to flush the IO/TLB for a given protection domain | 506 | * This function is used to flush the IO/TLB for a given protection domain |
449 | * on every IOMMU in the system | 507 | * on every IOMMU in the system |
450 | */ | 508 | */ |
451 | static void iommu_flush_domain(u16 domid) | 509 | static void iommu_flush_domain(u16 domid) |
452 | { | 510 | { |
453 | unsigned long flags; | ||
454 | struct amd_iommu *iommu; | 511 | struct amd_iommu *iommu; |
455 | struct iommu_cmd cmd; | ||
456 | 512 | ||
457 | INC_STATS_COUNTER(domain_flush_all); | 513 | INC_STATS_COUNTER(domain_flush_all); |
458 | 514 | ||
459 | __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | 515 | for_each_iommu(iommu) |
460 | domid, 1, 1); | 516 | flush_domain_on_iommu(iommu, domid); |
461 | |||
462 | for_each_iommu(iommu) { | ||
463 | spin_lock_irqsave(&iommu->lock, flags); | ||
464 | __iommu_queue_command(iommu, &cmd); | ||
465 | __iommu_completion_wait(iommu); | ||
466 | __iommu_wait_for_completion(iommu); | ||
467 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
468 | } | ||
469 | } | 517 | } |
470 | 518 | ||
471 | void amd_iommu_flush_all_domains(void) | 519 | void amd_iommu_flush_all_domains(void) |
472 | { | 520 | { |
521 | struct amd_iommu *iommu; | ||
522 | |||
523 | for_each_iommu(iommu) | ||
524 | flush_all_domains_on_iommu(iommu); | ||
525 | } | ||
526 | |||
527 | static void flush_all_devices_for_iommu(struct amd_iommu *iommu) | ||
528 | { | ||
473 | int i; | 529 | int i; |
474 | 530 | ||
475 | for (i = 1; i < MAX_DOMAIN_ID; ++i) { | 531 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { |
476 | if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) | 532 | if (iommu != amd_iommu_rlookup_table[i]) |
477 | continue; | 533 | continue; |
478 | iommu_flush_domain(i); | 534 | |
535 | iommu_queue_inv_dev_entry(iommu, i); | ||
536 | iommu_completion_wait(iommu); | ||
479 | } | 537 | } |
480 | } | 538 | } |
481 | 539 | ||
482 | void amd_iommu_flush_all_devices(void) | 540 | static void flush_devices_by_domain(struct protection_domain *domain) |
483 | { | 541 | { |
484 | struct amd_iommu *iommu; | 542 | struct amd_iommu *iommu; |
485 | int i; | 543 | int i; |
486 | 544 | ||
487 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | 545 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { |
488 | if (amd_iommu_pd_table[i] == NULL) | 546 | if ((domain == NULL && amd_iommu_pd_table[i] == NULL) || |
547 | (amd_iommu_pd_table[i] != domain)) | ||
489 | continue; | 548 | continue; |
490 | 549 | ||
491 | iommu = amd_iommu_rlookup_table[i]; | 550 | iommu = amd_iommu_rlookup_table[i]; |
@@ -497,6 +556,27 @@ void amd_iommu_flush_all_devices(void) | |||
497 | } | 556 | } |
498 | } | 557 | } |
499 | 558 | ||
559 | static void reset_iommu_command_buffer(struct amd_iommu *iommu) | ||
560 | { | ||
561 | pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); | ||
562 | |||
563 | if (iommu->reset_in_progress) | ||
564 | panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); | ||
565 | |||
566 | iommu->reset_in_progress = true; | ||
567 | |||
568 | amd_iommu_reset_cmd_buffer(iommu); | ||
569 | flush_all_devices_for_iommu(iommu); | ||
570 | flush_all_domains_on_iommu(iommu); | ||
571 | |||
572 | iommu->reset_in_progress = false; | ||
573 | } | ||
574 | |||
575 | void amd_iommu_flush_all_devices(void) | ||
576 | { | ||
577 | flush_devices_by_domain(NULL); | ||
578 | } | ||
579 | |||
500 | /**************************************************************************** | 580 | /**************************************************************************** |
501 | * | 581 | * |
502 | * The functions below are used the create the page table mappings for | 582 | * The functions below are used the create the page table mappings for |
@@ -514,18 +594,21 @@ void amd_iommu_flush_all_devices(void) | |||
514 | static int iommu_map_page(struct protection_domain *dom, | 594 | static int iommu_map_page(struct protection_domain *dom, |
515 | unsigned long bus_addr, | 595 | unsigned long bus_addr, |
516 | unsigned long phys_addr, | 596 | unsigned long phys_addr, |
517 | int prot) | 597 | int prot, |
598 | int map_size) | ||
518 | { | 599 | { |
519 | u64 __pte, *pte; | 600 | u64 __pte, *pte; |
520 | 601 | ||
521 | bus_addr = PAGE_ALIGN(bus_addr); | 602 | bus_addr = PAGE_ALIGN(bus_addr); |
522 | phys_addr = PAGE_ALIGN(phys_addr); | 603 | phys_addr = PAGE_ALIGN(phys_addr); |
523 | 604 | ||
524 | /* only support 512GB address spaces for now */ | 605 | BUG_ON(!PM_ALIGNED(map_size, bus_addr)); |
525 | if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) | 606 | BUG_ON(!PM_ALIGNED(map_size, phys_addr)); |
607 | |||
608 | if (!(prot & IOMMU_PROT_MASK)) | ||
526 | return -EINVAL; | 609 | return -EINVAL; |
527 | 610 | ||
528 | pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL); | 611 | pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL); |
529 | 612 | ||
530 | if (IOMMU_PTE_PRESENT(*pte)) | 613 | if (IOMMU_PTE_PRESENT(*pte)) |
531 | return -EBUSY; | 614 | return -EBUSY; |
@@ -538,29 +621,18 @@ static int iommu_map_page(struct protection_domain *dom, | |||
538 | 621 | ||
539 | *pte = __pte; | 622 | *pte = __pte; |
540 | 623 | ||
624 | update_domain(dom); | ||
625 | |||
541 | return 0; | 626 | return 0; |
542 | } | 627 | } |
543 | 628 | ||
544 | static void iommu_unmap_page(struct protection_domain *dom, | 629 | static void iommu_unmap_page(struct protection_domain *dom, |
545 | unsigned long bus_addr) | 630 | unsigned long bus_addr, int map_size) |
546 | { | 631 | { |
547 | u64 *pte; | 632 | u64 *pte = fetch_pte(dom, bus_addr, map_size); |
548 | |||
549 | pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; | ||
550 | |||
551 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
552 | return; | ||
553 | |||
554 | pte = IOMMU_PTE_PAGE(*pte); | ||
555 | pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; | ||
556 | |||
557 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
558 | return; | ||
559 | |||
560 | pte = IOMMU_PTE_PAGE(*pte); | ||
561 | pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; | ||
562 | 633 | ||
563 | *pte = 0; | 634 | if (pte) |
635 | *pte = 0; | ||
564 | } | 636 | } |
565 | 637 | ||
566 | /* | 638 | /* |
@@ -615,7 +687,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | |||
615 | 687 | ||
616 | for (addr = e->address_start; addr < e->address_end; | 688 | for (addr = e->address_start; addr < e->address_end; |
617 | addr += PAGE_SIZE) { | 689 | addr += PAGE_SIZE) { |
618 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot); | 690 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, |
691 | PM_MAP_4k); | ||
619 | if (ret) | 692 | if (ret) |
620 | return ret; | 693 | return ret; |
621 | /* | 694 | /* |
@@ -670,24 +743,29 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | |||
670 | * This function checks if there is a PTE for a given dma address. If | 743 | * This function checks if there is a PTE for a given dma address. If |
671 | * there is one, it returns the pointer to it. | 744 | * there is one, it returns the pointer to it. |
672 | */ | 745 | */ |
673 | static u64* fetch_pte(struct protection_domain *domain, | 746 | static u64 *fetch_pte(struct protection_domain *domain, |
674 | unsigned long address) | 747 | unsigned long address, int map_size) |
675 | { | 748 | { |
749 | int level; | ||
676 | u64 *pte; | 750 | u64 *pte; |
677 | 751 | ||
678 | pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)]; | 752 | level = domain->mode - 1; |
753 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | ||
679 | 754 | ||
680 | if (!IOMMU_PTE_PRESENT(*pte)) | 755 | while (level > map_size) { |
681 | return NULL; | 756 | if (!IOMMU_PTE_PRESENT(*pte)) |
757 | return NULL; | ||
682 | 758 | ||
683 | pte = IOMMU_PTE_PAGE(*pte); | 759 | level -= 1; |
684 | pte = &pte[IOMMU_PTE_L1_INDEX(address)]; | ||
685 | 760 | ||
686 | if (!IOMMU_PTE_PRESENT(*pte)) | 761 | pte = IOMMU_PTE_PAGE(*pte); |
687 | return NULL; | 762 | pte = &pte[PM_LEVEL_INDEX(level, address)]; |
688 | 763 | ||
689 | pte = IOMMU_PTE_PAGE(*pte); | 764 | if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { |
690 | pte = &pte[IOMMU_PTE_L0_INDEX(address)]; | 765 | pte = NULL; |
766 | break; | ||
767 | } | ||
768 | } | ||
691 | 769 | ||
692 | return pte; | 770 | return pte; |
693 | } | 771 | } |
@@ -727,7 +805,7 @@ static int alloc_new_range(struct amd_iommu *iommu, | |||
727 | u64 *pte, *pte_page; | 805 | u64 *pte, *pte_page; |
728 | 806 | ||
729 | for (i = 0; i < num_ptes; ++i) { | 807 | for (i = 0; i < num_ptes; ++i) { |
730 | pte = alloc_pte(&dma_dom->domain, address, | 808 | pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k, |
731 | &pte_page, gfp); | 809 | &pte_page, gfp); |
732 | if (!pte) | 810 | if (!pte) |
733 | goto out_free; | 811 | goto out_free; |
@@ -760,16 +838,20 @@ static int alloc_new_range(struct amd_iommu *iommu, | |||
760 | for (i = dma_dom->aperture[index]->offset; | 838 | for (i = dma_dom->aperture[index]->offset; |
761 | i < dma_dom->aperture_size; | 839 | i < dma_dom->aperture_size; |
762 | i += PAGE_SIZE) { | 840 | i += PAGE_SIZE) { |
763 | u64 *pte = fetch_pte(&dma_dom->domain, i); | 841 | u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k); |
764 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) | 842 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) |
765 | continue; | 843 | continue; |
766 | 844 | ||
767 | dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1); | 845 | dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1); |
768 | } | 846 | } |
769 | 847 | ||
848 | update_domain(&dma_dom->domain); | ||
849 | |||
770 | return 0; | 850 | return 0; |
771 | 851 | ||
772 | out_free: | 852 | out_free: |
853 | update_domain(&dma_dom->domain); | ||
854 | |||
773 | free_page((unsigned long)dma_dom->aperture[index]->bitmap); | 855 | free_page((unsigned long)dma_dom->aperture[index]->bitmap); |
774 | 856 | ||
775 | kfree(dma_dom->aperture[index]); | 857 | kfree(dma_dom->aperture[index]); |
@@ -1009,7 +1091,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) | |||
1009 | dma_dom->domain.id = domain_id_alloc(); | 1091 | dma_dom->domain.id = domain_id_alloc(); |
1010 | if (dma_dom->domain.id == 0) | 1092 | if (dma_dom->domain.id == 0) |
1011 | goto free_dma_dom; | 1093 | goto free_dma_dom; |
1012 | dma_dom->domain.mode = PAGE_MODE_3_LEVEL; | 1094 | dma_dom->domain.mode = PAGE_MODE_2_LEVEL; |
1013 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); | 1095 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); |
1014 | dma_dom->domain.flags = PD_DMA_OPS_MASK; | 1096 | dma_dom->domain.flags = PD_DMA_OPS_MASK; |
1015 | dma_dom->domain.priv = dma_dom; | 1097 | dma_dom->domain.priv = dma_dom; |
@@ -1063,6 +1145,41 @@ static struct protection_domain *domain_for_device(u16 devid) | |||
1063 | return dom; | 1145 | return dom; |
1064 | } | 1146 | } |
1065 | 1147 | ||
1148 | static void set_dte_entry(u16 devid, struct protection_domain *domain) | ||
1149 | { | ||
1150 | u64 pte_root = virt_to_phys(domain->pt_root); | ||
1151 | |||
1152 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) | ||
1153 | << DEV_ENTRY_MODE_SHIFT; | ||
1154 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | ||
1155 | |||
1156 | amd_iommu_dev_table[devid].data[2] = domain->id; | ||
1157 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | ||
1158 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | ||
1159 | |||
1160 | amd_iommu_pd_table[devid] = domain; | ||
1161 | } | ||
1162 | |||
1163 | /* | ||
1164 | * If a device is not yet associated with a domain, this function | |
1165 | * assigns it and makes the assignment visible to the hardware | |
1166 | */ | ||
1167 | static void __attach_device(struct amd_iommu *iommu, | ||
1168 | struct protection_domain *domain, | ||
1169 | u16 devid) | ||
1170 | { | ||
1171 | /* lock domain */ | ||
1172 | spin_lock(&domain->lock); | ||
1173 | |||
1174 | /* update DTE entry */ | ||
1175 | set_dte_entry(devid, domain); | ||
1176 | |||
1177 | domain->dev_cnt += 1; | ||
1178 | |||
1179 | /* ready */ | ||
1180 | spin_unlock(&domain->lock); | ||
1181 | } | ||
1182 | |||
1066 | /* | 1183 | /* |
1067 | * If a device is not yet associated with a domain, this function does | 1184 | * If a device is not yet associated with a domain, this function does |
1068 | * assigns it visible for the hardware | 1185 | * assigns it visible for the hardware |
@@ -1072,27 +1189,16 @@ static void attach_device(struct amd_iommu *iommu, | |||
1072 | u16 devid) | 1189 | u16 devid) |
1073 | { | 1190 | { |
1074 | unsigned long flags; | 1191 | unsigned long flags; |
1075 | u64 pte_root = virt_to_phys(domain->pt_root); | ||
1076 | |||
1077 | domain->dev_cnt += 1; | ||
1078 | |||
1079 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) | ||
1080 | << DEV_ENTRY_MODE_SHIFT; | ||
1081 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | ||
1082 | 1192 | ||
1083 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 1193 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
1084 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | 1194 | __attach_device(iommu, domain, devid); |
1085 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | ||
1086 | amd_iommu_dev_table[devid].data[2] = domain->id; | ||
1087 | |||
1088 | amd_iommu_pd_table[devid] = domain; | ||
1089 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1195 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1090 | 1196 | ||
1091 | /* | 1197 | /* |
1092 | * We might boot into a crash-kernel here. The crashed kernel | 1198 | * We might boot into a crash-kernel here. The crashed kernel |
1093 | * left the caches in the IOMMU dirty. So we have to flush | 1199 | * left the caches in the IOMMU dirty. So we have to flush |
1094 | * here to evict all dirty stuff. | 1200 | * here to evict all dirty stuff. |
1095 | */ | 1201 | */ |
1096 | iommu_queue_inv_dev_entry(iommu, devid); | 1202 | iommu_queue_inv_dev_entry(iommu, devid); |
1097 | iommu_flush_tlb_pde(iommu, domain->id); | 1203 | iommu_flush_tlb_pde(iommu, domain->id); |
1098 | } | 1204 | } |
@@ -1119,6 +1225,15 @@ static void __detach_device(struct protection_domain *domain, u16 devid) | |||
1119 | 1225 | ||
1120 | /* ready */ | 1226 | /* ready */ |
1121 | spin_unlock(&domain->lock); | 1227 | spin_unlock(&domain->lock); |
1228 | |||
1229 | /* | ||
1230 | * If we run in passthrough mode the device must be assigned to the | ||
1231 | * passthrough domain if it is detached from any other domain | ||
1232 | */ | ||
1233 | if (iommu_pass_through) { | ||
1234 | struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; | ||
1235 | __attach_device(iommu, pt_domain, devid); | ||
1236 | } | ||
1122 | } | 1237 | } |
1123 | 1238 | ||
1124 | /* | 1239 | /* |
@@ -1164,6 +1279,8 @@ static int device_change_notifier(struct notifier_block *nb, | |||
1164 | case BUS_NOTIFY_UNBOUND_DRIVER: | 1279 | case BUS_NOTIFY_UNBOUND_DRIVER: |
1165 | if (!domain) | 1280 | if (!domain) |
1166 | goto out; | 1281 | goto out; |
1282 | if (iommu_pass_through) | ||
1283 | break; | ||
1167 | detach_device(domain, devid); | 1284 | detach_device(domain, devid); |
1168 | break; | 1285 | break; |
1169 | case BUS_NOTIFY_ADD_DEVICE: | 1286 | case BUS_NOTIFY_ADD_DEVICE: |
@@ -1192,7 +1309,7 @@ out: | |||
1192 | return 0; | 1309 | return 0; |
1193 | } | 1310 | } |
1194 | 1311 | ||
1195 | struct notifier_block device_nb = { | 1312 | static struct notifier_block device_nb = { |
1196 | .notifier_call = device_change_notifier, | 1313 | .notifier_call = device_change_notifier, |
1197 | }; | 1314 | }; |
1198 | 1315 | ||
@@ -1292,39 +1409,91 @@ static int get_device_resources(struct device *dev, | |||
1292 | return 1; | 1409 | return 1; |
1293 | } | 1410 | } |
1294 | 1411 | ||
1412 | static void update_device_table(struct protection_domain *domain) | ||
1413 | { | ||
1414 | unsigned long flags; | ||
1415 | int i; | ||
1416 | |||
1417 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | ||
1418 | if (amd_iommu_pd_table[i] != domain) | ||
1419 | continue; | ||
1420 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1421 | set_dte_entry(i, domain); | ||
1422 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1423 | } | ||
1424 | } | ||
1425 | |||
1426 | static void update_domain(struct protection_domain *domain) | ||
1427 | { | ||
1428 | if (!domain->updated) | ||
1429 | return; | ||
1430 | |||
1431 | update_device_table(domain); | ||
1432 | flush_devices_by_domain(domain); | ||
1433 | iommu_flush_domain(domain->id); | ||
1434 | |||
1435 | domain->updated = false; | ||
1436 | } | ||
1437 | |||
1295 | /* | 1438 | /* |
1296 | * If the pte_page is not yet allocated this function is called | 1439 | * This function is used to add another level to an IO page table. Adding |
1440 | * another level increases the size of the address space by 9 bits to a size up | ||
1441 | * to 64 bits. | ||
1297 | */ | 1442 | */ |
1298 | static u64* alloc_pte(struct protection_domain *dom, | 1443 | static bool increase_address_space(struct protection_domain *domain, |
1299 | unsigned long address, u64 **pte_page, gfp_t gfp) | 1444 | gfp_t gfp) |
1445 | { | ||
1446 | u64 *pte; | ||
1447 | |||
1448 | if (domain->mode == PAGE_MODE_6_LEVEL) | ||
1449 | /* address space already 64 bit large */ | ||
1450 | return false; | ||
1451 | |||
1452 | pte = (void *)get_zeroed_page(gfp); | ||
1453 | if (!pte) | ||
1454 | return false; | ||
1455 | |||
1456 | *pte = PM_LEVEL_PDE(domain->mode, | ||
1457 | virt_to_phys(domain->pt_root)); | ||
1458 | domain->pt_root = pte; | ||
1459 | domain->mode += 1; | ||
1460 | domain->updated = true; | ||
1461 | |||
1462 | return true; | ||
1463 | } | ||
1464 | |||
1465 | static u64 *alloc_pte(struct protection_domain *domain, | ||
1466 | unsigned long address, | ||
1467 | int end_lvl, | ||
1468 | u64 **pte_page, | ||
1469 | gfp_t gfp) | ||
1300 | { | 1470 | { |
1301 | u64 *pte, *page; | 1471 | u64 *pte, *page; |
1472 | int level; | ||
1302 | 1473 | ||
1303 | pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)]; | 1474 | while (address > PM_LEVEL_SIZE(domain->mode)) |
1475 | increase_address_space(domain, gfp); | ||
1304 | 1476 | ||
1305 | if (!IOMMU_PTE_PRESENT(*pte)) { | 1477 | level = domain->mode - 1; |
1306 | page = (u64 *)get_zeroed_page(gfp); | 1478 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; |
1307 | if (!page) | ||
1308 | return NULL; | ||
1309 | *pte = IOMMU_L2_PDE(virt_to_phys(page)); | ||
1310 | } | ||
1311 | 1479 | ||
1312 | pte = IOMMU_PTE_PAGE(*pte); | 1480 | while (level > end_lvl) { |
1313 | pte = &pte[IOMMU_PTE_L1_INDEX(address)]; | 1481 | if (!IOMMU_PTE_PRESENT(*pte)) { |
1482 | page = (u64 *)get_zeroed_page(gfp); | ||
1483 | if (!page) | ||
1484 | return NULL; | ||
1485 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); | ||
1486 | } | ||
1314 | 1487 | ||
1315 | if (!IOMMU_PTE_PRESENT(*pte)) { | 1488 | level -= 1; |
1316 | page = (u64 *)get_zeroed_page(gfp); | ||
1317 | if (!page) | ||
1318 | return NULL; | ||
1319 | *pte = IOMMU_L1_PDE(virt_to_phys(page)); | ||
1320 | } | ||
1321 | 1489 | ||
1322 | pte = IOMMU_PTE_PAGE(*pte); | 1490 | pte = IOMMU_PTE_PAGE(*pte); |
1323 | 1491 | ||
1324 | if (pte_page) | 1492 | if (pte_page && level == end_lvl) |
1325 | *pte_page = pte; | 1493 | *pte_page = pte; |
1326 | 1494 | ||
1327 | pte = &pte[IOMMU_PTE_L0_INDEX(address)]; | 1495 | pte = &pte[PM_LEVEL_INDEX(level, address)]; |
1496 | } | ||
1328 | 1497 | ||
1329 | return pte; | 1498 | return pte; |
1330 | } | 1499 | } |
@@ -1344,10 +1513,13 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, | |||
1344 | 1513 | ||
1345 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; | 1514 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; |
1346 | if (!pte) { | 1515 | if (!pte) { |
1347 | pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC); | 1516 | pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page, |
1517 | GFP_ATOMIC); | ||
1348 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; | 1518 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; |
1349 | } else | 1519 | } else |
1350 | pte += IOMMU_PTE_L0_INDEX(address); | 1520 | pte += PM_LEVEL_INDEX(0, address); |
1521 | |||
1522 | update_domain(&dom->domain); | ||
1351 | 1523 | ||
1352 | return pte; | 1524 | return pte; |
1353 | } | 1525 | } |
@@ -1409,7 +1581,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, | |||
1409 | if (!pte) | 1581 | if (!pte) |
1410 | return; | 1582 | return; |
1411 | 1583 | ||
1412 | pte += IOMMU_PTE_L0_INDEX(address); | 1584 | pte += PM_LEVEL_INDEX(0, address); |
1413 | 1585 | ||
1414 | WARN_ON(!*pte); | 1586 | WARN_ON(!*pte); |
1415 | 1587 | ||
@@ -1763,7 +1935,7 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
1763 | flag |= __GFP_ZERO; | 1935 | flag |= __GFP_ZERO; |
1764 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); | 1936 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); |
1765 | if (!virt_addr) | 1937 | if (!virt_addr) |
1766 | return 0; | 1938 | return NULL; |
1767 | 1939 | ||
1768 | paddr = virt_to_phys(virt_addr); | 1940 | paddr = virt_to_phys(virt_addr); |
1769 | 1941 | ||
@@ -1988,19 +2160,47 @@ static void cleanup_domain(struct protection_domain *domain) | |||
1988 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 2160 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1989 | } | 2161 | } |
1990 | 2162 | ||
1991 | static int amd_iommu_domain_init(struct iommu_domain *dom) | 2163 | static void protection_domain_free(struct protection_domain *domain) |
2164 | { | ||
2165 | if (!domain) | ||
2166 | return; | ||
2167 | |||
2168 | if (domain->id) | ||
2169 | domain_id_free(domain->id); | ||
2170 | |||
2171 | kfree(domain); | ||
2172 | } | ||
2173 | |||
2174 | static struct protection_domain *protection_domain_alloc(void) | ||
1992 | { | 2175 | { |
1993 | struct protection_domain *domain; | 2176 | struct protection_domain *domain; |
1994 | 2177 | ||
1995 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); | 2178 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); |
1996 | if (!domain) | 2179 | if (!domain) |
1997 | return -ENOMEM; | 2180 | return NULL; |
1998 | 2181 | ||
1999 | spin_lock_init(&domain->lock); | 2182 | spin_lock_init(&domain->lock); |
2000 | domain->mode = PAGE_MODE_3_LEVEL; | ||
2001 | domain->id = domain_id_alloc(); | 2183 | domain->id = domain_id_alloc(); |
2002 | if (!domain->id) | 2184 | if (!domain->id) |
2185 | goto out_err; | ||
2186 | |||
2187 | return domain; | ||
2188 | |||
2189 | out_err: | ||
2190 | kfree(domain); | ||
2191 | |||
2192 | return NULL; | ||
2193 | } | ||
2194 | |||
2195 | static int amd_iommu_domain_init(struct iommu_domain *dom) | ||
2196 | { | ||
2197 | struct protection_domain *domain; | ||
2198 | |||
2199 | domain = protection_domain_alloc(); | ||
2200 | if (!domain) | ||
2003 | goto out_free; | 2201 | goto out_free; |
2202 | |||
2203 | domain->mode = PAGE_MODE_3_LEVEL; | ||
2004 | domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); | 2204 | domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); |
2005 | if (!domain->pt_root) | 2205 | if (!domain->pt_root) |
2006 | goto out_free; | 2206 | goto out_free; |
@@ -2010,7 +2210,7 @@ static int amd_iommu_domain_init(struct iommu_domain *dom) | |||
2010 | return 0; | 2210 | return 0; |
2011 | 2211 | ||
2012 | out_free: | 2212 | out_free: |
2013 | kfree(domain); | 2213 | protection_domain_free(domain); |
2014 | 2214 | ||
2015 | return -ENOMEM; | 2215 | return -ENOMEM; |
2016 | } | 2216 | } |
@@ -2115,7 +2315,7 @@ static int amd_iommu_map_range(struct iommu_domain *dom, | |||
2115 | paddr &= PAGE_MASK; | 2315 | paddr &= PAGE_MASK; |
2116 | 2316 | ||
2117 | for (i = 0; i < npages; ++i) { | 2317 | for (i = 0; i < npages; ++i) { |
2118 | ret = iommu_map_page(domain, iova, paddr, prot); | 2318 | ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); |
2119 | if (ret) | 2319 | if (ret) |
2120 | return ret; | 2320 | return ret; |
2121 | 2321 | ||
@@ -2136,7 +2336,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, | |||
2136 | iova &= PAGE_MASK; | 2336 | iova &= PAGE_MASK; |
2137 | 2337 | ||
2138 | for (i = 0; i < npages; ++i) { | 2338 | for (i = 0; i < npages; ++i) { |
2139 | iommu_unmap_page(domain, iova); | 2339 | iommu_unmap_page(domain, iova, PM_MAP_4k); |
2140 | iova += PAGE_SIZE; | 2340 | iova += PAGE_SIZE; |
2141 | } | 2341 | } |
2142 | 2342 | ||
@@ -2151,21 +2351,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, | |||
2151 | phys_addr_t paddr; | 2351 | phys_addr_t paddr; |
2152 | u64 *pte; | 2352 | u64 *pte; |
2153 | 2353 | ||
2154 | pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)]; | 2354 | pte = fetch_pte(domain, iova, PM_MAP_4k); |
2155 | |||
2156 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
2157 | return 0; | ||
2158 | |||
2159 | pte = IOMMU_PTE_PAGE(*pte); | ||
2160 | pte = &pte[IOMMU_PTE_L1_INDEX(iova)]; | ||
2161 | |||
2162 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
2163 | return 0; | ||
2164 | |||
2165 | pte = IOMMU_PTE_PAGE(*pte); | ||
2166 | pte = &pte[IOMMU_PTE_L0_INDEX(iova)]; | ||
2167 | 2355 | ||
2168 | if (!IOMMU_PTE_PRESENT(*pte)) | 2356 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) |
2169 | return 0; | 2357 | return 0; |
2170 | 2358 | ||
2171 | paddr = *pte & IOMMU_PAGE_MASK; | 2359 | paddr = *pte & IOMMU_PAGE_MASK; |
@@ -2191,3 +2379,46 @@ static struct iommu_ops amd_iommu_ops = { | |||
2191 | .domain_has_cap = amd_iommu_domain_has_cap, | 2379 | .domain_has_cap = amd_iommu_domain_has_cap, |
2192 | }; | 2380 | }; |
2193 | 2381 | ||
2382 | /***************************************************************************** | ||
2383 | * | ||
2384 | * The next functions do a basic initialization of IOMMU for pass through | ||
2385 | * mode | ||
2386 | * | ||
2387 | * In passthrough mode the IOMMU is initialized and enabled but not used for | ||
2388 | * DMA-API translation. | ||
2389 | * | ||
2390 | *****************************************************************************/ | ||
2391 | |||
2392 | int __init amd_iommu_init_passthrough(void) | ||
2393 | { | ||
2394 | struct pci_dev *dev = NULL; | ||
2395 | u16 devid, devid2; | ||
2396 | |||
2397 | /* allocate passthrough domain */ | |
2398 | pt_domain = protection_domain_alloc(); | ||
2399 | if (!pt_domain) | ||
2400 | return -ENOMEM; | ||
2401 | |||
2402 | pt_domain->mode |= PAGE_MODE_NONE; | ||
2403 | |||
2404 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | ||
2405 | struct amd_iommu *iommu; | ||
2406 | |||
2407 | devid = calc_devid(dev->bus->number, dev->devfn); | ||
2408 | if (devid > amd_iommu_last_bdf) | ||
2409 | continue; | ||
2410 | |||
2411 | devid2 = amd_iommu_alias_table[devid]; | ||
2412 | |||
2413 | iommu = amd_iommu_rlookup_table[devid2]; | ||
2414 | if (!iommu) | ||
2415 | continue; | ||
2416 | |||
2417 | __attach_device(iommu, pt_domain, devid); | ||
2418 | __attach_device(iommu, pt_domain, devid2); | ||
2419 | } | ||
2420 | |||
2421 | pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); | ||
2422 | |||
2423 | return 0; | ||
2424 | } | ||
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 10b2accd12ea..b4b61d462dcc 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -252,7 +252,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
252 | /* Function to enable the hardware */ | 252 | /* Function to enable the hardware */ |
253 | static void iommu_enable(struct amd_iommu *iommu) | 253 | static void iommu_enable(struct amd_iommu *iommu) |
254 | { | 254 | { |
255 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", | 255 | printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n", |
256 | dev_name(&iommu->dev->dev), iommu->cap_ptr); | 256 | dev_name(&iommu->dev->dev), iommu->cap_ptr); |
257 | 257 | ||
258 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); | 258 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); |
@@ -435,6 +435,20 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | |||
435 | } | 435 | } |
436 | 436 | ||
437 | /* | 437 | /* |
438 | * This function resets the command buffer if the IOMMU stopped fetching | ||
439 | * commands from it. | ||
440 | */ | ||
441 | void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) | ||
442 | { | ||
443 | iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); | ||
444 | |||
445 | writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | ||
446 | writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | ||
447 | |||
448 | iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); | ||
449 | } | ||
450 | |||
451 | /* | ||
438 | * This function writes the command buffer address to the hardware and | 452 | * This function writes the command buffer address to the hardware and |
439 | * enables it. | 453 | * enables it. |
440 | */ | 454 | */ |
@@ -450,11 +464,7 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) | |||
450 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, | 464 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, |
451 | &entry, sizeof(entry)); | 465 | &entry, sizeof(entry)); |
452 | 466 | ||
453 | /* set head and tail to zero manually */ | 467 | amd_iommu_reset_cmd_buffer(iommu); |
454 | writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | ||
455 | writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | ||
456 | |||
457 | iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); | ||
458 | } | 468 | } |
459 | 469 | ||
460 | static void __init free_command_buffer(struct amd_iommu *iommu) | 470 | static void __init free_command_buffer(struct amd_iommu *iommu) |
@@ -472,6 +482,8 @@ static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) | |||
472 | if (iommu->evt_buf == NULL) | 482 | if (iommu->evt_buf == NULL) |
473 | return NULL; | 483 | return NULL; |
474 | 484 | ||
485 | iommu->evt_buf_size = EVT_BUFFER_SIZE; | ||
486 | |||
475 | return iommu->evt_buf; | 487 | return iommu->evt_buf; |
476 | } | 488 | } |
477 | 489 | ||
@@ -691,6 +703,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
691 | 703 | ||
692 | devid = e->devid; | 704 | devid = e->devid; |
693 | devid_to = e->ext >> 8; | 705 | devid_to = e->ext >> 8; |
706 | set_dev_entry_from_acpi(iommu, devid , e->flags, 0); | ||
694 | set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); | 707 | set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); |
695 | amd_iommu_alias_table[devid] = devid_to; | 708 | amd_iommu_alias_table[devid] = devid_to; |
696 | break; | 709 | break; |
@@ -749,11 +762,13 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
749 | 762 | ||
750 | devid = e->devid; | 763 | devid = e->devid; |
751 | for (dev_i = devid_start; dev_i <= devid; ++dev_i) { | 764 | for (dev_i = devid_start; dev_i <= devid; ++dev_i) { |
752 | if (alias) | 765 | if (alias) { |
753 | amd_iommu_alias_table[dev_i] = devid_to; | 766 | amd_iommu_alias_table[dev_i] = devid_to; |
754 | set_dev_entry_from_acpi(iommu, | 767 | set_dev_entry_from_acpi(iommu, |
755 | amd_iommu_alias_table[dev_i], | 768 | devid_to, flags, ext_flags); |
756 | flags, ext_flags); | 769 | } |
770 | set_dev_entry_from_acpi(iommu, dev_i, | ||
771 | flags, ext_flags); | ||
757 | } | 772 | } |
758 | break; | 773 | break; |
759 | default: | 774 | default: |
@@ -853,7 +868,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
853 | switch (*p) { | 868 | switch (*p) { |
854 | case ACPI_IVHD_TYPE: | 869 | case ACPI_IVHD_TYPE: |
855 | 870 | ||
856 | DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x " | 871 | DUMP_printk("device: %02x:%02x.%01x cap: %04x " |
857 | "seg: %d flags: %01x info %04x\n", | 872 | "seg: %d flags: %01x info %04x\n", |
858 | PCI_BUS(h->devid), PCI_SLOT(h->devid), | 873 | PCI_BUS(h->devid), PCI_SLOT(h->devid), |
859 | PCI_FUNC(h->devid), h->cap_ptr, | 874 | PCI_FUNC(h->devid), h->cap_ptr, |
@@ -897,7 +912,7 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu) | |||
897 | 912 | ||
898 | r = request_irq(iommu->dev->irq, amd_iommu_int_handler, | 913 | r = request_irq(iommu->dev->irq, amd_iommu_int_handler, |
899 | IRQF_SAMPLE_RANDOM, | 914 | IRQF_SAMPLE_RANDOM, |
900 | "AMD IOMMU", | 915 | "AMD-Vi", |
901 | NULL); | 916 | NULL); |
902 | 917 | ||
903 | if (r) { | 918 | if (r) { |
@@ -1145,7 +1160,7 @@ int __init amd_iommu_init(void) | |||
1145 | 1160 | ||
1146 | 1161 | ||
1147 | if (no_iommu) { | 1162 | if (no_iommu) { |
1148 | printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n"); | 1163 | printk(KERN_INFO "AMD-Vi disabled by kernel command line\n"); |
1149 | return 0; | 1164 | return 0; |
1150 | } | 1165 | } |
1151 | 1166 | ||
@@ -1237,22 +1252,28 @@ int __init amd_iommu_init(void) | |||
1237 | if (ret) | 1252 | if (ret) |
1238 | goto free; | 1253 | goto free; |
1239 | 1254 | ||
1240 | ret = amd_iommu_init_dma_ops(); | 1255 | if (iommu_pass_through) |
1256 | ret = amd_iommu_init_passthrough(); | ||
1257 | else | ||
1258 | ret = amd_iommu_init_dma_ops(); | ||
1241 | if (ret) | 1259 | if (ret) |
1242 | goto free; | 1260 | goto free; |
1243 | 1261 | ||
1244 | enable_iommus(); | 1262 | enable_iommus(); |
1245 | 1263 | ||
1246 | printk(KERN_INFO "AMD IOMMU: device isolation "); | 1264 | if (iommu_pass_through) |
1265 | goto out; | ||
1266 | |||
1267 | printk(KERN_INFO "AMD-Vi: device isolation "); | ||
1247 | if (amd_iommu_isolate) | 1268 | if (amd_iommu_isolate) |
1248 | printk("enabled\n"); | 1269 | printk("enabled\n"); |
1249 | else | 1270 | else |
1250 | printk("disabled\n"); | 1271 | printk("disabled\n"); |
1251 | 1272 | ||
1252 | if (amd_iommu_unmap_flush) | 1273 | if (amd_iommu_unmap_flush) |
1253 | printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); | 1274 | printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); |
1254 | else | 1275 | else |
1255 | printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); | 1276 | printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); |
1256 | 1277 | ||
1257 | out: | 1278 | out: |
1258 | return ret; | 1279 | return ret; |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 676debfc1702..128111d8ffe0 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/bitops.h> | 20 | #include <linux/bitops.h> |
21 | #include <linux/ioport.h> | 21 | #include <linux/ioport.h> |
22 | #include <linux/suspend.h> | 22 | #include <linux/suspend.h> |
23 | #include <linux/kmemleak.h> | ||
23 | #include <asm/e820.h> | 24 | #include <asm/e820.h> |
24 | #include <asm/io.h> | 25 | #include <asm/io.h> |
25 | #include <asm/iommu.h> | 26 | #include <asm/iommu.h> |
@@ -94,6 +95,11 @@ static u32 __init allocate_aperture(void) | |||
94 | * code for safe | 95 | * code for safe |
95 | */ | 96 | */ |
96 | p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); | 97 | p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); |
98 | /* | ||
99 | * Kmemleak should not scan this block as it may not be mapped via the | ||
100 | * kernel direct mapping. | ||
101 | */ | ||
102 | kmemleak_ignore(p); | ||
97 | if (!p || __pa(p)+aper_size > 0xffffffff) { | 103 | if (!p || __pa(p)+aper_size > 0xffffffff) { |
98 | printk(KERN_ERR | 104 | printk(KERN_ERR |
99 | "Cannot allocate aperture memory hole (%p,%uK)\n", | 105 | "Cannot allocate aperture memory hole (%p,%uK)\n", |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 8c7c042ecad1..159740decc41 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <asm/mtrr.h> | 49 | #include <asm/mtrr.h> |
50 | #include <asm/smp.h> | 50 | #include <asm/smp.h> |
51 | #include <asm/mce.h> | 51 | #include <asm/mce.h> |
52 | #include <asm/kvm_para.h> | ||
52 | 53 | ||
53 | unsigned int num_processors; | 54 | unsigned int num_processors; |
54 | 55 | ||
@@ -140,7 +141,6 @@ int x2apic_mode; | |||
140 | #ifdef CONFIG_X86_X2APIC | 141 | #ifdef CONFIG_X86_X2APIC |
141 | /* x2apic enabled before OS handover */ | 142 | /* x2apic enabled before OS handover */ |
142 | static int x2apic_preenabled; | 143 | static int x2apic_preenabled; |
143 | static int disable_x2apic; | ||
144 | static __init int setup_nox2apic(char *str) | 144 | static __init int setup_nox2apic(char *str) |
145 | { | 145 | { |
146 | if (x2apic_enabled()) { | 146 | if (x2apic_enabled()) { |
@@ -149,7 +149,6 @@ static __init int setup_nox2apic(char *str) | |||
149 | return 0; | 149 | return 0; |
150 | } | 150 | } |
151 | 151 | ||
152 | disable_x2apic = 1; | ||
153 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); | 152 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); |
154 | return 0; | 153 | return 0; |
155 | } | 154 | } |
@@ -1363,52 +1362,80 @@ void enable_x2apic(void) | |||
1363 | } | 1362 | } |
1364 | #endif /* CONFIG_X86_X2APIC */ | 1363 | #endif /* CONFIG_X86_X2APIC */ |
1365 | 1364 | ||
1366 | void __init enable_IR_x2apic(void) | 1365 | int __init enable_IR(void) |
1367 | { | 1366 | { |
1368 | #ifdef CONFIG_INTR_REMAP | 1367 | #ifdef CONFIG_INTR_REMAP |
1369 | int ret; | ||
1370 | unsigned long flags; | ||
1371 | struct IO_APIC_route_entry **ioapic_entries = NULL; | ||
1372 | |||
1373 | ret = dmar_table_init(); | ||
1374 | if (ret) { | ||
1375 | pr_debug("dmar_table_init() failed with %d:\n", ret); | ||
1376 | goto ir_failed; | ||
1377 | } | ||
1378 | |||
1379 | if (!intr_remapping_supported()) { | 1368 | if (!intr_remapping_supported()) { |
1380 | pr_debug("intr-remapping not supported\n"); | 1369 | pr_debug("intr-remapping not supported\n"); |
1381 | goto ir_failed; | 1370 | return 0; |
1382 | } | 1371 | } |
1383 | 1372 | ||
1384 | |||
1385 | if (!x2apic_preenabled && skip_ioapic_setup) { | 1373 | if (!x2apic_preenabled && skip_ioapic_setup) { |
1386 | pr_info("Skipped enabling intr-remap because of skipping " | 1374 | pr_info("Skipped enabling intr-remap because of skipping " |
1387 | "io-apic setup\n"); | 1375 | "io-apic setup\n"); |
1388 | return; | 1376 | return 0; |
1389 | } | 1377 | } |
1390 | 1378 | ||
1379 | if (enable_intr_remapping(x2apic_supported())) | ||
1380 | return 0; | ||
1381 | |||
1382 | pr_info("Enabled Interrupt-remapping\n"); | ||
1383 | |||
1384 | return 1; | ||
1385 | |||
1386 | #endif | ||
1387 | return 0; | ||
1388 | } | ||
1389 | |||
1390 | void __init enable_IR_x2apic(void) | ||
1391 | { | ||
1392 | unsigned long flags; | ||
1393 | struct IO_APIC_route_entry **ioapic_entries = NULL; | ||
1394 | int ret, x2apic_enabled = 0; | ||
1395 | int dmar_table_init_ret = 0; | ||
1396 | |||
1397 | #ifdef CONFIG_INTR_REMAP | ||
1398 | dmar_table_init_ret = dmar_table_init(); | ||
1399 | if (dmar_table_init_ret) | ||
1400 | pr_debug("dmar_table_init() failed with %d:\n", | ||
1401 | dmar_table_init_ret); | ||
1402 | #endif | ||
1403 | |||
1391 | ioapic_entries = alloc_ioapic_entries(); | 1404 | ioapic_entries = alloc_ioapic_entries(); |
1392 | if (!ioapic_entries) { | 1405 | if (!ioapic_entries) { |
1393 | pr_info("Allocate ioapic_entries failed: %d\n", ret); | 1406 | pr_err("Allocate ioapic_entries failed\n"); |
1394 | goto end; | 1407 | goto out; |
1395 | } | 1408 | } |
1396 | 1409 | ||
1397 | ret = save_IO_APIC_setup(ioapic_entries); | 1410 | ret = save_IO_APIC_setup(ioapic_entries); |
1398 | if (ret) { | 1411 | if (ret) { |
1399 | pr_info("Saving IO-APIC state failed: %d\n", ret); | 1412 | pr_info("Saving IO-APIC state failed: %d\n", ret); |
1400 | goto end; | 1413 | goto out; |
1401 | } | 1414 | } |
1402 | 1415 | ||
1403 | local_irq_save(flags); | 1416 | local_irq_save(flags); |
1404 | mask_IO_APIC_setup(ioapic_entries); | ||
1405 | mask_8259A(); | 1417 | mask_8259A(); |
1418 | mask_IO_APIC_setup(ioapic_entries); | ||
1406 | 1419 | ||
1407 | ret = enable_intr_remapping(x2apic_supported()); | 1420 | if (dmar_table_init_ret) |
1408 | if (ret) | 1421 | ret = 0; |
1409 | goto end_restore; | 1422 | else |
1423 | ret = enable_IR(); | ||
1410 | 1424 | ||
1411 | pr_info("Enabled Interrupt-remapping\n"); | 1425 | if (!ret) { |
1426 | /* IR is required if there is an APIC ID > 255 even when running | |
1427 | * under KVM | ||
1428 | */ | ||
1429 | if (max_physical_apicid > 255 || !kvm_para_available()) | ||
1430 | goto nox2apic; | ||
1431 | /* | ||
1432 | * without IR all CPUs can be addressed by IOAPIC/MSI | ||
1433 | * only in physical mode | ||
1434 | */ | ||
1435 | x2apic_force_phys(); | ||
1436 | } | ||
1437 | |||
1438 | x2apic_enabled = 1; | ||
1412 | 1439 | ||
1413 | if (x2apic_supported() && !x2apic_mode) { | 1440 | if (x2apic_supported() && !x2apic_mode) { |
1414 | x2apic_mode = 1; | 1441 | x2apic_mode = 1; |
@@ -1416,41 +1443,25 @@ void __init enable_IR_x2apic(void) | |||
1416 | pr_info("Enabled x2apic\n"); | 1443 | pr_info("Enabled x2apic\n"); |
1417 | } | 1444 | } |
1418 | 1445 | ||
1419 | end_restore: | 1446 | nox2apic: |
1420 | if (ret) | 1447 | if (!ret) /* IR enabling failed */ |
1421 | /* | ||
1422 | * IR enabling failed | ||
1423 | */ | ||
1424 | restore_IO_APIC_setup(ioapic_entries); | 1448 | restore_IO_APIC_setup(ioapic_entries); |
1425 | |||
1426 | unmask_8259A(); | 1449 | unmask_8259A(); |
1427 | local_irq_restore(flags); | 1450 | local_irq_restore(flags); |
1428 | 1451 | ||
1429 | end: | 1452 | out: |
1430 | if (ioapic_entries) | 1453 | if (ioapic_entries) |
1431 | free_ioapic_entries(ioapic_entries); | 1454 | free_ioapic_entries(ioapic_entries); |
1432 | 1455 | ||
1433 | if (!ret) | 1456 | if (x2apic_enabled) |
1434 | return; | 1457 | return; |
1435 | 1458 | ||
1436 | ir_failed: | ||
1437 | if (x2apic_preenabled) | 1459 | if (x2apic_preenabled) |
1438 | panic("x2apic enabled by bios. But IR enabling failed"); | 1460 | panic("x2apic: enabled by BIOS but kernel init failed."); |
1439 | else if (cpu_has_x2apic) | 1461 | else if (cpu_has_x2apic) |
1440 | pr_info("Not enabling x2apic,Intr-remapping\n"); | 1462 | pr_info("Not enabling x2apic, Intr-remapping init failed.\n"); |
1441 | #else | ||
1442 | if (!cpu_has_x2apic) | ||
1443 | return; | ||
1444 | |||
1445 | if (x2apic_preenabled) | ||
1446 | panic("x2apic enabled prior OS handover," | ||
1447 | " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP"); | ||
1448 | #endif | ||
1449 | |||
1450 | return; | ||
1451 | } | 1463 | } |
1452 | 1464 | ||
1453 | |||
1454 | #ifdef CONFIG_X86_64 | 1465 | #ifdef CONFIG_X86_64 |
1455 | /* | 1466 | /* |
1456 | * Detect and enable local APICs on non-SMP boards. | 1467 | * Detect and enable local APICs on non-SMP boards. |
@@ -1551,8 +1562,6 @@ no_apic: | |||
1551 | #ifdef CONFIG_X86_64 | 1562 | #ifdef CONFIG_X86_64 |
1552 | void __init early_init_lapic_mapping(void) | 1563 | void __init early_init_lapic_mapping(void) |
1553 | { | 1564 | { |
1554 | unsigned long phys_addr; | ||
1555 | |||
1556 | /* | 1565 | /* |
1557 | * If no local APIC can be found then go out | 1566 | * If no local APIC can be found then go out |
1558 | * : it means there is no mpatable and MADT | 1567 | * : it means there is no mpatable and MADT |
@@ -1560,11 +1569,9 @@ void __init early_init_lapic_mapping(void) | |||
1560 | if (!smp_found_config) | 1569 | if (!smp_found_config) |
1561 | return; | 1570 | return; |
1562 | 1571 | ||
1563 | phys_addr = mp_lapic_addr; | 1572 | set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); |
1564 | |||
1565 | set_fixmap_nocache(FIX_APIC_BASE, phys_addr); | ||
1566 | apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", | 1573 | apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", |
1567 | APIC_BASE, phys_addr); | 1574 | APIC_BASE, mp_lapic_addr); |
1568 | 1575 | ||
1569 | /* | 1576 | /* |
1570 | * Fetch the APIC ID of the BSP in case we have a | 1577 | * Fetch the APIC ID of the BSP in case we have a |
@@ -1653,7 +1660,6 @@ int __init APIC_init_uniprocessor(void) | |||
1653 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { | 1660 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { |
1654 | pr_err("BIOS bug, local APIC 0x%x not detected!...\n", | 1661 | pr_err("BIOS bug, local APIC 0x%x not detected!...\n", |
1655 | boot_cpu_physical_apicid); | 1662 | boot_cpu_physical_apicid); |
1656 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | ||
1657 | return -1; | 1663 | return -1; |
1658 | } | 1664 | } |
1659 | #endif | 1665 | #endif |
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 69328ac8de9c..89174f847b49 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
@@ -167,7 +167,7 @@ static int es7000_apic_is_cluster(void) | |||
167 | { | 167 | { |
168 | /* MPENTIUMIII */ | 168 | /* MPENTIUMIII */ |
169 | if (boot_cpu_data.x86 == 6 && | 169 | if (boot_cpu_data.x86 == 6 && |
170 | (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) | 170 | (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11)) |
171 | return 1; | 171 | return 1; |
172 | 172 | ||
173 | return 0; | 173 | return 0; |
@@ -652,7 +652,8 @@ static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem, | |||
652 | return ret && es7000_apic_is_cluster(); | 652 | return ret && es7000_apic_is_cluster(); |
653 | } | 653 | } |
654 | 654 | ||
655 | struct apic apic_es7000_cluster = { | 655 | /* We've been warned by a false positive warning.Use __refdata to keep calm. */ |
656 | struct apic __refdata apic_es7000_cluster = { | ||
656 | 657 | ||
657 | .name = "es7000", | 658 | .name = "es7000", |
658 | .probe = probe_es7000, | 659 | .probe = probe_es7000, |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 4d0216fcb36c..3c8f9e75d038 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -66,6 +66,8 @@ | |||
66 | #include <asm/apic.h> | 66 | #include <asm/apic.h> |
67 | 67 | ||
68 | #define __apicdebuginit(type) static type __init | 68 | #define __apicdebuginit(type) static type __init |
69 | #define for_each_irq_pin(entry, head) \ | ||
70 | for (entry = head; entry; entry = entry->next) | ||
69 | 71 | ||
70 | /* | 72 | /* |
71 | * Is the SiS APIC rmw bug present ? | 73 | * Is the SiS APIC rmw bug present ? |
@@ -85,6 +87,9 @@ int nr_ioapic_registers[MAX_IO_APICS]; | |||
85 | struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; | 87 | struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; |
86 | int nr_ioapics; | 88 | int nr_ioapics; |
87 | 89 | ||
90 | /* IO APIC gsi routing info */ | ||
91 | struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; | ||
92 | |||
88 | /* MP IRQ source entries */ | 93 | /* MP IRQ source entries */ |
89 | struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; | 94 | struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; |
90 | 95 | ||
@@ -116,15 +121,6 @@ static int __init parse_noapic(char *str) | |||
116 | } | 121 | } |
117 | early_param("noapic", parse_noapic); | 122 | early_param("noapic", parse_noapic); |
118 | 123 | ||
119 | struct irq_pin_list; | ||
120 | |||
121 | /* | ||
122 | * This is performance-critical, we want to do it O(1) | ||
123 | * | ||
124 | * the indexing order of this array favors 1:1 mappings | ||
125 | * between pins and IRQs. | ||
126 | */ | ||
127 | |||
128 | struct irq_pin_list { | 124 | struct irq_pin_list { |
129 | int apic, pin; | 125 | int apic, pin; |
130 | struct irq_pin_list *next; | 126 | struct irq_pin_list *next; |
@@ -139,6 +135,11 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) | |||
139 | return pin; | 135 | return pin; |
140 | } | 136 | } |
141 | 137 | ||
138 | /* | ||
139 | * This is performance-critical, we want to do it O(1) | ||
140 | * | ||
141 | * Most irqs are mapped 1:1 with pins. | ||
142 | */ | ||
142 | struct irq_cfg { | 143 | struct irq_cfg { |
143 | struct irq_pin_list *irq_2_pin; | 144 | struct irq_pin_list *irq_2_pin; |
144 | cpumask_var_t domain; | 145 | cpumask_var_t domain; |
@@ -414,13 +415,10 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) | |||
414 | unsigned long flags; | 415 | unsigned long flags; |
415 | 416 | ||
416 | spin_lock_irqsave(&ioapic_lock, flags); | 417 | spin_lock_irqsave(&ioapic_lock, flags); |
417 | entry = cfg->irq_2_pin; | 418 | for_each_irq_pin(entry, cfg->irq_2_pin) { |
418 | for (;;) { | ||
419 | unsigned int reg; | 419 | unsigned int reg; |
420 | int pin; | 420 | int pin; |
421 | 421 | ||
422 | if (!entry) | ||
423 | break; | ||
424 | pin = entry->pin; | 422 | pin = entry->pin; |
425 | reg = io_apic_read(entry->apic, 0x10 + pin*2); | 423 | reg = io_apic_read(entry->apic, 0x10 + pin*2); |
426 | /* Is the remote IRR bit set? */ | 424 | /* Is the remote IRR bit set? */ |
@@ -428,9 +426,6 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) | |||
428 | spin_unlock_irqrestore(&ioapic_lock, flags); | 426 | spin_unlock_irqrestore(&ioapic_lock, flags); |
429 | return true; | 427 | return true; |
430 | } | 428 | } |
431 | if (!entry->next) | ||
432 | break; | ||
433 | entry = entry->next; | ||
434 | } | 429 | } |
435 | spin_unlock_irqrestore(&ioapic_lock, flags); | 430 | spin_unlock_irqrestore(&ioapic_lock, flags); |
436 | 431 | ||
@@ -498,72 +493,68 @@ static void ioapic_mask_entry(int apic, int pin) | |||
498 | * shared ISA-space IRQs, so we have to support them. We are super | 493 | * shared ISA-space IRQs, so we have to support them. We are super |
499 | * fast in the common case, and fast for shared ISA-space IRQs. | 494 | * fast in the common case, and fast for shared ISA-space IRQs. |
500 | */ | 495 | */ |
501 | static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) | 496 | static int |
497 | add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin) | ||
502 | { | 498 | { |
503 | struct irq_pin_list *entry; | 499 | struct irq_pin_list **last, *entry; |
504 | |||
505 | entry = cfg->irq_2_pin; | ||
506 | if (!entry) { | ||
507 | entry = get_one_free_irq_2_pin(node); | ||
508 | if (!entry) { | ||
509 | printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", | ||
510 | apic, pin); | ||
511 | return; | ||
512 | } | ||
513 | cfg->irq_2_pin = entry; | ||
514 | entry->apic = apic; | ||
515 | entry->pin = pin; | ||
516 | return; | ||
517 | } | ||
518 | 500 | ||
519 | while (entry->next) { | 501 | /* don't allow duplicates */ |
520 | /* not again, please */ | 502 | last = &cfg->irq_2_pin; |
503 | for_each_irq_pin(entry, cfg->irq_2_pin) { | ||
521 | if (entry->apic == apic && entry->pin == pin) | 504 | if (entry->apic == apic && entry->pin == pin) |
522 | return; | 505 | return 0; |
523 | 506 | last = &entry->next; | |
524 | entry = entry->next; | ||
525 | } | 507 | } |
526 | 508 | ||
527 | entry->next = get_one_free_irq_2_pin(node); | 509 | entry = get_one_free_irq_2_pin(node); |
528 | entry = entry->next; | 510 | if (!entry) { |
511 | printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", | ||
512 | node, apic, pin); | ||
513 | return -ENOMEM; | ||
514 | } | ||
529 | entry->apic = apic; | 515 | entry->apic = apic; |
530 | entry->pin = pin; | 516 | entry->pin = pin; |
517 | |||
518 | *last = entry; | ||
519 | return 0; | ||
520 | } | ||
521 | |||
522 | static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) | ||
523 | { | ||
524 | if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin)) | ||
525 | panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); | ||
531 | } | 526 | } |
532 | 527 | ||
533 | /* | 528 | /* |
534 | * Reroute an IRQ to a different pin. | 529 | * Reroute an IRQ to a different pin. |
535 | */ | 530 | */ |
536 | static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, | 531 | static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, |
537 | int oldapic, int oldpin, | 532 | int oldapic, int oldpin, |
538 | int newapic, int newpin) | 533 | int newapic, int newpin) |
539 | { | 534 | { |
540 | struct irq_pin_list *entry = cfg->irq_2_pin; | 535 | struct irq_pin_list *entry; |
541 | int replaced = 0; | ||
542 | 536 | ||
543 | while (entry) { | 537 | for_each_irq_pin(entry, cfg->irq_2_pin) { |
544 | if (entry->apic == oldapic && entry->pin == oldpin) { | 538 | if (entry->apic == oldapic && entry->pin == oldpin) { |
545 | entry->apic = newapic; | 539 | entry->apic = newapic; |
546 | entry->pin = newpin; | 540 | entry->pin = newpin; |
547 | replaced = 1; | ||
548 | /* every one is different, right? */ | 541 | /* every one is different, right? */ |
549 | break; | 542 | return; |
550 | } | 543 | } |
551 | entry = entry->next; | ||
552 | } | 544 | } |
553 | 545 | ||
554 | /* why? call replace before add? */ | 546 | /* old apic/pin didn't exist, so just add new ones */ |
555 | if (!replaced) | 547 | add_pin_to_irq_node(cfg, node, newapic, newpin); |
556 | add_pin_to_irq_node(cfg, node, newapic, newpin); | ||
557 | } | 548 | } |
558 | 549 | ||
559 | static inline void io_apic_modify_irq(struct irq_cfg *cfg, | 550 | static void io_apic_modify_irq(struct irq_cfg *cfg, |
560 | int mask_and, int mask_or, | 551 | int mask_and, int mask_or, |
561 | void (*final)(struct irq_pin_list *entry)) | 552 | void (*final)(struct irq_pin_list *entry)) |
562 | { | 553 | { |
563 | int pin; | 554 | int pin; |
564 | struct irq_pin_list *entry; | 555 | struct irq_pin_list *entry; |
565 | 556 | ||
566 | for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { | 557 | for_each_irq_pin(entry, cfg->irq_2_pin) { |
567 | unsigned int reg; | 558 | unsigned int reg; |
568 | pin = entry->pin; | 559 | pin = entry->pin; |
569 | reg = io_apic_read(entry->apic, 0x10 + pin * 2); | 560 | reg = io_apic_read(entry->apic, 0x10 + pin * 2); |
@@ -580,7 +571,6 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) | |||
580 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); | 571 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); |
581 | } | 572 | } |
582 | 573 | ||
583 | #ifdef CONFIG_X86_64 | ||
584 | static void io_apic_sync(struct irq_pin_list *entry) | 574 | static void io_apic_sync(struct irq_pin_list *entry) |
585 | { | 575 | { |
586 | /* | 576 | /* |
@@ -596,11 +586,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg) | |||
596 | { | 586 | { |
597 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); | 587 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); |
598 | } | 588 | } |
599 | #else /* CONFIG_X86_32 */ | ||
600 | static void __mask_IO_APIC_irq(struct irq_cfg *cfg) | ||
601 | { | ||
602 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); | ||
603 | } | ||
604 | 589 | ||
605 | static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) | 590 | static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) |
606 | { | 591 | { |
@@ -613,7 +598,6 @@ static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) | |||
613 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, | 598 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, |
614 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); | 599 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); |
615 | } | 600 | } |
616 | #endif /* CONFIG_X86_32 */ | ||
617 | 601 | ||
618 | static void mask_IO_APIC_irq_desc(struct irq_desc *desc) | 602 | static void mask_IO_APIC_irq_desc(struct irq_desc *desc) |
619 | { | 603 | { |
@@ -1702,12 +1686,8 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1702 | if (!entry) | 1686 | if (!entry) |
1703 | continue; | 1687 | continue; |
1704 | printk(KERN_DEBUG "IRQ%d ", irq); | 1688 | printk(KERN_DEBUG "IRQ%d ", irq); |
1705 | for (;;) { | 1689 | for_each_irq_pin(entry, cfg->irq_2_pin) |
1706 | printk("-> %d:%d", entry->apic, entry->pin); | 1690 | printk("-> %d:%d", entry->apic, entry->pin); |
1707 | if (!entry->next) | ||
1708 | break; | ||
1709 | entry = entry->next; | ||
1710 | } | ||
1711 | printk("\n"); | 1691 | printk("\n"); |
1712 | } | 1692 | } |
1713 | 1693 | ||
@@ -1716,25 +1696,19 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1716 | return; | 1696 | return; |
1717 | } | 1697 | } |
1718 | 1698 | ||
1719 | __apicdebuginit(void) print_APIC_bitfield(int base) | 1699 | __apicdebuginit(void) print_APIC_field(int base) |
1720 | { | 1700 | { |
1721 | unsigned int v; | 1701 | int i; |
1722 | int i, j; | ||
1723 | 1702 | ||
1724 | if (apic_verbosity == APIC_QUIET) | 1703 | if (apic_verbosity == APIC_QUIET) |
1725 | return; | 1704 | return; |
1726 | 1705 | ||
1727 | printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); | 1706 | printk(KERN_DEBUG); |
1728 | for (i = 0; i < 8; i++) { | 1707 | |
1729 | v = apic_read(base + i*0x10); | 1708 | for (i = 0; i < 8; i++) |
1730 | for (j = 0; j < 32; j++) { | 1709 | printk(KERN_CONT "%08x", apic_read(base + i*0x10)); |
1731 | if (v & (1<<j)) | 1710 | |
1732 | printk("1"); | 1711 | printk(KERN_CONT "\n"); |
1733 | else | ||
1734 | printk("0"); | ||
1735 | } | ||
1736 | printk("\n"); | ||
1737 | } | ||
1738 | } | 1712 | } |
1739 | 1713 | ||
1740 | __apicdebuginit(void) print_local_APIC(void *dummy) | 1714 | __apicdebuginit(void) print_local_APIC(void *dummy) |
@@ -1745,7 +1719,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy) | |||
1745 | if (apic_verbosity == APIC_QUIET) | 1719 | if (apic_verbosity == APIC_QUIET) |
1746 | return; | 1720 | return; |
1747 | 1721 | ||
1748 | printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", | 1722 | printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", |
1749 | smp_processor_id(), hard_smp_processor_id()); | 1723 | smp_processor_id(), hard_smp_processor_id()); |
1750 | v = apic_read(APIC_ID); | 1724 | v = apic_read(APIC_ID); |
1751 | printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); | 1725 | printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); |
@@ -1786,11 +1760,11 @@ __apicdebuginit(void) print_local_APIC(void *dummy) | |||
1786 | printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); | 1760 | printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); |
1787 | 1761 | ||
1788 | printk(KERN_DEBUG "... APIC ISR field:\n"); | 1762 | printk(KERN_DEBUG "... APIC ISR field:\n"); |
1789 | print_APIC_bitfield(APIC_ISR); | 1763 | print_APIC_field(APIC_ISR); |
1790 | printk(KERN_DEBUG "... APIC TMR field:\n"); | 1764 | printk(KERN_DEBUG "... APIC TMR field:\n"); |
1791 | print_APIC_bitfield(APIC_TMR); | 1765 | print_APIC_field(APIC_TMR); |
1792 | printk(KERN_DEBUG "... APIC IRR field:\n"); | 1766 | printk(KERN_DEBUG "... APIC IRR field:\n"); |
1793 | print_APIC_bitfield(APIC_IRR); | 1767 | print_APIC_field(APIC_IRR); |
1794 | 1768 | ||
1795 | if (APIC_INTEGRATED(ver)) { /* !82489DX */ | 1769 | if (APIC_INTEGRATED(ver)) { /* !82489DX */ |
1796 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | 1770 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
@@ -2217,7 +2191,6 @@ static unsigned int startup_ioapic_irq(unsigned int irq) | |||
2217 | return was_pending; | 2191 | return was_pending; |
2218 | } | 2192 | } |
2219 | 2193 | ||
2220 | #ifdef CONFIG_X86_64 | ||
2221 | static int ioapic_retrigger_irq(unsigned int irq) | 2194 | static int ioapic_retrigger_irq(unsigned int irq) |
2222 | { | 2195 | { |
2223 | 2196 | ||
@@ -2230,14 +2203,6 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
2230 | 2203 | ||
2231 | return 1; | 2204 | return 1; |
2232 | } | 2205 | } |
2233 | #else | ||
2234 | static int ioapic_retrigger_irq(unsigned int irq) | ||
2235 | { | ||
2236 | apic->send_IPI_self(irq_cfg(irq)->vector); | ||
2237 | |||
2238 | return 1; | ||
2239 | } | ||
2240 | #endif | ||
2241 | 2206 | ||
2242 | /* | 2207 | /* |
2243 | * Level and edge triggered IO-APIC interrupts need different handling, | 2208 | * Level and edge triggered IO-APIC interrupts need different handling, |
@@ -2275,13 +2240,9 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq | |||
2275 | struct irq_pin_list *entry; | 2240 | struct irq_pin_list *entry; |
2276 | u8 vector = cfg->vector; | 2241 | u8 vector = cfg->vector; |
2277 | 2242 | ||
2278 | entry = cfg->irq_2_pin; | 2243 | for_each_irq_pin(entry, cfg->irq_2_pin) { |
2279 | for (;;) { | ||
2280 | unsigned int reg; | 2244 | unsigned int reg; |
2281 | 2245 | ||
2282 | if (!entry) | ||
2283 | break; | ||
2284 | |||
2285 | apic = entry->apic; | 2246 | apic = entry->apic; |
2286 | pin = entry->pin; | 2247 | pin = entry->pin; |
2287 | /* | 2248 | /* |
@@ -2294,9 +2255,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq | |||
2294 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; | 2255 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; |
2295 | reg |= vector; | 2256 | reg |= vector; |
2296 | io_apic_modify(apic, 0x10 + pin*2, reg); | 2257 | io_apic_modify(apic, 0x10 + pin*2, reg); |
2297 | if (!entry->next) | ||
2298 | break; | ||
2299 | entry = entry->next; | ||
2300 | } | 2258 | } |
2301 | } | 2259 | } |
2302 | 2260 | ||
@@ -2521,11 +2479,8 @@ atomic_t irq_mis_count; | |||
2521 | static void ack_apic_level(unsigned int irq) | 2479 | static void ack_apic_level(unsigned int irq) |
2522 | { | 2480 | { |
2523 | struct irq_desc *desc = irq_to_desc(irq); | 2481 | struct irq_desc *desc = irq_to_desc(irq); |
2524 | |||
2525 | #ifdef CONFIG_X86_32 | ||
2526 | unsigned long v; | 2482 | unsigned long v; |
2527 | int i; | 2483 | int i; |
2528 | #endif | ||
2529 | struct irq_cfg *cfg; | 2484 | struct irq_cfg *cfg; |
2530 | int do_unmask_irq = 0; | 2485 | int do_unmask_irq = 0; |
2531 | 2486 | ||
@@ -2538,31 +2493,28 @@ static void ack_apic_level(unsigned int irq) | |||
2538 | } | 2493 | } |
2539 | #endif | 2494 | #endif |
2540 | 2495 | ||
2541 | #ifdef CONFIG_X86_32 | ||
2542 | /* | 2496 | /* |
2543 | * It appears there is an erratum which affects at least version 0x11 | 2497 | * It appears there is an erratum which affects at least version 0x11 |
2544 | * of I/O APIC (that's the 82093AA and cores integrated into various | 2498 | * of I/O APIC (that's the 82093AA and cores integrated into various |
2545 | * chipsets). Under certain conditions a level-triggered interrupt is | 2499 | * chipsets). Under certain conditions a level-triggered interrupt is |
2546 | * erroneously delivered as edge-triggered one but the respective IRR | 2500 | * erroneously delivered as edge-triggered one but the respective IRR |
2547 | * bit gets set nevertheless. As a result the I/O unit expects an EOI | 2501 | * bit gets set nevertheless. As a result the I/O unit expects an EOI |
2548 | * message but it will never arrive and further interrupts are blocked | 2502 | * message but it will never arrive and further interrupts are blocked |
2549 | * from the source. The exact reason is so far unknown, but the | 2503 | * from the source. The exact reason is so far unknown, but the |
2550 | * phenomenon was observed when two consecutive interrupt requests | 2504 | * phenomenon was observed when two consecutive interrupt requests |
2551 | * from a given source get delivered to the same CPU and the source is | 2505 | * from a given source get delivered to the same CPU and the source is |
2552 | * temporarily disabled in between. | 2506 | * temporarily disabled in between. |
2553 | * | 2507 | * |
2554 | * A workaround is to simulate an EOI message manually. We achieve it | 2508 | * A workaround is to simulate an EOI message manually. We achieve it |
2555 | * by setting the trigger mode to edge and then to level when the edge | 2509 | * by setting the trigger mode to edge and then to level when the edge |
2556 | * trigger mode gets detected in the TMR of a local APIC for a | 2510 | * trigger mode gets detected in the TMR of a local APIC for a |
2557 | * level-triggered interrupt. We mask the source for the time of the | 2511 | * level-triggered interrupt. We mask the source for the time of the |
2558 | * operation to prevent an edge-triggered interrupt escaping meanwhile. | 2512 | * operation to prevent an edge-triggered interrupt escaping meanwhile. |
2559 | * The idea is from Manfred Spraul. --macro | 2513 | * The idea is from Manfred Spraul. --macro |
2560 | */ | 2514 | */ |
2561 | cfg = desc->chip_data; | 2515 | cfg = desc->chip_data; |
2562 | i = cfg->vector; | 2516 | i = cfg->vector; |
2563 | |||
2564 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); | 2517 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); |
2565 | #endif | ||
2566 | 2518 | ||
2567 | /* | 2519 | /* |
2568 | * We must acknowledge the irq before we move it or the acknowledge will | 2520 | * We must acknowledge the irq before we move it or the acknowledge will |
@@ -2604,7 +2556,7 @@ static void ack_apic_level(unsigned int irq) | |||
2604 | unmask_IO_APIC_irq_desc(desc); | 2556 | unmask_IO_APIC_irq_desc(desc); |
2605 | } | 2557 | } |
2606 | 2558 | ||
2607 | #ifdef CONFIG_X86_32 | 2559 | /* Tail end of version 0x11 I/O APIC bug workaround */ |
2608 | if (!(v & (1 << (i & 0x1f)))) { | 2560 | if (!(v & (1 << (i & 0x1f)))) { |
2609 | atomic_inc(&irq_mis_count); | 2561 | atomic_inc(&irq_mis_count); |
2610 | spin_lock(&ioapic_lock); | 2562 | spin_lock(&ioapic_lock); |
@@ -2612,26 +2564,15 @@ static void ack_apic_level(unsigned int irq) | |||
2612 | __unmask_and_level_IO_APIC_irq(cfg); | 2564 | __unmask_and_level_IO_APIC_irq(cfg); |
2613 | spin_unlock(&ioapic_lock); | 2565 | spin_unlock(&ioapic_lock); |
2614 | } | 2566 | } |
2615 | #endif | ||
2616 | } | 2567 | } |
2617 | 2568 | ||
2618 | #ifdef CONFIG_INTR_REMAP | 2569 | #ifdef CONFIG_INTR_REMAP |
2619 | static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | 2570 | static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) |
2620 | { | 2571 | { |
2621 | int apic, pin; | ||
2622 | struct irq_pin_list *entry; | 2572 | struct irq_pin_list *entry; |
2623 | 2573 | ||
2624 | entry = cfg->irq_2_pin; | 2574 | for_each_irq_pin(entry, cfg->irq_2_pin) |
2625 | for (;;) { | 2575 | io_apic_eoi(entry->apic, entry->pin); |
2626 | |||
2627 | if (!entry) | ||
2628 | break; | ||
2629 | |||
2630 | apic = entry->apic; | ||
2631 | pin = entry->pin; | ||
2632 | io_apic_eoi(apic, pin); | ||
2633 | entry = entry->next; | ||
2634 | } | ||
2635 | } | 2576 | } |
2636 | 2577 | ||
2637 | static void | 2578 | static void |
@@ -3247,8 +3188,7 @@ void destroy_irq(unsigned int irq) | |||
3247 | cfg = desc->chip_data; | 3188 | cfg = desc->chip_data; |
3248 | dynamic_irq_cleanup(irq); | 3189 | dynamic_irq_cleanup(irq); |
3249 | /* connect back irq_cfg */ | 3190 | /* connect back irq_cfg */ |
3250 | if (desc) | 3191 | desc->chip_data = cfg; |
3251 | desc->chip_data = cfg; | ||
3252 | 3192 | ||
3253 | free_irte(irq); | 3193 | free_irte(irq); |
3254 | spin_lock_irqsave(&vector_lock, flags); | 3194 | spin_lock_irqsave(&vector_lock, flags); |
@@ -3799,6 +3739,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
3799 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | 3739 | mmr_pnode = uv_blade_to_pnode(mmr_blade); |
3800 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | 3740 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); |
3801 | 3741 | ||
3742 | if (cfg->move_in_progress) | ||
3743 | send_cleanup_vector(cfg); | ||
3744 | |||
3802 | return irq; | 3745 | return irq; |
3803 | } | 3746 | } |
3804 | 3747 | ||
@@ -3915,7 +3858,11 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq, | |||
3915 | */ | 3858 | */ |
3916 | if (irq >= NR_IRQS_LEGACY) { | 3859 | if (irq >= NR_IRQS_LEGACY) { |
3917 | cfg = desc->chip_data; | 3860 | cfg = desc->chip_data; |
3918 | add_pin_to_irq_node(cfg, node, ioapic, pin); | 3861 | if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { |
3862 | printk(KERN_INFO "can not add pin %d for irq %d\n", | ||
3863 | pin, irq); | ||
3864 | return 0; | ||
3865 | } | ||
3919 | } | 3866 | } |
3920 | 3867 | ||
3921 | setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); | 3868 | setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); |
@@ -3944,11 +3891,28 @@ int io_apic_set_pci_routing(struct device *dev, int irq, | |||
3944 | return __io_apic_set_pci_routing(dev, irq, irq_attr); | 3891 | return __io_apic_set_pci_routing(dev, irq, irq_attr); |
3945 | } | 3892 | } |
3946 | 3893 | ||
3947 | /* -------------------------------------------------------------------------- | 3894 | u8 __init io_apic_unique_id(u8 id) |
3948 | ACPI-based IOAPIC Configuration | 3895 | { |
3949 | -------------------------------------------------------------------------- */ | 3896 | #ifdef CONFIG_X86_32 |
3897 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && | ||
3898 | !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) | ||
3899 | return io_apic_get_unique_id(nr_ioapics, id); | ||
3900 | else | ||
3901 | return id; | ||
3902 | #else | ||
3903 | int i; | ||
3904 | DECLARE_BITMAP(used, 256); | ||
3950 | 3905 | ||
3951 | #ifdef CONFIG_ACPI | 3906 | bitmap_zero(used, 256); |
3907 | for (i = 0; i < nr_ioapics; i++) { | ||
3908 | struct mpc_ioapic *ia = &mp_ioapics[i]; | ||
3909 | __set_bit(ia->apicid, used); | ||
3910 | } | ||
3911 | if (!test_bit(id, used)) | ||
3912 | return id; | ||
3913 | return find_first_zero_bit(used, 256); | ||
3914 | #endif | ||
3915 | } | ||
3952 | 3916 | ||
3953 | #ifdef CONFIG_X86_32 | 3917 | #ifdef CONFIG_X86_32 |
3954 | int __init io_apic_get_unique_id(int ioapic, int apic_id) | 3918 | int __init io_apic_get_unique_id(int ioapic, int apic_id) |
@@ -4057,8 +4021,6 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | |||
4057 | return 0; | 4021 | return 0; |
4058 | } | 4022 | } |
4059 | 4023 | ||
4060 | #endif /* CONFIG_ACPI */ | ||
4061 | |||
4062 | /* | 4024 | /* |
4063 | * This function currently is only a helper for the i386 smp boot process where | 4025 | * This function currently is only a helper for the i386 smp boot process where |
4064 | * we need to reprogram the ioredtbls to cater for the cpus which have come online | 4026 | * we need to reprogram the ioredtbls to cater for the cpus which have come online |
@@ -4112,7 +4074,7 @@ void __init setup_ioapic_dest(void) | |||
4112 | 4074 | ||
4113 | static struct resource *ioapic_resources; | 4075 | static struct resource *ioapic_resources; |
4114 | 4076 | ||
4115 | static struct resource * __init ioapic_setup_resources(void) | 4077 | static struct resource * __init ioapic_setup_resources(int nr_ioapics) |
4116 | { | 4078 | { |
4117 | unsigned long n; | 4079 | unsigned long n; |
4118 | struct resource *res; | 4080 | struct resource *res; |
@@ -4128,15 +4090,13 @@ static struct resource * __init ioapic_setup_resources(void) | |||
4128 | mem = alloc_bootmem(n); | 4090 | mem = alloc_bootmem(n); |
4129 | res = (void *)mem; | 4091 | res = (void *)mem; |
4130 | 4092 | ||
4131 | if (mem != NULL) { | 4093 | mem += sizeof(struct resource) * nr_ioapics; |
4132 | mem += sizeof(struct resource) * nr_ioapics; | ||
4133 | 4094 | ||
4134 | for (i = 0; i < nr_ioapics; i++) { | 4095 | for (i = 0; i < nr_ioapics; i++) { |
4135 | res[i].name = mem; | 4096 | res[i].name = mem; |
4136 | res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; | 4097 | res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; |
4137 | sprintf(mem, "IOAPIC %u", i); | 4098 | sprintf(mem, "IOAPIC %u", i); |
4138 | mem += IOAPIC_RESOURCE_NAME_SIZE; | 4099 | mem += IOAPIC_RESOURCE_NAME_SIZE; |
4139 | } | ||
4140 | } | 4100 | } |
4141 | 4101 | ||
4142 | ioapic_resources = res; | 4102 | ioapic_resources = res; |
@@ -4150,7 +4110,7 @@ void __init ioapic_init_mappings(void) | |||
4150 | struct resource *ioapic_res; | 4110 | struct resource *ioapic_res; |
4151 | int i; | 4111 | int i; |
4152 | 4112 | ||
4153 | ioapic_res = ioapic_setup_resources(); | 4113 | ioapic_res = ioapic_setup_resources(nr_ioapics); |
4154 | for (i = 0; i < nr_ioapics; i++) { | 4114 | for (i = 0; i < nr_ioapics; i++) { |
4155 | if (smp_found_config) { | 4115 | if (smp_found_config) { |
4156 | ioapic_phys = mp_ioapics[i].apicaddr; | 4116 | ioapic_phys = mp_ioapics[i].apicaddr; |
@@ -4179,36 +4139,99 @@ fake_ioapic_page: | |||
4179 | __fix_to_virt(idx), ioapic_phys); | 4139 | __fix_to_virt(idx), ioapic_phys); |
4180 | idx++; | 4140 | idx++; |
4181 | 4141 | ||
4182 | if (ioapic_res != NULL) { | 4142 | ioapic_res->start = ioapic_phys; |
4183 | ioapic_res->start = ioapic_phys; | 4143 | ioapic_res->end = ioapic_phys + (4 * 1024) - 1; |
4184 | ioapic_res->end = ioapic_phys + (4 * 1024) - 1; | 4144 | ioapic_res++; |
4185 | ioapic_res++; | ||
4186 | } | ||
4187 | } | 4145 | } |
4188 | } | 4146 | } |
4189 | 4147 | ||
4190 | static int __init ioapic_insert_resources(void) | 4148 | void __init ioapic_insert_resources(void) |
4191 | { | 4149 | { |
4192 | int i; | 4150 | int i; |
4193 | struct resource *r = ioapic_resources; | 4151 | struct resource *r = ioapic_resources; |
4194 | 4152 | ||
4195 | if (!r) { | 4153 | if (!r) { |
4196 | if (nr_ioapics > 0) { | 4154 | if (nr_ioapics > 0) |
4197 | printk(KERN_ERR | 4155 | printk(KERN_ERR |
4198 | "IO APIC resources couldn't be allocated.\n"); | 4156 | "IO APIC resources couldn't be allocated.\n"); |
4199 | return -1; | 4157 | return; |
4200 | } | ||
4201 | return 0; | ||
4202 | } | 4158 | } |
4203 | 4159 | ||
4204 | for (i = 0; i < nr_ioapics; i++) { | 4160 | for (i = 0; i < nr_ioapics; i++) { |
4205 | insert_resource(&iomem_resource, r); | 4161 | insert_resource(&iomem_resource, r); |
4206 | r++; | 4162 | r++; |
4207 | } | 4163 | } |
4164 | } | ||
4165 | |||
4166 | int mp_find_ioapic(int gsi) | ||
4167 | { | ||
4168 | int i = 0; | ||
4169 | |||
4170 | /* Find the IOAPIC that manages this GSI. */ | ||
4171 | for (i = 0; i < nr_ioapics; i++) { | ||
4172 | if ((gsi >= mp_gsi_routing[i].gsi_base) | ||
4173 | && (gsi <= mp_gsi_routing[i].gsi_end)) | ||
4174 | return i; | ||
4175 | } | ||
4176 | |||
4177 | printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); | ||
4178 | return -1; | ||
4179 | } | ||
4180 | |||
4181 | int mp_find_ioapic_pin(int ioapic, int gsi) | ||
4182 | { | ||
4183 | if (WARN_ON(ioapic == -1)) | ||
4184 | return -1; | ||
4185 | if (WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end)) | ||
4186 | return -1; | ||
4187 | |||
4188 | return gsi - mp_gsi_routing[ioapic].gsi_base; | ||
4189 | } | ||
4208 | 4190 | ||
4191 | static int bad_ioapic(unsigned long address) | ||
4192 | { | ||
4193 | if (nr_ioapics >= MAX_IO_APICS) { | ||
4194 | printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded " | ||
4195 | "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics); | ||
4196 | return 1; | ||
4197 | } | ||
4198 | if (!address) { | ||
4199 | printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address" | ||
4200 | " found in table, skipping!\n"); | ||
4201 | return 1; | ||
4202 | } | ||
4209 | return 0; | 4203 | return 0; |
4210 | } | 4204 | } |
4211 | 4205 | ||
4212 | /* Insert the IO APIC resources after PCI initialization has occured to handle | 4206 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) |
4213 | * IO APICS that are mapped in on a BAR in PCI space. */ | 4207 | { |
4214 | late_initcall(ioapic_insert_resources); | 4208 | int idx = 0; |
4209 | |||
4210 | if (bad_ioapic(address)) | ||
4211 | return; | ||
4212 | |||
4213 | idx = nr_ioapics; | ||
4214 | |||
4215 | mp_ioapics[idx].type = MP_IOAPIC; | ||
4216 | mp_ioapics[idx].flags = MPC_APIC_USABLE; | ||
4217 | mp_ioapics[idx].apicaddr = address; | ||
4218 | |||
4219 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); | ||
4220 | mp_ioapics[idx].apicid = io_apic_unique_id(id); | ||
4221 | mp_ioapics[idx].apicver = io_apic_get_version(idx); | ||
4222 | |||
4223 | /* | ||
4224 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups | ||
4225 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). | ||
4226 | */ | ||
4227 | mp_gsi_routing[idx].gsi_base = gsi_base; | ||
4228 | mp_gsi_routing[idx].gsi_end = gsi_base + | ||
4229 | io_apic_get_redir_entries(idx); | ||
4230 | |||
4231 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " | ||
4232 | "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, | ||
4233 | mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, | ||
4234 | mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end); | ||
4235 | |||
4236 | nr_ioapics++; | ||
4237 | } | ||
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index dbf5445727a9..08385e090a6f 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c | |||
@@ -106,6 +106,9 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) | |||
106 | unsigned long mask = cpumask_bits(cpumask)[0]; | 106 | unsigned long mask = cpumask_bits(cpumask)[0]; |
107 | unsigned long flags; | 107 | unsigned long flags; |
108 | 108 | ||
109 | if (WARN_ONCE(!mask, "empty IPI mask")) | ||
110 | return; | ||
111 | |||
109 | local_irq_save(flags); | 112 | local_irq_save(flags); |
110 | WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); | 113 | WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); |
111 | __default_send_IPI_dest_field(mask, vector, apic->dest_logical); | 114 | __default_send_IPI_dest_field(mask, vector, apic->dest_logical); |
@@ -150,7 +153,7 @@ int safe_smp_processor_id(void) | |||
150 | { | 153 | { |
151 | int apicid, cpuid; | 154 | int apicid, cpuid; |
152 | 155 | ||
153 | if (!boot_cpu_has(X86_FEATURE_APIC)) | 156 | if (!cpu_has_apic) |
154 | return 0; | 157 | return 0; |
155 | 158 | ||
156 | apicid = hard_smp_processor_id(); | 159 | apicid = hard_smp_processor_id(); |
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index b3025b43b63a..db7220220d09 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
@@ -39,7 +39,7 @@ | |||
39 | int unknown_nmi_panic; | 39 | int unknown_nmi_panic; |
40 | int nmi_watchdog_enabled; | 40 | int nmi_watchdog_enabled; |
41 | 41 | ||
42 | static cpumask_var_t backtrace_mask; | 42 | static cpumask_t backtrace_mask __read_mostly; |
43 | 43 | ||
44 | /* nmi_active: | 44 | /* nmi_active: |
45 | * >0: the lapic NMI watchdog is active, but can be disabled | 45 | * >0: the lapic NMI watchdog is active, but can be disabled |
@@ -138,7 +138,6 @@ int __init check_nmi_watchdog(void) | |||
138 | if (!prev_nmi_count) | 138 | if (!prev_nmi_count) |
139 | goto error; | 139 | goto error; |
140 | 140 | ||
141 | alloc_cpumask_var(&backtrace_mask, GFP_KERNEL|__GFP_ZERO); | ||
142 | printk(KERN_INFO "Testing NMI watchdog ... "); | 141 | printk(KERN_INFO "Testing NMI watchdog ... "); |
143 | 142 | ||
144 | #ifdef CONFIG_SMP | 143 | #ifdef CONFIG_SMP |
@@ -415,14 +414,17 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
415 | } | 414 | } |
416 | 415 | ||
417 | /* We can be called before check_nmi_watchdog, hence NULL check. */ | 416 | /* We can be called before check_nmi_watchdog, hence NULL check. */ |
418 | if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) { | 417 | if (cpumask_test_cpu(cpu, &backtrace_mask)) { |
419 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ | 418 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ |
420 | 419 | ||
421 | spin_lock(&lock); | 420 | spin_lock(&lock); |
422 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); | 421 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); |
422 | show_regs(regs); | ||
423 | dump_stack(); | 423 | dump_stack(); |
424 | spin_unlock(&lock); | 424 | spin_unlock(&lock); |
425 | cpumask_clear_cpu(cpu, backtrace_mask); | 425 | cpumask_clear_cpu(cpu, &backtrace_mask); |
426 | |||
427 | rc = 1; | ||
426 | } | 428 | } |
427 | 429 | ||
428 | /* Could check oops_in_progress here too, but it's safer not to */ | 430 | /* Could check oops_in_progress here too, but it's safer not to */ |
@@ -552,14 +554,18 @@ int do_nmi_callback(struct pt_regs *regs, int cpu) | |||
552 | return 0; | 554 | return 0; |
553 | } | 555 | } |
554 | 556 | ||
555 | void __trigger_all_cpu_backtrace(void) | 557 | void arch_trigger_all_cpu_backtrace(void) |
556 | { | 558 | { |
557 | int i; | 559 | int i; |
558 | 560 | ||
559 | cpumask_copy(backtrace_mask, cpu_online_mask); | 561 | cpumask_copy(&backtrace_mask, cpu_online_mask); |
562 | |||
563 | printk(KERN_INFO "sending NMI to all CPUs:\n"); | ||
564 | apic->send_IPI_all(NMI_VECTOR); | ||
565 | |||
560 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | 566 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ |
561 | for (i = 0; i < 10 * 1000; i++) { | 567 | for (i = 0; i < 10 * 1000; i++) { |
562 | if (cpumask_empty(backtrace_mask)) | 568 | if (cpumask_empty(&backtrace_mask)) |
563 | break; | 569 | break; |
564 | mdelay(1); | 570 | mdelay(1); |
565 | } | 571 | } |
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 533e59c6fc82..ca96e68f0d23 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c | |||
@@ -493,7 +493,8 @@ static void numaq_setup_portio_remap(void) | |||
493 | (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); | 493 | (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); |
494 | } | 494 | } |
495 | 495 | ||
496 | struct apic apic_numaq = { | 496 | /* Use __refdata to keep false positive warning calm. */ |
497 | struct apic __refdata apic_numaq = { | ||
497 | 498 | ||
498 | .name = "NUMAQ", | 499 | .name = "NUMAQ", |
499 | .probe = probe_numaq, | 500 | .probe = probe_numaq, |
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index bc3e880f9b82..65edc180fc82 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c | |||
@@ -44,17 +44,22 @@ static struct apic *apic_probe[] __initdata = { | |||
44 | NULL, | 44 | NULL, |
45 | }; | 45 | }; |
46 | 46 | ||
47 | static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) | ||
48 | { | ||
49 | return hard_smp_processor_id() >> index_msb; | ||
50 | } | ||
51 | |||
47 | /* | 52 | /* |
48 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. | 53 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. |
49 | */ | 54 | */ |
50 | void __init default_setup_apic_routing(void) | 55 | void __init default_setup_apic_routing(void) |
51 | { | 56 | { |
52 | #ifdef CONFIG_X86_X2APIC | 57 | #ifdef CONFIG_X86_X2APIC |
53 | if (x2apic_mode && (apic != &apic_x2apic_phys && | 58 | if (x2apic_mode |
54 | #ifdef CONFIG_X86_UV | 59 | #ifdef CONFIG_X86_UV |
55 | apic != &apic_x2apic_uv_x && | 60 | && apic != &apic_x2apic_uv_x |
56 | #endif | 61 | #endif |
57 | apic != &apic_x2apic_cluster)) { | 62 | ) { |
58 | if (x2apic_phys) | 63 | if (x2apic_phys) |
59 | apic = &apic_x2apic_phys; | 64 | apic = &apic_x2apic_phys; |
60 | else | 65 | else |
@@ -69,6 +74,11 @@ void __init default_setup_apic_routing(void) | |||
69 | printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); | 74 | printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); |
70 | } | 75 | } |
71 | 76 | ||
77 | if (is_vsmp_box()) { | ||
78 | /* need to update phys_pkg_id */ | ||
79 | apic->phys_pkg_id = apicid_phys_pkg_id; | ||
80 | } | ||
81 | |||
72 | /* | 82 | /* |
73 | * Now that apic routing model is selected, configure the | 83 | * Now that apic routing model is selected, configure the |
74 | * fault handling for intr remapping. | 84 | * fault handling for intr remapping. |
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 8e4cbb255c38..a5371ec36776 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
@@ -17,11 +17,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
17 | return x2apic_enabled(); | 17 | return x2apic_enabled(); |
18 | } | 18 | } |
19 | 19 | ||
20 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 20 | /* |
21 | 21 | * need to use more than cpu 0, because we need more vectors when | |
22 | * MSI-X are used. | ||
23 | */ | ||
22 | static const struct cpumask *x2apic_target_cpus(void) | 24 | static const struct cpumask *x2apic_target_cpus(void) |
23 | { | 25 | { |
24 | return cpumask_of(0); | 26 | return cpu_online_mask; |
25 | } | 27 | } |
26 | 28 | ||
27 | /* | 29 | /* |
@@ -170,7 +172,7 @@ static unsigned long set_apic_id(unsigned int id) | |||
170 | 172 | ||
171 | static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) | 173 | static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) |
172 | { | 174 | { |
173 | return current_cpu_data.initial_apicid >> index_msb; | 175 | return initial_apicid >> index_msb; |
174 | } | 176 | } |
175 | 177 | ||
176 | static void x2apic_send_IPI_self(int vector) | 178 | static void x2apic_send_IPI_self(int vector) |
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index a284359627e7..a8989aadc99a 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c | |||
@@ -27,11 +27,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
27 | return 0; | 27 | return 0; |
28 | } | 28 | } |
29 | 29 | ||
30 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 30 | /* |
31 | 31 | * need to use more than cpu 0, because we need more vectors when | |
32 | * MSI-X are used. | ||
33 | */ | ||
32 | static const struct cpumask *x2apic_target_cpus(void) | 34 | static const struct cpumask *x2apic_target_cpus(void) |
33 | { | 35 | { |
34 | return cpumask_of(0); | 36 | return cpu_online_mask; |
35 | } | 37 | } |
36 | 38 | ||
37 | static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) | 39 | static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) |
@@ -162,7 +164,7 @@ static unsigned long set_apic_id(unsigned int id) | |||
162 | 164 | ||
163 | static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) | 165 | static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) |
164 | { | 166 | { |
165 | return current_cpu_data.initial_apicid >> index_msb; | 167 | return initial_apicid >> index_msb; |
166 | } | 168 | } |
167 | 169 | ||
168 | static void x2apic_send_IPI_self(int vector) | 170 | static void x2apic_send_IPI_self(int vector) |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 096d19aea2f7..601159374e87 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -46,7 +46,7 @@ static int early_get_nodeid(void) | |||
46 | return node_id.s.node_id; | 46 | return node_id.s.node_id; |
47 | } | 47 | } |
48 | 48 | ||
49 | static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 49 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
50 | { | 50 | { |
51 | if (!strcmp(oem_id, "SGI")) { | 51 | if (!strcmp(oem_id, "SGI")) { |
52 | if (!strcmp(oem_table_id, "UVL")) | 52 | if (!strcmp(oem_table_id, "UVL")) |
@@ -253,7 +253,7 @@ static void uv_send_IPI_self(int vector) | |||
253 | apic_write(APIC_SELF_IPI, vector); | 253 | apic_write(APIC_SELF_IPI, vector); |
254 | } | 254 | } |
255 | 255 | ||
256 | struct apic apic_x2apic_uv_x = { | 256 | struct apic __refdata apic_x2apic_uv_x = { |
257 | 257 | ||
258 | .name = "UV large system", | 258 | .name = "UV large system", |
259 | .probe = NULL, | 259 | .probe = NULL, |
@@ -261,7 +261,7 @@ struct apic apic_x2apic_uv_x = { | |||
261 | .apic_id_registered = uv_apic_id_registered, | 261 | .apic_id_registered = uv_apic_id_registered, |
262 | 262 | ||
263 | .irq_delivery_mode = dest_Fixed, | 263 | .irq_delivery_mode = dest_Fixed, |
264 | .irq_dest_mode = 1, /* logical */ | 264 | .irq_dest_mode = 0, /* physical */ |
265 | 265 | ||
266 | .target_cpus = uv_target_cpus, | 266 | .target_cpus = uv_target_cpus, |
267 | .disable_esr = 0, | 267 | .disable_esr = 0, |
@@ -362,12 +362,6 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) | |||
362 | BUG(); | 362 | BUG(); |
363 | } | 363 | } |
364 | 364 | ||
365 | static __init void map_low_mmrs(void) | ||
366 | { | ||
367 | init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); | ||
368 | init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); | ||
369 | } | ||
370 | |||
371 | enum map_type {map_wb, map_uc}; | 365 | enum map_type {map_wb, map_uc}; |
372 | 366 | ||
373 | static __init void map_high(char *id, unsigned long base, int shift, | 367 | static __init void map_high(char *id, unsigned long base, int shift, |
@@ -395,26 +389,6 @@ static __init void map_gru_high(int max_pnode) | |||
395 | map_high("GRU", gru.s.base, shift, max_pnode, map_wb); | 389 | map_high("GRU", gru.s.base, shift, max_pnode, map_wb); |
396 | } | 390 | } |
397 | 391 | ||
398 | static __init void map_config_high(int max_pnode) | ||
399 | { | ||
400 | union uvh_rh_gam_cfg_overlay_config_mmr_u cfg; | ||
401 | int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT; | ||
402 | |||
403 | cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR); | ||
404 | if (cfg.s.enable) | ||
405 | map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc); | ||
406 | } | ||
407 | |||
408 | static __init void map_mmr_high(int max_pnode) | ||
409 | { | ||
410 | union uvh_rh_gam_mmr_overlay_config_mmr_u mmr; | ||
411 | int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT; | ||
412 | |||
413 | mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); | ||
414 | if (mmr.s.enable) | ||
415 | map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); | ||
416 | } | ||
417 | |||
418 | static __init void map_mmioh_high(int max_pnode) | 392 | static __init void map_mmioh_high(int max_pnode) |
419 | { | 393 | { |
420 | union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; | 394 | union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; |
@@ -566,8 +540,6 @@ void __init uv_system_init(void) | |||
566 | unsigned long mmr_base, present, paddr; | 540 | unsigned long mmr_base, present, paddr; |
567 | unsigned short pnode_mask; | 541 | unsigned short pnode_mask; |
568 | 542 | ||
569 | map_low_mmrs(); | ||
570 | |||
571 | m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); | 543 | m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); |
572 | m_val = m_n_config.s.m_skt; | 544 | m_val = m_n_config.s.m_skt; |
573 | n_val = m_n_config.s.n_skt; | 545 | n_val = m_n_config.s.n_skt; |
@@ -591,6 +563,8 @@ void __init uv_system_init(void) | |||
591 | bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); | 563 | bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); |
592 | uv_blade_info = kmalloc(bytes, GFP_KERNEL); | 564 | uv_blade_info = kmalloc(bytes, GFP_KERNEL); |
593 | BUG_ON(!uv_blade_info); | 565 | BUG_ON(!uv_blade_info); |
566 | for (blade = 0; blade < uv_num_possible_blades(); blade++) | ||
567 | uv_blade_info[blade].memory_nid = -1; | ||
594 | 568 | ||
595 | get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); | 569 | get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); |
596 | 570 | ||
@@ -629,6 +603,9 @@ void __init uv_system_init(void) | |||
629 | lcpu = uv_blade_info[blade].nr_possible_cpus; | 603 | lcpu = uv_blade_info[blade].nr_possible_cpus; |
630 | uv_blade_info[blade].nr_possible_cpus++; | 604 | uv_blade_info[blade].nr_possible_cpus++; |
631 | 605 | ||
606 | /* Any node on the blade, else will contain -1. */ | ||
607 | uv_blade_info[blade].memory_nid = nid; | ||
608 | |||
632 | uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; | 609 | uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; |
633 | uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; | 610 | uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; |
634 | uv_cpu_hub_info(cpu)->m_val = m_val; | 611 | uv_cpu_hub_info(cpu)->m_val = m_val; |
@@ -662,11 +639,10 @@ void __init uv_system_init(void) | |||
662 | pnode = (paddr >> m_val) & pnode_mask; | 639 | pnode = (paddr >> m_val) & pnode_mask; |
663 | blade = boot_pnode_to_blade(pnode); | 640 | blade = boot_pnode_to_blade(pnode); |
664 | uv_node_to_blade[nid] = blade; | 641 | uv_node_to_blade[nid] = blade; |
642 | max_pnode = max(pnode, max_pnode); | ||
665 | } | 643 | } |
666 | 644 | ||
667 | map_gru_high(max_pnode); | 645 | map_gru_high(max_pnode); |
668 | map_mmr_high(max_pnode); | ||
669 | map_config_high(max_pnode); | ||
670 | map_mmioh_high(max_pnode); | 646 | map_mmioh_high(max_pnode); |
671 | 647 | ||
672 | uv_cpu_init(); | 648 | uv_cpu_init(); |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 79302e9a33a4..151ace69a5aa 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -403,7 +403,15 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); | |||
403 | static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); | 403 | static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); |
404 | static struct apm_user *user_list; | 404 | static struct apm_user *user_list; |
405 | static DEFINE_SPINLOCK(user_list_lock); | 405 | static DEFINE_SPINLOCK(user_list_lock); |
406 | static const struct desc_struct bad_bios_desc = { { { 0, 0x00409200 } } }; | 406 | |
407 | /* | ||
408 | * Set up a segment that references the real mode segment 0x40 | ||
409 | * that extends up to the end of page zero (that we have reserved). | ||
410 | * This is for buggy BIOS's that refer to (real mode) segment 0x40 | ||
411 | * even though they are called in protected mode. | ||
412 | */ | ||
413 | static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092, | ||
414 | (unsigned long)__va(0x400UL), PAGE_SIZE - 0x400 - 1); | ||
407 | 415 | ||
408 | static const char driver_version[] = "1.16ac"; /* no spaces */ | 416 | static const char driver_version[] = "1.16ac"; /* no spaces */ |
409 | 417 | ||
@@ -811,7 +819,7 @@ static int apm_do_idle(void) | |||
811 | u8 ret = 0; | 819 | u8 ret = 0; |
812 | int idled = 0; | 820 | int idled = 0; |
813 | int polling; | 821 | int polling; |
814 | int err; | 822 | int err = 0; |
815 | 823 | ||
816 | polling = !!(current_thread_info()->status & TS_POLLING); | 824 | polling = !!(current_thread_info()->status & TS_POLLING); |
817 | if (polling) { | 825 | if (polling) { |
@@ -2332,15 +2340,6 @@ static int __init apm_init(void) | |||
2332 | pm_flags |= PM_APM; | 2340 | pm_flags |= PM_APM; |
2333 | 2341 | ||
2334 | /* | 2342 | /* |
2335 | * Set up a segment that references the real mode segment 0x40 | ||
2336 | * that extends up to the end of page zero (that we have reserved). | ||
2337 | * This is for buggy BIOS's that refer to (real mode) segment 0x40 | ||
2338 | * even though they are called in protected mode. | ||
2339 | */ | ||
2340 | set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); | ||
2341 | _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); | ||
2342 | |||
2343 | /* | ||
2344 | * Set up the long jump entry point to the APM BIOS, which is called | 2343 | * Set up the long jump entry point to the APM BIOS, which is called |
2345 | * from inline assembly. | 2344 | * from inline assembly. |
2346 | */ | 2345 | */ |
@@ -2358,12 +2357,12 @@ static int __init apm_init(void) | |||
2358 | * code to that CPU. | 2357 | * code to that CPU. |
2359 | */ | 2358 | */ |
2360 | gdt = get_cpu_gdt_table(0); | 2359 | gdt = get_cpu_gdt_table(0); |
2361 | set_base(gdt[APM_CS >> 3], | 2360 | set_desc_base(&gdt[APM_CS >> 3], |
2362 | __va((unsigned long)apm_info.bios.cseg << 4)); | 2361 | (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4)); |
2363 | set_base(gdt[APM_CS_16 >> 3], | 2362 | set_desc_base(&gdt[APM_CS_16 >> 3], |
2364 | __va((unsigned long)apm_info.bios.cseg_16 << 4)); | 2363 | (unsigned long)__va((unsigned long)apm_info.bios.cseg_16 << 4)); |
2365 | set_base(gdt[APM_DS >> 3], | 2364 | set_desc_base(&gdt[APM_DS >> 3], |
2366 | __va((unsigned long)apm_info.bios.dseg << 4)); | 2365 | (unsigned long)__va((unsigned long)apm_info.bios.dseg << 4)); |
2367 | 2366 | ||
2368 | proc_create("apm", 0, NULL, &apm_file_ops); | 2367 | proc_create("apm", 0, NULL, &apm_file_ops); |
2369 | 2368 | ||
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 898ecc47e129..4a6aeedcd965 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -3,6 +3,7 @@ | |||
3 | * This code generates raw asm output which is post-processed to extract | 3 | * This code generates raw asm output which is post-processed to extract |
4 | * and format the required data. | 4 | * and format the required data. |
5 | */ | 5 | */ |
6 | #define COMPILE_OFFSETS | ||
6 | 7 | ||
7 | #include <linux/crypto.h> | 8 | #include <linux/crypto.h> |
8 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 3efcb2b96a15..c1f253dac155 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -7,6 +7,10 @@ ifdef CONFIG_FUNCTION_TRACER | |||
7 | CFLAGS_REMOVE_common.o = -pg | 7 | CFLAGS_REMOVE_common.o = -pg |
8 | endif | 8 | endif |
9 | 9 | ||
10 | # Make sure load_percpu_segment has no stackprotector | ||
11 | nostackp := $(call cc-option, -fno-stack-protector) | ||
12 | CFLAGS_common.o := $(nostackp) | ||
13 | |||
10 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 14 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
11 | obj-y += proc.o capflags.o powerflags.o common.o | 15 | obj-y += proc.o capflags.o powerflags.o common.o |
12 | obj-y += vmware.o hypervisor.o | 16 | obj-y += vmware.o hypervisor.o |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e5b27d8f1b47..22a47c82f3c0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -2,7 +2,7 @@ | |||
2 | #include <linux/bitops.h> | 2 | #include <linux/bitops.h> |
3 | #include <linux/mm.h> | 3 | #include <linux/mm.h> |
4 | 4 | ||
5 | #include <asm/io.h> | 5 | #include <linux/io.h> |
6 | #include <asm/processor.h> | 6 | #include <asm/processor.h> |
7 | #include <asm/apic.h> | 7 | #include <asm/apic.h> |
8 | #include <asm/cpu.h> | 8 | #include <asm/cpu.h> |
@@ -45,8 +45,8 @@ static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) | |||
45 | #define CBAR_ENB (0x80000000) | 45 | #define CBAR_ENB (0x80000000) |
46 | #define CBAR_KEY (0X000000CB) | 46 | #define CBAR_KEY (0X000000CB) |
47 | if (c->x86_model == 9 || c->x86_model == 10) { | 47 | if (c->x86_model == 9 || c->x86_model == 10) { |
48 | if (inl (CBAR) & CBAR_ENB) | 48 | if (inl(CBAR) & CBAR_ENB) |
49 | outl (0 | CBAR_KEY, CBAR); | 49 | outl(0 | CBAR_KEY, CBAR); |
50 | } | 50 | } |
51 | } | 51 | } |
52 | 52 | ||
@@ -87,9 +87,10 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) | |||
87 | d = d2-d; | 87 | d = d2-d; |
88 | 88 | ||
89 | if (d > 20*K6_BUG_LOOP) | 89 | if (d > 20*K6_BUG_LOOP) |
90 | printk("system stability may be impaired when more than 32 MB are used.\n"); | 90 | printk(KERN_CONT |
91 | "system stability may be impaired when more than 32 MB are used.\n"); | ||
91 | else | 92 | else |
92 | printk("probably OK (after B9730xxxx).\n"); | 93 | printk(KERN_CONT "probably OK (after B9730xxxx).\n"); |
93 | printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); | 94 | printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); |
94 | } | 95 | } |
95 | 96 | ||
@@ -219,8 +220,9 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) | |||
219 | if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { | 220 | if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { |
220 | rdmsr(MSR_K7_CLK_CTL, l, h); | 221 | rdmsr(MSR_K7_CLK_CTL, l, h); |
221 | if ((l & 0xfff00000) != 0x20000000) { | 222 | if ((l & 0xfff00000) != 0x20000000) { |
222 | printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, | 223 | printk(KERN_INFO |
223 | ((l & 0x000fffff)|0x20000000)); | 224 | "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", |
225 | l, ((l & 0x000fffff)|0x20000000)); | ||
224 | wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); | 226 | wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); |
225 | } | 227 | } |
226 | } | 228 | } |
@@ -251,6 +253,64 @@ static int __cpuinit nearby_node(int apicid) | |||
251 | #endif | 253 | #endif |
252 | 254 | ||
253 | /* | 255 | /* |
256 | * Fixup core topology information for AMD multi-node processors. | ||
257 | * Assumption 1: Number of cores in each internal node is the same. | ||
258 | * Assumption 2: Mixed systems with both single-node and dual-node | ||
259 | * processors are not supported. | ||
260 | */ | ||
261 | #ifdef CONFIG_X86_HT | ||
262 | static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) | ||
263 | { | ||
264 | #ifdef CONFIG_PCI | ||
265 | u32 t, cpn; | ||
266 | u8 n, n_id; | ||
267 | int cpu = smp_processor_id(); | ||
268 | |||
269 | /* fixup topology information only once for a core */ | ||
270 | if (cpu_has(c, X86_FEATURE_AMD_DCM)) | ||
271 | return; | ||
272 | |||
273 | /* check for multi-node processor on boot cpu */ | ||
274 | t = read_pci_config(0, 24, 3, 0xe8); | ||
275 | if (!(t & (1 << 29))) | ||
276 | return; | ||
277 | |||
278 | set_cpu_cap(c, X86_FEATURE_AMD_DCM); | ||
279 | |||
280 | /* cores per node: each internal node has half the number of cores */ | ||
281 | cpn = c->x86_max_cores >> 1; | ||
282 | |||
283 | /* even-numbered NB_id of this dual-node processor */ | ||
284 | n = c->phys_proc_id << 1; | ||
285 | |||
286 | /* | ||
287 | * determine internal node id and assign cores fifty-fifty to | ||
288 | * each node of the dual-node processor | ||
289 | */ | ||
290 | t = read_pci_config(0, 24 + n, 3, 0xe8); | ||
291 | n = (t>>30) & 0x3; | ||
292 | if (n == 0) { | ||
293 | if (c->cpu_core_id < cpn) | ||
294 | n_id = 0; | ||
295 | else | ||
296 | n_id = 1; | ||
297 | } else { | ||
298 | if (c->cpu_core_id < cpn) | ||
299 | n_id = 1; | ||
300 | else | ||
301 | n_id = 0; | ||
302 | } | ||
303 | |||
304 | /* compute entire NodeID, use llc_shared_map to store sibling info */ | ||
305 | per_cpu(cpu_llc_id, cpu) = (c->phys_proc_id << 1) + n_id; | ||
306 | |||
307 | /* fixup core id to be in range from 0 to cpn */ | ||
308 | c->cpu_core_id = c->cpu_core_id % cpn; | ||
309 | #endif | ||
310 | } | ||
311 | #endif | ||
312 | |||
313 | /* | ||
254 | * On a AMD dual core setup the lower bits of the APIC id distingush the cores. | 314 | * On a AMD dual core setup the lower bits of the APIC id distingush the cores. |
255 | * Assumes number of cores is a power of two. | 315 | * Assumes number of cores is a power of two. |
256 | */ | 316 | */ |
@@ -258,13 +318,18 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | |||
258 | { | 318 | { |
259 | #ifdef CONFIG_X86_HT | 319 | #ifdef CONFIG_X86_HT |
260 | unsigned bits; | 320 | unsigned bits; |
321 | int cpu = smp_processor_id(); | ||
261 | 322 | ||
262 | bits = c->x86_coreid_bits; | 323 | bits = c->x86_coreid_bits; |
263 | |||
264 | /* Low order bits define the core id (index of core in socket) */ | 324 | /* Low order bits define the core id (index of core in socket) */ |
265 | c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); | 325 | c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); |
266 | /* Convert the initial APIC ID into the socket ID */ | 326 | /* Convert the initial APIC ID into the socket ID */ |
267 | c->phys_proc_id = c->initial_apicid >> bits; | 327 | c->phys_proc_id = c->initial_apicid >> bits; |
328 | /* use socket ID also for last level cache */ | ||
329 | per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; | ||
330 | /* fixup topology information on multi-node processors */ | ||
331 | if ((c->x86 == 0x10) && (c->x86_model == 9)) | ||
332 | amd_fixup_dcm(c); | ||
268 | #endif | 333 | #endif |
269 | } | 334 | } |
270 | 335 | ||
@@ -273,9 +338,10 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | |||
273 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | 338 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) |
274 | int cpu = smp_processor_id(); | 339 | int cpu = smp_processor_id(); |
275 | int node; | 340 | int node; |
276 | unsigned apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid; | 341 | unsigned apicid = c->apicid; |
342 | |||
343 | node = per_cpu(cpu_llc_id, cpu); | ||
277 | 344 | ||
278 | node = c->phys_proc_id; | ||
279 | if (apicid_to_node[apicid] != NUMA_NO_NODE) | 345 | if (apicid_to_node[apicid] != NUMA_NO_NODE) |
280 | node = apicid_to_node[apicid]; | 346 | node = apicid_to_node[apicid]; |
281 | if (!node_online(node)) { | 347 | if (!node_online(node)) { |
@@ -354,7 +420,7 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
354 | #endif | 420 | #endif |
355 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) | 421 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) |
356 | /* check CPU config space for extended APIC ID */ | 422 | /* check CPU config space for extended APIC ID */ |
357 | if (c->x86 >= 0xf) { | 423 | if (cpu_has_apic && c->x86 >= 0xf) { |
358 | unsigned int val; | 424 | unsigned int val; |
359 | val = read_pci_config(0, 24, 0, 0x68); | 425 | val = read_pci_config(0, 24, 0, 0x68); |
360 | if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18))) | 426 | if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18))) |
@@ -396,11 +462,30 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
396 | u32 level; | 462 | u32 level; |
397 | 463 | ||
398 | level = cpuid_eax(1); | 464 | level = cpuid_eax(1); |
399 | if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) | 465 | if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) |
400 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | 466 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
467 | |||
468 | /* | ||
469 | * Some BIOSes incorrectly force this feature, but only K8 | ||
470 | * revision D (model = 0x14) and later actually support it. | ||
471 | * (AMD Erratum #110, docId: 25759). | ||
472 | */ | ||
473 | if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { | ||
474 | u64 val; | ||
475 | |||
476 | clear_cpu_cap(c, X86_FEATURE_LAHF_LM); | ||
477 | if (!rdmsrl_amd_safe(0xc001100d, &val)) { | ||
478 | val &= ~(1ULL << 32); | ||
479 | wrmsrl_amd_safe(0xc001100d, val); | ||
480 | } | ||
481 | } | ||
482 | |||
401 | } | 483 | } |
402 | if (c->x86 == 0x10 || c->x86 == 0x11) | 484 | if (c->x86 == 0x10 || c->x86 == 0x11) |
403 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | 485 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
486 | |||
487 | /* get apicid instead of initial apic id from cpuid */ | ||
488 | c->apicid = hard_smp_processor_id(); | ||
404 | #else | 489 | #else |
405 | 490 | ||
406 | /* | 491 | /* |
@@ -485,27 +570,30 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
485 | * benefit in doing so. | 570 | * benefit in doing so. |
486 | */ | 571 | */ |
487 | if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { | 572 | if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { |
488 | printk(KERN_DEBUG "tseg: %010llx\n", tseg); | 573 | printk(KERN_DEBUG "tseg: %010llx\n", tseg); |
489 | if ((tseg>>PMD_SHIFT) < | 574 | if ((tseg>>PMD_SHIFT) < |
490 | (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || | 575 | (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || |
491 | ((tseg>>PMD_SHIFT) < | 576 | ((tseg>>PMD_SHIFT) < |
492 | (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && | 577 | (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && |
493 | (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) | 578 | (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) |
494 | set_memory_4k((unsigned long)__va(tseg), 1); | 579 | set_memory_4k((unsigned long)__va(tseg), 1); |
495 | } | 580 | } |
496 | } | 581 | } |
497 | #endif | 582 | #endif |
498 | } | 583 | } |
499 | 584 | ||
500 | #ifdef CONFIG_X86_32 | 585 | #ifdef CONFIG_X86_32 |
501 | static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) | 586 | static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, |
587 | unsigned int size) | ||
502 | { | 588 | { |
503 | /* AMD errata T13 (order #21922) */ | 589 | /* AMD errata T13 (order #21922) */ |
504 | if ((c->x86 == 6)) { | 590 | if ((c->x86 == 6)) { |
505 | if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ | 591 | /* Duron Rev A0 */ |
592 | if (c->x86_model == 3 && c->x86_mask == 0) | ||
506 | size = 64; | 593 | size = 64; |
594 | /* Tbird rev A1/A2 */ | ||
507 | if (c->x86_model == 4 && | 595 | if (c->x86_model == 4 && |
508 | (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */ | 596 | (c->x86_mask == 0 || c->x86_mask == 1)) |
509 | size = 256; | 597 | size = 256; |
510 | } | 598 | } |
511 | return size; | 599 | return size; |
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c8e315f1aa83..01a265212395 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
@@ -81,7 +81,7 @@ static void __init check_fpu(void) | |||
81 | 81 | ||
82 | boot_cpu_data.fdiv_bug = fdiv_bug; | 82 | boot_cpu_data.fdiv_bug = fdiv_bug; |
83 | if (boot_cpu_data.fdiv_bug) | 83 | if (boot_cpu_data.fdiv_bug) |
84 | printk("Hmm, FPU with FDIV bug.\n"); | 84 | printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n"); |
85 | } | 85 | } |
86 | 86 | ||
87 | static void __init check_hlt(void) | 87 | static void __init check_hlt(void) |
@@ -98,7 +98,7 @@ static void __init check_hlt(void) | |||
98 | halt(); | 98 | halt(); |
99 | halt(); | 99 | halt(); |
100 | halt(); | 100 | halt(); |
101 | printk("OK.\n"); | 101 | printk(KERN_CONT "OK.\n"); |
102 | } | 102 | } |
103 | 103 | ||
104 | /* | 104 | /* |
@@ -122,9 +122,9 @@ static void __init check_popad(void) | |||
122 | * CPU hard. Too bad. | 122 | * CPU hard. Too bad. |
123 | */ | 123 | */ |
124 | if (res != 12345678) | 124 | if (res != 12345678) |
125 | printk("Buggy.\n"); | 125 | printk(KERN_CONT "Buggy.\n"); |
126 | else | 126 | else |
127 | printk("OK.\n"); | 127 | printk(KERN_CONT "OK.\n"); |
128 | #endif | 128 | #endif |
129 | } | 129 | } |
130 | 130 | ||
@@ -156,7 +156,7 @@ void __init check_bugs(void) | |||
156 | { | 156 | { |
157 | identify_boot_cpu(); | 157 | identify_boot_cpu(); |
158 | #ifndef CONFIG_SMP | 158 | #ifndef CONFIG_SMP |
159 | printk("CPU: "); | 159 | printk(KERN_INFO "CPU: "); |
160 | print_cpu_info(&boot_cpu_data); | 160 | print_cpu_info(&boot_cpu_data); |
161 | #endif | 161 | #endif |
162 | check_config(); | 162 | check_config(); |
diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c index 9a3ed0649d4e..04f0fe5af83e 100644 --- a/arch/x86/kernel/cpu/bugs_64.c +++ b/arch/x86/kernel/cpu/bugs_64.c | |||
@@ -15,7 +15,7 @@ void __init check_bugs(void) | |||
15 | { | 15 | { |
16 | identify_boot_cpu(); | 16 | identify_boot_cpu(); |
17 | #if !defined(CONFIG_SMP) | 17 | #if !defined(CONFIG_SMP) |
18 | printk("CPU: "); | 18 | printk(KERN_INFO "CPU: "); |
19 | print_cpu_info(&boot_cpu_data); | 19 | print_cpu_info(&boot_cpu_data); |
20 | #endif | 20 | #endif |
21 | alternative_instructions(); | 21 | alternative_instructions(); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6b26d4deada0..2055fc2b2e6b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -18,8 +18,8 @@ | |||
18 | #include <asm/hypervisor.h> | 18 | #include <asm/hypervisor.h> |
19 | #include <asm/processor.h> | 19 | #include <asm/processor.h> |
20 | #include <asm/sections.h> | 20 | #include <asm/sections.h> |
21 | #include <asm/topology.h> | 21 | #include <linux/topology.h> |
22 | #include <asm/cpumask.h> | 22 | #include <linux/cpumask.h> |
23 | #include <asm/pgtable.h> | 23 | #include <asm/pgtable.h> |
24 | #include <asm/atomic.h> | 24 | #include <asm/atomic.h> |
25 | #include <asm/proto.h> | 25 | #include <asm/proto.h> |
@@ -28,13 +28,13 @@ | |||
28 | #include <asm/desc.h> | 28 | #include <asm/desc.h> |
29 | #include <asm/i387.h> | 29 | #include <asm/i387.h> |
30 | #include <asm/mtrr.h> | 30 | #include <asm/mtrr.h> |
31 | #include <asm/numa.h> | 31 | #include <linux/numa.h> |
32 | #include <asm/asm.h> | 32 | #include <asm/asm.h> |
33 | #include <asm/cpu.h> | 33 | #include <asm/cpu.h> |
34 | #include <asm/mce.h> | 34 | #include <asm/mce.h> |
35 | #include <asm/msr.h> | 35 | #include <asm/msr.h> |
36 | #include <asm/pat.h> | 36 | #include <asm/pat.h> |
37 | #include <asm/smp.h> | 37 | #include <linux/smp.h> |
38 | 38 | ||
39 | #ifdef CONFIG_X86_LOCAL_APIC | 39 | #ifdef CONFIG_X86_LOCAL_APIC |
40 | #include <asm/uv/uv.h> | 40 | #include <asm/uv/uv.h> |
@@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void) | |||
59 | alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); | 59 | alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); |
60 | } | 60 | } |
61 | 61 | ||
62 | static const struct cpu_dev *this_cpu __cpuinitdata; | 62 | static void __cpuinit default_init(struct cpuinfo_x86 *c) |
63 | { | ||
64 | #ifdef CONFIG_X86_64 | ||
65 | display_cacheinfo(c); | ||
66 | #else | ||
67 | /* Not much we can do here... */ | ||
68 | /* Check if at least it has cpuid */ | ||
69 | if (c->cpuid_level == -1) { | ||
70 | /* No cpuid. It must be an ancient CPU */ | ||
71 | if (c->x86 == 4) | ||
72 | strcpy(c->x86_model_id, "486"); | ||
73 | else if (c->x86 == 3) | ||
74 | strcpy(c->x86_model_id, "386"); | ||
75 | } | ||
76 | #endif | ||
77 | } | ||
78 | |||
79 | static const struct cpu_dev __cpuinitconst default_cpu = { | ||
80 | .c_init = default_init, | ||
81 | .c_vendor = "Unknown", | ||
82 | .c_x86_vendor = X86_VENDOR_UNKNOWN, | ||
83 | }; | ||
84 | |||
85 | static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; | ||
63 | 86 | ||
64 | DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | 87 | DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { |
65 | #ifdef CONFIG_X86_64 | 88 | #ifdef CONFIG_X86_64 |
@@ -71,45 +94,45 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | |||
71 | * TLS descriptors are currently at a different place compared to i386. | 94 | * TLS descriptors are currently at a different place compared to i386. |
72 | * Hopefully nobody expects them at a fixed place (Wine?) | 95 | * Hopefully nobody expects them at a fixed place (Wine?) |
73 | */ | 96 | */ |
74 | [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, | 97 | [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), |
75 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, | 98 | [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), |
76 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, | 99 | [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), |
77 | [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, | 100 | [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff), |
78 | [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, | 101 | [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff), |
79 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, | 102 | [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff), |
80 | #else | 103 | #else |
81 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, | 104 | [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff), |
82 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, | 105 | [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), |
83 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, | 106 | [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff), |
84 | [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, | 107 | [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff), |
85 | /* | 108 | /* |
86 | * Segments used for calling PnP BIOS have byte granularity. | 109 | * Segments used for calling PnP BIOS have byte granularity. |
87 | * They code segments and data segments have fixed 64k limits, | 110 | * They code segments and data segments have fixed 64k limits, |
88 | * the transfer segment sizes are set at run time. | 111 | * the transfer segment sizes are set at run time. |
89 | */ | 112 | */ |
90 | /* 32-bit code */ | 113 | /* 32-bit code */ |
91 | [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, | 114 | [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), |
92 | /* 16-bit code */ | 115 | /* 16-bit code */ |
93 | [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, | 116 | [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), |
94 | /* 16-bit data */ | 117 | /* 16-bit data */ |
95 | [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, | 118 | [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff), |
96 | /* 16-bit data */ | 119 | /* 16-bit data */ |
97 | [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, | 120 | [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0), |
98 | /* 16-bit data */ | 121 | /* 16-bit data */ |
99 | [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, | 122 | [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0), |
100 | /* | 123 | /* |
101 | * The APM segments have byte granularity and their bases | 124 | * The APM segments have byte granularity and their bases |
102 | * are set at run time. All have 64k limits. | 125 | * are set at run time. All have 64k limits. |
103 | */ | 126 | */ |
104 | /* 32-bit code */ | 127 | /* 32-bit code */ |
105 | [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, | 128 | [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), |
106 | /* 16-bit code */ | 129 | /* 16-bit code */ |
107 | [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, | 130 | [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), |
108 | /* data */ | 131 | /* data */ |
109 | [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, | 132 | [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff), |
110 | 133 | ||
111 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } }, | 134 | [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), |
112 | [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, | 135 | [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), |
113 | GDT_STACK_CANARY_INIT | 136 | GDT_STACK_CANARY_INIT |
114 | #endif | 137 | #endif |
115 | } }; | 138 | } }; |
@@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu) | |||
332 | 355 | ||
333 | static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; | 356 | static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; |
334 | 357 | ||
335 | static void __cpuinit default_init(struct cpuinfo_x86 *c) | ||
336 | { | ||
337 | #ifdef CONFIG_X86_64 | ||
338 | display_cacheinfo(c); | ||
339 | #else | ||
340 | /* Not much we can do here... */ | ||
341 | /* Check if at least it has cpuid */ | ||
342 | if (c->cpuid_level == -1) { | ||
343 | /* No cpuid. It must be an ancient CPU */ | ||
344 | if (c->x86 == 4) | ||
345 | strcpy(c->x86_model_id, "486"); | ||
346 | else if (c->x86 == 3) | ||
347 | strcpy(c->x86_model_id, "386"); | ||
348 | } | ||
349 | #endif | ||
350 | } | ||
351 | |||
352 | static const struct cpu_dev __cpuinitconst default_cpu = { | ||
353 | .c_init = default_init, | ||
354 | .c_vendor = "Unknown", | ||
355 | .c_x86_vendor = X86_VENDOR_UNKNOWN, | ||
356 | }; | ||
357 | |||
358 | static void __cpuinit get_model_name(struct cpuinfo_x86 *c) | 358 | static void __cpuinit get_model_name(struct cpuinfo_x86 *c) |
359 | { | 359 | { |
360 | unsigned int *v; | 360 | unsigned int *v; |
@@ -848,9 +848,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
848 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | 848 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) |
849 | numa_add_cpu(smp_processor_id()); | 849 | numa_add_cpu(smp_processor_id()); |
850 | #endif | 850 | #endif |
851 | |||
852 | /* Cap the iomem address space to what is addressable on all CPUs */ | ||
853 | iomem_resource.end &= (1ULL << c->x86_phys_bits) - 1; | ||
854 | } | 851 | } |
855 | 852 | ||
856 | #ifdef CONFIG_X86_64 | 853 | #ifdef CONFIG_X86_64 |
@@ -985,18 +982,26 @@ static __init int setup_disablecpuid(char *arg) | |||
985 | __setup("clearcpuid=", setup_disablecpuid); | 982 | __setup("clearcpuid=", setup_disablecpuid); |
986 | 983 | ||
987 | #ifdef CONFIG_X86_64 | 984 | #ifdef CONFIG_X86_64 |
988 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | 985 | struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; |
989 | 986 | ||
990 | DEFINE_PER_CPU_FIRST(union irq_stack_union, | 987 | DEFINE_PER_CPU_FIRST(union irq_stack_union, |
991 | irq_stack_union) __aligned(PAGE_SIZE); | 988 | irq_stack_union) __aligned(PAGE_SIZE); |
992 | 989 | ||
993 | DEFINE_PER_CPU(char *, irq_stack_ptr) = | 990 | /* |
994 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; | 991 | * The following four percpu variables are hot. Align current_task to |
992 | * cacheline size such that all four fall in the same cacheline. | ||
993 | */ | ||
994 | DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = | ||
995 | &init_task; | ||
996 | EXPORT_PER_CPU_SYMBOL(current_task); | ||
995 | 997 | ||
996 | DEFINE_PER_CPU(unsigned long, kernel_stack) = | 998 | DEFINE_PER_CPU(unsigned long, kernel_stack) = |
997 | (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; | 999 | (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; |
998 | EXPORT_PER_CPU_SYMBOL(kernel_stack); | 1000 | EXPORT_PER_CPU_SYMBOL(kernel_stack); |
999 | 1001 | ||
1002 | DEFINE_PER_CPU(char *, irq_stack_ptr) = | ||
1003 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; | ||
1004 | |||
1000 | DEFINE_PER_CPU(unsigned int, irq_count) = -1; | 1005 | DEFINE_PER_CPU(unsigned int, irq_count) = -1; |
1001 | 1006 | ||
1002 | /* | 1007 | /* |
@@ -1011,8 +1016,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { | |||
1011 | }; | 1016 | }; |
1012 | 1017 | ||
1013 | static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks | 1018 | static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks |
1014 | [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) | 1019 | [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); |
1015 | __aligned(PAGE_SIZE); | ||
1016 | 1020 | ||
1017 | /* May not be marked __init: used by software suspend */ | 1021 | /* May not be marked __init: used by software suspend */ |
1018 | void syscall_init(void) | 1022 | void syscall_init(void) |
@@ -1045,8 +1049,11 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist); | |||
1045 | 1049 | ||
1046 | #else /* CONFIG_X86_64 */ | 1050 | #else /* CONFIG_X86_64 */ |
1047 | 1051 | ||
1052 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | ||
1053 | EXPORT_PER_CPU_SYMBOL(current_task); | ||
1054 | |||
1048 | #ifdef CONFIG_CC_STACKPROTECTOR | 1055 | #ifdef CONFIG_CC_STACKPROTECTOR |
1049 | DEFINE_PER_CPU(unsigned long, stack_canary); | 1056 | DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); |
1050 | #endif | 1057 | #endif |
1051 | 1058 | ||
1052 | /* Make sure %fs and %gs are initialized properly in idle threads */ | 1059 | /* Make sure %fs and %gs are initialized properly in idle threads */ |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 81cbe64ed6b4..2a50ef891000 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -299,7 +299,7 @@ static int transition_pstate(struct powernow_k8_data *data, u32 pstate) | |||
299 | static int transition_fid_vid(struct powernow_k8_data *data, | 299 | static int transition_fid_vid(struct powernow_k8_data *data, |
300 | u32 reqfid, u32 reqvid) | 300 | u32 reqfid, u32 reqvid) |
301 | { | 301 | { |
302 | if (core_voltage_pre_transition(data, reqvid)) | 302 | if (core_voltage_pre_transition(data, reqvid, reqfid)) |
303 | return 1; | 303 | return 1; |
304 | 304 | ||
305 | if (core_frequency_transition(data, reqfid)) | 305 | if (core_frequency_transition(data, reqfid)) |
@@ -327,17 +327,20 @@ static int transition_fid_vid(struct powernow_k8_data *data, | |||
327 | 327 | ||
328 | /* Phase 1 - core voltage transition ... setup voltage */ | 328 | /* Phase 1 - core voltage transition ... setup voltage */ |
329 | static int core_voltage_pre_transition(struct powernow_k8_data *data, | 329 | static int core_voltage_pre_transition(struct powernow_k8_data *data, |
330 | u32 reqvid) | 330 | u32 reqvid, u32 reqfid) |
331 | { | 331 | { |
332 | u32 rvosteps = data->rvo; | 332 | u32 rvosteps = data->rvo; |
333 | u32 savefid = data->currfid; | 333 | u32 savefid = data->currfid; |
334 | u32 maxvid, lo; | 334 | u32 maxvid, lo, rvomult = 1; |
335 | 335 | ||
336 | dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " | 336 | dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " |
337 | "reqvid 0x%x, rvo 0x%x\n", | 337 | "reqvid 0x%x, rvo 0x%x\n", |
338 | smp_processor_id(), | 338 | smp_processor_id(), |
339 | data->currfid, data->currvid, reqvid, data->rvo); | 339 | data->currfid, data->currvid, reqvid, data->rvo); |
340 | 340 | ||
341 | if ((savefid < LO_FID_TABLE_TOP) && (reqfid < LO_FID_TABLE_TOP)) | ||
342 | rvomult = 2; | ||
343 | rvosteps *= rvomult; | ||
341 | rdmsr(MSR_FIDVID_STATUS, lo, maxvid); | 344 | rdmsr(MSR_FIDVID_STATUS, lo, maxvid); |
342 | maxvid = 0x1f & (maxvid >> 16); | 345 | maxvid = 0x1f & (maxvid >> 16); |
343 | dprintk("ph1 maxvid=0x%x\n", maxvid); | 346 | dprintk("ph1 maxvid=0x%x\n", maxvid); |
@@ -351,7 +354,8 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, | |||
351 | return 1; | 354 | return 1; |
352 | } | 355 | } |
353 | 356 | ||
354 | while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { | 357 | while ((rvosteps > 0) && |
358 | ((rvomult * data->rvo + data->currvid) > reqvid)) { | ||
355 | if (data->currvid == maxvid) { | 359 | if (data->currvid == maxvid) { |
356 | rvosteps = 0; | 360 | rvosteps = 0; |
357 | } else { | 361 | } else { |
@@ -384,13 +388,6 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) | |||
384 | u32 vcoreqfid, vcocurrfid, vcofiddiff; | 388 | u32 vcoreqfid, vcocurrfid, vcofiddiff; |
385 | u32 fid_interval, savevid = data->currvid; | 389 | u32 fid_interval, savevid = data->currvid; |
386 | 390 | ||
387 | if ((reqfid < HI_FID_TABLE_BOTTOM) && | ||
388 | (data->currfid < HI_FID_TABLE_BOTTOM)) { | ||
389 | printk(KERN_ERR PFX "ph2: illegal lo-lo transition " | ||
390 | "0x%x 0x%x\n", reqfid, data->currfid); | ||
391 | return 1; | ||
392 | } | ||
393 | |||
394 | if (data->currfid == reqfid) { | 391 | if (data->currfid == reqfid) { |
395 | printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", | 392 | printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", |
396 | data->currfid); | 393 | data->currfid); |
@@ -407,6 +404,9 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) | |||
407 | vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid | 404 | vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid |
408 | : vcoreqfid - vcocurrfid; | 405 | : vcoreqfid - vcocurrfid; |
409 | 406 | ||
407 | if ((reqfid <= LO_FID_TABLE_TOP) && (data->currfid <= LO_FID_TABLE_TOP)) | ||
408 | vcofiddiff = 0; | ||
409 | |||
410 | while (vcofiddiff > 2) { | 410 | while (vcofiddiff > 2) { |
411 | (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2); | 411 | (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2); |
412 | 412 | ||
@@ -1081,14 +1081,6 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, | |||
1081 | return 0; | 1081 | return 0; |
1082 | } | 1082 | } |
1083 | 1083 | ||
1084 | if ((fid < HI_FID_TABLE_BOTTOM) && | ||
1085 | (data->currfid < HI_FID_TABLE_BOTTOM)) { | ||
1086 | printk(KERN_ERR PFX | ||
1087 | "ignoring illegal change in lo freq table-%x to 0x%x\n", | ||
1088 | data->currfid, fid); | ||
1089 | return 1; | ||
1090 | } | ||
1091 | |||
1092 | dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", | 1084 | dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", |
1093 | smp_processor_id(), fid, vid); | 1085 | smp_processor_id(), fid, vid); |
1094 | freqs.old = find_khz_freq_from_fid(data->currfid); | 1086 | freqs.old = find_khz_freq_from_fid(data->currfid); |
@@ -1267,7 +1259,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1267 | { | 1259 | { |
1268 | static const char ACPI_PSS_BIOS_BUG_MSG[] = | 1260 | static const char ACPI_PSS_BIOS_BUG_MSG[] = |
1269 | KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" | 1261 | KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" |
1270 | KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; | 1262 | FW_BUG PFX "Try again with latest BIOS.\n"; |
1271 | struct powernow_k8_data *data; | 1263 | struct powernow_k8_data *data; |
1272 | struct init_on_cpu init_on_cpu; | 1264 | struct init_on_cpu init_on_cpu; |
1273 | int rc; | 1265 | int rc; |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index c9c1190b5e1f..02ce824073cb 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h | |||
@@ -215,7 +215,8 @@ struct pst_s { | |||
215 | 215 | ||
216 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg) | 216 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg) |
217 | 217 | ||
218 | static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid); | 218 | static int core_voltage_pre_transition(struct powernow_k8_data *data, |
219 | u32 reqvid, u32 regfid); | ||
219 | static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid); | 220 | static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid); |
220 | static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); | 221 | static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); |
221 | 222 | ||
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 593171e967ef..19807b89f058 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c | |||
@@ -3,10 +3,10 @@ | |||
3 | #include <linux/delay.h> | 3 | #include <linux/delay.h> |
4 | #include <linux/pci.h> | 4 | #include <linux/pci.h> |
5 | #include <asm/dma.h> | 5 | #include <asm/dma.h> |
6 | #include <asm/io.h> | 6 | #include <linux/io.h> |
7 | #include <asm/processor-cyrix.h> | 7 | #include <asm/processor-cyrix.h> |
8 | #include <asm/processor-flags.h> | 8 | #include <asm/processor-flags.h> |
9 | #include <asm/timer.h> | 9 | #include <linux/timer.h> |
10 | #include <asm/pci-direct.h> | 10 | #include <asm/pci-direct.h> |
11 | #include <asm/tsc.h> | 11 | #include <asm/tsc.h> |
12 | 12 | ||
@@ -282,7 +282,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | |||
282 | * The 5510/5520 companion chips have a funky PIT. | 282 | * The 5510/5520 companion chips have a funky PIT. |
283 | */ | 283 | */ |
284 | if (vendor == PCI_VENDOR_ID_CYRIX && | 284 | if (vendor == PCI_VENDOR_ID_CYRIX && |
285 | (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) | 285 | (device == PCI_DEVICE_ID_CYRIX_5510 || |
286 | device == PCI_DEVICE_ID_CYRIX_5520)) | ||
286 | mark_tsc_unstable("cyrix 5510/5520 detected"); | 287 | mark_tsc_unstable("cyrix 5510/5520 detected"); |
287 | } | 288 | } |
288 | #endif | 289 | #endif |
@@ -299,7 +300,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | |||
299 | * ? : 0x7x | 300 | * ? : 0x7x |
300 | * GX1 : 0x8x GX1 datasheet 56 | 301 | * GX1 : 0x8x GX1 datasheet 56 |
301 | */ | 302 | */ |
302 | if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) | 303 | if ((0x30 <= dir1 && dir1 <= 0x6f) || |
304 | (0x80 <= dir1 && dir1 <= 0x8f)) | ||
303 | geode_configure(); | 305 | geode_configure(); |
304 | return; | 306 | return; |
305 | } else { /* MediaGX */ | 307 | } else { /* MediaGX */ |
@@ -427,9 +429,12 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) | |||
427 | printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); | 429 | printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); |
428 | local_irq_save(flags); | 430 | local_irq_save(flags); |
429 | ccr3 = getCx86(CX86_CCR3); | 431 | ccr3 = getCx86(CX86_CCR3); |
430 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | 432 | /* enable MAPEN */ |
431 | setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */ | 433 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); |
432 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | 434 | /* enable cpuid */ |
435 | setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); | ||
436 | /* disable MAPEN */ | ||
437 | setCx86(CX86_CCR3, ccr3); | ||
433 | local_irq_restore(flags); | 438 | local_irq_restore(flags); |
434 | } | 439 | } |
435 | } | 440 | } |
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index fb5b86af0b01..93ba8eeb100a 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -28,11 +28,10 @@ | |||
28 | static inline void __cpuinit | 28 | static inline void __cpuinit |
29 | detect_hypervisor_vendor(struct cpuinfo_x86 *c) | 29 | detect_hypervisor_vendor(struct cpuinfo_x86 *c) |
30 | { | 30 | { |
31 | if (vmware_platform()) { | 31 | if (vmware_platform()) |
32 | c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; | 32 | c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; |
33 | } else { | 33 | else |
34 | c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; | 34 | c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; |
35 | } | ||
36 | } | 35 | } |
37 | 36 | ||
38 | unsigned long get_hypervisor_tsc_freq(void) | 37 | unsigned long get_hypervisor_tsc_freq(void) |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 3260ab044996..80a722a071b5 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -7,17 +7,17 @@ | |||
7 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
8 | #include <linux/thread_info.h> | 8 | #include <linux/thread_info.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/uaccess.h> | ||
10 | 11 | ||
11 | #include <asm/processor.h> | 12 | #include <asm/processor.h> |
12 | #include <asm/pgtable.h> | 13 | #include <asm/pgtable.h> |
13 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
14 | #include <asm/uaccess.h> | ||
15 | #include <asm/ds.h> | 15 | #include <asm/ds.h> |
16 | #include <asm/bugs.h> | 16 | #include <asm/bugs.h> |
17 | #include <asm/cpu.h> | 17 | #include <asm/cpu.h> |
18 | 18 | ||
19 | #ifdef CONFIG_X86_64 | 19 | #ifdef CONFIG_X86_64 |
20 | #include <asm/topology.h> | 20 | #include <linux/topology.h> |
21 | #include <asm/numa_64.h> | 21 | #include <asm/numa_64.h> |
22 | #endif | 22 | #endif |
23 | 23 | ||
@@ -174,7 +174,8 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) | |||
174 | #ifdef CONFIG_X86_F00F_BUG | 174 | #ifdef CONFIG_X86_F00F_BUG |
175 | /* | 175 | /* |
176 | * All current models of Pentium and Pentium with MMX technology CPUs | 176 | * All current models of Pentium and Pentium with MMX technology CPUs |
177 | * have the F0 0F bug, which lets nonprivileged users lock up the system. | 177 | * have the F0 0F bug, which lets nonprivileged users lock up the |
178 | * system. | ||
178 | * Note that the workaround only should be initialized once... | 179 | * Note that the workaround only should be initialized once... |
179 | */ | 180 | */ |
180 | c->f00f_bug = 0; | 181 | c->f00f_bug = 0; |
@@ -207,7 +208,7 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) | |||
207 | printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); | 208 | printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); |
208 | printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); | 209 | printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); |
209 | lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE; | 210 | lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE; |
210 | wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); | 211 | wrmsr(MSR_IA32_MISC_ENABLE, lo, hi); |
211 | } | 212 | } |
212 | } | 213 | } |
213 | 214 | ||
@@ -283,7 +284,7 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) | |||
283 | /* Intel has a non-standard dependency on %ecx for this CPUID level. */ | 284 | /* Intel has a non-standard dependency on %ecx for this CPUID level. */ |
284 | cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); | 285 | cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); |
285 | if (eax & 0x1f) | 286 | if (eax & 0x1f) |
286 | return ((eax >> 26) + 1); | 287 | return (eax >> 26) + 1; |
287 | else | 288 | else |
288 | return 1; | 289 | return 1; |
289 | } | 290 | } |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 789efe217e1a..804c40e2bc3e 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * Changes: | 4 | * Changes: |
5 | * Venkatesh Pallipadi : Adding cache identification through cpuid(4) | 5 | * Venkatesh Pallipadi : Adding cache identification through cpuid(4) |
6 | * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. | 6 | * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. |
7 | * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. | 7 | * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. |
8 | */ | 8 | */ |
9 | 9 | ||
@@ -16,7 +16,7 @@ | |||
16 | #include <linux/pci.h> | 16 | #include <linux/pci.h> |
17 | 17 | ||
18 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
19 | #include <asm/smp.h> | 19 | #include <linux/smp.h> |
20 | #include <asm/k8.h> | 20 | #include <asm/k8.h> |
21 | 21 | ||
22 | #define LVL_1_INST 1 | 22 | #define LVL_1_INST 1 |
@@ -25,14 +25,15 @@ | |||
25 | #define LVL_3 4 | 25 | #define LVL_3 4 |
26 | #define LVL_TRACE 5 | 26 | #define LVL_TRACE 5 |
27 | 27 | ||
28 | struct _cache_table | 28 | struct _cache_table { |
29 | { | ||
30 | unsigned char descriptor; | 29 | unsigned char descriptor; |
31 | char cache_type; | 30 | char cache_type; |
32 | short size; | 31 | short size; |
33 | }; | 32 | }; |
34 | 33 | ||
35 | /* all the cache descriptor types we care about (no TLB or trace cache entries) */ | 34 | /* All the cache descriptor types we care about (no TLB or |
35 | trace cache entries) */ | ||
36 | |||
36 | static const struct _cache_table __cpuinitconst cache_table[] = | 37 | static const struct _cache_table __cpuinitconst cache_table[] = |
37 | { | 38 | { |
38 | { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ | 39 | { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ |
@@ -105,8 +106,7 @@ static const struct _cache_table __cpuinitconst cache_table[] = | |||
105 | }; | 106 | }; |
106 | 107 | ||
107 | 108 | ||
108 | enum _cache_type | 109 | enum _cache_type { |
109 | { | ||
110 | CACHE_TYPE_NULL = 0, | 110 | CACHE_TYPE_NULL = 0, |
111 | CACHE_TYPE_DATA = 1, | 111 | CACHE_TYPE_DATA = 1, |
112 | CACHE_TYPE_INST = 2, | 112 | CACHE_TYPE_INST = 2, |
@@ -170,31 +170,31 @@ unsigned short num_cache_leaves; | |||
170 | Maybe later */ | 170 | Maybe later */ |
171 | union l1_cache { | 171 | union l1_cache { |
172 | struct { | 172 | struct { |
173 | unsigned line_size : 8; | 173 | unsigned line_size:8; |
174 | unsigned lines_per_tag : 8; | 174 | unsigned lines_per_tag:8; |
175 | unsigned assoc : 8; | 175 | unsigned assoc:8; |
176 | unsigned size_in_kb : 8; | 176 | unsigned size_in_kb:8; |
177 | }; | 177 | }; |
178 | unsigned val; | 178 | unsigned val; |
179 | }; | 179 | }; |
180 | 180 | ||
181 | union l2_cache { | 181 | union l2_cache { |
182 | struct { | 182 | struct { |
183 | unsigned line_size : 8; | 183 | unsigned line_size:8; |
184 | unsigned lines_per_tag : 4; | 184 | unsigned lines_per_tag:4; |
185 | unsigned assoc : 4; | 185 | unsigned assoc:4; |
186 | unsigned size_in_kb : 16; | 186 | unsigned size_in_kb:16; |
187 | }; | 187 | }; |
188 | unsigned val; | 188 | unsigned val; |
189 | }; | 189 | }; |
190 | 190 | ||
191 | union l3_cache { | 191 | union l3_cache { |
192 | struct { | 192 | struct { |
193 | unsigned line_size : 8; | 193 | unsigned line_size:8; |
194 | unsigned lines_per_tag : 4; | 194 | unsigned lines_per_tag:4; |
195 | unsigned assoc : 4; | 195 | unsigned assoc:4; |
196 | unsigned res : 2; | 196 | unsigned res:2; |
197 | unsigned size_encoded : 14; | 197 | unsigned size_encoded:14; |
198 | }; | 198 | }; |
199 | unsigned val; | 199 | unsigned val; |
200 | }; | 200 | }; |
@@ -241,7 +241,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
241 | case 0: | 241 | case 0: |
242 | if (!l1->val) | 242 | if (!l1->val) |
243 | return; | 243 | return; |
244 | assoc = l1->assoc; | 244 | assoc = assocs[l1->assoc]; |
245 | line_size = l1->line_size; | 245 | line_size = l1->line_size; |
246 | lines_per_tag = l1->lines_per_tag; | 246 | lines_per_tag = l1->lines_per_tag; |
247 | size_in_kb = l1->size_in_kb; | 247 | size_in_kb = l1->size_in_kb; |
@@ -249,7 +249,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
249 | case 2: | 249 | case 2: |
250 | if (!l2.val) | 250 | if (!l2.val) |
251 | return; | 251 | return; |
252 | assoc = l2.assoc; | 252 | assoc = assocs[l2.assoc]; |
253 | line_size = l2.line_size; | 253 | line_size = l2.line_size; |
254 | lines_per_tag = l2.lines_per_tag; | 254 | lines_per_tag = l2.lines_per_tag; |
255 | /* cpu_data has errata corrections for K7 applied */ | 255 | /* cpu_data has errata corrections for K7 applied */ |
@@ -258,10 +258,14 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
258 | case 3: | 258 | case 3: |
259 | if (!l3.val) | 259 | if (!l3.val) |
260 | return; | 260 | return; |
261 | assoc = l3.assoc; | 261 | assoc = assocs[l3.assoc]; |
262 | line_size = l3.line_size; | 262 | line_size = l3.line_size; |
263 | lines_per_tag = l3.lines_per_tag; | 263 | lines_per_tag = l3.lines_per_tag; |
264 | size_in_kb = l3.size_encoded * 512; | 264 | size_in_kb = l3.size_encoded * 512; |
265 | if (boot_cpu_has(X86_FEATURE_AMD_DCM)) { | ||
266 | size_in_kb = size_in_kb >> 1; | ||
267 | assoc = assoc >> 1; | ||
268 | } | ||
265 | break; | 269 | break; |
266 | default: | 270 | default: |
267 | return; | 271 | return; |
@@ -270,18 +274,14 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
270 | eax->split.is_self_initializing = 1; | 274 | eax->split.is_self_initializing = 1; |
271 | eax->split.type = types[leaf]; | 275 | eax->split.type = types[leaf]; |
272 | eax->split.level = levels[leaf]; | 276 | eax->split.level = levels[leaf]; |
273 | if (leaf == 3) | 277 | eax->split.num_threads_sharing = 0; |
274 | eax->split.num_threads_sharing = | ||
275 | current_cpu_data.x86_max_cores - 1; | ||
276 | else | ||
277 | eax->split.num_threads_sharing = 0; | ||
278 | eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; | 278 | eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; |
279 | 279 | ||
280 | 280 | ||
281 | if (assoc == 0xf) | 281 | if (assoc == 0xffff) |
282 | eax->split.is_fully_associative = 1; | 282 | eax->split.is_fully_associative = 1; |
283 | ebx->split.coherency_line_size = line_size - 1; | 283 | ebx->split.coherency_line_size = line_size - 1; |
284 | ebx->split.ways_of_associativity = assocs[assoc] - 1; | 284 | ebx->split.ways_of_associativity = assoc - 1; |
285 | ebx->split.physical_line_partition = lines_per_tag - 1; | 285 | ebx->split.physical_line_partition = lines_per_tag - 1; |
286 | ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / | 286 | ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / |
287 | (ebx->split.ways_of_associativity + 1) - 1; | 287 | (ebx->split.ways_of_associativity + 1) - 1; |
@@ -350,7 +350,8 @@ static int __cpuinit find_num_cache_leaves(void) | |||
350 | 350 | ||
351 | unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | 351 | unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) |
352 | { | 352 | { |
353 | unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ | 353 | /* Cache sizes */ |
354 | unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; | ||
354 | unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ | 355 | unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ |
355 | unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ | 356 | unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ |
356 | unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; | 357 | unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; |
@@ -377,8 +378,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
377 | 378 | ||
378 | retval = cpuid4_cache_lookup_regs(i, &this_leaf); | 379 | retval = cpuid4_cache_lookup_regs(i, &this_leaf); |
379 | if (retval >= 0) { | 380 | if (retval >= 0) { |
380 | switch(this_leaf.eax.split.level) { | 381 | switch (this_leaf.eax.split.level) { |
381 | case 1: | 382 | case 1: |
382 | if (this_leaf.eax.split.type == | 383 | if (this_leaf.eax.split.type == |
383 | CACHE_TYPE_DATA) | 384 | CACHE_TYPE_DATA) |
384 | new_l1d = this_leaf.size/1024; | 385 | new_l1d = this_leaf.size/1024; |
@@ -386,19 +387,20 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
386 | CACHE_TYPE_INST) | 387 | CACHE_TYPE_INST) |
387 | new_l1i = this_leaf.size/1024; | 388 | new_l1i = this_leaf.size/1024; |
388 | break; | 389 | break; |
389 | case 2: | 390 | case 2: |
390 | new_l2 = this_leaf.size/1024; | 391 | new_l2 = this_leaf.size/1024; |
391 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; | 392 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; |
392 | index_msb = get_count_order(num_threads_sharing); | 393 | index_msb = get_count_order(num_threads_sharing); |
393 | l2_id = c->apicid >> index_msb; | 394 | l2_id = c->apicid >> index_msb; |
394 | break; | 395 | break; |
395 | case 3: | 396 | case 3: |
396 | new_l3 = this_leaf.size/1024; | 397 | new_l3 = this_leaf.size/1024; |
397 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; | 398 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; |
398 | index_msb = get_count_order(num_threads_sharing); | 399 | index_msb = get_count_order( |
400 | num_threads_sharing); | ||
399 | l3_id = c->apicid >> index_msb; | 401 | l3_id = c->apicid >> index_msb; |
400 | break; | 402 | break; |
401 | default: | 403 | default: |
402 | break; | 404 | break; |
403 | } | 405 | } |
404 | } | 406 | } |
@@ -421,22 +423,21 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
421 | /* Number of times to iterate */ | 423 | /* Number of times to iterate */ |
422 | n = cpuid_eax(2) & 0xFF; | 424 | n = cpuid_eax(2) & 0xFF; |
423 | 425 | ||
424 | for ( i = 0 ; i < n ; i++ ) { | 426 | for (i = 0 ; i < n ; i++) { |
425 | cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]); | 427 | cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]); |
426 | 428 | ||
427 | /* If bit 31 is set, this is an unknown format */ | 429 | /* If bit 31 is set, this is an unknown format */ |
428 | for ( j = 0 ; j < 3 ; j++ ) { | 430 | for (j = 0 ; j < 3 ; j++) |
429 | if (regs[j] & (1 << 31)) regs[j] = 0; | 431 | if (regs[j] & (1 << 31)) |
430 | } | 432 | regs[j] = 0; |
431 | 433 | ||
432 | /* Byte 0 is level count, not a descriptor */ | 434 | /* Byte 0 is level count, not a descriptor */ |
433 | for ( j = 1 ; j < 16 ; j++ ) { | 435 | for (j = 1 ; j < 16 ; j++) { |
434 | unsigned char des = dp[j]; | 436 | unsigned char des = dp[j]; |
435 | unsigned char k = 0; | 437 | unsigned char k = 0; |
436 | 438 | ||
437 | /* look up this descriptor in the table */ | 439 | /* look up this descriptor in the table */ |
438 | while (cache_table[k].descriptor != 0) | 440 | while (cache_table[k].descriptor != 0) { |
439 | { | ||
440 | if (cache_table[k].descriptor == des) { | 441 | if (cache_table[k].descriptor == des) { |
441 | if (only_trace && cache_table[k].cache_type != LVL_TRACE) | 442 | if (only_trace && cache_table[k].cache_type != LVL_TRACE) |
442 | break; | 443 | break; |
@@ -488,14 +489,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
488 | } | 489 | } |
489 | 490 | ||
490 | if (trace) | 491 | if (trace) |
491 | printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); | 492 | printk(KERN_INFO "CPU: Trace cache: %dK uops", trace); |
492 | else if ( l1i ) | 493 | else if (l1i) |
493 | printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); | 494 | printk(KERN_INFO "CPU: L1 I cache: %dK", l1i); |
494 | 495 | ||
495 | if (l1d) | 496 | if (l1d) |
496 | printk(", L1 D cache: %dK\n", l1d); | 497 | printk(KERN_CONT ", L1 D cache: %dK\n", l1d); |
497 | else | 498 | else |
498 | printk("\n"); | 499 | printk(KERN_CONT "\n"); |
499 | 500 | ||
500 | if (l2) | 501 | if (l2) |
501 | printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); | 502 | printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); |
@@ -522,6 +523,18 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) | |||
522 | int index_msb, i; | 523 | int index_msb, i; |
523 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 524 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
524 | 525 | ||
526 | if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { | ||
527 | struct cpuinfo_x86 *d; | ||
528 | for_each_online_cpu(i) { | ||
529 | if (!per_cpu(cpuid4_info, i)) | ||
530 | continue; | ||
531 | d = &cpu_data(i); | ||
532 | this_leaf = CPUID4_INFO_IDX(i, index); | ||
533 | cpumask_copy(to_cpumask(this_leaf->shared_cpu_map), | ||
534 | d->llc_shared_map); | ||
535 | } | ||
536 | return; | ||
537 | } | ||
525 | this_leaf = CPUID4_INFO_IDX(cpu, index); | 538 | this_leaf = CPUID4_INFO_IDX(cpu, index); |
526 | num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; | 539 | num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; |
527 | 540 | ||
@@ -558,8 +571,13 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) | |||
558 | } | 571 | } |
559 | } | 572 | } |
560 | #else | 573 | #else |
561 | static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) {} | 574 | static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) |
562 | static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) {} | 575 | { |
576 | } | ||
577 | |||
578 | static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) | ||
579 | { | ||
580 | } | ||
563 | #endif | 581 | #endif |
564 | 582 | ||
565 | static void __cpuinit free_cache_attributes(unsigned int cpu) | 583 | static void __cpuinit free_cache_attributes(unsigned int cpu) |
@@ -645,7 +663,7 @@ static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); | |||
645 | static ssize_t show_##file_name \ | 663 | static ssize_t show_##file_name \ |
646 | (struct _cpuid4_info *this_leaf, char *buf) \ | 664 | (struct _cpuid4_info *this_leaf, char *buf) \ |
647 | { \ | 665 | { \ |
648 | return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \ | 666 | return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \ |
649 | } | 667 | } |
650 | 668 | ||
651 | show_one_plus(level, eax.split.level, 0); | 669 | show_one_plus(level, eax.split.level, 0); |
@@ -656,7 +674,7 @@ show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); | |||
656 | 674 | ||
657 | static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) | 675 | static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) |
658 | { | 676 | { |
659 | return sprintf (buf, "%luK\n", this_leaf->size / 1024); | 677 | return sprintf(buf, "%luK\n", this_leaf->size / 1024); |
660 | } | 678 | } |
661 | 679 | ||
662 | static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, | 680 | static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, |
@@ -669,7 +687,7 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, | |||
669 | const struct cpumask *mask; | 687 | const struct cpumask *mask; |
670 | 688 | ||
671 | mask = to_cpumask(this_leaf->shared_cpu_map); | 689 | mask = to_cpumask(this_leaf->shared_cpu_map); |
672 | n = type? | 690 | n = type ? |
673 | cpulist_scnprintf(buf, len-2, mask) : | 691 | cpulist_scnprintf(buf, len-2, mask) : |
674 | cpumask_scnprintf(buf, len-2, mask); | 692 | cpumask_scnprintf(buf, len-2, mask); |
675 | buf[n++] = '\n'; | 693 | buf[n++] = '\n'; |
@@ -800,7 +818,7 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, | |||
800 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | 818 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, |
801 | show_cache_disable_1, store_cache_disable_1); | 819 | show_cache_disable_1, store_cache_disable_1); |
802 | 820 | ||
803 | static struct attribute * default_attrs[] = { | 821 | static struct attribute *default_attrs[] = { |
804 | &type.attr, | 822 | &type.attr, |
805 | &level.attr, | 823 | &level.attr, |
806 | &coherency_line_size.attr, | 824 | &coherency_line_size.attr, |
@@ -815,7 +833,7 @@ static struct attribute * default_attrs[] = { | |||
815 | NULL | 833 | NULL |
816 | }; | 834 | }; |
817 | 835 | ||
818 | static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) | 836 | static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) |
819 | { | 837 | { |
820 | struct _cache_attr *fattr = to_attr(attr); | 838 | struct _cache_attr *fattr = to_attr(attr); |
821 | struct _index_kobject *this_leaf = to_object(kobj); | 839 | struct _index_kobject *this_leaf = to_object(kobj); |
@@ -828,8 +846,8 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) | |||
828 | return ret; | 846 | return ret; |
829 | } | 847 | } |
830 | 848 | ||
831 | static ssize_t store(struct kobject * kobj, struct attribute * attr, | 849 | static ssize_t store(struct kobject *kobj, struct attribute *attr, |
832 | const char * buf, size_t count) | 850 | const char *buf, size_t count) |
833 | { | 851 | { |
834 | struct _cache_attr *fattr = to_attr(attr); | 852 | struct _cache_attr *fattr = to_attr(attr); |
835 | struct _index_kobject *this_leaf = to_object(kobj); | 853 | struct _index_kobject *this_leaf = to_object(kobj); |
@@ -883,7 +901,7 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) | |||
883 | goto err_out; | 901 | goto err_out; |
884 | 902 | ||
885 | per_cpu(index_kobject, cpu) = kzalloc( | 903 | per_cpu(index_kobject, cpu) = kzalloc( |
886 | sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL); | 904 | sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL); |
887 | if (unlikely(per_cpu(index_kobject, cpu) == NULL)) | 905 | if (unlikely(per_cpu(index_kobject, cpu) == NULL)) |
888 | goto err_out; | 906 | goto err_out; |
889 | 907 | ||
@@ -917,7 +935,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
917 | } | 935 | } |
918 | 936 | ||
919 | for (i = 0; i < num_cache_leaves; i++) { | 937 | for (i = 0; i < num_cache_leaves; i++) { |
920 | this_object = INDEX_KOBJECT_PTR(cpu,i); | 938 | this_object = INDEX_KOBJECT_PTR(cpu, i); |
921 | this_object->cpu = cpu; | 939 | this_object->cpu = cpu; |
922 | this_object->index = i; | 940 | this_object->index = i; |
923 | retval = kobject_init_and_add(&(this_object->kobj), | 941 | retval = kobject_init_and_add(&(this_object->kobj), |
@@ -925,9 +943,8 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
925 | per_cpu(cache_kobject, cpu), | 943 | per_cpu(cache_kobject, cpu), |
926 | "index%1lu", i); | 944 | "index%1lu", i); |
927 | if (unlikely(retval)) { | 945 | if (unlikely(retval)) { |
928 | for (j = 0; j < i; j++) { | 946 | for (j = 0; j < i; j++) |
929 | kobject_put(&(INDEX_KOBJECT_PTR(cpu,j)->kobj)); | 947 | kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj)); |
930 | } | ||
931 | kobject_put(per_cpu(cache_kobject, cpu)); | 948 | kobject_put(per_cpu(cache_kobject, cpu)); |
932 | cpuid4_cache_sysfs_exit(cpu); | 949 | cpuid4_cache_sysfs_exit(cpu); |
933 | return retval; | 950 | return retval; |
@@ -952,7 +969,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) | |||
952 | cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); | 969 | cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); |
953 | 970 | ||
954 | for (i = 0; i < num_cache_leaves; i++) | 971 | for (i = 0; i < num_cache_leaves; i++) |
955 | kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); | 972 | kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj)); |
956 | kobject_put(per_cpu(cache_kobject, cpu)); | 973 | kobject_put(per_cpu(cache_kobject, cpu)); |
957 | cpuid4_cache_sysfs_exit(cpu); | 974 | cpuid4_cache_sysfs_exit(cpu); |
958 | } | 975 | } |
@@ -977,8 +994,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, | |||
977 | return NOTIFY_OK; | 994 | return NOTIFY_OK; |
978 | } | 995 | } |
979 | 996 | ||
980 | static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = | 997 | static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = { |
981 | { | ||
982 | .notifier_call = cacheinfo_cpu_callback, | 998 | .notifier_call = cacheinfo_cpu_callback, |
983 | }; | 999 | }; |
984 | 1000 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 284d1de968bc..01213048f62f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -194,14 +194,14 @@ static void print_mce(struct mce *m) | |||
194 | m->cs, m->ip); | 194 | m->cs, m->ip); |
195 | if (m->cs == __KERNEL_CS) | 195 | if (m->cs == __KERNEL_CS) |
196 | print_symbol("{%s}", m->ip); | 196 | print_symbol("{%s}", m->ip); |
197 | printk("\n"); | 197 | printk(KERN_CONT "\n"); |
198 | } | 198 | } |
199 | printk(KERN_EMERG "TSC %llx ", m->tsc); | 199 | printk(KERN_EMERG "TSC %llx ", m->tsc); |
200 | if (m->addr) | 200 | if (m->addr) |
201 | printk("ADDR %llx ", m->addr); | 201 | printk(KERN_CONT "ADDR %llx ", m->addr); |
202 | if (m->misc) | 202 | if (m->misc) |
203 | printk("MISC %llx ", m->misc); | 203 | printk(KERN_CONT "MISC %llx ", m->misc); |
204 | printk("\n"); | 204 | printk(KERN_CONT "\n"); |
205 | printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", | 205 | printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", |
206 | m->cpuvendor, m->cpuid, m->time, m->socketid, | 206 | m->cpuvendor, m->cpuid, m->time, m->socketid, |
207 | m->apicid); | 207 | m->apicid); |
@@ -209,13 +209,13 @@ static void print_mce(struct mce *m) | |||
209 | 209 | ||
210 | static void print_mce_head(void) | 210 | static void print_mce_head(void) |
211 | { | 211 | { |
212 | printk(KERN_EMERG "\n" KERN_EMERG "HARDWARE ERROR\n"); | 212 | printk(KERN_EMERG "\nHARDWARE ERROR\n"); |
213 | } | 213 | } |
214 | 214 | ||
215 | static void print_mce_tail(void) | 215 | static void print_mce_tail(void) |
216 | { | 216 | { |
217 | printk(KERN_EMERG "This is not a software problem!\n" | 217 | printk(KERN_EMERG "This is not a software problem!\n" |
218 | KERN_EMERG "Run through mcelog --ascii to decode and contact your hardware vendor\n"); | 218 | "Run through mcelog --ascii to decode and contact your hardware vendor\n"); |
219 | } | 219 | } |
220 | 220 | ||
221 | #define PANIC_TIMEOUT 5 /* 5 seconds */ | 221 | #define PANIC_TIMEOUT 5 /* 5 seconds */ |
@@ -1117,7 +1117,7 @@ static void mcheck_timer(unsigned long data) | |||
1117 | *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); | 1117 | *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); |
1118 | 1118 | ||
1119 | t->expires = jiffies + *n; | 1119 | t->expires = jiffies + *n; |
1120 | add_timer(t); | 1120 | add_timer_on(t, smp_processor_id()); |
1121 | } | 1121 | } |
1122 | 1122 | ||
1123 | static void mce_do_trigger(struct work_struct *work) | 1123 | static void mce_do_trigger(struct work_struct *work) |
@@ -1226,8 +1226,13 @@ static void mce_init(void) | |||
1226 | } | 1226 | } |
1227 | 1227 | ||
1228 | /* Add per CPU specific workarounds here */ | 1228 | /* Add per CPU specific workarounds here */ |
1229 | static void mce_cpu_quirks(struct cpuinfo_x86 *c) | 1229 | static int mce_cpu_quirks(struct cpuinfo_x86 *c) |
1230 | { | 1230 | { |
1231 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { | ||
1232 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); | ||
1233 | return -EOPNOTSUPP; | ||
1234 | } | ||
1235 | |||
1231 | /* This should be disabled by the BIOS, but isn't always */ | 1236 | /* This should be disabled by the BIOS, but isn't always */ |
1232 | if (c->x86_vendor == X86_VENDOR_AMD) { | 1237 | if (c->x86_vendor == X86_VENDOR_AMD) { |
1233 | if (c->x86 == 15 && banks > 4) { | 1238 | if (c->x86 == 15 && banks > 4) { |
@@ -1273,11 +1278,20 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1273 | if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && | 1278 | if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && |
1274 | monarch_timeout < 0) | 1279 | monarch_timeout < 0) |
1275 | monarch_timeout = USEC_PER_SEC; | 1280 | monarch_timeout = USEC_PER_SEC; |
1281 | |||
1282 | /* | ||
1283 | * There are also broken BIOSes on some Pentium M and | ||
1284 | * earlier systems: | ||
1285 | */ | ||
1286 | if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0) | ||
1287 | mce_bootlog = 0; | ||
1276 | } | 1288 | } |
1277 | if (monarch_timeout < 0) | 1289 | if (monarch_timeout < 0) |
1278 | monarch_timeout = 0; | 1290 | monarch_timeout = 0; |
1279 | if (mce_bootlog != 0) | 1291 | if (mce_bootlog != 0) |
1280 | mce_panic_timeout = 30; | 1292 | mce_panic_timeout = 30; |
1293 | |||
1294 | return 0; | ||
1281 | } | 1295 | } |
1282 | 1296 | ||
1283 | static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | 1297 | static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) |
@@ -1321,7 +1335,7 @@ static void mce_init_timer(void) | |||
1321 | return; | 1335 | return; |
1322 | setup_timer(t, mcheck_timer, smp_processor_id()); | 1336 | setup_timer(t, mcheck_timer, smp_processor_id()); |
1323 | t->expires = round_jiffies(jiffies + *n); | 1337 | t->expires = round_jiffies(jiffies + *n); |
1324 | add_timer(t); | 1338 | add_timer_on(t, smp_processor_id()); |
1325 | } | 1339 | } |
1326 | 1340 | ||
1327 | /* | 1341 | /* |
@@ -1338,11 +1352,10 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | |||
1338 | if (!mce_available(c)) | 1352 | if (!mce_available(c)) |
1339 | return; | 1353 | return; |
1340 | 1354 | ||
1341 | if (mce_cap_init() < 0) { | 1355 | if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) { |
1342 | mce_disabled = 1; | 1356 | mce_disabled = 1; |
1343 | return; | 1357 | return; |
1344 | } | 1358 | } |
1345 | mce_cpu_quirks(c); | ||
1346 | 1359 | ||
1347 | machine_check_vector = do_machine_check; | 1360 | machine_check_vector = do_machine_check; |
1348 | 1361 | ||
@@ -1692,17 +1705,15 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | |||
1692 | const char *buf, size_t siz) | 1705 | const char *buf, size_t siz) |
1693 | { | 1706 | { |
1694 | char *p; | 1707 | char *p; |
1695 | int len; | ||
1696 | 1708 | ||
1697 | strncpy(mce_helper, buf, sizeof(mce_helper)); | 1709 | strncpy(mce_helper, buf, sizeof(mce_helper)); |
1698 | mce_helper[sizeof(mce_helper)-1] = 0; | 1710 | mce_helper[sizeof(mce_helper)-1] = 0; |
1699 | len = strlen(mce_helper); | ||
1700 | p = strchr(mce_helper, '\n'); | 1711 | p = strchr(mce_helper, '\n'); |
1701 | 1712 | ||
1702 | if (*p) | 1713 | if (p) |
1703 | *p = 0; | 1714 | *p = 0; |
1704 | 1715 | ||
1705 | return len; | 1716 | return strlen(mce_helper) + !!p; |
1706 | } | 1717 | } |
1707 | 1718 | ||
1708 | static ssize_t set_ignore_ce(struct sys_device *s, | 1719 | static ssize_t set_ignore_ce(struct sys_device *s, |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index ddae21620bda..1fecba404fd8 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -489,12 +489,14 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
489 | int i, err = 0; | 489 | int i, err = 0; |
490 | struct threshold_bank *b = NULL; | 490 | struct threshold_bank *b = NULL; |
491 | char name[32]; | 491 | char name[32]; |
492 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
493 | |||
492 | 494 | ||
493 | sprintf(name, "threshold_bank%i", bank); | 495 | sprintf(name, "threshold_bank%i", bank); |
494 | 496 | ||
495 | #ifdef CONFIG_SMP | 497 | #ifdef CONFIG_SMP |
496 | if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ | 498 | if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ |
497 | i = cpumask_first(cpu_core_mask(cpu)); | 499 | i = cpumask_first(c->llc_shared_map); |
498 | 500 | ||
499 | /* first core not up yet */ | 501 | /* first core not up yet */ |
500 | if (cpu_data(i).cpu_core_id) | 502 | if (cpu_data(i).cpu_core_id) |
@@ -514,7 +516,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
514 | if (err) | 516 | if (err) |
515 | goto out; | 517 | goto out; |
516 | 518 | ||
517 | cpumask_copy(b->cpus, cpu_core_mask(cpu)); | 519 | cpumask_copy(b->cpus, c->llc_shared_map); |
518 | per_cpu(threshold_banks, cpu)[bank] = b; | 520 | per_cpu(threshold_banks, cpu)[bank] = b; |
519 | 521 | ||
520 | goto out; | 522 | goto out; |
@@ -539,7 +541,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
539 | #ifndef CONFIG_SMP | 541 | #ifndef CONFIG_SMP |
540 | cpumask_setall(b->cpus); | 542 | cpumask_setall(b->cpus); |
541 | #else | 543 | #else |
542 | cpumask_copy(b->cpus, cpu_core_mask(cpu)); | 544 | cpumask_copy(b->cpus, c->llc_shared_map); |
543 | #endif | 545 | #endif |
544 | 546 | ||
545 | per_cpu(threshold_banks, cpu)[bank] = b; | 547 | per_cpu(threshold_banks, cpu)[bank] = b; |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index bff8dd191dd5..5957a93e5173 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -36,6 +36,7 @@ | |||
36 | 36 | ||
37 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; | 37 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; |
38 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); | 38 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); |
39 | static DEFINE_PER_CPU(bool, thermal_throttle_active); | ||
39 | 40 | ||
40 | static atomic_t therm_throt_en = ATOMIC_INIT(0); | 41 | static atomic_t therm_throt_en = ATOMIC_INIT(0); |
41 | 42 | ||
@@ -96,27 +97,33 @@ static int therm_throt_process(int curr) | |||
96 | { | 97 | { |
97 | unsigned int cpu = smp_processor_id(); | 98 | unsigned int cpu = smp_processor_id(); |
98 | __u64 tmp_jiffs = get_jiffies_64(); | 99 | __u64 tmp_jiffs = get_jiffies_64(); |
100 | bool was_throttled = __get_cpu_var(thermal_throttle_active); | ||
101 | bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr; | ||
99 | 102 | ||
100 | if (curr) | 103 | if (is_throttled) |
101 | __get_cpu_var(thermal_throttle_count)++; | 104 | __get_cpu_var(thermal_throttle_count)++; |
102 | 105 | ||
103 | if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) | 106 | if (!(was_throttled ^ is_throttled) && |
107 | time_before64(tmp_jiffs, __get_cpu_var(next_check))) | ||
104 | return 0; | 108 | return 0; |
105 | 109 | ||
106 | __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; | 110 | __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; |
107 | 111 | ||
108 | /* if we just entered the thermal event */ | 112 | /* if we just entered the thermal event */ |
109 | if (curr) { | 113 | if (is_throttled) { |
110 | printk(KERN_CRIT "CPU%d: Temperature above threshold, " | 114 | printk(KERN_CRIT "CPU%d: Temperature above threshold, " |
111 | "cpu clock throttled (total events = %lu)\n", cpu, | 115 | "cpu clock throttled (total events = %lu)\n", |
112 | __get_cpu_var(thermal_throttle_count)); | 116 | cpu, __get_cpu_var(thermal_throttle_count)); |
113 | 117 | ||
114 | add_taint(TAINT_MACHINE_CHECK); | 118 | add_taint(TAINT_MACHINE_CHECK); |
115 | } else { | 119 | return 1; |
116 | printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); | 120 | } |
121 | if (was_throttled) { | ||
122 | printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); | ||
123 | return 1; | ||
117 | } | 124 | } |
118 | 125 | ||
119 | return 1; | 126 | return 0; |
120 | } | 127 | } |
121 | 128 | ||
122 | #ifdef CONFIG_SYSFS | 129 | #ifdef CONFIG_SYSFS |
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c index ee2331b0e58f..33af14110dfd 100644 --- a/arch/x86/kernel/cpu/mtrr/amd.c +++ b/arch/x86/kernel/cpu/mtrr/amd.c | |||
@@ -7,15 +7,15 @@ | |||
7 | 7 | ||
8 | static void | 8 | static void |
9 | amd_get_mtrr(unsigned int reg, unsigned long *base, | 9 | amd_get_mtrr(unsigned int reg, unsigned long *base, |
10 | unsigned long *size, mtrr_type * type) | 10 | unsigned long *size, mtrr_type *type) |
11 | { | 11 | { |
12 | unsigned long low, high; | 12 | unsigned long low, high; |
13 | 13 | ||
14 | rdmsr(MSR_K6_UWCCR, low, high); | 14 | rdmsr(MSR_K6_UWCCR, low, high); |
15 | /* Upper dword is region 1, lower is region 0 */ | 15 | /* Upper dword is region 1, lower is region 0 */ |
16 | if (reg == 1) | 16 | if (reg == 1) |
17 | low = high; | 17 | low = high; |
18 | /* The base masks off on the right alignment */ | 18 | /* The base masks off on the right alignment */ |
19 | *base = (low & 0xFFFE0000) >> PAGE_SHIFT; | 19 | *base = (low & 0xFFFE0000) >> PAGE_SHIFT; |
20 | *type = 0; | 20 | *type = 0; |
21 | if (low & 1) | 21 | if (low & 1) |
@@ -27,74 +27,81 @@ amd_get_mtrr(unsigned int reg, unsigned long *base, | |||
27 | return; | 27 | return; |
28 | } | 28 | } |
29 | /* | 29 | /* |
30 | * This needs a little explaining. The size is stored as an | 30 | * This needs a little explaining. The size is stored as an |
31 | * inverted mask of bits of 128K granularity 15 bits long offset | 31 | * inverted mask of bits of 128K granularity 15 bits long offset |
32 | * 2 bits | 32 | * 2 bits. |
33 | * | 33 | * |
34 | * So to get a size we do invert the mask and add 1 to the lowest | 34 | * So to get a size we do invert the mask and add 1 to the lowest |
35 | * mask bit (4 as its 2 bits in). This gives us a size we then shift | 35 | * mask bit (4 as its 2 bits in). This gives us a size we then shift |
36 | * to turn into 128K blocks | 36 | * to turn into 128K blocks. |
37 | * | 37 | * |
38 | * eg 111 1111 1111 1100 is 512K | 38 | * eg 111 1111 1111 1100 is 512K |
39 | * | 39 | * |
40 | * invert 000 0000 0000 0011 | 40 | * invert 000 0000 0000 0011 |
41 | * +1 000 0000 0000 0100 | 41 | * +1 000 0000 0000 0100 |
42 | * *128K ... | 42 | * *128K ... |
43 | */ | 43 | */ |
44 | low = (~low) & 0x1FFFC; | 44 | low = (~low) & 0x1FFFC; |
45 | *size = (low + 4) << (15 - PAGE_SHIFT); | 45 | *size = (low + 4) << (15 - PAGE_SHIFT); |
46 | return; | ||
47 | } | 46 | } |
48 | 47 | ||
49 | static void amd_set_mtrr(unsigned int reg, unsigned long base, | 48 | /** |
50 | unsigned long size, mtrr_type type) | 49 | * amd_set_mtrr - Set variable MTRR register on the local CPU. |
51 | /* [SUMMARY] Set variable MTRR register on the local CPU. | 50 | * |
52 | <reg> The register to set. | 51 | * @reg The register to set. |
53 | <base> The base address of the region. | 52 | * @base The base address of the region. |
54 | <size> The size of the region. If this is 0 the region is disabled. | 53 | * @size The size of the region. If this is 0 the region is disabled. |
55 | <type> The type of the region. | 54 | * @type The type of the region. |
56 | [RETURNS] Nothing. | 55 | * |
57 | */ | 56 | * Returns nothing. |
57 | */ | ||
58 | static void | ||
59 | amd_set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) | ||
58 | { | 60 | { |
59 | u32 regs[2]; | 61 | u32 regs[2]; |
60 | 62 | ||
61 | /* | 63 | /* |
62 | * Low is MTRR0 , High MTRR 1 | 64 | * Low is MTRR0, High MTRR 1 |
63 | */ | 65 | */ |
64 | rdmsr(MSR_K6_UWCCR, regs[0], regs[1]); | 66 | rdmsr(MSR_K6_UWCCR, regs[0], regs[1]); |
65 | /* | 67 | /* |
66 | * Blank to disable | 68 | * Blank to disable |
67 | */ | 69 | */ |
68 | if (size == 0) | 70 | if (size == 0) { |
69 | regs[reg] = 0; | 71 | regs[reg] = 0; |
70 | else | 72 | } else { |
71 | /* Set the register to the base, the type (off by one) and an | 73 | /* |
72 | inverted bitmask of the size The size is the only odd | 74 | * Set the register to the base, the type (off by one) and an |
73 | bit. We are fed say 512K We invert this and we get 111 1111 | 75 | * inverted bitmask of the size The size is the only odd |
74 | 1111 1011 but if you subtract one and invert you get the | 76 | * bit. We are fed say 512K We invert this and we get 111 1111 |
75 | desired 111 1111 1111 1100 mask | 77 | * 1111 1011 but if you subtract one and invert you get the |
76 | 78 | * desired 111 1111 1111 1100 mask | |
77 | But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */ | 79 | * |
80 | * But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! | ||
81 | */ | ||
78 | regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC) | 82 | regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC) |
79 | | (base << PAGE_SHIFT) | (type + 1); | 83 | | (base << PAGE_SHIFT) | (type + 1); |
84 | } | ||
80 | 85 | ||
81 | /* | 86 | /* |
82 | * The writeback rule is quite specific. See the manual. Its | 87 | * The writeback rule is quite specific. See the manual. Its |
83 | * disable local interrupts, write back the cache, set the mtrr | 88 | * disable local interrupts, write back the cache, set the mtrr |
84 | */ | 89 | */ |
85 | wbinvd(); | 90 | wbinvd(); |
86 | wrmsr(MSR_K6_UWCCR, regs[0], regs[1]); | 91 | wrmsr(MSR_K6_UWCCR, regs[0], regs[1]); |
87 | } | 92 | } |
88 | 93 | ||
89 | static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) | 94 | static int |
95 | amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) | ||
90 | { | 96 | { |
91 | /* Apply the K6 block alignment and size rules | 97 | /* |
92 | In order | 98 | * Apply the K6 block alignment and size rules |
93 | o Uncached or gathering only | 99 | * In order |
94 | o 128K or bigger block | 100 | * o Uncached or gathering only |
95 | o Power of 2 block | 101 | * o 128K or bigger block |
96 | o base suitably aligned to the power | 102 | * o Power of 2 block |
97 | */ | 103 | * o base suitably aligned to the power |
104 | */ | ||
98 | if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT)) | 105 | if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT)) |
99 | || (size & ~(size - 1)) - size || (base & (size - 1))) | 106 | || (size & ~(size - 1)) - size || (base & (size - 1))) |
100 | return -EINVAL; | 107 | return -EINVAL; |
@@ -115,5 +122,3 @@ int __init amd_init_mtrr(void) | |||
115 | set_mtrr_ops(&amd_mtrr_ops); | 122 | set_mtrr_ops(&amd_mtrr_ops); |
116 | return 0; | 123 | return 0; |
117 | } | 124 | } |
118 | |||
119 | //arch_initcall(amd_mtrr_init); | ||
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c index cb9aa3a7a7ab..de89f14eff3a 100644 --- a/arch/x86/kernel/cpu/mtrr/centaur.c +++ b/arch/x86/kernel/cpu/mtrr/centaur.c | |||
@@ -1,7 +1,9 @@ | |||
1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
2 | #include <linux/mm.h> | 2 | #include <linux/mm.h> |
3 | |||
3 | #include <asm/mtrr.h> | 4 | #include <asm/mtrr.h> |
4 | #include <asm/msr.h> | 5 | #include <asm/msr.h> |
6 | |||
5 | #include "mtrr.h" | 7 | #include "mtrr.h" |
6 | 8 | ||
7 | static struct { | 9 | static struct { |
@@ -12,25 +14,25 @@ static struct { | |||
12 | static u8 centaur_mcr_reserved; | 14 | static u8 centaur_mcr_reserved; |
13 | static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */ | 15 | static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */ |
14 | 16 | ||
15 | /* | 17 | /** |
16 | * Report boot time MCR setups | 18 | * centaur_get_free_region - Get a free MTRR. |
19 | * | ||
20 | * @base: The starting (base) address of the region. | ||
21 | * @size: The size (in bytes) of the region. | ||
22 | * | ||
23 | * Returns: the index of the region on success, else -1 on error. | ||
17 | */ | 24 | */ |
18 | |||
19 | static int | 25 | static int |
20 | centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) | 26 | centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) |
21 | /* [SUMMARY] Get a free MTRR. | ||
22 | <base> The starting (base) address of the region. | ||
23 | <size> The size (in bytes) of the region. | ||
24 | [RETURNS] The index of the region on success, else -1 on error. | ||
25 | */ | ||
26 | { | 27 | { |
27 | int i, max; | ||
28 | mtrr_type ltype; | ||
29 | unsigned long lbase, lsize; | 28 | unsigned long lbase, lsize; |
29 | mtrr_type ltype; | ||
30 | int i, max; | ||
30 | 31 | ||
31 | max = num_var_ranges; | 32 | max = num_var_ranges; |
32 | if (replace_reg >= 0 && replace_reg < max) | 33 | if (replace_reg >= 0 && replace_reg < max) |
33 | return replace_reg; | 34 | return replace_reg; |
35 | |||
34 | for (i = 0; i < max; ++i) { | 36 | for (i = 0; i < max; ++i) { |
35 | if (centaur_mcr_reserved & (1 << i)) | 37 | if (centaur_mcr_reserved & (1 << i)) |
36 | continue; | 38 | continue; |
@@ -38,11 +40,14 @@ centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) | |||
38 | if (lsize == 0) | 40 | if (lsize == 0) |
39 | return i; | 41 | return i; |
40 | } | 42 | } |
43 | |||
41 | return -ENOSPC; | 44 | return -ENOSPC; |
42 | } | 45 | } |
43 | 46 | ||
44 | void | 47 | /* |
45 | mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) | 48 | * Report boot time MCR setups |
49 | */ | ||
50 | void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) | ||
46 | { | 51 | { |
47 | centaur_mcr[mcr].low = lo; | 52 | centaur_mcr[mcr].low = lo; |
48 | centaur_mcr[mcr].high = hi; | 53 | centaur_mcr[mcr].high = hi; |
@@ -54,33 +59,35 @@ centaur_get_mcr(unsigned int reg, unsigned long *base, | |||
54 | { | 59 | { |
55 | *base = centaur_mcr[reg].high >> PAGE_SHIFT; | 60 | *base = centaur_mcr[reg].high >> PAGE_SHIFT; |
56 | *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT; | 61 | *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT; |
57 | *type = MTRR_TYPE_WRCOMB; /* If it is there, it is write-combining */ | 62 | *type = MTRR_TYPE_WRCOMB; /* write-combining */ |
63 | |||
58 | if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2)) | 64 | if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2)) |
59 | *type = MTRR_TYPE_UNCACHABLE; | 65 | *type = MTRR_TYPE_UNCACHABLE; |
60 | if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25) | 66 | if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25) |
61 | *type = MTRR_TYPE_WRBACK; | 67 | *type = MTRR_TYPE_WRBACK; |
62 | if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31) | 68 | if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31) |
63 | *type = MTRR_TYPE_WRBACK; | 69 | *type = MTRR_TYPE_WRBACK; |
64 | |||
65 | } | 70 | } |
66 | 71 | ||
67 | static void centaur_set_mcr(unsigned int reg, unsigned long base, | 72 | static void |
68 | unsigned long size, mtrr_type type) | 73 | centaur_set_mcr(unsigned int reg, unsigned long base, |
74 | unsigned long size, mtrr_type type) | ||
69 | { | 75 | { |
70 | unsigned long low, high; | 76 | unsigned long low, high; |
71 | 77 | ||
72 | if (size == 0) { | 78 | if (size == 0) { |
73 | /* Disable */ | 79 | /* Disable */ |
74 | high = low = 0; | 80 | high = low = 0; |
75 | } else { | 81 | } else { |
76 | high = base << PAGE_SHIFT; | 82 | high = base << PAGE_SHIFT; |
77 | if (centaur_mcr_type == 0) | 83 | if (centaur_mcr_type == 0) { |
78 | low = -size << PAGE_SHIFT | 0x1f; /* only support write-combining... */ | 84 | /* Only support write-combining... */ |
79 | else { | 85 | low = -size << PAGE_SHIFT | 0x1f; |
86 | } else { | ||
80 | if (type == MTRR_TYPE_UNCACHABLE) | 87 | if (type == MTRR_TYPE_UNCACHABLE) |
81 | low = -size << PAGE_SHIFT | 0x02; /* NC */ | 88 | low = -size << PAGE_SHIFT | 0x02; /* NC */ |
82 | else | 89 | else |
83 | low = -size << PAGE_SHIFT | 0x09; /* WWO,WC */ | 90 | low = -size << PAGE_SHIFT | 0x09; /* WWO, WC */ |
84 | } | 91 | } |
85 | } | 92 | } |
86 | centaur_mcr[reg].high = high; | 93 | centaur_mcr[reg].high = high; |
@@ -88,118 +95,16 @@ static void centaur_set_mcr(unsigned int reg, unsigned long base, | |||
88 | wrmsr(MSR_IDT_MCR0 + reg, low, high); | 95 | wrmsr(MSR_IDT_MCR0 + reg, low, high); |
89 | } | 96 | } |
90 | 97 | ||
91 | #if 0 | 98 | static int |
92 | /* | 99 | centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int type) |
93 | * Initialise the later (saner) Winchip MCR variant. In this version | ||
94 | * the BIOS can pass us the registers it has used (but not their values) | ||
95 | * and the control register is read/write | ||
96 | */ | ||
97 | |||
98 | static void __init | ||
99 | centaur_mcr1_init(void) | ||
100 | { | ||
101 | unsigned i; | ||
102 | u32 lo, hi; | ||
103 | |||
104 | /* Unfortunately, MCR's are read-only, so there is no way to | ||
105 | * find out what the bios might have done. | ||
106 | */ | ||
107 | |||
108 | rdmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
109 | if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */ | ||
110 | lo &= ~0x1C0; /* clear key */ | ||
111 | lo |= 0x040; /* set key to 1 */ | ||
112 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */ | ||
113 | } | ||
114 | |||
115 | centaur_mcr_type = 1; | ||
116 | |||
117 | /* | ||
118 | * Clear any unconfigured MCR's. | ||
119 | */ | ||
120 | |||
121 | for (i = 0; i < 8; ++i) { | ||
122 | if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) { | ||
123 | if (!(lo & (1 << (9 + i)))) | ||
124 | wrmsr(MSR_IDT_MCR0 + i, 0, 0); | ||
125 | else | ||
126 | /* | ||
127 | * If the BIOS set up an MCR we cannot see it | ||
128 | * but we don't wish to obliterate it | ||
129 | */ | ||
130 | centaur_mcr_reserved |= (1 << i); | ||
131 | } | ||
132 | } | ||
133 | /* | ||
134 | * Throw the main write-combining switch... | ||
135 | * However if OOSTORE is enabled then people have already done far | ||
136 | * cleverer things and we should behave. | ||
137 | */ | ||
138 | |||
139 | lo |= 15; /* Write combine enables */ | ||
140 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
141 | } | ||
142 | |||
143 | /* | ||
144 | * Initialise the original winchip with read only MCR registers | ||
145 | * no used bitmask for the BIOS to pass on and write only control | ||
146 | */ | ||
147 | |||
148 | static void __init | ||
149 | centaur_mcr0_init(void) | ||
150 | { | ||
151 | unsigned i; | ||
152 | |||
153 | /* Unfortunately, MCR's are read-only, so there is no way to | ||
154 | * find out what the bios might have done. | ||
155 | */ | ||
156 | |||
157 | /* Clear any unconfigured MCR's. | ||
158 | * This way we are sure that the centaur_mcr array contains the actual | ||
159 | * values. The disadvantage is that any BIOS tweaks are thus undone. | ||
160 | * | ||
161 | */ | ||
162 | for (i = 0; i < 8; ++i) { | ||
163 | if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) | ||
164 | wrmsr(MSR_IDT_MCR0 + i, 0, 0); | ||
165 | } | ||
166 | |||
167 | wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */ | ||
168 | } | ||
169 | |||
170 | /* | ||
171 | * Initialise Winchip series MCR registers | ||
172 | */ | ||
173 | |||
174 | static void __init | ||
175 | centaur_mcr_init(void) | ||
176 | { | ||
177 | struct set_mtrr_context ctxt; | ||
178 | |||
179 | set_mtrr_prepare_save(&ctxt); | ||
180 | set_mtrr_cache_disable(&ctxt); | ||
181 | |||
182 | if (boot_cpu_data.x86_model == 4) | ||
183 | centaur_mcr0_init(); | ||
184 | else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9) | ||
185 | centaur_mcr1_init(); | ||
186 | |||
187 | set_mtrr_done(&ctxt); | ||
188 | } | ||
189 | #endif | ||
190 | |||
191 | static int centaur_validate_add_page(unsigned long base, | ||
192 | unsigned long size, unsigned int type) | ||
193 | { | 100 | { |
194 | /* | 101 | /* |
195 | * FIXME: Winchip2 supports uncached | 102 | * FIXME: Winchip2 supports uncached |
196 | */ | 103 | */ |
197 | if (type != MTRR_TYPE_WRCOMB && | 104 | if (type != MTRR_TYPE_WRCOMB && |
198 | (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { | 105 | (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { |
199 | printk(KERN_WARNING | 106 | pr_warning("mtrr: only write-combining%s supported\n", |
200 | "mtrr: only write-combining%s supported\n", | 107 | centaur_mcr_type ? " and uncacheable are" : " is"); |
201 | centaur_mcr_type ? " and uncacheable are" | ||
202 | : " is"); | ||
203 | return -EINVAL; | 108 | return -EINVAL; |
204 | } | 109 | } |
205 | return 0; | 110 | return 0; |
@@ -207,7 +112,6 @@ static int centaur_validate_add_page(unsigned long base, | |||
207 | 112 | ||
208 | static struct mtrr_ops centaur_mtrr_ops = { | 113 | static struct mtrr_ops centaur_mtrr_ops = { |
209 | .vendor = X86_VENDOR_CENTAUR, | 114 | .vendor = X86_VENDOR_CENTAUR, |
210 | // .init = centaur_mcr_init, | ||
211 | .set = centaur_set_mcr, | 115 | .set = centaur_set_mcr, |
212 | .get = centaur_get_mcr, | 116 | .get = centaur_get_mcr, |
213 | .get_free_region = centaur_get_free_region, | 117 | .get_free_region = centaur_get_free_region, |
@@ -220,5 +124,3 @@ int __init centaur_init_mtrr(void) | |||
220 | set_mtrr_ops(&centaur_mtrr_ops); | 124 | set_mtrr_ops(&centaur_mtrr_ops); |
221 | return 0; | 125 | return 0; |
222 | } | 126 | } |
223 | |||
224 | //arch_initcall(centaur_init_mtrr); | ||
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 1d584a18a50d..315738c74aad 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
@@ -1,51 +1,75 @@ | |||
1 | /* MTRR (Memory Type Range Register) cleanup | 1 | /* |
2 | 2 | * MTRR (Memory Type Range Register) cleanup | |
3 | Copyright (C) 2009 Yinghai Lu | 3 | * |
4 | 4 | * Copyright (C) 2009 Yinghai Lu | |
5 | This library is free software; you can redistribute it and/or | 5 | * |
6 | modify it under the terms of the GNU Library General Public | 6 | * This library is free software; you can redistribute it and/or |
7 | License as published by the Free Software Foundation; either | 7 | * modify it under the terms of the GNU Library General Public |
8 | version 2 of the License, or (at your option) any later version. | 8 | * License as published by the Free Software Foundation; either |
9 | 9 | * version 2 of the License, or (at your option) any later version. | |
10 | This library is distributed in the hope that it will be useful, | 10 | * |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * This library is distributed in the hope that it will be useful, |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | Library General Public License for more details. | 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | 14 | * Library General Public License for more details. | |
15 | You should have received a copy of the GNU Library General Public | 15 | * |
16 | License along with this library; if not, write to the Free | 16 | * You should have received a copy of the GNU Library General Public |
17 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 17 | * License along with this library; if not, write to the Free |
18 | */ | 18 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
19 | 19 | */ | |
20 | #include <linux/module.h> | 20 | #include <linux/module.h> |
21 | #include <linux/init.h> | 21 | #include <linux/init.h> |
22 | #include <linux/pci.h> | 22 | #include <linux/pci.h> |
23 | #include <linux/smp.h> | 23 | #include <linux/smp.h> |
24 | #include <linux/cpu.h> | 24 | #include <linux/cpu.h> |
25 | #include <linux/mutex.h> | ||
26 | #include <linux/sort.h> | 25 | #include <linux/sort.h> |
26 | #include <linux/mutex.h> | ||
27 | #include <linux/uaccess.h> | ||
28 | #include <linux/kvm_para.h> | ||
27 | 29 | ||
30 | #include <asm/processor.h> | ||
28 | #include <asm/e820.h> | 31 | #include <asm/e820.h> |
29 | #include <asm/mtrr.h> | 32 | #include <asm/mtrr.h> |
30 | #include <asm/uaccess.h> | ||
31 | #include <asm/processor.h> | ||
32 | #include <asm/msr.h> | 33 | #include <asm/msr.h> |
33 | #include <asm/kvm_para.h> | ||
34 | #include "mtrr.h" | ||
35 | 34 | ||
36 | /* should be related to MTRR_VAR_RANGES nums */ | 35 | #include "mtrr.h" |
37 | #define RANGE_NUM 256 | ||
38 | 36 | ||
39 | struct res_range { | 37 | struct res_range { |
40 | unsigned long start; | 38 | unsigned long start; |
41 | unsigned long end; | 39 | unsigned long end; |
40 | }; | ||
41 | |||
42 | struct var_mtrr_range_state { | ||
43 | unsigned long base_pfn; | ||
44 | unsigned long size_pfn; | ||
45 | mtrr_type type; | ||
46 | }; | ||
47 | |||
48 | struct var_mtrr_state { | ||
49 | unsigned long range_startk; | ||
50 | unsigned long range_sizek; | ||
51 | unsigned long chunk_sizek; | ||
52 | unsigned long gran_sizek; | ||
53 | unsigned int reg; | ||
42 | }; | 54 | }; |
43 | 55 | ||
56 | /* Should be related to MTRR_VAR_RANGES nums */ | ||
57 | #define RANGE_NUM 256 | ||
58 | |||
59 | static struct res_range __initdata range[RANGE_NUM]; | ||
60 | static int __initdata nr_range; | ||
61 | |||
62 | static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; | ||
63 | |||
64 | static int __initdata debug_print; | ||
65 | #define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) | ||
66 | |||
67 | |||
44 | static int __init | 68 | static int __init |
45 | add_range(struct res_range *range, int nr_range, unsigned long start, | 69 | add_range(struct res_range *range, int nr_range, |
46 | unsigned long end) | 70 | unsigned long start, unsigned long end) |
47 | { | 71 | { |
48 | /* out of slots */ | 72 | /* Out of slots: */ |
49 | if (nr_range >= RANGE_NUM) | 73 | if (nr_range >= RANGE_NUM) |
50 | return nr_range; | 74 | return nr_range; |
51 | 75 | ||
@@ -58,12 +82,12 @@ add_range(struct res_range *range, int nr_range, unsigned long start, | |||
58 | } | 82 | } |
59 | 83 | ||
60 | static int __init | 84 | static int __init |
61 | add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, | 85 | add_range_with_merge(struct res_range *range, int nr_range, |
62 | unsigned long end) | 86 | unsigned long start, unsigned long end) |
63 | { | 87 | { |
64 | int i; | 88 | int i; |
65 | 89 | ||
66 | /* try to merge it with old one */ | 90 | /* Try to merge it with old one: */ |
67 | for (i = 0; i < nr_range; i++) { | 91 | for (i = 0; i < nr_range; i++) { |
68 | unsigned long final_start, final_end; | 92 | unsigned long final_start, final_end; |
69 | unsigned long common_start, common_end; | 93 | unsigned long common_start, common_end; |
@@ -84,7 +108,7 @@ add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, | |||
84 | return nr_range; | 108 | return nr_range; |
85 | } | 109 | } |
86 | 110 | ||
87 | /* need to add that */ | 111 | /* Need to add it: */ |
88 | return add_range(range, nr_range, start, end); | 112 | return add_range(range, nr_range, start, end); |
89 | } | 113 | } |
90 | 114 | ||
@@ -117,7 +141,7 @@ subtract_range(struct res_range *range, unsigned long start, unsigned long end) | |||
117 | } | 141 | } |
118 | 142 | ||
119 | if (start > range[j].start && end < range[j].end) { | 143 | if (start > range[j].start && end < range[j].end) { |
120 | /* find the new spare */ | 144 | /* Find the new spare: */ |
121 | for (i = 0; i < RANGE_NUM; i++) { | 145 | for (i = 0; i < RANGE_NUM; i++) { |
122 | if (range[i].end == 0) | 146 | if (range[i].end == 0) |
123 | break; | 147 | break; |
@@ -146,14 +170,8 @@ static int __init cmp_range(const void *x1, const void *x2) | |||
146 | return start1 - start2; | 170 | return start1 - start2; |
147 | } | 171 | } |
148 | 172 | ||
149 | struct var_mtrr_range_state { | 173 | #define BIOS_BUG_MSG KERN_WARNING \ |
150 | unsigned long base_pfn; | 174 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" |
151 | unsigned long size_pfn; | ||
152 | mtrr_type type; | ||
153 | }; | ||
154 | |||
155 | static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; | ||
156 | static int __initdata debug_print; | ||
157 | 175 | ||
158 | static int __init | 176 | static int __init |
159 | x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | 177 | x86_get_mtrr_mem_range(struct res_range *range, int nr_range, |
@@ -180,7 +198,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
180 | range[i].start, range[i].end + 1); | 198 | range[i].start, range[i].end + 1); |
181 | } | 199 | } |
182 | 200 | ||
183 | /* take out UC ranges */ | 201 | /* Take out UC ranges: */ |
184 | for (i = 0; i < num_var_ranges; i++) { | 202 | for (i = 0; i < num_var_ranges; i++) { |
185 | type = range_state[i].type; | 203 | type = range_state[i].type; |
186 | if (type != MTRR_TYPE_UNCACHABLE && | 204 | if (type != MTRR_TYPE_UNCACHABLE && |
@@ -193,9 +211,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
193 | if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed && | 211 | if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed && |
194 | (mtrr_state.enabled & 1)) { | 212 | (mtrr_state.enabled & 1)) { |
195 | /* Var MTRR contains UC entry below 1M? Skip it: */ | 213 | /* Var MTRR contains UC entry below 1M? Skip it: */ |
196 | printk(KERN_WARNING "WARNING: BIOS bug: VAR MTRR %d " | 214 | printk(BIOS_BUG_MSG, i); |
197 | "contains strange UC entry under 1M, check " | ||
198 | "with your system vendor!\n", i); | ||
199 | if (base + size <= (1<<(20-PAGE_SHIFT))) | 215 | if (base + size <= (1<<(20-PAGE_SHIFT))) |
200 | continue; | 216 | continue; |
201 | size -= (1<<(20-PAGE_SHIFT)) - base; | 217 | size -= (1<<(20-PAGE_SHIFT)) - base; |
@@ -237,17 +253,13 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
237 | return nr_range; | 253 | return nr_range; |
238 | } | 254 | } |
239 | 255 | ||
240 | static struct res_range __initdata range[RANGE_NUM]; | ||
241 | static int __initdata nr_range; | ||
242 | |||
243 | #ifdef CONFIG_MTRR_SANITIZER | 256 | #ifdef CONFIG_MTRR_SANITIZER |
244 | 257 | ||
245 | static unsigned long __init sum_ranges(struct res_range *range, int nr_range) | 258 | static unsigned long __init sum_ranges(struct res_range *range, int nr_range) |
246 | { | 259 | { |
247 | unsigned long sum; | 260 | unsigned long sum = 0; |
248 | int i; | 261 | int i; |
249 | 262 | ||
250 | sum = 0; | ||
251 | for (i = 0; i < nr_range; i++) | 263 | for (i = 0; i < nr_range; i++) |
252 | sum += range[i].end + 1 - range[i].start; | 264 | sum += range[i].end + 1 - range[i].start; |
253 | 265 | ||
@@ -278,17 +290,9 @@ static int __init mtrr_cleanup_debug_setup(char *str) | |||
278 | } | 290 | } |
279 | early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); | 291 | early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); |
280 | 292 | ||
281 | struct var_mtrr_state { | ||
282 | unsigned long range_startk; | ||
283 | unsigned long range_sizek; | ||
284 | unsigned long chunk_sizek; | ||
285 | unsigned long gran_sizek; | ||
286 | unsigned int reg; | ||
287 | }; | ||
288 | |||
289 | static void __init | 293 | static void __init |
290 | set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, | 294 | set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, |
291 | unsigned char type, unsigned int address_bits) | 295 | unsigned char type, unsigned int address_bits) |
292 | { | 296 | { |
293 | u32 base_lo, base_hi, mask_lo, mask_hi; | 297 | u32 base_lo, base_hi, mask_lo, mask_hi; |
294 | u64 base, mask; | 298 | u64 base, mask; |
@@ -301,7 +305,7 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, | |||
301 | mask = (1ULL << address_bits) - 1; | 305 | mask = (1ULL << address_bits) - 1; |
302 | mask &= ~((((u64)sizek) << 10) - 1); | 306 | mask &= ~((((u64)sizek) << 10) - 1); |
303 | 307 | ||
304 | base = ((u64)basek) << 10; | 308 | base = ((u64)basek) << 10; |
305 | 309 | ||
306 | base |= type; | 310 | base |= type; |
307 | mask |= 0x800; | 311 | mask |= 0x800; |
@@ -317,15 +321,14 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, | |||
317 | 321 | ||
318 | static void __init | 322 | static void __init |
319 | save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, | 323 | save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, |
320 | unsigned char type) | 324 | unsigned char type) |
321 | { | 325 | { |
322 | range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); | 326 | range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); |
323 | range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); | 327 | range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); |
324 | range_state[reg].type = type; | 328 | range_state[reg].type = type; |
325 | } | 329 | } |
326 | 330 | ||
327 | static void __init | 331 | static void __init set_var_mtrr_all(unsigned int address_bits) |
328 | set_var_mtrr_all(unsigned int address_bits) | ||
329 | { | 332 | { |
330 | unsigned long basek, sizek; | 333 | unsigned long basek, sizek; |
331 | unsigned char type; | 334 | unsigned char type; |
@@ -342,11 +345,11 @@ set_var_mtrr_all(unsigned int address_bits) | |||
342 | 345 | ||
343 | static unsigned long to_size_factor(unsigned long sizek, char *factorp) | 346 | static unsigned long to_size_factor(unsigned long sizek, char *factorp) |
344 | { | 347 | { |
345 | char factor; | ||
346 | unsigned long base = sizek; | 348 | unsigned long base = sizek; |
349 | char factor; | ||
347 | 350 | ||
348 | if (base & ((1<<10) - 1)) { | 351 | if (base & ((1<<10) - 1)) { |
349 | /* not MB alignment */ | 352 | /* Not MB-aligned: */ |
350 | factor = 'K'; | 353 | factor = 'K'; |
351 | } else if (base & ((1<<20) - 1)) { | 354 | } else if (base & ((1<<20) - 1)) { |
352 | factor = 'M'; | 355 | factor = 'M'; |
@@ -372,11 +375,12 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, | |||
372 | unsigned long max_align, align; | 375 | unsigned long max_align, align; |
373 | unsigned long sizek; | 376 | unsigned long sizek; |
374 | 377 | ||
375 | /* Compute the maximum size I can make a range */ | 378 | /* Compute the maximum size with which we can make a range: */ |
376 | if (range_startk) | 379 | if (range_startk) |
377 | max_align = ffs(range_startk) - 1; | 380 | max_align = ffs(range_startk) - 1; |
378 | else | 381 | else |
379 | max_align = 32; | 382 | max_align = 32; |
383 | |||
380 | align = fls(range_sizek) - 1; | 384 | align = fls(range_sizek) - 1; |
381 | if (align > max_align) | 385 | if (align > max_align) |
382 | align = max_align; | 386 | align = max_align; |
@@ -386,11 +390,10 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, | |||
386 | char start_factor = 'K', size_factor = 'K'; | 390 | char start_factor = 'K', size_factor = 'K'; |
387 | unsigned long start_base, size_base; | 391 | unsigned long start_base, size_base; |
388 | 392 | ||
389 | start_base = to_size_factor(range_startk, | 393 | start_base = to_size_factor(range_startk, &start_factor); |
390 | &start_factor), | 394 | size_base = to_size_factor(sizek, &size_factor); |
391 | size_base = to_size_factor(sizek, &size_factor), | ||
392 | 395 | ||
393 | printk(KERN_DEBUG "Setting variable MTRR %d, " | 396 | Dprintk("Setting variable MTRR %d, " |
394 | "base: %ld%cB, range: %ld%cB, type %s\n", | 397 | "base: %ld%cB, range: %ld%cB, type %s\n", |
395 | reg, start_base, start_factor, | 398 | reg, start_base, start_factor, |
396 | size_base, size_factor, | 399 | size_base, size_factor, |
@@ -425,10 +428,11 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | |||
425 | chunk_sizek = state->chunk_sizek; | 428 | chunk_sizek = state->chunk_sizek; |
426 | gran_sizek = state->gran_sizek; | 429 | gran_sizek = state->gran_sizek; |
427 | 430 | ||
428 | /* align with gran size, prevent small block used up MTRRs */ | 431 | /* Align with gran size, prevent small block used up MTRRs: */ |
429 | range_basek = ALIGN(state->range_startk, gran_sizek); | 432 | range_basek = ALIGN(state->range_startk, gran_sizek); |
430 | if ((range_basek > basek) && basek) | 433 | if ((range_basek > basek) && basek) |
431 | return second_sizek; | 434 | return second_sizek; |
435 | |||
432 | state->range_sizek -= (range_basek - state->range_startk); | 436 | state->range_sizek -= (range_basek - state->range_startk); |
433 | range_sizek = ALIGN(state->range_sizek, gran_sizek); | 437 | range_sizek = ALIGN(state->range_sizek, gran_sizek); |
434 | 438 | ||
@@ -439,22 +443,21 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | |||
439 | } | 443 | } |
440 | state->range_sizek = range_sizek; | 444 | state->range_sizek = range_sizek; |
441 | 445 | ||
442 | /* try to append some small hole */ | 446 | /* Try to append some small hole: */ |
443 | range0_basek = state->range_startk; | 447 | range0_basek = state->range_startk; |
444 | range0_sizek = ALIGN(state->range_sizek, chunk_sizek); | 448 | range0_sizek = ALIGN(state->range_sizek, chunk_sizek); |
445 | 449 | ||
446 | /* no increase */ | 450 | /* No increase: */ |
447 | if (range0_sizek == state->range_sizek) { | 451 | if (range0_sizek == state->range_sizek) { |
448 | if (debug_print) | 452 | Dprintk("rangeX: %016lx - %016lx\n", |
449 | printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", | 453 | range0_basek<<10, |
450 | range0_basek<<10, | 454 | (range0_basek + state->range_sizek)<<10); |
451 | (range0_basek + state->range_sizek)<<10); | ||
452 | state->reg = range_to_mtrr(state->reg, range0_basek, | 455 | state->reg = range_to_mtrr(state->reg, range0_basek, |
453 | state->range_sizek, MTRR_TYPE_WRBACK); | 456 | state->range_sizek, MTRR_TYPE_WRBACK); |
454 | return 0; | 457 | return 0; |
455 | } | 458 | } |
456 | 459 | ||
457 | /* only cut back, when it is not the last */ | 460 | /* Only cut back when it is not the last: */ |
458 | if (sizek) { | 461 | if (sizek) { |
459 | while (range0_basek + range0_sizek > (basek + sizek)) { | 462 | while (range0_basek + range0_sizek > (basek + sizek)) { |
460 | if (range0_sizek >= chunk_sizek) | 463 | if (range0_sizek >= chunk_sizek) |
@@ -470,16 +473,16 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | |||
470 | second_try: | 473 | second_try: |
471 | range_basek = range0_basek + range0_sizek; | 474 | range_basek = range0_basek + range0_sizek; |
472 | 475 | ||
473 | /* one hole in the middle */ | 476 | /* One hole in the middle: */ |
474 | if (range_basek > basek && range_basek <= (basek + sizek)) | 477 | if (range_basek > basek && range_basek <= (basek + sizek)) |
475 | second_sizek = range_basek - basek; | 478 | second_sizek = range_basek - basek; |
476 | 479 | ||
477 | if (range0_sizek > state->range_sizek) { | 480 | if (range0_sizek > state->range_sizek) { |
478 | 481 | ||
479 | /* one hole in middle or at end */ | 482 | /* One hole in middle or at the end: */ |
480 | hole_sizek = range0_sizek - state->range_sizek - second_sizek; | 483 | hole_sizek = range0_sizek - state->range_sizek - second_sizek; |
481 | 484 | ||
482 | /* hole size should be less than half of range0 size */ | 485 | /* Hole size should be less than half of range0 size: */ |
483 | if (hole_sizek >= (range0_sizek >> 1) && | 486 | if (hole_sizek >= (range0_sizek >> 1) && |
484 | range0_sizek >= chunk_sizek) { | 487 | range0_sizek >= chunk_sizek) { |
485 | range0_sizek -= chunk_sizek; | 488 | range0_sizek -= chunk_sizek; |
@@ -491,32 +494,30 @@ second_try: | |||
491 | } | 494 | } |
492 | 495 | ||
493 | if (range0_sizek) { | 496 | if (range0_sizek) { |
494 | if (debug_print) | 497 | Dprintk("range0: %016lx - %016lx\n", |
495 | printk(KERN_DEBUG "range0: %016lx - %016lx\n", | 498 | range0_basek<<10, |
496 | range0_basek<<10, | 499 | (range0_basek + range0_sizek)<<10); |
497 | (range0_basek + range0_sizek)<<10); | ||
498 | state->reg = range_to_mtrr(state->reg, range0_basek, | 500 | state->reg = range_to_mtrr(state->reg, range0_basek, |
499 | range0_sizek, MTRR_TYPE_WRBACK); | 501 | range0_sizek, MTRR_TYPE_WRBACK); |
500 | } | 502 | } |
501 | 503 | ||
502 | if (range0_sizek < state->range_sizek) { | 504 | if (range0_sizek < state->range_sizek) { |
503 | /* need to handle left over */ | 505 | /* Need to handle left over range: */ |
504 | range_sizek = state->range_sizek - range0_sizek; | 506 | range_sizek = state->range_sizek - range0_sizek; |
505 | 507 | ||
506 | if (debug_print) | 508 | Dprintk("range: %016lx - %016lx\n", |
507 | printk(KERN_DEBUG "range: %016lx - %016lx\n", | 509 | range_basek<<10, |
508 | range_basek<<10, | 510 | (range_basek + range_sizek)<<10); |
509 | (range_basek + range_sizek)<<10); | 511 | |
510 | state->reg = range_to_mtrr(state->reg, range_basek, | 512 | state->reg = range_to_mtrr(state->reg, range_basek, |
511 | range_sizek, MTRR_TYPE_WRBACK); | 513 | range_sizek, MTRR_TYPE_WRBACK); |
512 | } | 514 | } |
513 | 515 | ||
514 | if (hole_sizek) { | 516 | if (hole_sizek) { |
515 | hole_basek = range_basek - hole_sizek - second_sizek; | 517 | hole_basek = range_basek - hole_sizek - second_sizek; |
516 | if (debug_print) | 518 | Dprintk("hole: %016lx - %016lx\n", |
517 | printk(KERN_DEBUG "hole: %016lx - %016lx\n", | 519 | hole_basek<<10, |
518 | hole_basek<<10, | 520 | (hole_basek + hole_sizek)<<10); |
519 | (hole_basek + hole_sizek)<<10); | ||
520 | state->reg = range_to_mtrr(state->reg, hole_basek, | 521 | state->reg = range_to_mtrr(state->reg, hole_basek, |
521 | hole_sizek, MTRR_TYPE_UNCACHABLE); | 522 | hole_sizek, MTRR_TYPE_UNCACHABLE); |
522 | } | 523 | } |
@@ -537,23 +538,23 @@ set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, | |||
537 | basek = base_pfn << (PAGE_SHIFT - 10); | 538 | basek = base_pfn << (PAGE_SHIFT - 10); |
538 | sizek = size_pfn << (PAGE_SHIFT - 10); | 539 | sizek = size_pfn << (PAGE_SHIFT - 10); |
539 | 540 | ||
540 | /* See if I can merge with the last range */ | 541 | /* See if I can merge with the last range: */ |
541 | if ((basek <= 1024) || | 542 | if ((basek <= 1024) || |
542 | (state->range_startk + state->range_sizek == basek)) { | 543 | (state->range_startk + state->range_sizek == basek)) { |
543 | unsigned long endk = basek + sizek; | 544 | unsigned long endk = basek + sizek; |
544 | state->range_sizek = endk - state->range_startk; | 545 | state->range_sizek = endk - state->range_startk; |
545 | return; | 546 | return; |
546 | } | 547 | } |
547 | /* Write the range mtrrs */ | 548 | /* Write the range mtrrs: */ |
548 | if (state->range_sizek != 0) | 549 | if (state->range_sizek != 0) |
549 | second_sizek = range_to_mtrr_with_hole(state, basek, sizek); | 550 | second_sizek = range_to_mtrr_with_hole(state, basek, sizek); |
550 | 551 | ||
551 | /* Allocate an msr */ | 552 | /* Allocate an msr: */ |
552 | state->range_startk = basek + second_sizek; | 553 | state->range_startk = basek + second_sizek; |
553 | state->range_sizek = sizek - second_sizek; | 554 | state->range_sizek = sizek - second_sizek; |
554 | } | 555 | } |
555 | 556 | ||
556 | /* mininum size of mtrr block that can take hole */ | 557 | /* Mininum size of mtrr block that can take hole: */ |
557 | static u64 mtrr_chunk_size __initdata = (256ULL<<20); | 558 | static u64 mtrr_chunk_size __initdata = (256ULL<<20); |
558 | 559 | ||
559 | static int __init parse_mtrr_chunk_size_opt(char *p) | 560 | static int __init parse_mtrr_chunk_size_opt(char *p) |
@@ -565,7 +566,7 @@ static int __init parse_mtrr_chunk_size_opt(char *p) | |||
565 | } | 566 | } |
566 | early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); | 567 | early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); |
567 | 568 | ||
568 | /* granity of mtrr of block */ | 569 | /* Granularity of mtrr of block: */ |
569 | static u64 mtrr_gran_size __initdata; | 570 | static u64 mtrr_gran_size __initdata; |
570 | 571 | ||
571 | static int __init parse_mtrr_gran_size_opt(char *p) | 572 | static int __init parse_mtrr_gran_size_opt(char *p) |
@@ -577,7 +578,7 @@ static int __init parse_mtrr_gran_size_opt(char *p) | |||
577 | } | 578 | } |
578 | early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); | 579 | early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); |
579 | 580 | ||
580 | static int nr_mtrr_spare_reg __initdata = | 581 | static unsigned long nr_mtrr_spare_reg __initdata = |
581 | CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; | 582 | CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; |
582 | 583 | ||
583 | static int __init parse_mtrr_spare_reg(char *arg) | 584 | static int __init parse_mtrr_spare_reg(char *arg) |
@@ -586,7 +587,6 @@ static int __init parse_mtrr_spare_reg(char *arg) | |||
586 | nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); | 587 | nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); |
587 | return 0; | 588 | return 0; |
588 | } | 589 | } |
589 | |||
590 | early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); | 590 | early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); |
591 | 591 | ||
592 | static int __init | 592 | static int __init |
@@ -594,8 +594,8 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, | |||
594 | u64 chunk_size, u64 gran_size) | 594 | u64 chunk_size, u64 gran_size) |
595 | { | 595 | { |
596 | struct var_mtrr_state var_state; | 596 | struct var_mtrr_state var_state; |
597 | int i; | ||
598 | int num_reg; | 597 | int num_reg; |
598 | int i; | ||
599 | 599 | ||
600 | var_state.range_startk = 0; | 600 | var_state.range_startk = 0; |
601 | var_state.range_sizek = 0; | 601 | var_state.range_sizek = 0; |
@@ -605,17 +605,18 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, | |||
605 | 605 | ||
606 | memset(range_state, 0, sizeof(range_state)); | 606 | memset(range_state, 0, sizeof(range_state)); |
607 | 607 | ||
608 | /* Write the range etc */ | 608 | /* Write the range: */ |
609 | for (i = 0; i < nr_range; i++) | 609 | for (i = 0; i < nr_range; i++) { |
610 | set_var_mtrr_range(&var_state, range[i].start, | 610 | set_var_mtrr_range(&var_state, range[i].start, |
611 | range[i].end - range[i].start + 1); | 611 | range[i].end - range[i].start + 1); |
612 | } | ||
612 | 613 | ||
613 | /* Write the last range */ | 614 | /* Write the last range: */ |
614 | if (var_state.range_sizek != 0) | 615 | if (var_state.range_sizek != 0) |
615 | range_to_mtrr_with_hole(&var_state, 0, 0); | 616 | range_to_mtrr_with_hole(&var_state, 0, 0); |
616 | 617 | ||
617 | num_reg = var_state.reg; | 618 | num_reg = var_state.reg; |
618 | /* Clear out the extra MTRR's */ | 619 | /* Clear out the extra MTRR's: */ |
619 | while (var_state.reg < num_var_ranges) { | 620 | while (var_state.reg < num_var_ranges) { |
620 | save_var_mtrr(var_state.reg, 0, 0, 0); | 621 | save_var_mtrr(var_state.reg, 0, 0, 0); |
621 | var_state.reg++; | 622 | var_state.reg++; |
@@ -625,11 +626,11 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, | |||
625 | } | 626 | } |
626 | 627 | ||
627 | struct mtrr_cleanup_result { | 628 | struct mtrr_cleanup_result { |
628 | unsigned long gran_sizek; | 629 | unsigned long gran_sizek; |
629 | unsigned long chunk_sizek; | 630 | unsigned long chunk_sizek; |
630 | unsigned long lose_cover_sizek; | 631 | unsigned long lose_cover_sizek; |
631 | unsigned int num_reg; | 632 | unsigned int num_reg; |
632 | int bad; | 633 | int bad; |
633 | }; | 634 | }; |
634 | 635 | ||
635 | /* | 636 | /* |
@@ -645,10 +646,10 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM]; | |||
645 | 646 | ||
646 | static void __init print_out_mtrr_range_state(void) | 647 | static void __init print_out_mtrr_range_state(void) |
647 | { | 648 | { |
648 | int i; | ||
649 | char start_factor = 'K', size_factor = 'K'; | 649 | char start_factor = 'K', size_factor = 'K'; |
650 | unsigned long start_base, size_base; | 650 | unsigned long start_base, size_base; |
651 | mtrr_type type; | 651 | mtrr_type type; |
652 | int i; | ||
652 | 653 | ||
653 | for (i = 0; i < num_var_ranges; i++) { | 654 | for (i = 0; i < num_var_ranges; i++) { |
654 | 655 | ||
@@ -676,10 +677,10 @@ static int __init mtrr_need_cleanup(void) | |||
676 | int i; | 677 | int i; |
677 | mtrr_type type; | 678 | mtrr_type type; |
678 | unsigned long size; | 679 | unsigned long size; |
679 | /* extra one for all 0 */ | 680 | /* Extra one for all 0: */ |
680 | int num[MTRR_NUM_TYPES + 1]; | 681 | int num[MTRR_NUM_TYPES + 1]; |
681 | 682 | ||
682 | /* check entries number */ | 683 | /* Check entries number: */ |
683 | memset(num, 0, sizeof(num)); | 684 | memset(num, 0, sizeof(num)); |
684 | for (i = 0; i < num_var_ranges; i++) { | 685 | for (i = 0; i < num_var_ranges; i++) { |
685 | type = range_state[i].type; | 686 | type = range_state[i].type; |
@@ -693,88 +694,86 @@ static int __init mtrr_need_cleanup(void) | |||
693 | num[type]++; | 694 | num[type]++; |
694 | } | 695 | } |
695 | 696 | ||
696 | /* check if we got UC entries */ | 697 | /* Check if we got UC entries: */ |
697 | if (!num[MTRR_TYPE_UNCACHABLE]) | 698 | if (!num[MTRR_TYPE_UNCACHABLE]) |
698 | return 0; | 699 | return 0; |
699 | 700 | ||
700 | /* check if we only had WB and UC */ | 701 | /* Check if we only had WB and UC */ |
701 | if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != | 702 | if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != |
702 | num_var_ranges - num[MTRR_NUM_TYPES]) | 703 | num_var_ranges - num[MTRR_NUM_TYPES]) |
703 | return 0; | 704 | return 0; |
704 | 705 | ||
705 | return 1; | 706 | return 1; |
706 | } | 707 | } |
707 | 708 | ||
708 | static unsigned long __initdata range_sums; | 709 | static unsigned long __initdata range_sums; |
709 | static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size, | 710 | |
710 | unsigned long extra_remove_base, | 711 | static void __init |
711 | unsigned long extra_remove_size, | 712 | mtrr_calc_range_state(u64 chunk_size, u64 gran_size, |
712 | int i) | 713 | unsigned long x_remove_base, |
714 | unsigned long x_remove_size, int i) | ||
713 | { | 715 | { |
714 | int num_reg; | ||
715 | static struct res_range range_new[RANGE_NUM]; | 716 | static struct res_range range_new[RANGE_NUM]; |
716 | static int nr_range_new; | ||
717 | unsigned long range_sums_new; | 717 | unsigned long range_sums_new; |
718 | static int nr_range_new; | ||
719 | int num_reg; | ||
718 | 720 | ||
719 | /* convert ranges to var ranges state */ | 721 | /* Convert ranges to var ranges state: */ |
720 | num_reg = x86_setup_var_mtrrs(range, nr_range, | 722 | num_reg = x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); |
721 | chunk_size, gran_size); | ||
722 | 723 | ||
723 | /* we got new setting in range_state, check it */ | 724 | /* We got new setting in range_state, check it: */ |
724 | memset(range_new, 0, sizeof(range_new)); | 725 | memset(range_new, 0, sizeof(range_new)); |
725 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | 726 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, |
726 | extra_remove_base, extra_remove_size); | 727 | x_remove_base, x_remove_size); |
727 | range_sums_new = sum_ranges(range_new, nr_range_new); | 728 | range_sums_new = sum_ranges(range_new, nr_range_new); |
728 | 729 | ||
729 | result[i].chunk_sizek = chunk_size >> 10; | 730 | result[i].chunk_sizek = chunk_size >> 10; |
730 | result[i].gran_sizek = gran_size >> 10; | 731 | result[i].gran_sizek = gran_size >> 10; |
731 | result[i].num_reg = num_reg; | 732 | result[i].num_reg = num_reg; |
733 | |||
732 | if (range_sums < range_sums_new) { | 734 | if (range_sums < range_sums_new) { |
733 | result[i].lose_cover_sizek = | 735 | result[i].lose_cover_sizek = (range_sums_new - range_sums) << PSHIFT; |
734 | (range_sums_new - range_sums) << PSHIFT; | ||
735 | result[i].bad = 1; | 736 | result[i].bad = 1; |
736 | } else | 737 | } else { |
737 | result[i].lose_cover_sizek = | 738 | result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT; |
738 | (range_sums - range_sums_new) << PSHIFT; | 739 | } |
739 | 740 | ||
740 | /* double check it */ | 741 | /* Double check it: */ |
741 | if (!result[i].bad && !result[i].lose_cover_sizek) { | 742 | if (!result[i].bad && !result[i].lose_cover_sizek) { |
742 | if (nr_range_new != nr_range || | 743 | if (nr_range_new != nr_range || memcmp(range, range_new, sizeof(range))) |
743 | memcmp(range, range_new, sizeof(range))) | 744 | result[i].bad = 1; |
744 | result[i].bad = 1; | ||
745 | } | 745 | } |
746 | 746 | ||
747 | if (!result[i].bad && (range_sums - range_sums_new < | 747 | if (!result[i].bad && (range_sums - range_sums_new < min_loss_pfn[num_reg])) |
748 | min_loss_pfn[num_reg])) { | 748 | min_loss_pfn[num_reg] = range_sums - range_sums_new; |
749 | min_loss_pfn[num_reg] = | ||
750 | range_sums - range_sums_new; | ||
751 | } | ||
752 | } | 749 | } |
753 | 750 | ||
754 | static void __init mtrr_print_out_one_result(int i) | 751 | static void __init mtrr_print_out_one_result(int i) |
755 | { | 752 | { |
756 | char gran_factor, chunk_factor, lose_factor; | ||
757 | unsigned long gran_base, chunk_base, lose_base; | 753 | unsigned long gran_base, chunk_base, lose_base; |
754 | char gran_factor, chunk_factor, lose_factor; | ||
758 | 755 | ||
759 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | 756 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), |
760 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | 757 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), |
761 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | 758 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), |
762 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | 759 | |
763 | result[i].bad ? "*BAD*" : " ", | 760 | pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t", |
764 | gran_base, gran_factor, chunk_base, chunk_factor); | 761 | result[i].bad ? "*BAD*" : " ", |
765 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | 762 | gran_base, gran_factor, chunk_base, chunk_factor); |
766 | result[i].num_reg, result[i].bad ? "-" : "", | 763 | pr_cont("num_reg: %d \tlose cover RAM: %s%ld%c\n", |
767 | lose_base, lose_factor); | 764 | result[i].num_reg, result[i].bad ? "-" : "", |
765 | lose_base, lose_factor); | ||
768 | } | 766 | } |
769 | 767 | ||
770 | static int __init mtrr_search_optimal_index(void) | 768 | static int __init mtrr_search_optimal_index(void) |
771 | { | 769 | { |
772 | int i; | ||
773 | int num_reg_good; | 770 | int num_reg_good; |
774 | int index_good; | 771 | int index_good; |
772 | int i; | ||
775 | 773 | ||
776 | if (nr_mtrr_spare_reg >= num_var_ranges) | 774 | if (nr_mtrr_spare_reg >= num_var_ranges) |
777 | nr_mtrr_spare_reg = num_var_ranges - 1; | 775 | nr_mtrr_spare_reg = num_var_ranges - 1; |
776 | |||
778 | num_reg_good = -1; | 777 | num_reg_good = -1; |
779 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { | 778 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { |
780 | if (!min_loss_pfn[i]) | 779 | if (!min_loss_pfn[i]) |
@@ -796,24 +795,24 @@ static int __init mtrr_search_optimal_index(void) | |||
796 | return index_good; | 795 | return index_good; |
797 | } | 796 | } |
798 | 797 | ||
799 | |||
800 | int __init mtrr_cleanup(unsigned address_bits) | 798 | int __init mtrr_cleanup(unsigned address_bits) |
801 | { | 799 | { |
802 | unsigned long extra_remove_base, extra_remove_size; | 800 | unsigned long x_remove_base, x_remove_size; |
803 | unsigned long base, size, def, dummy; | 801 | unsigned long base, size, def, dummy; |
804 | mtrr_type type; | ||
805 | u64 chunk_size, gran_size; | 802 | u64 chunk_size, gran_size; |
803 | mtrr_type type; | ||
806 | int index_good; | 804 | int index_good; |
807 | int i; | 805 | int i; |
808 | 806 | ||
809 | if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) | 807 | if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) |
810 | return 0; | 808 | return 0; |
809 | |||
811 | rdmsr(MSR_MTRRdefType, def, dummy); | 810 | rdmsr(MSR_MTRRdefType, def, dummy); |
812 | def &= 0xff; | 811 | def &= 0xff; |
813 | if (def != MTRR_TYPE_UNCACHABLE) | 812 | if (def != MTRR_TYPE_UNCACHABLE) |
814 | return 0; | 813 | return 0; |
815 | 814 | ||
816 | /* get it and store it aside */ | 815 | /* Get it and store it aside: */ |
817 | memset(range_state, 0, sizeof(range_state)); | 816 | memset(range_state, 0, sizeof(range_state)); |
818 | for (i = 0; i < num_var_ranges; i++) { | 817 | for (i = 0; i < num_var_ranges; i++) { |
819 | mtrr_if->get(i, &base, &size, &type); | 818 | mtrr_if->get(i, &base, &size, &type); |
@@ -822,29 +821,28 @@ int __init mtrr_cleanup(unsigned address_bits) | |||
822 | range_state[i].type = type; | 821 | range_state[i].type = type; |
823 | } | 822 | } |
824 | 823 | ||
825 | /* check if we need handle it and can handle it */ | 824 | /* Check if we need handle it and can handle it: */ |
826 | if (!mtrr_need_cleanup()) | 825 | if (!mtrr_need_cleanup()) |
827 | return 0; | 826 | return 0; |
828 | 827 | ||
829 | /* print original var MTRRs at first, for debugging: */ | 828 | /* Print original var MTRRs at first, for debugging: */ |
830 | printk(KERN_DEBUG "original variable MTRRs\n"); | 829 | printk(KERN_DEBUG "original variable MTRRs\n"); |
831 | print_out_mtrr_range_state(); | 830 | print_out_mtrr_range_state(); |
832 | 831 | ||
833 | memset(range, 0, sizeof(range)); | 832 | memset(range, 0, sizeof(range)); |
834 | extra_remove_size = 0; | 833 | x_remove_size = 0; |
835 | extra_remove_base = 1 << (32 - PAGE_SHIFT); | 834 | x_remove_base = 1 << (32 - PAGE_SHIFT); |
836 | if (mtrr_tom2) | 835 | if (mtrr_tom2) |
837 | extra_remove_size = | 836 | x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base; |
838 | (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; | 837 | |
839 | nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, | 838 | nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size); |
840 | extra_remove_size); | ||
841 | /* | 839 | /* |
842 | * [0, 1M) should always be coverred by var mtrr with WB | 840 | * [0, 1M) should always be covered by var mtrr with WB |
843 | * and fixed mtrrs should take effective before var mtrr for it | 841 | * and fixed mtrrs should take effect before var mtrr for it: |
844 | */ | 842 | */ |
845 | nr_range = add_range_with_merge(range, nr_range, 0, | 843 | nr_range = add_range_with_merge(range, nr_range, 0, |
846 | (1ULL<<(20 - PAGE_SHIFT)) - 1); | 844 | (1ULL<<(20 - PAGE_SHIFT)) - 1); |
847 | /* sort the ranges */ | 845 | /* Sort the ranges: */ |
848 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | 846 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); |
849 | 847 | ||
850 | range_sums = sum_ranges(range, nr_range); | 848 | range_sums = sum_ranges(range, nr_range); |
@@ -854,7 +852,7 @@ int __init mtrr_cleanup(unsigned address_bits) | |||
854 | if (mtrr_chunk_size && mtrr_gran_size) { | 852 | if (mtrr_chunk_size && mtrr_gran_size) { |
855 | i = 0; | 853 | i = 0; |
856 | mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, | 854 | mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, |
857 | extra_remove_base, extra_remove_size, i); | 855 | x_remove_base, x_remove_size, i); |
858 | 856 | ||
859 | mtrr_print_out_one_result(i); | 857 | mtrr_print_out_one_result(i); |
860 | 858 | ||
@@ -880,7 +878,7 @@ int __init mtrr_cleanup(unsigned address_bits) | |||
880 | continue; | 878 | continue; |
881 | 879 | ||
882 | mtrr_calc_range_state(chunk_size, gran_size, | 880 | mtrr_calc_range_state(chunk_size, gran_size, |
883 | extra_remove_base, extra_remove_size, i); | 881 | x_remove_base, x_remove_size, i); |
884 | if (debug_print) { | 882 | if (debug_print) { |
885 | mtrr_print_out_one_result(i); | 883 | mtrr_print_out_one_result(i); |
886 | printk(KERN_INFO "\n"); | 884 | printk(KERN_INFO "\n"); |
@@ -890,7 +888,7 @@ int __init mtrr_cleanup(unsigned address_bits) | |||
890 | } | 888 | } |
891 | } | 889 | } |
892 | 890 | ||
893 | /* try to find the optimal index */ | 891 | /* Try to find the optimal index: */ |
894 | index_good = mtrr_search_optimal_index(); | 892 | index_good = mtrr_search_optimal_index(); |
895 | 893 | ||
896 | if (index_good != -1) { | 894 | if (index_good != -1) { |
@@ -898,7 +896,7 @@ int __init mtrr_cleanup(unsigned address_bits) | |||
898 | i = index_good; | 896 | i = index_good; |
899 | mtrr_print_out_one_result(i); | 897 | mtrr_print_out_one_result(i); |
900 | 898 | ||
901 | /* convert ranges to var ranges state */ | 899 | /* Convert ranges to var ranges state: */ |
902 | chunk_size = result[i].chunk_sizek; | 900 | chunk_size = result[i].chunk_sizek; |
903 | chunk_size <<= 10; | 901 | chunk_size <<= 10; |
904 | gran_size = result[i].gran_sizek; | 902 | gran_size = result[i].gran_sizek; |
@@ -941,8 +939,8 @@ early_param("disable_mtrr_trim", disable_mtrr_trim_setup); | |||
941 | * Note this won't check if the MTRRs < 4GB where the magic bit doesn't | 939 | * Note this won't check if the MTRRs < 4GB where the magic bit doesn't |
942 | * apply to are wrong, but so far we don't know of any such case in the wild. | 940 | * apply to are wrong, but so far we don't know of any such case in the wild. |
943 | */ | 941 | */ |
944 | #define Tom2Enabled (1U << 21) | 942 | #define Tom2Enabled (1U << 21) |
945 | #define Tom2ForceMemTypeWB (1U << 22) | 943 | #define Tom2ForceMemTypeWB (1U << 22) |
946 | 944 | ||
947 | int __init amd_special_default_mtrr(void) | 945 | int __init amd_special_default_mtrr(void) |
948 | { | 946 | { |
@@ -952,7 +950,7 @@ int __init amd_special_default_mtrr(void) | |||
952 | return 0; | 950 | return 0; |
953 | if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) | 951 | if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) |
954 | return 0; | 952 | return 0; |
955 | /* In case some hypervisor doesn't pass SYSCFG through */ | 953 | /* In case some hypervisor doesn't pass SYSCFG through: */ |
956 | if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) | 954 | if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) |
957 | return 0; | 955 | return 0; |
958 | /* | 956 | /* |
@@ -965,19 +963,21 @@ int __init amd_special_default_mtrr(void) | |||
965 | return 0; | 963 | return 0; |
966 | } | 964 | } |
967 | 965 | ||
968 | static u64 __init real_trim_memory(unsigned long start_pfn, | 966 | static u64 __init |
969 | unsigned long limit_pfn) | 967 | real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn) |
970 | { | 968 | { |
971 | u64 trim_start, trim_size; | 969 | u64 trim_start, trim_size; |
970 | |||
972 | trim_start = start_pfn; | 971 | trim_start = start_pfn; |
973 | trim_start <<= PAGE_SHIFT; | 972 | trim_start <<= PAGE_SHIFT; |
973 | |||
974 | trim_size = limit_pfn; | 974 | trim_size = limit_pfn; |
975 | trim_size <<= PAGE_SHIFT; | 975 | trim_size <<= PAGE_SHIFT; |
976 | trim_size -= trim_start; | 976 | trim_size -= trim_start; |
977 | 977 | ||
978 | return e820_update_range(trim_start, trim_size, E820_RAM, | 978 | return e820_update_range(trim_start, trim_size, E820_RAM, E820_RESERVED); |
979 | E820_RESERVED); | ||
980 | } | 979 | } |
980 | |||
981 | /** | 981 | /** |
982 | * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs | 982 | * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs |
983 | * @end_pfn: ending page frame number | 983 | * @end_pfn: ending page frame number |
@@ -985,7 +985,7 @@ static u64 __init real_trim_memory(unsigned long start_pfn, | |||
985 | * Some buggy BIOSes don't setup the MTRRs properly for systems with certain | 985 | * Some buggy BIOSes don't setup the MTRRs properly for systems with certain |
986 | * memory configurations. This routine checks that the highest MTRR matches | 986 | * memory configurations. This routine checks that the highest MTRR matches |
987 | * the end of memory, to make sure the MTRRs having a write back type cover | 987 | * the end of memory, to make sure the MTRRs having a write back type cover |
988 | * all of the memory the kernel is intending to use. If not, it'll trim any | 988 | * all of the memory the kernel is intending to use. If not, it'll trim any |
989 | * memory off the end by adjusting end_pfn, removing it from the kernel's | 989 | * memory off the end by adjusting end_pfn, removing it from the kernel's |
990 | * allocation pools, warning the user with an obnoxious message. | 990 | * allocation pools, warning the user with an obnoxious message. |
991 | */ | 991 | */ |
@@ -994,21 +994,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
994 | unsigned long i, base, size, highest_pfn = 0, def, dummy; | 994 | unsigned long i, base, size, highest_pfn = 0, def, dummy; |
995 | mtrr_type type; | 995 | mtrr_type type; |
996 | u64 total_trim_size; | 996 | u64 total_trim_size; |
997 | |||
998 | /* extra one for all 0 */ | 997 | /* extra one for all 0 */ |
999 | int num[MTRR_NUM_TYPES + 1]; | 998 | int num[MTRR_NUM_TYPES + 1]; |
999 | |||
1000 | /* | 1000 | /* |
1001 | * Make sure we only trim uncachable memory on machines that | 1001 | * Make sure we only trim uncachable memory on machines that |
1002 | * support the Intel MTRR architecture: | 1002 | * support the Intel MTRR architecture: |
1003 | */ | 1003 | */ |
1004 | if (!is_cpu(INTEL) || disable_mtrr_trim) | 1004 | if (!is_cpu(INTEL) || disable_mtrr_trim) |
1005 | return 0; | 1005 | return 0; |
1006 | |||
1006 | rdmsr(MSR_MTRRdefType, def, dummy); | 1007 | rdmsr(MSR_MTRRdefType, def, dummy); |
1007 | def &= 0xff; | 1008 | def &= 0xff; |
1008 | if (def != MTRR_TYPE_UNCACHABLE) | 1009 | if (def != MTRR_TYPE_UNCACHABLE) |
1009 | return 0; | 1010 | return 0; |
1010 | 1011 | ||
1011 | /* get it and store it aside */ | 1012 | /* Get it and store it aside: */ |
1012 | memset(range_state, 0, sizeof(range_state)); | 1013 | memset(range_state, 0, sizeof(range_state)); |
1013 | for (i = 0; i < num_var_ranges; i++) { | 1014 | for (i = 0; i < num_var_ranges; i++) { |
1014 | mtrr_if->get(i, &base, &size, &type); | 1015 | mtrr_if->get(i, &base, &size, &type); |
@@ -1017,7 +1018,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1017 | range_state[i].type = type; | 1018 | range_state[i].type = type; |
1018 | } | 1019 | } |
1019 | 1020 | ||
1020 | /* Find highest cached pfn */ | 1021 | /* Find highest cached pfn: */ |
1021 | for (i = 0; i < num_var_ranges; i++) { | 1022 | for (i = 0; i < num_var_ranges; i++) { |
1022 | type = range_state[i].type; | 1023 | type = range_state[i].type; |
1023 | if (type != MTRR_TYPE_WRBACK) | 1024 | if (type != MTRR_TYPE_WRBACK) |
@@ -1028,13 +1029,13 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1028 | highest_pfn = base + size; | 1029 | highest_pfn = base + size; |
1029 | } | 1030 | } |
1030 | 1031 | ||
1031 | /* kvm/qemu doesn't have mtrr set right, don't trim them all */ | 1032 | /* kvm/qemu doesn't have mtrr set right, don't trim them all: */ |
1032 | if (!highest_pfn) { | 1033 | if (!highest_pfn) { |
1033 | printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); | 1034 | printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); |
1034 | return 0; | 1035 | return 0; |
1035 | } | 1036 | } |
1036 | 1037 | ||
1037 | /* check entries number */ | 1038 | /* Check entries number: */ |
1038 | memset(num, 0, sizeof(num)); | 1039 | memset(num, 0, sizeof(num)); |
1039 | for (i = 0; i < num_var_ranges; i++) { | 1040 | for (i = 0; i < num_var_ranges; i++) { |
1040 | type = range_state[i].type; | 1041 | type = range_state[i].type; |
@@ -1046,11 +1047,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1046 | num[type]++; | 1047 | num[type]++; |
1047 | } | 1048 | } |
1048 | 1049 | ||
1049 | /* no entry for WB? */ | 1050 | /* No entry for WB? */ |
1050 | if (!num[MTRR_TYPE_WRBACK]) | 1051 | if (!num[MTRR_TYPE_WRBACK]) |
1051 | return 0; | 1052 | return 0; |
1052 | 1053 | ||
1053 | /* check if we only had WB and UC */ | 1054 | /* Check if we only had WB and UC: */ |
1054 | if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != | 1055 | if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != |
1055 | num_var_ranges - num[MTRR_NUM_TYPES]) | 1056 | num_var_ranges - num[MTRR_NUM_TYPES]) |
1056 | return 0; | 1057 | return 0; |
@@ -1066,31 +1067,31 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1066 | } | 1067 | } |
1067 | nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); | 1068 | nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); |
1068 | 1069 | ||
1070 | /* Check the head: */ | ||
1069 | total_trim_size = 0; | 1071 | total_trim_size = 0; |
1070 | /* check the head */ | ||
1071 | if (range[0].start) | 1072 | if (range[0].start) |
1072 | total_trim_size += real_trim_memory(0, range[0].start); | 1073 | total_trim_size += real_trim_memory(0, range[0].start); |
1073 | /* check the holes */ | 1074 | |
1075 | /* Check the holes: */ | ||
1074 | for (i = 0; i < nr_range - 1; i++) { | 1076 | for (i = 0; i < nr_range - 1; i++) { |
1075 | if (range[i].end + 1 < range[i+1].start) | 1077 | if (range[i].end + 1 < range[i+1].start) |
1076 | total_trim_size += real_trim_memory(range[i].end + 1, | 1078 | total_trim_size += real_trim_memory(range[i].end + 1, |
1077 | range[i+1].start); | 1079 | range[i+1].start); |
1078 | } | 1080 | } |
1079 | /* check the top */ | 1081 | |
1082 | /* Check the top: */ | ||
1080 | i = nr_range - 1; | 1083 | i = nr_range - 1; |
1081 | if (range[i].end + 1 < end_pfn) | 1084 | if (range[i].end + 1 < end_pfn) |
1082 | total_trim_size += real_trim_memory(range[i].end + 1, | 1085 | total_trim_size += real_trim_memory(range[i].end + 1, |
1083 | end_pfn); | 1086 | end_pfn); |
1084 | 1087 | ||
1085 | if (total_trim_size) { | 1088 | if (total_trim_size) { |
1086 | printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" | 1089 | pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20); |
1087 | " all of memory, losing %lluMB of RAM.\n", | ||
1088 | total_trim_size >> 20); | ||
1089 | 1090 | ||
1090 | if (!changed_by_mtrr_cleanup) | 1091 | if (!changed_by_mtrr_cleanup) |
1091 | WARN_ON(1); | 1092 | WARN_ON(1); |
1092 | 1093 | ||
1093 | printk(KERN_INFO "update e820 for mtrr\n"); | 1094 | pr_info("update e820 for mtrr\n"); |
1094 | update_e820(); | 1095 | update_e820(); |
1095 | 1096 | ||
1096 | return 1; | 1097 | return 1; |
@@ -1098,4 +1099,3 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1098 | 1099 | ||
1099 | return 0; | 1100 | return 0; |
1100 | } | 1101 | } |
1101 | |||
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index ff14c320040c..228d982ce09c 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c | |||
@@ -1,38 +1,40 @@ | |||
1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
2 | #include <linux/io.h> | ||
2 | #include <linux/mm.h> | 3 | #include <linux/mm.h> |
3 | #include <asm/mtrr.h> | 4 | |
4 | #include <asm/msr.h> | ||
5 | #include <asm/io.h> | ||
6 | #include <asm/processor-cyrix.h> | 5 | #include <asm/processor-cyrix.h> |
7 | #include <asm/processor-flags.h> | 6 | #include <asm/processor-flags.h> |
7 | #include <asm/mtrr.h> | ||
8 | #include <asm/msr.h> | ||
9 | |||
8 | #include "mtrr.h" | 10 | #include "mtrr.h" |
9 | 11 | ||
10 | static void | 12 | static void |
11 | cyrix_get_arr(unsigned int reg, unsigned long *base, | 13 | cyrix_get_arr(unsigned int reg, unsigned long *base, |
12 | unsigned long *size, mtrr_type * type) | 14 | unsigned long *size, mtrr_type * type) |
13 | { | 15 | { |
14 | unsigned long flags; | ||
15 | unsigned char arr, ccr3, rcr, shift; | 16 | unsigned char arr, ccr3, rcr, shift; |
17 | unsigned long flags; | ||
16 | 18 | ||
17 | arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ | 19 | arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ |
18 | 20 | ||
19 | /* Save flags and disable interrupts */ | ||
20 | local_irq_save(flags); | 21 | local_irq_save(flags); |
21 | 22 | ||
22 | ccr3 = getCx86(CX86_CCR3); | 23 | ccr3 = getCx86(CX86_CCR3); |
23 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | 24 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ |
24 | ((unsigned char *) base)[3] = getCx86(arr); | 25 | ((unsigned char *)base)[3] = getCx86(arr); |
25 | ((unsigned char *) base)[2] = getCx86(arr + 1); | 26 | ((unsigned char *)base)[2] = getCx86(arr + 1); |
26 | ((unsigned char *) base)[1] = getCx86(arr + 2); | 27 | ((unsigned char *)base)[1] = getCx86(arr + 2); |
27 | rcr = getCx86(CX86_RCR_BASE + reg); | 28 | rcr = getCx86(CX86_RCR_BASE + reg); |
28 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | 29 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ |
29 | 30 | ||
30 | /* Enable interrupts if it was enabled previously */ | ||
31 | local_irq_restore(flags); | 31 | local_irq_restore(flags); |
32 | |||
32 | shift = ((unsigned char *) base)[1] & 0x0f; | 33 | shift = ((unsigned char *) base)[1] & 0x0f; |
33 | *base >>= PAGE_SHIFT; | 34 | *base >>= PAGE_SHIFT; |
34 | 35 | ||
35 | /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7 | 36 | /* |
37 | * Power of two, at least 4K on ARR0-ARR6, 256K on ARR7 | ||
36 | * Note: shift==0xf means 4G, this is unsupported. | 38 | * Note: shift==0xf means 4G, this is unsupported. |
37 | */ | 39 | */ |
38 | if (shift) | 40 | if (shift) |
@@ -76,17 +78,20 @@ cyrix_get_arr(unsigned int reg, unsigned long *base, | |||
76 | } | 78 | } |
77 | } | 79 | } |
78 | 80 | ||
81 | /* | ||
82 | * cyrix_get_free_region - get a free ARR. | ||
83 | * | ||
84 | * @base: the starting (base) address of the region. | ||
85 | * @size: the size (in bytes) of the region. | ||
86 | * | ||
87 | * Returns: the index of the region on success, else -1 on error. | ||
88 | */ | ||
79 | static int | 89 | static int |
80 | cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) | 90 | cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) |
81 | /* [SUMMARY] Get a free ARR. | ||
82 | <base> The starting (base) address of the region. | ||
83 | <size> The size (in bytes) of the region. | ||
84 | [RETURNS] The index of the region on success, else -1 on error. | ||
85 | */ | ||
86 | { | 91 | { |
87 | int i; | ||
88 | mtrr_type ltype; | ||
89 | unsigned long lbase, lsize; | 92 | unsigned long lbase, lsize; |
93 | mtrr_type ltype; | ||
94 | int i; | ||
90 | 95 | ||
91 | switch (replace_reg) { | 96 | switch (replace_reg) { |
92 | case 7: | 97 | case 7: |
@@ -107,14 +112,17 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) | |||
107 | cyrix_get_arr(7, &lbase, &lsize, &ltype); | 112 | cyrix_get_arr(7, &lbase, &lsize, &ltype); |
108 | if (lsize == 0) | 113 | if (lsize == 0) |
109 | return 7; | 114 | return 7; |
110 | /* Else try ARR0-ARR6 first */ | 115 | /* Else try ARR0-ARR6 first */ |
111 | } else { | 116 | } else { |
112 | for (i = 0; i < 7; i++) { | 117 | for (i = 0; i < 7; i++) { |
113 | cyrix_get_arr(i, &lbase, &lsize, &ltype); | 118 | cyrix_get_arr(i, &lbase, &lsize, &ltype); |
114 | if (lsize == 0) | 119 | if (lsize == 0) |
115 | return i; | 120 | return i; |
116 | } | 121 | } |
117 | /* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */ | 122 | /* |
123 | * ARR0-ARR6 isn't free | ||
124 | * try ARR7 but its size must be at least 256K | ||
125 | */ | ||
118 | cyrix_get_arr(i, &lbase, &lsize, &ltype); | 126 | cyrix_get_arr(i, &lbase, &lsize, &ltype); |
119 | if ((lsize == 0) && (size >= 0x40)) | 127 | if ((lsize == 0) && (size >= 0x40)) |
120 | return i; | 128 | return i; |
@@ -122,21 +130,22 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) | |||
122 | return -ENOSPC; | 130 | return -ENOSPC; |
123 | } | 131 | } |
124 | 132 | ||
125 | static u32 cr4 = 0; | 133 | static u32 cr4, ccr3; |
126 | static u32 ccr3; | ||
127 | 134 | ||
128 | static void prepare_set(void) | 135 | static void prepare_set(void) |
129 | { | 136 | { |
130 | u32 cr0; | 137 | u32 cr0; |
131 | 138 | ||
132 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ | 139 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ |
133 | if ( cpu_has_pge ) { | 140 | if (cpu_has_pge) { |
134 | cr4 = read_cr4(); | 141 | cr4 = read_cr4(); |
135 | write_cr4(cr4 & ~X86_CR4_PGE); | 142 | write_cr4(cr4 & ~X86_CR4_PGE); |
136 | } | 143 | } |
137 | 144 | ||
138 | /* Disable and flush caches. Note that wbinvd flushes the TLBs as | 145 | /* |
139 | a side-effect */ | 146 | * Disable and flush caches. |
147 | * Note that wbinvd flushes the TLBs as a side-effect | ||
148 | */ | ||
140 | cr0 = read_cr0() | X86_CR0_CD; | 149 | cr0 = read_cr0() | X86_CR0_CD; |
141 | wbinvd(); | 150 | wbinvd(); |
142 | write_cr0(cr0); | 151 | write_cr0(cr0); |
@@ -147,22 +156,21 @@ static void prepare_set(void) | |||
147 | 156 | ||
148 | /* Cyrix ARRs - everything else was excluded at the top */ | 157 | /* Cyrix ARRs - everything else was excluded at the top */ |
149 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); | 158 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); |
150 | |||
151 | } | 159 | } |
152 | 160 | ||
153 | static void post_set(void) | 161 | static void post_set(void) |
154 | { | 162 | { |
155 | /* Flush caches and TLBs */ | 163 | /* Flush caches and TLBs */ |
156 | wbinvd(); | 164 | wbinvd(); |
157 | 165 | ||
158 | /* Cyrix ARRs - everything else was excluded at the top */ | 166 | /* Cyrix ARRs - everything else was excluded at the top */ |
159 | setCx86(CX86_CCR3, ccr3); | 167 | setCx86(CX86_CCR3, ccr3); |
160 | 168 | ||
161 | /* Enable caches */ | 169 | /* Enable caches */ |
162 | write_cr0(read_cr0() & 0xbfffffff); | 170 | write_cr0(read_cr0() & 0xbfffffff); |
163 | 171 | ||
164 | /* Restore value of CR4 */ | 172 | /* Restore value of CR4 */ |
165 | if ( cpu_has_pge ) | 173 | if (cpu_has_pge) |
166 | write_cr4(cr4); | 174 | write_cr4(cr4); |
167 | } | 175 | } |
168 | 176 | ||
@@ -178,7 +186,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, | |||
178 | size >>= 6; | 186 | size >>= 6; |
179 | 187 | ||
180 | size &= 0x7fff; /* make sure arr_size <= 14 */ | 188 | size &= 0x7fff; /* make sure arr_size <= 14 */ |
181 | for (arr_size = 0; size; arr_size++, size >>= 1) ; | 189 | for (arr_size = 0; size; arr_size++, size >>= 1) |
190 | ; | ||
182 | 191 | ||
183 | if (reg < 7) { | 192 | if (reg < 7) { |
184 | switch (type) { | 193 | switch (type) { |
@@ -215,18 +224,18 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, | |||
215 | prepare_set(); | 224 | prepare_set(); |
216 | 225 | ||
217 | base <<= PAGE_SHIFT; | 226 | base <<= PAGE_SHIFT; |
218 | setCx86(arr, ((unsigned char *) &base)[3]); | 227 | setCx86(arr + 0, ((unsigned char *)&base)[3]); |
219 | setCx86(arr + 1, ((unsigned char *) &base)[2]); | 228 | setCx86(arr + 1, ((unsigned char *)&base)[2]); |
220 | setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size); | 229 | setCx86(arr + 2, (((unsigned char *)&base)[1]) | arr_size); |
221 | setCx86(CX86_RCR_BASE + reg, arr_type); | 230 | setCx86(CX86_RCR_BASE + reg, arr_type); |
222 | 231 | ||
223 | post_set(); | 232 | post_set(); |
224 | } | 233 | } |
225 | 234 | ||
226 | typedef struct { | 235 | typedef struct { |
227 | unsigned long base; | 236 | unsigned long base; |
228 | unsigned long size; | 237 | unsigned long size; |
229 | mtrr_type type; | 238 | mtrr_type type; |
230 | } arr_state_t; | 239 | } arr_state_t; |
231 | 240 | ||
232 | static arr_state_t arr_state[8] = { | 241 | static arr_state_t arr_state[8] = { |
@@ -247,16 +256,17 @@ static void cyrix_set_all(void) | |||
247 | setCx86(CX86_CCR0 + i, ccr_state[i]); | 256 | setCx86(CX86_CCR0 + i, ccr_state[i]); |
248 | for (; i < 7; i++) | 257 | for (; i < 7; i++) |
249 | setCx86(CX86_CCR4 + i, ccr_state[i]); | 258 | setCx86(CX86_CCR4 + i, ccr_state[i]); |
250 | for (i = 0; i < 8; i++) | 259 | |
251 | cyrix_set_arr(i, arr_state[i].base, | 260 | for (i = 0; i < 8; i++) { |
261 | cyrix_set_arr(i, arr_state[i].base, | ||
252 | arr_state[i].size, arr_state[i].type); | 262 | arr_state[i].size, arr_state[i].type); |
263 | } | ||
253 | 264 | ||
254 | post_set(); | 265 | post_set(); |
255 | } | 266 | } |
256 | 267 | ||
257 | static struct mtrr_ops cyrix_mtrr_ops = { | 268 | static struct mtrr_ops cyrix_mtrr_ops = { |
258 | .vendor = X86_VENDOR_CYRIX, | 269 | .vendor = X86_VENDOR_CYRIX, |
259 | // .init = cyrix_arr_init, | ||
260 | .set_all = cyrix_set_all, | 270 | .set_all = cyrix_set_all, |
261 | .set = cyrix_set_arr, | 271 | .set = cyrix_set_arr, |
262 | .get = cyrix_get_arr, | 272 | .get = cyrix_get_arr, |
@@ -270,5 +280,3 @@ int __init cyrix_init_mtrr(void) | |||
270 | set_mtrr_ops(&cyrix_mtrr_ops); | 280 | set_mtrr_ops(&cyrix_mtrr_ops); |
271 | return 0; | 281 | return 0; |
272 | } | 282 | } |
273 | |||
274 | //arch_initcall(cyrix_init_mtrr); | ||
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 0543f69f0b27..55da0c5f68dd 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -1,28 +1,34 @@ | |||
1 | /* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong | 1 | /* |
2 | because MTRRs can span upto 40 bits (36bits on most modern x86) */ | 2 | * This only handles 32bit MTRR on 32bit hosts. This is strictly wrong |
3 | * because MTRRs can span upto 40 bits (36bits on most modern x86) | ||
4 | */ | ||
5 | #define DEBUG | ||
6 | |||
7 | #include <linux/module.h> | ||
3 | #include <linux/init.h> | 8 | #include <linux/init.h> |
4 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
10 | #include <linux/io.h> | ||
5 | #include <linux/mm.h> | 11 | #include <linux/mm.h> |
6 | #include <linux/module.h> | 12 | |
7 | #include <asm/io.h> | ||
8 | #include <asm/mtrr.h> | ||
9 | #include <asm/msr.h> | ||
10 | #include <asm/system.h> | ||
11 | #include <asm/cpufeature.h> | ||
12 | #include <asm/processor-flags.h> | 13 | #include <asm/processor-flags.h> |
14 | #include <asm/cpufeature.h> | ||
13 | #include <asm/tlbflush.h> | 15 | #include <asm/tlbflush.h> |
16 | #include <asm/system.h> | ||
17 | #include <asm/mtrr.h> | ||
18 | #include <asm/msr.h> | ||
14 | #include <asm/pat.h> | 19 | #include <asm/pat.h> |
20 | |||
15 | #include "mtrr.h" | 21 | #include "mtrr.h" |
16 | 22 | ||
17 | struct fixed_range_block { | 23 | struct fixed_range_block { |
18 | int base_msr; /* start address of an MTRR block */ | 24 | int base_msr; /* start address of an MTRR block */ |
19 | int ranges; /* number of MTRRs in this block */ | 25 | int ranges; /* number of MTRRs in this block */ |
20 | }; | 26 | }; |
21 | 27 | ||
22 | static struct fixed_range_block fixed_range_blocks[] = { | 28 | static struct fixed_range_block fixed_range_blocks[] = { |
23 | { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */ | 29 | { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */ |
24 | { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */ | 30 | { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */ |
25 | { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */ | 31 | { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */ |
26 | {} | 32 | {} |
27 | }; | 33 | }; |
28 | 34 | ||
@@ -30,10 +36,10 @@ static unsigned long smp_changes_mask; | |||
30 | static int mtrr_state_set; | 36 | static int mtrr_state_set; |
31 | u64 mtrr_tom2; | 37 | u64 mtrr_tom2; |
32 | 38 | ||
33 | struct mtrr_state_type mtrr_state = {}; | 39 | struct mtrr_state_type mtrr_state; |
34 | EXPORT_SYMBOL_GPL(mtrr_state); | 40 | EXPORT_SYMBOL_GPL(mtrr_state); |
35 | 41 | ||
36 | /** | 42 | /* |
37 | * BIOS is expected to clear MtrrFixDramModEn bit, see for example | 43 | * BIOS is expected to clear MtrrFixDramModEn bit, see for example |
38 | * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD | 44 | * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD |
39 | * Opteron Processors" (26094 Rev. 3.30 February 2006), section | 45 | * Opteron Processors" (26094 Rev. 3.30 February 2006), section |
@@ -104,9 +110,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) | |||
104 | * Look of multiple ranges matching this address and pick type | 110 | * Look of multiple ranges matching this address and pick type |
105 | * as per MTRR precedence | 111 | * as per MTRR precedence |
106 | */ | 112 | */ |
107 | if (!(mtrr_state.enabled & 2)) { | 113 | if (!(mtrr_state.enabled & 2)) |
108 | return mtrr_state.def_type; | 114 | return mtrr_state.def_type; |
109 | } | ||
110 | 115 | ||
111 | prev_match = 0xFF; | 116 | prev_match = 0xFF; |
112 | for (i = 0; i < num_var_ranges; ++i) { | 117 | for (i = 0; i < num_var_ranges; ++i) { |
@@ -125,9 +130,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) | |||
125 | if (start_state != end_state) | 130 | if (start_state != end_state) |
126 | return 0xFE; | 131 | return 0xFE; |
127 | 132 | ||
128 | if ((start & mask) != (base & mask)) { | 133 | if ((start & mask) != (base & mask)) |
129 | continue; | 134 | continue; |
130 | } | ||
131 | 135 | ||
132 | curr_match = mtrr_state.var_ranges[i].base_lo & 0xff; | 136 | curr_match = mtrr_state.var_ranges[i].base_lo & 0xff; |
133 | if (prev_match == 0xFF) { | 137 | if (prev_match == 0xFF) { |
@@ -148,9 +152,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) | |||
148 | curr_match = MTRR_TYPE_WRTHROUGH; | 152 | curr_match = MTRR_TYPE_WRTHROUGH; |
149 | } | 153 | } |
150 | 154 | ||
151 | if (prev_match != curr_match) { | 155 | if (prev_match != curr_match) |
152 | return MTRR_TYPE_UNCACHABLE; | 156 | return MTRR_TYPE_UNCACHABLE; |
153 | } | ||
154 | } | 157 | } |
155 | 158 | ||
156 | if (mtrr_tom2) { | 159 | if (mtrr_tom2) { |
@@ -164,7 +167,7 @@ u8 mtrr_type_lookup(u64 start, u64 end) | |||
164 | return mtrr_state.def_type; | 167 | return mtrr_state.def_type; |
165 | } | 168 | } |
166 | 169 | ||
167 | /* Get the MSR pair relating to a var range */ | 170 | /* Get the MSR pair relating to a var range */ |
168 | static void | 171 | static void |
169 | get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) | 172 | get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) |
170 | { | 173 | { |
@@ -172,7 +175,7 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) | |||
172 | rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); | 175 | rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); |
173 | } | 176 | } |
174 | 177 | ||
175 | /* fill the MSR pair relating to a var range */ | 178 | /* Fill the MSR pair relating to a var range */ |
176 | void fill_mtrr_var_range(unsigned int index, | 179 | void fill_mtrr_var_range(unsigned int index, |
177 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi) | 180 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi) |
178 | { | 181 | { |
@@ -186,10 +189,9 @@ void fill_mtrr_var_range(unsigned int index, | |||
186 | vr[index].mask_hi = mask_hi; | 189 | vr[index].mask_hi = mask_hi; |
187 | } | 190 | } |
188 | 191 | ||
189 | static void | 192 | static void get_fixed_ranges(mtrr_type *frs) |
190 | get_fixed_ranges(mtrr_type * frs) | ||
191 | { | 193 | { |
192 | unsigned int *p = (unsigned int *) frs; | 194 | unsigned int *p = (unsigned int *)frs; |
193 | int i; | 195 | int i; |
194 | 196 | ||
195 | k8_check_syscfg_dram_mod_en(); | 197 | k8_check_syscfg_dram_mod_en(); |
@@ -217,22 +219,22 @@ static void __init print_fixed_last(void) | |||
217 | if (!last_fixed_end) | 219 | if (!last_fixed_end) |
218 | return; | 220 | return; |
219 | 221 | ||
220 | printk(KERN_DEBUG " %05X-%05X %s\n", last_fixed_start, | 222 | pr_debug(" %05X-%05X %s\n", last_fixed_start, |
221 | last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type)); | 223 | last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type)); |
222 | 224 | ||
223 | last_fixed_end = 0; | 225 | last_fixed_end = 0; |
224 | } | 226 | } |
225 | 227 | ||
226 | static void __init update_fixed_last(unsigned base, unsigned end, | 228 | static void __init update_fixed_last(unsigned base, unsigned end, |
227 | mtrr_type type) | 229 | mtrr_type type) |
228 | { | 230 | { |
229 | last_fixed_start = base; | 231 | last_fixed_start = base; |
230 | last_fixed_end = end; | 232 | last_fixed_end = end; |
231 | last_fixed_type = type; | 233 | last_fixed_type = type; |
232 | } | 234 | } |
233 | 235 | ||
234 | static void __init print_fixed(unsigned base, unsigned step, | 236 | static void __init |
235 | const mtrr_type *types) | 237 | print_fixed(unsigned base, unsigned step, const mtrr_type *types) |
236 | { | 238 | { |
237 | unsigned i; | 239 | unsigned i; |
238 | 240 | ||
@@ -259,54 +261,55 @@ static void __init print_mtrr_state(void) | |||
259 | unsigned int i; | 261 | unsigned int i; |
260 | int high_width; | 262 | int high_width; |
261 | 263 | ||
262 | printk(KERN_DEBUG "MTRR default type: %s\n", | 264 | pr_debug("MTRR default type: %s\n", |
263 | mtrr_attrib_to_str(mtrr_state.def_type)); | 265 | mtrr_attrib_to_str(mtrr_state.def_type)); |
264 | if (mtrr_state.have_fixed) { | 266 | if (mtrr_state.have_fixed) { |
265 | printk(KERN_DEBUG "MTRR fixed ranges %sabled:\n", | 267 | pr_debug("MTRR fixed ranges %sabled:\n", |
266 | mtrr_state.enabled & 1 ? "en" : "dis"); | 268 | mtrr_state.enabled & 1 ? "en" : "dis"); |
267 | print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); | 269 | print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); |
268 | for (i = 0; i < 2; ++i) | 270 | for (i = 0; i < 2; ++i) |
269 | print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8); | 271 | print_fixed(0x80000 + i * 0x20000, 0x04000, |
272 | mtrr_state.fixed_ranges + (i + 1) * 8); | ||
270 | for (i = 0; i < 8; ++i) | 273 | for (i = 0; i < 8; ++i) |
271 | print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8); | 274 | print_fixed(0xC0000 + i * 0x08000, 0x01000, |
275 | mtrr_state.fixed_ranges + (i + 3) * 8); | ||
272 | 276 | ||
273 | /* tail */ | 277 | /* tail */ |
274 | print_fixed_last(); | 278 | print_fixed_last(); |
275 | } | 279 | } |
276 | printk(KERN_DEBUG "MTRR variable ranges %sabled:\n", | 280 | pr_debug("MTRR variable ranges %sabled:\n", |
277 | mtrr_state.enabled & 2 ? "en" : "dis"); | 281 | mtrr_state.enabled & 2 ? "en" : "dis"); |
278 | if (size_or_mask & 0xffffffffUL) | 282 | if (size_or_mask & 0xffffffffUL) |
279 | high_width = ffs(size_or_mask & 0xffffffffUL) - 1; | 283 | high_width = ffs(size_or_mask & 0xffffffffUL) - 1; |
280 | else | 284 | else |
281 | high_width = ffs(size_or_mask>>32) + 32 - 1; | 285 | high_width = ffs(size_or_mask>>32) + 32 - 1; |
282 | high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4; | 286 | high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4; |
287 | |||
283 | for (i = 0; i < num_var_ranges; ++i) { | 288 | for (i = 0; i < num_var_ranges; ++i) { |
284 | if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) | 289 | if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) |
285 | printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n", | 290 | pr_debug(" %u base %0*X%05X000 mask %0*X%05X000 %s\n", |
286 | i, | 291 | i, |
287 | high_width, | 292 | high_width, |
288 | mtrr_state.var_ranges[i].base_hi, | 293 | mtrr_state.var_ranges[i].base_hi, |
289 | mtrr_state.var_ranges[i].base_lo >> 12, | 294 | mtrr_state.var_ranges[i].base_lo >> 12, |
290 | high_width, | 295 | high_width, |
291 | mtrr_state.var_ranges[i].mask_hi, | 296 | mtrr_state.var_ranges[i].mask_hi, |
292 | mtrr_state.var_ranges[i].mask_lo >> 12, | 297 | mtrr_state.var_ranges[i].mask_lo >> 12, |
293 | mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); | 298 | mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); |
294 | else | 299 | else |
295 | printk(KERN_DEBUG " %u disabled\n", i); | 300 | pr_debug(" %u disabled\n", i); |
296 | } | ||
297 | if (mtrr_tom2) { | ||
298 | printk(KERN_DEBUG "TOM2: %016llx aka %lldM\n", | ||
299 | mtrr_tom2, mtrr_tom2>>20); | ||
300 | } | 301 | } |
302 | if (mtrr_tom2) | ||
303 | pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20); | ||
301 | } | 304 | } |
302 | 305 | ||
303 | /* Grab all of the MTRR state for this CPU into *state */ | 306 | /* Grab all of the MTRR state for this CPU into *state */ |
304 | void __init get_mtrr_state(void) | 307 | void __init get_mtrr_state(void) |
305 | { | 308 | { |
306 | unsigned int i; | ||
307 | struct mtrr_var_range *vrs; | 309 | struct mtrr_var_range *vrs; |
308 | unsigned lo, dummy; | ||
309 | unsigned long flags; | 310 | unsigned long flags; |
311 | unsigned lo, dummy; | ||
312 | unsigned int i; | ||
310 | 313 | ||
311 | vrs = mtrr_state.var_ranges; | 314 | vrs = mtrr_state.var_ranges; |
312 | 315 | ||
@@ -324,6 +327,7 @@ void __init get_mtrr_state(void) | |||
324 | 327 | ||
325 | if (amd_special_default_mtrr()) { | 328 | if (amd_special_default_mtrr()) { |
326 | unsigned low, high; | 329 | unsigned low, high; |
330 | |||
327 | /* TOP_MEM2 */ | 331 | /* TOP_MEM2 */ |
328 | rdmsr(MSR_K8_TOP_MEM2, low, high); | 332 | rdmsr(MSR_K8_TOP_MEM2, low, high); |
329 | mtrr_tom2 = high; | 333 | mtrr_tom2 = high; |
@@ -344,10 +348,9 @@ void __init get_mtrr_state(void) | |||
344 | 348 | ||
345 | post_set(); | 349 | post_set(); |
346 | local_irq_restore(flags); | 350 | local_irq_restore(flags); |
347 | |||
348 | } | 351 | } |
349 | 352 | ||
350 | /* Some BIOS's are fucked and don't set all MTRRs the same! */ | 353 | /* Some BIOS's are messed up and don't set all MTRRs the same! */ |
351 | void __init mtrr_state_warn(void) | 354 | void __init mtrr_state_warn(void) |
352 | { | 355 | { |
353 | unsigned long mask = smp_changes_mask; | 356 | unsigned long mask = smp_changes_mask; |
@@ -355,28 +358,33 @@ void __init mtrr_state_warn(void) | |||
355 | if (!mask) | 358 | if (!mask) |
356 | return; | 359 | return; |
357 | if (mask & MTRR_CHANGE_MASK_FIXED) | 360 | if (mask & MTRR_CHANGE_MASK_FIXED) |
358 | printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n"); | 361 | pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n"); |
359 | if (mask & MTRR_CHANGE_MASK_VARIABLE) | 362 | if (mask & MTRR_CHANGE_MASK_VARIABLE) |
360 | printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n"); | 363 | pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n"); |
361 | if (mask & MTRR_CHANGE_MASK_DEFTYPE) | 364 | if (mask & MTRR_CHANGE_MASK_DEFTYPE) |
362 | printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n"); | 365 | pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n"); |
366 | |||
363 | printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); | 367 | printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); |
364 | printk(KERN_INFO "mtrr: corrected configuration.\n"); | 368 | printk(KERN_INFO "mtrr: corrected configuration.\n"); |
365 | } | 369 | } |
366 | 370 | ||
367 | /* Doesn't attempt to pass an error out to MTRR users | 371 | /* |
368 | because it's quite complicated in some cases and probably not | 372 | * Doesn't attempt to pass an error out to MTRR users |
369 | worth it because the best error handling is to ignore it. */ | 373 | * because it's quite complicated in some cases and probably not |
374 | * worth it because the best error handling is to ignore it. | ||
375 | */ | ||
370 | void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) | 376 | void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) |
371 | { | 377 | { |
372 | if (wrmsr_safe(msr, a, b) < 0) | 378 | if (wrmsr_safe(msr, a, b) < 0) { |
373 | printk(KERN_ERR | 379 | printk(KERN_ERR |
374 | "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", | 380 | "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", |
375 | smp_processor_id(), msr, a, b); | 381 | smp_processor_id(), msr, a, b); |
382 | } | ||
376 | } | 383 | } |
377 | 384 | ||
378 | /** | 385 | /** |
379 | * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have | 386 | * set_fixed_range - checks & updates a fixed-range MTRR if it |
387 | * differs from the value it should have | ||
380 | * @msr: MSR address of the MTTR which should be checked and updated | 388 | * @msr: MSR address of the MTTR which should be checked and updated |
381 | * @changed: pointer which indicates whether the MTRR needed to be changed | 389 | * @changed: pointer which indicates whether the MTRR needed to be changed |
382 | * @msrwords: pointer to the MSR values which the MSR should have | 390 | * @msrwords: pointer to the MSR values which the MSR should have |
@@ -401,20 +409,23 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) | |||
401 | * | 409 | * |
402 | * Returns: The index of the region on success, else negative on error. | 410 | * Returns: The index of the region on success, else negative on error. |
403 | */ | 411 | */ |
404 | int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) | 412 | int |
413 | generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) | ||
405 | { | 414 | { |
406 | int i, max; | ||
407 | mtrr_type ltype; | ||
408 | unsigned long lbase, lsize; | 415 | unsigned long lbase, lsize; |
416 | mtrr_type ltype; | ||
417 | int i, max; | ||
409 | 418 | ||
410 | max = num_var_ranges; | 419 | max = num_var_ranges; |
411 | if (replace_reg >= 0 && replace_reg < max) | 420 | if (replace_reg >= 0 && replace_reg < max) |
412 | return replace_reg; | 421 | return replace_reg; |
422 | |||
413 | for (i = 0; i < max; ++i) { | 423 | for (i = 0; i < max; ++i) { |
414 | mtrr_if->get(i, &lbase, &lsize, &ltype); | 424 | mtrr_if->get(i, &lbase, &lsize, &ltype); |
415 | if (lsize == 0) | 425 | if (lsize == 0) |
416 | return i; | 426 | return i; |
417 | } | 427 | } |
428 | |||
418 | return -ENOSPC; | 429 | return -ENOSPC; |
419 | } | 430 | } |
420 | 431 | ||
@@ -434,7 +445,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
434 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); | 445 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); |
435 | 446 | ||
436 | if ((mask_lo & 0x800) == 0) { | 447 | if ((mask_lo & 0x800) == 0) { |
437 | /* Invalid (i.e. free) range */ | 448 | /* Invalid (i.e. free) range */ |
438 | *base = 0; | 449 | *base = 0; |
439 | *size = 0; | 450 | *size = 0; |
440 | *type = 0; | 451 | *type = 0; |
@@ -471,27 +482,31 @@ out_put_cpu: | |||
471 | } | 482 | } |
472 | 483 | ||
473 | /** | 484 | /** |
474 | * set_fixed_ranges - checks & updates the fixed-range MTRRs if they differ from the saved set | 485 | * set_fixed_ranges - checks & updates the fixed-range MTRRs if they |
486 | * differ from the saved set | ||
475 | * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges() | 487 | * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges() |
476 | */ | 488 | */ |
477 | static int set_fixed_ranges(mtrr_type * frs) | 489 | static int set_fixed_ranges(mtrr_type *frs) |
478 | { | 490 | { |
479 | unsigned long long *saved = (unsigned long long *) frs; | 491 | unsigned long long *saved = (unsigned long long *)frs; |
480 | bool changed = false; | 492 | bool changed = false; |
481 | int block=-1, range; | 493 | int block = -1, range; |
482 | 494 | ||
483 | k8_check_syscfg_dram_mod_en(); | 495 | k8_check_syscfg_dram_mod_en(); |
484 | 496 | ||
485 | while (fixed_range_blocks[++block].ranges) | 497 | while (fixed_range_blocks[++block].ranges) { |
486 | for (range=0; range < fixed_range_blocks[block].ranges; range++) | 498 | for (range = 0; range < fixed_range_blocks[block].ranges; range++) |
487 | set_fixed_range(fixed_range_blocks[block].base_msr + range, | 499 | set_fixed_range(fixed_range_blocks[block].base_msr + range, |
488 | &changed, (unsigned int *) saved++); | 500 | &changed, (unsigned int *)saved++); |
501 | } | ||
489 | 502 | ||
490 | return changed; | 503 | return changed; |
491 | } | 504 | } |
492 | 505 | ||
493 | /* Set the MSR pair relating to a var range. Returns TRUE if | 506 | /* |
494 | changes are made */ | 507 | * Set the MSR pair relating to a var range. |
508 | * Returns true if changes are made. | ||
509 | */ | ||
495 | static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) | 510 | static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) |
496 | { | 511 | { |
497 | unsigned int lo, hi; | 512 | unsigned int lo, hi; |
@@ -501,6 +516,7 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) | |||
501 | if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) | 516 | if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) |
502 | || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != | 517 | || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != |
503 | (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { | 518 | (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { |
519 | |||
504 | mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); | 520 | mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); |
505 | changed = true; | 521 | changed = true; |
506 | } | 522 | } |
@@ -526,21 +542,26 @@ static u32 deftype_lo, deftype_hi; | |||
526 | */ | 542 | */ |
527 | static unsigned long set_mtrr_state(void) | 543 | static unsigned long set_mtrr_state(void) |
528 | { | 544 | { |
529 | unsigned int i; | ||
530 | unsigned long change_mask = 0; | 545 | unsigned long change_mask = 0; |
546 | unsigned int i; | ||
531 | 547 | ||
532 | for (i = 0; i < num_var_ranges; i++) | 548 | for (i = 0; i < num_var_ranges; i++) { |
533 | if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i])) | 549 | if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i])) |
534 | change_mask |= MTRR_CHANGE_MASK_VARIABLE; | 550 | change_mask |= MTRR_CHANGE_MASK_VARIABLE; |
551 | } | ||
535 | 552 | ||
536 | if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges)) | 553 | if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges)) |
537 | change_mask |= MTRR_CHANGE_MASK_FIXED; | 554 | change_mask |= MTRR_CHANGE_MASK_FIXED; |
538 | 555 | ||
539 | /* Set_mtrr_restore restores the old value of MTRRdefType, | 556 | /* |
540 | so to set it we fiddle with the saved value */ | 557 | * Set_mtrr_restore restores the old value of MTRRdefType, |
558 | * so to set it we fiddle with the saved value: | ||
559 | */ | ||
541 | if ((deftype_lo & 0xff) != mtrr_state.def_type | 560 | if ((deftype_lo & 0xff) != mtrr_state.def_type |
542 | || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { | 561 | || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { |
543 | deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | (mtrr_state.enabled << 10); | 562 | |
563 | deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | | ||
564 | (mtrr_state.enabled << 10); | ||
544 | change_mask |= MTRR_CHANGE_MASK_DEFTYPE; | 565 | change_mask |= MTRR_CHANGE_MASK_DEFTYPE; |
545 | } | 566 | } |
546 | 567 | ||
@@ -548,33 +569,36 @@ static unsigned long set_mtrr_state(void) | |||
548 | } | 569 | } |
549 | 570 | ||
550 | 571 | ||
551 | static unsigned long cr4 = 0; | 572 | static unsigned long cr4; |
552 | static DEFINE_SPINLOCK(set_atomicity_lock); | 573 | static DEFINE_SPINLOCK(set_atomicity_lock); |
553 | 574 | ||
554 | /* | 575 | /* |
555 | * Since we are disabling the cache don't allow any interrupts - they | 576 | * Since we are disabling the cache don't allow any interrupts, |
556 | * would run extremely slow and would only increase the pain. The caller must | 577 | * they would run extremely slow and would only increase the pain. |
557 | * ensure that local interrupts are disabled and are reenabled after post_set() | 578 | * |
558 | * has been called. | 579 | * The caller must ensure that local interrupts are disabled and |
580 | * are reenabled after post_set() has been called. | ||
559 | */ | 581 | */ |
560 | |||
561 | static void prepare_set(void) __acquires(set_atomicity_lock) | 582 | static void prepare_set(void) __acquires(set_atomicity_lock) |
562 | { | 583 | { |
563 | unsigned long cr0; | 584 | unsigned long cr0; |
564 | 585 | ||
565 | /* Note that this is not ideal, since the cache is only flushed/disabled | 586 | /* |
566 | for this CPU while the MTRRs are changed, but changing this requires | 587 | * Note that this is not ideal |
567 | more invasive changes to the way the kernel boots */ | 588 | * since the cache is only flushed/disabled for this CPU while the |
589 | * MTRRs are changed, but changing this requires more invasive | ||
590 | * changes to the way the kernel boots | ||
591 | */ | ||
568 | 592 | ||
569 | spin_lock(&set_atomicity_lock); | 593 | spin_lock(&set_atomicity_lock); |
570 | 594 | ||
571 | /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ | 595 | /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ |
572 | cr0 = read_cr0() | X86_CR0_CD; | 596 | cr0 = read_cr0() | X86_CR0_CD; |
573 | write_cr0(cr0); | 597 | write_cr0(cr0); |
574 | wbinvd(); | 598 | wbinvd(); |
575 | 599 | ||
576 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ | 600 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ |
577 | if ( cpu_has_pge ) { | 601 | if (cpu_has_pge) { |
578 | cr4 = read_cr4(); | 602 | cr4 = read_cr4(); |
579 | write_cr4(cr4 & ~X86_CR4_PGE); | 603 | write_cr4(cr4 & ~X86_CR4_PGE); |
580 | } | 604 | } |
@@ -582,26 +606,26 @@ static void prepare_set(void) __acquires(set_atomicity_lock) | |||
582 | /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ | 606 | /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ |
583 | __flush_tlb(); | 607 | __flush_tlb(); |
584 | 608 | ||
585 | /* Save MTRR state */ | 609 | /* Save MTRR state */ |
586 | rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); | 610 | rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); |
587 | 611 | ||
588 | /* Disable MTRRs, and set the default type to uncached */ | 612 | /* Disable MTRRs, and set the default type to uncached */ |
589 | mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); | 613 | mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); |
590 | } | 614 | } |
591 | 615 | ||
592 | static void post_set(void) __releases(set_atomicity_lock) | 616 | static void post_set(void) __releases(set_atomicity_lock) |
593 | { | 617 | { |
594 | /* Flush TLBs (no need to flush caches - they are disabled) */ | 618 | /* Flush TLBs (no need to flush caches - they are disabled) */ |
595 | __flush_tlb(); | 619 | __flush_tlb(); |
596 | 620 | ||
597 | /* Intel (P6) standard MTRRs */ | 621 | /* Intel (P6) standard MTRRs */ |
598 | mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); | 622 | mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); |
599 | 623 | ||
600 | /* Enable caches */ | 624 | /* Enable caches */ |
601 | write_cr0(read_cr0() & 0xbfffffff); | 625 | write_cr0(read_cr0() & 0xbfffffff); |
602 | 626 | ||
603 | /* Restore value of CR4 */ | 627 | /* Restore value of CR4 */ |
604 | if ( cpu_has_pge ) | 628 | if (cpu_has_pge) |
605 | write_cr4(cr4); | 629 | write_cr4(cr4); |
606 | spin_unlock(&set_atomicity_lock); | 630 | spin_unlock(&set_atomicity_lock); |
607 | } | 631 | } |
@@ -623,24 +647,27 @@ static void generic_set_all(void) | |||
623 | post_set(); | 647 | post_set(); |
624 | local_irq_restore(flags); | 648 | local_irq_restore(flags); |
625 | 649 | ||
626 | /* Use the atomic bitops to update the global mask */ | 650 | /* Use the atomic bitops to update the global mask */ |
627 | for (count = 0; count < sizeof mask * 8; ++count) { | 651 | for (count = 0; count < sizeof mask * 8; ++count) { |
628 | if (mask & 0x01) | 652 | if (mask & 0x01) |
629 | set_bit(count, &smp_changes_mask); | 653 | set_bit(count, &smp_changes_mask); |
630 | mask >>= 1; | 654 | mask >>= 1; |
631 | } | 655 | } |
632 | 656 | ||
633 | } | 657 | } |
634 | 658 | ||
659 | /** | ||
660 | * generic_set_mtrr - set variable MTRR register on the local CPU. | ||
661 | * | ||
662 | * @reg: The register to set. | ||
663 | * @base: The base address of the region. | ||
664 | * @size: The size of the region. If this is 0 the region is disabled. | ||
665 | * @type: The type of the region. | ||
666 | * | ||
667 | * Returns nothing. | ||
668 | */ | ||
635 | static void generic_set_mtrr(unsigned int reg, unsigned long base, | 669 | static void generic_set_mtrr(unsigned int reg, unsigned long base, |
636 | unsigned long size, mtrr_type type) | 670 | unsigned long size, mtrr_type type) |
637 | /* [SUMMARY] Set variable MTRR register on the local CPU. | ||
638 | <reg> The register to set. | ||
639 | <base> The base address of the region. | ||
640 | <size> The size of the region. If this is 0 the region is disabled. | ||
641 | <type> The type of the region. | ||
642 | [RETURNS] Nothing. | ||
643 | */ | ||
644 | { | 671 | { |
645 | unsigned long flags; | 672 | unsigned long flags; |
646 | struct mtrr_var_range *vr; | 673 | struct mtrr_var_range *vr; |
@@ -651,8 +678,10 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, | |||
651 | prepare_set(); | 678 | prepare_set(); |
652 | 679 | ||
653 | if (size == 0) { | 680 | if (size == 0) { |
654 | /* The invalid bit is kept in the mask, so we simply clear the | 681 | /* |
655 | relevant mask register to disable a range. */ | 682 | * The invalid bit is kept in the mask, so we simply |
683 | * clear the relevant mask register to disable a range. | ||
684 | */ | ||
656 | mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); | 685 | mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); |
657 | memset(vr, 0, sizeof(struct mtrr_var_range)); | 686 | memset(vr, 0, sizeof(struct mtrr_var_range)); |
658 | } else { | 687 | } else { |
@@ -669,46 +698,50 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, | |||
669 | local_irq_restore(flags); | 698 | local_irq_restore(flags); |
670 | } | 699 | } |
671 | 700 | ||
672 | int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type) | 701 | int generic_validate_add_page(unsigned long base, unsigned long size, |
702 | unsigned int type) | ||
673 | { | 703 | { |
674 | unsigned long lbase, last; | 704 | unsigned long lbase, last; |
675 | 705 | ||
676 | /* For Intel PPro stepping <= 7, must be 4 MiB aligned | 706 | /* |
677 | and not touch 0x70000000->0x7003FFFF */ | 707 | * For Intel PPro stepping <= 7 |
708 | * must be 4 MiB aligned and not touch 0x70000000 -> 0x7003FFFF | ||
709 | */ | ||
678 | if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && | 710 | if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && |
679 | boot_cpu_data.x86_model == 1 && | 711 | boot_cpu_data.x86_model == 1 && |
680 | boot_cpu_data.x86_mask <= 7) { | 712 | boot_cpu_data.x86_mask <= 7) { |
681 | if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { | 713 | if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { |
682 | printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); | 714 | pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); |
683 | return -EINVAL; | 715 | return -EINVAL; |
684 | } | 716 | } |
685 | if (!(base + size < 0x70000 || base > 0x7003F) && | 717 | if (!(base + size < 0x70000 || base > 0x7003F) && |
686 | (type == MTRR_TYPE_WRCOMB | 718 | (type == MTRR_TYPE_WRCOMB |
687 | || type == MTRR_TYPE_WRBACK)) { | 719 | || type == MTRR_TYPE_WRBACK)) { |
688 | printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); | 720 | pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); |
689 | return -EINVAL; | 721 | return -EINVAL; |
690 | } | 722 | } |
691 | } | 723 | } |
692 | 724 | ||
693 | /* Check upper bits of base and last are equal and lower bits are 0 | 725 | /* |
694 | for base and 1 for last */ | 726 | * Check upper bits of base and last are equal and lower bits are 0 |
727 | * for base and 1 for last | ||
728 | */ | ||
695 | last = base + size - 1; | 729 | last = base + size - 1; |
696 | for (lbase = base; !(lbase & 1) && (last & 1); | 730 | for (lbase = base; !(lbase & 1) && (last & 1); |
697 | lbase = lbase >> 1, last = last >> 1) ; | 731 | lbase = lbase >> 1, last = last >> 1) |
732 | ; | ||
698 | if (lbase != last) { | 733 | if (lbase != last) { |
699 | printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", | 734 | pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size); |
700 | base, size); | ||
701 | return -EINVAL; | 735 | return -EINVAL; |
702 | } | 736 | } |
703 | return 0; | 737 | return 0; |
704 | } | 738 | } |
705 | 739 | ||
706 | |||
707 | static int generic_have_wrcomb(void) | 740 | static int generic_have_wrcomb(void) |
708 | { | 741 | { |
709 | unsigned long config, dummy; | 742 | unsigned long config, dummy; |
710 | rdmsr(MSR_MTRRcap, config, dummy); | 743 | rdmsr(MSR_MTRRcap, config, dummy); |
711 | return (config & (1 << 10)); | 744 | return config & (1 << 10); |
712 | } | 745 | } |
713 | 746 | ||
714 | int positive_have_wrcomb(void) | 747 | int positive_have_wrcomb(void) |
@@ -716,14 +749,15 @@ int positive_have_wrcomb(void) | |||
716 | return 1; | 749 | return 1; |
717 | } | 750 | } |
718 | 751 | ||
719 | /* generic structure... | 752 | /* |
753 | * Generic structure... | ||
720 | */ | 754 | */ |
721 | struct mtrr_ops generic_mtrr_ops = { | 755 | struct mtrr_ops generic_mtrr_ops = { |
722 | .use_intel_if = 1, | 756 | .use_intel_if = 1, |
723 | .set_all = generic_set_all, | 757 | .set_all = generic_set_all, |
724 | .get = generic_get_mtrr, | 758 | .get = generic_get_mtrr, |
725 | .get_free_region = generic_get_free_region, | 759 | .get_free_region = generic_get_free_region, |
726 | .set = generic_set_mtrr, | 760 | .set = generic_set_mtrr, |
727 | .validate_add_page = generic_validate_add_page, | 761 | .validate_add_page = generic_validate_add_page, |
728 | .have_wrcomb = generic_have_wrcomb, | 762 | .have_wrcomb = generic_have_wrcomb, |
729 | }; | 763 | }; |
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index fb73a52913a4..08b6ea4c62b4 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c | |||
@@ -1,27 +1,28 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/proc_fs.h> | ||
3 | #include <linux/capability.h> | 1 | #include <linux/capability.h> |
4 | #include <linux/ctype.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/seq_file.h> | 2 | #include <linux/seq_file.h> |
7 | #include <asm/uaccess.h> | 3 | #include <linux/uaccess.h> |
4 | #include <linux/proc_fs.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/ctype.h> | ||
7 | #include <linux/init.h> | ||
8 | 8 | ||
9 | #define LINE_SIZE 80 | 9 | #define LINE_SIZE 80 |
10 | 10 | ||
11 | #include <asm/mtrr.h> | 11 | #include <asm/mtrr.h> |
12 | |||
12 | #include "mtrr.h" | 13 | #include "mtrr.h" |
13 | 14 | ||
14 | #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) | 15 | #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) |
15 | 16 | ||
16 | static const char *const mtrr_strings[MTRR_NUM_TYPES] = | 17 | static const char *const mtrr_strings[MTRR_NUM_TYPES] = |
17 | { | 18 | { |
18 | "uncachable", /* 0 */ | 19 | "uncachable", /* 0 */ |
19 | "write-combining", /* 1 */ | 20 | "write-combining", /* 1 */ |
20 | "?", /* 2 */ | 21 | "?", /* 2 */ |
21 | "?", /* 3 */ | 22 | "?", /* 3 */ |
22 | "write-through", /* 4 */ | 23 | "write-through", /* 4 */ |
23 | "write-protect", /* 5 */ | 24 | "write-protect", /* 5 */ |
24 | "write-back", /* 6 */ | 25 | "write-back", /* 6 */ |
25 | }; | 26 | }; |
26 | 27 | ||
27 | const char *mtrr_attrib_to_str(int x) | 28 | const char *mtrr_attrib_to_str(int x) |
@@ -35,8 +36,8 @@ static int | |||
35 | mtrr_file_add(unsigned long base, unsigned long size, | 36 | mtrr_file_add(unsigned long base, unsigned long size, |
36 | unsigned int type, bool increment, struct file *file, int page) | 37 | unsigned int type, bool increment, struct file *file, int page) |
37 | { | 38 | { |
39 | unsigned int *fcount = FILE_FCOUNT(file); | ||
38 | int reg, max; | 40 | int reg, max; |
39 | unsigned int *fcount = FILE_FCOUNT(file); | ||
40 | 41 | ||
41 | max = num_var_ranges; | 42 | max = num_var_ranges; |
42 | if (fcount == NULL) { | 43 | if (fcount == NULL) { |
@@ -61,8 +62,8 @@ static int | |||
61 | mtrr_file_del(unsigned long base, unsigned long size, | 62 | mtrr_file_del(unsigned long base, unsigned long size, |
62 | struct file *file, int page) | 63 | struct file *file, int page) |
63 | { | 64 | { |
64 | int reg; | ||
65 | unsigned int *fcount = FILE_FCOUNT(file); | 65 | unsigned int *fcount = FILE_FCOUNT(file); |
66 | int reg; | ||
66 | 67 | ||
67 | if (!page) { | 68 | if (!page) { |
68 | if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) | 69 | if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) |
@@ -81,13 +82,14 @@ mtrr_file_del(unsigned long base, unsigned long size, | |||
81 | return reg; | 82 | return reg; |
82 | } | 83 | } |
83 | 84 | ||
84 | /* RED-PEN: seq_file can seek now. this is ignored. */ | 85 | /* |
86 | * seq_file can seek but we ignore it. | ||
87 | * | ||
88 | * Format of control line: | ||
89 | * "base=%Lx size=%Lx type=%s" or "disable=%d" | ||
90 | */ | ||
85 | static ssize_t | 91 | static ssize_t |
86 | mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) | 92 | mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) |
87 | /* Format of control line: | ||
88 | "base=%Lx size=%Lx type=%s" OR: | ||
89 | "disable=%d" | ||
90 | */ | ||
91 | { | 93 | { |
92 | int i, err; | 94 | int i, err; |
93 | unsigned long reg; | 95 | unsigned long reg; |
@@ -100,15 +102,18 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) | |||
100 | return -EPERM; | 102 | return -EPERM; |
101 | if (!len) | 103 | if (!len) |
102 | return -EINVAL; | 104 | return -EINVAL; |
105 | |||
103 | memset(line, 0, LINE_SIZE); | 106 | memset(line, 0, LINE_SIZE); |
104 | if (len > LINE_SIZE) | 107 | if (len > LINE_SIZE) |
105 | len = LINE_SIZE; | 108 | len = LINE_SIZE; |
106 | if (copy_from_user(line, buf, len - 1)) | 109 | if (copy_from_user(line, buf, len - 1)) |
107 | return -EFAULT; | 110 | return -EFAULT; |
111 | |||
108 | linelen = strlen(line); | 112 | linelen = strlen(line); |
109 | ptr = line + linelen - 1; | 113 | ptr = line + linelen - 1; |
110 | if (linelen && *ptr == '\n') | 114 | if (linelen && *ptr == '\n') |
111 | *ptr = '\0'; | 115 | *ptr = '\0'; |
116 | |||
112 | if (!strncmp(line, "disable=", 8)) { | 117 | if (!strncmp(line, "disable=", 8)) { |
113 | reg = simple_strtoul(line + 8, &ptr, 0); | 118 | reg = simple_strtoul(line + 8, &ptr, 0); |
114 | err = mtrr_del_page(reg, 0, 0); | 119 | err = mtrr_del_page(reg, 0, 0); |
@@ -116,28 +121,35 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) | |||
116 | return err; | 121 | return err; |
117 | return len; | 122 | return len; |
118 | } | 123 | } |
124 | |||
119 | if (strncmp(line, "base=", 5)) | 125 | if (strncmp(line, "base=", 5)) |
120 | return -EINVAL; | 126 | return -EINVAL; |
127 | |||
121 | base = simple_strtoull(line + 5, &ptr, 0); | 128 | base = simple_strtoull(line + 5, &ptr, 0); |
122 | for (; isspace(*ptr); ++ptr) ; | 129 | for (; isspace(*ptr); ++ptr) |
130 | ; | ||
131 | |||
123 | if (strncmp(ptr, "size=", 5)) | 132 | if (strncmp(ptr, "size=", 5)) |
124 | return -EINVAL; | 133 | return -EINVAL; |
134 | |||
125 | size = simple_strtoull(ptr + 5, &ptr, 0); | 135 | size = simple_strtoull(ptr + 5, &ptr, 0); |
126 | if ((base & 0xfff) || (size & 0xfff)) | 136 | if ((base & 0xfff) || (size & 0xfff)) |
127 | return -EINVAL; | 137 | return -EINVAL; |
128 | for (; isspace(*ptr); ++ptr) ; | 138 | for (; isspace(*ptr); ++ptr) |
139 | ; | ||
140 | |||
129 | if (strncmp(ptr, "type=", 5)) | 141 | if (strncmp(ptr, "type=", 5)) |
130 | return -EINVAL; | 142 | return -EINVAL; |
131 | ptr += 5; | 143 | ptr += 5; |
132 | for (; isspace(*ptr); ++ptr) ; | 144 | for (; isspace(*ptr); ++ptr) |
145 | ; | ||
146 | |||
133 | for (i = 0; i < MTRR_NUM_TYPES; ++i) { | 147 | for (i = 0; i < MTRR_NUM_TYPES; ++i) { |
134 | if (strcmp(ptr, mtrr_strings[i])) | 148 | if (strcmp(ptr, mtrr_strings[i])) |
135 | continue; | 149 | continue; |
136 | base >>= PAGE_SHIFT; | 150 | base >>= PAGE_SHIFT; |
137 | size >>= PAGE_SHIFT; | 151 | size >>= PAGE_SHIFT; |
138 | err = | 152 | err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true); |
139 | mtrr_add_page((unsigned long) base, (unsigned long) size, i, | ||
140 | true); | ||
141 | if (err < 0) | 153 | if (err < 0) |
142 | return err; | 154 | return err; |
143 | return len; | 155 | return len; |
@@ -181,7 +193,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
181 | case MTRRIOC32_SET_PAGE_ENTRY: | 193 | case MTRRIOC32_SET_PAGE_ENTRY: |
182 | case MTRRIOC32_DEL_PAGE_ENTRY: | 194 | case MTRRIOC32_DEL_PAGE_ENTRY: |
183 | case MTRRIOC32_KILL_PAGE_ENTRY: { | 195 | case MTRRIOC32_KILL_PAGE_ENTRY: { |
184 | struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg; | 196 | struct mtrr_sentry32 __user *s32; |
197 | |||
198 | s32 = (struct mtrr_sentry32 __user *)__arg; | ||
185 | err = get_user(sentry.base, &s32->base); | 199 | err = get_user(sentry.base, &s32->base); |
186 | err |= get_user(sentry.size, &s32->size); | 200 | err |= get_user(sentry.size, &s32->size); |
187 | err |= get_user(sentry.type, &s32->type); | 201 | err |= get_user(sentry.type, &s32->type); |
@@ -191,7 +205,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
191 | } | 205 | } |
192 | case MTRRIOC32_GET_ENTRY: | 206 | case MTRRIOC32_GET_ENTRY: |
193 | case MTRRIOC32_GET_PAGE_ENTRY: { | 207 | case MTRRIOC32_GET_PAGE_ENTRY: { |
194 | struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; | 208 | struct mtrr_gentry32 __user *g32; |
209 | |||
210 | g32 = (struct mtrr_gentry32 __user *)__arg; | ||
195 | err = get_user(gentry.regnum, &g32->regnum); | 211 | err = get_user(gentry.regnum, &g32->regnum); |
196 | err |= get_user(gentry.base, &g32->base); | 212 | err |= get_user(gentry.base, &g32->base); |
197 | err |= get_user(gentry.size, &g32->size); | 213 | err |= get_user(gentry.size, &g32->size); |
@@ -314,7 +330,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
314 | if (err) | 330 | if (err) |
315 | return err; | 331 | return err; |
316 | 332 | ||
317 | switch(cmd) { | 333 | switch (cmd) { |
318 | case MTRRIOC_GET_ENTRY: | 334 | case MTRRIOC_GET_ENTRY: |
319 | case MTRRIOC_GET_PAGE_ENTRY: | 335 | case MTRRIOC_GET_PAGE_ENTRY: |
320 | if (copy_to_user(arg, &gentry, sizeof gentry)) | 336 | if (copy_to_user(arg, &gentry, sizeof gentry)) |
@@ -323,7 +339,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
323 | #ifdef CONFIG_COMPAT | 339 | #ifdef CONFIG_COMPAT |
324 | case MTRRIOC32_GET_ENTRY: | 340 | case MTRRIOC32_GET_ENTRY: |
325 | case MTRRIOC32_GET_PAGE_ENTRY: { | 341 | case MTRRIOC32_GET_PAGE_ENTRY: { |
326 | struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; | 342 | struct mtrr_gentry32 __user *g32; |
343 | |||
344 | g32 = (struct mtrr_gentry32 __user *)__arg; | ||
327 | err = put_user(gentry.base, &g32->base); | 345 | err = put_user(gentry.base, &g32->base); |
328 | err |= put_user(gentry.size, &g32->size); | 346 | err |= put_user(gentry.size, &g32->size); |
329 | err |= put_user(gentry.regnum, &g32->regnum); | 347 | err |= put_user(gentry.regnum, &g32->regnum); |
@@ -335,11 +353,10 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
335 | return err; | 353 | return err; |
336 | } | 354 | } |
337 | 355 | ||
338 | static int | 356 | static int mtrr_close(struct inode *ino, struct file *file) |
339 | mtrr_close(struct inode *ino, struct file *file) | ||
340 | { | 357 | { |
341 | int i, max; | ||
342 | unsigned int *fcount = FILE_FCOUNT(file); | 358 | unsigned int *fcount = FILE_FCOUNT(file); |
359 | int i, max; | ||
343 | 360 | ||
344 | if (fcount != NULL) { | 361 | if (fcount != NULL) { |
345 | max = num_var_ranges; | 362 | max = num_var_ranges; |
@@ -359,22 +376,22 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset); | |||
359 | 376 | ||
360 | static int mtrr_open(struct inode *inode, struct file *file) | 377 | static int mtrr_open(struct inode *inode, struct file *file) |
361 | { | 378 | { |
362 | if (!mtrr_if) | 379 | if (!mtrr_if) |
363 | return -EIO; | 380 | return -EIO; |
364 | if (!mtrr_if->get) | 381 | if (!mtrr_if->get) |
365 | return -ENXIO; | 382 | return -ENXIO; |
366 | return single_open(file, mtrr_seq_show, NULL); | 383 | return single_open(file, mtrr_seq_show, NULL); |
367 | } | 384 | } |
368 | 385 | ||
369 | static const struct file_operations mtrr_fops = { | 386 | static const struct file_operations mtrr_fops = { |
370 | .owner = THIS_MODULE, | 387 | .owner = THIS_MODULE, |
371 | .open = mtrr_open, | 388 | .open = mtrr_open, |
372 | .read = seq_read, | 389 | .read = seq_read, |
373 | .llseek = seq_lseek, | 390 | .llseek = seq_lseek, |
374 | .write = mtrr_write, | 391 | .write = mtrr_write, |
375 | .unlocked_ioctl = mtrr_ioctl, | 392 | .unlocked_ioctl = mtrr_ioctl, |
376 | .compat_ioctl = mtrr_ioctl, | 393 | .compat_ioctl = mtrr_ioctl, |
377 | .release = mtrr_close, | 394 | .release = mtrr_close, |
378 | }; | 395 | }; |
379 | 396 | ||
380 | static int mtrr_seq_show(struct seq_file *seq, void *offset) | 397 | static int mtrr_seq_show(struct seq_file *seq, void *offset) |
@@ -388,23 +405,24 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) | |||
388 | max = num_var_ranges; | 405 | max = num_var_ranges; |
389 | for (i = 0; i < max; i++) { | 406 | for (i = 0; i < max; i++) { |
390 | mtrr_if->get(i, &base, &size, &type); | 407 | mtrr_if->get(i, &base, &size, &type); |
391 | if (size == 0) | 408 | if (size == 0) { |
392 | mtrr_usage_table[i] = 0; | 409 | mtrr_usage_table[i] = 0; |
393 | else { | 410 | continue; |
394 | if (size < (0x100000 >> PAGE_SHIFT)) { | ||
395 | /* less than 1MB */ | ||
396 | factor = 'K'; | ||
397 | size <<= PAGE_SHIFT - 10; | ||
398 | } else { | ||
399 | factor = 'M'; | ||
400 | size >>= 20 - PAGE_SHIFT; | ||
401 | } | ||
402 | /* RED-PEN: base can be > 32bit */ | ||
403 | len += seq_printf(seq, | ||
404 | "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n", | ||
405 | i, base, base >> (20 - PAGE_SHIFT), size, factor, | ||
406 | mtrr_usage_table[i], mtrr_attrib_to_str(type)); | ||
407 | } | 411 | } |
412 | if (size < (0x100000 >> PAGE_SHIFT)) { | ||
413 | /* less than 1MB */ | ||
414 | factor = 'K'; | ||
415 | size <<= PAGE_SHIFT - 10; | ||
416 | } else { | ||
417 | factor = 'M'; | ||
418 | size >>= 20 - PAGE_SHIFT; | ||
419 | } | ||
420 | /* Base can be > 32bit */ | ||
421 | len += seq_printf(seq, "reg%02i: base=0x%06lx000 " | ||
422 | "(%5luMB), size=%5lu%cB, count=%d: %s\n", | ||
423 | i, base, base >> (20 - PAGE_SHIFT), size, | ||
424 | factor, mtrr_usage_table[i], | ||
425 | mtrr_attrib_to_str(type)); | ||
408 | } | 426 | } |
409 | return 0; | 427 | return 0; |
410 | } | 428 | } |
@@ -422,6 +440,5 @@ static int __init mtrr_if_init(void) | |||
422 | proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops); | 440 | proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops); |
423 | return 0; | 441 | return 0; |
424 | } | 442 | } |
425 | |||
426 | arch_initcall(mtrr_if_init); | 443 | arch_initcall(mtrr_if_init); |
427 | #endif /* CONFIG_PROC_FS */ | 444 | #endif /* CONFIG_PROC_FS */ |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 8fc248b5aeaf..7af0f88a4163 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -25,43 +25,48 @@ | |||
25 | Operating System Writer's Guide" (Intel document number 242692), | 25 | Operating System Writer's Guide" (Intel document number 242692), |
26 | section 11.11.7 | 26 | section 11.11.7 |
27 | 27 | ||
28 | This was cleaned and made readable by Patrick Mochel <mochel@osdl.org> | 28 | This was cleaned and made readable by Patrick Mochel <mochel@osdl.org> |
29 | on 6-7 March 2002. | 29 | on 6-7 March 2002. |
30 | Source: Intel Architecture Software Developers Manual, Volume 3: | 30 | Source: Intel Architecture Software Developers Manual, Volume 3: |
31 | System Programming Guide; Section 9.11. (1997 edition - PPro). | 31 | System Programming Guide; Section 9.11. (1997 edition - PPro). |
32 | */ | 32 | */ |
33 | 33 | ||
34 | #define DEBUG | ||
35 | |||
36 | #include <linux/types.h> /* FIXME: kvm_para.h needs this */ | ||
37 | |||
38 | #include <linux/kvm_para.h> | ||
39 | #include <linux/uaccess.h> | ||
34 | #include <linux/module.h> | 40 | #include <linux/module.h> |
41 | #include <linux/mutex.h> | ||
35 | #include <linux/init.h> | 42 | #include <linux/init.h> |
43 | #include <linux/sort.h> | ||
44 | #include <linux/cpu.h> | ||
36 | #include <linux/pci.h> | 45 | #include <linux/pci.h> |
37 | #include <linux/smp.h> | 46 | #include <linux/smp.h> |
38 | #include <linux/cpu.h> | ||
39 | #include <linux/mutex.h> | ||
40 | #include <linux/sort.h> | ||
41 | 47 | ||
48 | #include <asm/processor.h> | ||
42 | #include <asm/e820.h> | 49 | #include <asm/e820.h> |
43 | #include <asm/mtrr.h> | 50 | #include <asm/mtrr.h> |
44 | #include <asm/uaccess.h> | ||
45 | #include <asm/processor.h> | ||
46 | #include <asm/msr.h> | 51 | #include <asm/msr.h> |
47 | #include <asm/kvm_para.h> | 52 | |
48 | #include "mtrr.h" | 53 | #include "mtrr.h" |
49 | 54 | ||
50 | u32 num_var_ranges = 0; | 55 | u32 num_var_ranges; |
51 | 56 | ||
52 | unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; | 57 | unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; |
53 | static DEFINE_MUTEX(mtrr_mutex); | 58 | static DEFINE_MUTEX(mtrr_mutex); |
54 | 59 | ||
55 | u64 size_or_mask, size_and_mask; | 60 | u64 size_or_mask, size_and_mask; |
56 | 61 | ||
57 | static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; | 62 | static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; |
58 | 63 | ||
59 | struct mtrr_ops * mtrr_if = NULL; | 64 | struct mtrr_ops *mtrr_if; |
60 | 65 | ||
61 | static void set_mtrr(unsigned int reg, unsigned long base, | 66 | static void set_mtrr(unsigned int reg, unsigned long base, |
62 | unsigned long size, mtrr_type type); | 67 | unsigned long size, mtrr_type type); |
63 | 68 | ||
64 | void set_mtrr_ops(struct mtrr_ops * ops) | 69 | void set_mtrr_ops(struct mtrr_ops *ops) |
65 | { | 70 | { |
66 | if (ops->vendor && ops->vendor < X86_VENDOR_NUM) | 71 | if (ops->vendor && ops->vendor < X86_VENDOR_NUM) |
67 | mtrr_ops[ops->vendor] = ops; | 72 | mtrr_ops[ops->vendor] = ops; |
@@ -72,30 +77,36 @@ static int have_wrcomb(void) | |||
72 | { | 77 | { |
73 | struct pci_dev *dev; | 78 | struct pci_dev *dev; |
74 | u8 rev; | 79 | u8 rev; |
75 | 80 | ||
76 | if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) { | 81 | dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL); |
77 | /* ServerWorks LE chipsets < rev 6 have problems with write-combining | 82 | if (dev != NULL) { |
78 | Don't allow it and leave room for other chipsets to be tagged */ | 83 | /* |
84 | * ServerWorks LE chipsets < rev 6 have problems with | ||
85 | * write-combining. Don't allow it and leave room for other | ||
86 | * chipsets to be tagged | ||
87 | */ | ||
79 | if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && | 88 | if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && |
80 | dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { | 89 | dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { |
81 | pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); | 90 | pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); |
82 | if (rev <= 5) { | 91 | if (rev <= 5) { |
83 | printk(KERN_INFO "mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); | 92 | pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); |
84 | pci_dev_put(dev); | 93 | pci_dev_put(dev); |
85 | return 0; | 94 | return 0; |
86 | } | 95 | } |
87 | } | 96 | } |
88 | /* Intel 450NX errata # 23. Non ascending cacheline evictions to | 97 | /* |
89 | write combining memory may resulting in data corruption */ | 98 | * Intel 450NX errata # 23. Non ascending cacheline evictions to |
99 | * write combining memory may resulting in data corruption | ||
100 | */ | ||
90 | if (dev->vendor == PCI_VENDOR_ID_INTEL && | 101 | if (dev->vendor == PCI_VENDOR_ID_INTEL && |
91 | dev->device == PCI_DEVICE_ID_INTEL_82451NX) { | 102 | dev->device == PCI_DEVICE_ID_INTEL_82451NX) { |
92 | printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); | 103 | pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); |
93 | pci_dev_put(dev); | 104 | pci_dev_put(dev); |
94 | return 0; | 105 | return 0; |
95 | } | 106 | } |
96 | pci_dev_put(dev); | 107 | pci_dev_put(dev); |
97 | } | 108 | } |
98 | return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0); | 109 | return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0; |
99 | } | 110 | } |
100 | 111 | ||
101 | /* This function returns the number of variable MTRRs */ | 112 | /* This function returns the number of variable MTRRs */ |
@@ -103,12 +114,13 @@ static void __init set_num_var_ranges(void) | |||
103 | { | 114 | { |
104 | unsigned long config = 0, dummy; | 115 | unsigned long config = 0, dummy; |
105 | 116 | ||
106 | if (use_intel()) { | 117 | if (use_intel()) |
107 | rdmsr(MSR_MTRRcap, config, dummy); | 118 | rdmsr(MSR_MTRRcap, config, dummy); |
108 | } else if (is_cpu(AMD)) | 119 | else if (is_cpu(AMD)) |
109 | config = 2; | 120 | config = 2; |
110 | else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) | 121 | else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) |
111 | config = 8; | 122 | config = 8; |
123 | |||
112 | num_var_ranges = config & 0xff; | 124 | num_var_ranges = config & 0xff; |
113 | } | 125 | } |
114 | 126 | ||
@@ -130,10 +142,12 @@ struct set_mtrr_data { | |||
130 | mtrr_type smp_type; | 142 | mtrr_type smp_type; |
131 | }; | 143 | }; |
132 | 144 | ||
145 | /** | ||
146 | * ipi_handler - Synchronisation handler. Executed by "other" CPUs. | ||
147 | * | ||
148 | * Returns nothing. | ||
149 | */ | ||
133 | static void ipi_handler(void *info) | 150 | static void ipi_handler(void *info) |
134 | /* [SUMMARY] Synchronisation handler. Executed by "other" CPUs. | ||
135 | [RETURNS] Nothing. | ||
136 | */ | ||
137 | { | 151 | { |
138 | #ifdef CONFIG_SMP | 152 | #ifdef CONFIG_SMP |
139 | struct set_mtrr_data *data = info; | 153 | struct set_mtrr_data *data = info; |
@@ -142,18 +156,19 @@ static void ipi_handler(void *info) | |||
142 | local_irq_save(flags); | 156 | local_irq_save(flags); |
143 | 157 | ||
144 | atomic_dec(&data->count); | 158 | atomic_dec(&data->count); |
145 | while(!atomic_read(&data->gate)) | 159 | while (!atomic_read(&data->gate)) |
146 | cpu_relax(); | 160 | cpu_relax(); |
147 | 161 | ||
148 | /* The master has cleared me to execute */ | 162 | /* The master has cleared me to execute */ |
149 | if (data->smp_reg != ~0U) | 163 | if (data->smp_reg != ~0U) { |
150 | mtrr_if->set(data->smp_reg, data->smp_base, | 164 | mtrr_if->set(data->smp_reg, data->smp_base, |
151 | data->smp_size, data->smp_type); | 165 | data->smp_size, data->smp_type); |
152 | else | 166 | } else { |
153 | mtrr_if->set_all(); | 167 | mtrr_if->set_all(); |
168 | } | ||
154 | 169 | ||
155 | atomic_dec(&data->count); | 170 | atomic_dec(&data->count); |
156 | while(atomic_read(&data->gate)) | 171 | while (atomic_read(&data->gate)) |
157 | cpu_relax(); | 172 | cpu_relax(); |
158 | 173 | ||
159 | atomic_dec(&data->count); | 174 | atomic_dec(&data->count); |
@@ -161,7 +176,8 @@ static void ipi_handler(void *info) | |||
161 | #endif | 176 | #endif |
162 | } | 177 | } |
163 | 178 | ||
164 | static inline int types_compatible(mtrr_type type1, mtrr_type type2) { | 179 | static inline int types_compatible(mtrr_type type1, mtrr_type type2) |
180 | { | ||
165 | return type1 == MTRR_TYPE_UNCACHABLE || | 181 | return type1 == MTRR_TYPE_UNCACHABLE || |
166 | type2 == MTRR_TYPE_UNCACHABLE || | 182 | type2 == MTRR_TYPE_UNCACHABLE || |
167 | (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) || | 183 | (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) || |
@@ -176,10 +192,10 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) { | |||
176 | * @type: mtrr type | 192 | * @type: mtrr type |
177 | * | 193 | * |
178 | * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: | 194 | * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: |
179 | * | 195 | * |
180 | * 1. Send IPI to do the following: | 196 | * 1. Send IPI to do the following: |
181 | * 2. Disable Interrupts | 197 | * 2. Disable Interrupts |
182 | * 3. Wait for all procs to do so | 198 | * 3. Wait for all procs to do so |
183 | * 4. Enter no-fill cache mode | 199 | * 4. Enter no-fill cache mode |
184 | * 5. Flush caches | 200 | * 5. Flush caches |
185 | * 6. Clear PGE bit | 201 | * 6. Clear PGE bit |
@@ -189,26 +205,27 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) { | |||
189 | * 10. Enable all range registers | 205 | * 10. Enable all range registers |
190 | * 11. Flush all TLBs and caches again | 206 | * 11. Flush all TLBs and caches again |
191 | * 12. Enter normal cache mode and reenable caching | 207 | * 12. Enter normal cache mode and reenable caching |
192 | * 13. Set PGE | 208 | * 13. Set PGE |
193 | * 14. Wait for buddies to catch up | 209 | * 14. Wait for buddies to catch up |
194 | * 15. Enable interrupts. | 210 | * 15. Enable interrupts. |
195 | * | 211 | * |
196 | * What does that mean for us? Well, first we set data.count to the number | 212 | * What does that mean for us? Well, first we set data.count to the number |
197 | * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait | 213 | * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait |
198 | * until it hits 0 and proceed. We set the data.gate flag and reset data.count. | 214 | * until it hits 0 and proceed. We set the data.gate flag and reset data.count. |
199 | * Meanwhile, they are waiting for that flag to be set. Once it's set, each | 215 | * Meanwhile, they are waiting for that flag to be set. Once it's set, each |
200 | * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it | 216 | * CPU goes through the transition of updating MTRRs. |
201 | * differently, so we call mtrr_if->set() callback and let them take care of it. | 217 | * The CPU vendors may each do it differently, |
202 | * When they're done, they again decrement data->count and wait for data.gate to | 218 | * so we call mtrr_if->set() callback and let them take care of it. |
203 | * be reset. | 219 | * When they're done, they again decrement data->count and wait for data.gate |
204 | * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag. | 220 | * to be reset. |
221 | * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag | ||
205 | * Everyone then enables interrupts and we all continue on. | 222 | * Everyone then enables interrupts and we all continue on. |
206 | * | 223 | * |
207 | * Note that the mechanism is the same for UP systems, too; all the SMP stuff | 224 | * Note that the mechanism is the same for UP systems, too; all the SMP stuff |
208 | * becomes nops. | 225 | * becomes nops. |
209 | */ | 226 | */ |
210 | static void set_mtrr(unsigned int reg, unsigned long base, | 227 | static void |
211 | unsigned long size, mtrr_type type) | 228 | set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) |
212 | { | 229 | { |
213 | struct set_mtrr_data data; | 230 | struct set_mtrr_data data; |
214 | unsigned long flags; | 231 | unsigned long flags; |
@@ -218,121 +235,122 @@ static void set_mtrr(unsigned int reg, unsigned long base, | |||
218 | data.smp_size = size; | 235 | data.smp_size = size; |
219 | data.smp_type = type; | 236 | data.smp_type = type; |
220 | atomic_set(&data.count, num_booting_cpus() - 1); | 237 | atomic_set(&data.count, num_booting_cpus() - 1); |
221 | /* make sure data.count is visible before unleashing other CPUs */ | 238 | |
239 | /* Make sure data.count is visible before unleashing other CPUs */ | ||
222 | smp_wmb(); | 240 | smp_wmb(); |
223 | atomic_set(&data.gate,0); | 241 | atomic_set(&data.gate, 0); |
224 | 242 | ||
225 | /* Start the ball rolling on other CPUs */ | 243 | /* Start the ball rolling on other CPUs */ |
226 | if (smp_call_function(ipi_handler, &data, 0) != 0) | 244 | if (smp_call_function(ipi_handler, &data, 0) != 0) |
227 | panic("mtrr: timed out waiting for other CPUs\n"); | 245 | panic("mtrr: timed out waiting for other CPUs\n"); |
228 | 246 | ||
229 | local_irq_save(flags); | 247 | local_irq_save(flags); |
230 | 248 | ||
231 | while(atomic_read(&data.count)) | 249 | while (atomic_read(&data.count)) |
232 | cpu_relax(); | 250 | cpu_relax(); |
233 | 251 | ||
234 | /* ok, reset count and toggle gate */ | 252 | /* Ok, reset count and toggle gate */ |
235 | atomic_set(&data.count, num_booting_cpus() - 1); | 253 | atomic_set(&data.count, num_booting_cpus() - 1); |
236 | smp_wmb(); | 254 | smp_wmb(); |
237 | atomic_set(&data.gate,1); | 255 | atomic_set(&data.gate, 1); |
238 | 256 | ||
239 | /* do our MTRR business */ | 257 | /* Do our MTRR business */ |
240 | 258 | ||
241 | /* HACK! | 259 | /* |
260 | * HACK! | ||
242 | * We use this same function to initialize the mtrrs on boot. | 261 | * We use this same function to initialize the mtrrs on boot. |
243 | * The state of the boot cpu's mtrrs has been saved, and we want | 262 | * The state of the boot cpu's mtrrs has been saved, and we want |
244 | * to replicate across all the APs. | 263 | * to replicate across all the APs. |
245 | * If we're doing that @reg is set to something special... | 264 | * If we're doing that @reg is set to something special... |
246 | */ | 265 | */ |
247 | if (reg != ~0U) | 266 | if (reg != ~0U) |
248 | mtrr_if->set(reg,base,size,type); | 267 | mtrr_if->set(reg, base, size, type); |
249 | 268 | ||
250 | /* wait for the others */ | 269 | /* Wait for the others */ |
251 | while(atomic_read(&data.count)) | 270 | while (atomic_read(&data.count)) |
252 | cpu_relax(); | 271 | cpu_relax(); |
253 | 272 | ||
254 | atomic_set(&data.count, num_booting_cpus() - 1); | 273 | atomic_set(&data.count, num_booting_cpus() - 1); |
255 | smp_wmb(); | 274 | smp_wmb(); |
256 | atomic_set(&data.gate,0); | 275 | atomic_set(&data.gate, 0); |
257 | 276 | ||
258 | /* | 277 | /* |
259 | * Wait here for everyone to have seen the gate change | 278 | * Wait here for everyone to have seen the gate change |
260 | * So we're the last ones to touch 'data' | 279 | * So we're the last ones to touch 'data' |
261 | */ | 280 | */ |
262 | while(atomic_read(&data.count)) | 281 | while (atomic_read(&data.count)) |
263 | cpu_relax(); | 282 | cpu_relax(); |
264 | 283 | ||
265 | local_irq_restore(flags); | 284 | local_irq_restore(flags); |
266 | } | 285 | } |
267 | 286 | ||
268 | /** | 287 | /** |
269 | * mtrr_add_page - Add a memory type region | 288 | * mtrr_add_page - Add a memory type region |
270 | * @base: Physical base address of region in pages (in units of 4 kB!) | 289 | * @base: Physical base address of region in pages (in units of 4 kB!) |
271 | * @size: Physical size of region in pages (4 kB) | 290 | * @size: Physical size of region in pages (4 kB) |
272 | * @type: Type of MTRR desired | 291 | * @type: Type of MTRR desired |
273 | * @increment: If this is true do usage counting on the region | 292 | * @increment: If this is true do usage counting on the region |
274 | * | 293 | * |
275 | * Memory type region registers control the caching on newer Intel and | 294 | * Memory type region registers control the caching on newer Intel and |
276 | * non Intel processors. This function allows drivers to request an | 295 | * non Intel processors. This function allows drivers to request an |
277 | * MTRR is added. The details and hardware specifics of each processor's | 296 | * MTRR is added. The details and hardware specifics of each processor's |
278 | * implementation are hidden from the caller, but nevertheless the | 297 | * implementation are hidden from the caller, but nevertheless the |
279 | * caller should expect to need to provide a power of two size on an | 298 | * caller should expect to need to provide a power of two size on an |
280 | * equivalent power of two boundary. | 299 | * equivalent power of two boundary. |
281 | * | 300 | * |
282 | * If the region cannot be added either because all regions are in use | 301 | * If the region cannot be added either because all regions are in use |
283 | * or the CPU cannot support it a negative value is returned. On success | 302 | * or the CPU cannot support it a negative value is returned. On success |
284 | * the register number for this entry is returned, but should be treated | 303 | * the register number for this entry is returned, but should be treated |
285 | * as a cookie only. | 304 | * as a cookie only. |
286 | * | 305 | * |
287 | * On a multiprocessor machine the changes are made to all processors. | 306 | * On a multiprocessor machine the changes are made to all processors. |
288 | * This is required on x86 by the Intel processors. | 307 | * This is required on x86 by the Intel processors. |
289 | * | 308 | * |
290 | * The available types are | 309 | * The available types are |
291 | * | 310 | * |
292 | * %MTRR_TYPE_UNCACHABLE - No caching | 311 | * %MTRR_TYPE_UNCACHABLE - No caching |
293 | * | 312 | * |
294 | * %MTRR_TYPE_WRBACK - Write data back in bursts whenever | 313 | * %MTRR_TYPE_WRBACK - Write data back in bursts whenever |
295 | * | 314 | * |
296 | * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts | 315 | * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts |
297 | * | 316 | * |
298 | * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes | 317 | * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes |
299 | * | 318 | * |
300 | * BUGS: Needs a quiet flag for the cases where drivers do not mind | 319 | * BUGS: Needs a quiet flag for the cases where drivers do not mind |
301 | * failures and do not wish system log messages to be sent. | 320 | * failures and do not wish system log messages to be sent. |
302 | */ | 321 | */ |
303 | 322 | int mtrr_add_page(unsigned long base, unsigned long size, | |
304 | int mtrr_add_page(unsigned long base, unsigned long size, | ||
305 | unsigned int type, bool increment) | 323 | unsigned int type, bool increment) |
306 | { | 324 | { |
325 | unsigned long lbase, lsize; | ||
307 | int i, replace, error; | 326 | int i, replace, error; |
308 | mtrr_type ltype; | 327 | mtrr_type ltype; |
309 | unsigned long lbase, lsize; | ||
310 | 328 | ||
311 | if (!mtrr_if) | 329 | if (!mtrr_if) |
312 | return -ENXIO; | 330 | return -ENXIO; |
313 | 331 | ||
314 | if ((error = mtrr_if->validate_add_page(base,size,type))) | 332 | error = mtrr_if->validate_add_page(base, size, type); |
333 | if (error) | ||
315 | return error; | 334 | return error; |
316 | 335 | ||
317 | if (type >= MTRR_NUM_TYPES) { | 336 | if (type >= MTRR_NUM_TYPES) { |
318 | printk(KERN_WARNING "mtrr: type: %u invalid\n", type); | 337 | pr_warning("mtrr: type: %u invalid\n", type); |
319 | return -EINVAL; | 338 | return -EINVAL; |
320 | } | 339 | } |
321 | 340 | ||
322 | /* If the type is WC, check that this processor supports it */ | 341 | /* If the type is WC, check that this processor supports it */ |
323 | if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { | 342 | if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { |
324 | printk(KERN_WARNING | 343 | pr_warning("mtrr: your processor doesn't support write-combining\n"); |
325 | "mtrr: your processor doesn't support write-combining\n"); | ||
326 | return -ENOSYS; | 344 | return -ENOSYS; |
327 | } | 345 | } |
328 | 346 | ||
329 | if (!size) { | 347 | if (!size) { |
330 | printk(KERN_WARNING "mtrr: zero sized request\n"); | 348 | pr_warning("mtrr: zero sized request\n"); |
331 | return -EINVAL; | 349 | return -EINVAL; |
332 | } | 350 | } |
333 | 351 | ||
334 | if (base & size_or_mask || size & size_or_mask) { | 352 | if (base & size_or_mask || size & size_or_mask) { |
335 | printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n"); | 353 | pr_warning("mtrr: base or size exceeds the MTRR width\n"); |
336 | return -EINVAL; | 354 | return -EINVAL; |
337 | } | 355 | } |
338 | 356 | ||
@@ -341,36 +359,40 @@ int mtrr_add_page(unsigned long base, unsigned long size, | |||
341 | 359 | ||
342 | /* No CPU hotplug when we change MTRR entries */ | 360 | /* No CPU hotplug when we change MTRR entries */ |
343 | get_online_cpus(); | 361 | get_online_cpus(); |
344 | /* Search for existing MTRR */ | 362 | |
363 | /* Search for existing MTRR */ | ||
345 | mutex_lock(&mtrr_mutex); | 364 | mutex_lock(&mtrr_mutex); |
346 | for (i = 0; i < num_var_ranges; ++i) { | 365 | for (i = 0; i < num_var_ranges; ++i) { |
347 | mtrr_if->get(i, &lbase, &lsize, &ltype); | 366 | mtrr_if->get(i, &lbase, &lsize, &ltype); |
348 | if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase) | 367 | if (!lsize || base > lbase + lsize - 1 || |
368 | base + size - 1 < lbase) | ||
349 | continue; | 369 | continue; |
350 | /* At this point we know there is some kind of overlap/enclosure */ | 370 | /* |
371 | * At this point we know there is some kind of | ||
372 | * overlap/enclosure | ||
373 | */ | ||
351 | if (base < lbase || base + size - 1 > lbase + lsize - 1) { | 374 | if (base < lbase || base + size - 1 > lbase + lsize - 1) { |
352 | if (base <= lbase && base + size - 1 >= lbase + lsize - 1) { | 375 | if (base <= lbase && |
376 | base + size - 1 >= lbase + lsize - 1) { | ||
353 | /* New region encloses an existing region */ | 377 | /* New region encloses an existing region */ |
354 | if (type == ltype) { | 378 | if (type == ltype) { |
355 | replace = replace == -1 ? i : -2; | 379 | replace = replace == -1 ? i : -2; |
356 | continue; | 380 | continue; |
357 | } | 381 | } else if (types_compatible(type, ltype)) |
358 | else if (types_compatible(type, ltype)) | ||
359 | continue; | 382 | continue; |
360 | } | 383 | } |
361 | printk(KERN_WARNING | 384 | pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing" |
362 | "mtrr: 0x%lx000,0x%lx000 overlaps existing" | 385 | " 0x%lx000,0x%lx000\n", base, size, lbase, |
363 | " 0x%lx000,0x%lx000\n", base, size, lbase, | 386 | lsize); |
364 | lsize); | ||
365 | goto out; | 387 | goto out; |
366 | } | 388 | } |
367 | /* New region is enclosed by an existing region */ | 389 | /* New region is enclosed by an existing region */ |
368 | if (ltype != type) { | 390 | if (ltype != type) { |
369 | if (types_compatible(type, ltype)) | 391 | if (types_compatible(type, ltype)) |
370 | continue; | 392 | continue; |
371 | printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", | 393 | pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", |
372 | base, size, mtrr_attrib_to_str(ltype), | 394 | base, size, mtrr_attrib_to_str(ltype), |
373 | mtrr_attrib_to_str(type)); | 395 | mtrr_attrib_to_str(type)); |
374 | goto out; | 396 | goto out; |
375 | } | 397 | } |
376 | if (increment) | 398 | if (increment) |
@@ -378,7 +400,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, | |||
378 | error = i; | 400 | error = i; |
379 | goto out; | 401 | goto out; |
380 | } | 402 | } |
381 | /* Search for an empty MTRR */ | 403 | /* Search for an empty MTRR */ |
382 | i = mtrr_if->get_free_region(base, size, replace); | 404 | i = mtrr_if->get_free_region(base, size, replace); |
383 | if (i >= 0) { | 405 | if (i >= 0) { |
384 | set_mtrr(i, base, size, type); | 406 | set_mtrr(i, base, size, type); |
@@ -393,8 +415,9 @@ int mtrr_add_page(unsigned long base, unsigned long size, | |||
393 | mtrr_usage_table[replace] = 0; | 415 | mtrr_usage_table[replace] = 0; |
394 | } | 416 | } |
395 | } | 417 | } |
396 | } else | 418 | } else { |
397 | printk(KERN_INFO "mtrr: no more MTRRs available\n"); | 419 | pr_info("mtrr: no more MTRRs available\n"); |
420 | } | ||
398 | error = i; | 421 | error = i; |
399 | out: | 422 | out: |
400 | mutex_unlock(&mtrr_mutex); | 423 | mutex_unlock(&mtrr_mutex); |
@@ -405,10 +428,8 @@ int mtrr_add_page(unsigned long base, unsigned long size, | |||
405 | static int mtrr_check(unsigned long base, unsigned long size) | 428 | static int mtrr_check(unsigned long base, unsigned long size) |
406 | { | 429 | { |
407 | if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { | 430 | if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { |
408 | printk(KERN_WARNING | 431 | pr_warning("mtrr: size and base must be multiples of 4 kiB\n"); |
409 | "mtrr: size and base must be multiples of 4 kiB\n"); | 432 | pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base); |
410 | printk(KERN_DEBUG | ||
411 | "mtrr: size: 0x%lx base: 0x%lx\n", size, base); | ||
412 | dump_stack(); | 433 | dump_stack(); |
413 | return -1; | 434 | return -1; |
414 | } | 435 | } |
@@ -416,66 +437,64 @@ static int mtrr_check(unsigned long base, unsigned long size) | |||
416 | } | 437 | } |
417 | 438 | ||
418 | /** | 439 | /** |
419 | * mtrr_add - Add a memory type region | 440 | * mtrr_add - Add a memory type region |
420 | * @base: Physical base address of region | 441 | * @base: Physical base address of region |
421 | * @size: Physical size of region | 442 | * @size: Physical size of region |
422 | * @type: Type of MTRR desired | 443 | * @type: Type of MTRR desired |
423 | * @increment: If this is true do usage counting on the region | 444 | * @increment: If this is true do usage counting on the region |
424 | * | 445 | * |
425 | * Memory type region registers control the caching on newer Intel and | 446 | * Memory type region registers control the caching on newer Intel and |
426 | * non Intel processors. This function allows drivers to request an | 447 | * non Intel processors. This function allows drivers to request an |
427 | * MTRR is added. The details and hardware specifics of each processor's | 448 | * MTRR is added. The details and hardware specifics of each processor's |
428 | * implementation are hidden from the caller, but nevertheless the | 449 | * implementation are hidden from the caller, but nevertheless the |
429 | * caller should expect to need to provide a power of two size on an | 450 | * caller should expect to need to provide a power of two size on an |
430 | * equivalent power of two boundary. | 451 | * equivalent power of two boundary. |
431 | * | 452 | * |
432 | * If the region cannot be added either because all regions are in use | 453 | * If the region cannot be added either because all regions are in use |
433 | * or the CPU cannot support it a negative value is returned. On success | 454 | * or the CPU cannot support it a negative value is returned. On success |
434 | * the register number for this entry is returned, but should be treated | 455 | * the register number for this entry is returned, but should be treated |
435 | * as a cookie only. | 456 | * as a cookie only. |
436 | * | 457 | * |
437 | * On a multiprocessor machine the changes are made to all processors. | 458 | * On a multiprocessor machine the changes are made to all processors. |
438 | * This is required on x86 by the Intel processors. | 459 | * This is required on x86 by the Intel processors. |
439 | * | 460 | * |
440 | * The available types are | 461 | * The available types are |
441 | * | 462 | * |
442 | * %MTRR_TYPE_UNCACHABLE - No caching | 463 | * %MTRR_TYPE_UNCACHABLE - No caching |
443 | * | 464 | * |
444 | * %MTRR_TYPE_WRBACK - Write data back in bursts whenever | 465 | * %MTRR_TYPE_WRBACK - Write data back in bursts whenever |
445 | * | 466 | * |
446 | * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts | 467 | * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts |
447 | * | 468 | * |
448 | * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes | 469 | * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes |
449 | * | 470 | * |
450 | * BUGS: Needs a quiet flag for the cases where drivers do not mind | 471 | * BUGS: Needs a quiet flag for the cases where drivers do not mind |
451 | * failures and do not wish system log messages to be sent. | 472 | * failures and do not wish system log messages to be sent. |
452 | */ | 473 | */ |
453 | 474 | int mtrr_add(unsigned long base, unsigned long size, unsigned int type, | |
454 | int | 475 | bool increment) |
455 | mtrr_add(unsigned long base, unsigned long size, unsigned int type, | ||
456 | bool increment) | ||
457 | { | 476 | { |
458 | if (mtrr_check(base, size)) | 477 | if (mtrr_check(base, size)) |
459 | return -EINVAL; | 478 | return -EINVAL; |
460 | return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, | 479 | return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, |
461 | increment); | 480 | increment); |
462 | } | 481 | } |
482 | EXPORT_SYMBOL(mtrr_add); | ||
463 | 483 | ||
464 | /** | 484 | /** |
465 | * mtrr_del_page - delete a memory type region | 485 | * mtrr_del_page - delete a memory type region |
466 | * @reg: Register returned by mtrr_add | 486 | * @reg: Register returned by mtrr_add |
467 | * @base: Physical base address | 487 | * @base: Physical base address |
468 | * @size: Size of region | 488 | * @size: Size of region |
469 | * | 489 | * |
470 | * If register is supplied then base and size are ignored. This is | 490 | * If register is supplied then base and size are ignored. This is |
471 | * how drivers should call it. | 491 | * how drivers should call it. |
472 | * | 492 | * |
473 | * Releases an MTRR region. If the usage count drops to zero the | 493 | * Releases an MTRR region. If the usage count drops to zero the |
474 | * register is freed and the region returns to default state. | 494 | * register is freed and the region returns to default state. |
475 | * On success the register is returned, on failure a negative error | 495 | * On success the register is returned, on failure a negative error |
476 | * code. | 496 | * code. |
477 | */ | 497 | */ |
478 | |||
479 | int mtrr_del_page(int reg, unsigned long base, unsigned long size) | 498 | int mtrr_del_page(int reg, unsigned long base, unsigned long size) |
480 | { | 499 | { |
481 | int i, max; | 500 | int i, max; |
@@ -500,22 +519,22 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) | |||
500 | } | 519 | } |
501 | } | 520 | } |
502 | if (reg < 0) { | 521 | if (reg < 0) { |
503 | printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base, | 522 | pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n", |
504 | size); | 523 | base, size); |
505 | goto out; | 524 | goto out; |
506 | } | 525 | } |
507 | } | 526 | } |
508 | if (reg >= max) { | 527 | if (reg >= max) { |
509 | printk(KERN_WARNING "mtrr: register: %d too big\n", reg); | 528 | pr_warning("mtrr: register: %d too big\n", reg); |
510 | goto out; | 529 | goto out; |
511 | } | 530 | } |
512 | mtrr_if->get(reg, &lbase, &lsize, &ltype); | 531 | mtrr_if->get(reg, &lbase, &lsize, &ltype); |
513 | if (lsize < 1) { | 532 | if (lsize < 1) { |
514 | printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); | 533 | pr_warning("mtrr: MTRR %d not used\n", reg); |
515 | goto out; | 534 | goto out; |
516 | } | 535 | } |
517 | if (mtrr_usage_table[reg] < 1) { | 536 | if (mtrr_usage_table[reg] < 1) { |
518 | printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); | 537 | pr_warning("mtrr: reg: %d has count=0\n", reg); |
519 | goto out; | 538 | goto out; |
520 | } | 539 | } |
521 | if (--mtrr_usage_table[reg] < 1) | 540 | if (--mtrr_usage_table[reg] < 1) |
@@ -526,33 +545,31 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) | |||
526 | put_online_cpus(); | 545 | put_online_cpus(); |
527 | return error; | 546 | return error; |
528 | } | 547 | } |
548 | |||
529 | /** | 549 | /** |
530 | * mtrr_del - delete a memory type region | 550 | * mtrr_del - delete a memory type region |
531 | * @reg: Register returned by mtrr_add | 551 | * @reg: Register returned by mtrr_add |
532 | * @base: Physical base address | 552 | * @base: Physical base address |
533 | * @size: Size of region | 553 | * @size: Size of region |
534 | * | 554 | * |
535 | * If register is supplied then base and size are ignored. This is | 555 | * If register is supplied then base and size are ignored. This is |
536 | * how drivers should call it. | 556 | * how drivers should call it. |
537 | * | 557 | * |
538 | * Releases an MTRR region. If the usage count drops to zero the | 558 | * Releases an MTRR region. If the usage count drops to zero the |
539 | * register is freed and the region returns to default state. | 559 | * register is freed and the region returns to default state. |
540 | * On success the register is returned, on failure a negative error | 560 | * On success the register is returned, on failure a negative error |
541 | * code. | 561 | * code. |
542 | */ | 562 | */ |
543 | 563 | int mtrr_del(int reg, unsigned long base, unsigned long size) | |
544 | int | ||
545 | mtrr_del(int reg, unsigned long base, unsigned long size) | ||
546 | { | 564 | { |
547 | if (mtrr_check(base, size)) | 565 | if (mtrr_check(base, size)) |
548 | return -EINVAL; | 566 | return -EINVAL; |
549 | return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); | 567 | return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); |
550 | } | 568 | } |
551 | |||
552 | EXPORT_SYMBOL(mtrr_add); | ||
553 | EXPORT_SYMBOL(mtrr_del); | 569 | EXPORT_SYMBOL(mtrr_del); |
554 | 570 | ||
555 | /* HACK ALERT! | 571 | /* |
572 | * HACK ALERT! | ||
556 | * These should be called implicitly, but we can't yet until all the initcall | 573 | * These should be called implicitly, but we can't yet until all the initcall |
557 | * stuff is done... | 574 | * stuff is done... |
558 | */ | 575 | */ |
@@ -576,29 +593,28 @@ struct mtrr_value { | |||
576 | 593 | ||
577 | static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES]; | 594 | static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES]; |
578 | 595 | ||
579 | static int mtrr_save(struct sys_device * sysdev, pm_message_t state) | 596 | static int mtrr_save(struct sys_device *sysdev, pm_message_t state) |
580 | { | 597 | { |
581 | int i; | 598 | int i; |
582 | 599 | ||
583 | for (i = 0; i < num_var_ranges; i++) { | 600 | for (i = 0; i < num_var_ranges; i++) { |
584 | mtrr_if->get(i, | 601 | mtrr_if->get(i, &mtrr_value[i].lbase, |
585 | &mtrr_value[i].lbase, | 602 | &mtrr_value[i].lsize, |
586 | &mtrr_value[i].lsize, | 603 | &mtrr_value[i].ltype); |
587 | &mtrr_value[i].ltype); | ||
588 | } | 604 | } |
589 | return 0; | 605 | return 0; |
590 | } | 606 | } |
591 | 607 | ||
592 | static int mtrr_restore(struct sys_device * sysdev) | 608 | static int mtrr_restore(struct sys_device *sysdev) |
593 | { | 609 | { |
594 | int i; | 610 | int i; |
595 | 611 | ||
596 | for (i = 0; i < num_var_ranges; i++) { | 612 | for (i = 0; i < num_var_ranges; i++) { |
597 | if (mtrr_value[i].lsize) | 613 | if (mtrr_value[i].lsize) { |
598 | set_mtrr(i, | 614 | set_mtrr(i, mtrr_value[i].lbase, |
599 | mtrr_value[i].lbase, | 615 | mtrr_value[i].lsize, |
600 | mtrr_value[i].lsize, | 616 | mtrr_value[i].ltype); |
601 | mtrr_value[i].ltype); | 617 | } |
602 | } | 618 | } |
603 | return 0; | 619 | return 0; |
604 | } | 620 | } |
@@ -615,26 +631,29 @@ int __initdata changed_by_mtrr_cleanup; | |||
615 | /** | 631 | /** |
616 | * mtrr_bp_init - initialize mtrrs on the boot CPU | 632 | * mtrr_bp_init - initialize mtrrs on the boot CPU |
617 | * | 633 | * |
618 | * This needs to be called early; before any of the other CPUs are | 634 | * This needs to be called early; before any of the other CPUs are |
619 | * initialized (i.e. before smp_init()). | 635 | * initialized (i.e. before smp_init()). |
620 | * | 636 | * |
621 | */ | 637 | */ |
622 | void __init mtrr_bp_init(void) | 638 | void __init mtrr_bp_init(void) |
623 | { | 639 | { |
624 | u32 phys_addr; | 640 | u32 phys_addr; |
641 | |||
625 | init_ifs(); | 642 | init_ifs(); |
626 | 643 | ||
627 | phys_addr = 32; | 644 | phys_addr = 32; |
628 | 645 | ||
629 | if (cpu_has_mtrr) { | 646 | if (cpu_has_mtrr) { |
630 | mtrr_if = &generic_mtrr_ops; | 647 | mtrr_if = &generic_mtrr_ops; |
631 | size_or_mask = 0xff000000; /* 36 bits */ | 648 | size_or_mask = 0xff000000; /* 36 bits */ |
632 | size_and_mask = 0x00f00000; | 649 | size_and_mask = 0x00f00000; |
633 | phys_addr = 36; | 650 | phys_addr = 36; |
634 | 651 | ||
635 | /* This is an AMD specific MSR, but we assume(hope?) that | 652 | /* |
636 | Intel will implement it to when they extend the address | 653 | * This is an AMD specific MSR, but we assume(hope?) that |
637 | bus of the Xeon. */ | 654 | * Intel will implement it to when they extend the address |
655 | * bus of the Xeon. | ||
656 | */ | ||
638 | if (cpuid_eax(0x80000000) >= 0x80000008) { | 657 | if (cpuid_eax(0x80000000) >= 0x80000008) { |
639 | phys_addr = cpuid_eax(0x80000008) & 0xff; | 658 | phys_addr = cpuid_eax(0x80000008) & 0xff; |
640 | /* CPUID workaround for Intel 0F33/0F34 CPU */ | 659 | /* CPUID workaround for Intel 0F33/0F34 CPU */ |
@@ -649,9 +668,11 @@ void __init mtrr_bp_init(void) | |||
649 | size_and_mask = ~size_or_mask & 0xfffff00000ULL; | 668 | size_and_mask = ~size_or_mask & 0xfffff00000ULL; |
650 | } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && | 669 | } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && |
651 | boot_cpu_data.x86 == 6) { | 670 | boot_cpu_data.x86 == 6) { |
652 | /* VIA C* family have Intel style MTRRs, but | 671 | /* |
653 | don't support PAE */ | 672 | * VIA C* family have Intel style MTRRs, |
654 | size_or_mask = 0xfff00000; /* 32 bits */ | 673 | * but don't support PAE |
674 | */ | ||
675 | size_or_mask = 0xfff00000; /* 32 bits */ | ||
655 | size_and_mask = 0; | 676 | size_and_mask = 0; |
656 | phys_addr = 32; | 677 | phys_addr = 32; |
657 | } | 678 | } |
@@ -694,7 +715,6 @@ void __init mtrr_bp_init(void) | |||
694 | changed_by_mtrr_cleanup = 1; | 715 | changed_by_mtrr_cleanup = 1; |
695 | mtrr_if->set_all(); | 716 | mtrr_if->set_all(); |
696 | } | 717 | } |
697 | |||
698 | } | 718 | } |
699 | } | 719 | } |
700 | } | 720 | } |
@@ -706,12 +726,17 @@ void mtrr_ap_init(void) | |||
706 | if (!mtrr_if || !use_intel()) | 726 | if (!mtrr_if || !use_intel()) |
707 | return; | 727 | return; |
708 | /* | 728 | /* |
709 | * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed, | 729 | * Ideally we should hold mtrr_mutex here to avoid mtrr entries |
710 | * but this routine will be called in cpu boot time, holding the lock | 730 | * changed, but this routine will be called in cpu boot time, |
711 | * breaks it. This routine is called in two cases: 1.very earily time | 731 | * holding the lock breaks it. |
712 | * of software resume, when there absolutely isn't mtrr entry changes; | 732 | * |
713 | * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to | 733 | * This routine is called in two cases: |
714 | * prevent mtrr entry changes | 734 | * |
735 | * 1. very earily time of software resume, when there absolutely | ||
736 | * isn't mtrr entry changes; | ||
737 | * | ||
738 | * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug | ||
739 | * lock to prevent mtrr entry changes | ||
715 | */ | 740 | */ |
716 | local_irq_save(flags); | 741 | local_irq_save(flags); |
717 | 742 | ||
@@ -732,19 +757,23 @@ static int __init mtrr_init_finialize(void) | |||
732 | { | 757 | { |
733 | if (!mtrr_if) | 758 | if (!mtrr_if) |
734 | return 0; | 759 | return 0; |
760 | |||
735 | if (use_intel()) { | 761 | if (use_intel()) { |
736 | if (!changed_by_mtrr_cleanup) | 762 | if (!changed_by_mtrr_cleanup) |
737 | mtrr_state_warn(); | 763 | mtrr_state_warn(); |
738 | } else { | 764 | return 0; |
739 | /* The CPUs haven't MTRR and seem to not support SMP. They have | ||
740 | * specific drivers, we use a tricky method to support | ||
741 | * suspend/resume for them. | ||
742 | * TBD: is there any system with such CPU which supports | ||
743 | * suspend/resume? if no, we should remove the code. | ||
744 | */ | ||
745 | sysdev_driver_register(&cpu_sysdev_class, | ||
746 | &mtrr_sysdev_driver); | ||
747 | } | 765 | } |
766 | |||
767 | /* | ||
768 | * The CPU has no MTRR and seems to not support SMP. They have | ||
769 | * specific drivers, we use a tricky method to support | ||
770 | * suspend/resume for them. | ||
771 | * | ||
772 | * TBD: is there any system with such CPU which supports | ||
773 | * suspend/resume? If no, we should remove the code. | ||
774 | */ | ||
775 | sysdev_driver_register(&cpu_sysdev_class, &mtrr_sysdev_driver); | ||
776 | |||
748 | return 0; | 777 | return 0; |
749 | } | 778 | } |
750 | subsys_initcall(mtrr_init_finialize); | 779 | subsys_initcall(mtrr_init_finialize); |
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 7538b767f206..a501dee9a87a 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * local mtrr defines. | 2 | * local MTRR defines. |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
@@ -14,13 +14,12 @@ extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; | |||
14 | struct mtrr_ops { | 14 | struct mtrr_ops { |
15 | u32 vendor; | 15 | u32 vendor; |
16 | u32 use_intel_if; | 16 | u32 use_intel_if; |
17 | // void (*init)(void); | ||
18 | void (*set)(unsigned int reg, unsigned long base, | 17 | void (*set)(unsigned int reg, unsigned long base, |
19 | unsigned long size, mtrr_type type); | 18 | unsigned long size, mtrr_type type); |
20 | void (*set_all)(void); | 19 | void (*set_all)(void); |
21 | 20 | ||
22 | void (*get)(unsigned int reg, unsigned long *base, | 21 | void (*get)(unsigned int reg, unsigned long *base, |
23 | unsigned long *size, mtrr_type * type); | 22 | unsigned long *size, mtrr_type *type); |
24 | int (*get_free_region)(unsigned long base, unsigned long size, | 23 | int (*get_free_region)(unsigned long base, unsigned long size, |
25 | int replace_reg); | 24 | int replace_reg); |
26 | int (*validate_add_page)(unsigned long base, unsigned long size, | 25 | int (*validate_add_page)(unsigned long base, unsigned long size, |
@@ -39,11 +38,11 @@ extern int positive_have_wrcomb(void); | |||
39 | 38 | ||
40 | /* library functions for processor-specific routines */ | 39 | /* library functions for processor-specific routines */ |
41 | struct set_mtrr_context { | 40 | struct set_mtrr_context { |
42 | unsigned long flags; | 41 | unsigned long flags; |
43 | unsigned long cr4val; | 42 | unsigned long cr4val; |
44 | u32 deftype_lo; | 43 | u32 deftype_lo; |
45 | u32 deftype_hi; | 44 | u32 deftype_hi; |
46 | u32 ccr3; | 45 | u32 ccr3; |
47 | }; | 46 | }; |
48 | 47 | ||
49 | void set_mtrr_done(struct set_mtrr_context *ctxt); | 48 | void set_mtrr_done(struct set_mtrr_context *ctxt); |
@@ -54,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index, | |||
54 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); | 53 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); |
55 | void get_mtrr_state(void); | 54 | void get_mtrr_state(void); |
56 | 55 | ||
57 | extern void set_mtrr_ops(struct mtrr_ops * ops); | 56 | extern void set_mtrr_ops(struct mtrr_ops *ops); |
58 | 57 | ||
59 | extern u64 size_or_mask, size_and_mask; | 58 | extern u64 size_or_mask, size_and_mask; |
60 | extern struct mtrr_ops * mtrr_if; | 59 | extern struct mtrr_ops *mtrr_if; |
61 | 60 | ||
62 | #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) | 61 | #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) |
63 | #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) | 62 | #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) |
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c index 1f5fb1588d1f..dfc80b4e6b0d 100644 --- a/arch/x86/kernel/cpu/mtrr/state.c +++ b/arch/x86/kernel/cpu/mtrr/state.c | |||
@@ -1,24 +1,25 @@ | |||
1 | #include <linux/mm.h> | ||
2 | #include <linux/init.h> | 1 | #include <linux/init.h> |
3 | #include <asm/io.h> | 2 | #include <linux/io.h> |
4 | #include <asm/mtrr.h> | 3 | #include <linux/mm.h> |
5 | #include <asm/msr.h> | 4 | |
6 | #include <asm/processor-cyrix.h> | 5 | #include <asm/processor-cyrix.h> |
7 | #include <asm/processor-flags.h> | 6 | #include <asm/processor-flags.h> |
8 | #include "mtrr.h" | 7 | #include <asm/mtrr.h> |
8 | #include <asm/msr.h> | ||
9 | 9 | ||
10 | #include "mtrr.h" | ||
10 | 11 | ||
11 | /* Put the processor into a state where MTRRs can be safely set */ | 12 | /* Put the processor into a state where MTRRs can be safely set */ |
12 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) | 13 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) |
13 | { | 14 | { |
14 | unsigned int cr0; | 15 | unsigned int cr0; |
15 | 16 | ||
16 | /* Disable interrupts locally */ | 17 | /* Disable interrupts locally */ |
17 | local_irq_save(ctxt->flags); | 18 | local_irq_save(ctxt->flags); |
18 | 19 | ||
19 | if (use_intel() || is_cpu(CYRIX)) { | 20 | if (use_intel() || is_cpu(CYRIX)) { |
20 | 21 | ||
21 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ | 22 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ |
22 | if (cpu_has_pge) { | 23 | if (cpu_has_pge) { |
23 | ctxt->cr4val = read_cr4(); | 24 | ctxt->cr4val = read_cr4(); |
24 | write_cr4(ctxt->cr4val & ~X86_CR4_PGE); | 25 | write_cr4(ctxt->cr4val & ~X86_CR4_PGE); |
@@ -33,50 +34,61 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) | |||
33 | write_cr0(cr0); | 34 | write_cr0(cr0); |
34 | wbinvd(); | 35 | wbinvd(); |
35 | 36 | ||
36 | if (use_intel()) | 37 | if (use_intel()) { |
37 | /* Save MTRR state */ | 38 | /* Save MTRR state */ |
38 | rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); | 39 | rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); |
39 | else | 40 | } else { |
40 | /* Cyrix ARRs - everything else were excluded at the top */ | 41 | /* |
42 | * Cyrix ARRs - | ||
43 | * everything else were excluded at the top | ||
44 | */ | ||
41 | ctxt->ccr3 = getCx86(CX86_CCR3); | 45 | ctxt->ccr3 = getCx86(CX86_CCR3); |
46 | } | ||
42 | } | 47 | } |
43 | } | 48 | } |
44 | 49 | ||
45 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) | 50 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) |
46 | { | 51 | { |
47 | if (use_intel()) | 52 | if (use_intel()) { |
48 | /* Disable MTRRs, and set the default type to uncached */ | 53 | /* Disable MTRRs, and set the default type to uncached */ |
49 | mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL, | 54 | mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL, |
50 | ctxt->deftype_hi); | 55 | ctxt->deftype_hi); |
51 | else if (is_cpu(CYRIX)) | 56 | } else { |
52 | /* Cyrix ARRs - everything else were excluded at the top */ | 57 | if (is_cpu(CYRIX)) { |
53 | setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); | 58 | /* Cyrix ARRs - everything else were excluded at the top */ |
59 | setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); | ||
60 | } | ||
61 | } | ||
54 | } | 62 | } |
55 | 63 | ||
56 | /* Restore the processor after a set_mtrr_prepare */ | 64 | /* Restore the processor after a set_mtrr_prepare */ |
57 | void set_mtrr_done(struct set_mtrr_context *ctxt) | 65 | void set_mtrr_done(struct set_mtrr_context *ctxt) |
58 | { | 66 | { |
59 | if (use_intel() || is_cpu(CYRIX)) { | 67 | if (use_intel() || is_cpu(CYRIX)) { |
60 | 68 | ||
61 | /* Flush caches and TLBs */ | 69 | /* Flush caches and TLBs */ |
62 | wbinvd(); | 70 | wbinvd(); |
63 | 71 | ||
64 | /* Restore MTRRdefType */ | 72 | /* Restore MTRRdefType */ |
65 | if (use_intel()) | 73 | if (use_intel()) { |
66 | /* Intel (P6) standard MTRRs */ | 74 | /* Intel (P6) standard MTRRs */ |
67 | mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); | 75 | mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, |
68 | else | 76 | ctxt->deftype_hi); |
69 | /* Cyrix ARRs - everything else was excluded at the top */ | 77 | } else { |
78 | /* | ||
79 | * Cyrix ARRs - | ||
80 | * everything else was excluded at the top | ||
81 | */ | ||
70 | setCx86(CX86_CCR3, ctxt->ccr3); | 82 | setCx86(CX86_CCR3, ctxt->ccr3); |
83 | } | ||
71 | 84 | ||
72 | /* Enable caches */ | 85 | /* Enable caches */ |
73 | write_cr0(read_cr0() & 0xbfffffff); | 86 | write_cr0(read_cr0() & 0xbfffffff); |
74 | 87 | ||
75 | /* Restore value of CR4 */ | 88 | /* Restore value of CR4 */ |
76 | if (cpu_has_pge) | 89 | if (cpu_has_pge) |
77 | write_cr4(ctxt->cr4val); | 90 | write_cr4(ctxt->cr4val); |
78 | } | 91 | } |
79 | /* Re-enable interrupts locally (if enabled previously) */ | 92 | /* Re-enable interrupts locally (if enabled previously) */ |
80 | local_irq_restore(ctxt->flags); | 93 | local_irq_restore(ctxt->flags); |
81 | } | 94 | } |
82 | |||
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 76dfef23f789..f9cd0849bd42 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -6,6 +6,7 @@ | |||
6 | * Copyright (C) 2009 Jaswinder Singh Rajput | 6 | * Copyright (C) 2009 Jaswinder Singh Rajput |
7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | 7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter |
8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> |
9 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> | ||
9 | * | 10 | * |
10 | * For licencing details see kernel-base/COPYING | 11 | * For licencing details see kernel-base/COPYING |
11 | */ | 12 | */ |
@@ -20,6 +21,7 @@ | |||
20 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
21 | #include <linux/uaccess.h> | 22 | #include <linux/uaccess.h> |
22 | #include <linux/highmem.h> | 23 | #include <linux/highmem.h> |
24 | #include <linux/cpu.h> | ||
23 | 25 | ||
24 | #include <asm/apic.h> | 26 | #include <asm/apic.h> |
25 | #include <asm/stacktrace.h> | 27 | #include <asm/stacktrace.h> |
@@ -27,12 +29,52 @@ | |||
27 | 29 | ||
28 | static u64 perf_counter_mask __read_mostly; | 30 | static u64 perf_counter_mask __read_mostly; |
29 | 31 | ||
32 | /* The maximal number of PEBS counters: */ | ||
33 | #define MAX_PEBS_COUNTERS 4 | ||
34 | |||
35 | /* The size of a BTS record in bytes: */ | ||
36 | #define BTS_RECORD_SIZE 24 | ||
37 | |||
38 | /* The size of a per-cpu BTS buffer in bytes: */ | ||
39 | #define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024) | ||
40 | |||
41 | /* The BTS overflow threshold in bytes from the end of the buffer: */ | ||
42 | #define BTS_OVFL_TH (BTS_RECORD_SIZE * 64) | ||
43 | |||
44 | |||
45 | /* | ||
46 | * Bits in the debugctlmsr controlling branch tracing. | ||
47 | */ | ||
48 | #define X86_DEBUGCTL_TR (1 << 6) | ||
49 | #define X86_DEBUGCTL_BTS (1 << 7) | ||
50 | #define X86_DEBUGCTL_BTINT (1 << 8) | ||
51 | #define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) | ||
52 | #define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) | ||
53 | |||
54 | /* | ||
55 | * A debug store configuration. | ||
56 | * | ||
57 | * We only support architectures that use 64bit fields. | ||
58 | */ | ||
59 | struct debug_store { | ||
60 | u64 bts_buffer_base; | ||
61 | u64 bts_index; | ||
62 | u64 bts_absolute_maximum; | ||
63 | u64 bts_interrupt_threshold; | ||
64 | u64 pebs_buffer_base; | ||
65 | u64 pebs_index; | ||
66 | u64 pebs_absolute_maximum; | ||
67 | u64 pebs_interrupt_threshold; | ||
68 | u64 pebs_counter_reset[MAX_PEBS_COUNTERS]; | ||
69 | }; | ||
70 | |||
30 | struct cpu_hw_counters { | 71 | struct cpu_hw_counters { |
31 | struct perf_counter *counters[X86_PMC_IDX_MAX]; | 72 | struct perf_counter *counters[X86_PMC_IDX_MAX]; |
32 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 73 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
33 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 74 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
34 | unsigned long interrupts; | 75 | unsigned long interrupts; |
35 | int enabled; | 76 | int enabled; |
77 | struct debug_store *ds; | ||
36 | }; | 78 | }; |
37 | 79 | ||
38 | /* | 80 | /* |
@@ -55,8 +97,11 @@ struct x86_pmu { | |||
55 | int num_counters_fixed; | 97 | int num_counters_fixed; |
56 | int counter_bits; | 98 | int counter_bits; |
57 | u64 counter_mask; | 99 | u64 counter_mask; |
100 | int apic; | ||
58 | u64 max_period; | 101 | u64 max_period; |
59 | u64 intel_ctrl; | 102 | u64 intel_ctrl; |
103 | void (*enable_bts)(u64 config); | ||
104 | void (*disable_bts)(void); | ||
60 | }; | 105 | }; |
61 | 106 | ||
62 | static struct x86_pmu x86_pmu __read_mostly; | 107 | static struct x86_pmu x86_pmu __read_mostly; |
@@ -66,6 +111,52 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { | |||
66 | }; | 111 | }; |
67 | 112 | ||
68 | /* | 113 | /* |
114 | * Not sure about some of these | ||
115 | */ | ||
116 | static const u64 p6_perfmon_event_map[] = | ||
117 | { | ||
118 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, | ||
119 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
120 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, | ||
121 | [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, | ||
122 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
123 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
124 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, | ||
125 | }; | ||
126 | |||
127 | static u64 p6_pmu_event_map(int event) | ||
128 | { | ||
129 | return p6_perfmon_event_map[event]; | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * Counter setting that is specified not to count anything. | ||
134 | * We use this to effectively disable a counter. | ||
135 | * | ||
136 | * L2_RQSTS with 0 MESI unit mask. | ||
137 | */ | ||
138 | #define P6_NOP_COUNTER 0x0000002EULL | ||
139 | |||
140 | static u64 p6_pmu_raw_event(u64 event) | ||
141 | { | ||
142 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
143 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
144 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
145 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
146 | #define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL | ||
147 | |||
148 | #define P6_EVNTSEL_MASK \ | ||
149 | (P6_EVNTSEL_EVENT_MASK | \ | ||
150 | P6_EVNTSEL_UNIT_MASK | \ | ||
151 | P6_EVNTSEL_EDGE_MASK | \ | ||
152 | P6_EVNTSEL_INV_MASK | \ | ||
153 | P6_EVNTSEL_COUNTER_MASK) | ||
154 | |||
155 | return event & P6_EVNTSEL_MASK; | ||
156 | } | ||
157 | |||
158 | |||
159 | /* | ||
69 | * Intel PerfMon v3. Used on Core2 and later. | 160 | * Intel PerfMon v3. Used on Core2 and later. |
70 | */ | 161 | */ |
71 | static const u64 intel_perfmon_event_map[] = | 162 | static const u64 intel_perfmon_event_map[] = |
@@ -401,7 +492,7 @@ static const u64 amd_hw_cache_event_ids | |||
401 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ | 492 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ |
402 | }, | 493 | }, |
403 | [ C(OP_WRITE) ] = { | 494 | [ C(OP_WRITE) ] = { |
404 | [ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */ | 495 | [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ |
405 | [ C(RESULT_MISS) ] = 0, | 496 | [ C(RESULT_MISS) ] = 0, |
406 | }, | 497 | }, |
407 | [ C(OP_PREFETCH) ] = { | 498 | [ C(OP_PREFETCH) ] = { |
@@ -530,6 +621,9 @@ x86_perf_counter_update(struct perf_counter *counter, | |||
530 | u64 prev_raw_count, new_raw_count; | 621 | u64 prev_raw_count, new_raw_count; |
531 | s64 delta; | 622 | s64 delta; |
532 | 623 | ||
624 | if (idx == X86_PMC_IDX_FIXED_BTS) | ||
625 | return 0; | ||
626 | |||
533 | /* | 627 | /* |
534 | * Careful: an NMI might modify the previous counter value. | 628 | * Careful: an NMI might modify the previous counter value. |
535 | * | 629 | * |
@@ -567,6 +661,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex); | |||
567 | 661 | ||
568 | static bool reserve_pmc_hardware(void) | 662 | static bool reserve_pmc_hardware(void) |
569 | { | 663 | { |
664 | #ifdef CONFIG_X86_LOCAL_APIC | ||
570 | int i; | 665 | int i; |
571 | 666 | ||
572 | if (nmi_watchdog == NMI_LOCAL_APIC) | 667 | if (nmi_watchdog == NMI_LOCAL_APIC) |
@@ -581,9 +676,11 @@ static bool reserve_pmc_hardware(void) | |||
581 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) | 676 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) |
582 | goto eventsel_fail; | 677 | goto eventsel_fail; |
583 | } | 678 | } |
679 | #endif | ||
584 | 680 | ||
585 | return true; | 681 | return true; |
586 | 682 | ||
683 | #ifdef CONFIG_X86_LOCAL_APIC | ||
587 | eventsel_fail: | 684 | eventsel_fail: |
588 | for (i--; i >= 0; i--) | 685 | for (i--; i >= 0; i--) |
589 | release_evntsel_nmi(x86_pmu.eventsel + i); | 686 | release_evntsel_nmi(x86_pmu.eventsel + i); |
@@ -598,10 +695,12 @@ perfctr_fail: | |||
598 | enable_lapic_nmi_watchdog(); | 695 | enable_lapic_nmi_watchdog(); |
599 | 696 | ||
600 | return false; | 697 | return false; |
698 | #endif | ||
601 | } | 699 | } |
602 | 700 | ||
603 | static void release_pmc_hardware(void) | 701 | static void release_pmc_hardware(void) |
604 | { | 702 | { |
703 | #ifdef CONFIG_X86_LOCAL_APIC | ||
605 | int i; | 704 | int i; |
606 | 705 | ||
607 | for (i = 0; i < x86_pmu.num_counters; i++) { | 706 | for (i = 0; i < x86_pmu.num_counters; i++) { |
@@ -611,12 +710,113 @@ static void release_pmc_hardware(void) | |||
611 | 710 | ||
612 | if (nmi_watchdog == NMI_LOCAL_APIC) | 711 | if (nmi_watchdog == NMI_LOCAL_APIC) |
613 | enable_lapic_nmi_watchdog(); | 712 | enable_lapic_nmi_watchdog(); |
713 | #endif | ||
714 | } | ||
715 | |||
716 | static inline bool bts_available(void) | ||
717 | { | ||
718 | return x86_pmu.enable_bts != NULL; | ||
719 | } | ||
720 | |||
721 | static inline void init_debug_store_on_cpu(int cpu) | ||
722 | { | ||
723 | struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; | ||
724 | |||
725 | if (!ds) | ||
726 | return; | ||
727 | |||
728 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
729 | (u32)((u64)(unsigned long)ds), | ||
730 | (u32)((u64)(unsigned long)ds >> 32)); | ||
731 | } | ||
732 | |||
733 | static inline void fini_debug_store_on_cpu(int cpu) | ||
734 | { | ||
735 | if (!per_cpu(cpu_hw_counters, cpu).ds) | ||
736 | return; | ||
737 | |||
738 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
739 | } | ||
740 | |||
741 | static void release_bts_hardware(void) | ||
742 | { | ||
743 | int cpu; | ||
744 | |||
745 | if (!bts_available()) | ||
746 | return; | ||
747 | |||
748 | get_online_cpus(); | ||
749 | |||
750 | for_each_online_cpu(cpu) | ||
751 | fini_debug_store_on_cpu(cpu); | ||
752 | |||
753 | for_each_possible_cpu(cpu) { | ||
754 | struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; | ||
755 | |||
756 | if (!ds) | ||
757 | continue; | ||
758 | |||
759 | per_cpu(cpu_hw_counters, cpu).ds = NULL; | ||
760 | |||
761 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
762 | kfree(ds); | ||
763 | } | ||
764 | |||
765 | put_online_cpus(); | ||
766 | } | ||
767 | |||
768 | static int reserve_bts_hardware(void) | ||
769 | { | ||
770 | int cpu, err = 0; | ||
771 | |||
772 | if (!bts_available()) | ||
773 | return 0; | ||
774 | |||
775 | get_online_cpus(); | ||
776 | |||
777 | for_each_possible_cpu(cpu) { | ||
778 | struct debug_store *ds; | ||
779 | void *buffer; | ||
780 | |||
781 | err = -ENOMEM; | ||
782 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
783 | if (unlikely(!buffer)) | ||
784 | break; | ||
785 | |||
786 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
787 | if (unlikely(!ds)) { | ||
788 | kfree(buffer); | ||
789 | break; | ||
790 | } | ||
791 | |||
792 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
793 | ds->bts_index = ds->bts_buffer_base; | ||
794 | ds->bts_absolute_maximum = | ||
795 | ds->bts_buffer_base + BTS_BUFFER_SIZE; | ||
796 | ds->bts_interrupt_threshold = | ||
797 | ds->bts_absolute_maximum - BTS_OVFL_TH; | ||
798 | |||
799 | per_cpu(cpu_hw_counters, cpu).ds = ds; | ||
800 | err = 0; | ||
801 | } | ||
802 | |||
803 | if (err) | ||
804 | release_bts_hardware(); | ||
805 | else { | ||
806 | for_each_online_cpu(cpu) | ||
807 | init_debug_store_on_cpu(cpu); | ||
808 | } | ||
809 | |||
810 | put_online_cpus(); | ||
811 | |||
812 | return err; | ||
614 | } | 813 | } |
615 | 814 | ||
616 | static void hw_perf_counter_destroy(struct perf_counter *counter) | 815 | static void hw_perf_counter_destroy(struct perf_counter *counter) |
617 | { | 816 | { |
618 | if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { | 817 | if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { |
619 | release_pmc_hardware(); | 818 | release_pmc_hardware(); |
819 | release_bts_hardware(); | ||
620 | mutex_unlock(&pmc_reserve_mutex); | 820 | mutex_unlock(&pmc_reserve_mutex); |
621 | } | 821 | } |
622 | } | 822 | } |
@@ -659,6 +859,42 @@ set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) | |||
659 | return 0; | 859 | return 0; |
660 | } | 860 | } |
661 | 861 | ||
862 | static void intel_pmu_enable_bts(u64 config) | ||
863 | { | ||
864 | unsigned long debugctlmsr; | ||
865 | |||
866 | debugctlmsr = get_debugctlmsr(); | ||
867 | |||
868 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
869 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
870 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
871 | |||
872 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
873 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
874 | |||
875 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
876 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
877 | |||
878 | update_debugctlmsr(debugctlmsr); | ||
879 | } | ||
880 | |||
881 | static void intel_pmu_disable_bts(void) | ||
882 | { | ||
883 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
884 | unsigned long debugctlmsr; | ||
885 | |||
886 | if (!cpuc->ds) | ||
887 | return; | ||
888 | |||
889 | debugctlmsr = get_debugctlmsr(); | ||
890 | |||
891 | debugctlmsr &= | ||
892 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
893 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
894 | |||
895 | update_debugctlmsr(debugctlmsr); | ||
896 | } | ||
897 | |||
662 | /* | 898 | /* |
663 | * Setup the hardware configuration for a given attr_type | 899 | * Setup the hardware configuration for a given attr_type |
664 | */ | 900 | */ |
@@ -666,6 +902,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
666 | { | 902 | { |
667 | struct perf_counter_attr *attr = &counter->attr; | 903 | struct perf_counter_attr *attr = &counter->attr; |
668 | struct hw_perf_counter *hwc = &counter->hw; | 904 | struct hw_perf_counter *hwc = &counter->hw; |
905 | u64 config; | ||
669 | int err; | 906 | int err; |
670 | 907 | ||
671 | if (!x86_pmu_initialized()) | 908 | if (!x86_pmu_initialized()) |
@@ -674,9 +911,13 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
674 | err = 0; | 911 | err = 0; |
675 | if (!atomic_inc_not_zero(&active_counters)) { | 912 | if (!atomic_inc_not_zero(&active_counters)) { |
676 | mutex_lock(&pmc_reserve_mutex); | 913 | mutex_lock(&pmc_reserve_mutex); |
677 | if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware()) | 914 | if (atomic_read(&active_counters) == 0) { |
678 | err = -EBUSY; | 915 | if (!reserve_pmc_hardware()) |
679 | else | 916 | err = -EBUSY; |
917 | else | ||
918 | err = reserve_bts_hardware(); | ||
919 | } | ||
920 | if (!err) | ||
680 | atomic_inc(&active_counters); | 921 | atomic_inc(&active_counters); |
681 | mutex_unlock(&pmc_reserve_mutex); | 922 | mutex_unlock(&pmc_reserve_mutex); |
682 | } | 923 | } |
@@ -701,6 +942,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
701 | hwc->sample_period = x86_pmu.max_period; | 942 | hwc->sample_period = x86_pmu.max_period; |
702 | hwc->last_period = hwc->sample_period; | 943 | hwc->last_period = hwc->sample_period; |
703 | atomic64_set(&hwc->period_left, hwc->sample_period); | 944 | atomic64_set(&hwc->period_left, hwc->sample_period); |
945 | } else { | ||
946 | /* | ||
947 | * If we have a PMU initialized but no APIC | ||
948 | * interrupts, we cannot sample hardware | ||
949 | * counters (user-space has to fall back and | ||
950 | * sample via a hrtimer based software counter): | ||
951 | */ | ||
952 | if (!x86_pmu.apic) | ||
953 | return -EOPNOTSUPP; | ||
704 | } | 954 | } |
705 | 955 | ||
706 | counter->destroy = hw_perf_counter_destroy; | 956 | counter->destroy = hw_perf_counter_destroy; |
@@ -718,17 +968,68 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
718 | 968 | ||
719 | if (attr->config >= x86_pmu.max_events) | 969 | if (attr->config >= x86_pmu.max_events) |
720 | return -EINVAL; | 970 | return -EINVAL; |
971 | |||
721 | /* | 972 | /* |
722 | * The generic map: | 973 | * The generic map: |
723 | */ | 974 | */ |
724 | hwc->config |= x86_pmu.event_map(attr->config); | 975 | config = x86_pmu.event_map(attr->config); |
976 | |||
977 | if (config == 0) | ||
978 | return -ENOENT; | ||
979 | |||
980 | if (config == -1LL) | ||
981 | return -EINVAL; | ||
982 | |||
983 | /* | ||
984 | * Branch tracing: | ||
985 | */ | ||
986 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | ||
987 | (hwc->sample_period == 1)) { | ||
988 | /* BTS is not supported by this architecture. */ | ||
989 | if (!bts_available()) | ||
990 | return -EOPNOTSUPP; | ||
991 | |||
992 | /* BTS is currently only allowed for user-mode. */ | ||
993 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
994 | return -EOPNOTSUPP; | ||
995 | } | ||
996 | |||
997 | hwc->config |= config; | ||
725 | 998 | ||
726 | return 0; | 999 | return 0; |
727 | } | 1000 | } |
728 | 1001 | ||
1002 | static void p6_pmu_disable_all(void) | ||
1003 | { | ||
1004 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1005 | u64 val; | ||
1006 | |||
1007 | if (!cpuc->enabled) | ||
1008 | return; | ||
1009 | |||
1010 | cpuc->enabled = 0; | ||
1011 | barrier(); | ||
1012 | |||
1013 | /* p6 only has one enable register */ | ||
1014 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
1015 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1016 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
1017 | } | ||
1018 | |||
729 | static void intel_pmu_disable_all(void) | 1019 | static void intel_pmu_disable_all(void) |
730 | { | 1020 | { |
1021 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1022 | |||
1023 | if (!cpuc->enabled) | ||
1024 | return; | ||
1025 | |||
1026 | cpuc->enabled = 0; | ||
1027 | barrier(); | ||
1028 | |||
731 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | 1029 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); |
1030 | |||
1031 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | ||
1032 | intel_pmu_disable_bts(); | ||
732 | } | 1033 | } |
733 | 1034 | ||
734 | static void amd_pmu_disable_all(void) | 1035 | static void amd_pmu_disable_all(void) |
@@ -767,9 +1068,44 @@ void hw_perf_disable(void) | |||
767 | return x86_pmu.disable_all(); | 1068 | return x86_pmu.disable_all(); |
768 | } | 1069 | } |
769 | 1070 | ||
1071 | static void p6_pmu_enable_all(void) | ||
1072 | { | ||
1073 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1074 | unsigned long val; | ||
1075 | |||
1076 | if (cpuc->enabled) | ||
1077 | return; | ||
1078 | |||
1079 | cpuc->enabled = 1; | ||
1080 | barrier(); | ||
1081 | |||
1082 | /* p6 only has one enable register */ | ||
1083 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
1084 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1085 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
1086 | } | ||
1087 | |||
770 | static void intel_pmu_enable_all(void) | 1088 | static void intel_pmu_enable_all(void) |
771 | { | 1089 | { |
1090 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1091 | |||
1092 | if (cpuc->enabled) | ||
1093 | return; | ||
1094 | |||
1095 | cpuc->enabled = 1; | ||
1096 | barrier(); | ||
1097 | |||
772 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 1098 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); |
1099 | |||
1100 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | ||
1101 | struct perf_counter *counter = | ||
1102 | cpuc->counters[X86_PMC_IDX_FIXED_BTS]; | ||
1103 | |||
1104 | if (WARN_ON_ONCE(!counter)) | ||
1105 | return; | ||
1106 | |||
1107 | intel_pmu_enable_bts(counter->hw.config); | ||
1108 | } | ||
773 | } | 1109 | } |
774 | 1110 | ||
775 | static void amd_pmu_enable_all(void) | 1111 | static void amd_pmu_enable_all(void) |
@@ -784,13 +1120,13 @@ static void amd_pmu_enable_all(void) | |||
784 | barrier(); | 1120 | barrier(); |
785 | 1121 | ||
786 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1122 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1123 | struct perf_counter *counter = cpuc->counters[idx]; | ||
787 | u64 val; | 1124 | u64 val; |
788 | 1125 | ||
789 | if (!test_bit(idx, cpuc->active_mask)) | 1126 | if (!test_bit(idx, cpuc->active_mask)) |
790 | continue; | 1127 | continue; |
791 | rdmsrl(MSR_K7_EVNTSEL0 + idx, val); | 1128 | |
792 | if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) | 1129 | val = counter->hw.config; |
793 | continue; | ||
794 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 1130 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; |
795 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | 1131 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); |
796 | } | 1132 | } |
@@ -819,16 +1155,13 @@ static inline void intel_pmu_ack_status(u64 ack) | |||
819 | 1155 | ||
820 | static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | 1156 | static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) |
821 | { | 1157 | { |
822 | int err; | 1158 | (void)checking_wrmsrl(hwc->config_base + idx, |
823 | err = checking_wrmsrl(hwc->config_base + idx, | ||
824 | hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); | 1159 | hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); |
825 | } | 1160 | } |
826 | 1161 | ||
827 | static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | 1162 | static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) |
828 | { | 1163 | { |
829 | int err; | 1164 | (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); |
830 | err = checking_wrmsrl(hwc->config_base + idx, | ||
831 | hwc->config); | ||
832 | } | 1165 | } |
833 | 1166 | ||
834 | static inline void | 1167 | static inline void |
@@ -836,18 +1169,34 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) | |||
836 | { | 1169 | { |
837 | int idx = __idx - X86_PMC_IDX_FIXED; | 1170 | int idx = __idx - X86_PMC_IDX_FIXED; |
838 | u64 ctrl_val, mask; | 1171 | u64 ctrl_val, mask; |
839 | int err; | ||
840 | 1172 | ||
841 | mask = 0xfULL << (idx * 4); | 1173 | mask = 0xfULL << (idx * 4); |
842 | 1174 | ||
843 | rdmsrl(hwc->config_base, ctrl_val); | 1175 | rdmsrl(hwc->config_base, ctrl_val); |
844 | ctrl_val &= ~mask; | 1176 | ctrl_val &= ~mask; |
845 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | 1177 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); |
1178 | } | ||
1179 | |||
1180 | static inline void | ||
1181 | p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | ||
1182 | { | ||
1183 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1184 | u64 val = P6_NOP_COUNTER; | ||
1185 | |||
1186 | if (cpuc->enabled) | ||
1187 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1188 | |||
1189 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
846 | } | 1190 | } |
847 | 1191 | ||
848 | static inline void | 1192 | static inline void |
849 | intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | 1193 | intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) |
850 | { | 1194 | { |
1195 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1196 | intel_pmu_disable_bts(); | ||
1197 | return; | ||
1198 | } | ||
1199 | |||
851 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 1200 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
852 | intel_pmu_disable_fixed(hwc, idx); | 1201 | intel_pmu_disable_fixed(hwc, idx); |
853 | return; | 1202 | return; |
@@ -876,6 +1225,9 @@ x86_perf_counter_set_period(struct perf_counter *counter, | |||
876 | s64 period = hwc->sample_period; | 1225 | s64 period = hwc->sample_period; |
877 | int err, ret = 0; | 1226 | int err, ret = 0; |
878 | 1227 | ||
1228 | if (idx == X86_PMC_IDX_FIXED_BTS) | ||
1229 | return 0; | ||
1230 | |||
879 | /* | 1231 | /* |
880 | * If we are way outside a reasoable range then just skip forward: | 1232 | * If we are way outside a reasoable range then just skip forward: |
881 | */ | 1233 | */ |
@@ -912,6 +1264,8 @@ x86_perf_counter_set_period(struct perf_counter *counter, | |||
912 | err = checking_wrmsrl(hwc->counter_base + idx, | 1264 | err = checking_wrmsrl(hwc->counter_base + idx, |
913 | (u64)(-left) & x86_pmu.counter_mask); | 1265 | (u64)(-left) & x86_pmu.counter_mask); |
914 | 1266 | ||
1267 | perf_counter_update_userpage(counter); | ||
1268 | |||
915 | return ret; | 1269 | return ret; |
916 | } | 1270 | } |
917 | 1271 | ||
@@ -941,8 +1295,29 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) | |||
941 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | 1295 | err = checking_wrmsrl(hwc->config_base, ctrl_val); |
942 | } | 1296 | } |
943 | 1297 | ||
1298 | static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | ||
1299 | { | ||
1300 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1301 | u64 val; | ||
1302 | |||
1303 | val = hwc->config; | ||
1304 | if (cpuc->enabled) | ||
1305 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1306 | |||
1307 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
1308 | } | ||
1309 | |||
1310 | |||
944 | static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | 1311 | static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) |
945 | { | 1312 | { |
1313 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1314 | if (!__get_cpu_var(cpu_hw_counters).enabled) | ||
1315 | return; | ||
1316 | |||
1317 | intel_pmu_enable_bts(hwc->config); | ||
1318 | return; | ||
1319 | } | ||
1320 | |||
946 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 1321 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
947 | intel_pmu_enable_fixed(hwc, idx); | 1322 | intel_pmu_enable_fixed(hwc, idx); |
948 | return; | 1323 | return; |
@@ -957,8 +1332,6 @@ static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | |||
957 | 1332 | ||
958 | if (cpuc->enabled) | 1333 | if (cpuc->enabled) |
959 | x86_pmu_enable_counter(hwc, idx); | 1334 | x86_pmu_enable_counter(hwc, idx); |
960 | else | ||
961 | x86_pmu_disable_counter(hwc, idx); | ||
962 | } | 1335 | } |
963 | 1336 | ||
964 | static int | 1337 | static int |
@@ -966,17 +1339,15 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) | |||
966 | { | 1339 | { |
967 | unsigned int event; | 1340 | unsigned int event; |
968 | 1341 | ||
969 | if (!x86_pmu.num_counters_fixed) | 1342 | event = hwc->config & ARCH_PERFMON_EVENT_MASK; |
970 | return -1; | ||
971 | 1343 | ||
972 | /* | 1344 | if (unlikely((event == |
973 | * Quirk, IA32_FIXED_CTRs do not work on current Atom processors: | 1345 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && |
974 | */ | 1346 | (hwc->sample_period == 1))) |
975 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && | 1347 | return X86_PMC_IDX_FIXED_BTS; |
976 | boot_cpu_data.x86_model == 28) | ||
977 | return -1; | ||
978 | 1348 | ||
979 | event = hwc->config & ARCH_PERFMON_EVENT_MASK; | 1349 | if (!x86_pmu.num_counters_fixed) |
1350 | return -1; | ||
980 | 1351 | ||
981 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) | 1352 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) |
982 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; | 1353 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; |
@@ -998,7 +1369,15 @@ static int x86_pmu_enable(struct perf_counter *counter) | |||
998 | int idx; | 1369 | int idx; |
999 | 1370 | ||
1000 | idx = fixed_mode_idx(counter, hwc); | 1371 | idx = fixed_mode_idx(counter, hwc); |
1001 | if (idx >= 0) { | 1372 | if (idx == X86_PMC_IDX_FIXED_BTS) { |
1373 | /* BTS is already occupied. */ | ||
1374 | if (test_and_set_bit(idx, cpuc->used_mask)) | ||
1375 | return -EAGAIN; | ||
1376 | |||
1377 | hwc->config_base = 0; | ||
1378 | hwc->counter_base = 0; | ||
1379 | hwc->idx = idx; | ||
1380 | } else if (idx >= 0) { | ||
1002 | /* | 1381 | /* |
1003 | * Try to get the fixed counter, if that is already taken | 1382 | * Try to get the fixed counter, if that is already taken |
1004 | * then try to get a generic counter: | 1383 | * then try to get a generic counter: |
@@ -1041,6 +1420,8 @@ try_generic: | |||
1041 | x86_perf_counter_set_period(counter, hwc, idx); | 1420 | x86_perf_counter_set_period(counter, hwc, idx); |
1042 | x86_pmu.enable(hwc, idx); | 1421 | x86_pmu.enable(hwc, idx); |
1043 | 1422 | ||
1423 | perf_counter_update_userpage(counter); | ||
1424 | |||
1044 | return 0; | 1425 | return 0; |
1045 | } | 1426 | } |
1046 | 1427 | ||
@@ -1107,6 +1488,44 @@ void perf_counter_print_debug(void) | |||
1107 | local_irq_restore(flags); | 1488 | local_irq_restore(flags); |
1108 | } | 1489 | } |
1109 | 1490 | ||
1491 | static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, | ||
1492 | struct perf_sample_data *data) | ||
1493 | { | ||
1494 | struct debug_store *ds = cpuc->ds; | ||
1495 | struct bts_record { | ||
1496 | u64 from; | ||
1497 | u64 to; | ||
1498 | u64 flags; | ||
1499 | }; | ||
1500 | struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; | ||
1501 | unsigned long orig_ip = data->regs->ip; | ||
1502 | struct bts_record *at, *top; | ||
1503 | |||
1504 | if (!counter) | ||
1505 | return; | ||
1506 | |||
1507 | if (!ds) | ||
1508 | return; | ||
1509 | |||
1510 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
1511 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
1512 | |||
1513 | ds->bts_index = ds->bts_buffer_base; | ||
1514 | |||
1515 | for (; at < top; at++) { | ||
1516 | data->regs->ip = at->from; | ||
1517 | data->addr = at->to; | ||
1518 | |||
1519 | perf_counter_output(counter, 1, data); | ||
1520 | } | ||
1521 | |||
1522 | data->regs->ip = orig_ip; | ||
1523 | data->addr = 0; | ||
1524 | |||
1525 | /* There's new data available. */ | ||
1526 | counter->pending_kill = POLL_IN; | ||
1527 | } | ||
1528 | |||
1110 | static void x86_pmu_disable(struct perf_counter *counter) | 1529 | static void x86_pmu_disable(struct perf_counter *counter) |
1111 | { | 1530 | { |
1112 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | 1531 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); |
@@ -1131,8 +1550,19 @@ static void x86_pmu_disable(struct perf_counter *counter) | |||
1131 | * that we are disabling: | 1550 | * that we are disabling: |
1132 | */ | 1551 | */ |
1133 | x86_perf_counter_update(counter, hwc, idx); | 1552 | x86_perf_counter_update(counter, hwc, idx); |
1553 | |||
1554 | /* Drain the remaining BTS records. */ | ||
1555 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1556 | struct perf_sample_data data; | ||
1557 | struct pt_regs regs; | ||
1558 | |||
1559 | data.regs = ®s; | ||
1560 | intel_pmu_drain_bts_buffer(cpuc, &data); | ||
1561 | } | ||
1134 | cpuc->counters[idx] = NULL; | 1562 | cpuc->counters[idx] = NULL; |
1135 | clear_bit(idx, cpuc->used_mask); | 1563 | clear_bit(idx, cpuc->used_mask); |
1564 | |||
1565 | perf_counter_update_userpage(counter); | ||
1136 | } | 1566 | } |
1137 | 1567 | ||
1138 | /* | 1568 | /* |
@@ -1156,6 +1586,7 @@ static int intel_pmu_save_and_restart(struct perf_counter *counter) | |||
1156 | 1586 | ||
1157 | static void intel_pmu_reset(void) | 1587 | static void intel_pmu_reset(void) |
1158 | { | 1588 | { |
1589 | struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds; | ||
1159 | unsigned long flags; | 1590 | unsigned long flags; |
1160 | int idx; | 1591 | int idx; |
1161 | 1592 | ||
@@ -1173,10 +1604,55 @@ static void intel_pmu_reset(void) | |||
1173 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { | 1604 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { |
1174 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | 1605 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); |
1175 | } | 1606 | } |
1607 | if (ds) | ||
1608 | ds->bts_index = ds->bts_buffer_base; | ||
1176 | 1609 | ||
1177 | local_irq_restore(flags); | 1610 | local_irq_restore(flags); |
1178 | } | 1611 | } |
1179 | 1612 | ||
1613 | static int p6_pmu_handle_irq(struct pt_regs *regs) | ||
1614 | { | ||
1615 | struct perf_sample_data data; | ||
1616 | struct cpu_hw_counters *cpuc; | ||
1617 | struct perf_counter *counter; | ||
1618 | struct hw_perf_counter *hwc; | ||
1619 | int idx, handled = 0; | ||
1620 | u64 val; | ||
1621 | |||
1622 | data.regs = regs; | ||
1623 | data.addr = 0; | ||
1624 | |||
1625 | cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1626 | |||
1627 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
1628 | if (!test_bit(idx, cpuc->active_mask)) | ||
1629 | continue; | ||
1630 | |||
1631 | counter = cpuc->counters[idx]; | ||
1632 | hwc = &counter->hw; | ||
1633 | |||
1634 | val = x86_perf_counter_update(counter, hwc, idx); | ||
1635 | if (val & (1ULL << (x86_pmu.counter_bits - 1))) | ||
1636 | continue; | ||
1637 | |||
1638 | /* | ||
1639 | * counter overflow | ||
1640 | */ | ||
1641 | handled = 1; | ||
1642 | data.period = counter->hw.last_period; | ||
1643 | |||
1644 | if (!x86_perf_counter_set_period(counter, hwc, idx)) | ||
1645 | continue; | ||
1646 | |||
1647 | if (perf_counter_overflow(counter, 1, &data)) | ||
1648 | p6_pmu_disable_counter(hwc, idx); | ||
1649 | } | ||
1650 | |||
1651 | if (handled) | ||
1652 | inc_irq_stat(apic_perf_irqs); | ||
1653 | |||
1654 | return handled; | ||
1655 | } | ||
1180 | 1656 | ||
1181 | /* | 1657 | /* |
1182 | * This handler is triggered by the local APIC, so the APIC IRQ handling | 1658 | * This handler is triggered by the local APIC, so the APIC IRQ handling |
@@ -1186,16 +1662,16 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
1186 | { | 1662 | { |
1187 | struct perf_sample_data data; | 1663 | struct perf_sample_data data; |
1188 | struct cpu_hw_counters *cpuc; | 1664 | struct cpu_hw_counters *cpuc; |
1189 | int bit, cpu, loops; | 1665 | int bit, loops; |
1190 | u64 ack, status; | 1666 | u64 ack, status; |
1191 | 1667 | ||
1192 | data.regs = regs; | 1668 | data.regs = regs; |
1193 | data.addr = 0; | 1669 | data.addr = 0; |
1194 | 1670 | ||
1195 | cpu = smp_processor_id(); | 1671 | cpuc = &__get_cpu_var(cpu_hw_counters); |
1196 | cpuc = &per_cpu(cpu_hw_counters, cpu); | ||
1197 | 1672 | ||
1198 | perf_disable(); | 1673 | perf_disable(); |
1674 | intel_pmu_drain_bts_buffer(cpuc, &data); | ||
1199 | status = intel_pmu_get_status(); | 1675 | status = intel_pmu_get_status(); |
1200 | if (!status) { | 1676 | if (!status) { |
1201 | perf_enable(); | 1677 | perf_enable(); |
@@ -1250,14 +1726,13 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) | |||
1250 | struct cpu_hw_counters *cpuc; | 1726 | struct cpu_hw_counters *cpuc; |
1251 | struct perf_counter *counter; | 1727 | struct perf_counter *counter; |
1252 | struct hw_perf_counter *hwc; | 1728 | struct hw_perf_counter *hwc; |
1253 | int cpu, idx, handled = 0; | 1729 | int idx, handled = 0; |
1254 | u64 val; | 1730 | u64 val; |
1255 | 1731 | ||
1256 | data.regs = regs; | 1732 | data.regs = regs; |
1257 | data.addr = 0; | 1733 | data.addr = 0; |
1258 | 1734 | ||
1259 | cpu = smp_processor_id(); | 1735 | cpuc = &__get_cpu_var(cpu_hw_counters); |
1260 | cpuc = &per_cpu(cpu_hw_counters, cpu); | ||
1261 | 1736 | ||
1262 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1737 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1263 | if (!test_bit(idx, cpuc->active_mask)) | 1738 | if (!test_bit(idx, cpuc->active_mask)) |
@@ -1300,18 +1775,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs) | |||
1300 | 1775 | ||
1301 | void set_perf_counter_pending(void) | 1776 | void set_perf_counter_pending(void) |
1302 | { | 1777 | { |
1778 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1303 | apic->send_IPI_self(LOCAL_PENDING_VECTOR); | 1779 | apic->send_IPI_self(LOCAL_PENDING_VECTOR); |
1780 | #endif | ||
1304 | } | 1781 | } |
1305 | 1782 | ||
1306 | void perf_counters_lapic_init(void) | 1783 | void perf_counters_lapic_init(void) |
1307 | { | 1784 | { |
1308 | if (!x86_pmu_initialized()) | 1785 | #ifdef CONFIG_X86_LOCAL_APIC |
1786 | if (!x86_pmu.apic || !x86_pmu_initialized()) | ||
1309 | return; | 1787 | return; |
1310 | 1788 | ||
1311 | /* | 1789 | /* |
1312 | * Always use NMI for PMU | 1790 | * Always use NMI for PMU |
1313 | */ | 1791 | */ |
1314 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1792 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1793 | #endif | ||
1315 | } | 1794 | } |
1316 | 1795 | ||
1317 | static int __kprobes | 1796 | static int __kprobes |
@@ -1335,7 +1814,9 @@ perf_counter_nmi_handler(struct notifier_block *self, | |||
1335 | 1814 | ||
1336 | regs = args->regs; | 1815 | regs = args->regs; |
1337 | 1816 | ||
1817 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1338 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1818 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1819 | #endif | ||
1339 | /* | 1820 | /* |
1340 | * Can't rely on the handled return value to say it was our NMI, two | 1821 | * Can't rely on the handled return value to say it was our NMI, two |
1341 | * counters could trigger 'simultaneously' raising two back-to-back NMIs. | 1822 | * counters could trigger 'simultaneously' raising two back-to-back NMIs. |
@@ -1354,6 +1835,33 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = { | |||
1354 | .priority = 1 | 1835 | .priority = 1 |
1355 | }; | 1836 | }; |
1356 | 1837 | ||
1838 | static struct x86_pmu p6_pmu = { | ||
1839 | .name = "p6", | ||
1840 | .handle_irq = p6_pmu_handle_irq, | ||
1841 | .disable_all = p6_pmu_disable_all, | ||
1842 | .enable_all = p6_pmu_enable_all, | ||
1843 | .enable = p6_pmu_enable_counter, | ||
1844 | .disable = p6_pmu_disable_counter, | ||
1845 | .eventsel = MSR_P6_EVNTSEL0, | ||
1846 | .perfctr = MSR_P6_PERFCTR0, | ||
1847 | .event_map = p6_pmu_event_map, | ||
1848 | .raw_event = p6_pmu_raw_event, | ||
1849 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | ||
1850 | .apic = 1, | ||
1851 | .max_period = (1ULL << 31) - 1, | ||
1852 | .version = 0, | ||
1853 | .num_counters = 2, | ||
1854 | /* | ||
1855 | * Counters have 40 bits implemented. However they are designed such | ||
1856 | * that bits [32-39] are sign extensions of bit 31. As such the | ||
1857 | * effective width of a counter for P6-like PMU is 32 bits only. | ||
1858 | * | ||
1859 | * See IA-32 Intel Architecture Software developer manual Vol 3B | ||
1860 | */ | ||
1861 | .counter_bits = 32, | ||
1862 | .counter_mask = (1ULL << 32) - 1, | ||
1863 | }; | ||
1864 | |||
1357 | static struct x86_pmu intel_pmu = { | 1865 | static struct x86_pmu intel_pmu = { |
1358 | .name = "Intel", | 1866 | .name = "Intel", |
1359 | .handle_irq = intel_pmu_handle_irq, | 1867 | .handle_irq = intel_pmu_handle_irq, |
@@ -1366,12 +1874,15 @@ static struct x86_pmu intel_pmu = { | |||
1366 | .event_map = intel_pmu_event_map, | 1874 | .event_map = intel_pmu_event_map, |
1367 | .raw_event = intel_pmu_raw_event, | 1875 | .raw_event = intel_pmu_raw_event, |
1368 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | 1876 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
1877 | .apic = 1, | ||
1369 | /* | 1878 | /* |
1370 | * Intel PMCs cannot be accessed sanely above 32 bit width, | 1879 | * Intel PMCs cannot be accessed sanely above 32 bit width, |
1371 | * so we install an artificial 1<<31 period regardless of | 1880 | * so we install an artificial 1<<31 period regardless of |
1372 | * the generic counter period: | 1881 | * the generic counter period: |
1373 | */ | 1882 | */ |
1374 | .max_period = (1ULL << 31) - 1, | 1883 | .max_period = (1ULL << 31) - 1, |
1884 | .enable_bts = intel_pmu_enable_bts, | ||
1885 | .disable_bts = intel_pmu_disable_bts, | ||
1375 | }; | 1886 | }; |
1376 | 1887 | ||
1377 | static struct x86_pmu amd_pmu = { | 1888 | static struct x86_pmu amd_pmu = { |
@@ -1389,10 +1900,43 @@ static struct x86_pmu amd_pmu = { | |||
1389 | .num_counters = 4, | 1900 | .num_counters = 4, |
1390 | .counter_bits = 48, | 1901 | .counter_bits = 48, |
1391 | .counter_mask = (1ULL << 48) - 1, | 1902 | .counter_mask = (1ULL << 48) - 1, |
1903 | .apic = 1, | ||
1392 | /* use highest bit to detect overflow */ | 1904 | /* use highest bit to detect overflow */ |
1393 | .max_period = (1ULL << 47) - 1, | 1905 | .max_period = (1ULL << 47) - 1, |
1394 | }; | 1906 | }; |
1395 | 1907 | ||
1908 | static int p6_pmu_init(void) | ||
1909 | { | ||
1910 | switch (boot_cpu_data.x86_model) { | ||
1911 | case 1: | ||
1912 | case 3: /* Pentium Pro */ | ||
1913 | case 5: | ||
1914 | case 6: /* Pentium II */ | ||
1915 | case 7: | ||
1916 | case 8: | ||
1917 | case 11: /* Pentium III */ | ||
1918 | break; | ||
1919 | case 9: | ||
1920 | case 13: | ||
1921 | /* Pentium M */ | ||
1922 | break; | ||
1923 | default: | ||
1924 | pr_cont("unsupported p6 CPU model %d ", | ||
1925 | boot_cpu_data.x86_model); | ||
1926 | return -ENODEV; | ||
1927 | } | ||
1928 | |||
1929 | x86_pmu = p6_pmu; | ||
1930 | |||
1931 | if (!cpu_has_apic) { | ||
1932 | pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); | ||
1933 | pr_info("no hardware sampling interrupt available.\n"); | ||
1934 | x86_pmu.apic = 0; | ||
1935 | } | ||
1936 | |||
1937 | return 0; | ||
1938 | } | ||
1939 | |||
1396 | static int intel_pmu_init(void) | 1940 | static int intel_pmu_init(void) |
1397 | { | 1941 | { |
1398 | union cpuid10_edx edx; | 1942 | union cpuid10_edx edx; |
@@ -1401,8 +1945,14 @@ static int intel_pmu_init(void) | |||
1401 | unsigned int ebx; | 1945 | unsigned int ebx; |
1402 | int version; | 1946 | int version; |
1403 | 1947 | ||
1404 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | 1948 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
1949 | /* check for P6 processor family */ | ||
1950 | if (boot_cpu_data.x86 == 6) { | ||
1951 | return p6_pmu_init(); | ||
1952 | } else { | ||
1405 | return -ENODEV; | 1953 | return -ENODEV; |
1954 | } | ||
1955 | } | ||
1406 | 1956 | ||
1407 | /* | 1957 | /* |
1408 | * Check whether the Architectural PerfMon supports | 1958 | * Check whether the Architectural PerfMon supports |
@@ -1428,8 +1978,6 @@ static int intel_pmu_init(void) | |||
1428 | */ | 1978 | */ |
1429 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); | 1979 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); |
1430 | 1980 | ||
1431 | rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | ||
1432 | |||
1433 | /* | 1981 | /* |
1434 | * Install the hw-cache-events table: | 1982 | * Install the hw-cache-events table: |
1435 | */ | 1983 | */ |
@@ -1499,21 +2047,22 @@ void __init init_hw_perf_counters(void) | |||
1499 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 2047 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
1500 | 2048 | ||
1501 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | 2049 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { |
1502 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; | ||
1503 | WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", | 2050 | WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", |
1504 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); | 2051 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); |
2052 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; | ||
1505 | } | 2053 | } |
1506 | perf_counter_mask = (1 << x86_pmu.num_counters) - 1; | 2054 | perf_counter_mask = (1 << x86_pmu.num_counters) - 1; |
1507 | perf_max_counters = x86_pmu.num_counters; | 2055 | perf_max_counters = x86_pmu.num_counters; |
1508 | 2056 | ||
1509 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { | 2057 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { |
1510 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; | ||
1511 | WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", | 2058 | WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", |
1512 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); | 2059 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); |
2060 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; | ||
1513 | } | 2061 | } |
1514 | 2062 | ||
1515 | perf_counter_mask |= | 2063 | perf_counter_mask |= |
1516 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; | 2064 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; |
2065 | x86_pmu.intel_ctrl = perf_counter_mask; | ||
1517 | 2066 | ||
1518 | perf_counters_lapic_init(); | 2067 | perf_counters_lapic_init(); |
1519 | register_die_notifier(&perf_counter_nmi_notifier); | 2068 | register_die_notifier(&perf_counter_nmi_notifier); |
@@ -1563,6 +2112,7 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) | |||
1563 | 2112 | ||
1564 | static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); | 2113 | static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); |
1565 | static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); | 2114 | static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); |
2115 | static DEFINE_PER_CPU(int, in_nmi_frame); | ||
1566 | 2116 | ||
1567 | 2117 | ||
1568 | static void | 2118 | static void |
@@ -1578,7 +2128,9 @@ static void backtrace_warning(void *data, char *msg) | |||
1578 | 2128 | ||
1579 | static int backtrace_stack(void *data, char *name) | 2129 | static int backtrace_stack(void *data, char *name) |
1580 | { | 2130 | { |
1581 | /* Process all stacks: */ | 2131 | per_cpu(in_nmi_frame, smp_processor_id()) = |
2132 | x86_is_stack_id(NMI_STACK, name); | ||
2133 | |||
1582 | return 0; | 2134 | return 0; |
1583 | } | 2135 | } |
1584 | 2136 | ||
@@ -1586,6 +2138,9 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
1586 | { | 2138 | { |
1587 | struct perf_callchain_entry *entry = data; | 2139 | struct perf_callchain_entry *entry = data; |
1588 | 2140 | ||
2141 | if (per_cpu(in_nmi_frame, smp_processor_id())) | ||
2142 | return; | ||
2143 | |||
1589 | if (reliable) | 2144 | if (reliable) |
1590 | callchain_store(entry, addr); | 2145 | callchain_store(entry, addr); |
1591 | } | 2146 | } |
@@ -1719,3 +2274,8 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
1719 | 2274 | ||
1720 | return entry; | 2275 | return entry; |
1721 | } | 2276 | } |
2277 | |||
2278 | void hw_perf_counter_setup_online(int cpu) | ||
2279 | { | ||
2280 | init_debug_store_on_cpu(cpu); | ||
2281 | } | ||
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 5c481f6205bf..392bea43b890 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -68,16 +68,16 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | |||
68 | /* returns the bit offset of the performance counter register */ | 68 | /* returns the bit offset of the performance counter register */ |
69 | switch (boot_cpu_data.x86_vendor) { | 69 | switch (boot_cpu_data.x86_vendor) { |
70 | case X86_VENDOR_AMD: | 70 | case X86_VENDOR_AMD: |
71 | return (msr - MSR_K7_PERFCTR0); | 71 | return msr - MSR_K7_PERFCTR0; |
72 | case X86_VENDOR_INTEL: | 72 | case X86_VENDOR_INTEL: |
73 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | 73 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) |
74 | return (msr - MSR_ARCH_PERFMON_PERFCTR0); | 74 | return msr - MSR_ARCH_PERFMON_PERFCTR0; |
75 | 75 | ||
76 | switch (boot_cpu_data.x86) { | 76 | switch (boot_cpu_data.x86) { |
77 | case 6: | 77 | case 6: |
78 | return (msr - MSR_P6_PERFCTR0); | 78 | return msr - MSR_P6_PERFCTR0; |
79 | case 15: | 79 | case 15: |
80 | return (msr - MSR_P4_BPU_PERFCTR0); | 80 | return msr - MSR_P4_BPU_PERFCTR0; |
81 | } | 81 | } |
82 | } | 82 | } |
83 | return 0; | 83 | return 0; |
@@ -92,16 +92,16 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) | |||
92 | /* returns the bit offset of the event selection register */ | 92 | /* returns the bit offset of the event selection register */ |
93 | switch (boot_cpu_data.x86_vendor) { | 93 | switch (boot_cpu_data.x86_vendor) { |
94 | case X86_VENDOR_AMD: | 94 | case X86_VENDOR_AMD: |
95 | return (msr - MSR_K7_EVNTSEL0); | 95 | return msr - MSR_K7_EVNTSEL0; |
96 | case X86_VENDOR_INTEL: | 96 | case X86_VENDOR_INTEL: |
97 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | 97 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) |
98 | return (msr - MSR_ARCH_PERFMON_EVENTSEL0); | 98 | return msr - MSR_ARCH_PERFMON_EVENTSEL0; |
99 | 99 | ||
100 | switch (boot_cpu_data.x86) { | 100 | switch (boot_cpu_data.x86) { |
101 | case 6: | 101 | case 6: |
102 | return (msr - MSR_P6_EVNTSEL0); | 102 | return msr - MSR_P6_EVNTSEL0; |
103 | case 15: | 103 | case 15: |
104 | return (msr - MSR_P4_BSU_ESCR0); | 104 | return msr - MSR_P4_BSU_ESCR0; |
105 | } | 105 | } |
106 | } | 106 | } |
107 | return 0; | 107 | return 0; |
@@ -113,7 +113,7 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) | |||
113 | { | 113 | { |
114 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | 114 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); |
115 | 115 | ||
116 | return (!test_bit(counter, perfctr_nmi_owner)); | 116 | return !test_bit(counter, perfctr_nmi_owner); |
117 | } | 117 | } |
118 | 118 | ||
119 | /* checks the an msr for availability */ | 119 | /* checks the an msr for availability */ |
@@ -124,7 +124,7 @@ int avail_to_resrv_perfctr_nmi(unsigned int msr) | |||
124 | counter = nmi_perfctr_msr_to_bit(msr); | 124 | counter = nmi_perfctr_msr_to_bit(msr); |
125 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | 125 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); |
126 | 126 | ||
127 | return (!test_bit(counter, perfctr_nmi_owner)); | 127 | return !test_bit(counter, perfctr_nmi_owner); |
128 | } | 128 | } |
129 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); | 129 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); |
130 | 130 | ||
@@ -237,7 +237,7 @@ static unsigned int adjust_for_32bit_ctr(unsigned int hz) | |||
237 | */ | 237 | */ |
238 | counter_val = (u64)cpu_khz * 1000; | 238 | counter_val = (u64)cpu_khz * 1000; |
239 | do_div(counter_val, retval); | 239 | do_div(counter_val, retval); |
240 | if (counter_val > 0x7fffffffULL) { | 240 | if (counter_val > 0x7fffffffULL) { |
241 | u64 count = (u64)cpu_khz * 1000; | 241 | u64 count = (u64)cpu_khz * 1000; |
242 | do_div(count, 0x7fffffffUL); | 242 | do_div(count, 0x7fffffffUL); |
243 | retval = count + 1; | 243 | retval = count + 1; |
@@ -251,7 +251,7 @@ static void write_watchdog_counter(unsigned int perfctr_msr, | |||
251 | u64 count = (u64)cpu_khz * 1000; | 251 | u64 count = (u64)cpu_khz * 1000; |
252 | 252 | ||
253 | do_div(count, nmi_hz); | 253 | do_div(count, nmi_hz); |
254 | if(descr) | 254 | if (descr) |
255 | pr_debug("setting %s to -0x%08Lx\n", descr, count); | 255 | pr_debug("setting %s to -0x%08Lx\n", descr, count); |
256 | wrmsrl(perfctr_msr, 0 - count); | 256 | wrmsrl(perfctr_msr, 0 - count); |
257 | } | 257 | } |
@@ -262,7 +262,7 @@ static void write_watchdog_counter32(unsigned int perfctr_msr, | |||
262 | u64 count = (u64)cpu_khz * 1000; | 262 | u64 count = (u64)cpu_khz * 1000; |
263 | 263 | ||
264 | do_div(count, nmi_hz); | 264 | do_div(count, nmi_hz); |
265 | if(descr) | 265 | if (descr) |
266 | pr_debug("setting %s to -0x%08Lx\n", descr, count); | 266 | pr_debug("setting %s to -0x%08Lx\n", descr, count); |
267 | wrmsr(perfctr_msr, (u32)(-count), 0); | 267 | wrmsr(perfctr_msr, (u32)(-count), 0); |
268 | } | 268 | } |
@@ -296,7 +296,7 @@ static int setup_k7_watchdog(unsigned nmi_hz) | |||
296 | 296 | ||
297 | /* setup the timer */ | 297 | /* setup the timer */ |
298 | wrmsr(evntsel_msr, evntsel, 0); | 298 | wrmsr(evntsel_msr, evntsel, 0); |
299 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); | 299 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz); |
300 | 300 | ||
301 | /* initialize the wd struct before enabling */ | 301 | /* initialize the wd struct before enabling */ |
302 | wd->perfctr_msr = perfctr_msr; | 302 | wd->perfctr_msr = perfctr_msr; |
@@ -387,7 +387,7 @@ static int setup_p6_watchdog(unsigned nmi_hz) | |||
387 | /* setup the timer */ | 387 | /* setup the timer */ |
388 | wrmsr(evntsel_msr, evntsel, 0); | 388 | wrmsr(evntsel_msr, evntsel, 0); |
389 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | 389 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
390 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); | 390 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz); |
391 | 391 | ||
392 | /* initialize the wd struct before enabling */ | 392 | /* initialize the wd struct before enabling */ |
393 | wd->perfctr_msr = perfctr_msr; | 393 | wd->perfctr_msr = perfctr_msr; |
@@ -415,7 +415,7 @@ static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | |||
415 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 415 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
416 | 416 | ||
417 | /* P6/ARCH_PERFMON has 32 bit counter write */ | 417 | /* P6/ARCH_PERFMON has 32 bit counter write */ |
418 | write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); | 418 | write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz); |
419 | } | 419 | } |
420 | 420 | ||
421 | static const struct wd_ops p6_wd_ops = { | 421 | static const struct wd_ops p6_wd_ops = { |
@@ -490,9 +490,9 @@ static int setup_p4_watchdog(unsigned nmi_hz) | |||
490 | if (smp_num_siblings == 2) { | 490 | if (smp_num_siblings == 2) { |
491 | unsigned int ebx, apicid; | 491 | unsigned int ebx, apicid; |
492 | 492 | ||
493 | ebx = cpuid_ebx(1); | 493 | ebx = cpuid_ebx(1); |
494 | apicid = (ebx >> 24) & 0xff; | 494 | apicid = (ebx >> 24) & 0xff; |
495 | ht_num = apicid & 1; | 495 | ht_num = apicid & 1; |
496 | } else | 496 | } else |
497 | #endif | 497 | #endif |
498 | ht_num = 0; | 498 | ht_num = 0; |
@@ -544,7 +544,7 @@ static int setup_p4_watchdog(unsigned nmi_hz) | |||
544 | } | 544 | } |
545 | 545 | ||
546 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) | 546 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) |
547 | | P4_ESCR_OS | 547 | | P4_ESCR_OS |
548 | | P4_ESCR_USR; | 548 | | P4_ESCR_USR; |
549 | 549 | ||
550 | cccr_val |= P4_CCCR_THRESHOLD(15) | 550 | cccr_val |= P4_CCCR_THRESHOLD(15) |
@@ -612,7 +612,7 @@ static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | |||
612 | { | 612 | { |
613 | unsigned dummy; | 613 | unsigned dummy; |
614 | /* | 614 | /* |
615 | * P4 quirks: | 615 | * P4 quirks: |
616 | * - An overflown perfctr will assert its interrupt | 616 | * - An overflown perfctr will assert its interrupt |
617 | * until the OVF flag in its CCCR is cleared. | 617 | * until the OVF flag in its CCCR is cleared. |
618 | * - LVTPC is masked on interrupt and must be | 618 | * - LVTPC is masked on interrupt and must be |
@@ -662,7 +662,8 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) | |||
662 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | 662 | * NOTE: Corresponding bit = 0 in ebx indicates event present. |
663 | */ | 663 | */ |
664 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | 664 | cpuid(10, &(eax.full), &ebx, &unused, &unused); |
665 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | 665 | if ((eax.split.mask_length < |
666 | (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
666 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | 667 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) |
667 | return 0; | 668 | return 0; |
668 | 669 | ||
@@ -803,8 +804,3 @@ int __kprobes lapic_wd_event(unsigned nmi_hz) | |||
803 | wd_ops->rearm(wd, nmi_hz); | 804 | wd_ops->rearm(wd, nmi_hz); |
804 | return 1; | 805 | return 1; |
805 | } | 806 | } |
806 | |||
807 | int lapic_watchdog_ok(void) | ||
808 | { | ||
809 | return wd_ops != NULL; | ||
810 | } | ||
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index d5e30397246b..62ac8cb6ba27 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c | |||
@@ -116,11 +116,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
116 | seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); | 116 | seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); |
117 | #endif | 117 | #endif |
118 | seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size); | 118 | seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size); |
119 | #ifdef CONFIG_X86_64 | ||
120 | seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); | 119 | seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); |
121 | seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", | 120 | seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", |
122 | c->x86_phys_bits, c->x86_virt_bits); | 121 | c->x86_phys_bits, c->x86_virt_bits); |
123 | #endif | ||
124 | 122 | ||
125 | seq_printf(m, "power management:"); | 123 | seq_printf(m, "power management:"); |
126 | for (i = 0; i < 32; i++) { | 124 | for (i = 0; i < 32; i++) { |
@@ -128,7 +126,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
128 | if (i < ARRAY_SIZE(x86_power_flags) && | 126 | if (i < ARRAY_SIZE(x86_power_flags) && |
129 | x86_power_flags[i]) | 127 | x86_power_flags[i]) |
130 | seq_printf(m, "%s%s", | 128 | seq_printf(m, "%s%s", |
131 | x86_power_flags[i][0]?" ":"", | 129 | x86_power_flags[i][0] ? " " : "", |
132 | x86_power_flags[i]); | 130 | x86_power_flags[i]); |
133 | else | 131 | else |
134 | seq_printf(m, " [%d]", i); | 132 | seq_printf(m, " [%d]", i); |
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 284c399e3234..bc24f514ec93 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -49,17 +49,17 @@ static inline int __vmware_platform(void) | |||
49 | 49 | ||
50 | static unsigned long __vmware_get_tsc_khz(void) | 50 | static unsigned long __vmware_get_tsc_khz(void) |
51 | { | 51 | { |
52 | uint64_t tsc_hz; | 52 | uint64_t tsc_hz; |
53 | uint32_t eax, ebx, ecx, edx; | 53 | uint32_t eax, ebx, ecx, edx; |
54 | 54 | ||
55 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); | 55 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); |
56 | 56 | ||
57 | if (ebx == UINT_MAX) | 57 | if (ebx == UINT_MAX) |
58 | return 0; | 58 | return 0; |
59 | tsc_hz = eax | (((uint64_t)ebx) << 32); | 59 | tsc_hz = eax | (((uint64_t)ebx) << 32); |
60 | do_div(tsc_hz, 1000); | 60 | do_div(tsc_hz, 1000); |
61 | BUG_ON(tsc_hz >> 32); | 61 | BUG_ON(tsc_hz >> 32); |
62 | return tsc_hz; | 62 | return tsc_hz; |
63 | } | 63 | } |
64 | 64 | ||
65 | /* | 65 | /* |
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index b4f14c6c09d9..37250fe490b1 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c | |||
@@ -27,9 +27,7 @@ static void doublefault_fn(void) | |||
27 | 27 | ||
28 | if (ptr_ok(gdt)) { | 28 | if (ptr_ok(gdt)) { |
29 | gdt += GDT_ENTRY_TSS << 3; | 29 | gdt += GDT_ENTRY_TSS << 3; |
30 | tss = *(u16 *)(gdt+2); | 30 | tss = get_desc_base((struct desc_struct *)gdt); |
31 | tss += *(u8 *)(gdt+4) << 16; | ||
32 | tss += *(u8 *)(gdt+7) << 24; | ||
33 | printk(KERN_EMERG "double fault, tss at %08lx\n", tss); | 31 | printk(KERN_EMERG "double fault, tss at %08lx\n", tss); |
34 | 32 | ||
35 | if (ptr_ok(tss)) { | 33 | if (ptr_ok(tss)) { |
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 48bfe1386038..ef42a038f1a6 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c | |||
@@ -509,15 +509,15 @@ enum bts_field { | |||
509 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) | 509 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) |
510 | }; | 510 | }; |
511 | 511 | ||
512 | static inline unsigned long bts_get(const char *base, enum bts_field field) | 512 | static inline unsigned long bts_get(const char *base, unsigned long field) |
513 | { | 513 | { |
514 | base += (ds_cfg.sizeof_ptr_field * field); | 514 | base += (ds_cfg.sizeof_ptr_field * field); |
515 | return *(unsigned long *)base; | 515 | return *(unsigned long *)base; |
516 | } | 516 | } |
517 | 517 | ||
518 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) | 518 | static inline void bts_set(char *base, unsigned long field, unsigned long val) |
519 | { | 519 | { |
520 | base += (ds_cfg.sizeof_ptr_field * field);; | 520 | base += (ds_cfg.sizeof_ptr_field * field); |
521 | (*(unsigned long *)base) = val; | 521 | (*(unsigned long *)base) = val; |
522 | } | 522 | } |
523 | 523 | ||
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 95ea5fa7d444..2d8a371d4339 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -15,13 +15,13 @@ | |||
15 | #include <linux/bug.h> | 15 | #include <linux/bug.h> |
16 | #include <linux/nmi.h> | 16 | #include <linux/nmi.h> |
17 | #include <linux/sysfs.h> | 17 | #include <linux/sysfs.h> |
18 | #include <linux/ftrace.h> | ||
19 | 18 | ||
20 | #include <asm/stacktrace.h> | 19 | #include <asm/stacktrace.h> |
21 | 20 | ||
22 | #include "dumpstack.h" | 21 | #include "dumpstack.h" |
23 | 22 | ||
24 | int panic_on_unrecovered_nmi; | 23 | int panic_on_unrecovered_nmi; |
24 | int panic_on_io_nmi; | ||
25 | unsigned int code_bytes = 64; | 25 | unsigned int code_bytes = 64; |
26 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | 26 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; |
27 | static int die_counter; | 27 | static int die_counter; |
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index d593cd1f58dc..bca5fba91c9e 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -19,6 +19,12 @@ | |||
19 | 19 | ||
20 | #include "dumpstack.h" | 20 | #include "dumpstack.h" |
21 | 21 | ||
22 | /* Just a stub for now */ | ||
23 | int x86_is_stack_id(int id, char *name) | ||
24 | { | ||
25 | return 0; | ||
26 | } | ||
27 | |||
22 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 28 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
23 | unsigned long *stack, unsigned long bp, | 29 | unsigned long *stack, unsigned long bp, |
24 | const struct stacktrace_ops *ops, void *data) | 30 | const struct stacktrace_ops *ops, void *data) |
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index d35db5993fd6..54b0a3276766 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -19,10 +19,8 @@ | |||
19 | 19 | ||
20 | #include "dumpstack.h" | 20 | #include "dumpstack.h" |
21 | 21 | ||
22 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 22 | |
23 | unsigned *usedp, char **idp) | 23 | static char x86_stack_ids[][8] = { |
24 | { | ||
25 | static char ids[][8] = { | ||
26 | [DEBUG_STACK - 1] = "#DB", | 24 | [DEBUG_STACK - 1] = "#DB", |
27 | [NMI_STACK - 1] = "NMI", | 25 | [NMI_STACK - 1] = "NMI", |
28 | [DOUBLEFAULT_STACK - 1] = "#DF", | 26 | [DOUBLEFAULT_STACK - 1] = "#DF", |
@@ -33,6 +31,15 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
33 | N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" | 31 | N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" |
34 | #endif | 32 | #endif |
35 | }; | 33 | }; |
34 | |||
35 | int x86_is_stack_id(int id, char *name) | ||
36 | { | ||
37 | return x86_stack_ids[id - 1] == name; | ||
38 | } | ||
39 | |||
40 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | ||
41 | unsigned *usedp, char **idp) | ||
42 | { | ||
36 | unsigned k; | 43 | unsigned k; |
37 | 44 | ||
38 | /* | 45 | /* |
@@ -61,7 +68,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
61 | if (*usedp & (1U << k)) | 68 | if (*usedp & (1U << k)) |
62 | break; | 69 | break; |
63 | *usedp |= 1U << k; | 70 | *usedp |= 1U << k; |
64 | *idp = ids[k]; | 71 | *idp = x86_stack_ids[k]; |
65 | return (unsigned long *)end; | 72 | return (unsigned long *)end; |
66 | } | 73 | } |
67 | /* | 74 | /* |
@@ -81,12 +88,13 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
81 | do { | 88 | do { |
82 | ++j; | 89 | ++j; |
83 | end -= EXCEPTION_STKSZ; | 90 | end -= EXCEPTION_STKSZ; |
84 | ids[j][4] = '1' + (j - N_EXCEPTION_STACKS); | 91 | x86_stack_ids[j][4] = '1' + |
92 | (j - N_EXCEPTION_STACKS); | ||
85 | } while (stack < end - EXCEPTION_STKSZ); | 93 | } while (stack < end - EXCEPTION_STKSZ); |
86 | if (*usedp & (1U << j)) | 94 | if (*usedp & (1U << j)) |
87 | break; | 95 | break; |
88 | *usedp |= 1U << j; | 96 | *usedp |= 1U << j; |
89 | *idp = ids[j]; | 97 | *idp = x86_stack_ids[j]; |
90 | return (unsigned long *)end; | 98 | return (unsigned long *)end; |
91 | } | 99 | } |
92 | #endif | 100 | #endif |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 2e5e0faa99b5..147005a1cc3c 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -627,10 +627,9 @@ __init void e820_setup_gap(void) | |||
627 | #ifdef CONFIG_X86_64 | 627 | #ifdef CONFIG_X86_64 |
628 | if (!found) { | 628 | if (!found) { |
629 | gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; | 629 | gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; |
630 | printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " | 630 | printk(KERN_ERR |
631 | "address range\n" | 631 | "PCI: Warning: Cannot find a gap in the 32bit address range\n" |
632 | KERN_ERR "PCI: Unassigned devices with 32bit resource " | 632 | "PCI: Unassigned devices with 32bit resource registers may break!\n"); |
633 | "registers may break!\n"); | ||
634 | } | 633 | } |
635 | #endif | 634 | #endif |
636 | 635 | ||
@@ -1383,6 +1382,8 @@ static unsigned long ram_alignment(resource_size_t pos) | |||
1383 | return 32*1024*1024; | 1382 | return 32*1024*1024; |
1384 | } | 1383 | } |
1385 | 1384 | ||
1385 | #define MAX_RESOURCE_SIZE ((resource_size_t)-1) | ||
1386 | |||
1386 | void __init e820_reserve_resources_late(void) | 1387 | void __init e820_reserve_resources_late(void) |
1387 | { | 1388 | { |
1388 | int i; | 1389 | int i; |
@@ -1400,17 +1401,19 @@ void __init e820_reserve_resources_late(void) | |||
1400 | * avoid stolen RAM: | 1401 | * avoid stolen RAM: |
1401 | */ | 1402 | */ |
1402 | for (i = 0; i < e820.nr_map; i++) { | 1403 | for (i = 0; i < e820.nr_map; i++) { |
1403 | struct e820entry *entry = &e820_saved.map[i]; | 1404 | struct e820entry *entry = &e820.map[i]; |
1404 | resource_size_t start, end; | 1405 | u64 start, end; |
1405 | 1406 | ||
1406 | if (entry->type != E820_RAM) | 1407 | if (entry->type != E820_RAM) |
1407 | continue; | 1408 | continue; |
1408 | start = entry->addr + entry->size; | 1409 | start = entry->addr + entry->size; |
1409 | end = round_up(start, ram_alignment(start)); | 1410 | end = round_up(start, ram_alignment(start)) - 1; |
1410 | if (start == end) | 1411 | if (end > MAX_RESOURCE_SIZE) |
1412 | end = MAX_RESOURCE_SIZE; | ||
1413 | if (start >= end) | ||
1411 | continue; | 1414 | continue; |
1412 | reserve_region_with_split(&iomem_resource, start, | 1415 | reserve_region_with_split(&iomem_resource, start, end, |
1413 | end - 1, "RAM buffer"); | 1416 | "RAM buffer"); |
1414 | } | 1417 | } |
1415 | } | 1418 | } |
1416 | 1419 | ||
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 96f7ac0bbf01..fe26ba3e3451 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
@@ -354,7 +354,7 @@ void __init efi_init(void) | |||
354 | */ | 354 | */ |
355 | c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); | 355 | c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); |
356 | if (c16) { | 356 | if (c16) { |
357 | for (i = 0; i < sizeof(vendor) && *c16; ++i) | 357 | for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) |
358 | vendor[i] = *c16++; | 358 | vendor[i] = *c16++; |
359 | vendor[i] = '\0'; | 359 | vendor[i] = '\0'; |
360 | } else | 360 | } else |
@@ -512,7 +512,7 @@ void __init efi_enter_virtual_mode(void) | |||
512 | && end_pfn <= max_pfn_mapped)) | 512 | && end_pfn <= max_pfn_mapped)) |
513 | va = __va(md->phys_addr); | 513 | va = __va(md->phys_addr); |
514 | else | 514 | else |
515 | va = efi_ioremap(md->phys_addr, size); | 515 | va = efi_ioremap(md->phys_addr, size, md->type); |
516 | 516 | ||
517 | md->virt_addr = (u64) (unsigned long) va; | 517 | md->virt_addr = (u64) (unsigned long) va; |
518 | 518 | ||
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index 22c3b7828c50..ac0621a7ac3d 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c | |||
@@ -98,10 +98,14 @@ void __init efi_call_phys_epilog(void) | |||
98 | early_runtime_code_mapping_set_exec(0); | 98 | early_runtime_code_mapping_set_exec(0); |
99 | } | 99 | } |
100 | 100 | ||
101 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size) | 101 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, |
102 | u32 type) | ||
102 | { | 103 | { |
103 | unsigned long last_map_pfn; | 104 | unsigned long last_map_pfn; |
104 | 105 | ||
106 | if (type == EFI_MEMORY_MAPPED_IO) | ||
107 | return ioremap(phys_addr, size); | ||
108 | |||
105 | last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); | 109 | last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); |
106 | if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) | 110 | if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) |
107 | return NULL; | 111 | return NULL; |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index d94e1ea3b9fe..9dbb527e1652 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -417,10 +417,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, | |||
417 | unsigned long return_hooker = (unsigned long) | 417 | unsigned long return_hooker = (unsigned long) |
418 | &return_to_handler; | 418 | &return_to_handler; |
419 | 419 | ||
420 | /* Nmi's are currently unsupported */ | ||
421 | if (unlikely(in_nmi())) | ||
422 | return; | ||
423 | |||
424 | if (unlikely(atomic_read(&current->tracing_graph_pause))) | 420 | if (unlikely(atomic_read(&current->tracing_graph_pause))) |
425 | return; | 421 | return; |
426 | 422 | ||
@@ -498,37 +494,56 @@ static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) | |||
498 | 494 | ||
499 | struct syscall_metadata *syscall_nr_to_meta(int nr) | 495 | struct syscall_metadata *syscall_nr_to_meta(int nr) |
500 | { | 496 | { |
501 | if (!syscalls_metadata || nr >= FTRACE_SYSCALL_MAX || nr < 0) | 497 | if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) |
502 | return NULL; | 498 | return NULL; |
503 | 499 | ||
504 | return syscalls_metadata[nr]; | 500 | return syscalls_metadata[nr]; |
505 | } | 501 | } |
506 | 502 | ||
507 | void arch_init_ftrace_syscalls(void) | 503 | int syscall_name_to_nr(char *name) |
504 | { | ||
505 | int i; | ||
506 | |||
507 | if (!syscalls_metadata) | ||
508 | return -1; | ||
509 | |||
510 | for (i = 0; i < NR_syscalls; i++) { | ||
511 | if (syscalls_metadata[i]) { | ||
512 | if (!strcmp(syscalls_metadata[i]->name, name)) | ||
513 | return i; | ||
514 | } | ||
515 | } | ||
516 | return -1; | ||
517 | } | ||
518 | |||
519 | void set_syscall_enter_id(int num, int id) | ||
520 | { | ||
521 | syscalls_metadata[num]->enter_id = id; | ||
522 | } | ||
523 | |||
524 | void set_syscall_exit_id(int num, int id) | ||
525 | { | ||
526 | syscalls_metadata[num]->exit_id = id; | ||
527 | } | ||
528 | |||
529 | static int __init arch_init_ftrace_syscalls(void) | ||
508 | { | 530 | { |
509 | int i; | 531 | int i; |
510 | struct syscall_metadata *meta; | 532 | struct syscall_metadata *meta; |
511 | unsigned long **psys_syscall_table = &sys_call_table; | 533 | unsigned long **psys_syscall_table = &sys_call_table; |
512 | static atomic_t refs; | ||
513 | |||
514 | if (atomic_inc_return(&refs) != 1) | ||
515 | goto end; | ||
516 | 534 | ||
517 | syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * | 535 | syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * |
518 | FTRACE_SYSCALL_MAX, GFP_KERNEL); | 536 | NR_syscalls, GFP_KERNEL); |
519 | if (!syscalls_metadata) { | 537 | if (!syscalls_metadata) { |
520 | WARN_ON(1); | 538 | WARN_ON(1); |
521 | return; | 539 | return -ENOMEM; |
522 | } | 540 | } |
523 | 541 | ||
524 | for (i = 0; i < FTRACE_SYSCALL_MAX; i++) { | 542 | for (i = 0; i < NR_syscalls; i++) { |
525 | meta = find_syscall_meta(psys_syscall_table[i]); | 543 | meta = find_syscall_meta(psys_syscall_table[i]); |
526 | syscalls_metadata[i] = meta; | 544 | syscalls_metadata[i] = meta; |
527 | } | 545 | } |
528 | return; | 546 | return 0; |
529 | |||
530 | /* Paranoid: avoid overflow */ | ||
531 | end: | ||
532 | atomic_dec(&refs); | ||
533 | } | 547 | } |
548 | arch_initcall(arch_init_ftrace_syscalls); | ||
534 | #endif | 549 | #endif |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 8663afb56535..7ffec6b3b331 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -261,9 +261,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
261 | * which will be freed later | 261 | * which will be freed later |
262 | */ | 262 | */ |
263 | 263 | ||
264 | #ifndef CONFIG_HOTPLUG_CPU | 264 | __CPUINIT |
265 | .section .init.text,"ax",@progbits | ||
266 | #endif | ||
267 | 265 | ||
268 | #ifdef CONFIG_SMP | 266 | #ifdef CONFIG_SMP |
269 | ENTRY(startup_32_smp) | 267 | ENTRY(startup_32_smp) |
@@ -441,7 +439,6 @@ is386: movl $2,%ecx # set MP | |||
441 | jne 1f | 439 | jne 1f |
442 | movl $per_cpu__gdt_page,%eax | 440 | movl $per_cpu__gdt_page,%eax |
443 | movl $per_cpu__stack_canary,%ecx | 441 | movl $per_cpu__stack_canary,%ecx |
444 | subl $20, %ecx | ||
445 | movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) | 442 | movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) |
446 | shrl $16, %ecx | 443 | shrl $16, %ecx |
447 | movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) | 444 | movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) |
@@ -602,7 +599,7 @@ ignore_int: | |||
602 | #endif | 599 | #endif |
603 | iret | 600 | iret |
604 | 601 | ||
605 | .section .cpuinit.data,"wa" | 602 | __REFDATA |
606 | .align 4 | 603 | .align 4 |
607 | ENTRY(initial_code) | 604 | ENTRY(initial_code) |
608 | .long i386_start_kernel | 605 | .long i386_start_kernel |
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 3b09634a5153..7d35d0fe2329 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -218,7 +218,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) | |||
218 | void fixup_irqs(void) | 218 | void fixup_irqs(void) |
219 | { | 219 | { |
220 | unsigned int irq; | 220 | unsigned int irq; |
221 | static int warned; | ||
222 | struct irq_desc *desc; | 221 | struct irq_desc *desc; |
223 | 222 | ||
224 | for_each_irq_desc(irq, desc) { | 223 | for_each_irq_desc(irq, desc) { |
@@ -236,8 +235,8 @@ void fixup_irqs(void) | |||
236 | } | 235 | } |
237 | if (desc->chip->set_affinity) | 236 | if (desc->chip->set_affinity) |
238 | desc->chip->set_affinity(irq, affinity); | 237 | desc->chip->set_affinity(irq, affinity); |
239 | else if (desc->action && !(warned++)) | 238 | else if (desc->action) |
240 | printk("Cannot set affinity for irq %i\n", irq); | 239 | printk_once("Cannot set affinity for irq %i\n", irq); |
241 | } | 240 | } |
242 | 241 | ||
243 | #if 0 | 242 | #if 0 |
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 696f0e475c2d..92b7703d3d58 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -187,7 +187,7 @@ static void __init apic_intr_init(void) | |||
187 | #ifdef CONFIG_X86_THERMAL_VECTOR | 187 | #ifdef CONFIG_X86_THERMAL_VECTOR |
188 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | 188 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); |
189 | #endif | 189 | #endif |
190 | #ifdef CONFIG_X86_THRESHOLD | 190 | #ifdef CONFIG_X86_MCE_THRESHOLD |
191 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); | 191 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); |
192 | #endif | 192 | #endif |
193 | #if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC) | 193 | #if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC) |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a78ecad0c900..c664d515f613 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -200,7 +200,7 @@ static void kvm_leave_lazy_mmu(void) | |||
200 | state->mode = paravirt_get_lazy_mode(); | 200 | state->mode = paravirt_get_lazy_mode(); |
201 | } | 201 | } |
202 | 202 | ||
203 | static void paravirt_ops_setup(void) | 203 | static void __init paravirt_ops_setup(void) |
204 | { | 204 | { |
205 | pv_info.name = "KVM"; | 205 | pv_info.name = "KVM"; |
206 | pv_info.paravirt_enabled = 1; | 206 | pv_info.paravirt_enabled = 1; |
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 846510b78a09..2a62d843f015 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c | |||
@@ -347,7 +347,7 @@ static irqreturn_t mfgpt_tick(int irq, void *dev_id) | |||
347 | 347 | ||
348 | static struct irqaction mfgptirq = { | 348 | static struct irqaction mfgptirq = { |
349 | .handler = mfgpt_tick, | 349 | .handler = mfgpt_tick, |
350 | .flags = IRQF_DISABLED | IRQF_NOBALANCING, | 350 | .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER, |
351 | .name = "mfgpt-timer" | 351 | .name = "mfgpt-timer" |
352 | }; | 352 | }; |
353 | 353 | ||
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 651c93b28862..fcd513bf2846 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -482,11 +482,11 @@ static void __init construct_ioapic_table(int mpc_default_type) | |||
482 | MP_bus_info(&bus); | 482 | MP_bus_info(&bus); |
483 | } | 483 | } |
484 | 484 | ||
485 | ioapic.type = MP_IOAPIC; | 485 | ioapic.type = MP_IOAPIC; |
486 | ioapic.apicid = 2; | 486 | ioapic.apicid = 2; |
487 | ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01; | 487 | ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01; |
488 | ioapic.flags = MPC_APIC_USABLE; | 488 | ioapic.flags = MPC_APIC_USABLE; |
489 | ioapic.apicaddr = 0xFEC00000; | 489 | ioapic.apicaddr = IO_APIC_DEFAULT_PHYS_BASE; |
490 | MP_ioapic_info(&ioapic); | 490 | MP_ioapic_info(&ioapic); |
491 | 491 | ||
492 | /* | 492 | /* |
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 98fd6cd4e3a4..7dd950094178 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -1,6 +1,7 @@ | |||
1 | /* ----------------------------------------------------------------------- * | 1 | /* ----------------------------------------------------------------------- * |
2 | * | 2 | * |
3 | * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved | 3 | * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved |
4 | * Copyright 2009 Intel Corporation; author: H. Peter Anvin | ||
4 | * | 5 | * |
5 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
@@ -80,11 +81,8 @@ static ssize_t msr_read(struct file *file, char __user *buf, | |||
80 | 81 | ||
81 | for (; count; count -= 8) { | 82 | for (; count; count -= 8) { |
82 | err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); | 83 | err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); |
83 | if (err) { | 84 | if (err) |
84 | if (err == -EFAULT) /* Fix idiotic error code */ | ||
85 | err = -EIO; | ||
86 | break; | 85 | break; |
87 | } | ||
88 | if (copy_to_user(tmp, &data, 8)) { | 86 | if (copy_to_user(tmp, &data, 8)) { |
89 | err = -EFAULT; | 87 | err = -EFAULT; |
90 | break; | 88 | break; |
@@ -115,11 +113,8 @@ static ssize_t msr_write(struct file *file, const char __user *buf, | |||
115 | break; | 113 | break; |
116 | } | 114 | } |
117 | err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); | 115 | err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); |
118 | if (err) { | 116 | if (err) |
119 | if (err == -EFAULT) /* Fix idiotic error code */ | ||
120 | err = -EIO; | ||
121 | break; | 117 | break; |
122 | } | ||
123 | tmp += 2; | 118 | tmp += 2; |
124 | bytes += 8; | 119 | bytes += 8; |
125 | } | 120 | } |
@@ -127,6 +122,54 @@ static ssize_t msr_write(struct file *file, const char __user *buf, | |||
127 | return bytes ? bytes : err; | 122 | return bytes ? bytes : err; |
128 | } | 123 | } |
129 | 124 | ||
125 | static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) | ||
126 | { | ||
127 | u32 __user *uregs = (u32 __user *)arg; | ||
128 | u32 regs[8]; | ||
129 | int cpu = iminor(file->f_path.dentry->d_inode); | ||
130 | int err; | ||
131 | |||
132 | switch (ioc) { | ||
133 | case X86_IOC_RDMSR_REGS: | ||
134 | if (!(file->f_mode & FMODE_READ)) { | ||
135 | err = -EBADF; | ||
136 | break; | ||
137 | } | ||
138 | if (copy_from_user(&regs, uregs, sizeof regs)) { | ||
139 | err = -EFAULT; | ||
140 | break; | ||
141 | } | ||
142 | err = rdmsr_safe_regs_on_cpu(cpu, regs); | ||
143 | if (err) | ||
144 | break; | ||
145 | if (copy_to_user(uregs, &regs, sizeof regs)) | ||
146 | err = -EFAULT; | ||
147 | break; | ||
148 | |||
149 | case X86_IOC_WRMSR_REGS: | ||
150 | if (!(file->f_mode & FMODE_WRITE)) { | ||
151 | err = -EBADF; | ||
152 | break; | ||
153 | } | ||
154 | if (copy_from_user(&regs, uregs, sizeof regs)) { | ||
155 | err = -EFAULT; | ||
156 | break; | ||
157 | } | ||
158 | err = wrmsr_safe_regs_on_cpu(cpu, regs); | ||
159 | if (err) | ||
160 | break; | ||
161 | if (copy_to_user(uregs, &regs, sizeof regs)) | ||
162 | err = -EFAULT; | ||
163 | break; | ||
164 | |||
165 | default: | ||
166 | err = -ENOTTY; | ||
167 | break; | ||
168 | } | ||
169 | |||
170 | return err; | ||
171 | } | ||
172 | |||
130 | static int msr_open(struct inode *inode, struct file *file) | 173 | static int msr_open(struct inode *inode, struct file *file) |
131 | { | 174 | { |
132 | unsigned int cpu = iminor(file->f_path.dentry->d_inode); | 175 | unsigned int cpu = iminor(file->f_path.dentry->d_inode); |
@@ -157,6 +200,8 @@ static const struct file_operations msr_fops = { | |||
157 | .read = msr_read, | 200 | .read = msr_read, |
158 | .write = msr_write, | 201 | .write = msr_write, |
159 | .open = msr_open, | 202 | .open = msr_open, |
203 | .unlocked_ioctl = msr_ioctl, | ||
204 | .compat_ioctl = msr_ioctl, | ||
160 | }; | 205 | }; |
161 | 206 | ||
162 | static int __cpuinit msr_device_create(int cpu) | 207 | static int __cpuinit msr_device_create(int cpu) |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 70ec9b951d76..f5b0b4a01fb2 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -362,8 +362,9 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
362 | #endif | 362 | #endif |
363 | .wbinvd = native_wbinvd, | 363 | .wbinvd = native_wbinvd, |
364 | .read_msr = native_read_msr_safe, | 364 | .read_msr = native_read_msr_safe, |
365 | .read_msr_amd = native_read_msr_amd_safe, | 365 | .rdmsr_regs = native_rdmsr_safe_regs, |
366 | .write_msr = native_write_msr_safe, | 366 | .write_msr = native_write_msr_safe, |
367 | .wrmsr_regs = native_wrmsr_safe_regs, | ||
367 | .read_tsc = native_read_tsc, | 368 | .read_tsc = native_read_tsc, |
368 | .read_pmc = native_read_pmc, | 369 | .read_pmc = native_read_pmc, |
369 | .read_tscp = native_read_tscp, | 370 | .read_tscp = native_read_tscp, |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 47630479b067..d71c8655905b 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/dmar.h> | 3 | #include <linux/dmar.h> |
4 | #include <linux/bootmem.h> | 4 | #include <linux/bootmem.h> |
5 | #include <linux/pci.h> | 5 | #include <linux/pci.h> |
6 | #include <linux/kmemleak.h> | ||
6 | 7 | ||
7 | #include <asm/proto.h> | 8 | #include <asm/proto.h> |
8 | #include <asm/dma.h> | 9 | #include <asm/dma.h> |
@@ -32,7 +33,14 @@ int no_iommu __read_mostly; | |||
32 | /* Set this to 1 if there is a HW IOMMU in the system */ | 33 | /* Set this to 1 if there is a HW IOMMU in the system */ |
33 | int iommu_detected __read_mostly = 0; | 34 | int iommu_detected __read_mostly = 0; |
34 | 35 | ||
35 | int iommu_pass_through; | 36 | /* |
37 | * This variable becomes 1 if iommu=pt is passed on the kernel command line. | ||
38 | * If this variable is 1, IOMMU implementations do no DMA translation for | ||
39 | * devices and allow every device to access to whole physical memory. This is | ||
40 | * useful if a user wants to use an IOMMU only for KVM device assignment to | ||
41 | * guests and not for driver dma translation. | ||
42 | */ | ||
43 | int iommu_pass_through __read_mostly; | ||
36 | 44 | ||
37 | dma_addr_t bad_dma_address __read_mostly = 0; | 45 | dma_addr_t bad_dma_address __read_mostly = 0; |
38 | EXPORT_SYMBOL(bad_dma_address); | 46 | EXPORT_SYMBOL(bad_dma_address); |
@@ -88,6 +96,11 @@ void __init dma32_reserve_bootmem(void) | |||
88 | size = roundup(dma32_bootmem_size, align); | 96 | size = roundup(dma32_bootmem_size, align); |
89 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, | 97 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, |
90 | 512ULL<<20); | 98 | 512ULL<<20); |
99 | /* | ||
100 | * Kmemleak should not scan this block as it may not be mapped via the | ||
101 | * kernel direct mapping. | ||
102 | */ | ||
103 | kmemleak_ignore(dma32_bootmem_ptr); | ||
91 | if (dma32_bootmem_ptr) | 104 | if (dma32_bootmem_ptr) |
92 | dma32_bootmem_size = size; | 105 | dma32_bootmem_size = size; |
93 | else | 106 | else |
@@ -147,7 +160,7 @@ again: | |||
147 | return NULL; | 160 | return NULL; |
148 | 161 | ||
149 | addr = page_to_phys(page); | 162 | addr = page_to_phys(page); |
150 | if (!is_buffer_dma_capable(dma_mask, addr, size)) { | 163 | if (addr + size > dma_mask) { |
151 | __free_pages(page, get_order(size)); | 164 | __free_pages(page, get_order(size)); |
152 | 165 | ||
153 | if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { | 166 | if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { |
@@ -211,11 +224,11 @@ static __init int iommu_setup(char *p) | |||
211 | #ifdef CONFIG_SWIOTLB | 224 | #ifdef CONFIG_SWIOTLB |
212 | if (!strncmp(p, "soft", 4)) | 225 | if (!strncmp(p, "soft", 4)) |
213 | swiotlb = 1; | 226 | swiotlb = 1; |
227 | #endif | ||
214 | if (!strncmp(p, "pt", 2)) { | 228 | if (!strncmp(p, "pt", 2)) { |
215 | iommu_pass_through = 1; | 229 | iommu_pass_through = 1; |
216 | return 1; | 230 | return 1; |
217 | } | 231 | } |
218 | #endif | ||
219 | 232 | ||
220 | gart_parse_options(p); | 233 | gart_parse_options(p); |
221 | 234 | ||
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index cfd9f9063896..98a827ee9ed7 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -190,14 +190,13 @@ static void iommu_full(struct device *dev, size_t size, int dir) | |||
190 | static inline int | 190 | static inline int |
191 | need_iommu(struct device *dev, unsigned long addr, size_t size) | 191 | need_iommu(struct device *dev, unsigned long addr, size_t size) |
192 | { | 192 | { |
193 | return force_iommu || | 193 | return force_iommu || !dma_capable(dev, addr, size); |
194 | !is_buffer_dma_capable(*dev->dma_mask, addr, size); | ||
195 | } | 194 | } |
196 | 195 | ||
197 | static inline int | 196 | static inline int |
198 | nonforced_iommu(struct device *dev, unsigned long addr, size_t size) | 197 | nonforced_iommu(struct device *dev, unsigned long addr, size_t size) |
199 | { | 198 | { |
200 | return !is_buffer_dma_capable(*dev->dma_mask, addr, size); | 199 | return !dma_capable(dev, addr, size); |
201 | } | 200 | } |
202 | 201 | ||
203 | /* Map a single continuous physical area into the IOMMU. | 202 | /* Map a single continuous physical area into the IOMMU. |
@@ -675,7 +674,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
675 | nommu: | 674 | nommu: |
676 | /* Should not happen anymore */ | 675 | /* Should not happen anymore */ |
677 | printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n" | 676 | printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n" |
678 | KERN_WARNING "falling back to iommu=soft.\n"); | 677 | "falling back to iommu=soft.\n"); |
679 | return -1; | 678 | return -1; |
680 | } | 679 | } |
681 | 680 | ||
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 71d412a09f30..a3933d4330cd 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
@@ -14,7 +14,7 @@ | |||
14 | static int | 14 | static int |
15 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) | 15 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) |
16 | { | 16 | { |
17 | if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) { | 17 | if (hwdev && !dma_capable(hwdev, bus, size)) { |
18 | if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) | 18 | if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) |
19 | printk(KERN_ERR | 19 | printk(KERN_ERR |
20 | "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", | 20 | "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", |
@@ -79,12 +79,29 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, | |||
79 | free_pages((unsigned long)vaddr, get_order(size)); | 79 | free_pages((unsigned long)vaddr, get_order(size)); |
80 | } | 80 | } |
81 | 81 | ||
82 | static void nommu_sync_single_for_device(struct device *dev, | ||
83 | dma_addr_t addr, size_t size, | ||
84 | enum dma_data_direction dir) | ||
85 | { | ||
86 | flush_write_buffers(); | ||
87 | } | ||
88 | |||
89 | |||
90 | static void nommu_sync_sg_for_device(struct device *dev, | ||
91 | struct scatterlist *sg, int nelems, | ||
92 | enum dma_data_direction dir) | ||
93 | { | ||
94 | flush_write_buffers(); | ||
95 | } | ||
96 | |||
82 | struct dma_map_ops nommu_dma_ops = { | 97 | struct dma_map_ops nommu_dma_ops = { |
83 | .alloc_coherent = dma_generic_alloc_coherent, | 98 | .alloc_coherent = dma_generic_alloc_coherent, |
84 | .free_coherent = nommu_free_coherent, | 99 | .free_coherent = nommu_free_coherent, |
85 | .map_sg = nommu_map_sg, | 100 | .map_sg = nommu_map_sg, |
86 | .map_page = nommu_map_page, | 101 | .map_page = nommu_map_page, |
87 | .is_phys = 1, | 102 | .sync_single_for_device = nommu_sync_single_for_device, |
103 | .sync_sg_for_device = nommu_sync_sg_for_device, | ||
104 | .is_phys = 1, | ||
88 | }; | 105 | }; |
89 | 106 | ||
90 | void __init no_iommu_init(void) | 107 | void __init no_iommu_init(void) |
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 6af96ee44200..e8a35016115f 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c | |||
@@ -13,31 +13,6 @@ | |||
13 | 13 | ||
14 | int swiotlb __read_mostly; | 14 | int swiotlb __read_mostly; |
15 | 15 | ||
16 | void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) | ||
17 | { | ||
18 | return alloc_bootmem_low_pages(size); | ||
19 | } | ||
20 | |||
21 | void *swiotlb_alloc(unsigned order, unsigned long nslabs) | ||
22 | { | ||
23 | return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); | ||
24 | } | ||
25 | |||
26 | dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) | ||
27 | { | ||
28 | return paddr; | ||
29 | } | ||
30 | |||
31 | phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) | ||
32 | { | ||
33 | return baddr; | ||
34 | } | ||
35 | |||
36 | int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) | ||
37 | { | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, | 16 | static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, |
42 | dma_addr_t *dma_handle, gfp_t flags) | 17 | dma_addr_t *dma_handle, gfp_t flags) |
43 | { | 18 | { |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 994dd6a4a2a0..071166a4ba83 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -519,16 +519,12 @@ static void c1e_idle(void) | |||
519 | if (!cpumask_test_cpu(cpu, c1e_mask)) { | 519 | if (!cpumask_test_cpu(cpu, c1e_mask)) { |
520 | cpumask_set_cpu(cpu, c1e_mask); | 520 | cpumask_set_cpu(cpu, c1e_mask); |
521 | /* | 521 | /* |
522 | * Force broadcast so ACPI can not interfere. Needs | 522 | * Force broadcast so ACPI can not interfere. |
523 | * to run with interrupts enabled as it uses | ||
524 | * smp_function_call. | ||
525 | */ | 523 | */ |
526 | local_irq_enable(); | ||
527 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, | 524 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, |
528 | &cpu); | 525 | &cpu); |
529 | printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", | 526 | printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", |
530 | cpu); | 527 | cpu); |
531 | local_irq_disable(); | ||
532 | } | 528 | } |
533 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); | 529 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); |
534 | 530 | ||
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 59f4524984af..4cf79567cdab 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -61,9 +61,6 @@ | |||
61 | 61 | ||
62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
63 | 63 | ||
64 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | ||
65 | EXPORT_PER_CPU_SYMBOL(current_task); | ||
66 | |||
67 | /* | 64 | /* |
68 | * Return saved PC of a blocked thread. | 65 | * Return saved PC of a blocked thread. |
69 | */ | 66 | */ |
@@ -350,14 +347,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
350 | *next = &next_p->thread; | 347 | *next = &next_p->thread; |
351 | int cpu = smp_processor_id(); | 348 | int cpu = smp_processor_id(); |
352 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | 349 | struct tss_struct *tss = &per_cpu(init_tss, cpu); |
350 | bool preload_fpu; | ||
353 | 351 | ||
354 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ | 352 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ |
355 | 353 | ||
356 | __unlazy_fpu(prev_p); | 354 | /* |
355 | * If the task has used fpu the last 5 timeslices, just do a full | ||
356 | * restore of the math state immediately to avoid the trap; the | ||
357 | * chances of needing FPU soon are obviously high now | ||
358 | */ | ||
359 | preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5; | ||
357 | 360 | ||
361 | __unlazy_fpu(prev_p); | ||
358 | 362 | ||
359 | /* we're going to use this soon, after a few expensive things */ | 363 | /* we're going to use this soon, after a few expensive things */ |
360 | if (next_p->fpu_counter > 5) | 364 | if (preload_fpu) |
361 | prefetch(next->xstate); | 365 | prefetch(next->xstate); |
362 | 366 | ||
363 | /* | 367 | /* |
@@ -398,6 +402,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
398 | task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) | 402 | task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) |
399 | __switch_to_xtra(prev_p, next_p, tss); | 403 | __switch_to_xtra(prev_p, next_p, tss); |
400 | 404 | ||
405 | /* If we're going to preload the fpu context, make sure clts | ||
406 | is run while we're batching the cpu state updates. */ | ||
407 | if (preload_fpu) | ||
408 | clts(); | ||
409 | |||
401 | /* | 410 | /* |
402 | * Leave lazy mode, flushing any hypercalls made here. | 411 | * Leave lazy mode, flushing any hypercalls made here. |
403 | * This must be done before restoring TLS segments so | 412 | * This must be done before restoring TLS segments so |
@@ -407,15 +416,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
407 | */ | 416 | */ |
408 | arch_end_context_switch(next_p); | 417 | arch_end_context_switch(next_p); |
409 | 418 | ||
410 | /* If the task has used fpu the last 5 timeslices, just do a full | 419 | if (preload_fpu) |
411 | * restore of the math state immediately to avoid the trap; the | 420 | __math_state_restore(); |
412 | * chances of needing FPU soon are obviously high now | ||
413 | * | ||
414 | * tsk_used_math() checks prevent calling math_state_restore(), | ||
415 | * which can sleep in the case of !tsk_used_math() | ||
416 | */ | ||
417 | if (tsk_used_math(next_p) && next_p->fpu_counter > 5) | ||
418 | math_state_restore(); | ||
419 | 421 | ||
420 | /* | 422 | /* |
421 | * Restore %gs if needed (which is common) | 423 | * Restore %gs if needed (which is common) |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ebefb5407b9d..ad535b683170 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -55,9 +55,6 @@ | |||
55 | 55 | ||
56 | asmlinkage extern void ret_from_fork(void); | 56 | asmlinkage extern void ret_from_fork(void); |
57 | 57 | ||
58 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | ||
59 | EXPORT_PER_CPU_SYMBOL(current_task); | ||
60 | |||
61 | DEFINE_PER_CPU(unsigned long, old_rsp); | 58 | DEFINE_PER_CPU(unsigned long, old_rsp); |
62 | static DEFINE_PER_CPU(unsigned char, is_idle); | 59 | static DEFINE_PER_CPU(unsigned char, is_idle); |
63 | 60 | ||
@@ -386,9 +383,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
386 | int cpu = smp_processor_id(); | 383 | int cpu = smp_processor_id(); |
387 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | 384 | struct tss_struct *tss = &per_cpu(init_tss, cpu); |
388 | unsigned fsindex, gsindex; | 385 | unsigned fsindex, gsindex; |
386 | bool preload_fpu; | ||
387 | |||
388 | /* | ||
389 | * If the task has used fpu the last 5 timeslices, just do a full | ||
390 | * restore of the math state immediately to avoid the trap; the | ||
391 | * chances of needing FPU soon are obviously high now | ||
392 | */ | ||
393 | preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5; | ||
389 | 394 | ||
390 | /* we're going to use this soon, after a few expensive things */ | 395 | /* we're going to use this soon, after a few expensive things */ |
391 | if (next_p->fpu_counter > 5) | 396 | if (preload_fpu) |
392 | prefetch(next->xstate); | 397 | prefetch(next->xstate); |
393 | 398 | ||
394 | /* | 399 | /* |
@@ -419,6 +424,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
419 | 424 | ||
420 | load_TLS(next, cpu); | 425 | load_TLS(next, cpu); |
421 | 426 | ||
427 | /* Must be after DS reload */ | ||
428 | unlazy_fpu(prev_p); | ||
429 | |||
430 | /* Make sure cpu is ready for new context */ | ||
431 | if (preload_fpu) | ||
432 | clts(); | ||
433 | |||
422 | /* | 434 | /* |
423 | * Leave lazy mode, flushing any hypercalls made here. | 435 | * Leave lazy mode, flushing any hypercalls made here. |
424 | * This must be done before restoring TLS segments so | 436 | * This must be done before restoring TLS segments so |
@@ -459,9 +471,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
459 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); | 471 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); |
460 | prev->gsindex = gsindex; | 472 | prev->gsindex = gsindex; |
461 | 473 | ||
462 | /* Must be after DS reload */ | ||
463 | unlazy_fpu(prev_p); | ||
464 | |||
465 | /* | 474 | /* |
466 | * Switch the PDA and FPU contexts. | 475 | * Switch the PDA and FPU contexts. |
467 | */ | 476 | */ |
@@ -480,15 +489,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
480 | task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) | 489 | task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) |
481 | __switch_to_xtra(prev_p, next_p, tss); | 490 | __switch_to_xtra(prev_p, next_p, tss); |
482 | 491 | ||
483 | /* If the task has used fpu the last 5 timeslices, just do a full | 492 | /* |
484 | * restore of the math state immediately to avoid the trap; the | 493 | * Preload the FPU context, now that we've determined that the |
485 | * chances of needing FPU soon are obviously high now | 494 | * task is likely to be using it. |
486 | * | ||
487 | * tsk_used_math() checks prevent calling math_state_restore(), | ||
488 | * which can sleep in the case of !tsk_used_math() | ||
489 | */ | 495 | */ |
490 | if (tsk_used_math(next_p) && next_p->fpu_counter > 5) | 496 | if (preload_fpu) |
491 | math_state_restore(); | 497 | __math_state_restore(); |
492 | return prev_p; | 498 | return prev_p; |
493 | } | 499 | } |
494 | 500 | ||
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 09ecbde91c13..8d7d5c9c1be3 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -35,10 +35,11 @@ | |||
35 | #include <asm/proto.h> | 35 | #include <asm/proto.h> |
36 | #include <asm/ds.h> | 36 | #include <asm/ds.h> |
37 | 37 | ||
38 | #include <trace/syscall.h> | ||
39 | |||
40 | #include "tls.h" | 38 | #include "tls.h" |
41 | 39 | ||
40 | #define CREATE_TRACE_POINTS | ||
41 | #include <trace/events/syscalls.h> | ||
42 | |||
42 | enum x86_regset { | 43 | enum x86_regset { |
43 | REGSET_GENERAL, | 44 | REGSET_GENERAL, |
44 | REGSET_FP, | 45 | REGSET_FP, |
@@ -1497,8 +1498,8 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) | |||
1497 | tracehook_report_syscall_entry(regs)) | 1498 | tracehook_report_syscall_entry(regs)) |
1498 | ret = -1L; | 1499 | ret = -1L; |
1499 | 1500 | ||
1500 | if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) | 1501 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) |
1501 | ftrace_syscall_enter(regs); | 1502 | trace_sys_enter(regs, regs->orig_ax); |
1502 | 1503 | ||
1503 | if (unlikely(current->audit_context)) { | 1504 | if (unlikely(current->audit_context)) { |
1504 | if (IS_IA32) | 1505 | if (IS_IA32) |
@@ -1523,8 +1524,8 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) | |||
1523 | if (unlikely(current->audit_context)) | 1524 | if (unlikely(current->audit_context)) |
1524 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | 1525 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); |
1525 | 1526 | ||
1526 | if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) | 1527 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) |
1527 | ftrace_syscall_exit(regs); | 1528 | trace_sys_exit(regs, regs->ax); |
1528 | 1529 | ||
1529 | if (test_thread_flag(TIF_SYSCALL_TRACE)) | 1530 | if (test_thread_flag(TIF_SYSCALL_TRACE)) |
1530 | tracehook_report_syscall_exit(regs, 0); | 1531 | tracehook_report_syscall_exit(regs, 0); |
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 4f9c55f3a7c0..03801f2f761f 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c | |||
@@ -60,7 +60,7 @@ static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) | |||
60 | "adc %5,%%edx ; " | 60 | "adc %5,%%edx ; " |
61 | : "=A" (product), "=r" (tmp1), "=r" (tmp2) | 61 | : "=A" (product), "=r" (tmp1), "=r" (tmp2) |
62 | : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); | 62 | : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); |
63 | #elif __x86_64__ | 63 | #elif defined(__x86_64__) |
64 | __asm__ ( | 64 | __asm__ ( |
65 | "mul %%rdx ; shrd $32,%%rdx,%%rax" | 65 | "mul %%rdx ; shrd $32,%%rdx,%%rax" |
66 | : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); | 66 | : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index d2d1ce8170f0..a06e8d101844 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/init.h> | 3 | #include <linux/init.h> |
4 | #include <linux/pm.h> | 4 | #include <linux/pm.h> |
5 | #include <linux/efi.h> | 5 | #include <linux/efi.h> |
6 | #include <linux/dmi.h> | ||
6 | #include <acpi/reboot.h> | 7 | #include <acpi/reboot.h> |
7 | #include <asm/io.h> | 8 | #include <asm/io.h> |
8 | #include <asm/apic.h> | 9 | #include <asm/apic.h> |
@@ -17,7 +18,6 @@ | |||
17 | #include <asm/cpu.h> | 18 | #include <asm/cpu.h> |
18 | 19 | ||
19 | #ifdef CONFIG_X86_32 | 20 | #ifdef CONFIG_X86_32 |
20 | # include <linux/dmi.h> | ||
21 | # include <linux/ctype.h> | 21 | # include <linux/ctype.h> |
22 | # include <linux/mc146818rtc.h> | 22 | # include <linux/mc146818rtc.h> |
23 | #else | 23 | #else |
@@ -249,6 +249,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
249 | DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"), | 249 | DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"), |
250 | }, | 250 | }, |
251 | }, | 251 | }, |
252 | { /* Handle problems with rebooting on CompuLab SBC-FITPC2 */ | ||
253 | .callback = set_bios_reboot, | ||
254 | .ident = "CompuLab SBC-FITPC2", | ||
255 | .matches = { | ||
256 | DMI_MATCH(DMI_SYS_VENDOR, "CompuLab"), | ||
257 | DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), | ||
258 | }, | ||
259 | }, | ||
252 | { } | 260 | { } |
253 | }; | 261 | }; |
254 | 262 | ||
@@ -396,6 +404,46 @@ EXPORT_SYMBOL(machine_real_restart); | |||
396 | 404 | ||
397 | #endif /* CONFIG_X86_32 */ | 405 | #endif /* CONFIG_X86_32 */ |
398 | 406 | ||
407 | /* | ||
408 | * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot | ||
409 | */ | ||
410 | static int __init set_pci_reboot(const struct dmi_system_id *d) | ||
411 | { | ||
412 | if (reboot_type != BOOT_CF9) { | ||
413 | reboot_type = BOOT_CF9; | ||
414 | printk(KERN_INFO "%s series board detected. " | ||
415 | "Selecting PCI-method for reboots.\n", d->ident); | ||
416 | } | ||
417 | return 0; | ||
418 | } | ||
419 | |||
420 | static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { | ||
421 | { /* Handle problems with rebooting on Apple MacBook5 */ | ||
422 | .callback = set_pci_reboot, | ||
423 | .ident = "Apple MacBook5", | ||
424 | .matches = { | ||
425 | DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), | ||
426 | DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"), | ||
427 | }, | ||
428 | }, | ||
429 | { /* Handle problems with rebooting on Apple MacBookPro5 */ | ||
430 | .callback = set_pci_reboot, | ||
431 | .ident = "Apple MacBookPro5", | ||
432 | .matches = { | ||
433 | DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), | ||
434 | DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"), | ||
435 | }, | ||
436 | }, | ||
437 | { } | ||
438 | }; | ||
439 | |||
440 | static int __init pci_reboot_init(void) | ||
441 | { | ||
442 | dmi_check_system(pci_reboot_dmi_table); | ||
443 | return 0; | ||
444 | } | ||
445 | core_initcall(pci_reboot_init); | ||
446 | |||
399 | static inline void kb_wait(void) | 447 | static inline void kb_wait(void) |
400 | { | 448 | { |
401 | int i; | 449 | int i; |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index be5ae80f897f..63f32d220ef2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -289,6 +289,20 @@ void * __init extend_brk(size_t size, size_t align) | |||
289 | return ret; | 289 | return ret; |
290 | } | 290 | } |
291 | 291 | ||
292 | #ifdef CONFIG_X86_64 | ||
293 | static void __init init_gbpages(void) | ||
294 | { | ||
295 | if (direct_gbpages && cpu_has_gbpages) | ||
296 | printk(KERN_INFO "Using GB pages for direct mapping\n"); | ||
297 | else | ||
298 | direct_gbpages = 0; | ||
299 | } | ||
300 | #else | ||
301 | static inline void init_gbpages(void) | ||
302 | { | ||
303 | } | ||
304 | #endif | ||
305 | |||
292 | static void __init reserve_brk(void) | 306 | static void __init reserve_brk(void) |
293 | { | 307 | { |
294 | if (_brk_end > _brk_start) | 308 | if (_brk_end > _brk_start) |
@@ -658,6 +672,19 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | |||
658 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"), | 672 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"), |
659 | }, | 673 | }, |
660 | }, | 674 | }, |
675 | { | ||
676 | /* | ||
677 | * AMI BIOS with low memory corruption was found on Intel DG45ID board. | ||
678 | * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will | ||
679 | * match only DMI_BOARD_NAME and see if there is more bad products | ||
680 | * with this vendor. | ||
681 | */ | ||
682 | .callback = dmi_low_memory_corruption, | ||
683 | .ident = "AMI BIOS", | ||
684 | .matches = { | ||
685 | DMI_MATCH(DMI_BOARD_NAME, "DG45ID"), | ||
686 | }, | ||
687 | }, | ||
661 | #endif | 688 | #endif |
662 | {} | 689 | {} |
663 | }; | 690 | }; |
@@ -871,6 +898,8 @@ void __init setup_arch(char **cmdline_p) | |||
871 | 898 | ||
872 | reserve_brk(); | 899 | reserve_brk(); |
873 | 900 | ||
901 | init_gbpages(); | ||
902 | |||
874 | /* max_pfn_mapped is updated here */ | 903 | /* max_pfn_mapped is updated here */ |
875 | max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); | 904 | max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); |
876 | max_pfn_mapped = max_low_pfn_mapped; | 905 | max_pfn_mapped = max_low_pfn_mapped; |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 9c3f0823e6aa..07d81916f212 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -124,7 +124,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, | |||
124 | } | 124 | } |
125 | 125 | ||
126 | /* | 126 | /* |
127 | * Remap allocator | 127 | * Large page remap allocator |
128 | * | 128 | * |
129 | * This allocator uses PMD page as unit. A PMD page is allocated for | 129 | * This allocator uses PMD page as unit. A PMD page is allocated for |
130 | * each cpu and each is remapped into vmalloc area using PMD mapping. | 130 | * each cpu and each is remapped into vmalloc area using PMD mapping. |
@@ -137,105 +137,185 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, | |||
137 | * better than only using 4k mappings while still being NUMA friendly. | 137 | * better than only using 4k mappings while still being NUMA friendly. |
138 | */ | 138 | */ |
139 | #ifdef CONFIG_NEED_MULTIPLE_NODES | 139 | #ifdef CONFIG_NEED_MULTIPLE_NODES |
140 | static size_t pcpur_size __initdata; | 140 | struct pcpul_ent { |
141 | static void **pcpur_ptrs __initdata; | 141 | unsigned int cpu; |
142 | void *ptr; | ||
143 | }; | ||
144 | |||
145 | static size_t pcpul_size; | ||
146 | static struct pcpul_ent *pcpul_map; | ||
147 | static struct vm_struct pcpul_vm; | ||
142 | 148 | ||
143 | static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) | 149 | static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) |
144 | { | 150 | { |
145 | size_t off = (size_t)pageno << PAGE_SHIFT; | 151 | size_t off = (size_t)pageno << PAGE_SHIFT; |
146 | 152 | ||
147 | if (off >= pcpur_size) | 153 | if (off >= pcpul_size) |
148 | return NULL; | 154 | return NULL; |
149 | 155 | ||
150 | return virt_to_page(pcpur_ptrs[cpu] + off); | 156 | return virt_to_page(pcpul_map[cpu].ptr + off); |
151 | } | 157 | } |
152 | 158 | ||
153 | static ssize_t __init setup_pcpu_remap(size_t static_size) | 159 | static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) |
154 | { | 160 | { |
155 | static struct vm_struct vm; | 161 | size_t map_size, dyn_size; |
156 | size_t ptrs_size, dyn_size; | ||
157 | unsigned int cpu; | 162 | unsigned int cpu; |
163 | int i, j; | ||
158 | ssize_t ret; | 164 | ssize_t ret; |
159 | 165 | ||
160 | /* | 166 | if (!chosen) { |
161 | * If large page isn't supported, there's no benefit in doing | 167 | size_t vm_size = VMALLOC_END - VMALLOC_START; |
162 | * this. Also, on non-NUMA, embedding is better. | 168 | size_t tot_size = nr_cpu_ids * PMD_SIZE; |
163 | * | 169 | |
164 | * NOTE: disabled for now. | 170 | /* on non-NUMA, embedding is better */ |
165 | */ | 171 | if (!pcpu_need_numa()) |
166 | if (true || !cpu_has_pse || !pcpu_need_numa()) | 172 | return -EINVAL; |
173 | |||
174 | /* don't consume more than 20% of vmalloc area */ | ||
175 | if (tot_size > vm_size / 5) { | ||
176 | pr_info("PERCPU: too large chunk size %zuMB for " | ||
177 | "large page remap\n", tot_size >> 20); | ||
178 | return -EINVAL; | ||
179 | } | ||
180 | } | ||
181 | |||
182 | /* need PSE */ | ||
183 | if (!cpu_has_pse) { | ||
184 | pr_warning("PERCPU: lpage allocator requires PSE\n"); | ||
167 | return -EINVAL; | 185 | return -EINVAL; |
186 | } | ||
168 | 187 | ||
169 | /* | 188 | /* |
170 | * Currently supports only single page. Supporting multiple | 189 | * Currently supports only single page. Supporting multiple |
171 | * pages won't be too difficult if it ever becomes necessary. | 190 | * pages won't be too difficult if it ever becomes necessary. |
172 | */ | 191 | */ |
173 | pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + | 192 | pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + |
174 | PERCPU_DYNAMIC_RESERVE); | 193 | PERCPU_DYNAMIC_RESERVE); |
175 | if (pcpur_size > PMD_SIZE) { | 194 | if (pcpul_size > PMD_SIZE) { |
176 | pr_warning("PERCPU: static data is larger than large page, " | 195 | pr_warning("PERCPU: static data is larger than large page, " |
177 | "can't use large page\n"); | 196 | "can't use large page\n"); |
178 | return -EINVAL; | 197 | return -EINVAL; |
179 | } | 198 | } |
180 | dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; | 199 | dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; |
181 | 200 | ||
182 | /* allocate pointer array and alloc large pages */ | 201 | /* allocate pointer array and alloc large pages */ |
183 | ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); | 202 | map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0])); |
184 | pcpur_ptrs = alloc_bootmem(ptrs_size); | 203 | pcpul_map = alloc_bootmem(map_size); |
185 | 204 | ||
186 | for_each_possible_cpu(cpu) { | 205 | for_each_possible_cpu(cpu) { |
187 | pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE); | 206 | pcpul_map[cpu].cpu = cpu; |
188 | if (!pcpur_ptrs[cpu]) | 207 | pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE, |
208 | PMD_SIZE); | ||
209 | if (!pcpul_map[cpu].ptr) { | ||
210 | pr_warning("PERCPU: failed to allocate large page " | ||
211 | "for cpu%u\n", cpu); | ||
189 | goto enomem; | 212 | goto enomem; |
213 | } | ||
190 | 214 | ||
191 | /* | 215 | /* |
192 | * Only use pcpur_size bytes and give back the rest. | 216 | * Only use pcpul_size bytes and give back the rest. |
193 | * | 217 | * |
194 | * Ingo: The 2MB up-rounding bootmem is needed to make | 218 | * Ingo: The 2MB up-rounding bootmem is needed to make |
195 | * sure the partial 2MB page is still fully RAM - it's | 219 | * sure the partial 2MB page is still fully RAM - it's |
196 | * not well-specified to have a PAT-incompatible area | 220 | * not well-specified to have a PAT-incompatible area |
197 | * (unmapped RAM, device memory, etc.) in that hole. | 221 | * (unmapped RAM, device memory, etc.) in that hole. |
198 | */ | 222 | */ |
199 | free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size), | 223 | free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size), |
200 | PMD_SIZE - pcpur_size); | 224 | PMD_SIZE - pcpul_size); |
201 | 225 | ||
202 | memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size); | 226 | memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size); |
203 | } | 227 | } |
204 | 228 | ||
205 | /* allocate address and map */ | 229 | /* allocate address and map */ |
206 | vm.flags = VM_ALLOC; | 230 | pcpul_vm.flags = VM_ALLOC; |
207 | vm.size = num_possible_cpus() * PMD_SIZE; | 231 | pcpul_vm.size = nr_cpu_ids * PMD_SIZE; |
208 | vm_area_register_early(&vm, PMD_SIZE); | 232 | vm_area_register_early(&pcpul_vm, PMD_SIZE); |
209 | 233 | ||
210 | for_each_possible_cpu(cpu) { | 234 | for_each_possible_cpu(cpu) { |
211 | pmd_t *pmd; | 235 | pmd_t *pmd, pmd_v; |
212 | 236 | ||
213 | pmd = populate_extra_pmd((unsigned long)vm.addr | 237 | pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr + |
214 | + cpu * PMD_SIZE); | 238 | cpu * PMD_SIZE); |
215 | set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])), | 239 | pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)), |
216 | PAGE_KERNEL_LARGE)); | 240 | PAGE_KERNEL_LARGE); |
241 | set_pmd(pmd, pmd_v); | ||
217 | } | 242 | } |
218 | 243 | ||
219 | /* we're ready, commit */ | 244 | /* we're ready, commit */ |
220 | pr_info("PERCPU: Remapped at %p with large pages, static data " | 245 | pr_info("PERCPU: Remapped at %p with large pages, static data " |
221 | "%zu bytes\n", vm.addr, static_size); | 246 | "%zu bytes\n", pcpul_vm.addr, static_size); |
222 | 247 | ||
223 | ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, | 248 | ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, |
224 | PERCPU_FIRST_CHUNK_RESERVE, dyn_size, | 249 | PERCPU_FIRST_CHUNK_RESERVE, dyn_size, |
225 | PMD_SIZE, vm.addr, NULL); | 250 | PMD_SIZE, pcpul_vm.addr, NULL); |
226 | goto out_free_ar; | 251 | |
252 | /* sort pcpul_map array for pcpu_lpage_remapped() */ | ||
253 | for (i = 0; i < nr_cpu_ids - 1; i++) | ||
254 | for (j = i + 1; j < nr_cpu_ids; j++) | ||
255 | if (pcpul_map[i].ptr > pcpul_map[j].ptr) { | ||
256 | struct pcpul_ent tmp = pcpul_map[i]; | ||
257 | pcpul_map[i] = pcpul_map[j]; | ||
258 | pcpul_map[j] = tmp; | ||
259 | } | ||
260 | |||
261 | return ret; | ||
227 | 262 | ||
228 | enomem: | 263 | enomem: |
229 | for_each_possible_cpu(cpu) | 264 | for_each_possible_cpu(cpu) |
230 | if (pcpur_ptrs[cpu]) | 265 | if (pcpul_map[cpu].ptr) |
231 | free_bootmem(__pa(pcpur_ptrs[cpu]), PMD_SIZE); | 266 | free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size); |
232 | ret = -ENOMEM; | 267 | free_bootmem(__pa(pcpul_map), map_size); |
233 | out_free_ar: | 268 | return -ENOMEM; |
234 | free_bootmem(__pa(pcpur_ptrs), ptrs_size); | 269 | } |
235 | return ret; | 270 | |
271 | /** | ||
272 | * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area | ||
273 | * @kaddr: the kernel address in question | ||
274 | * | ||
275 | * Determine whether @kaddr falls in the pcpul recycled area. This is | ||
276 | * used by pageattr to detect VM aliases and break up the pcpu PMD | ||
277 | * mapping such that the same physical page is not mapped under | ||
278 | * different attributes. | ||
279 | * | ||
280 | * The recycled area is always at the tail of a partially used PMD | ||
281 | * page. | ||
282 | * | ||
283 | * RETURNS: | ||
284 | * Address of corresponding remapped pcpu address if match is found; | ||
285 | * otherwise, NULL. | ||
286 | */ | ||
287 | void *pcpu_lpage_remapped(void *kaddr) | ||
288 | { | ||
289 | void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); | ||
290 | unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; | ||
291 | int left = 0, right = nr_cpu_ids - 1; | ||
292 | int pos; | ||
293 | |||
294 | /* pcpul in use at all? */ | ||
295 | if (!pcpul_map) | ||
296 | return NULL; | ||
297 | |||
298 | /* okay, perform binary search */ | ||
299 | while (left <= right) { | ||
300 | pos = (left + right) / 2; | ||
301 | |||
302 | if (pcpul_map[pos].ptr < pmd_addr) | ||
303 | left = pos + 1; | ||
304 | else if (pcpul_map[pos].ptr > pmd_addr) | ||
305 | right = pos - 1; | ||
306 | else { | ||
307 | /* it shouldn't be in the area for the first chunk */ | ||
308 | WARN_ON(offset < pcpul_size); | ||
309 | |||
310 | return pcpul_vm.addr + | ||
311 | pcpul_map[pos].cpu * PMD_SIZE + offset; | ||
312 | } | ||
313 | } | ||
314 | |||
315 | return NULL; | ||
236 | } | 316 | } |
237 | #else | 317 | #else |
238 | static ssize_t __init setup_pcpu_remap(size_t static_size) | 318 | static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) |
239 | { | 319 | { |
240 | return -EINVAL; | 320 | return -EINVAL; |
241 | } | 321 | } |
@@ -249,7 +329,7 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) | |||
249 | * mapping so that it can use PMD mapping without additional TLB | 329 | * mapping so that it can use PMD mapping without additional TLB |
250 | * pressure. | 330 | * pressure. |
251 | */ | 331 | */ |
252 | static ssize_t __init setup_pcpu_embed(size_t static_size) | 332 | static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) |
253 | { | 333 | { |
254 | size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; | 334 | size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; |
255 | 335 | ||
@@ -258,7 +338,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) | |||
258 | * this. Also, embedding allocation doesn't play well with | 338 | * this. Also, embedding allocation doesn't play well with |
259 | * NUMA. | 339 | * NUMA. |
260 | */ | 340 | */ |
261 | if (!cpu_has_pse || pcpu_need_numa()) | 341 | if (!chosen && (!cpu_has_pse || pcpu_need_numa())) |
262 | return -EINVAL; | 342 | return -EINVAL; |
263 | 343 | ||
264 | return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, | 344 | return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, |
@@ -297,7 +377,7 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) | |||
297 | pcpu4k_nr_static_pages = PFN_UP(static_size); | 377 | pcpu4k_nr_static_pages = PFN_UP(static_size); |
298 | 378 | ||
299 | /* unaligned allocations can't be freed, round up to page size */ | 379 | /* unaligned allocations can't be freed, round up to page size */ |
300 | pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() | 380 | pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids |
301 | * sizeof(pcpu4k_pages[0])); | 381 | * sizeof(pcpu4k_pages[0])); |
302 | pcpu4k_pages = alloc_bootmem(pages_size); | 382 | pcpu4k_pages = alloc_bootmem(pages_size); |
303 | 383 | ||
@@ -308,8 +388,11 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) | |||
308 | void *ptr; | 388 | void *ptr; |
309 | 389 | ||
310 | ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); | 390 | ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); |
311 | if (!ptr) | 391 | if (!ptr) { |
392 | pr_warning("PERCPU: failed to allocate " | ||
393 | "4k page for cpu%u\n", cpu); | ||
312 | goto enomem; | 394 | goto enomem; |
395 | } | ||
313 | 396 | ||
314 | memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); | 397 | memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); |
315 | pcpu4k_pages[j++] = virt_to_page(ptr); | 398 | pcpu4k_pages[j++] = virt_to_page(ptr); |
@@ -333,6 +416,16 @@ out_free_ar: | |||
333 | return ret; | 416 | return ret; |
334 | } | 417 | } |
335 | 418 | ||
419 | /* for explicit first chunk allocator selection */ | ||
420 | static char pcpu_chosen_alloc[16] __initdata; | ||
421 | |||
422 | static int __init percpu_alloc_setup(char *str) | ||
423 | { | ||
424 | strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1); | ||
425 | return 0; | ||
426 | } | ||
427 | early_param("percpu_alloc", percpu_alloc_setup); | ||
428 | |||
336 | static inline void setup_percpu_segment(int cpu) | 429 | static inline void setup_percpu_segment(int cpu) |
337 | { | 430 | { |
338 | #ifdef CONFIG_X86_32 | 431 | #ifdef CONFIG_X86_32 |
@@ -346,11 +439,6 @@ static inline void setup_percpu_segment(int cpu) | |||
346 | #endif | 439 | #endif |
347 | } | 440 | } |
348 | 441 | ||
349 | /* | ||
350 | * Great future plan: | ||
351 | * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. | ||
352 | * Always point %gs to its beginning | ||
353 | */ | ||
354 | void __init setup_per_cpu_areas(void) | 442 | void __init setup_per_cpu_areas(void) |
355 | { | 443 | { |
356 | size_t static_size = __per_cpu_end - __per_cpu_start; | 444 | size_t static_size = __per_cpu_end - __per_cpu_start; |
@@ -367,9 +455,26 @@ void __init setup_per_cpu_areas(void) | |||
367 | * of large page mappings. Please read comments on top of | 455 | * of large page mappings. Please read comments on top of |
368 | * each allocator for details. | 456 | * each allocator for details. |
369 | */ | 457 | */ |
370 | ret = setup_pcpu_remap(static_size); | 458 | ret = -EINVAL; |
371 | if (ret < 0) | 459 | if (strlen(pcpu_chosen_alloc)) { |
372 | ret = setup_pcpu_embed(static_size); | 460 | if (strcmp(pcpu_chosen_alloc, "4k")) { |
461 | if (!strcmp(pcpu_chosen_alloc, "lpage")) | ||
462 | ret = setup_pcpu_lpage(static_size, true); | ||
463 | else if (!strcmp(pcpu_chosen_alloc, "embed")) | ||
464 | ret = setup_pcpu_embed(static_size, true); | ||
465 | else | ||
466 | pr_warning("PERCPU: unknown allocator %s " | ||
467 | "specified\n", pcpu_chosen_alloc); | ||
468 | if (ret < 0) | ||
469 | pr_warning("PERCPU: %s allocator failed (%zd), " | ||
470 | "falling back to 4k\n", | ||
471 | pcpu_chosen_alloc, ret); | ||
472 | } | ||
473 | } else { | ||
474 | ret = setup_pcpu_lpage(static_size, false); | ||
475 | if (ret < 0) | ||
476 | ret = setup_pcpu_embed(static_size, false); | ||
477 | } | ||
373 | if (ret < 0) | 478 | if (ret < 0) |
374 | ret = setup_pcpu_4k(static_size); | 479 | ret = setup_pcpu_4k(static_size); |
375 | if (ret < 0) | 480 | if (ret < 0) |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 4c578751e94e..81e58238c4ce 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -869,6 +869,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
869 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { | 869 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { |
870 | clear_thread_flag(TIF_NOTIFY_RESUME); | 870 | clear_thread_flag(TIF_NOTIFY_RESUME); |
871 | tracehook_notify_resume(regs); | 871 | tracehook_notify_resume(regs); |
872 | if (current->replacement_session_keyring) | ||
873 | key_replace_session_keyring(); | ||
872 | } | 874 | } |
873 | 875 | ||
874 | #ifdef CONFIG_X86_32 | 876 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2fecda69ee64..c36cc1452cdc 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -434,7 +434,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu) | |||
434 | * For perf, we return last level cache shared map. | 434 | * For perf, we return last level cache shared map. |
435 | * And for power savings, we return cpu_core_map | 435 | * And for power savings, we return cpu_core_map |
436 | */ | 436 | */ |
437 | if (sched_mc_power_savings || sched_smt_power_savings) | 437 | if ((sched_mc_power_savings || sched_smt_power_savings) && |
438 | !(cpu_has(c, X86_FEATURE_AMD_DCM))) | ||
438 | return cpu_core_mask(cpu); | 439 | return cpu_core_mask(cpu); |
439 | else | 440 | else |
440 | return c->llc_shared_map; | 441 | return c->llc_shared_map; |
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index e8b9863ef8c4..3149032ff107 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
5 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
6 | #include <linux/ptrace.h> | 6 | #include <linux/ptrace.h> |
7 | #include <asm/desc.h> | ||
7 | 8 | ||
8 | unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) | 9 | unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) |
9 | { | 10 | { |
@@ -23,7 +24,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re | |||
23 | * and APM bios ones we just ignore here. | 24 | * and APM bios ones we just ignore here. |
24 | */ | 25 | */ |
25 | if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) { | 26 | if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) { |
26 | u32 *desc; | 27 | struct desc_struct *desc; |
27 | unsigned long base; | 28 | unsigned long base; |
28 | 29 | ||
29 | seg &= ~7UL; | 30 | seg &= ~7UL; |
@@ -33,12 +34,10 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re | |||
33 | addr = -1L; /* bogus selector, access would fault */ | 34 | addr = -1L; /* bogus selector, access would fault */ |
34 | else { | 35 | else { |
35 | desc = child->mm->context.ldt + seg; | 36 | desc = child->mm->context.ldt + seg; |
36 | base = ((desc[0] >> 16) | | 37 | base = get_desc_base(desc); |
37 | ((desc[1] & 0xff) << 16) | | ||
38 | (desc[1] & 0xff000000)); | ||
39 | 38 | ||
40 | /* 16-bit code segment? */ | 39 | /* 16-bit code segment? */ |
41 | if (!((desc[1] >> 22) & 1)) | 40 | if (!desc->d) |
42 | addr &= 0xffff; | 41 | addr &= 0xffff; |
43 | addr += base; | 42 | addr += base; |
44 | } | 43 | } |
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 6bc211accf08..45e00eb09c3a 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
@@ -18,9 +18,9 @@ | |||
18 | #include <asm/ia32.h> | 18 | #include <asm/ia32.h> |
19 | #include <asm/syscalls.h> | 19 | #include <asm/syscalls.h> |
20 | 20 | ||
21 | asmlinkage long sys_mmap(unsigned long addr, unsigned long len, | 21 | SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, |
22 | unsigned long prot, unsigned long flags, | 22 | unsigned long, prot, unsigned long, flags, |
23 | unsigned long fd, unsigned long off) | 23 | unsigned long, fd, unsigned long, off) |
24 | { | 24 | { |
25 | long error; | 25 | long error; |
26 | struct file *file; | 26 | struct file *file; |
@@ -226,7 +226,7 @@ bottomup: | |||
226 | } | 226 | } |
227 | 227 | ||
228 | 228 | ||
229 | asmlinkage long sys_uname(struct new_utsname __user *name) | 229 | SYSCALL_DEFINE1(uname, struct new_utsname __user *, name) |
230 | { | 230 | { |
231 | int err; | 231 | int err; |
232 | down_read(&uts_sem); | 232 | down_read(&uts_sem); |
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 124d40c575df..503c1f2e8835 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -640,13 +640,13 @@ static int __init uv_ptc_init(void) | |||
640 | if (!is_uv_system()) | 640 | if (!is_uv_system()) |
641 | return 0; | 641 | return 0; |
642 | 642 | ||
643 | proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); | 643 | proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL, |
644 | &proc_uv_ptc_operations); | ||
644 | if (!proc_uv_ptc) { | 645 | if (!proc_uv_ptc) { |
645 | printk(KERN_ERR "unable to create %s proc entry\n", | 646 | printk(KERN_ERR "unable to create %s proc entry\n", |
646 | UV_PTC_BASENAME); | 647 | UV_PTC_BASENAME); |
647 | return -EINVAL; | 648 | return -EINVAL; |
648 | } | 649 | } |
649 | proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; | ||
650 | return 0; | 650 | return 0; |
651 | } | 651 | } |
652 | 652 | ||
@@ -711,7 +711,6 @@ uv_activation_descriptor_init(int node, int pnode) | |||
711 | unsigned long pa; | 711 | unsigned long pa; |
712 | unsigned long m; | 712 | unsigned long m; |
713 | unsigned long n; | 713 | unsigned long n; |
714 | unsigned long mmr_image; | ||
715 | struct bau_desc *adp; | 714 | struct bau_desc *adp; |
716 | struct bau_desc *ad2; | 715 | struct bau_desc *ad2; |
717 | 716 | ||
@@ -727,12 +726,8 @@ uv_activation_descriptor_init(int node, int pnode) | |||
727 | n = pa >> uv_nshift; | 726 | n = pa >> uv_nshift; |
728 | m = pa & uv_mmask; | 727 | m = pa & uv_mmask; |
729 | 728 | ||
730 | mmr_image = uv_read_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE); | 729 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, |
731 | if (mmr_image) { | 730 | (n << UV_DESC_BASE_PNODE_SHIFT | m)); |
732 | uv_write_global_mmr64(pnode, (unsigned long) | ||
733 | UVH_LB_BAU_SB_DESCRIPTOR_BASE, | ||
734 | (n << UV_DESC_BASE_PNODE_SHIFT | m)); | ||
735 | } | ||
736 | 731 | ||
737 | /* | 732 | /* |
738 | * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each | 733 | * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each |
@@ -749,6 +744,7 @@ uv_activation_descriptor_init(int node, int pnode) | |||
749 | * note that base_dest_nodeid is actually a nasid. | 744 | * note that base_dest_nodeid is actually a nasid. |
750 | */ | 745 | */ |
751 | ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1; | 746 | ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1; |
747 | ad2->header.dest_subnodeid = 0x10; /* the LB */ | ||
752 | ad2->header.command = UV_NET_ENDPOINT_INTD; | 748 | ad2->header.command = UV_NET_ENDPOINT_INTD; |
753 | ad2->header.int_both = 1; | 749 | ad2->header.int_both = 1; |
754 | /* | 750 | /* |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a0f48f5671c0..83264922a878 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -76,7 +76,7 @@ char ignore_fpu_irq; | |||
76 | * F0 0F bug workaround.. We have a special link segment | 76 | * F0 0F bug workaround.. We have a special link segment |
77 | * for this. | 77 | * for this. |
78 | */ | 78 | */ |
79 | gate_desc idt_table[256] | 79 | gate_desc idt_table[NR_VECTORS] |
80 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; | 80 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; |
81 | #endif | 81 | #endif |
82 | 82 | ||
@@ -346,6 +346,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs) | |||
346 | printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); | 346 | printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); |
347 | show_registers(regs); | 347 | show_registers(regs); |
348 | 348 | ||
349 | if (panic_on_io_nmi) | ||
350 | panic("NMI IOCK error: Not continuing"); | ||
351 | |||
349 | /* Re-enable the IOCK line, wait for a few seconds */ | 352 | /* Re-enable the IOCK line, wait for a few seconds */ |
350 | reason = (reason & 0xf) | 8; | 353 | reason = (reason & 0xf) | 8; |
351 | outb(reason, 0x61); | 354 | outb(reason, 0x61); |
@@ -783,33 +786,34 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) | |||
783 | #endif | 786 | #endif |
784 | } | 787 | } |
785 | 788 | ||
786 | #ifdef CONFIG_X86_32 | 789 | asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) |
787 | unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) | ||
788 | { | 790 | { |
789 | struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); | ||
790 | unsigned long base = (kesp - uesp) & -THREAD_SIZE; | ||
791 | unsigned long new_kesp = kesp - base; | ||
792 | unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; | ||
793 | __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; | ||
794 | |||
795 | /* Set up base for espfix segment */ | ||
796 | desc &= 0x00f0ff0000000000ULL; | ||
797 | desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | | ||
798 | ((((__u64)base) << 32) & 0xff00000000000000ULL) | | ||
799 | ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | | ||
800 | (lim_pages & 0xffff); | ||
801 | *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; | ||
802 | |||
803 | return new_kesp; | ||
804 | } | 791 | } |
805 | #endif | ||
806 | 792 | ||
807 | asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) | 793 | asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) |
808 | { | 794 | { |
809 | } | 795 | } |
810 | 796 | ||
811 | asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) | 797 | /* |
798 | * __math_state_restore assumes that cr0.TS is already clear and the | ||
799 | * fpu state is all ready for use. Used during context switch. | ||
800 | */ | ||
801 | void __math_state_restore(void) | ||
812 | { | 802 | { |
803 | struct thread_info *thread = current_thread_info(); | ||
804 | struct task_struct *tsk = thread->task; | ||
805 | |||
806 | /* | ||
807 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. | ||
808 | */ | ||
809 | if (unlikely(restore_fpu_checking(tsk))) { | ||
810 | stts(); | ||
811 | force_sig(SIGSEGV, tsk); | ||
812 | return; | ||
813 | } | ||
814 | |||
815 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ | ||
816 | tsk->fpu_counter++; | ||
813 | } | 817 | } |
814 | 818 | ||
815 | /* | 819 | /* |
@@ -843,17 +847,8 @@ asmlinkage void math_state_restore(void) | |||
843 | } | 847 | } |
844 | 848 | ||
845 | clts(); /* Allow maths ops (or we recurse) */ | 849 | clts(); /* Allow maths ops (or we recurse) */ |
846 | /* | ||
847 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. | ||
848 | */ | ||
849 | if (unlikely(restore_fpu_checking(tsk))) { | ||
850 | stts(); | ||
851 | force_sig(SIGSEGV, tsk); | ||
852 | return; | ||
853 | } | ||
854 | 850 | ||
855 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ | 851 | __math_state_restore(); |
856 | tsk->fpu_counter++; | ||
857 | } | 852 | } |
858 | EXPORT_SYMBOL_GPL(math_state_restore); | 853 | EXPORT_SYMBOL_GPL(math_state_restore); |
859 | 854 | ||
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6e1a368d21d4..71f4368b357e 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -275,15 +275,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) | |||
275 | * use the TSC value at the transitions to calculate a pretty | 275 | * use the TSC value at the transitions to calculate a pretty |
276 | * good value for the TSC frequencty. | 276 | * good value for the TSC frequencty. |
277 | */ | 277 | */ |
278 | static inline int pit_verify_msb(unsigned char val) | ||
279 | { | ||
280 | /* Ignore LSB */ | ||
281 | inb(0x42); | ||
282 | return inb(0x42) == val; | ||
283 | } | ||
284 | |||
278 | static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) | 285 | static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) |
279 | { | 286 | { |
280 | int count; | 287 | int count; |
281 | u64 tsc = 0; | 288 | u64 tsc = 0; |
282 | 289 | ||
283 | for (count = 0; count < 50000; count++) { | 290 | for (count = 0; count < 50000; count++) { |
284 | /* Ignore LSB */ | 291 | if (!pit_verify_msb(val)) |
285 | inb(0x42); | ||
286 | if (inb(0x42) != val) | ||
287 | break; | 292 | break; |
288 | tsc = get_cycles(); | 293 | tsc = get_cycles(); |
289 | } | 294 | } |
@@ -336,8 +341,7 @@ static unsigned long quick_pit_calibrate(void) | |||
336 | * to do that is to just read back the 16-bit counter | 341 | * to do that is to just read back the 16-bit counter |
337 | * once from the PIT. | 342 | * once from the PIT. |
338 | */ | 343 | */ |
339 | inb(0x42); | 344 | pit_verify_msb(0); |
340 | inb(0x42); | ||
341 | 345 | ||
342 | if (pit_expect_msb(0xff, &tsc, &d1)) { | 346 | if (pit_expect_msb(0xff, &tsc, &d1)) { |
343 | for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { | 347 | for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { |
@@ -348,8 +352,19 @@ static unsigned long quick_pit_calibrate(void) | |||
348 | * Iterate until the error is less than 500 ppm | 352 | * Iterate until the error is less than 500 ppm |
349 | */ | 353 | */ |
350 | delta -= tsc; | 354 | delta -= tsc; |
351 | if (d1+d2 < delta >> 11) | 355 | if (d1+d2 >= delta >> 11) |
352 | goto success; | 356 | continue; |
357 | |||
358 | /* | ||
359 | * Check the PIT one more time to verify that | ||
360 | * all TSC reads were stable wrt the PIT. | ||
361 | * | ||
362 | * This also guarantees serialization of the | ||
363 | * last cycle read ('d2') in pit_expect_msb. | ||
364 | */ | ||
365 | if (!pit_verify_msb(0xfe - i)) | ||
366 | break; | ||
367 | goto success; | ||
353 | } | 368 | } |
354 | } | 369 | } |
355 | printk("Fast TSC calibration failed\n"); | 370 | printk("Fast TSC calibration failed\n"); |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index b263423fbe2a..95a7289e4b0c 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -441,7 +441,7 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, | |||
441 | ap.ds = __USER_DS; | 441 | ap.ds = __USER_DS; |
442 | ap.es = __USER_DS; | 442 | ap.es = __USER_DS; |
443 | ap.fs = __KERNEL_PERCPU; | 443 | ap.fs = __KERNEL_PERCPU; |
444 | ap.gs = 0; | 444 | ap.gs = __KERNEL_STACK_CANARY; |
445 | 445 | ||
446 | ap.eflags = 0; | 446 | ap.eflags = 0; |
447 | 447 | ||
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 367e87882041..9fc178255c04 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -46,11 +46,10 @@ PHDRS { | |||
46 | data PT_LOAD FLAGS(7); /* RWE */ | 46 | data PT_LOAD FLAGS(7); /* RWE */ |
47 | #ifdef CONFIG_X86_64 | 47 | #ifdef CONFIG_X86_64 |
48 | user PT_LOAD FLAGS(7); /* RWE */ | 48 | user PT_LOAD FLAGS(7); /* RWE */ |
49 | data.init PT_LOAD FLAGS(7); /* RWE */ | ||
50 | #ifdef CONFIG_SMP | 49 | #ifdef CONFIG_SMP |
51 | percpu PT_LOAD FLAGS(7); /* RWE */ | 50 | percpu PT_LOAD FLAGS(7); /* RWE */ |
52 | #endif | 51 | #endif |
53 | data.init2 PT_LOAD FLAGS(7); /* RWE */ | 52 | init PT_LOAD FLAGS(7); /* RWE */ |
54 | #endif | 53 | #endif |
55 | note PT_NOTE FLAGS(0); /* ___ */ | 54 | note PT_NOTE FLAGS(0); /* ___ */ |
56 | } | 55 | } |
@@ -103,72 +102,43 @@ SECTIONS | |||
103 | __stop___ex_table = .; | 102 | __stop___ex_table = .; |
104 | } :text = 0x9090 | 103 | } :text = 0x9090 |
105 | 104 | ||
106 | RODATA | 105 | RO_DATA(PAGE_SIZE) |
107 | 106 | ||
108 | /* Data */ | 107 | /* Data */ |
109 | . = ALIGN(PAGE_SIZE); | ||
110 | .data : AT(ADDR(.data) - LOAD_OFFSET) { | 108 | .data : AT(ADDR(.data) - LOAD_OFFSET) { |
111 | /* Start of data section */ | 109 | /* Start of data section */ |
112 | _sdata = .; | 110 | _sdata = .; |
113 | DATA_DATA | ||
114 | CONSTRUCTORS | ||
115 | 111 | ||
116 | #ifdef CONFIG_X86_64 | 112 | /* init_task */ |
117 | /* End of data section */ | 113 | INIT_TASK_DATA(THREAD_SIZE) |
118 | _edata = .; | ||
119 | #endif | ||
120 | } :data | ||
121 | 114 | ||
122 | #ifdef CONFIG_X86_32 | 115 | #ifdef CONFIG_X86_32 |
123 | /* 32 bit has nosave before _edata */ | 116 | /* 32 bit has nosave before _edata */ |
124 | . = ALIGN(PAGE_SIZE); | 117 | NOSAVE_DATA |
125 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { | ||
126 | __nosave_begin = .; | ||
127 | *(.data.nosave) | ||
128 | . = ALIGN(PAGE_SIZE); | ||
129 | __nosave_end = .; | ||
130 | } | ||
131 | #endif | 118 | #endif |
132 | 119 | ||
133 | . = ALIGN(PAGE_SIZE); | 120 | PAGE_ALIGNED_DATA(PAGE_SIZE) |
134 | .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { | ||
135 | *(.data.page_aligned) | ||
136 | *(.data.idt) | 121 | *(.data.idt) |
137 | } | ||
138 | 122 | ||
139 | #ifdef CONFIG_X86_32 | 123 | CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES) |
140 | . = ALIGN(32); | ||
141 | #else | ||
142 | . = ALIGN(PAGE_SIZE); | ||
143 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | ||
144 | #endif | ||
145 | .data.cacheline_aligned : | ||
146 | AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { | ||
147 | *(.data.cacheline_aligned) | ||
148 | } | ||
149 | 124 | ||
150 | /* rarely changed data like cpu maps */ | 125 | DATA_DATA |
151 | #ifdef CONFIG_X86_32 | 126 | CONSTRUCTORS |
152 | . = ALIGN(32); | 127 | |
153 | #else | 128 | /* rarely changed data like cpu maps */ |
154 | . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); | 129 | READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES) |
155 | #endif | ||
156 | .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { | ||
157 | *(.data.read_mostly) | ||
158 | 130 | ||
159 | #ifdef CONFIG_X86_32 | ||
160 | /* End of data section */ | 131 | /* End of data section */ |
161 | _edata = .; | 132 | _edata = .; |
162 | #endif | 133 | } :data |
163 | } | ||
164 | 134 | ||
165 | #ifdef CONFIG_X86_64 | 135 | #ifdef CONFIG_X86_64 |
166 | 136 | ||
167 | #define VSYSCALL_ADDR (-10*1024*1024) | 137 | #define VSYSCALL_ADDR (-10*1024*1024) |
168 | #define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \ | 138 | #define VSYSCALL_PHYS_ADDR ((LOADADDR(.data) + SIZEOF(.data) + \ |
169 | SIZEOF(.data.read_mostly) + 4095) & ~(4095)) | 139 | PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)) |
170 | #define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \ | 140 | #define VSYSCALL_VIRT_ADDR ((ADDR(.data) + SIZEOF(.data) + \ |
171 | SIZEOF(.data.read_mostly) + 4095) & ~(4095)) | 141 | PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)) |
172 | 142 | ||
173 | #define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) | 143 | #define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) |
174 | #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) | 144 | #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) |
@@ -234,35 +204,29 @@ SECTIONS | |||
234 | 204 | ||
235 | #endif /* CONFIG_X86_64 */ | 205 | #endif /* CONFIG_X86_64 */ |
236 | 206 | ||
237 | /* init_task */ | 207 | /* Init code and data - will be freed after init */ |
238 | . = ALIGN(THREAD_SIZE); | 208 | . = ALIGN(PAGE_SIZE); |
239 | .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { | 209 | .init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) { |
240 | *(.data.init_task) | 210 | __init_begin = .; /* paired with __init_end */ |
241 | } | 211 | } |
242 | #ifdef CONFIG_X86_64 | ||
243 | :data.init | ||
244 | #endif | ||
245 | 212 | ||
213 | #if defined(CONFIG_X86_64) && defined(CONFIG_SMP) | ||
246 | /* | 214 | /* |
247 | * smp_locks might be freed after init | 215 | * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the |
248 | * start/end must be page aligned | 216 | * output PHDR, so the next output section - .init.text - should |
217 | * start another segment - init. | ||
249 | */ | 218 | */ |
250 | . = ALIGN(PAGE_SIZE); | 219 | PERCPU_VADDR(0, :percpu) |
251 | .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { | 220 | #endif |
252 | __smp_locks = .; | ||
253 | *(.smp_locks) | ||
254 | __smp_locks_end = .; | ||
255 | . = ALIGN(PAGE_SIZE); | ||
256 | } | ||
257 | 221 | ||
258 | /* Init code and data - will be freed after init */ | ||
259 | . = ALIGN(PAGE_SIZE); | ||
260 | .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { | 222 | .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { |
261 | __init_begin = .; /* paired with __init_end */ | ||
262 | _sinittext = .; | 223 | _sinittext = .; |
263 | INIT_TEXT | 224 | INIT_TEXT |
264 | _einittext = .; | 225 | _einittext = .; |
265 | } | 226 | } |
227 | #ifdef CONFIG_X86_64 | ||
228 | :init | ||
229 | #endif | ||
266 | 230 | ||
267 | .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { | 231 | .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { |
268 | INIT_DATA | 232 | INIT_DATA |
@@ -333,17 +297,7 @@ SECTIONS | |||
333 | } | 297 | } |
334 | #endif | 298 | #endif |
335 | 299 | ||
336 | #if defined(CONFIG_X86_64) && defined(CONFIG_SMP) | 300 | #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) |
337 | /* | ||
338 | * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the | ||
339 | * output PHDR, so the next output section - __data_nosave - should | ||
340 | * start another section data.init2. Also, pda should be at the head of | ||
341 | * percpu area. Preallocate it and define the percpu offset symbol | ||
342 | * so that it can be accessed as a percpu variable. | ||
343 | */ | ||
344 | . = ALIGN(PAGE_SIZE); | ||
345 | PERCPU_VADDR(0, :percpu) | ||
346 | #else | ||
347 | PERCPU(PAGE_SIZE) | 301 | PERCPU(PAGE_SIZE) |
348 | #endif | 302 | #endif |
349 | 303 | ||
@@ -354,15 +308,22 @@ SECTIONS | |||
354 | __init_end = .; | 308 | __init_end = .; |
355 | } | 309 | } |
356 | 310 | ||
311 | /* | ||
312 | * smp_locks might be freed after init | ||
313 | * start/end must be page aligned | ||
314 | */ | ||
315 | . = ALIGN(PAGE_SIZE); | ||
316 | .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { | ||
317 | __smp_locks = .; | ||
318 | *(.smp_locks) | ||
319 | __smp_locks_end = .; | ||
320 | . = ALIGN(PAGE_SIZE); | ||
321 | } | ||
322 | |||
357 | #ifdef CONFIG_X86_64 | 323 | #ifdef CONFIG_X86_64 |
358 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { | 324 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { |
359 | . = ALIGN(PAGE_SIZE); | 325 | NOSAVE_DATA |
360 | __nosave_begin = .; | 326 | } |
361 | *(.data.nosave) | ||
362 | . = ALIGN(PAGE_SIZE); | ||
363 | __nosave_end = .; | ||
364 | } :data.init2 | ||
365 | /* use another section data.init2, see PERCPU_VADDR() above */ | ||
366 | #endif | 327 | #endif |
367 | 328 | ||
368 | /* BSS */ | 329 | /* BSS */ |
@@ -400,8 +361,8 @@ SECTIONS | |||
400 | 361 | ||
401 | 362 | ||
402 | #ifdef CONFIG_X86_32 | 363 | #ifdef CONFIG_X86_32 |
403 | ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), | 364 | . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), |
404 | "kernel image bigger than KERNEL_IMAGE_SIZE") | 365 | "kernel image bigger than KERNEL_IMAGE_SIZE"); |
405 | #else | 366 | #else |
406 | /* | 367 | /* |
407 | * Per-cpu symbols which need to be offset from __per_cpu_load | 368 | * Per-cpu symbols which need to be offset from __per_cpu_load |
@@ -414,12 +375,12 @@ INIT_PER_CPU(irq_stack_union); | |||
414 | /* | 375 | /* |
415 | * Build-time check on the image size: | 376 | * Build-time check on the image size: |
416 | */ | 377 | */ |
417 | ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), | 378 | . = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), |
418 | "kernel image bigger than KERNEL_IMAGE_SIZE") | 379 | "kernel image bigger than KERNEL_IMAGE_SIZE"); |
419 | 380 | ||
420 | #ifdef CONFIG_SMP | 381 | #ifdef CONFIG_SMP |
421 | ASSERT((per_cpu__irq_stack_union == 0), | 382 | . = ASSERT((per_cpu__irq_stack_union == 0), |
422 | "irq_stack_union is not at start of per-cpu area"); | 383 | "irq_stack_union is not at start of per-cpu area"); |
423 | #endif | 384 | #endif |
424 | 385 | ||
425 | #endif /* CONFIG_X86_32 */ | 386 | #endif /* CONFIG_X86_32 */ |
@@ -427,7 +388,7 @@ ASSERT((per_cpu__irq_stack_union == 0), | |||
427 | #ifdef CONFIG_KEXEC | 388 | #ifdef CONFIG_KEXEC |
428 | #include <asm/kexec.h> | 389 | #include <asm/kexec.h> |
429 | 390 | ||
430 | ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, | 391 | . = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, |
431 | "kexec control code size is too big") | 392 | "kexec control code size is too big"); |
432 | #endif | 393 | #endif |
433 | 394 | ||
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 4d6f0d293ee2..21f68e00524f 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -104,6 +104,9 @@ static s64 __kpit_elapsed(struct kvm *kvm) | |||
104 | ktime_t remaining; | 104 | ktime_t remaining; |
105 | struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; | 105 | struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; |
106 | 106 | ||
107 | if (!ps->pit_timer.period) | ||
108 | return 0; | ||
109 | |||
107 | /* | 110 | /* |
108 | * The Counter does not stop when it reaches zero. In | 111 | * The Counter does not stop when it reaches zero. In |
109 | * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to | 112 | * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5c3d6e81a7dc..0ef5bb2b4043 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -489,16 +489,20 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage) | |||
489 | * | 489 | * |
490 | * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc | 490 | * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc |
491 | * containing more mappings. | 491 | * containing more mappings. |
492 | * | ||
493 | * Returns the number of rmap entries before the spte was added or zero if | ||
494 | * the spte was not added. | ||
495 | * | ||
492 | */ | 496 | */ |
493 | static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) | 497 | static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) |
494 | { | 498 | { |
495 | struct kvm_mmu_page *sp; | 499 | struct kvm_mmu_page *sp; |
496 | struct kvm_rmap_desc *desc; | 500 | struct kvm_rmap_desc *desc; |
497 | unsigned long *rmapp; | 501 | unsigned long *rmapp; |
498 | int i; | 502 | int i, count = 0; |
499 | 503 | ||
500 | if (!is_rmap_pte(*spte)) | 504 | if (!is_rmap_pte(*spte)) |
501 | return; | 505 | return count; |
502 | gfn = unalias_gfn(vcpu->kvm, gfn); | 506 | gfn = unalias_gfn(vcpu->kvm, gfn); |
503 | sp = page_header(__pa(spte)); | 507 | sp = page_header(__pa(spte)); |
504 | sp->gfns[spte - sp->spt] = gfn; | 508 | sp->gfns[spte - sp->spt] = gfn; |
@@ -515,8 +519,10 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) | |||
515 | } else { | 519 | } else { |
516 | rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); | 520 | rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); |
517 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); | 521 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); |
518 | while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) | 522 | while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) { |
519 | desc = desc->more; | 523 | desc = desc->more; |
524 | count += RMAP_EXT; | ||
525 | } | ||
520 | if (desc->shadow_ptes[RMAP_EXT-1]) { | 526 | if (desc->shadow_ptes[RMAP_EXT-1]) { |
521 | desc->more = mmu_alloc_rmap_desc(vcpu); | 527 | desc->more = mmu_alloc_rmap_desc(vcpu); |
522 | desc = desc->more; | 528 | desc = desc->more; |
@@ -525,6 +531,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) | |||
525 | ; | 531 | ; |
526 | desc->shadow_ptes[i] = spte; | 532 | desc->shadow_ptes[i] = spte; |
527 | } | 533 | } |
534 | return count; | ||
528 | } | 535 | } |
529 | 536 | ||
530 | static void rmap_desc_remove_entry(unsigned long *rmapp, | 537 | static void rmap_desc_remove_entry(unsigned long *rmapp, |
@@ -754,6 +761,19 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) | |||
754 | return young; | 761 | return young; |
755 | } | 762 | } |
756 | 763 | ||
764 | #define RMAP_RECYCLE_THRESHOLD 1000 | ||
765 | |||
766 | static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage) | ||
767 | { | ||
768 | unsigned long *rmapp; | ||
769 | |||
770 | gfn = unalias_gfn(vcpu->kvm, gfn); | ||
771 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage); | ||
772 | |||
773 | kvm_unmap_rmapp(vcpu->kvm, rmapp); | ||
774 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
775 | } | ||
776 | |||
757 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) | 777 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) |
758 | { | 778 | { |
759 | return kvm_handle_hva(kvm, hva, kvm_age_rmapp); | 779 | return kvm_handle_hva(kvm, hva, kvm_age_rmapp); |
@@ -1407,24 +1427,25 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
1407 | */ | 1427 | */ |
1408 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) | 1428 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) |
1409 | { | 1429 | { |
1430 | int used_pages; | ||
1431 | |||
1432 | used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; | ||
1433 | used_pages = max(0, used_pages); | ||
1434 | |||
1410 | /* | 1435 | /* |
1411 | * If we set the number of mmu pages to be smaller be than the | 1436 | * If we set the number of mmu pages to be smaller be than the |
1412 | * number of actived pages , we must to free some mmu pages before we | 1437 | * number of actived pages , we must to free some mmu pages before we |
1413 | * change the value | 1438 | * change the value |
1414 | */ | 1439 | */ |
1415 | 1440 | ||
1416 | if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) > | 1441 | if (used_pages > kvm_nr_mmu_pages) { |
1417 | kvm_nr_mmu_pages) { | 1442 | while (used_pages > kvm_nr_mmu_pages) { |
1418 | int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages | ||
1419 | - kvm->arch.n_free_mmu_pages; | ||
1420 | |||
1421 | while (n_used_mmu_pages > kvm_nr_mmu_pages) { | ||
1422 | struct kvm_mmu_page *page; | 1443 | struct kvm_mmu_page *page; |
1423 | 1444 | ||
1424 | page = container_of(kvm->arch.active_mmu_pages.prev, | 1445 | page = container_of(kvm->arch.active_mmu_pages.prev, |
1425 | struct kvm_mmu_page, link); | 1446 | struct kvm_mmu_page, link); |
1426 | kvm_mmu_zap_page(kvm, page); | 1447 | kvm_mmu_zap_page(kvm, page); |
1427 | n_used_mmu_pages--; | 1448 | used_pages--; |
1428 | } | 1449 | } |
1429 | kvm->arch.n_free_mmu_pages = 0; | 1450 | kvm->arch.n_free_mmu_pages = 0; |
1430 | } | 1451 | } |
@@ -1740,6 +1761,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1740 | { | 1761 | { |
1741 | int was_rmapped = 0; | 1762 | int was_rmapped = 0; |
1742 | int was_writeble = is_writeble_pte(*shadow_pte); | 1763 | int was_writeble = is_writeble_pte(*shadow_pte); |
1764 | int rmap_count; | ||
1743 | 1765 | ||
1744 | pgprintk("%s: spte %llx access %x write_fault %d" | 1766 | pgprintk("%s: spte %llx access %x write_fault %d" |
1745 | " user_fault %d gfn %lx\n", | 1767 | " user_fault %d gfn %lx\n", |
@@ -1781,9 +1803,11 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1781 | 1803 | ||
1782 | page_header_update_slot(vcpu->kvm, shadow_pte, gfn); | 1804 | page_header_update_slot(vcpu->kvm, shadow_pte, gfn); |
1783 | if (!was_rmapped) { | 1805 | if (!was_rmapped) { |
1784 | rmap_add(vcpu, shadow_pte, gfn, largepage); | 1806 | rmap_count = rmap_add(vcpu, shadow_pte, gfn, largepage); |
1785 | if (!is_rmap_pte(*shadow_pte)) | 1807 | if (!is_rmap_pte(*shadow_pte)) |
1786 | kvm_release_pfn_clean(pfn); | 1808 | kvm_release_pfn_clean(pfn); |
1809 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) | ||
1810 | rmap_recycle(vcpu, gfn, largepage); | ||
1787 | } else { | 1811 | } else { |
1788 | if (was_writeble) | 1812 | if (was_writeble) |
1789 | kvm_release_pfn_dirty(pfn); | 1813 | kvm_release_pfn_dirty(pfn); |
@@ -2157,7 +2181,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2157 | else | 2181 | else |
2158 | /* 32 bits PSE 4MB page */ | 2182 | /* 32 bits PSE 4MB page */ |
2159 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); | 2183 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); |
2160 | context->rsvd_bits_mask[1][0] = ~0ull; | 2184 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; |
2161 | break; | 2185 | break; |
2162 | case PT32E_ROOT_LEVEL: | 2186 | case PT32E_ROOT_LEVEL: |
2163 | context->rsvd_bits_mask[0][2] = | 2187 | context->rsvd_bits_mask[0][2] = |
@@ -2170,7 +2194,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2170 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2194 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
2171 | rsvd_bits(maxphyaddr, 62) | | 2195 | rsvd_bits(maxphyaddr, 62) | |
2172 | rsvd_bits(13, 20); /* large page */ | 2196 | rsvd_bits(13, 20); /* large page */ |
2173 | context->rsvd_bits_mask[1][0] = ~0ull; | 2197 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; |
2174 | break; | 2198 | break; |
2175 | case PT64_ROOT_LEVEL: | 2199 | case PT64_ROOT_LEVEL: |
2176 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | | 2200 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | |
@@ -2186,7 +2210,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2186 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2210 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
2187 | rsvd_bits(maxphyaddr, 51) | | 2211 | rsvd_bits(maxphyaddr, 51) | |
2188 | rsvd_bits(13, 20); /* large page */ | 2212 | rsvd_bits(13, 20); /* large page */ |
2189 | context->rsvd_bits_mask[1][0] = ~0ull; | 2213 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; |
2190 | break; | 2214 | break; |
2191 | } | 2215 | } |
2192 | } | 2216 | } |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 258e4591e1ca..67785f635399 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -281,7 +281,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
281 | { | 281 | { |
282 | unsigned access = gw->pt_access; | 282 | unsigned access = gw->pt_access; |
283 | struct kvm_mmu_page *shadow_page; | 283 | struct kvm_mmu_page *shadow_page; |
284 | u64 spte, *sptep; | 284 | u64 spte, *sptep = NULL; |
285 | int direct; | 285 | int direct; |
286 | gfn_t table_gfn; | 286 | gfn_t table_gfn; |
287 | int r; | 287 | int r; |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 71510e07e69e..b1f658ad2f06 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -711,6 +711,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
711 | svm->vmcb->control.tsc_offset += delta; | 711 | svm->vmcb->control.tsc_offset += delta; |
712 | vcpu->cpu = cpu; | 712 | vcpu->cpu = cpu; |
713 | kvm_migrate_timers(vcpu); | 713 | kvm_migrate_timers(vcpu); |
714 | svm->asid_generation = 0; | ||
714 | } | 715 | } |
715 | 716 | ||
716 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 717 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
@@ -1031,7 +1032,6 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data) | |||
1031 | svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; | 1032 | svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; |
1032 | } | 1033 | } |
1033 | 1034 | ||
1034 | svm->vcpu.cpu = svm_data->cpu; | ||
1035 | svm->asid_generation = svm_data->asid_generation; | 1035 | svm->asid_generation = svm_data->asid_generation; |
1036 | svm->vmcb->control.asid = svm_data->next_asid++; | 1036 | svm->vmcb->control.asid = svm_data->next_asid++; |
1037 | } | 1037 | } |
@@ -2300,8 +2300,8 @@ static void pre_svm_run(struct vcpu_svm *svm) | |||
2300 | struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); | 2300 | struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); |
2301 | 2301 | ||
2302 | svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; | 2302 | svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; |
2303 | if (svm->vcpu.cpu != cpu || | 2303 | /* FIXME: handle wraparound of asid_generation */ |
2304 | svm->asid_generation != svm_data->asid_generation) | 2304 | if (svm->asid_generation != svm_data->asid_generation) |
2305 | new_asid(svm, svm_data); | 2305 | new_asid(svm, svm_data); |
2306 | } | 2306 | } |
2307 | 2307 | ||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e770bf349ec4..29f912927a58 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -3012,6 +3012,12 @@ static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3012 | return 1; | 3012 | return 1; |
3013 | } | 3013 | } |
3014 | 3014 | ||
3015 | static int handle_vmx_insn(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
3016 | { | ||
3017 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3018 | return 1; | ||
3019 | } | ||
3020 | |||
3015 | static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3021 | static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
3016 | { | 3022 | { |
3017 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3023 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
@@ -3151,8 +3157,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | |||
3151 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3157 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3152 | enum emulation_result err = EMULATE_DONE; | 3158 | enum emulation_result err = EMULATE_DONE; |
3153 | 3159 | ||
3154 | preempt_enable(); | ||
3155 | local_irq_enable(); | 3160 | local_irq_enable(); |
3161 | preempt_enable(); | ||
3156 | 3162 | ||
3157 | while (!guest_state_valid(vcpu)) { | 3163 | while (!guest_state_valid(vcpu)) { |
3158 | err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | 3164 | err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); |
@@ -3162,7 +3168,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | |||
3162 | 3168 | ||
3163 | if (err != EMULATE_DONE) { | 3169 | if (err != EMULATE_DONE) { |
3164 | kvm_report_emulation_failure(vcpu, "emulation failure"); | 3170 | kvm_report_emulation_failure(vcpu, "emulation failure"); |
3165 | return; | 3171 | break; |
3166 | } | 3172 | } |
3167 | 3173 | ||
3168 | if (signal_pending(current)) | 3174 | if (signal_pending(current)) |
@@ -3171,8 +3177,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | |||
3171 | schedule(); | 3177 | schedule(); |
3172 | } | 3178 | } |
3173 | 3179 | ||
3174 | local_irq_disable(); | ||
3175 | preempt_disable(); | 3180 | preempt_disable(); |
3181 | local_irq_disable(); | ||
3176 | 3182 | ||
3177 | vmx->invalid_state_emulation_result = err; | 3183 | vmx->invalid_state_emulation_result = err; |
3178 | } | 3184 | } |
@@ -3198,6 +3204,15 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
3198 | [EXIT_REASON_HLT] = handle_halt, | 3204 | [EXIT_REASON_HLT] = handle_halt, |
3199 | [EXIT_REASON_INVLPG] = handle_invlpg, | 3205 | [EXIT_REASON_INVLPG] = handle_invlpg, |
3200 | [EXIT_REASON_VMCALL] = handle_vmcall, | 3206 | [EXIT_REASON_VMCALL] = handle_vmcall, |
3207 | [EXIT_REASON_VMCLEAR] = handle_vmx_insn, | ||
3208 | [EXIT_REASON_VMLAUNCH] = handle_vmx_insn, | ||
3209 | [EXIT_REASON_VMPTRLD] = handle_vmx_insn, | ||
3210 | [EXIT_REASON_VMPTRST] = handle_vmx_insn, | ||
3211 | [EXIT_REASON_VMREAD] = handle_vmx_insn, | ||
3212 | [EXIT_REASON_VMRESUME] = handle_vmx_insn, | ||
3213 | [EXIT_REASON_VMWRITE] = handle_vmx_insn, | ||
3214 | [EXIT_REASON_VMOFF] = handle_vmx_insn, | ||
3215 | [EXIT_REASON_VMON] = handle_vmx_insn, | ||
3201 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, | 3216 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, |
3202 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, | 3217 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, |
3203 | [EXIT_REASON_WBINVD] = handle_wbinvd, | 3218 | [EXIT_REASON_WBINVD] = handle_wbinvd, |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 249540f98513..633ccc7400a4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -704,11 +704,48 @@ static bool msr_mtrr_valid(unsigned msr) | |||
704 | return false; | 704 | return false; |
705 | } | 705 | } |
706 | 706 | ||
707 | static bool valid_pat_type(unsigned t) | ||
708 | { | ||
709 | return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */ | ||
710 | } | ||
711 | |||
712 | static bool valid_mtrr_type(unsigned t) | ||
713 | { | ||
714 | return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ | ||
715 | } | ||
716 | |||
717 | static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) | ||
718 | { | ||
719 | int i; | ||
720 | |||
721 | if (!msr_mtrr_valid(msr)) | ||
722 | return false; | ||
723 | |||
724 | if (msr == MSR_IA32_CR_PAT) { | ||
725 | for (i = 0; i < 8; i++) | ||
726 | if (!valid_pat_type((data >> (i * 8)) & 0xff)) | ||
727 | return false; | ||
728 | return true; | ||
729 | } else if (msr == MSR_MTRRdefType) { | ||
730 | if (data & ~0xcff) | ||
731 | return false; | ||
732 | return valid_mtrr_type(data & 0xff); | ||
733 | } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) { | ||
734 | for (i = 0; i < 8 ; i++) | ||
735 | if (!valid_mtrr_type((data >> (i * 8)) & 0xff)) | ||
736 | return false; | ||
737 | return true; | ||
738 | } | ||
739 | |||
740 | /* variable MTRRs */ | ||
741 | return valid_mtrr_type(data & 0xff); | ||
742 | } | ||
743 | |||
707 | static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 744 | static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
708 | { | 745 | { |
709 | u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; | 746 | u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; |
710 | 747 | ||
711 | if (!msr_mtrr_valid(msr)) | 748 | if (!mtrr_valid(vcpu, msr, data)) |
712 | return 1; | 749 | return 1; |
713 | 750 | ||
714 | if (msr == MSR_MTRRdefType) { | 751 | if (msr == MSR_MTRRdefType) { |
@@ -898,6 +935,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
898 | case MSR_VM_HSAVE_PA: | 935 | case MSR_VM_HSAVE_PA: |
899 | case MSR_P6_EVNTSEL0: | 936 | case MSR_P6_EVNTSEL0: |
900 | case MSR_P6_EVNTSEL1: | 937 | case MSR_P6_EVNTSEL1: |
938 | case MSR_K7_EVNTSEL0: | ||
901 | data = 0; | 939 | data = 0; |
902 | break; | 940 | break; |
903 | case MSR_MTRRcap: | 941 | case MSR_MTRRcap: |
@@ -1078,14 +1116,13 @@ long kvm_arch_dev_ioctl(struct file *filp, | |||
1078 | if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) | 1116 | if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) |
1079 | goto out; | 1117 | goto out; |
1080 | r = -E2BIG; | 1118 | r = -E2BIG; |
1081 | if (n < num_msrs_to_save) | 1119 | if (n < msr_list.nmsrs) |
1082 | goto out; | 1120 | goto out; |
1083 | r = -EFAULT; | 1121 | r = -EFAULT; |
1084 | if (copy_to_user(user_msr_list->indices, &msrs_to_save, | 1122 | if (copy_to_user(user_msr_list->indices, &msrs_to_save, |
1085 | num_msrs_to_save * sizeof(u32))) | 1123 | num_msrs_to_save * sizeof(u32))) |
1086 | goto out; | 1124 | goto out; |
1087 | if (copy_to_user(user_msr_list->indices | 1125 | if (copy_to_user(user_msr_list->indices + num_msrs_to_save, |
1088 | + num_msrs_to_save * sizeof(u32), | ||
1089 | &emulated_msrs, | 1126 | &emulated_msrs, |
1090 | ARRAY_SIZE(emulated_msrs) * sizeof(u32))) | 1127 | ARRAY_SIZE(emulated_msrs) * sizeof(u32))) |
1091 | goto out; | 1128 | goto out; |
@@ -2260,12 +2297,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
2260 | unsigned int bytes, | 2297 | unsigned int bytes, |
2261 | struct kvm_vcpu *vcpu) | 2298 | struct kvm_vcpu *vcpu) |
2262 | { | 2299 | { |
2263 | static int reported; | 2300 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); |
2264 | |||
2265 | if (!reported) { | ||
2266 | reported = 1; | ||
2267 | printk(KERN_WARNING "kvm: emulating exchange as write\n"); | ||
2268 | } | ||
2269 | #ifndef CONFIG_X86_64 | 2301 | #ifndef CONFIG_X86_64 |
2270 | /* guests cmpxchg8b have to be emulated atomically */ | 2302 | /* guests cmpxchg8b have to be emulated atomically */ |
2271 | if (bytes == 8) { | 2303 | if (bytes == 8) { |
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index c1b6c232e02b..616de4628d60 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c | |||
@@ -1361,7 +1361,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
1361 | return 0; | 1361 | return 0; |
1362 | } | 1362 | } |
1363 | 1363 | ||
1364 | void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) | 1364 | static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) |
1365 | { | 1365 | { |
1366 | u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask); | 1366 | u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask); |
1367 | /* | 1367 | /* |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 7bc65f0f62c4..d677fa9ca650 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -22,7 +22,8 @@ | |||
22 | * | 22 | * |
23 | * So how does the kernel know it's a Guest? We'll see that later, but let's | 23 | * So how does the kernel know it's a Guest? We'll see that later, but let's |
24 | * just say that we end up here where we replace the native functions in various | 24 | * just say that we end up here where we replace the native functions in various |
25 | * "paravirt" structures with our Guest versions, then boot like normal. :*/ | 25 | * "paravirt" structures with our Guest versions, then boot like normal. |
26 | :*/ | ||
26 | 27 | ||
27 | /* | 28 | /* |
28 | * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation. | 29 | * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation. |
@@ -74,7 +75,8 @@ | |||
74 | * | 75 | * |
75 | * The Guest in our tale is a simple creature: identical to the Host but | 76 | * The Guest in our tale is a simple creature: identical to the Host but |
76 | * behaving in simplified but equivalent ways. In particular, the Guest is the | 77 | * behaving in simplified but equivalent ways. In particular, the Guest is the |
77 | * same kernel as the Host (or at least, built from the same source code). :*/ | 78 | * same kernel as the Host (or at least, built from the same source code). |
79 | :*/ | ||
78 | 80 | ||
79 | struct lguest_data lguest_data = { | 81 | struct lguest_data lguest_data = { |
80 | .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, | 82 | .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, |
@@ -85,7 +87,8 @@ struct lguest_data lguest_data = { | |||
85 | .syscall_vec = SYSCALL_VECTOR, | 87 | .syscall_vec = SYSCALL_VECTOR, |
86 | }; | 88 | }; |
87 | 89 | ||
88 | /*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a | 90 | /*G:037 |
91 | * async_hcall() is pretty simple: I'm quite proud of it really. We have a | ||
89 | * ring buffer of stored hypercalls which the Host will run through next time we | 92 | * ring buffer of stored hypercalls which the Host will run through next time we |
90 | * do a normal hypercall. Each entry in the ring has 5 slots for the hypercall | 93 | * do a normal hypercall. Each entry in the ring has 5 slots for the hypercall |
91 | * arguments, and a "hcall_status" word which is 0 if the call is ready to go, | 94 | * arguments, and a "hcall_status" word which is 0 if the call is ready to go, |
@@ -94,7 +97,8 @@ struct lguest_data lguest_data = { | |||
94 | * If we come around to a slot which hasn't been finished, then the table is | 97 | * If we come around to a slot which hasn't been finished, then the table is |
95 | * full and we just make the hypercall directly. This has the nice side | 98 | * full and we just make the hypercall directly. This has the nice side |
96 | * effect of causing the Host to run all the stored calls in the ring buffer | 99 | * effect of causing the Host to run all the stored calls in the ring buffer |
97 | * which empties it for next time! */ | 100 | * which empties it for next time! |
101 | */ | ||
98 | static void async_hcall(unsigned long call, unsigned long arg1, | 102 | static void async_hcall(unsigned long call, unsigned long arg1, |
99 | unsigned long arg2, unsigned long arg3, | 103 | unsigned long arg2, unsigned long arg3, |
100 | unsigned long arg4) | 104 | unsigned long arg4) |
@@ -103,9 +107,11 @@ static void async_hcall(unsigned long call, unsigned long arg1, | |||
103 | static unsigned int next_call; | 107 | static unsigned int next_call; |
104 | unsigned long flags; | 108 | unsigned long flags; |
105 | 109 | ||
106 | /* Disable interrupts if not already disabled: we don't want an | 110 | /* |
111 | * Disable interrupts if not already disabled: we don't want an | ||
107 | * interrupt handler making a hypercall while we're already doing | 112 | * interrupt handler making a hypercall while we're already doing |
108 | * one! */ | 113 | * one! |
114 | */ | ||
109 | local_irq_save(flags); | 115 | local_irq_save(flags); |
110 | if (lguest_data.hcall_status[next_call] != 0xFF) { | 116 | if (lguest_data.hcall_status[next_call] != 0xFF) { |
111 | /* Table full, so do normal hcall which will flush table. */ | 117 | /* Table full, so do normal hcall which will flush table. */ |
@@ -125,8 +131,9 @@ static void async_hcall(unsigned long call, unsigned long arg1, | |||
125 | local_irq_restore(flags); | 131 | local_irq_restore(flags); |
126 | } | 132 | } |
127 | 133 | ||
128 | /*G:035 Notice the lazy_hcall() above, rather than hcall(). This is our first | 134 | /*G:035 |
129 | * real optimization trick! | 135 | * Notice the lazy_hcall() above, rather than hcall(). This is our first real |
136 | * optimization trick! | ||
130 | * | 137 | * |
131 | * When lazy_mode is set, it means we're allowed to defer all hypercalls and do | 138 | * When lazy_mode is set, it means we're allowed to defer all hypercalls and do |
132 | * them as a batch when lazy_mode is eventually turned off. Because hypercalls | 139 | * them as a batch when lazy_mode is eventually turned off. Because hypercalls |
@@ -136,7 +143,8 @@ static void async_hcall(unsigned long call, unsigned long arg1, | |||
136 | * lguest_leave_lazy_mode(). | 143 | * lguest_leave_lazy_mode(). |
137 | * | 144 | * |
138 | * So, when we're in lazy mode, we call async_hcall() to store the call for | 145 | * So, when we're in lazy mode, we call async_hcall() to store the call for |
139 | * future processing: */ | 146 | * future processing: |
147 | */ | ||
140 | static void lazy_hcall1(unsigned long call, | 148 | static void lazy_hcall1(unsigned long call, |
141 | unsigned long arg1) | 149 | unsigned long arg1) |
142 | { | 150 | { |
@@ -146,6 +154,7 @@ static void lazy_hcall1(unsigned long call, | |||
146 | async_hcall(call, arg1, 0, 0, 0); | 154 | async_hcall(call, arg1, 0, 0, 0); |
147 | } | 155 | } |
148 | 156 | ||
157 | /* You can imagine what lazy_hcall2, 3 and 4 look like. :*/ | ||
149 | static void lazy_hcall2(unsigned long call, | 158 | static void lazy_hcall2(unsigned long call, |
150 | unsigned long arg1, | 159 | unsigned long arg1, |
151 | unsigned long arg2) | 160 | unsigned long arg2) |
@@ -181,8 +190,10 @@ static void lazy_hcall4(unsigned long call, | |||
181 | } | 190 | } |
182 | #endif | 191 | #endif |
183 | 192 | ||
184 | /* When lazy mode is turned off reset the per-cpu lazy mode variable and then | 193 | /*G:036 |
185 | * issue the do-nothing hypercall to flush any stored calls. */ | 194 | * When lazy mode is turned off reset the per-cpu lazy mode variable and then |
195 | * issue the do-nothing hypercall to flush any stored calls. | ||
196 | :*/ | ||
186 | static void lguest_leave_lazy_mmu_mode(void) | 197 | static void lguest_leave_lazy_mmu_mode(void) |
187 | { | 198 | { |
188 | kvm_hypercall0(LHCALL_FLUSH_ASYNC); | 199 | kvm_hypercall0(LHCALL_FLUSH_ASYNC); |
@@ -208,9 +219,11 @@ static void lguest_end_context_switch(struct task_struct *next) | |||
208 | * check there before it tries to deliver an interrupt. | 219 | * check there before it tries to deliver an interrupt. |
209 | */ | 220 | */ |
210 | 221 | ||
211 | /* save_flags() is expected to return the processor state (ie. "flags"). The | 222 | /* |
223 | * save_flags() is expected to return the processor state (ie. "flags"). The | ||
212 | * flags word contains all kinds of stuff, but in practice Linux only cares | 224 | * flags word contains all kinds of stuff, but in practice Linux only cares |
213 | * about the interrupt flag. Our "save_flags()" just returns that. */ | 225 | * about the interrupt flag. Our "save_flags()" just returns that. |
226 | */ | ||
214 | static unsigned long save_fl(void) | 227 | static unsigned long save_fl(void) |
215 | { | 228 | { |
216 | return lguest_data.irq_enabled; | 229 | return lguest_data.irq_enabled; |
@@ -222,13 +235,15 @@ static void irq_disable(void) | |||
222 | lguest_data.irq_enabled = 0; | 235 | lguest_data.irq_enabled = 0; |
223 | } | 236 | } |
224 | 237 | ||
225 | /* Let's pause a moment. Remember how I said these are called so often? | 238 | /* |
239 | * Let's pause a moment. Remember how I said these are called so often? | ||
226 | * Jeremy Fitzhardinge optimized them so hard early in 2009 that he had to | 240 | * Jeremy Fitzhardinge optimized them so hard early in 2009 that he had to |
227 | * break some rules. In particular, these functions are assumed to save their | 241 | * break some rules. In particular, these functions are assumed to save their |
228 | * own registers if they need to: normal C functions assume they can trash the | 242 | * own registers if they need to: normal C functions assume they can trash the |
229 | * eax register. To use normal C functions, we use | 243 | * eax register. To use normal C functions, we use |
230 | * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the | 244 | * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the |
231 | * C function, then restores it. */ | 245 | * C function, then restores it. |
246 | */ | ||
232 | PV_CALLEE_SAVE_REGS_THUNK(save_fl); | 247 | PV_CALLEE_SAVE_REGS_THUNK(save_fl); |
233 | PV_CALLEE_SAVE_REGS_THUNK(irq_disable); | 248 | PV_CALLEE_SAVE_REGS_THUNK(irq_disable); |
234 | /*:*/ | 249 | /*:*/ |
@@ -237,18 +252,18 @@ PV_CALLEE_SAVE_REGS_THUNK(irq_disable); | |||
237 | extern void lg_irq_enable(void); | 252 | extern void lg_irq_enable(void); |
238 | extern void lg_restore_fl(unsigned long flags); | 253 | extern void lg_restore_fl(unsigned long flags); |
239 | 254 | ||
240 | /*M:003 Note that we don't check for outstanding interrupts when we re-enable | 255 | /*M:003 |
241 | * them (or when we unmask an interrupt). This seems to work for the moment, | 256 | * We could be more efficient in our checking of outstanding interrupts, rather |
242 | * since interrupts are rare and we'll just get the interrupt on the next timer | 257 | * than using a branch. One way would be to put the "irq_enabled" field in a |
243 | * tick, but now we can run with CONFIG_NO_HZ, we should revisit this. One way | 258 | * page by itself, and have the Host write-protect it when an interrupt comes |
244 | * would be to put the "irq_enabled" field in a page by itself, and have the | 259 | * in when irqs are disabled. There will then be a page fault as soon as |
245 | * Host write-protect it when an interrupt comes in when irqs are disabled. | 260 | * interrupts are re-enabled. |
246 | * There will then be a page fault as soon as interrupts are re-enabled. | ||
247 | * | 261 | * |
248 | * A better method is to implement soft interrupt disable generally for x86: | 262 | * A better method is to implement soft interrupt disable generally for x86: |
249 | * instead of disabling interrupts, we set a flag. If an interrupt does come | 263 | * instead of disabling interrupts, we set a flag. If an interrupt does come |
250 | * in, we then disable them for real. This is uncommon, so we could simply use | 264 | * in, we then disable them for real. This is uncommon, so we could simply use |
251 | * a hypercall for interrupt control and not worry about efficiency. :*/ | 265 | * a hypercall for interrupt control and not worry about efficiency. |
266 | :*/ | ||
252 | 267 | ||
253 | /*G:034 | 268 | /*G:034 |
254 | * The Interrupt Descriptor Table (IDT). | 269 | * The Interrupt Descriptor Table (IDT). |
@@ -261,10 +276,12 @@ extern void lg_restore_fl(unsigned long flags); | |||
261 | static void lguest_write_idt_entry(gate_desc *dt, | 276 | static void lguest_write_idt_entry(gate_desc *dt, |
262 | int entrynum, const gate_desc *g) | 277 | int entrynum, const gate_desc *g) |
263 | { | 278 | { |
264 | /* The gate_desc structure is 8 bytes long: we hand it to the Host in | 279 | /* |
280 | * The gate_desc structure is 8 bytes long: we hand it to the Host in | ||
265 | * two 32-bit chunks. The whole 32-bit kernel used to hand descriptors | 281 | * two 32-bit chunks. The whole 32-bit kernel used to hand descriptors |
266 | * around like this; typesafety wasn't a big concern in Linux's early | 282 | * around like this; typesafety wasn't a big concern in Linux's early |
267 | * years. */ | 283 | * years. |
284 | */ | ||
268 | u32 *desc = (u32 *)g; | 285 | u32 *desc = (u32 *)g; |
269 | /* Keep the local copy up to date. */ | 286 | /* Keep the local copy up to date. */ |
270 | native_write_idt_entry(dt, entrynum, g); | 287 | native_write_idt_entry(dt, entrynum, g); |
@@ -272,9 +289,11 @@ static void lguest_write_idt_entry(gate_desc *dt, | |||
272 | kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]); | 289 | kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]); |
273 | } | 290 | } |
274 | 291 | ||
275 | /* Changing to a different IDT is very rare: we keep the IDT up-to-date every | 292 | /* |
293 | * Changing to a different IDT is very rare: we keep the IDT up-to-date every | ||
276 | * time it is written, so we can simply loop through all entries and tell the | 294 | * time it is written, so we can simply loop through all entries and tell the |
277 | * Host about them. */ | 295 | * Host about them. |
296 | */ | ||
278 | static void lguest_load_idt(const struct desc_ptr *desc) | 297 | static void lguest_load_idt(const struct desc_ptr *desc) |
279 | { | 298 | { |
280 | unsigned int i; | 299 | unsigned int i; |
@@ -305,9 +324,11 @@ static void lguest_load_gdt(const struct desc_ptr *desc) | |||
305 | kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b); | 324 | kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b); |
306 | } | 325 | } |
307 | 326 | ||
308 | /* For a single GDT entry which changes, we do the lazy thing: alter our GDT, | 327 | /* |
328 | * For a single GDT entry which changes, we do the lazy thing: alter our GDT, | ||
309 | * then tell the Host to reload the entire thing. This operation is so rare | 329 | * then tell the Host to reload the entire thing. This operation is so rare |
310 | * that this naive implementation is reasonable. */ | 330 | * that this naive implementation is reasonable. |
331 | */ | ||
311 | static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, | 332 | static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, |
312 | const void *desc, int type) | 333 | const void *desc, int type) |
313 | { | 334 | { |
@@ -317,29 +338,36 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, | |||
317 | dt[entrynum].a, dt[entrynum].b); | 338 | dt[entrynum].a, dt[entrynum].b); |
318 | } | 339 | } |
319 | 340 | ||
320 | /* OK, I lied. There are three "thread local storage" GDT entries which change | 341 | /* |
342 | * OK, I lied. There are three "thread local storage" GDT entries which change | ||
321 | * on every context switch (these three entries are how glibc implements | 343 | * on every context switch (these three entries are how glibc implements |
322 | * __thread variables). So we have a hypercall specifically for this case. */ | 344 | * __thread variables). So we have a hypercall specifically for this case. |
345 | */ | ||
323 | static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) | 346 | static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) |
324 | { | 347 | { |
325 | /* There's one problem which normal hardware doesn't have: the Host | 348 | /* |
349 | * There's one problem which normal hardware doesn't have: the Host | ||
326 | * can't handle us removing entries we're currently using. So we clear | 350 | * can't handle us removing entries we're currently using. So we clear |
327 | * the GS register here: if it's needed it'll be reloaded anyway. */ | 351 | * the GS register here: if it's needed it'll be reloaded anyway. |
352 | */ | ||
328 | lazy_load_gs(0); | 353 | lazy_load_gs(0); |
329 | lazy_hcall2(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu); | 354 | lazy_hcall2(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu); |
330 | } | 355 | } |
331 | 356 | ||
332 | /*G:038 That's enough excitement for now, back to ploughing through each of | 357 | /*G:038 |
333 | * the different pv_ops structures (we're about 1/3 of the way through). | 358 | * That's enough excitement for now, back to ploughing through each of the |
359 | * different pv_ops structures (we're about 1/3 of the way through). | ||
334 | * | 360 | * |
335 | * This is the Local Descriptor Table, another weird Intel thingy. Linux only | 361 | * This is the Local Descriptor Table, another weird Intel thingy. Linux only |
336 | * uses this for some strange applications like Wine. We don't do anything | 362 | * uses this for some strange applications like Wine. We don't do anything |
337 | * here, so they'll get an informative and friendly Segmentation Fault. */ | 363 | * here, so they'll get an informative and friendly Segmentation Fault. |
364 | */ | ||
338 | static void lguest_set_ldt(const void *addr, unsigned entries) | 365 | static void lguest_set_ldt(const void *addr, unsigned entries) |
339 | { | 366 | { |
340 | } | 367 | } |
341 | 368 | ||
342 | /* This loads a GDT entry into the "Task Register": that entry points to a | 369 | /* |
370 | * This loads a GDT entry into the "Task Register": that entry points to a | ||
343 | * structure called the Task State Segment. Some comments scattered though the | 371 | * structure called the Task State Segment. Some comments scattered though the |
344 | * kernel code indicate that this used for task switching in ages past, along | 372 | * kernel code indicate that this used for task switching in ages past, along |
345 | * with blood sacrifice and astrology. | 373 | * with blood sacrifice and astrology. |
@@ -347,19 +375,21 @@ static void lguest_set_ldt(const void *addr, unsigned entries) | |||
347 | * Now there's nothing interesting in here that we don't get told elsewhere. | 375 | * Now there's nothing interesting in here that we don't get told elsewhere. |
348 | * But the native version uses the "ltr" instruction, which makes the Host | 376 | * But the native version uses the "ltr" instruction, which makes the Host |
349 | * complain to the Guest about a Segmentation Fault and it'll oops. So we | 377 | * complain to the Guest about a Segmentation Fault and it'll oops. So we |
350 | * override the native version with a do-nothing version. */ | 378 | * override the native version with a do-nothing version. |
379 | */ | ||
351 | static void lguest_load_tr_desc(void) | 380 | static void lguest_load_tr_desc(void) |
352 | { | 381 | { |
353 | } | 382 | } |
354 | 383 | ||
355 | /* The "cpuid" instruction is a way of querying both the CPU identity | 384 | /* |
385 | * The "cpuid" instruction is a way of querying both the CPU identity | ||
356 | * (manufacturer, model, etc) and its features. It was introduced before the | 386 | * (manufacturer, model, etc) and its features. It was introduced before the |
357 | * Pentium in 1993 and keeps getting extended by both Intel, AMD and others. | 387 | * Pentium in 1993 and keeps getting extended by both Intel, AMD and others. |
358 | * As you might imagine, after a decade and a half of this treatment, it is now a | 388 | * As you might imagine, after a decade and a half of this treatment, it is now a |
359 | * giant ball of hair. Its entry in the current Intel manual runs to 28 pages. | 389 | * giant ball of hair. Its entry in the current Intel manual runs to 28 pages. |
360 | * | 390 | * |
361 | * This instruction even has its own Wikipedia entry. The Wikipedia entry | 391 | * This instruction even has its own Wikipedia entry. The Wikipedia entry |
362 | * has been translated into 4 languages. I am not making this up! | 392 | * has been translated into 5 languages. I am not making this up! |
363 | * | 393 | * |
364 | * We could get funky here and identify ourselves as "GenuineLguest", but | 394 | * We could get funky here and identify ourselves as "GenuineLguest", but |
365 | * instead we just use the real "cpuid" instruction. Then I pretty much turned | 395 | * instead we just use the real "cpuid" instruction. Then I pretty much turned |
@@ -371,7 +401,8 @@ static void lguest_load_tr_desc(void) | |||
371 | * Replacing the cpuid so we can turn features off is great for the kernel, but | 401 | * Replacing the cpuid so we can turn features off is great for the kernel, but |
372 | * anyone (including userspace) can just use the raw "cpuid" instruction and | 402 | * anyone (including userspace) can just use the raw "cpuid" instruction and |
373 | * the Host won't even notice since it isn't privileged. So we try not to get | 403 | * the Host won't even notice since it isn't privileged. So we try not to get |
374 | * too worked up about it. */ | 404 | * too worked up about it. |
405 | */ | ||
375 | static void lguest_cpuid(unsigned int *ax, unsigned int *bx, | 406 | static void lguest_cpuid(unsigned int *ax, unsigned int *bx, |
376 | unsigned int *cx, unsigned int *dx) | 407 | unsigned int *cx, unsigned int *dx) |
377 | { | 408 | { |
@@ -379,38 +410,63 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, | |||
379 | 410 | ||
380 | native_cpuid(ax, bx, cx, dx); | 411 | native_cpuid(ax, bx, cx, dx); |
381 | switch (function) { | 412 | switch (function) { |
382 | case 1: /* Basic feature request. */ | 413 | /* |
383 | /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ | 414 | * CPUID 0 gives the highest legal CPUID number (and the ID string). |
415 | * We futureproof our code a little by sticking to known CPUID values. | ||
416 | */ | ||
417 | case 0: | ||
418 | if (*ax > 5) | ||
419 | *ax = 5; | ||
420 | break; | ||
421 | |||
422 | /* | ||
423 | * CPUID 1 is a basic feature request. | ||
424 | * | ||
425 | * CX: we only allow kernel to see SSE3, CMPXCHG16B and SSSE3 | ||
426 | * DX: SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU and PAE. | ||
427 | */ | ||
428 | case 1: | ||
384 | *cx &= 0x00002201; | 429 | *cx &= 0x00002201; |
385 | /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU, PAE. */ | ||
386 | *dx &= 0x07808151; | 430 | *dx &= 0x07808151; |
387 | /* The Host can do a nice optimization if it knows that the | 431 | /* |
432 | * The Host can do a nice optimization if it knows that the | ||
388 | * kernel mappings (addresses above 0xC0000000 or whatever | 433 | * kernel mappings (addresses above 0xC0000000 or whatever |
389 | * PAGE_OFFSET is set to) haven't changed. But Linux calls | 434 | * PAGE_OFFSET is set to) haven't changed. But Linux calls |
390 | * flush_tlb_user() for both user and kernel mappings unless | 435 | * flush_tlb_user() for both user and kernel mappings unless |
391 | * the Page Global Enable (PGE) feature bit is set. */ | 436 | * the Page Global Enable (PGE) feature bit is set. |
437 | */ | ||
392 | *dx |= 0x00002000; | 438 | *dx |= 0x00002000; |
393 | /* We also lie, and say we're family id 5. 6 or greater | 439 | /* |
440 | * We also lie, and say we're family id 5. 6 or greater | ||
394 | * leads to a rdmsr in early_init_intel which we can't handle. | 441 | * leads to a rdmsr in early_init_intel which we can't handle. |
395 | * Family ID is returned as bits 8-12 in ax. */ | 442 | * Family ID is returned as bits 8-12 in ax. |
443 | */ | ||
396 | *ax &= 0xFFFFF0FF; | 444 | *ax &= 0xFFFFF0FF; |
397 | *ax |= 0x00000500; | 445 | *ax |= 0x00000500; |
398 | break; | 446 | break; |
447 | /* | ||
448 | * 0x80000000 returns the highest Extended Function, so we futureproof | ||
449 | * like we do above by limiting it to known fields. | ||
450 | */ | ||
399 | case 0x80000000: | 451 | case 0x80000000: |
400 | /* Futureproof this a little: if they ask how much extended | ||
401 | * processor information there is, limit it to known fields. */ | ||
402 | if (*ax > 0x80000008) | 452 | if (*ax > 0x80000008) |
403 | *ax = 0x80000008; | 453 | *ax = 0x80000008; |
404 | break; | 454 | break; |
455 | |||
456 | /* | ||
457 | * PAE systems can mark pages as non-executable. Linux calls this the | ||
458 | * NX bit. Intel calls it XD (eXecute Disable), AMD EVP (Enhanced | ||
459 | * Virus Protection). We just switch it off here, since we don't | ||
460 | * support it. | ||
461 | */ | ||
405 | case 0x80000001: | 462 | case 0x80000001: |
406 | /* Here we should fix nx cap depending on host. */ | ||
407 | /* For this version of PAE, we just clear NX bit. */ | ||
408 | *dx &= ~(1 << 20); | 463 | *dx &= ~(1 << 20); |
409 | break; | 464 | break; |
410 | } | 465 | } |
411 | } | 466 | } |
412 | 467 | ||
413 | /* Intel has four control registers, imaginatively named cr0, cr2, cr3 and cr4. | 468 | /* |
469 | * Intel has four control registers, imaginatively named cr0, cr2, cr3 and cr4. | ||
414 | * I assume there's a cr1, but it hasn't bothered us yet, so we'll not bother | 470 | * I assume there's a cr1, but it hasn't bothered us yet, so we'll not bother |
415 | * it. The Host needs to know when the Guest wants to change them, so we have | 471 | * it. The Host needs to know when the Guest wants to change them, so we have |
416 | * a whole series of functions like read_cr0() and write_cr0(). | 472 | * a whole series of functions like read_cr0() and write_cr0(). |
@@ -425,7 +481,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, | |||
425 | * name like "FPUTRAP bit" be a little less cryptic? | 481 | * name like "FPUTRAP bit" be a little less cryptic? |
426 | * | 482 | * |
427 | * We store cr0 locally because the Host never changes it. The Guest sometimes | 483 | * We store cr0 locally because the Host never changes it. The Guest sometimes |
428 | * wants to read it and we'd prefer not to bother the Host unnecessarily. */ | 484 | * wants to read it and we'd prefer not to bother the Host unnecessarily. |
485 | */ | ||
429 | static unsigned long current_cr0; | 486 | static unsigned long current_cr0; |
430 | static void lguest_write_cr0(unsigned long val) | 487 | static void lguest_write_cr0(unsigned long val) |
431 | { | 488 | { |
@@ -438,18 +495,22 @@ static unsigned long lguest_read_cr0(void) | |||
438 | return current_cr0; | 495 | return current_cr0; |
439 | } | 496 | } |
440 | 497 | ||
441 | /* Intel provided a special instruction to clear the TS bit for people too cool | 498 | /* |
499 | * Intel provided a special instruction to clear the TS bit for people too cool | ||
442 | * to use write_cr0() to do it. This "clts" instruction is faster, because all | 500 | * to use write_cr0() to do it. This "clts" instruction is faster, because all |
443 | * the vowels have been optimized out. */ | 501 | * the vowels have been optimized out. |
502 | */ | ||
444 | static void lguest_clts(void) | 503 | static void lguest_clts(void) |
445 | { | 504 | { |
446 | lazy_hcall1(LHCALL_TS, 0); | 505 | lazy_hcall1(LHCALL_TS, 0); |
447 | current_cr0 &= ~X86_CR0_TS; | 506 | current_cr0 &= ~X86_CR0_TS; |
448 | } | 507 | } |
449 | 508 | ||
450 | /* cr2 is the virtual address of the last page fault, which the Guest only ever | 509 | /* |
510 | * cr2 is the virtual address of the last page fault, which the Guest only ever | ||
451 | * reads. The Host kindly writes this into our "struct lguest_data", so we | 511 | * reads. The Host kindly writes this into our "struct lguest_data", so we |
452 | * just read it out of there. */ | 512 | * just read it out of there. |
513 | */ | ||
453 | static unsigned long lguest_read_cr2(void) | 514 | static unsigned long lguest_read_cr2(void) |
454 | { | 515 | { |
455 | return lguest_data.cr2; | 516 | return lguest_data.cr2; |
@@ -458,10 +519,12 @@ static unsigned long lguest_read_cr2(void) | |||
458 | /* See lguest_set_pte() below. */ | 519 | /* See lguest_set_pte() below. */ |
459 | static bool cr3_changed = false; | 520 | static bool cr3_changed = false; |
460 | 521 | ||
461 | /* cr3 is the current toplevel pagetable page: the principle is the same as | 522 | /* |
523 | * cr3 is the current toplevel pagetable page: the principle is the same as | ||
462 | * cr0. Keep a local copy, and tell the Host when it changes. The only | 524 | * cr0. Keep a local copy, and tell the Host when it changes. The only |
463 | * difference is that our local copy is in lguest_data because the Host needs | 525 | * difference is that our local copy is in lguest_data because the Host needs |
464 | * to set it upon our initial hypercall. */ | 526 | * to set it upon our initial hypercall. |
527 | */ | ||
465 | static void lguest_write_cr3(unsigned long cr3) | 528 | static void lguest_write_cr3(unsigned long cr3) |
466 | { | 529 | { |
467 | lguest_data.pgdir = cr3; | 530 | lguest_data.pgdir = cr3; |
@@ -506,7 +569,7 @@ static void lguest_write_cr4(unsigned long val) | |||
506 | * cr3 ---> +---------+ | 569 | * cr3 ---> +---------+ |
507 | * | --------->+---------+ | 570 | * | --------->+---------+ |
508 | * | | | PADDR1 | | 571 | * | | | PADDR1 | |
509 | * Top-level | | PADDR2 | | 572 | * Mid-level | | PADDR2 | |
510 | * (PMD) page | | | | 573 | * (PMD) page | | | |
511 | * | | Lower-level | | 574 | * | | Lower-level | |
512 | * | | (PTE) page | | 575 | * | | (PTE) page | |
@@ -526,21 +589,62 @@ static void lguest_write_cr4(unsigned long val) | |||
526 | * Index into top Index into second Offset within page | 589 | * Index into top Index into second Offset within page |
527 | * page directory page pagetable page | 590 | * page directory page pagetable page |
528 | * | 591 | * |
529 | * The kernel spends a lot of time changing both the top-level page directory | 592 | * Now, unfortunately, this isn't the whole story: Intel added Physical Address |
530 | * and lower-level pagetable pages. The Guest doesn't know physical addresses, | 593 | * Extension (PAE) to allow 32 bit systems to use 64GB of memory (ie. 36 bits). |
531 | * so while it maintains these page tables exactly like normal, it also needs | 594 | * These are held in 64-bit page table entries, so we can now only fit 512 |
532 | * to keep the Host informed whenever it makes a change: the Host will create | 595 | * entries in a page, and the neat three-level tree breaks down. |
533 | * the real page tables based on the Guests'. | 596 | * |
597 | * The result is a four level page table: | ||
598 | * | ||
599 | * cr3 --> [ 4 Upper ] | ||
600 | * [ Level ] | ||
601 | * [ Entries ] | ||
602 | * [(PUD Page)]---> +---------+ | ||
603 | * | --------->+---------+ | ||
604 | * | | | PADDR1 | | ||
605 | * Mid-level | | PADDR2 | | ||
606 | * (PMD) page | | | | ||
607 | * | | Lower-level | | ||
608 | * | | (PTE) page | | ||
609 | * | | | | | ||
610 | * .... .... | ||
611 | * | ||
612 | * | ||
613 | * And the virtual address is decoded as: | ||
614 | * | ||
615 | * 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | ||
616 | * |<-2->|<--- 9 bits ---->|<---- 9 bits --->|<------ 12 bits ------>| | ||
617 | * Index into Index into mid Index into lower Offset within page | ||
618 | * top entries directory page pagetable page | ||
619 | * | ||
620 | * It's too hard to switch between these two formats at runtime, so Linux only | ||
621 | * supports one or the other depending on whether CONFIG_X86_PAE is set. Many | ||
622 | * distributions turn it on, and not just for people with silly amounts of | ||
623 | * memory: the larger PTE entries allow room for the NX bit, which lets the | ||
624 | * kernel disable execution of pages and increase security. | ||
625 | * | ||
626 | * This was a problem for lguest, which couldn't run on these distributions; | ||
627 | * then Matias Zabaljauregui figured it all out and implemented it, and only a | ||
628 | * handful of puppies were crushed in the process! | ||
629 | * | ||
630 | * Back to our point: the kernel spends a lot of time changing both the | ||
631 | * top-level page directory and lower-level pagetable pages. The Guest doesn't | ||
632 | * know physical addresses, so while it maintains these page tables exactly | ||
633 | * like normal, it also needs to keep the Host informed whenever it makes a | ||
634 | * change: the Host will create the real page tables based on the Guests'. | ||
534 | */ | 635 | */ |
535 | 636 | ||
536 | /* The Guest calls this to set a second-level entry (pte), ie. to map a page | 637 | /* |
537 | * into a process' address space. We set the entry then tell the Host the | 638 | * The Guest calls this after it has set a second-level entry (pte), ie. to map |
538 | * toplevel and address this corresponds to. The Guest uses one pagetable per | 639 | * a page into a process' address space. We tell the Host the toplevel and |
539 | * process, so we need to tell the Host which one we're changing (mm->pgd). */ | 640 | * address this corresponds to. The Guest uses one pagetable per process, so |
641 | * we need to tell the Host which one we're changing (mm->pgd). | ||
642 | */ | ||
540 | static void lguest_pte_update(struct mm_struct *mm, unsigned long addr, | 643 | static void lguest_pte_update(struct mm_struct *mm, unsigned long addr, |
541 | pte_t *ptep) | 644 | pte_t *ptep) |
542 | { | 645 | { |
543 | #ifdef CONFIG_X86_PAE | 646 | #ifdef CONFIG_X86_PAE |
647 | /* PAE needs to hand a 64 bit page table entry, so it uses two args. */ | ||
544 | lazy_hcall4(LHCALL_SET_PTE, __pa(mm->pgd), addr, | 648 | lazy_hcall4(LHCALL_SET_PTE, __pa(mm->pgd), addr, |
545 | ptep->pte_low, ptep->pte_high); | 649 | ptep->pte_low, ptep->pte_high); |
546 | #else | 650 | #else |
@@ -548,6 +652,7 @@ static void lguest_pte_update(struct mm_struct *mm, unsigned long addr, | |||
548 | #endif | 652 | #endif |
549 | } | 653 | } |
550 | 654 | ||
655 | /* This is the "set and update" combo-meal-deal version. */ | ||
551 | static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, | 656 | static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, |
552 | pte_t *ptep, pte_t pteval) | 657 | pte_t *ptep, pte_t pteval) |
553 | { | 658 | { |
@@ -555,10 +660,13 @@ static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, | |||
555 | lguest_pte_update(mm, addr, ptep); | 660 | lguest_pte_update(mm, addr, ptep); |
556 | } | 661 | } |
557 | 662 | ||
558 | /* The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd | 663 | /* |
664 | * The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd | ||
559 | * to set a middle-level entry when PAE is activated. | 665 | * to set a middle-level entry when PAE is activated. |
666 | * | ||
560 | * Again, we set the entry then tell the Host which page we changed, | 667 | * Again, we set the entry then tell the Host which page we changed, |
561 | * and the index of the entry we changed. */ | 668 | * and the index of the entry we changed. |
669 | */ | ||
562 | #ifdef CONFIG_X86_PAE | 670 | #ifdef CONFIG_X86_PAE |
563 | static void lguest_set_pud(pud_t *pudp, pud_t pudval) | 671 | static void lguest_set_pud(pud_t *pudp, pud_t pudval) |
564 | { | 672 | { |
@@ -577,8 +685,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) | |||
577 | } | 685 | } |
578 | #else | 686 | #else |
579 | 687 | ||
580 | /* The Guest calls lguest_set_pmd to set a top-level entry when PAE is not | 688 | /* The Guest calls lguest_set_pmd to set a top-level entry when !PAE. */ |
581 | * activated. */ | ||
582 | static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) | 689 | static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) |
583 | { | 690 | { |
584 | native_set_pmd(pmdp, pmdval); | 691 | native_set_pmd(pmdp, pmdval); |
@@ -587,7 +694,8 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) | |||
587 | } | 694 | } |
588 | #endif | 695 | #endif |
589 | 696 | ||
590 | /* There are a couple of legacy places where the kernel sets a PTE, but we | 697 | /* |
698 | * There are a couple of legacy places where the kernel sets a PTE, but we | ||
591 | * don't know the top level any more. This is useless for us, since we don't | 699 | * don't know the top level any more. This is useless for us, since we don't |
592 | * know which pagetable is changing or what address, so we just tell the Host | 700 | * know which pagetable is changing or what address, so we just tell the Host |
593 | * to forget all of them. Fortunately, this is very rare. | 701 | * to forget all of them. Fortunately, this is very rare. |
@@ -595,7 +703,8 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) | |||
595 | * ... except in early boot when the kernel sets up the initial pagetables, | 703 | * ... except in early boot when the kernel sets up the initial pagetables, |
596 | * which makes booting astonishingly slow: 1.83 seconds! So we don't even tell | 704 | * which makes booting astonishingly slow: 1.83 seconds! So we don't even tell |
597 | * the Host anything changed until we've done the first page table switch, | 705 | * the Host anything changed until we've done the first page table switch, |
598 | * which brings boot back to 0.25 seconds. */ | 706 | * which brings boot back to 0.25 seconds. |
707 | */ | ||
599 | static void lguest_set_pte(pte_t *ptep, pte_t pteval) | 708 | static void lguest_set_pte(pte_t *ptep, pte_t pteval) |
600 | { | 709 | { |
601 | native_set_pte(ptep, pteval); | 710 | native_set_pte(ptep, pteval); |
@@ -604,6 +713,11 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval) | |||
604 | } | 713 | } |
605 | 714 | ||
606 | #ifdef CONFIG_X86_PAE | 715 | #ifdef CONFIG_X86_PAE |
716 | /* | ||
717 | * With 64-bit PTE values, we need to be careful setting them: if we set 32 | ||
718 | * bits at a time, the hardware could see a weird half-set entry. These | ||
719 | * versions ensure we update all 64 bits at once. | ||
720 | */ | ||
607 | static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte) | 721 | static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte) |
608 | { | 722 | { |
609 | native_set_pte_atomic(ptep, pte); | 723 | native_set_pte_atomic(ptep, pte); |
@@ -611,19 +725,21 @@ static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte) | |||
611 | lazy_hcall1(LHCALL_FLUSH_TLB, 1); | 725 | lazy_hcall1(LHCALL_FLUSH_TLB, 1); |
612 | } | 726 | } |
613 | 727 | ||
614 | void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 728 | static void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, |
729 | pte_t *ptep) | ||
615 | { | 730 | { |
616 | native_pte_clear(mm, addr, ptep); | 731 | native_pte_clear(mm, addr, ptep); |
617 | lguest_pte_update(mm, addr, ptep); | 732 | lguest_pte_update(mm, addr, ptep); |
618 | } | 733 | } |
619 | 734 | ||
620 | void lguest_pmd_clear(pmd_t *pmdp) | 735 | static void lguest_pmd_clear(pmd_t *pmdp) |
621 | { | 736 | { |
622 | lguest_set_pmd(pmdp, __pmd(0)); | 737 | lguest_set_pmd(pmdp, __pmd(0)); |
623 | } | 738 | } |
624 | #endif | 739 | #endif |
625 | 740 | ||
626 | /* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on | 741 | /* |
742 | * Unfortunately for Lguest, the pv_mmu_ops for page tables were based on | ||
627 | * native page table operations. On native hardware you can set a new page | 743 | * native page table operations. On native hardware you can set a new page |
628 | * table entry whenever you want, but if you want to remove one you have to do | 744 | * table entry whenever you want, but if you want to remove one you have to do |
629 | * a TLB flush (a TLB is a little cache of page table entries kept by the CPU). | 745 | * a TLB flush (a TLB is a little cache of page table entries kept by the CPU). |
@@ -632,24 +748,29 @@ void lguest_pmd_clear(pmd_t *pmdp) | |||
632 | * called when a valid entry is written, not when it's removed (ie. marked not | 748 | * called when a valid entry is written, not when it's removed (ie. marked not |
633 | * present). Instead, this is where we come when the Guest wants to remove a | 749 | * present). Instead, this is where we come when the Guest wants to remove a |
634 | * page table entry: we tell the Host to set that entry to 0 (ie. the present | 750 | * page table entry: we tell the Host to set that entry to 0 (ie. the present |
635 | * bit is zero). */ | 751 | * bit is zero). |
752 | */ | ||
636 | static void lguest_flush_tlb_single(unsigned long addr) | 753 | static void lguest_flush_tlb_single(unsigned long addr) |
637 | { | 754 | { |
638 | /* Simply set it to zero: if it was not, it will fault back in. */ | 755 | /* Simply set it to zero: if it was not, it will fault back in. */ |
639 | lazy_hcall3(LHCALL_SET_PTE, lguest_data.pgdir, addr, 0); | 756 | lazy_hcall3(LHCALL_SET_PTE, lguest_data.pgdir, addr, 0); |
640 | } | 757 | } |
641 | 758 | ||
642 | /* This is what happens after the Guest has removed a large number of entries. | 759 | /* |
760 | * This is what happens after the Guest has removed a large number of entries. | ||
643 | * This tells the Host that any of the page table entries for userspace might | 761 | * This tells the Host that any of the page table entries for userspace might |
644 | * have changed, ie. virtual addresses below PAGE_OFFSET. */ | 762 | * have changed, ie. virtual addresses below PAGE_OFFSET. |
763 | */ | ||
645 | static void lguest_flush_tlb_user(void) | 764 | static void lguest_flush_tlb_user(void) |
646 | { | 765 | { |
647 | lazy_hcall1(LHCALL_FLUSH_TLB, 0); | 766 | lazy_hcall1(LHCALL_FLUSH_TLB, 0); |
648 | } | 767 | } |
649 | 768 | ||
650 | /* This is called when the kernel page tables have changed. That's not very | 769 | /* |
770 | * This is called when the kernel page tables have changed. That's not very | ||
651 | * common (unless the Guest is using highmem, which makes the Guest extremely | 771 | * common (unless the Guest is using highmem, which makes the Guest extremely |
652 | * slow), so it's worth separating this from the user flushing above. */ | 772 | * slow), so it's worth separating this from the user flushing above. |
773 | */ | ||
653 | static void lguest_flush_tlb_kernel(void) | 774 | static void lguest_flush_tlb_kernel(void) |
654 | { | 775 | { |
655 | lazy_hcall1(LHCALL_FLUSH_TLB, 1); | 776 | lazy_hcall1(LHCALL_FLUSH_TLB, 1); |
@@ -686,26 +807,38 @@ static struct irq_chip lguest_irq_controller = { | |||
686 | .unmask = enable_lguest_irq, | 807 | .unmask = enable_lguest_irq, |
687 | }; | 808 | }; |
688 | 809 | ||
689 | /* This sets up the Interrupt Descriptor Table (IDT) entry for each hardware | 810 | /* |
811 | * This sets up the Interrupt Descriptor Table (IDT) entry for each hardware | ||
690 | * interrupt (except 128, which is used for system calls), and then tells the | 812 | * interrupt (except 128, which is used for system calls), and then tells the |
691 | * Linux infrastructure that each interrupt is controlled by our level-based | 813 | * Linux infrastructure that each interrupt is controlled by our level-based |
692 | * lguest interrupt controller. */ | 814 | * lguest interrupt controller. |
815 | */ | ||
693 | static void __init lguest_init_IRQ(void) | 816 | static void __init lguest_init_IRQ(void) |
694 | { | 817 | { |
695 | unsigned int i; | 818 | unsigned int i; |
696 | 819 | ||
697 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { | 820 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { |
698 | /* Some systems map "vectors" to interrupts weirdly. Lguest has | 821 | /* Some systems map "vectors" to interrupts weirdly. Not us! */ |
699 | * a straightforward 1 to 1 mapping, so force that here. */ | ||
700 | __get_cpu_var(vector_irq)[i] = i - FIRST_EXTERNAL_VECTOR; | 822 | __get_cpu_var(vector_irq)[i] = i - FIRST_EXTERNAL_VECTOR; |
701 | if (i != SYSCALL_VECTOR) | 823 | if (i != SYSCALL_VECTOR) |
702 | set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); | 824 | set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); |
703 | } | 825 | } |
704 | /* This call is required to set up for 4k stacks, where we have | 826 | |
705 | * separate stacks for hard and soft interrupts. */ | 827 | /* |
828 | * This call is required to set up for 4k stacks, where we have | ||
829 | * separate stacks for hard and soft interrupts. | ||
830 | */ | ||
706 | irq_ctx_init(smp_processor_id()); | 831 | irq_ctx_init(smp_processor_id()); |
707 | } | 832 | } |
708 | 833 | ||
834 | /* | ||
835 | * With CONFIG_SPARSE_IRQ, interrupt descriptors are allocated as-needed, so | ||
836 | * rather than set them in lguest_init_IRQ we are called here every time an | ||
837 | * lguest device needs an interrupt. | ||
838 | * | ||
839 | * FIXME: irq_to_desc_alloc_node() can fail due to lack of memory, we should | ||
840 | * pass that up! | ||
841 | */ | ||
709 | void lguest_setup_irq(unsigned int irq) | 842 | void lguest_setup_irq(unsigned int irq) |
710 | { | 843 | { |
711 | irq_to_desc_alloc_node(irq, 0); | 844 | irq_to_desc_alloc_node(irq, 0); |
@@ -724,31 +857,39 @@ static unsigned long lguest_get_wallclock(void) | |||
724 | return lguest_data.time.tv_sec; | 857 | return lguest_data.time.tv_sec; |
725 | } | 858 | } |
726 | 859 | ||
727 | /* The TSC is an Intel thing called the Time Stamp Counter. The Host tells us | 860 | /* |
861 | * The TSC is an Intel thing called the Time Stamp Counter. The Host tells us | ||
728 | * what speed it runs at, or 0 if it's unusable as a reliable clock source. | 862 | * what speed it runs at, or 0 if it's unusable as a reliable clock source. |
729 | * This matches what we want here: if we return 0 from this function, the x86 | 863 | * This matches what we want here: if we return 0 from this function, the x86 |
730 | * TSC clock will give up and not register itself. */ | 864 | * TSC clock will give up and not register itself. |
865 | */ | ||
731 | static unsigned long lguest_tsc_khz(void) | 866 | static unsigned long lguest_tsc_khz(void) |
732 | { | 867 | { |
733 | return lguest_data.tsc_khz; | 868 | return lguest_data.tsc_khz; |
734 | } | 869 | } |
735 | 870 | ||
736 | /* If we can't use the TSC, the kernel falls back to our lower-priority | 871 | /* |
737 | * "lguest_clock", where we read the time value given to us by the Host. */ | 872 | * If we can't use the TSC, the kernel falls back to our lower-priority |
873 | * "lguest_clock", where we read the time value given to us by the Host. | ||
874 | */ | ||
738 | static cycle_t lguest_clock_read(struct clocksource *cs) | 875 | static cycle_t lguest_clock_read(struct clocksource *cs) |
739 | { | 876 | { |
740 | unsigned long sec, nsec; | 877 | unsigned long sec, nsec; |
741 | 878 | ||
742 | /* Since the time is in two parts (seconds and nanoseconds), we risk | 879 | /* |
880 | * Since the time is in two parts (seconds and nanoseconds), we risk | ||
743 | * reading it just as it's changing from 99 & 0.999999999 to 100 and 0, | 881 | * reading it just as it's changing from 99 & 0.999999999 to 100 and 0, |
744 | * and getting 99 and 0. As Linux tends to come apart under the stress | 882 | * and getting 99 and 0. As Linux tends to come apart under the stress |
745 | * of time travel, we must be careful: */ | 883 | * of time travel, we must be careful: |
884 | */ | ||
746 | do { | 885 | do { |
747 | /* First we read the seconds part. */ | 886 | /* First we read the seconds part. */ |
748 | sec = lguest_data.time.tv_sec; | 887 | sec = lguest_data.time.tv_sec; |
749 | /* This read memory barrier tells the compiler and the CPU that | 888 | /* |
889 | * This read memory barrier tells the compiler and the CPU that | ||
750 | * this can't be reordered: we have to complete the above | 890 | * this can't be reordered: we have to complete the above |
751 | * before going on. */ | 891 | * before going on. |
892 | */ | ||
752 | rmb(); | 893 | rmb(); |
753 | /* Now we read the nanoseconds part. */ | 894 | /* Now we read the nanoseconds part. */ |
754 | nsec = lguest_data.time.tv_nsec; | 895 | nsec = lguest_data.time.tv_nsec; |
@@ -772,9 +913,11 @@ static struct clocksource lguest_clock = { | |||
772 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 913 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
773 | }; | 914 | }; |
774 | 915 | ||
775 | /* We also need a "struct clock_event_device": Linux asks us to set it to go | 916 | /* |
917 | * We also need a "struct clock_event_device": Linux asks us to set it to go | ||
776 | * off some time in the future. Actually, James Morris figured all this out, I | 918 | * off some time in the future. Actually, James Morris figured all this out, I |
777 | * just applied the patch. */ | 919 | * just applied the patch. |
920 | */ | ||
778 | static int lguest_clockevent_set_next_event(unsigned long delta, | 921 | static int lguest_clockevent_set_next_event(unsigned long delta, |
779 | struct clock_event_device *evt) | 922 | struct clock_event_device *evt) |
780 | { | 923 | { |
@@ -824,8 +967,10 @@ static struct clock_event_device lguest_clockevent = { | |||
824 | .max_delta_ns = LG_CLOCK_MAX_DELTA, | 967 | .max_delta_ns = LG_CLOCK_MAX_DELTA, |
825 | }; | 968 | }; |
826 | 969 | ||
827 | /* This is the Guest timer interrupt handler (hardware interrupt 0). We just | 970 | /* |
828 | * call the clockevent infrastructure and it does whatever needs doing. */ | 971 | * This is the Guest timer interrupt handler (hardware interrupt 0). We just |
972 | * call the clockevent infrastructure and it does whatever needs doing. | ||
973 | */ | ||
829 | static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) | 974 | static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) |
830 | { | 975 | { |
831 | unsigned long flags; | 976 | unsigned long flags; |
@@ -836,10 +981,12 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) | |||
836 | local_irq_restore(flags); | 981 | local_irq_restore(flags); |
837 | } | 982 | } |
838 | 983 | ||
839 | /* At some point in the boot process, we get asked to set up our timing | 984 | /* |
985 | * At some point in the boot process, we get asked to set up our timing | ||
840 | * infrastructure. The kernel doesn't expect timer interrupts before this, but | 986 | * infrastructure. The kernel doesn't expect timer interrupts before this, but |
841 | * we cleverly initialized the "blocked_interrupts" field of "struct | 987 | * we cleverly initialized the "blocked_interrupts" field of "struct |
842 | * lguest_data" so that timer interrupts were blocked until now. */ | 988 | * lguest_data" so that timer interrupts were blocked until now. |
989 | */ | ||
843 | static void lguest_time_init(void) | 990 | static void lguest_time_init(void) |
844 | { | 991 | { |
845 | /* Set up the timer interrupt (0) to go to our simple timer routine */ | 992 | /* Set up the timer interrupt (0) to go to our simple timer routine */ |
@@ -863,14 +1010,16 @@ static void lguest_time_init(void) | |||
863 | * to work. They're pretty simple. | 1010 | * to work. They're pretty simple. |
864 | */ | 1011 | */ |
865 | 1012 | ||
866 | /* The Guest needs to tell the Host what stack it expects traps to use. For | 1013 | /* |
1014 | * The Guest needs to tell the Host what stack it expects traps to use. For | ||
867 | * native hardware, this is part of the Task State Segment mentioned above in | 1015 | * native hardware, this is part of the Task State Segment mentioned above in |
868 | * lguest_load_tr_desc(), but to help hypervisors there's this special call. | 1016 | * lguest_load_tr_desc(), but to help hypervisors there's this special call. |
869 | * | 1017 | * |
870 | * We tell the Host the segment we want to use (__KERNEL_DS is the kernel data | 1018 | * We tell the Host the segment we want to use (__KERNEL_DS is the kernel data |
871 | * segment), the privilege level (we're privilege level 1, the Host is 0 and | 1019 | * segment), the privilege level (we're privilege level 1, the Host is 0 and |
872 | * will not tolerate us trying to use that), the stack pointer, and the number | 1020 | * will not tolerate us trying to use that), the stack pointer, and the number |
873 | * of pages in the stack. */ | 1021 | * of pages in the stack. |
1022 | */ | ||
874 | static void lguest_load_sp0(struct tss_struct *tss, | 1023 | static void lguest_load_sp0(struct tss_struct *tss, |
875 | struct thread_struct *thread) | 1024 | struct thread_struct *thread) |
876 | { | 1025 | { |
@@ -884,7 +1033,8 @@ static void lguest_set_debugreg(int regno, unsigned long value) | |||
884 | /* FIXME: Implement */ | 1033 | /* FIXME: Implement */ |
885 | } | 1034 | } |
886 | 1035 | ||
887 | /* There are times when the kernel wants to make sure that no memory writes are | 1036 | /* |
1037 | * There are times when the kernel wants to make sure that no memory writes are | ||
888 | * caught in the cache (that they've all reached real hardware devices). This | 1038 | * caught in the cache (that they've all reached real hardware devices). This |
889 | * doesn't matter for the Guest which has virtual hardware. | 1039 | * doesn't matter for the Guest which has virtual hardware. |
890 | * | 1040 | * |
@@ -898,11 +1048,13 @@ static void lguest_wbinvd(void) | |||
898 | { | 1048 | { |
899 | } | 1049 | } |
900 | 1050 | ||
901 | /* If the Guest expects to have an Advanced Programmable Interrupt Controller, | 1051 | /* |
1052 | * If the Guest expects to have an Advanced Programmable Interrupt Controller, | ||
902 | * we play dumb by ignoring writes and returning 0 for reads. So it's no | 1053 | * we play dumb by ignoring writes and returning 0 for reads. So it's no |
903 | * longer Programmable nor Controlling anything, and I don't think 8 lines of | 1054 | * longer Programmable nor Controlling anything, and I don't think 8 lines of |
904 | * code qualifies for Advanced. It will also never interrupt anything. It | 1055 | * code qualifies for Advanced. It will also never interrupt anything. It |
905 | * does, however, allow us to get through the Linux boot code. */ | 1056 | * does, however, allow us to get through the Linux boot code. |
1057 | */ | ||
906 | #ifdef CONFIG_X86_LOCAL_APIC | 1058 | #ifdef CONFIG_X86_LOCAL_APIC |
907 | static void lguest_apic_write(u32 reg, u32 v) | 1059 | static void lguest_apic_write(u32 reg, u32 v) |
908 | { | 1060 | { |
@@ -951,11 +1103,13 @@ static void lguest_safe_halt(void) | |||
951 | kvm_hypercall0(LHCALL_HALT); | 1103 | kvm_hypercall0(LHCALL_HALT); |
952 | } | 1104 | } |
953 | 1105 | ||
954 | /* The SHUTDOWN hypercall takes a string to describe what's happening, and | 1106 | /* |
1107 | * The SHUTDOWN hypercall takes a string to describe what's happening, and | ||
955 | * an argument which says whether this is to restart (reboot) the Guest or not. | 1108 | * an argument which says whether this is to restart (reboot) the Guest or not. |
956 | * | 1109 | * |
957 | * Note that the Host always prefers that the Guest speak in physical addresses | 1110 | * Note that the Host always prefers that the Guest speak in physical addresses |
958 | * rather than virtual addresses, so we use __pa() here. */ | 1111 | * rather than virtual addresses, so we use __pa() here. |
1112 | */ | ||
959 | static void lguest_power_off(void) | 1113 | static void lguest_power_off(void) |
960 | { | 1114 | { |
961 | kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"), | 1115 | kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"), |
@@ -986,8 +1140,10 @@ static __init char *lguest_memory_setup(void) | |||
986 | * nice to move it back to lguest_init. Patch welcome... */ | 1140 | * nice to move it back to lguest_init. Patch welcome... */ |
987 | atomic_notifier_chain_register(&panic_notifier_list, &paniced); | 1141 | atomic_notifier_chain_register(&panic_notifier_list, &paniced); |
988 | 1142 | ||
989 | /* The Linux bootloader header contains an "e820" memory map: the | 1143 | /* |
990 | * Launcher populated the first entry with our memory limit. */ | 1144 | * The Linux bootloader header contains an "e820" memory map: the |
1145 | * Launcher populated the first entry with our memory limit. | ||
1146 | */ | ||
991 | e820_add_region(boot_params.e820_map[0].addr, | 1147 | e820_add_region(boot_params.e820_map[0].addr, |
992 | boot_params.e820_map[0].size, | 1148 | boot_params.e820_map[0].size, |
993 | boot_params.e820_map[0].type); | 1149 | boot_params.e820_map[0].type); |
@@ -996,16 +1152,17 @@ static __init char *lguest_memory_setup(void) | |||
996 | return "LGUEST"; | 1152 | return "LGUEST"; |
997 | } | 1153 | } |
998 | 1154 | ||
999 | /* We will eventually use the virtio console device to produce console output, | 1155 | /* |
1156 | * We will eventually use the virtio console device to produce console output, | ||
1000 | * but before that is set up we use LHCALL_NOTIFY on normal memory to produce | 1157 | * but before that is set up we use LHCALL_NOTIFY on normal memory to produce |
1001 | * console output. */ | 1158 | * console output. |
1159 | */ | ||
1002 | static __init int early_put_chars(u32 vtermno, const char *buf, int count) | 1160 | static __init int early_put_chars(u32 vtermno, const char *buf, int count) |
1003 | { | 1161 | { |
1004 | char scratch[17]; | 1162 | char scratch[17]; |
1005 | unsigned int len = count; | 1163 | unsigned int len = count; |
1006 | 1164 | ||
1007 | /* We use a nul-terminated string, so we have to make a copy. Icky, | 1165 | /* We use a nul-terminated string, so we make a copy. Icky, huh? */ |
1008 | * huh? */ | ||
1009 | if (len > sizeof(scratch) - 1) | 1166 | if (len > sizeof(scratch) - 1) |
1010 | len = sizeof(scratch) - 1; | 1167 | len = sizeof(scratch) - 1; |
1011 | scratch[len] = '\0'; | 1168 | scratch[len] = '\0'; |
@@ -1016,8 +1173,10 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) | |||
1016 | return len; | 1173 | return len; |
1017 | } | 1174 | } |
1018 | 1175 | ||
1019 | /* Rebooting also tells the Host we're finished, but the RESTART flag tells the | 1176 | /* |
1020 | * Launcher to reboot us. */ | 1177 | * Rebooting also tells the Host we're finished, but the RESTART flag tells the |
1178 | * Launcher to reboot us. | ||
1179 | */ | ||
1021 | static void lguest_restart(char *reason) | 1180 | static void lguest_restart(char *reason) |
1022 | { | 1181 | { |
1023 | kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART); | 1182 | kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART); |
@@ -1044,7 +1203,8 @@ static void lguest_restart(char *reason) | |||
1044 | * fit comfortably. | 1203 | * fit comfortably. |
1045 | * | 1204 | * |
1046 | * First we need assembly templates of each of the patchable Guest operations, | 1205 | * First we need assembly templates of each of the patchable Guest operations, |
1047 | * and these are in i386_head.S. */ | 1206 | * and these are in i386_head.S. |
1207 | */ | ||
1048 | 1208 | ||
1049 | /*G:060 We construct a table from the assembler templates: */ | 1209 | /*G:060 We construct a table from the assembler templates: */ |
1050 | static const struct lguest_insns | 1210 | static const struct lguest_insns |
@@ -1055,9 +1215,11 @@ static const struct lguest_insns | |||
1055 | [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf }, | 1215 | [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf }, |
1056 | }; | 1216 | }; |
1057 | 1217 | ||
1058 | /* Now our patch routine is fairly simple (based on the native one in | 1218 | /* |
1219 | * Now our patch routine is fairly simple (based on the native one in | ||
1059 | * paravirt.c). If we have a replacement, we copy it in and return how much of | 1220 | * paravirt.c). If we have a replacement, we copy it in and return how much of |
1060 | * the available space we used. */ | 1221 | * the available space we used. |
1222 | */ | ||
1061 | static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, | 1223 | static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, |
1062 | unsigned long addr, unsigned len) | 1224 | unsigned long addr, unsigned len) |
1063 | { | 1225 | { |
@@ -1069,8 +1231,7 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, | |||
1069 | 1231 | ||
1070 | insn_len = lguest_insns[type].end - lguest_insns[type].start; | 1232 | insn_len = lguest_insns[type].end - lguest_insns[type].start; |
1071 | 1233 | ||
1072 | /* Similarly if we can't fit replacement (shouldn't happen, but let's | 1234 | /* Similarly if it can't fit (doesn't happen, but let's be thorough). */ |
1073 | * be thorough). */ | ||
1074 | if (len < insn_len) | 1235 | if (len < insn_len) |
1075 | return paravirt_patch_default(type, clobber, ibuf, addr, len); | 1236 | return paravirt_patch_default(type, clobber, ibuf, addr, len); |
1076 | 1237 | ||
@@ -1079,22 +1240,28 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, | |||
1079 | return insn_len; | 1240 | return insn_len; |
1080 | } | 1241 | } |
1081 | 1242 | ||
1082 | /*G:030 Once we get to lguest_init(), we know we're a Guest. The various | 1243 | /*G:029 |
1244 | * Once we get to lguest_init(), we know we're a Guest. The various | ||
1083 | * pv_ops structures in the kernel provide points for (almost) every routine we | 1245 | * pv_ops structures in the kernel provide points for (almost) every routine we |
1084 | * have to override to avoid privileged instructions. */ | 1246 | * have to override to avoid privileged instructions. |
1247 | */ | ||
1085 | __init void lguest_init(void) | 1248 | __init void lguest_init(void) |
1086 | { | 1249 | { |
1087 | /* We're under lguest, paravirt is enabled, and we're running at | 1250 | /* We're under lguest. */ |
1088 | * privilege level 1, not 0 as normal. */ | ||
1089 | pv_info.name = "lguest"; | 1251 | pv_info.name = "lguest"; |
1252 | /* Paravirt is enabled. */ | ||
1090 | pv_info.paravirt_enabled = 1; | 1253 | pv_info.paravirt_enabled = 1; |
1254 | /* We're running at privilege level 1, not 0 as normal. */ | ||
1091 | pv_info.kernel_rpl = 1; | 1255 | pv_info.kernel_rpl = 1; |
1256 | /* Everyone except Xen runs with this set. */ | ||
1092 | pv_info.shared_kernel_pmd = 1; | 1257 | pv_info.shared_kernel_pmd = 1; |
1093 | 1258 | ||
1094 | /* We set up all the lguest overrides for sensitive operations. These | 1259 | /* |
1095 | * are detailed with the operations themselves. */ | 1260 | * We set up all the lguest overrides for sensitive operations. These |
1261 | * are detailed with the operations themselves. | ||
1262 | */ | ||
1096 | 1263 | ||
1097 | /* interrupt-related operations */ | 1264 | /* Interrupt-related operations */ |
1098 | pv_irq_ops.init_IRQ = lguest_init_IRQ; | 1265 | pv_irq_ops.init_IRQ = lguest_init_IRQ; |
1099 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); | 1266 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); |
1100 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl); | 1267 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl); |
@@ -1102,11 +1269,11 @@ __init void lguest_init(void) | |||
1102 | pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable); | 1269 | pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable); |
1103 | pv_irq_ops.safe_halt = lguest_safe_halt; | 1270 | pv_irq_ops.safe_halt = lguest_safe_halt; |
1104 | 1271 | ||
1105 | /* init-time operations */ | 1272 | /* Setup operations */ |
1106 | pv_init_ops.memory_setup = lguest_memory_setup; | 1273 | pv_init_ops.memory_setup = lguest_memory_setup; |
1107 | pv_init_ops.patch = lguest_patch; | 1274 | pv_init_ops.patch = lguest_patch; |
1108 | 1275 | ||
1109 | /* Intercepts of various cpu instructions */ | 1276 | /* Intercepts of various CPU instructions */ |
1110 | pv_cpu_ops.load_gdt = lguest_load_gdt; | 1277 | pv_cpu_ops.load_gdt = lguest_load_gdt; |
1111 | pv_cpu_ops.cpuid = lguest_cpuid; | 1278 | pv_cpu_ops.cpuid = lguest_cpuid; |
1112 | pv_cpu_ops.load_idt = lguest_load_idt; | 1279 | pv_cpu_ops.load_idt = lguest_load_idt; |
@@ -1127,7 +1294,7 @@ __init void lguest_init(void) | |||
1127 | pv_cpu_ops.start_context_switch = paravirt_start_context_switch; | 1294 | pv_cpu_ops.start_context_switch = paravirt_start_context_switch; |
1128 | pv_cpu_ops.end_context_switch = lguest_end_context_switch; | 1295 | pv_cpu_ops.end_context_switch = lguest_end_context_switch; |
1129 | 1296 | ||
1130 | /* pagetable management */ | 1297 | /* Pagetable management */ |
1131 | pv_mmu_ops.write_cr3 = lguest_write_cr3; | 1298 | pv_mmu_ops.write_cr3 = lguest_write_cr3; |
1132 | pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user; | 1299 | pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user; |
1133 | pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single; | 1300 | pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single; |
@@ -1149,54 +1316,71 @@ __init void lguest_init(void) | |||
1149 | pv_mmu_ops.pte_update_defer = lguest_pte_update; | 1316 | pv_mmu_ops.pte_update_defer = lguest_pte_update; |
1150 | 1317 | ||
1151 | #ifdef CONFIG_X86_LOCAL_APIC | 1318 | #ifdef CONFIG_X86_LOCAL_APIC |
1152 | /* apic read/write intercepts */ | 1319 | /* APIC read/write intercepts */ |
1153 | set_lguest_basic_apic_ops(); | 1320 | set_lguest_basic_apic_ops(); |
1154 | #endif | 1321 | #endif |
1155 | 1322 | ||
1156 | /* time operations */ | 1323 | /* Time operations */ |
1157 | pv_time_ops.get_wallclock = lguest_get_wallclock; | 1324 | pv_time_ops.get_wallclock = lguest_get_wallclock; |
1158 | pv_time_ops.time_init = lguest_time_init; | 1325 | pv_time_ops.time_init = lguest_time_init; |
1159 | pv_time_ops.get_tsc_khz = lguest_tsc_khz; | 1326 | pv_time_ops.get_tsc_khz = lguest_tsc_khz; |
1160 | 1327 | ||
1161 | /* Now is a good time to look at the implementations of these functions | 1328 | /* |
1162 | * before returning to the rest of lguest_init(). */ | 1329 | * Now is a good time to look at the implementations of these functions |
1330 | * before returning to the rest of lguest_init(). | ||
1331 | */ | ||
1163 | 1332 | ||
1164 | /*G:070 Now we've seen all the paravirt_ops, we return to | 1333 | /*G:070 |
1334 | * Now we've seen all the paravirt_ops, we return to | ||
1165 | * lguest_init() where the rest of the fairly chaotic boot setup | 1335 | * lguest_init() where the rest of the fairly chaotic boot setup |
1166 | * occurs. */ | 1336 | * occurs. |
1337 | */ | ||
1167 | 1338 | ||
1168 | /* The stack protector is a weird thing where gcc places a canary | 1339 | /* |
1340 | * The stack protector is a weird thing where gcc places a canary | ||
1169 | * value on the stack and then checks it on return. This file is | 1341 | * value on the stack and then checks it on return. This file is |
1170 | * compiled with -fno-stack-protector, so we got this far without | 1342 | * compiled with -fno-stack-protector, so we got this far without |
1171 | * problems. The value of the canary is kept at offset 20 from the | 1343 | * problems. The value of the canary is kept at offset 20 from the |
1172 | * %gs register, so we need to set that up before calling C functions | 1344 | * %gs register, so we need to set that up before calling C functions |
1173 | * in other files. */ | 1345 | * in other files. |
1346 | */ | ||
1174 | setup_stack_canary_segment(0); | 1347 | setup_stack_canary_segment(0); |
1175 | /* We could just call load_stack_canary_segment(), but we might as | 1348 | |
1176 | * call switch_to_new_gdt() which loads the whole table and sets up | 1349 | /* |
1177 | * the per-cpu segment descriptor register %fs as well. */ | 1350 | * We could just call load_stack_canary_segment(), but we might as well |
1351 | * call switch_to_new_gdt() which loads the whole table and sets up the | ||
1352 | * per-cpu segment descriptor register %fs as well. | ||
1353 | */ | ||
1178 | switch_to_new_gdt(0); | 1354 | switch_to_new_gdt(0); |
1179 | 1355 | ||
1180 | /* As described in head_32.S, we map the first 128M of memory. */ | 1356 | /* We actually boot with all memory mapped, but let's say 128MB. */ |
1181 | max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; | 1357 | max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; |
1182 | 1358 | ||
1183 | /* The Host<->Guest Switcher lives at the top of our address space, and | 1359 | /* |
1360 | * The Host<->Guest Switcher lives at the top of our address space, and | ||
1184 | * the Host told us how big it is when we made LGUEST_INIT hypercall: | 1361 | * the Host told us how big it is when we made LGUEST_INIT hypercall: |
1185 | * it put the answer in lguest_data.reserve_mem */ | 1362 | * it put the answer in lguest_data.reserve_mem |
1363 | */ | ||
1186 | reserve_top_address(lguest_data.reserve_mem); | 1364 | reserve_top_address(lguest_data.reserve_mem); |
1187 | 1365 | ||
1188 | /* If we don't initialize the lock dependency checker now, it crashes | 1366 | /* |
1189 | * paravirt_disable_iospace. */ | 1367 | * If we don't initialize the lock dependency checker now, it crashes |
1368 | * paravirt_disable_iospace. | ||
1369 | */ | ||
1190 | lockdep_init(); | 1370 | lockdep_init(); |
1191 | 1371 | ||
1192 | /* The IDE code spends about 3 seconds probing for disks: if we reserve | 1372 | /* |
1373 | * The IDE code spends about 3 seconds probing for disks: if we reserve | ||
1193 | * all the I/O ports up front it can't get them and so doesn't probe. | 1374 | * all the I/O ports up front it can't get them and so doesn't probe. |
1194 | * Other device drivers are similar (but less severe). This cuts the | 1375 | * Other device drivers are similar (but less severe). This cuts the |
1195 | * kernel boot time on my machine from 4.1 seconds to 0.45 seconds. */ | 1376 | * kernel boot time on my machine from 4.1 seconds to 0.45 seconds. |
1377 | */ | ||
1196 | paravirt_disable_iospace(); | 1378 | paravirt_disable_iospace(); |
1197 | 1379 | ||
1198 | /* This is messy CPU setup stuff which the native boot code does before | 1380 | /* |
1199 | * start_kernel, so we have to do, too: */ | 1381 | * This is messy CPU setup stuff which the native boot code does before |
1382 | * start_kernel, so we have to do, too: | ||
1383 | */ | ||
1200 | cpu_detect(&new_cpu_data); | 1384 | cpu_detect(&new_cpu_data); |
1201 | /* head.S usually sets up the first capability word, so do it here. */ | 1385 | /* head.S usually sets up the first capability word, so do it here. */ |
1202 | new_cpu_data.x86_capability[0] = cpuid_edx(1); | 1386 | new_cpu_data.x86_capability[0] = cpuid_edx(1); |
@@ -1213,22 +1397,28 @@ __init void lguest_init(void) | |||
1213 | acpi_ht = 0; | 1397 | acpi_ht = 0; |
1214 | #endif | 1398 | #endif |
1215 | 1399 | ||
1216 | /* We set the preferred console to "hvc". This is the "hypervisor | 1400 | /* |
1401 | * We set the preferred console to "hvc". This is the "hypervisor | ||
1217 | * virtual console" driver written by the PowerPC people, which we also | 1402 | * virtual console" driver written by the PowerPC people, which we also |
1218 | * adapted for lguest's use. */ | 1403 | * adapted for lguest's use. |
1404 | */ | ||
1219 | add_preferred_console("hvc", 0, NULL); | 1405 | add_preferred_console("hvc", 0, NULL); |
1220 | 1406 | ||
1221 | /* Register our very early console. */ | 1407 | /* Register our very early console. */ |
1222 | virtio_cons_early_init(early_put_chars); | 1408 | virtio_cons_early_init(early_put_chars); |
1223 | 1409 | ||
1224 | /* Last of all, we set the power management poweroff hook to point to | 1410 | /* |
1411 | * Last of all, we set the power management poweroff hook to point to | ||
1225 | * the Guest routine to power off, and the reboot hook to our restart | 1412 | * the Guest routine to power off, and the reboot hook to our restart |
1226 | * routine. */ | 1413 | * routine. |
1414 | */ | ||
1227 | pm_power_off = lguest_power_off; | 1415 | pm_power_off = lguest_power_off; |
1228 | machine_ops.restart = lguest_restart; | 1416 | machine_ops.restart = lguest_restart; |
1229 | 1417 | ||
1230 | /* Now we're set up, call i386_start_kernel() in head32.c and we proceed | 1418 | /* |
1231 | * to boot as normal. It never returns. */ | 1419 | * Now we're set up, call i386_start_kernel() in head32.c and we proceed |
1420 | * to boot as normal. It never returns. | ||
1421 | */ | ||
1232 | i386_start_kernel(); | 1422 | i386_start_kernel(); |
1233 | } | 1423 | } |
1234 | /* | 1424 | /* |
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index a9c8cfe61cd4..27eac0faee48 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S | |||
@@ -5,7 +5,8 @@ | |||
5 | #include <asm/thread_info.h> | 5 | #include <asm/thread_info.h> |
6 | #include <asm/processor-flags.h> | 6 | #include <asm/processor-flags.h> |
7 | 7 | ||
8 | /*G:020 Our story starts with the kernel booting into startup_32 in | 8 | /*G:020 |
9 | * Our story starts with the kernel booting into startup_32 in | ||
9 | * arch/x86/kernel/head_32.S. It expects a boot header, which is created by | 10 | * arch/x86/kernel/head_32.S. It expects a boot header, which is created by |
10 | * the bootloader (the Launcher in our case). | 11 | * the bootloader (the Launcher in our case). |
11 | * | 12 | * |
@@ -21,11 +22,14 @@ | |||
21 | * data without remembering to subtract __PAGE_OFFSET! | 22 | * data without remembering to subtract __PAGE_OFFSET! |
22 | * | 23 | * |
23 | * The .section line puts this code in .init.text so it will be discarded after | 24 | * The .section line puts this code in .init.text so it will be discarded after |
24 | * boot. */ | 25 | * boot. |
26 | */ | ||
25 | .section .init.text, "ax", @progbits | 27 | .section .init.text, "ax", @progbits |
26 | ENTRY(lguest_entry) | 28 | ENTRY(lguest_entry) |
27 | /* We make the "initialization" hypercall now to tell the Host about | 29 | /* |
28 | * us, and also find out where it put our page tables. */ | 30 | * We make the "initialization" hypercall now to tell the Host about |
31 | * us, and also find out where it put our page tables. | ||
32 | */ | ||
29 | movl $LHCALL_LGUEST_INIT, %eax | 33 | movl $LHCALL_LGUEST_INIT, %eax |
30 | movl $lguest_data - __PAGE_OFFSET, %ebx | 34 | movl $lguest_data - __PAGE_OFFSET, %ebx |
31 | .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ | 35 | .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ |
@@ -33,13 +37,14 @@ ENTRY(lguest_entry) | |||
33 | /* Set up the initial stack so we can run C code. */ | 37 | /* Set up the initial stack so we can run C code. */ |
34 | movl $(init_thread_union+THREAD_SIZE),%esp | 38 | movl $(init_thread_union+THREAD_SIZE),%esp |
35 | 39 | ||
36 | /* Jumps are relative, and we're running __PAGE_OFFSET too low at the | 40 | /* Jumps are relative: we're running __PAGE_OFFSET too low. */ |
37 | * moment. */ | ||
38 | jmp lguest_init+__PAGE_OFFSET | 41 | jmp lguest_init+__PAGE_OFFSET |
39 | 42 | ||
40 | /*G:055 We create a macro which puts the assembler code between lgstart_ and | 43 | /*G:055 |
41 | * lgend_ markers. These templates are put in the .text section: they can't be | 44 | * We create a macro which puts the assembler code between lgstart_ and lgend_ |
42 | * discarded after boot as we may need to patch modules, too. */ | 45 | * markers. These templates are put in the .text section: they can't be |
46 | * discarded after boot as we may need to patch modules, too. | ||
47 | */ | ||
43 | .text | 48 | .text |
44 | #define LGUEST_PATCH(name, insns...) \ | 49 | #define LGUEST_PATCH(name, insns...) \ |
45 | lgstart_##name: insns; lgend_##name:; \ | 50 | lgstart_##name: insns; lgend_##name:; \ |
@@ -48,83 +53,103 @@ ENTRY(lguest_entry) | |||
48 | LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) | 53 | LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) |
49 | LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) | 54 | LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) |
50 | 55 | ||
51 | /*G:033 But using those wrappers is inefficient (we'll see why that doesn't | 56 | /*G:033 |
52 | * matter for save_fl and irq_disable later). If we write our routines | 57 | * But using those wrappers is inefficient (we'll see why that doesn't matter |
53 | * carefully in assembler, we can avoid clobbering any registers and avoid | 58 | * for save_fl and irq_disable later). If we write our routines carefully in |
54 | * jumping through the wrapper functions. | 59 | * assembler, we can avoid clobbering any registers and avoid jumping through |
60 | * the wrapper functions. | ||
55 | * | 61 | * |
56 | * I skipped over our first piece of assembler, but this one is worth studying | 62 | * I skipped over our first piece of assembler, but this one is worth studying |
57 | * in a bit more detail so I'll describe in easy stages. First, the routine | 63 | * in a bit more detail so I'll describe in easy stages. First, the routine to |
58 | * to enable interrupts: */ | 64 | * enable interrupts: |
65 | */ | ||
59 | ENTRY(lg_irq_enable) | 66 | ENTRY(lg_irq_enable) |
60 | /* The reverse of irq_disable, this sets lguest_data.irq_enabled to | 67 | /* |
61 | * X86_EFLAGS_IF (ie. "Interrupts enabled"). */ | 68 | * The reverse of irq_disable, this sets lguest_data.irq_enabled to |
69 | * X86_EFLAGS_IF (ie. "Interrupts enabled"). | ||
70 | */ | ||
62 | movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled | 71 | movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled |
63 | /* But now we need to check if the Host wants to know: there might have | 72 | /* |
73 | * But now we need to check if the Host wants to know: there might have | ||
64 | * been interrupts waiting to be delivered, in which case it will have | 74 | * been interrupts waiting to be delivered, in which case it will have |
65 | * set lguest_data.irq_pending to X86_EFLAGS_IF. If it's not zero, we | 75 | * set lguest_data.irq_pending to X86_EFLAGS_IF. If it's not zero, we |
66 | * jump to send_interrupts, otherwise we're done. */ | 76 | * jump to send_interrupts, otherwise we're done. |
77 | */ | ||
67 | testl $0, lguest_data+LGUEST_DATA_irq_pending | 78 | testl $0, lguest_data+LGUEST_DATA_irq_pending |
68 | jnz send_interrupts | 79 | jnz send_interrupts |
69 | /* One cool thing about x86 is that you can do many things without using | 80 | /* |
81 | * One cool thing about x86 is that you can do many things without using | ||
70 | * a register. In this case, the normal path hasn't needed to save or | 82 | * a register. In this case, the normal path hasn't needed to save or |
71 | * restore any registers at all! */ | 83 | * restore any registers at all! |
84 | */ | ||
72 | ret | 85 | ret |
73 | send_interrupts: | 86 | send_interrupts: |
74 | /* OK, now we need a register: eax is used for the hypercall number, | 87 | /* |
88 | * OK, now we need a register: eax is used for the hypercall number, | ||
75 | * which is LHCALL_SEND_INTERRUPTS. | 89 | * which is LHCALL_SEND_INTERRUPTS. |
76 | * | 90 | * |
77 | * We used not to bother with this pending detection at all, which was | 91 | * We used not to bother with this pending detection at all, which was |
78 | * much simpler. Sooner or later the Host would realize it had to | 92 | * much simpler. Sooner or later the Host would realize it had to |
79 | * send us an interrupt. But that turns out to make performance 7 | 93 | * send us an interrupt. But that turns out to make performance 7 |
80 | * times worse on a simple tcp benchmark. So now we do this the hard | 94 | * times worse on a simple tcp benchmark. So now we do this the hard |
81 | * way. */ | 95 | * way. |
96 | */ | ||
82 | pushl %eax | 97 | pushl %eax |
83 | movl $LHCALL_SEND_INTERRUPTS, %eax | 98 | movl $LHCALL_SEND_INTERRUPTS, %eax |
84 | /* This is a vmcall instruction (same thing that KVM uses). Older | 99 | /* |
100 | * This is a vmcall instruction (same thing that KVM uses). Older | ||
85 | * assembler versions might not know the "vmcall" instruction, so we | 101 | * assembler versions might not know the "vmcall" instruction, so we |
86 | * create one manually here. */ | 102 | * create one manually here. |
103 | */ | ||
87 | .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ | 104 | .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ |
105 | /* Put eax back the way we found it. */ | ||
88 | popl %eax | 106 | popl %eax |
89 | ret | 107 | ret |
90 | 108 | ||
91 | /* Finally, the "popf" or "restore flags" routine. The %eax register holds the | 109 | /* |
110 | * Finally, the "popf" or "restore flags" routine. The %eax register holds the | ||
92 | * flags (in practice, either X86_EFLAGS_IF or 0): if it's X86_EFLAGS_IF we're | 111 | * flags (in practice, either X86_EFLAGS_IF or 0): if it's X86_EFLAGS_IF we're |
93 | * enabling interrupts again, if it's 0 we're leaving them off. */ | 112 | * enabling interrupts again, if it's 0 we're leaving them off. |
113 | */ | ||
94 | ENTRY(lg_restore_fl) | 114 | ENTRY(lg_restore_fl) |
95 | /* This is just "lguest_data.irq_enabled = flags;" */ | 115 | /* This is just "lguest_data.irq_enabled = flags;" */ |
96 | movl %eax, lguest_data+LGUEST_DATA_irq_enabled | 116 | movl %eax, lguest_data+LGUEST_DATA_irq_enabled |
97 | /* Now, if the %eax value has enabled interrupts and | 117 | /* |
118 | * Now, if the %eax value has enabled interrupts and | ||
98 | * lguest_data.irq_pending is set, we want to tell the Host so it can | 119 | * lguest_data.irq_pending is set, we want to tell the Host so it can |
99 | * deliver any outstanding interrupts. Fortunately, both values will | 120 | * deliver any outstanding interrupts. Fortunately, both values will |
100 | * be X86_EFLAGS_IF (ie. 512) in that case, and the "testl" | 121 | * be X86_EFLAGS_IF (ie. 512) in that case, and the "testl" |
101 | * instruction will AND them together for us. If both are set, we | 122 | * instruction will AND them together for us. If both are set, we |
102 | * jump to send_interrupts. */ | 123 | * jump to send_interrupts. |
124 | */ | ||
103 | testl lguest_data+LGUEST_DATA_irq_pending, %eax | 125 | testl lguest_data+LGUEST_DATA_irq_pending, %eax |
104 | jnz send_interrupts | 126 | jnz send_interrupts |
105 | /* Again, the normal path has used no extra registers. Clever, huh? */ | 127 | /* Again, the normal path has used no extra registers. Clever, huh? */ |
106 | ret | 128 | ret |
129 | /*:*/ | ||
107 | 130 | ||
108 | /* These demark the EIP range where host should never deliver interrupts. */ | 131 | /* These demark the EIP range where host should never deliver interrupts. */ |
109 | .global lguest_noirq_start | 132 | .global lguest_noirq_start |
110 | .global lguest_noirq_end | 133 | .global lguest_noirq_end |
111 | 134 | ||
112 | /*M:004 When the Host reflects a trap or injects an interrupt into the Guest, | 135 | /*M:004 |
113 | * it sets the eflags interrupt bit on the stack based on | 136 | * When the Host reflects a trap or injects an interrupt into the Guest, it |
114 | * lguest_data.irq_enabled, so the Guest iret logic does the right thing when | 137 | * sets the eflags interrupt bit on the stack based on lguest_data.irq_enabled, |
115 | * restoring it. However, when the Host sets the Guest up for direct traps, | 138 | * so the Guest iret logic does the right thing when restoring it. However, |
116 | * such as system calls, the processor is the one to push eflags onto the | 139 | * when the Host sets the Guest up for direct traps, such as system calls, the |
117 | * stack, and the interrupt bit will be 1 (in reality, interrupts are always | 140 | * processor is the one to push eflags onto the stack, and the interrupt bit |
118 | * enabled in the Guest). | 141 | * will be 1 (in reality, interrupts are always enabled in the Guest). |
119 | * | 142 | * |
120 | * This turns out to be harmless: the only trap which should happen under Linux | 143 | * This turns out to be harmless: the only trap which should happen under Linux |
121 | * with interrupts disabled is Page Fault (due to our lazy mapping of vmalloc | 144 | * with interrupts disabled is Page Fault (due to our lazy mapping of vmalloc |
122 | * regions), which has to be reflected through the Host anyway. If another | 145 | * regions), which has to be reflected through the Host anyway. If another |
123 | * trap *does* go off when interrupts are disabled, the Guest will panic, and | 146 | * trap *does* go off when interrupts are disabled, the Guest will panic, and |
124 | * we'll never get to this iret! :*/ | 147 | * we'll never get to this iret! |
148 | :*/ | ||
125 | 149 | ||
126 | /*G:045 There is one final paravirt_op that the Guest implements, and glancing | 150 | /*G:045 |
127 | * at it you can see why I left it to last. It's *cool*! It's in *assembler*! | 151 | * There is one final paravirt_op that the Guest implements, and glancing at it |
152 | * you can see why I left it to last. It's *cool*! It's in *assembler*! | ||
128 | * | 153 | * |
129 | * The "iret" instruction is used to return from an interrupt or trap. The | 154 | * The "iret" instruction is used to return from an interrupt or trap. The |
130 | * stack looks like this: | 155 | * stack looks like this: |
@@ -148,15 +173,18 @@ ENTRY(lg_restore_fl) | |||
148 | * return to userspace or wherever. Our solution to this is to surround the | 173 | * return to userspace or wherever. Our solution to this is to surround the |
149 | * code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the | 174 | * code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the |
150 | * Host that it is *never* to interrupt us there, even if interrupts seem to be | 175 | * Host that it is *never* to interrupt us there, even if interrupts seem to be |
151 | * enabled. */ | 176 | * enabled. |
177 | */ | ||
152 | ENTRY(lguest_iret) | 178 | ENTRY(lguest_iret) |
153 | pushl %eax | 179 | pushl %eax |
154 | movl 12(%esp), %eax | 180 | movl 12(%esp), %eax |
155 | lguest_noirq_start: | 181 | lguest_noirq_start: |
156 | /* Note the %ss: segment prefix here. Normal data accesses use the | 182 | /* |
183 | * Note the %ss: segment prefix here. Normal data accesses use the | ||
157 | * "ds" segment, but that will have already been restored for whatever | 184 | * "ds" segment, but that will have already been restored for whatever |
158 | * we're returning to (such as userspace): we can't trust it. The %ss: | 185 | * we're returning to (such as userspace): we can't trust it. The %ss: |
159 | * prefix makes sure we use the stack segment, which is still valid. */ | 186 | * prefix makes sure we use the stack segment, which is still valid. |
187 | */ | ||
160 | movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled | 188 | movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled |
161 | popl %eax | 189 | popl %eax |
162 | iret | 190 | iret |
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index f9d35632666b..9e609206fac9 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -9,7 +9,10 @@ lib-y += thunk_$(BITS).o | |||
9 | lib-y += usercopy_$(BITS).o getuser.o putuser.o | 9 | lib-y += usercopy_$(BITS).o getuser.o putuser.o |
10 | lib-y += memcpy_$(BITS).o | 10 | lib-y += memcpy_$(BITS).o |
11 | 11 | ||
12 | obj-y += msr-reg.o msr-reg-export.o | ||
13 | |||
12 | ifeq ($(CONFIG_X86_32),y) | 14 | ifeq ($(CONFIG_X86_32),y) |
15 | obj-y += atomic64_32.o | ||
13 | lib-y += checksum_32.o | 16 | lib-y += checksum_32.o |
14 | lib-y += strstr_32.o | 17 | lib-y += strstr_32.o |
15 | lib-y += semaphore_32.o string_32.o | 18 | lib-y += semaphore_32.o string_32.o |
diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c new file mode 100644 index 000000000000..824fa0be55a3 --- /dev/null +++ b/arch/x86/lib/atomic64_32.c | |||
@@ -0,0 +1,230 @@ | |||
1 | #include <linux/compiler.h> | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/types.h> | ||
4 | |||
5 | #include <asm/processor.h> | ||
6 | #include <asm/cmpxchg.h> | ||
7 | #include <asm/atomic.h> | ||
8 | |||
9 | static noinline u64 cmpxchg8b(u64 *ptr, u64 old, u64 new) | ||
10 | { | ||
11 | u32 low = new; | ||
12 | u32 high = new >> 32; | ||
13 | |||
14 | asm volatile( | ||
15 | LOCK_PREFIX "cmpxchg8b %1\n" | ||
16 | : "+A" (old), "+m" (*ptr) | ||
17 | : "b" (low), "c" (high) | ||
18 | ); | ||
19 | return old; | ||
20 | } | ||
21 | |||
22 | u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val) | ||
23 | { | ||
24 | return cmpxchg8b(&ptr->counter, old_val, new_val); | ||
25 | } | ||
26 | EXPORT_SYMBOL(atomic64_cmpxchg); | ||
27 | |||
28 | /** | ||
29 | * atomic64_xchg - xchg atomic64 variable | ||
30 | * @ptr: pointer to type atomic64_t | ||
31 | * @new_val: value to assign | ||
32 | * | ||
33 | * Atomically xchgs the value of @ptr to @new_val and returns | ||
34 | * the old value. | ||
35 | */ | ||
36 | u64 atomic64_xchg(atomic64_t *ptr, u64 new_val) | ||
37 | { | ||
38 | /* | ||
39 | * Try first with a (possibly incorrect) assumption about | ||
40 | * what we have there. We'll do two loops most likely, | ||
41 | * but we'll get an ownership MESI transaction straight away | ||
42 | * instead of a read transaction followed by a | ||
43 | * flush-for-ownership transaction: | ||
44 | */ | ||
45 | u64 old_val, real_val = 0; | ||
46 | |||
47 | do { | ||
48 | old_val = real_val; | ||
49 | |||
50 | real_val = atomic64_cmpxchg(ptr, old_val, new_val); | ||
51 | |||
52 | } while (real_val != old_val); | ||
53 | |||
54 | return old_val; | ||
55 | } | ||
56 | EXPORT_SYMBOL(atomic64_xchg); | ||
57 | |||
58 | /** | ||
59 | * atomic64_set - set atomic64 variable | ||
60 | * @ptr: pointer to type atomic64_t | ||
61 | * @new_val: value to assign | ||
62 | * | ||
63 | * Atomically sets the value of @ptr to @new_val. | ||
64 | */ | ||
65 | void atomic64_set(atomic64_t *ptr, u64 new_val) | ||
66 | { | ||
67 | atomic64_xchg(ptr, new_val); | ||
68 | } | ||
69 | EXPORT_SYMBOL(atomic64_set); | ||
70 | |||
71 | /** | ||
72 | EXPORT_SYMBOL(atomic64_read); | ||
73 | * atomic64_add_return - add and return | ||
74 | * @delta: integer value to add | ||
75 | * @ptr: pointer to type atomic64_t | ||
76 | * | ||
77 | * Atomically adds @delta to @ptr and returns @delta + *@ptr | ||
78 | */ | ||
79 | noinline u64 atomic64_add_return(u64 delta, atomic64_t *ptr) | ||
80 | { | ||
81 | /* | ||
82 | * Try first with a (possibly incorrect) assumption about | ||
83 | * what we have there. We'll do two loops most likely, | ||
84 | * but we'll get an ownership MESI transaction straight away | ||
85 | * instead of a read transaction followed by a | ||
86 | * flush-for-ownership transaction: | ||
87 | */ | ||
88 | u64 old_val, new_val, real_val = 0; | ||
89 | |||
90 | do { | ||
91 | old_val = real_val; | ||
92 | new_val = old_val + delta; | ||
93 | |||
94 | real_val = atomic64_cmpxchg(ptr, old_val, new_val); | ||
95 | |||
96 | } while (real_val != old_val); | ||
97 | |||
98 | return new_val; | ||
99 | } | ||
100 | EXPORT_SYMBOL(atomic64_add_return); | ||
101 | |||
102 | u64 atomic64_sub_return(u64 delta, atomic64_t *ptr) | ||
103 | { | ||
104 | return atomic64_add_return(-delta, ptr); | ||
105 | } | ||
106 | EXPORT_SYMBOL(atomic64_sub_return); | ||
107 | |||
108 | u64 atomic64_inc_return(atomic64_t *ptr) | ||
109 | { | ||
110 | return atomic64_add_return(1, ptr); | ||
111 | } | ||
112 | EXPORT_SYMBOL(atomic64_inc_return); | ||
113 | |||
114 | u64 atomic64_dec_return(atomic64_t *ptr) | ||
115 | { | ||
116 | return atomic64_sub_return(1, ptr); | ||
117 | } | ||
118 | EXPORT_SYMBOL(atomic64_dec_return); | ||
119 | |||
120 | /** | ||
121 | * atomic64_add - add integer to atomic64 variable | ||
122 | * @delta: integer value to add | ||
123 | * @ptr: pointer to type atomic64_t | ||
124 | * | ||
125 | * Atomically adds @delta to @ptr. | ||
126 | */ | ||
127 | void atomic64_add(u64 delta, atomic64_t *ptr) | ||
128 | { | ||
129 | atomic64_add_return(delta, ptr); | ||
130 | } | ||
131 | EXPORT_SYMBOL(atomic64_add); | ||
132 | |||
133 | /** | ||
134 | * atomic64_sub - subtract the atomic64 variable | ||
135 | * @delta: integer value to subtract | ||
136 | * @ptr: pointer to type atomic64_t | ||
137 | * | ||
138 | * Atomically subtracts @delta from @ptr. | ||
139 | */ | ||
140 | void atomic64_sub(u64 delta, atomic64_t *ptr) | ||
141 | { | ||
142 | atomic64_add(-delta, ptr); | ||
143 | } | ||
144 | EXPORT_SYMBOL(atomic64_sub); | ||
145 | |||
146 | /** | ||
147 | * atomic64_sub_and_test - subtract value from variable and test result | ||
148 | * @delta: integer value to subtract | ||
149 | * @ptr: pointer to type atomic64_t | ||
150 | * | ||
151 | * Atomically subtracts @delta from @ptr and returns | ||
152 | * true if the result is zero, or false for all | ||
153 | * other cases. | ||
154 | */ | ||
155 | int atomic64_sub_and_test(u64 delta, atomic64_t *ptr) | ||
156 | { | ||
157 | u64 new_val = atomic64_sub_return(delta, ptr); | ||
158 | |||
159 | return new_val == 0; | ||
160 | } | ||
161 | EXPORT_SYMBOL(atomic64_sub_and_test); | ||
162 | |||
163 | /** | ||
164 | * atomic64_inc - increment atomic64 variable | ||
165 | * @ptr: pointer to type atomic64_t | ||
166 | * | ||
167 | * Atomically increments @ptr by 1. | ||
168 | */ | ||
169 | void atomic64_inc(atomic64_t *ptr) | ||
170 | { | ||
171 | atomic64_add(1, ptr); | ||
172 | } | ||
173 | EXPORT_SYMBOL(atomic64_inc); | ||
174 | |||
175 | /** | ||
176 | * atomic64_dec - decrement atomic64 variable | ||
177 | * @ptr: pointer to type atomic64_t | ||
178 | * | ||
179 | * Atomically decrements @ptr by 1. | ||
180 | */ | ||
181 | void atomic64_dec(atomic64_t *ptr) | ||
182 | { | ||
183 | atomic64_sub(1, ptr); | ||
184 | } | ||
185 | EXPORT_SYMBOL(atomic64_dec); | ||
186 | |||
187 | /** | ||
188 | * atomic64_dec_and_test - decrement and test | ||
189 | * @ptr: pointer to type atomic64_t | ||
190 | * | ||
191 | * Atomically decrements @ptr by 1 and | ||
192 | * returns true if the result is 0, or false for all other | ||
193 | * cases. | ||
194 | */ | ||
195 | int atomic64_dec_and_test(atomic64_t *ptr) | ||
196 | { | ||
197 | return atomic64_sub_and_test(1, ptr); | ||
198 | } | ||
199 | EXPORT_SYMBOL(atomic64_dec_and_test); | ||
200 | |||
201 | /** | ||
202 | * atomic64_inc_and_test - increment and test | ||
203 | * @ptr: pointer to type atomic64_t | ||
204 | * | ||
205 | * Atomically increments @ptr by 1 | ||
206 | * and returns true if the result is zero, or false for all | ||
207 | * other cases. | ||
208 | */ | ||
209 | int atomic64_inc_and_test(atomic64_t *ptr) | ||
210 | { | ||
211 | return atomic64_sub_and_test(-1, ptr); | ||
212 | } | ||
213 | EXPORT_SYMBOL(atomic64_inc_and_test); | ||
214 | |||
215 | /** | ||
216 | * atomic64_add_negative - add and test if negative | ||
217 | * @delta: integer value to add | ||
218 | * @ptr: pointer to type atomic64_t | ||
219 | * | ||
220 | * Atomically adds @delta to @ptr and returns true | ||
221 | * if the result is negative, or false when | ||
222 | * result is greater than or equal to zero. | ||
223 | */ | ||
224 | int atomic64_add_negative(u64 delta, atomic64_t *ptr) | ||
225 | { | ||
226 | s64 new_val = atomic64_add_return(delta, ptr); | ||
227 | |||
228 | return new_val < 0; | ||
229 | } | ||
230 | EXPORT_SYMBOL(atomic64_add_negative); | ||
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index 9a10a78bb4a4..ebeafcce04a9 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S | |||
@@ -5,15 +5,14 @@ | |||
5 | * Zero a page. | 5 | * Zero a page. |
6 | * rdi page | 6 | * rdi page |
7 | */ | 7 | */ |
8 | ALIGN | 8 | ENTRY(clear_page_c) |
9 | clear_page_c: | ||
10 | CFI_STARTPROC | 9 | CFI_STARTPROC |
11 | movl $4096/8,%ecx | 10 | movl $4096/8,%ecx |
12 | xorl %eax,%eax | 11 | xorl %eax,%eax |
13 | rep stosq | 12 | rep stosq |
14 | ret | 13 | ret |
15 | CFI_ENDPROC | 14 | CFI_ENDPROC |
16 | ENDPROC(clear_page) | 15 | ENDPROC(clear_page_c) |
17 | 16 | ||
18 | ENTRY(clear_page) | 17 | ENTRY(clear_page) |
19 | CFI_STARTPROC | 18 | CFI_STARTPROC |
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index f118c110af32..6ba0f7bb85ea 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -75,6 +75,7 @@ ENTRY(copy_to_user) | |||
75 | jae bad_to_user | 75 | jae bad_to_user |
76 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | 76 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string |
77 | CFI_ENDPROC | 77 | CFI_ENDPROC |
78 | ENDPROC(copy_to_user) | ||
78 | 79 | ||
79 | /* Standard copy_from_user with segment limit checking */ | 80 | /* Standard copy_from_user with segment limit checking */ |
80 | ENTRY(copy_from_user) | 81 | ENTRY(copy_from_user) |
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index f4568605d7d5..ff485d361182 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c | |||
@@ -55,8 +55,10 @@ static void delay_tsc(unsigned long loops) | |||
55 | 55 | ||
56 | preempt_disable(); | 56 | preempt_disable(); |
57 | cpu = smp_processor_id(); | 57 | cpu = smp_processor_id(); |
58 | rdtsc_barrier(); | ||
58 | rdtscl(bclock); | 59 | rdtscl(bclock); |
59 | for (;;) { | 60 | for (;;) { |
61 | rdtsc_barrier(); | ||
60 | rdtscl(now); | 62 | rdtscl(now); |
61 | if ((now - bclock) >= loops) | 63 | if ((now - bclock) >= loops) |
62 | break; | 64 | break; |
@@ -78,6 +80,7 @@ static void delay_tsc(unsigned long loops) | |||
78 | if (unlikely(cpu != smp_processor_id())) { | 80 | if (unlikely(cpu != smp_processor_id())) { |
79 | loops -= (now - bclock); | 81 | loops -= (now - bclock); |
80 | cpu = smp_processor_id(); | 82 | cpu = smp_processor_id(); |
83 | rdtsc_barrier(); | ||
81 | rdtscl(bclock); | 84 | rdtscl(bclock); |
82 | } | 85 | } |
83 | } | 86 | } |
diff --git a/arch/x86/lib/msr-reg-export.c b/arch/x86/lib/msr-reg-export.c new file mode 100644 index 000000000000..a311cc59b65d --- /dev/null +++ b/arch/x86/lib/msr-reg-export.c | |||
@@ -0,0 +1,5 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <asm/msr.h> | ||
3 | |||
4 | EXPORT_SYMBOL(native_rdmsr_safe_regs); | ||
5 | EXPORT_SYMBOL(native_wrmsr_safe_regs); | ||
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S new file mode 100644 index 000000000000..69fa10623f21 --- /dev/null +++ b/arch/x86/lib/msr-reg.S | |||
@@ -0,0 +1,102 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <asm/dwarf2.h> | ||
4 | #include <asm/asm.h> | ||
5 | #include <asm/msr.h> | ||
6 | |||
7 | #ifdef CONFIG_X86_64 | ||
8 | /* | ||
9 | * int native_{rdmsr,wrmsr}_safe_regs(u32 gprs[8]); | ||
10 | * | ||
11 | * reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi] | ||
12 | * | ||
13 | */ | ||
14 | .macro op_safe_regs op | |
15 | ENTRY(native_\op\()_safe_regs) /* run \op with the GPRs loaded from, then stored back to, the u32 array at %rdi */ | |
16 | CFI_STARTPROC | |
17 | pushq_cfi %rbx /* saved: overwritten by the gprs[3] load below */ | |
18 | pushq_cfi %rbp /* saved: overwritten by the gprs[5] load below */ | |
19 | movq %rdi, %r10 /* Save pointer: %edi itself is reloaded from the array below */ | |
20 | xorl %r11d, %r11d /* Return value: 0 unless the fixup at 3: runs */ | |
21 | movl (%rdi), %eax | |
22 | movl 4(%rdi), %ecx | |
23 | movl 8(%rdi), %edx | |
24 | movl 12(%rdi), %ebx | |
25 | movl 20(%rdi), %ebp /* offset 16 (the esp slot in the layout) is never loaded */ | |
26 | movl 24(%rdi), %esi | |
27 | movl 28(%rdi), %edi /* loaded last: this overwrites the pointer register */ | |
28 | CFI_REMEMBER_STATE | |
29 | 1: \op /* the instruction that may fault; covered by the extable entry below */ | |
30 | 2: movl %eax, (%r10) /* write every register back through the saved pointer */ | |
31 | movl %r11d, %eax /* Return value */ | |
32 | movl %ecx, 4(%r10) | |
33 | movl %edx, 8(%r10) | |
34 | movl %ebx, 12(%r10) | |
35 | movl %ebp, 20(%r10) /* offset 16 (esp slot) is never stored either */ | |
36 | movl %esi, 24(%r10) | |
37 | movl %edi, 28(%r10) | |
38 | popq_cfi %rbp | |
39 | popq_cfi %rbx | |
40 | ret | |
41 | 3: /* fixup target for a fault at 1: */ | |
42 | CFI_RESTORE_STATE | |
43 | movl $-EIO, %r11d /* report the fault as -EIO */ | |
44 | jmp 2b /* still write the registers back and return normally */ | |
45 | |
46 | _ASM_EXTABLE(1b, 3b) /* exception-table entry: a fault at 1b resumes at 3b */ | |
47 | CFI_ENDPROC | |
48 | ENDPROC(native_\op\()_safe_regs) | |
49 | .endm | |
50 | |||
51 | #else /* X86_32 */ | ||
52 | |||
53 | .macro op_safe_regs op | |
54 | ENTRY(native_\op\()_safe_regs) /* run \op with the GPRs loaded from, then stored back to, the u32 array at %eax */ | |
55 | CFI_STARTPROC | |
56 | pushl_cfi %ebx /* ebx/ebp/esi/edi are saved because they are reloaded from the array */ | |
57 | pushl_cfi %ebp | |
58 | pushl_cfi %esi | |
59 | pushl_cfi %edi | |
60 | pushl_cfi $0 /* Return value: stack slot, stays 0 unless the fixup at 3: runs */ | |
61 | pushl_cfi %eax /* keep the array pointer on the stack; %eax is overwritten below */ | |
62 | movl 4(%eax), %ecx | |
63 | movl 8(%eax), %edx | |
64 | movl 12(%eax), %ebx | |
65 | movl 20(%eax), %ebp /* offset 16 (the esp slot in the layout) is never loaded */ | |
66 | movl 24(%eax), %esi | |
67 | movl 28(%eax), %edi | |
68 | movl (%eax), %eax /* loaded last: this overwrites the pointer register */ | |
69 | CFI_REMEMBER_STATE | |
70 | 1: \op /* the instruction that may fault; covered by the extable entry below */ | |
71 | 2: pushl_cfi %eax /* park the current %eax value on the stack */ | |
72 | movl 4(%esp), %eax /* reload the saved array pointer */ | |
73 | popl_cfi (%eax) /* pop the parked %eax value into array slot 0 */ | |
74 | addl $4, %esp /* discard the saved pointer; return-value slot is now on top */ | |
75 | CFI_ADJUST_CFA_OFFSET -4 | |
76 | movl %ecx, 4(%eax) | |
77 | movl %edx, 8(%eax) | |
78 | movl %ebx, 12(%eax) | |
79 | movl %ebp, 20(%eax) /* offset 16 (esp slot) is never stored either */ | |
80 | movl %esi, 24(%eax) | |
81 | movl %edi, 28(%eax) | |
82 | popl_cfi %eax /* return value: 0, or -EIO if the fixup ran */ | |
83 | popl_cfi %edi | |
84 | popl_cfi %esi | |
85 | popl_cfi %ebp | |
86 | popl_cfi %ebx | |
87 | ret | |
88 | 3: /* fixup target for a fault at 1: */ | |
89 | CFI_RESTORE_STATE | |
90 | movl $-EIO, 4(%esp) /* overwrite the return-value slot; 0(%esp) holds the pointer here */ | |
91 | jmp 2b /* still write the registers back and return normally */ | |
92 | |
93 | _ASM_EXTABLE(1b, 3b) /* exception-table entry: a fault at 1b resumes at 3b */ | |
94 | CFI_ENDPROC | |
95 | ENDPROC(native_\op\()_safe_regs) | |
96 | .endm | |
97 | |||
98 | #endif | ||
99 | |||
100 | op_safe_regs rdmsr | ||
101 | op_safe_regs wrmsr | ||
102 | |||
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 1440b9c0547e..33a1e3ca22d8 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c | |||
@@ -89,16 +89,13 @@ void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) | |||
89 | rv.msrs = msrs; | 89 | rv.msrs = msrs; |
90 | rv.msr_no = msr_no; | 90 | rv.msr_no = msr_no; |
91 | 91 | ||
92 | preempt_disable(); | 92 | this_cpu = get_cpu(); |
93 | /* | 93 | |
94 | * FIXME: handle the CPU we're executing on separately for now until | 94 | if (cpumask_test_cpu(this_cpu, mask)) |
95 | * smp_call_function_many has been fixed to not skip it. | 95 | __rdmsr_on_cpu(&rv); |
96 | */ | ||
97 | this_cpu = raw_smp_processor_id(); | ||
98 | smp_call_function_single(this_cpu, __rdmsr_on_cpu, &rv, 1); | ||
99 | 96 | ||
100 | smp_call_function_many(mask, __rdmsr_on_cpu, &rv, 1); | 97 | smp_call_function_many(mask, __rdmsr_on_cpu, &rv, 1); |
101 | preempt_enable(); | 98 | put_cpu(); |
102 | } | 99 | } |
103 | EXPORT_SYMBOL(rdmsr_on_cpus); | 100 | EXPORT_SYMBOL(rdmsr_on_cpus); |
104 | 101 | ||
@@ -121,16 +118,13 @@ void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) | |||
121 | rv.msrs = msrs; | 118 | rv.msrs = msrs; |
122 | rv.msr_no = msr_no; | 119 | rv.msr_no = msr_no; |
123 | 120 | ||
124 | preempt_disable(); | 121 | this_cpu = get_cpu(); |
125 | /* | 122 | |
126 | * FIXME: handle the CPU we're executing on separately for now until | 123 | if (cpumask_test_cpu(this_cpu, mask)) |
127 | * smp_call_function_many has been fixed to not skip it. | 124 | __wrmsr_on_cpu(&rv); |
128 | */ | ||
129 | this_cpu = raw_smp_processor_id(); | ||
130 | smp_call_function_single(this_cpu, __wrmsr_on_cpu, &rv, 1); | ||
131 | 125 | ||
132 | smp_call_function_many(mask, __wrmsr_on_cpu, &rv, 1); | 126 | smp_call_function_many(mask, __wrmsr_on_cpu, &rv, 1); |
133 | preempt_enable(); | 127 | put_cpu(); |
134 | } | 128 | } |
135 | EXPORT_SYMBOL(wrmsr_on_cpus); | 129 | EXPORT_SYMBOL(wrmsr_on_cpus); |
136 | 130 | ||
@@ -181,3 +175,52 @@ int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | |||
181 | return err ? err : rv.err; | 175 | return err ? err : rv.err; |
182 | } | 176 | } |
183 | EXPORT_SYMBOL(wrmsr_safe_on_cpu); | 177 | EXPORT_SYMBOL(wrmsr_safe_on_cpu); |
178 | |||
179 | /* | ||
180 | * These variants are significantly slower, but allow control over | |
181 | * the entire 32-bit GPR set. | ||
182 | */ | ||
183 | struct msr_regs_info { /* argument/result bundle handed to the per-CPU callbacks below */ | |
184 | u32 *regs; /* register array passed through to {rd,wr}msr_safe_regs() */ | |
185 | int err; /* status stored by the callback; callers preset it to -EIO */ | |
186 | }; | |
187 | |||
188 | static void __rdmsr_safe_regs_on_cpu(void *info) | ||
189 | { | ||
190 | struct msr_regs_info *rv = info; | ||
191 | |||
192 | rv->err = rdmsr_safe_regs(rv->regs); | ||
193 | } | ||
194 | |||
195 | static void __wrmsr_safe_regs_on_cpu(void *info) | ||
196 | { | ||
197 | struct msr_regs_info *rv = info; | ||
198 | |||
199 | rv->err = wrmsr_safe_regs(rv->regs); | ||
200 | } | ||
201 | |||
202 | int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) | ||
203 | { | ||
204 | int err; | ||
205 | struct msr_regs_info rv; | ||
206 | |||
207 | rv.regs = regs; | ||
208 | rv.err = -EIO; | ||
209 | err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1); | ||
210 | |||
211 | return err ? err : rv.err; | ||
212 | } | ||
213 | EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu); | ||
214 | |||
215 | int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) | ||
216 | { | ||
217 | int err; | ||
218 | struct msr_regs_info rv; | ||
219 | |||
220 | rv.regs = regs; | ||
221 | rv.err = -EIO; | ||
222 | err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1); | ||
223 | |||
224 | return err ? err : rv.err; | ||
225 | } | ||
226 | EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu); | ||
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 7c8ca91bb9ec..1f118d462acc 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c | |||
@@ -751,7 +751,7 @@ survive: | |||
751 | 751 | ||
752 | if (retval == -ENOMEM && is_global_init(current)) { | 752 | if (retval == -ENOMEM && is_global_init(current)) { |
753 | up_read(&current->mm->mmap_sem); | 753 | up_read(&current->mm->mmap_sem); |
754 | congestion_wait(WRITE, HZ/50); | 754 | congestion_wait(BLK_RW_ASYNC, HZ/50); |
755 | goto survive; | 755 | goto survive; |
756 | } | 756 | } |
757 | 757 | ||
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 78a5fff857be..775a020990a5 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -285,26 +285,25 @@ check_v8086_mode(struct pt_regs *regs, unsigned long address, | |||
285 | tsk->thread.screen_bitmap |= 1 << bit; | 285 | tsk->thread.screen_bitmap |= 1 << bit; |
286 | } | 286 | } |
287 | 287 | ||
288 | static void dump_pagetable(unsigned long address) | 288 | static bool low_pfn(unsigned long pfn) |
289 | { | 289 | { |
290 | __typeof__(pte_val(__pte(0))) page; | 290 | return pfn < max_low_pfn; |
291 | } | ||
291 | 292 | ||
292 | page = read_cr3(); | 293 | static void dump_pagetable(unsigned long address) |
293 | page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT]; | 294 | { |
295 | pgd_t *base = __va(read_cr3()); | ||
296 | pgd_t *pgd = &base[pgd_index(address)]; | ||
297 | pmd_t *pmd; | ||
298 | pte_t *pte; | ||
294 | 299 | ||
295 | #ifdef CONFIG_X86_PAE | 300 | #ifdef CONFIG_X86_PAE |
296 | printk("*pdpt = %016Lx ", page); | 301 | printk("*pdpt = %016Lx ", pgd_val(*pgd)); |
297 | if ((page >> PAGE_SHIFT) < max_low_pfn | 302 | if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd)) |
298 | && page & _PAGE_PRESENT) { | 303 | goto out; |
299 | page &= PAGE_MASK; | ||
300 | page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT) | ||
301 | & (PTRS_PER_PMD - 1)]; | ||
302 | printk(KERN_CONT "*pde = %016Lx ", page); | ||
303 | page &= ~_PAGE_NX; | ||
304 | } | ||
305 | #else | ||
306 | printk("*pde = %08lx ", page); | ||
307 | #endif | 304 | #endif |
305 | pmd = pmd_offset(pud_offset(pgd, address), address); | ||
306 | printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd)); | ||
308 | 307 | ||
309 | /* | 308 | /* |
310 | * We must not directly access the pte in the highpte | 309 | * We must not directly access the pte in the highpte |
@@ -312,16 +311,12 @@ static void dump_pagetable(unsigned long address) | |||
312 | * And let's rather not kmap-atomic the pte, just in case | 311 | * And let's rather not kmap-atomic the pte, just in case |
313 | * it's allocated already: | 312 | * it's allocated already: |
314 | */ | 313 | */ |
315 | if ((page >> PAGE_SHIFT) < max_low_pfn | 314 | if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd)) |
316 | && (page & _PAGE_PRESENT) | 315 | goto out; |
317 | && !(page & _PAGE_PSE)) { | ||
318 | |||
319 | page &= PAGE_MASK; | ||
320 | page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) | ||
321 | & (PTRS_PER_PTE - 1)]; | ||
322 | printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page); | ||
323 | } | ||
324 | 316 | ||
317 | pte = pte_offset_kernel(pmd, address); | ||
318 | printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte)); | ||
319 | out: | ||
325 | printk("\n"); | 320 | printk("\n"); |
326 | } | 321 | } |
327 | 322 | ||
@@ -426,10 +421,11 @@ static noinline int vmalloc_fault(unsigned long address) | |||
426 | } | 421 | } |
427 | 422 | ||
428 | static const char errata93_warning[] = | 423 | static const char errata93_warning[] = |
429 | KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" | 424 | KERN_ERR |
430 | KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" | 425 | "******* Your BIOS seems to not contain a fix for K8 errata #93\n" |
431 | KERN_ERR "******* Please consider a BIOS update.\n" | 426 | "******* Working around it, but it may cause SEGVs or burn power.\n" |
432 | KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; | 427 | "******* Please consider a BIOS update.\n" |
428 | "******* Disabling USB legacy in the BIOS may also help.\n"; | ||
433 | 429 | ||
434 | /* | 430 | /* |
435 | * No vm86 mode in 64-bit mode: | 431 | * No vm86 mode in 64-bit mode: |
@@ -449,16 +445,12 @@ static int bad_address(void *p) | |||
449 | 445 | ||
450 | static void dump_pagetable(unsigned long address) | 446 | static void dump_pagetable(unsigned long address) |
451 | { | 447 | { |
452 | pgd_t *pgd; | 448 | pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK); |
449 | pgd_t *pgd = base + pgd_index(address); | ||
453 | pud_t *pud; | 450 | pud_t *pud; |
454 | pmd_t *pmd; | 451 | pmd_t *pmd; |
455 | pte_t *pte; | 452 | pte_t *pte; |
456 | 453 | ||
457 | pgd = (pgd_t *)read_cr3(); | ||
458 | |||
459 | pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); | ||
460 | |||
461 | pgd += pgd_index(address); | ||
462 | if (bad_address(pgd)) | 454 | if (bad_address(pgd)) |
463 | goto bad; | 455 | goto bad; |
464 | 456 | ||
@@ -696,7 +688,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, | |||
696 | if (!printk_ratelimit()) | 688 | if (!printk_ratelimit()) |
697 | return; | 689 | return; |
698 | 690 | ||
699 | printk(KERN_CONT "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", | 691 | printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx", |
700 | task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, | 692 | task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, |
701 | tsk->comm, task_pid_nr(tsk), address, | 693 | tsk->comm, task_pid_nr(tsk), address, |
702 | (void *)regs->ip, (void *)regs->sp, error_code); | 694 | (void *)regs->ip, (void *)regs->sp, error_code); |
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 58f621e81919..1617958a3805 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c | |||
@@ -24,7 +24,7 @@ void kunmap(struct page *page) | |||
24 | * no global lock is needed and because the kmap code must perform a global TLB | 24 | * no global lock is needed and because the kmap code must perform a global TLB |
25 | * invalidation when the kmap pool wraps. | 25 | * invalidation when the kmap pool wraps. |
26 | * | 26 | * |
27 | * However when holding an atomic kmap is is not legal to sleep, so atomic | 27 | * However when holding an atomic kmap it is not legal to sleep, so atomic |
28 | * kmaps are appropriate for short, tight code paths only. | 28 | * kmaps are appropriate for short, tight code paths only. |
29 | */ | 29 | */ |
30 | void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) | 30 | void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) |
@@ -103,6 +103,7 @@ EXPORT_SYMBOL(kmap); | |||
103 | EXPORT_SYMBOL(kunmap); | 103 | EXPORT_SYMBOL(kunmap); |
104 | EXPORT_SYMBOL(kmap_atomic); | 104 | EXPORT_SYMBOL(kmap_atomic); |
105 | EXPORT_SYMBOL(kunmap_atomic); | 105 | EXPORT_SYMBOL(kunmap_atomic); |
106 | EXPORT_SYMBOL(kmap_atomic_prot); | ||
106 | 107 | ||
107 | void __init set_highmem_pages_init(void) | 108 | void __init set_highmem_pages_init(void) |
108 | { | 109 | { |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index f53b57e4086f..0607119cef94 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/tlbflush.h> | 13 | #include <asm/tlbflush.h> |
14 | #include <asm/tlb.h> | 14 | #include <asm/tlb.h> |
15 | #include <asm/proto.h> | ||
15 | 16 | ||
16 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | 17 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); |
17 | 18 | ||
@@ -177,20 +178,6 @@ static int __meminit save_mr(struct map_range *mr, int nr_range, | |||
177 | return nr_range; | 178 | return nr_range; |
178 | } | 179 | } |
179 | 180 | ||
180 | #ifdef CONFIG_X86_64 | ||
181 | static void __init init_gbpages(void) | ||
182 | { | ||
183 | if (direct_gbpages && cpu_has_gbpages) | ||
184 | printk(KERN_INFO "Using GB pages for direct mapping\n"); | ||
185 | else | ||
186 | direct_gbpages = 0; | ||
187 | } | ||
188 | #else | ||
189 | static inline void init_gbpages(void) | ||
190 | { | ||
191 | } | ||
192 | #endif | ||
193 | |||
194 | /* | 181 | /* |
195 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. | 182 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. |
196 | * This runs before bootmem is initialized and gets pages directly from | 183 | * This runs before bootmem is initialized and gets pages directly from |
@@ -210,9 +197,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
210 | 197 | ||
211 | printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); | 198 | printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); |
212 | 199 | ||
213 | if (!after_bootmem) | ||
214 | init_gbpages(); | ||
215 | |||
216 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) | 200 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) |
217 | /* | 201 | /* |
218 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | 202 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index c4378f4fd4a5..ea56b8cbb6a6 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -598,6 +598,15 @@ void __init paging_init(void) | |||
598 | 598 | ||
599 | sparse_memory_present_with_active_regions(MAX_NUMNODES); | 599 | sparse_memory_present_with_active_regions(MAX_NUMNODES); |
600 | sparse_init(); | 600 | sparse_init(); |
601 | |||
602 | /* | ||
603 | * clear the default setting with node 0 | ||
604 | * note: don't use nodes_clear here, that is really clearing when | ||
605 | * numa support is not compiled in, and later node_set_state | ||
606 | * will not set it back. | ||
607 | */ | ||
608 | node_clear_state(0, N_NORMAL_MEMORY); | ||
609 | |||
601 | free_area_init_nodes(max_zone_pfns); | 610 | free_area_init_nodes(max_zone_pfns); |
602 | } | 611 | } |
603 | 612 | ||
@@ -787,7 +796,7 @@ int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, | |||
787 | return ret; | 796 | return ret; |
788 | 797 | ||
789 | #else | 798 | #else |
790 | reserve_bootmem(phys, len, BOOTMEM_DEFAULT); | 799 | reserve_bootmem(phys, len, flags); |
791 | #endif | 800 | #endif |
792 | 801 | ||
793 | if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { | 802 | if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { |
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index 2c55ed098654..528bf954eb74 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c | |||
@@ -331,6 +331,20 @@ static void kmemcheck_read_strict(struct pt_regs *regs, | |||
331 | kmemcheck_shadow_set(shadow, size); | 331 | kmemcheck_shadow_set(shadow, size); |
332 | } | 332 | } |
333 | 333 | ||
334 | bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) | ||
335 | { | ||
336 | enum kmemcheck_shadow status; | ||
337 | void *shadow; | ||
338 | |||
339 | shadow = kmemcheck_shadow_lookup(addr); | ||
340 | if (!shadow) | ||
341 | return true; | ||
342 | |||
343 | status = kmemcheck_shadow_test(shadow, size); | ||
344 | |||
345 | return status == KMEMCHECK_SHADOW_INITIALIZED; | ||
346 | } | ||
347 | |||
334 | /* Access may cross page boundary */ | 348 | /* Access may cross page boundary */ |
335 | static void kmemcheck_read(struct pt_regs *regs, | 349 | static void kmemcheck_read(struct pt_regs *regs, |
336 | unsigned long addr, unsigned int size) | 350 | unsigned long addr, unsigned int size) |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 3cfe9ced8a4c..7e600c1962db 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/interrupt.h> | 11 | #include <linux/interrupt.h> |
12 | #include <linux/seq_file.h> | 12 | #include <linux/seq_file.h> |
13 | #include <linux/debugfs.h> | 13 | #include <linux/debugfs.h> |
14 | #include <linux/pfn.h> | ||
14 | 15 | ||
15 | #include <asm/e820.h> | 16 | #include <asm/e820.h> |
16 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
@@ -590,9 +591,12 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) | |||
590 | unsigned int level; | 591 | unsigned int level; |
591 | pte_t *kpte, old_pte; | 592 | pte_t *kpte, old_pte; |
592 | 593 | ||
593 | if (cpa->flags & CPA_PAGES_ARRAY) | 594 | if (cpa->flags & CPA_PAGES_ARRAY) { |
594 | address = (unsigned long)page_address(cpa->pages[cpa->curpage]); | 595 | struct page *page = cpa->pages[cpa->curpage]; |
595 | else if (cpa->flags & CPA_ARRAY) | 596 | if (unlikely(PageHighMem(page))) |
597 | return 0; | ||
598 | address = (unsigned long)page_address(page); | ||
599 | } else if (cpa->flags & CPA_ARRAY) | ||
596 | address = cpa->vaddr[cpa->curpage]; | 600 | address = cpa->vaddr[cpa->curpage]; |
597 | else | 601 | else |
598 | address = *cpa->vaddr; | 602 | address = *cpa->vaddr; |
@@ -681,8 +685,9 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias); | |||
681 | static int cpa_process_alias(struct cpa_data *cpa) | 685 | static int cpa_process_alias(struct cpa_data *cpa) |
682 | { | 686 | { |
683 | struct cpa_data alias_cpa; | 687 | struct cpa_data alias_cpa; |
684 | int ret = 0; | 688 | unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); |
685 | unsigned long temp_cpa_vaddr, vaddr; | 689 | unsigned long vaddr, remapped; |
690 | int ret; | ||
686 | 691 | ||
687 | if (cpa->pfn >= max_pfn_mapped) | 692 | if (cpa->pfn >= max_pfn_mapped) |
688 | return 0; | 693 | return 0; |
@@ -695,9 +700,12 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
695 | * No need to redo, when the primary call touched the direct | 700 | * No need to redo, when the primary call touched the direct |
696 | * mapping already: | 701 | * mapping already: |
697 | */ | 702 | */ |
698 | if (cpa->flags & CPA_PAGES_ARRAY) | 703 | if (cpa->flags & CPA_PAGES_ARRAY) { |
699 | vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]); | 704 | struct page *page = cpa->pages[cpa->curpage]; |
700 | else if (cpa->flags & CPA_ARRAY) | 705 | if (unlikely(PageHighMem(page))) |
706 | return 0; | ||
707 | vaddr = (unsigned long)page_address(page); | ||
708 | } else if (cpa->flags & CPA_ARRAY) | ||
701 | vaddr = cpa->vaddr[cpa->curpage]; | 709 | vaddr = cpa->vaddr[cpa->curpage]; |
702 | else | 710 | else |
703 | vaddr = *cpa->vaddr; | 711 | vaddr = *cpa->vaddr; |
@@ -706,42 +714,55 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
706 | PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) { | 714 | PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) { |
707 | 715 | ||
708 | alias_cpa = *cpa; | 716 | alias_cpa = *cpa; |
709 | temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); | 717 | alias_cpa.vaddr = &laddr; |
710 | alias_cpa.vaddr = &temp_cpa_vaddr; | ||
711 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); | 718 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); |
712 | 719 | ||
713 | |||
714 | ret = __change_page_attr_set_clr(&alias_cpa, 0); | 720 | ret = __change_page_attr_set_clr(&alias_cpa, 0); |
721 | if (ret) | ||
722 | return ret; | ||
715 | } | 723 | } |
716 | 724 | ||
717 | #ifdef CONFIG_X86_64 | 725 | #ifdef CONFIG_X86_64 |
718 | if (ret) | ||
719 | return ret; | ||
720 | /* | ||
721 | * No need to redo, when the primary call touched the high | ||
722 | * mapping already: | ||
723 | */ | ||
724 | if (within(vaddr, (unsigned long) _text, _brk_end)) | ||
725 | return 0; | ||
726 | |||
727 | /* | 726 | /* |
728 | * If the physical address is inside the kernel map, we need | 727 | * If the primary call didn't touch the high mapping already |
728 | * and the physical address is inside the kernel map, we need | ||
729 | * to touch the high mapped kernel as well: | 729 | * to touch the high mapped kernel as well: |
730 | */ | 730 | */ |
731 | if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) | 731 | if (!within(vaddr, (unsigned long)_text, _brk_end) && |
732 | return 0; | 732 | within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) { |
733 | unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + | ||
734 | __START_KERNEL_map - phys_base; | ||
735 | alias_cpa = *cpa; | ||
736 | alias_cpa.vaddr = &temp_cpa_vaddr; | ||
737 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); | ||
733 | 738 | ||
734 | alias_cpa = *cpa; | 739 | /* |
735 | temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; | 740 | * The high mapping range is imprecise, so ignore the |
736 | alias_cpa.vaddr = &temp_cpa_vaddr; | 741 | * return value. |
737 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); | 742 | */ |
743 | __change_page_attr_set_clr(&alias_cpa, 0); | ||
744 | } | ||
745 | #endif | ||
738 | 746 | ||
739 | /* | 747 | /* |
740 | * The high mapping range is imprecise, so ignore the return value. | 748 | * If the PMD page was partially used for per-cpu remapping, |
749 | * the recycled area needs to be split and modified. Because | ||
750 | * the area is always proper subset of a PMD page | ||
751 | * cpa->numpages is guaranteed to be 1 for these areas, so | ||
752 | * there's no need to loop over and check for further remaps. | ||
741 | */ | 753 | */ |
742 | __change_page_attr_set_clr(&alias_cpa, 0); | 754 | remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr); |
743 | #endif | 755 | if (remapped) { |
744 | return ret; | 756 | WARN_ON(cpa->numpages > 1); |
757 | alias_cpa = *cpa; | ||
758 | alias_cpa.vaddr = &remapped; | ||
759 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); | ||
760 | ret = __change_page_attr_set_clr(&alias_cpa, 0); | ||
761 | if (ret) | ||
762 | return ret; | ||
763 | } | ||
764 | |||
765 | return 0; | ||
745 | } | 766 | } |
746 | 767 | ||
747 | static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) | 768 | static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) |
@@ -982,12 +1003,15 @@ EXPORT_SYMBOL(set_memory_array_uc); | |||
982 | int _set_memory_wc(unsigned long addr, int numpages) | 1003 | int _set_memory_wc(unsigned long addr, int numpages) |
983 | { | 1004 | { |
984 | int ret; | 1005 | int ret; |
1006 | unsigned long addr_copy = addr; | ||
1007 | |||
985 | ret = change_page_attr_set(&addr, numpages, | 1008 | ret = change_page_attr_set(&addr, numpages, |
986 | __pgprot(_PAGE_CACHE_UC_MINUS), 0); | 1009 | __pgprot(_PAGE_CACHE_UC_MINUS), 0); |
987 | |||
988 | if (!ret) { | 1010 | if (!ret) { |
989 | ret = change_page_attr_set(&addr, numpages, | 1011 | ret = change_page_attr_set_clr(&addr_copy, numpages, |
990 | __pgprot(_PAGE_CACHE_WC), 0); | 1012 | __pgprot(_PAGE_CACHE_WC), |
1013 | __pgprot(_PAGE_CACHE_MASK), | ||
1014 | 0, 0, NULL); | ||
991 | } | 1015 | } |
992 | return ret; | 1016 | return ret; |
993 | } | 1017 | } |
@@ -1104,7 +1128,9 @@ int set_pages_array_uc(struct page **pages, int addrinarray) | |||
1104 | int free_idx; | 1128 | int free_idx; |
1105 | 1129 | ||
1106 | for (i = 0; i < addrinarray; i++) { | 1130 | for (i = 0; i < addrinarray; i++) { |
1107 | start = (unsigned long)page_address(pages[i]); | 1131 | if (PageHighMem(pages[i])) |
1132 | continue; | ||
1133 | start = page_to_pfn(pages[i]) << PAGE_SHIFT; | ||
1108 | end = start + PAGE_SIZE; | 1134 | end = start + PAGE_SIZE; |
1109 | if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) | 1135 | if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) |
1110 | goto err_out; | 1136 | goto err_out; |
@@ -1117,7 +1143,9 @@ int set_pages_array_uc(struct page **pages, int addrinarray) | |||
1117 | err_out: | 1143 | err_out: |
1118 | free_idx = i; | 1144 | free_idx = i; |
1119 | for (i = 0; i < free_idx; i++) { | 1145 | for (i = 0; i < free_idx; i++) { |
1120 | start = (unsigned long)page_address(pages[i]); | 1146 | if (PageHighMem(pages[i])) |
1147 | continue; | ||
1148 | start = page_to_pfn(pages[i]) << PAGE_SHIFT; | ||
1121 | end = start + PAGE_SIZE; | 1149 | end = start + PAGE_SIZE; |
1122 | free_memtype(start, end); | 1150 | free_memtype(start, end); |
1123 | } | 1151 | } |
@@ -1146,7 +1174,9 @@ int set_pages_array_wb(struct page **pages, int addrinarray) | |||
1146 | return retval; | 1174 | return retval; |
1147 | 1175 | ||
1148 | for (i = 0; i < addrinarray; i++) { | 1176 | for (i = 0; i < addrinarray; i++) { |
1149 | start = (unsigned long)page_address(pages[i]); | 1177 | if (PageHighMem(pages[i])) |
1178 | continue; | ||
1179 | start = page_to_pfn(pages[i]) << PAGE_SHIFT; | ||
1150 | end = start + PAGE_SIZE; | 1180 | end = start + PAGE_SIZE; |
1151 | free_memtype(start, end); | 1181 | free_memtype(start, end); |
1152 | } | 1182 | } |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index e6718bb28065..b2f7d3e59b86 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -623,7 +623,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, | |||
623 | return ret; | 623 | return ret; |
624 | 624 | ||
625 | if (flags != want_flags) { | 625 | if (flags != want_flags) { |
626 | if (strict_prot || !is_new_memtype_allowed(want_flags, flags)) { | 626 | if (strict_prot || |
627 | !is_new_memtype_allowed(paddr, size, want_flags, flags)) { | ||
627 | free_memtype(paddr, paddr + size); | 628 | free_memtype(paddr, paddr + size); |
628 | printk(KERN_ERR "%s:%d map pfn expected mapping type %s" | 629 | printk(KERN_ERR "%s:%d map pfn expected mapping type %s" |
629 | " for %Lx-%Lx, got %s\n", | 630 | " for %Lx-%Lx, got %s\n", |
@@ -826,7 +827,7 @@ static int memtype_seq_show(struct seq_file *seq, void *v) | |||
826 | return 0; | 827 | return 0; |
827 | } | 828 | } |
828 | 829 | ||
829 | static struct seq_operations memtype_seq_ops = { | 830 | static const struct seq_operations memtype_seq_ops = { |
830 | .start = memtype_seq_start, | 831 | .start = memtype_seq_start, |
831 | .next = memtype_seq_next, | 832 | .next = memtype_seq_next, |
832 | .stop = memtype_seq_stop, | 833 | .stop = memtype_seq_stop, |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 8e43bdd45456..ed34f5e35999 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -25,7 +25,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) | |||
25 | return pte; | 25 | return pte; |
26 | } | 26 | } |
27 | 27 | ||
28 | void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) | 28 | void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) |
29 | { | 29 | { |
30 | pgtable_page_dtor(pte); | 30 | pgtable_page_dtor(pte); |
31 | paravirt_release_pte(page_to_pfn(pte)); | 31 | paravirt_release_pte(page_to_pfn(pte)); |
@@ -33,14 +33,14 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) | |||
33 | } | 33 | } |
34 | 34 | ||
35 | #if PAGETABLE_LEVELS > 2 | 35 | #if PAGETABLE_LEVELS > 2 |
36 | void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) | 36 | void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) |
37 | { | 37 | { |
38 | paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); | 38 | paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); |
39 | tlb_remove_page(tlb, virt_to_page(pmd)); | 39 | tlb_remove_page(tlb, virt_to_page(pmd)); |
40 | } | 40 | } |
41 | 41 | ||
42 | #if PAGETABLE_LEVELS > 3 | 42 | #if PAGETABLE_LEVELS > 3 |
43 | void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) | 43 | void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) |
44 | { | 44 | { |
45 | paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); | 45 | paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); |
46 | tlb_remove_page(tlb, virt_to_page(pud)); | 46 | tlb_remove_page(tlb, virt_to_page(pud)); |
@@ -329,7 +329,6 @@ void __init reserve_top_address(unsigned long reserve) | |||
329 | printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", | 329 | printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", |
330 | (int)-reserve); | 330 | (int)-reserve); |
331 | __FIXADDR_TOP = -reserve - PAGE_SIZE; | 331 | __FIXADDR_TOP = -reserve - PAGE_SIZE; |
332 | __VMALLOC_RESERVE += reserve; | ||
333 | #endif | 332 | #endif |
334 | } | 333 | } |
335 | 334 | ||
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 29a0e37114f8..6f8aa33031c7 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c | |||
@@ -215,7 +215,7 @@ int __init get_memcfg_from_srat(void) | |||
215 | goto out_fail; | 215 | goto out_fail; |
216 | 216 | ||
217 | if (num_memory_chunks == 0) { | 217 | if (num_memory_chunks == 0) { |
218 | printk(KERN_WARNING | 218 | printk(KERN_DEBUG |
219 | "could not find any ACPI SRAT memory areas.\n"); | 219 | "could not find any ACPI SRAT memory areas.\n"); |
220 | goto out_fail; | 220 | goto out_fail; |
221 | } | 221 | } |
@@ -277,7 +277,7 @@ int __init get_memcfg_from_srat(void) | |||
277 | } | 277 | } |
278 | return 1; | 278 | return 1; |
279 | out_fail: | 279 | out_fail: |
280 | printk(KERN_ERR "failed to get NUMA memory information from SRAT" | 280 | printk(KERN_DEBUG "failed to get NUMA memory information from SRAT" |
281 | " table\n"); | 281 | " table\n"); |
282 | return 0; | 282 | return 0; |
283 | } | 283 | } |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 2dfcbf9df2ae..dbb5381f7b3b 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -79,8 +79,10 @@ static __init void bad_srat(void) | |||
79 | acpi_numa = -1; | 79 | acpi_numa = -1; |
80 | for (i = 0; i < MAX_LOCAL_APIC; i++) | 80 | for (i = 0; i < MAX_LOCAL_APIC; i++) |
81 | apicid_to_node[i] = NUMA_NO_NODE; | 81 | apicid_to_node[i] = NUMA_NO_NODE; |
82 | for (i = 0; i < MAX_NUMNODES; i++) | 82 | for (i = 0; i < MAX_NUMNODES; i++) { |
83 | nodes_add[i].start = nodes[i].end = 0; | 83 | nodes[i].start = nodes[i].end = 0; |
84 | nodes_add[i].start = nodes_add[i].end = 0; | ||
85 | } | ||
84 | remove_all_active_ranges(); | 86 | remove_all_active_ranges(); |
85 | } | 87 | } |
86 | 88 | ||
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 821e97017e95..c814e144a3f0 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -183,18 +183,17 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, | |||
183 | 183 | ||
184 | f->flush_mm = mm; | 184 | f->flush_mm = mm; |
185 | f->flush_va = va; | 185 | f->flush_va = va; |
186 | cpumask_andnot(to_cpumask(f->flush_cpumask), | 186 | if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id()))) { |
187 | cpumask, cpumask_of(smp_processor_id())); | 187 | /* |
188 | 188 | * We have to send the IPI only to | |
189 | /* | 189 | * CPUs affected. |
190 | * We have to send the IPI only to | 190 | */ |
191 | * CPUs affected. | 191 | apic->send_IPI_mask(to_cpumask(f->flush_cpumask), |
192 | */ | 192 | INVALIDATE_TLB_VECTOR_START + sender); |
193 | apic->send_IPI_mask(to_cpumask(f->flush_cpumask), | ||
194 | INVALIDATE_TLB_VECTOR_START + sender); | ||
195 | 193 | ||
196 | while (!cpumask_empty(to_cpumask(f->flush_cpumask))) | 194 | while (!cpumask_empty(to_cpumask(f->flush_cpumask))) |
197 | cpu_relax(); | 195 | cpu_relax(); |
196 | } | ||
198 | 197 | ||
199 | f->flush_mm = NULL; | 198 | f->flush_mm = NULL; |
200 | f->flush_va = 0; | 199 | f->flush_va = 0; |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index b07dd8d0b321..cb88b1a0bd5f 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -1,11 +1,14 @@ | |||
1 | /** | 1 | /** |
2 | * @file nmi_int.c | 2 | * @file nmi_int.c |
3 | * | 3 | * |
4 | * @remark Copyright 2002-2008 OProfile authors | 4 | * @remark Copyright 2002-2009 OProfile authors |
5 | * @remark Read the file COPYING | 5 | * @remark Read the file COPYING |
6 | * | 6 | * |
7 | * @author John Levon <levon@movementarian.org> | 7 | * @author John Levon <levon@movementarian.org> |
8 | * @author Robert Richter <robert.richter@amd.com> | 8 | * @author Robert Richter <robert.richter@amd.com> |
9 | * @author Barry Kasindorf <barry.kasindorf@amd.com> | ||
10 | * @author Jason Yeh <jason.yeh@amd.com> | ||
11 | * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> | ||
9 | */ | 12 | */ |
10 | 13 | ||
11 | #include <linux/init.h> | 14 | #include <linux/init.h> |
@@ -24,13 +27,35 @@ | |||
24 | #include "op_counter.h" | 27 | #include "op_counter.h" |
25 | #include "op_x86_model.h" | 28 | #include "op_x86_model.h" |
26 | 29 | ||
27 | static struct op_x86_model_spec const *model; | 30 | static struct op_x86_model_spec *model; |
28 | static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); | 31 | static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); |
29 | static DEFINE_PER_CPU(unsigned long, saved_lvtpc); | 32 | static DEFINE_PER_CPU(unsigned long, saved_lvtpc); |
30 | 33 | ||
31 | /* 0 == registered but off, 1 == registered and on */ | 34 | /* 0 == registered but off, 1 == registered and on */ |
32 | static int nmi_enabled = 0; | 35 | static int nmi_enabled = 0; |
33 | 36 | ||
37 | struct op_counter_config counter_config[OP_MAX_COUNTER]; | ||
38 | |||
39 | /* common functions */ | ||
40 | |||
41 | u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, | ||
42 | struct op_counter_config *counter_config) | ||
43 | { | ||
44 | u64 val = 0; | ||
45 | u16 event = (u16)counter_config->event; | ||
46 | |||
47 | val |= ARCH_PERFMON_EVENTSEL_INT; | ||
48 | val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0; | ||
49 | val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0; | ||
50 | val |= (counter_config->unit_mask & 0xFF) << 8; | ||
51 | event &= model->event_mask ? model->event_mask : 0xFF; | ||
52 | val |= event & 0xFF; | ||
53 | val |= (event & 0x0F00) << 24; | ||
54 | |||
55 | return val; | ||
56 | } | ||
57 | |||
58 | |||
34 | static int profile_exceptions_notify(struct notifier_block *self, | 59 | static int profile_exceptions_notify(struct notifier_block *self, |
35 | unsigned long val, void *data) | 60 | unsigned long val, void *data) |
36 | { | 61 | { |
@@ -52,36 +77,214 @@ static int profile_exceptions_notify(struct notifier_block *self, | |||
52 | 77 | ||
53 | static void nmi_cpu_save_registers(struct op_msrs *msrs) | 78 | static void nmi_cpu_save_registers(struct op_msrs *msrs) |
54 | { | 79 | { |
55 | unsigned int const nr_ctrs = model->num_counters; | ||
56 | unsigned int const nr_ctrls = model->num_controls; | ||
57 | struct op_msr *counters = msrs->counters; | 80 | struct op_msr *counters = msrs->counters; |
58 | struct op_msr *controls = msrs->controls; | 81 | struct op_msr *controls = msrs->controls; |
59 | unsigned int i; | 82 | unsigned int i; |
60 | 83 | ||
61 | for (i = 0; i < nr_ctrs; ++i) { | 84 | for (i = 0; i < model->num_counters; ++i) { |
62 | if (counters[i].addr) { | 85 | if (counters[i].addr) |
63 | rdmsr(counters[i].addr, | 86 | rdmsrl(counters[i].addr, counters[i].saved); |
64 | counters[i].saved.low, | 87 | } |
65 | counters[i].saved.high); | 88 | |
66 | } | 89 | for (i = 0; i < model->num_controls; ++i) { |
90 | if (controls[i].addr) | ||
91 | rdmsrl(controls[i].addr, controls[i].saved); | ||
92 | } | ||
93 | } | ||
94 | |||
95 | static void nmi_cpu_start(void *dummy) | ||
96 | { | ||
97 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); | ||
98 | model->start(msrs); | ||
99 | } | ||
100 | |||
101 | static int nmi_start(void) | ||
102 | { | ||
103 | on_each_cpu(nmi_cpu_start, NULL, 1); | ||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | static void nmi_cpu_stop(void *dummy) | ||
108 | { | ||
109 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); | ||
110 | model->stop(msrs); | ||
111 | } | ||
112 | |||
113 | static void nmi_stop(void) | ||
114 | { | ||
115 | on_each_cpu(nmi_cpu_stop, NULL, 1); | ||
116 | } | ||
117 | |||
118 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
119 | |||
120 | static DEFINE_PER_CPU(int, switch_index); | ||
121 | |||
122 | static inline int has_mux(void) | ||
123 | { | ||
124 | return !!model->switch_ctrl; | ||
125 | } | ||
126 | |||
127 | inline int op_x86_phys_to_virt(int phys) | ||
128 | { | ||
129 | return __get_cpu_var(switch_index) + phys; | ||
130 | } | ||
131 | |||
132 | inline int op_x86_virt_to_phys(int virt) | ||
133 | { | ||
134 | return virt % model->num_counters; | ||
135 | } | ||
136 | |||
137 | static void nmi_shutdown_mux(void) | ||
138 | { | ||
139 | int i; | ||
140 | |||
141 | if (!has_mux()) | ||
142 | return; | ||
143 | |||
144 | for_each_possible_cpu(i) { | ||
145 | kfree(per_cpu(cpu_msrs, i).multiplex); | ||
146 | per_cpu(cpu_msrs, i).multiplex = NULL; | ||
147 | per_cpu(switch_index, i) = 0; | ||
67 | } | 148 | } |
149 | } | ||
150 | |||
151 | static int nmi_setup_mux(void) | ||
152 | { | ||
153 | size_t multiplex_size = | ||
154 | sizeof(struct op_msr) * model->num_virt_counters; | ||
155 | int i; | ||
156 | |||
157 | if (!has_mux()) | ||
158 | return 1; | ||
159 | |||
160 | for_each_possible_cpu(i) { | ||
161 | per_cpu(cpu_msrs, i).multiplex = | ||
162 | kmalloc(multiplex_size, GFP_KERNEL); | ||
163 | if (!per_cpu(cpu_msrs, i).multiplex) | ||
164 | return 0; | ||
165 | } | ||
166 | |||
167 | return 1; | ||
168 | } | ||
169 | |||
170 | static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) | ||
171 | { | ||
172 | int i; | ||
173 | struct op_msr *multiplex = msrs->multiplex; | ||
174 | |||
175 | if (!has_mux()) | ||
176 | return; | ||
68 | 177 | ||
69 | for (i = 0; i < nr_ctrls; ++i) { | 178 | for (i = 0; i < model->num_virt_counters; ++i) { |
70 | if (controls[i].addr) { | 179 | if (counter_config[i].enabled) { |
71 | rdmsr(controls[i].addr, | 180 | multiplex[i].saved = -(u64)counter_config[i].count; |
72 | controls[i].saved.low, | 181 | } else { |
73 | controls[i].saved.high); | 182 | multiplex[i].addr = 0; |
183 | multiplex[i].saved = 0; | ||
74 | } | 184 | } |
75 | } | 185 | } |
186 | |||
187 | per_cpu(switch_index, cpu) = 0; | ||
188 | } | ||
189 | |||
190 | static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) | ||
191 | { | ||
192 | struct op_msr *multiplex = msrs->multiplex; | ||
193 | int i; | ||
194 | |||
195 | for (i = 0; i < model->num_counters; ++i) { | ||
196 | int virt = op_x86_phys_to_virt(i); | ||
197 | if (multiplex[virt].addr) | ||
198 | rdmsrl(multiplex[virt].addr, multiplex[virt].saved); | ||
199 | } | ||
200 | } | ||
201 | |||
202 | static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) | ||
203 | { | ||
204 | struct op_msr *multiplex = msrs->multiplex; | ||
205 | int i; | ||
206 | |||
207 | for (i = 0; i < model->num_counters; ++i) { | ||
208 | int virt = op_x86_phys_to_virt(i); | ||
209 | if (multiplex[virt].addr) | ||
210 | wrmsrl(multiplex[virt].addr, multiplex[virt].saved); | ||
211 | } | ||
76 | } | 212 | } |
77 | 213 | ||
78 | static void nmi_save_registers(void *dummy) | 214 | static void nmi_cpu_switch(void *dummy) |
79 | { | 215 | { |
80 | int cpu = smp_processor_id(); | 216 | int cpu = smp_processor_id(); |
217 | int si = per_cpu(switch_index, cpu); | ||
81 | struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); | 218 | struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); |
82 | nmi_cpu_save_registers(msrs); | 219 | |
220 | nmi_cpu_stop(NULL); | ||
221 | nmi_cpu_save_mpx_registers(msrs); | ||
222 | |||
223 | /* move to next set */ | ||
224 | si += model->num_counters; | ||
225 | if ((si > model->num_virt_counters) || (counter_config[si].count == 0)) | ||
226 | per_cpu(switch_index, cpu) = 0; | ||
227 | else | ||
228 | per_cpu(switch_index, cpu) = si; | ||
229 | |||
230 | model->switch_ctrl(model, msrs); | ||
231 | nmi_cpu_restore_mpx_registers(msrs); | ||
232 | |||
233 | nmi_cpu_start(NULL); | ||
234 | } | ||
235 | |||
236 | |||
237 | /* | ||
238 | * Quick check to see if multiplexing is necessary. | ||
239 | * The check should be sufficient since counters are used | ||
240 | * in order. | ||
241 | */ | ||
242 | static int nmi_multiplex_on(void) | ||
243 | { | ||
244 | return counter_config[model->num_counters].count ? 0 : -EINVAL; | ||
245 | } | ||
246 | |||
247 | static int nmi_switch_event(void) | ||
248 | { | ||
249 | if (!has_mux()) | ||
250 | return -ENOSYS; /* not implemented */ | ||
251 | if (nmi_multiplex_on() < 0) | ||
252 | return -EINVAL; /* not necessary */ | ||
253 | |||
254 | on_each_cpu(nmi_cpu_switch, NULL, 1); | ||
255 | |||
256 | return 0; | ||
257 | } | ||
258 | |||
259 | static inline void mux_init(struct oprofile_operations *ops) | ||
260 | { | ||
261 | if (has_mux()) | ||
262 | ops->switch_events = nmi_switch_event; | ||
263 | } | ||
264 | |||
265 | static void mux_clone(int cpu) | ||
266 | { | ||
267 | if (!has_mux()) | ||
268 | return; | ||
269 | |||
270 | memcpy(per_cpu(cpu_msrs, cpu).multiplex, | ||
271 | per_cpu(cpu_msrs, 0).multiplex, | ||
272 | sizeof(struct op_msr) * model->num_virt_counters); | ||
83 | } | 273 | } |
84 | 274 | ||
275 | #else | ||
276 | |||
277 | inline int op_x86_phys_to_virt(int phys) { return phys; } | ||
278 | inline int op_x86_virt_to_phys(int virt) { return virt; } | ||
279 | static inline void nmi_shutdown_mux(void) { } | ||
280 | static inline int nmi_setup_mux(void) { return 1; } | ||
281 | static inline void | ||
282 | nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { } | ||
283 | static inline void mux_init(struct oprofile_operations *ops) { } | ||
284 | static void mux_clone(int cpu) { } | ||
285 | |||
286 | #endif | ||
287 | |||
85 | static void free_msrs(void) | 288 | static void free_msrs(void) |
86 | { | 289 | { |
87 | int i; | 290 | int i; |
@@ -95,38 +298,32 @@ static void free_msrs(void) | |||
95 | 298 | ||
96 | static int allocate_msrs(void) | 299 | static int allocate_msrs(void) |
97 | { | 300 | { |
98 | int success = 1; | ||
99 | size_t controls_size = sizeof(struct op_msr) * model->num_controls; | 301 | size_t controls_size = sizeof(struct op_msr) * model->num_controls; |
100 | size_t counters_size = sizeof(struct op_msr) * model->num_counters; | 302 | size_t counters_size = sizeof(struct op_msr) * model->num_counters; |
101 | 303 | ||
102 | int i; | 304 | int i; |
103 | for_each_possible_cpu(i) { | 305 | for_each_possible_cpu(i) { |
104 | per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, | 306 | per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, |
105 | GFP_KERNEL); | 307 | GFP_KERNEL); |
106 | if (!per_cpu(cpu_msrs, i).counters) { | 308 | if (!per_cpu(cpu_msrs, i).counters) |
107 | success = 0; | 309 | return 0; |
108 | break; | ||
109 | } | ||
110 | per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, | 310 | per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, |
111 | GFP_KERNEL); | 311 | GFP_KERNEL); |
112 | if (!per_cpu(cpu_msrs, i).controls) { | 312 | if (!per_cpu(cpu_msrs, i).controls) |
113 | success = 0; | 313 | return 0; |
114 | break; | ||
115 | } | ||
116 | } | 314 | } |
117 | 315 | ||
118 | if (!success) | 316 | return 1; |
119 | free_msrs(); | ||
120 | |||
121 | return success; | ||
122 | } | 317 | } |
123 | 318 | ||
124 | static void nmi_cpu_setup(void *dummy) | 319 | static void nmi_cpu_setup(void *dummy) |
125 | { | 320 | { |
126 | int cpu = smp_processor_id(); | 321 | int cpu = smp_processor_id(); |
127 | struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); | 322 | struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); |
323 | nmi_cpu_save_registers(msrs); | ||
128 | spin_lock(&oprofilefs_lock); | 324 | spin_lock(&oprofilefs_lock); |
129 | model->setup_ctrs(msrs); | 325 | model->setup_ctrs(model, msrs); |
326 | nmi_cpu_setup_mux(cpu, msrs); | ||
130 | spin_unlock(&oprofilefs_lock); | 327 | spin_unlock(&oprofilefs_lock); |
131 | per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); | 328 | per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); |
132 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 329 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
@@ -144,11 +341,15 @@ static int nmi_setup(void) | |||
144 | int cpu; | 341 | int cpu; |
145 | 342 | ||
146 | if (!allocate_msrs()) | 343 | if (!allocate_msrs()) |
147 | return -ENOMEM; | 344 | err = -ENOMEM; |
345 | else if (!nmi_setup_mux()) | ||
346 | err = -ENOMEM; | ||
347 | else | ||
348 | err = register_die_notifier(&profile_exceptions_nb); | ||
148 | 349 | ||
149 | err = register_die_notifier(&profile_exceptions_nb); | ||
150 | if (err) { | 350 | if (err) { |
151 | free_msrs(); | 351 | free_msrs(); |
352 | nmi_shutdown_mux(); | ||
152 | return err; | 353 | return err; |
153 | } | 354 | } |
154 | 355 | ||
@@ -159,45 +360,38 @@ static int nmi_setup(void) | |||
159 | /* Assume saved/restored counters are the same on all CPUs */ | 360 | /* Assume saved/restored counters are the same on all CPUs */ |
160 | model->fill_in_addresses(&per_cpu(cpu_msrs, 0)); | 361 | model->fill_in_addresses(&per_cpu(cpu_msrs, 0)); |
161 | for_each_possible_cpu(cpu) { | 362 | for_each_possible_cpu(cpu) { |
162 | if (cpu != 0) { | 363 | if (!cpu) |
163 | memcpy(per_cpu(cpu_msrs, cpu).counters, | 364 | continue; |
164 | per_cpu(cpu_msrs, 0).counters, | 365 | |
165 | sizeof(struct op_msr) * model->num_counters); | 366 | memcpy(per_cpu(cpu_msrs, cpu).counters, |
166 | 367 | per_cpu(cpu_msrs, 0).counters, | |
167 | memcpy(per_cpu(cpu_msrs, cpu).controls, | 368 | sizeof(struct op_msr) * model->num_counters); |
168 | per_cpu(cpu_msrs, 0).controls, | 369 | |
169 | sizeof(struct op_msr) * model->num_controls); | 370 | memcpy(per_cpu(cpu_msrs, cpu).controls, |
170 | } | 371 | per_cpu(cpu_msrs, 0).controls, |
372 | sizeof(struct op_msr) * model->num_controls); | ||
171 | 373 | ||
374 | mux_clone(cpu); | ||
172 | } | 375 | } |
173 | on_each_cpu(nmi_save_registers, NULL, 1); | ||
174 | on_each_cpu(nmi_cpu_setup, NULL, 1); | 376 | on_each_cpu(nmi_cpu_setup, NULL, 1); |
175 | nmi_enabled = 1; | 377 | nmi_enabled = 1; |
176 | return 0; | 378 | return 0; |
177 | } | 379 | } |
178 | 380 | ||
179 | static void nmi_restore_registers(struct op_msrs *msrs) | 381 | static void nmi_cpu_restore_registers(struct op_msrs *msrs) |
180 | { | 382 | { |
181 | unsigned int const nr_ctrs = model->num_counters; | ||
182 | unsigned int const nr_ctrls = model->num_controls; | ||
183 | struct op_msr *counters = msrs->counters; | 383 | struct op_msr *counters = msrs->counters; |
184 | struct op_msr *controls = msrs->controls; | 384 | struct op_msr *controls = msrs->controls; |
185 | unsigned int i; | 385 | unsigned int i; |
186 | 386 | ||
187 | for (i = 0; i < nr_ctrls; ++i) { | 387 | for (i = 0; i < model->num_controls; ++i) { |
188 | if (controls[i].addr) { | 388 | if (controls[i].addr) |
189 | wrmsr(controls[i].addr, | 389 | wrmsrl(controls[i].addr, controls[i].saved); |
190 | controls[i].saved.low, | ||
191 | controls[i].saved.high); | ||
192 | } | ||
193 | } | 390 | } |
194 | 391 | ||
195 | for (i = 0; i < nr_ctrs; ++i) { | 392 | for (i = 0; i < model->num_counters; ++i) { |
196 | if (counters[i].addr) { | 393 | if (counters[i].addr) |
197 | wrmsr(counters[i].addr, | 394 | wrmsrl(counters[i].addr, counters[i].saved); |
198 | counters[i].saved.low, | ||
199 | counters[i].saved.high); | ||
200 | } | ||
201 | } | 395 | } |
202 | } | 396 | } |
203 | 397 | ||
@@ -205,7 +399,7 @@ static void nmi_cpu_shutdown(void *dummy) | |||
205 | { | 399 | { |
206 | unsigned int v; | 400 | unsigned int v; |
207 | int cpu = smp_processor_id(); | 401 | int cpu = smp_processor_id(); |
208 | struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); | 402 | struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); |
209 | 403 | ||
210 | /* restoring APIC_LVTPC can trigger an apic error because the delivery | 404 | /* restoring APIC_LVTPC can trigger an apic error because the delivery |
211 | * mode and vector nr combination can be illegal. That's by design: on | 405 | * mode and vector nr combination can be illegal. That's by design: on |
@@ -216,7 +410,7 @@ static void nmi_cpu_shutdown(void *dummy) | |||
216 | apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); | 410 | apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); |
217 | apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); | 411 | apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); |
218 | apic_write(APIC_LVTERR, v); | 412 | apic_write(APIC_LVTERR, v); |
219 | nmi_restore_registers(msrs); | 413 | nmi_cpu_restore_registers(msrs); |
220 | } | 414 | } |
221 | 415 | ||
222 | static void nmi_shutdown(void) | 416 | static void nmi_shutdown(void) |
@@ -226,42 +420,18 @@ static void nmi_shutdown(void) | |||
226 | nmi_enabled = 0; | 420 | nmi_enabled = 0; |
227 | on_each_cpu(nmi_cpu_shutdown, NULL, 1); | 421 | on_each_cpu(nmi_cpu_shutdown, NULL, 1); |
228 | unregister_die_notifier(&profile_exceptions_nb); | 422 | unregister_die_notifier(&profile_exceptions_nb); |
423 | nmi_shutdown_mux(); | ||
229 | msrs = &get_cpu_var(cpu_msrs); | 424 | msrs = &get_cpu_var(cpu_msrs); |
230 | model->shutdown(msrs); | 425 | model->shutdown(msrs); |
231 | free_msrs(); | 426 | free_msrs(); |
232 | put_cpu_var(cpu_msrs); | 427 | put_cpu_var(cpu_msrs); |
233 | } | 428 | } |
234 | 429 | ||
235 | static void nmi_cpu_start(void *dummy) | ||
236 | { | ||
237 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); | ||
238 | model->start(msrs); | ||
239 | } | ||
240 | |||
241 | static int nmi_start(void) | ||
242 | { | ||
243 | on_each_cpu(nmi_cpu_start, NULL, 1); | ||
244 | return 0; | ||
245 | } | ||
246 | |||
247 | static void nmi_cpu_stop(void *dummy) | ||
248 | { | ||
249 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); | ||
250 | model->stop(msrs); | ||
251 | } | ||
252 | |||
253 | static void nmi_stop(void) | ||
254 | { | ||
255 | on_each_cpu(nmi_cpu_stop, NULL, 1); | ||
256 | } | ||
257 | |||
258 | struct op_counter_config counter_config[OP_MAX_COUNTER]; | ||
259 | |||
260 | static int nmi_create_files(struct super_block *sb, struct dentry *root) | 430 | static int nmi_create_files(struct super_block *sb, struct dentry *root) |
261 | { | 431 | { |
262 | unsigned int i; | 432 | unsigned int i; |
263 | 433 | ||
264 | for (i = 0; i < model->num_counters; ++i) { | 434 | for (i = 0; i < model->num_virt_counters; ++i) { |
265 | struct dentry *dir; | 435 | struct dentry *dir; |
266 | char buf[4]; | 436 | char buf[4]; |
267 | 437 | ||
@@ -270,7 +440,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) | |||
270 | * NOTE: assumes 1:1 mapping here (that counters are organized | 440 | * NOTE: assumes 1:1 mapping here (that counters are organized |
271 | * sequentially in their struct assignment). | 441 | * sequentially in their struct assignment). |
272 | */ | 442 | */ |
273 | if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i))) | 443 | if (!avail_to_resrv_perfctr_nmi_bit(op_x86_virt_to_phys(i))) |
274 | continue; | 444 | continue; |
275 | 445 | ||
276 | snprintf(buf, sizeof(buf), "%d", i); | 446 | snprintf(buf, sizeof(buf), "%d", i); |
@@ -390,7 +560,7 @@ static int __init p4_init(char **cpu_type) | |||
390 | static int force_arch_perfmon; | 560 | static int force_arch_perfmon; |
391 | static int force_cpu_type(const char *str, struct kernel_param *kp) | 561 | static int force_cpu_type(const char *str, struct kernel_param *kp) |
392 | { | 562 | { |
393 | if (!strcmp(str, "archperfmon")) { | 563 | if (!strcmp(str, "arch_perfmon")) { |
394 | force_arch_perfmon = 1; | 564 | force_arch_perfmon = 1; |
395 | printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); | 565 | printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); |
396 | } | 566 | } |
@@ -402,6 +572,7 @@ module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); | |||
402 | static int __init ppro_init(char **cpu_type) | 572 | static int __init ppro_init(char **cpu_type) |
403 | { | 573 | { |
404 | __u8 cpu_model = boot_cpu_data.x86_model; | 574 | __u8 cpu_model = boot_cpu_data.x86_model; |
575 | struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ | ||
405 | 576 | ||
406 | if (force_arch_perfmon && cpu_has_arch_perfmon) | 577 | if (force_arch_perfmon && cpu_has_arch_perfmon) |
407 | return 0; | 578 | return 0; |
@@ -428,7 +599,7 @@ static int __init ppro_init(char **cpu_type) | |||
428 | *cpu_type = "i386/core_2"; | 599 | *cpu_type = "i386/core_2"; |
429 | break; | 600 | break; |
430 | case 26: | 601 | case 26: |
431 | arch_perfmon_setup_counters(); | 602 | spec = &op_arch_perfmon_spec; |
432 | *cpu_type = "i386/core_i7"; | 603 | *cpu_type = "i386/core_i7"; |
433 | break; | 604 | break; |
434 | case 28: | 605 | case 28: |
@@ -439,17 +610,7 @@ static int __init ppro_init(char **cpu_type) | |||
439 | return 0; | 610 | return 0; |
440 | } | 611 | } |
441 | 612 | ||
442 | model = &op_ppro_spec; | 613 | model = spec; |
443 | return 1; | ||
444 | } | ||
445 | |||
446 | static int __init arch_perfmon_init(char **cpu_type) | ||
447 | { | ||
448 | if (!cpu_has_arch_perfmon) | ||
449 | return 0; | ||
450 | *cpu_type = "i386/arch_perfmon"; | ||
451 | model = &op_arch_perfmon_spec; | ||
452 | arch_perfmon_setup_counters(); | ||
453 | return 1; | 614 | return 1; |
454 | } | 615 | } |
455 | 616 | ||
@@ -471,27 +632,26 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
471 | /* Needs to be at least an Athlon (or hammer in 32bit mode) */ | 632 | /* Needs to be at least an Athlon (or hammer in 32bit mode) */ |
472 | 633 | ||
473 | switch (family) { | 634 | switch (family) { |
474 | default: | ||
475 | return -ENODEV; | ||
476 | case 6: | 635 | case 6: |
477 | model = &op_amd_spec; | ||
478 | cpu_type = "i386/athlon"; | 636 | cpu_type = "i386/athlon"; |
479 | break; | 637 | break; |
480 | case 0xf: | 638 | case 0xf: |
481 | model = &op_amd_spec; | 639 | /* |
482 | /* Actually it could be i386/hammer too, but give | 640 | * Actually it could be i386/hammer too, but |
483 | user space an consistent name. */ | 641 | * give user space an consistent name. |
642 | */ | ||
484 | cpu_type = "x86-64/hammer"; | 643 | cpu_type = "x86-64/hammer"; |
485 | break; | 644 | break; |
486 | case 0x10: | 645 | case 0x10: |
487 | model = &op_amd_spec; | ||
488 | cpu_type = "x86-64/family10"; | 646 | cpu_type = "x86-64/family10"; |
489 | break; | 647 | break; |
490 | case 0x11: | 648 | case 0x11: |
491 | model = &op_amd_spec; | ||
492 | cpu_type = "x86-64/family11h"; | 649 | cpu_type = "x86-64/family11h"; |
493 | break; | 650 | break; |
651 | default: | ||
652 | return -ENODEV; | ||
494 | } | 653 | } |
654 | model = &op_amd_spec; | ||
495 | break; | 655 | break; |
496 | 656 | ||
497 | case X86_VENDOR_INTEL: | 657 | case X86_VENDOR_INTEL: |
@@ -510,8 +670,15 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
510 | break; | 670 | break; |
511 | } | 671 | } |
512 | 672 | ||
513 | if (!cpu_type && !arch_perfmon_init(&cpu_type)) | 673 | if (cpu_type) |
674 | break; | ||
675 | |||
676 | if (!cpu_has_arch_perfmon) | ||
514 | return -ENODEV; | 677 | return -ENODEV; |
678 | |||
679 | /* use arch perfmon as fallback */ | ||
680 | cpu_type = "i386/arch_perfmon"; | ||
681 | model = &op_arch_perfmon_spec; | ||
515 | break; | 682 | break; |
516 | 683 | ||
517 | default: | 684 | default: |
@@ -522,18 +689,23 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
522 | register_cpu_notifier(&oprofile_cpu_nb); | 689 | register_cpu_notifier(&oprofile_cpu_nb); |
523 | #endif | 690 | #endif |
524 | /* default values, can be overwritten by model */ | 691 | /* default values, can be overwritten by model */ |
525 | ops->create_files = nmi_create_files; | 692 | ops->create_files = nmi_create_files; |
526 | ops->setup = nmi_setup; | 693 | ops->setup = nmi_setup; |
527 | ops->shutdown = nmi_shutdown; | 694 | ops->shutdown = nmi_shutdown; |
528 | ops->start = nmi_start; | 695 | ops->start = nmi_start; |
529 | ops->stop = nmi_stop; | 696 | ops->stop = nmi_stop; |
530 | ops->cpu_type = cpu_type; | 697 | ops->cpu_type = cpu_type; |
531 | 698 | ||
532 | if (model->init) | 699 | if (model->init) |
533 | ret = model->init(ops); | 700 | ret = model->init(ops); |
534 | if (ret) | 701 | if (ret) |
535 | return ret; | 702 | return ret; |
536 | 703 | ||
704 | if (!model->num_virt_counters) | ||
705 | model->num_virt_counters = model->num_counters; | ||
706 | |||
707 | mux_init(ops); | ||
708 | |||
537 | init_sysfs(); | 709 | init_sysfs(); |
538 | using_nmi = 1; | 710 | using_nmi = 1; |
539 | printk(KERN_INFO "oprofile: using NMI interrupt.\n"); | 711 | printk(KERN_INFO "oprofile: using NMI interrupt.\n"); |
diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h index 91b6a116165e..e28398df0df2 100644 --- a/arch/x86/oprofile/op_counter.h +++ b/arch/x86/oprofile/op_counter.h | |||
@@ -10,7 +10,7 @@ | |||
10 | #ifndef OP_COUNTER_H | 10 | #ifndef OP_COUNTER_H |
11 | #define OP_COUNTER_H | 11 | #define OP_COUNTER_H |
12 | 12 | ||
13 | #define OP_MAX_COUNTER 8 | 13 | #define OP_MAX_COUNTER 32 |
14 | 14 | ||
15 | /* Per-perfctr configuration as set via | 15 | /* Per-perfctr configuration as set via |
16 | * oprofilefs. | 16 | * oprofilefs. |
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 8fdf06e4edf9..39686c29f03a 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -9,12 +9,15 @@ | |||
9 | * @author Philippe Elie | 9 | * @author Philippe Elie |
10 | * @author Graydon Hoare | 10 | * @author Graydon Hoare |
11 | * @author Robert Richter <robert.richter@amd.com> | 11 | * @author Robert Richter <robert.richter@amd.com> |
12 | * @author Barry Kasindorf | 12 | * @author Barry Kasindorf <barry.kasindorf@amd.com> |
13 | * @author Jason Yeh <jason.yeh@amd.com> | ||
14 | * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> | ||
13 | */ | 15 | */ |
14 | 16 | ||
15 | #include <linux/oprofile.h> | 17 | #include <linux/oprofile.h> |
16 | #include <linux/device.h> | 18 | #include <linux/device.h> |
17 | #include <linux/pci.h> | 19 | #include <linux/pci.h> |
20 | #include <linux/percpu.h> | ||
18 | 21 | ||
19 | #include <asm/ptrace.h> | 22 | #include <asm/ptrace.h> |
20 | #include <asm/msr.h> | 23 | #include <asm/msr.h> |
@@ -25,43 +28,36 @@ | |||
25 | 28 | ||
26 | #define NUM_COUNTERS 4 | 29 | #define NUM_COUNTERS 4 |
27 | #define NUM_CONTROLS 4 | 30 | #define NUM_CONTROLS 4 |
31 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
32 | #define NUM_VIRT_COUNTERS 32 | ||
33 | #define NUM_VIRT_CONTROLS 32 | ||
34 | #else | ||
35 | #define NUM_VIRT_COUNTERS NUM_COUNTERS | ||
36 | #define NUM_VIRT_CONTROLS NUM_CONTROLS | ||
37 | #endif | ||
38 | |||
39 | #define OP_EVENT_MASK 0x0FFF | ||
40 | #define OP_CTR_OVERFLOW (1ULL<<31) | ||
28 | 41 | ||
29 | #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) | 42 | #define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) |
30 | #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) | 43 | |
31 | #define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0) | 44 | static unsigned long reset_value[NUM_VIRT_COUNTERS]; |
32 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) | ||
33 | |||
34 | #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) | ||
35 | #define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) | ||
36 | #define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) | ||
37 | #define CTRL_SET_ACTIVE(n) (n |= (1<<22)) | ||
38 | #define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) | ||
39 | #define CTRL_CLEAR_LO(x) (x &= (1<<21)) | ||
40 | #define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0) | ||
41 | #define CTRL_SET_ENABLE(val) (val |= 1<<20) | ||
42 | #define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) | ||
43 | #define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) | ||
44 | #define CTRL_SET_UM(val, m) (val |= (m << 8)) | ||
45 | #define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff)) | ||
46 | #define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf)) | ||
47 | #define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9)) | ||
48 | #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) | ||
49 | |||
50 | static unsigned long reset_value[NUM_COUNTERS]; | ||
51 | 45 | ||
52 | #ifdef CONFIG_OPROFILE_IBS | 46 | #ifdef CONFIG_OPROFILE_IBS |
53 | 47 | ||
54 | /* IbsFetchCtl bits/masks */ | 48 | /* IbsFetchCtl bits/masks */ |
55 | #define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */ | 49 | #define IBS_FETCH_RAND_EN (1ULL<<57) |
56 | #define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */ | 50 | #define IBS_FETCH_VAL (1ULL<<49) |
57 | #define IBS_FETCH_LOW_MAX_CNT_MASK 0x0000FFFFUL /* MaxCnt mask */ | 51 | #define IBS_FETCH_ENABLE (1ULL<<48) |
52 | #define IBS_FETCH_CNT_MASK 0xFFFF0000ULL | ||
58 | 53 | ||
59 | /*IbsOpCtl bits */ | 54 | /*IbsOpCtl bits */ |
60 | #define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ | 55 | #define IBS_OP_CNT_CTL (1ULL<<19) |
61 | #define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ | 56 | #define IBS_OP_VAL (1ULL<<18) |
57 | #define IBS_OP_ENABLE (1ULL<<17) | ||
62 | 58 | ||
63 | #define IBS_FETCH_SIZE 6 | 59 | #define IBS_FETCH_SIZE 6 |
64 | #define IBS_OP_SIZE 12 | 60 | #define IBS_OP_SIZE 12 |
65 | 61 | ||
66 | static int has_ibs; /* AMD Family10h and later */ | 62 | static int has_ibs; /* AMD Family10h and later */ |
67 | 63 | ||
@@ -78,6 +74,45 @@ static struct op_ibs_config ibs_config; | |||
78 | 74 | ||
79 | #endif | 75 | #endif |
80 | 76 | ||
77 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
78 | |||
79 | static void op_mux_fill_in_addresses(struct op_msrs * const msrs) | ||
80 | { | ||
81 | int i; | ||
82 | |||
83 | for (i = 0; i < NUM_VIRT_COUNTERS; i++) { | ||
84 | int hw_counter = op_x86_virt_to_phys(i); | ||
85 | if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | ||
86 | msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; | ||
87 | else | ||
88 | msrs->multiplex[i].addr = 0; | ||
89 | } | ||
90 | } | ||
91 | |||
92 | static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | ||
93 | struct op_msrs const * const msrs) | ||
94 | { | ||
95 | u64 val; | ||
96 | int i; | ||
97 | |||
98 | /* enable active counters */ | ||
99 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
100 | int virt = op_x86_phys_to_virt(i); | ||
101 | if (!counter_config[virt].enabled) | ||
102 | continue; | ||
103 | rdmsrl(msrs->controls[i].addr, val); | ||
104 | val &= model->reserved; | ||
105 | val |= op_x86_get_ctrl(model, &counter_config[virt]); | ||
106 | wrmsrl(msrs->controls[i].addr, val); | ||
107 | } | ||
108 | } | ||
109 | |||
110 | #else | ||
111 | |||
112 | static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { } | ||
113 | |||
114 | #endif | ||
115 | |||
81 | /* functions for op_amd_spec */ | 116 | /* functions for op_amd_spec */ |
82 | 117 | ||
83 | static void op_amd_fill_in_addresses(struct op_msrs * const msrs) | 118 | static void op_amd_fill_in_addresses(struct op_msrs * const msrs) |
@@ -97,150 +132,174 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) | |||
97 | else | 132 | else |
98 | msrs->controls[i].addr = 0; | 133 | msrs->controls[i].addr = 0; |
99 | } | 134 | } |
100 | } | ||
101 | 135 | ||
136 | op_mux_fill_in_addresses(msrs); | ||
137 | } | ||
102 | 138 | ||
103 | static void op_amd_setup_ctrs(struct op_msrs const * const msrs) | 139 | static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, |
140 | struct op_msrs const * const msrs) | ||
104 | { | 141 | { |
105 | unsigned int low, high; | 142 | u64 val; |
106 | int i; | 143 | int i; |
107 | 144 | ||
145 | /* setup reset_value */ | ||
146 | for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { | ||
147 | if (counter_config[i].enabled) | ||
148 | reset_value[i] = counter_config[i].count; | ||
149 | else | ||
150 | reset_value[i] = 0; | ||
151 | } | ||
152 | |||
108 | /* clear all counters */ | 153 | /* clear all counters */ |
109 | for (i = 0 ; i < NUM_CONTROLS; ++i) { | 154 | for (i = 0; i < NUM_CONTROLS; ++i) { |
110 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) | 155 | if (unlikely(!msrs->controls[i].addr)) |
111 | continue; | 156 | continue; |
112 | CTRL_READ(low, high, msrs, i); | 157 | rdmsrl(msrs->controls[i].addr, val); |
113 | CTRL_CLEAR_LO(low); | 158 | val &= model->reserved; |
114 | CTRL_CLEAR_HI(high); | 159 | wrmsrl(msrs->controls[i].addr, val); |
115 | CTRL_WRITE(low, high, msrs, i); | ||
116 | } | 160 | } |
117 | 161 | ||
118 | /* avoid a false detection of ctr overflows in NMI handler */ | 162 | /* avoid a false detection of ctr overflows in NMI handler */ |
119 | for (i = 0; i < NUM_COUNTERS; ++i) { | 163 | for (i = 0; i < NUM_COUNTERS; ++i) { |
120 | if (unlikely(!CTR_IS_RESERVED(msrs, i))) | 164 | if (unlikely(!msrs->counters[i].addr)) |
121 | continue; | 165 | continue; |
122 | CTR_WRITE(1, msrs, i); | 166 | wrmsrl(msrs->counters[i].addr, -1LL); |
123 | } | 167 | } |
124 | 168 | ||
125 | /* enable active counters */ | 169 | /* enable active counters */ |
126 | for (i = 0; i < NUM_COUNTERS; ++i) { | 170 | for (i = 0; i < NUM_COUNTERS; ++i) { |
127 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { | 171 | int virt = op_x86_phys_to_virt(i); |
128 | reset_value[i] = counter_config[i].count; | 172 | if (!counter_config[virt].enabled) |
173 | continue; | ||
174 | if (!msrs->counters[i].addr) | ||
175 | continue; | ||
129 | 176 | ||
130 | CTR_WRITE(counter_config[i].count, msrs, i); | 177 | /* setup counter registers */ |
131 | 178 | wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); | |
132 | CTRL_READ(low, high, msrs, i); | 179 | |
133 | CTRL_CLEAR_LO(low); | 180 | /* setup control registers */ |
134 | CTRL_CLEAR_HI(high); | 181 | rdmsrl(msrs->controls[i].addr, val); |
135 | CTRL_SET_ENABLE(low); | 182 | val &= model->reserved; |
136 | CTRL_SET_USR(low, counter_config[i].user); | 183 | val |= op_x86_get_ctrl(model, &counter_config[virt]); |
137 | CTRL_SET_KERN(low, counter_config[i].kernel); | 184 | wrmsrl(msrs->controls[i].addr, val); |
138 | CTRL_SET_UM(low, counter_config[i].unit_mask); | ||
139 | CTRL_SET_EVENT_LOW(low, counter_config[i].event); | ||
140 | CTRL_SET_EVENT_HIGH(high, counter_config[i].event); | ||
141 | CTRL_SET_HOST_ONLY(high, 0); | ||
142 | CTRL_SET_GUEST_ONLY(high, 0); | ||
143 | |||
144 | CTRL_WRITE(low, high, msrs, i); | ||
145 | } else { | ||
146 | reset_value[i] = 0; | ||
147 | } | ||
148 | } | 185 | } |
149 | } | 186 | } |
150 | 187 | ||
151 | #ifdef CONFIG_OPROFILE_IBS | 188 | #ifdef CONFIG_OPROFILE_IBS |
152 | 189 | ||
153 | static inline int | 190 | static inline void |
154 | op_amd_handle_ibs(struct pt_regs * const regs, | 191 | op_amd_handle_ibs(struct pt_regs * const regs, |
155 | struct op_msrs const * const msrs) | 192 | struct op_msrs const * const msrs) |
156 | { | 193 | { |
157 | u32 low, high; | 194 | u64 val, ctl; |
158 | u64 msr; | ||
159 | struct op_entry entry; | 195 | struct op_entry entry; |
160 | 196 | ||
161 | if (!has_ibs) | 197 | if (!has_ibs) |
162 | return 1; | 198 | return; |
163 | 199 | ||
164 | if (ibs_config.fetch_enabled) { | 200 | if (ibs_config.fetch_enabled) { |
165 | rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); | 201 | rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl); |
166 | if (high & IBS_FETCH_HIGH_VALID_BIT) { | 202 | if (ctl & IBS_FETCH_VAL) { |
167 | rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr); | 203 | rdmsrl(MSR_AMD64_IBSFETCHLINAD, val); |
168 | oprofile_write_reserve(&entry, regs, msr, | 204 | oprofile_write_reserve(&entry, regs, val, |
169 | IBS_FETCH_CODE, IBS_FETCH_SIZE); | 205 | IBS_FETCH_CODE, IBS_FETCH_SIZE); |
170 | oprofile_add_data(&entry, (u32)msr); | 206 | oprofile_add_data64(&entry, val); |
171 | oprofile_add_data(&entry, (u32)(msr >> 32)); | 207 | oprofile_add_data64(&entry, ctl); |
172 | oprofile_add_data(&entry, low); | 208 | rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val); |
173 | oprofile_add_data(&entry, high); | 209 | oprofile_add_data64(&entry, val); |
174 | rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr); | ||
175 | oprofile_add_data(&entry, (u32)msr); | ||
176 | oprofile_add_data(&entry, (u32)(msr >> 32)); | ||
177 | oprofile_write_commit(&entry); | 210 | oprofile_write_commit(&entry); |
178 | 211 | ||
179 | /* reenable the IRQ */ | 212 | /* reenable the IRQ */ |
180 | high &= ~IBS_FETCH_HIGH_VALID_BIT; | 213 | ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT_MASK); |
181 | high |= IBS_FETCH_HIGH_ENABLE; | 214 | ctl |= IBS_FETCH_ENABLE; |
182 | low &= IBS_FETCH_LOW_MAX_CNT_MASK; | 215 | wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl); |
183 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); | ||
184 | } | 216 | } |
185 | } | 217 | } |
186 | 218 | ||
187 | if (ibs_config.op_enabled) { | 219 | if (ibs_config.op_enabled) { |
188 | rdmsr(MSR_AMD64_IBSOPCTL, low, high); | 220 | rdmsrl(MSR_AMD64_IBSOPCTL, ctl); |
189 | if (low & IBS_OP_LOW_VALID_BIT) { | 221 | if (ctl & IBS_OP_VAL) { |
190 | rdmsrl(MSR_AMD64_IBSOPRIP, msr); | 222 | rdmsrl(MSR_AMD64_IBSOPRIP, val); |
191 | oprofile_write_reserve(&entry, regs, msr, | 223 | oprofile_write_reserve(&entry, regs, val, |
192 | IBS_OP_CODE, IBS_OP_SIZE); | 224 | IBS_OP_CODE, IBS_OP_SIZE); |
193 | oprofile_add_data(&entry, (u32)msr); | 225 | oprofile_add_data64(&entry, val); |
194 | oprofile_add_data(&entry, (u32)(msr >> 32)); | 226 | rdmsrl(MSR_AMD64_IBSOPDATA, val); |
195 | rdmsrl(MSR_AMD64_IBSOPDATA, msr); | 227 | oprofile_add_data64(&entry, val); |
196 | oprofile_add_data(&entry, (u32)msr); | 228 | rdmsrl(MSR_AMD64_IBSOPDATA2, val); |
197 | oprofile_add_data(&entry, (u32)(msr >> 32)); | 229 | oprofile_add_data64(&entry, val); |
198 | rdmsrl(MSR_AMD64_IBSOPDATA2, msr); | 230 | rdmsrl(MSR_AMD64_IBSOPDATA3, val); |
199 | oprofile_add_data(&entry, (u32)msr); | 231 | oprofile_add_data64(&entry, val); |
200 | oprofile_add_data(&entry, (u32)(msr >> 32)); | 232 | rdmsrl(MSR_AMD64_IBSDCLINAD, val); |
201 | rdmsrl(MSR_AMD64_IBSOPDATA3, msr); | 233 | oprofile_add_data64(&entry, val); |
202 | oprofile_add_data(&entry, (u32)msr); | 234 | rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); |
203 | oprofile_add_data(&entry, (u32)(msr >> 32)); | 235 | oprofile_add_data64(&entry, val); |
204 | rdmsrl(MSR_AMD64_IBSDCLINAD, msr); | ||
205 | oprofile_add_data(&entry, (u32)msr); | ||
206 | oprofile_add_data(&entry, (u32)(msr >> 32)); | ||
207 | rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr); | ||
208 | oprofile_add_data(&entry, (u32)msr); | ||
209 | oprofile_add_data(&entry, (u32)(msr >> 32)); | ||
210 | oprofile_write_commit(&entry); | 236 | oprofile_write_commit(&entry); |
211 | 237 | ||
212 | /* reenable the IRQ */ | 238 | /* reenable the IRQ */ |
213 | high = 0; | 239 | ctl &= ~IBS_OP_VAL & 0xFFFFFFFF; |
214 | low &= ~IBS_OP_LOW_VALID_BIT; | 240 | ctl |= IBS_OP_ENABLE; |
215 | low |= IBS_OP_LOW_ENABLE; | 241 | wrmsrl(MSR_AMD64_IBSOPCTL, ctl); |
216 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); | ||
217 | } | 242 | } |
218 | } | 243 | } |
244 | } | ||
219 | 245 | ||
220 | return 1; | 246 | static inline void op_amd_start_ibs(void) |
247 | { | ||
248 | u64 val; | ||
249 | if (has_ibs && ibs_config.fetch_enabled) { | ||
250 | val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; | ||
251 | val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; | ||
252 | val |= IBS_FETCH_ENABLE; | ||
253 | wrmsrl(MSR_AMD64_IBSFETCHCTL, val); | ||
254 | } | ||
255 | |||
256 | if (has_ibs && ibs_config.op_enabled) { | ||
257 | val = (ibs_config.max_cnt_op >> 4) & 0xFFFF; | ||
258 | val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; | ||
259 | val |= IBS_OP_ENABLE; | ||
260 | wrmsrl(MSR_AMD64_IBSOPCTL, val); | ||
261 | } | ||
262 | } | ||
263 | |||
264 | static void op_amd_stop_ibs(void) | ||
265 | { | ||
266 | if (has_ibs && ibs_config.fetch_enabled) | ||
267 | /* clear max count and enable */ | ||
268 | wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); | ||
269 | |||
270 | if (has_ibs && ibs_config.op_enabled) | ||
271 | /* clear max count and enable */ | ||
272 | wrmsrl(MSR_AMD64_IBSOPCTL, 0); | ||
221 | } | 273 | } |
222 | 274 | ||
275 | #else | ||
276 | |||
277 | static inline void op_amd_handle_ibs(struct pt_regs * const regs, | ||
278 | struct op_msrs const * const msrs) { } | ||
279 | static inline void op_amd_start_ibs(void) { } | ||
280 | static inline void op_amd_stop_ibs(void) { } | ||
281 | |||
223 | #endif | 282 | #endif |
224 | 283 | ||
225 | static int op_amd_check_ctrs(struct pt_regs * const regs, | 284 | static int op_amd_check_ctrs(struct pt_regs * const regs, |
226 | struct op_msrs const * const msrs) | 285 | struct op_msrs const * const msrs) |
227 | { | 286 | { |
228 | unsigned int low, high; | 287 | u64 val; |
229 | int i; | 288 | int i; |
230 | 289 | ||
231 | for (i = 0 ; i < NUM_COUNTERS; ++i) { | 290 | for (i = 0; i < NUM_COUNTERS; ++i) { |
232 | if (!reset_value[i]) | 291 | int virt = op_x86_phys_to_virt(i); |
292 | if (!reset_value[virt]) | ||
233 | continue; | 293 | continue; |
234 | CTR_READ(low, high, msrs, i); | 294 | rdmsrl(msrs->counters[i].addr, val); |
235 | if (CTR_OVERFLOWED(low)) { | 295 | /* bit is clear if overflowed: */ |
236 | oprofile_add_sample(regs, i); | 296 | if (val & OP_CTR_OVERFLOW) |
237 | CTR_WRITE(reset_value[i], msrs, i); | 297 | continue; |
238 | } | 298 | oprofile_add_sample(regs, virt); |
299 | wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); | ||
239 | } | 300 | } |
240 | 301 | ||
241 | #ifdef CONFIG_OPROFILE_IBS | ||
242 | op_amd_handle_ibs(regs, msrs); | 302 | op_amd_handle_ibs(regs, msrs); |
243 | #endif | ||
244 | 303 | ||
245 | /* See op_model_ppro.c */ | 304 | /* See op_model_ppro.c */ |
246 | return 1; | 305 | return 1; |
@@ -248,79 +307,50 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, | |||
248 | 307 | ||
249 | static void op_amd_start(struct op_msrs const * const msrs) | 308 | static void op_amd_start(struct op_msrs const * const msrs) |
250 | { | 309 | { |
251 | unsigned int low, high; | 310 | u64 val; |
252 | int i; | 311 | int i; |
253 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | ||
254 | if (reset_value[i]) { | ||
255 | CTRL_READ(low, high, msrs, i); | ||
256 | CTRL_SET_ACTIVE(low); | ||
257 | CTRL_WRITE(low, high, msrs, i); | ||
258 | } | ||
259 | } | ||
260 | 312 | ||
261 | #ifdef CONFIG_OPROFILE_IBS | 313 | for (i = 0; i < NUM_COUNTERS; ++i) { |
262 | if (has_ibs && ibs_config.fetch_enabled) { | 314 | if (!reset_value[op_x86_phys_to_virt(i)]) |
263 | low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; | 315 | continue; |
264 | high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */ | 316 | rdmsrl(msrs->controls[i].addr, val); |
265 | + IBS_FETCH_HIGH_ENABLE; | 317 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; |
266 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); | 318 | wrmsrl(msrs->controls[i].addr, val); |
267 | } | 319 | } |
268 | 320 | ||
269 | if (has_ibs && ibs_config.op_enabled) { | 321 | op_amd_start_ibs(); |
270 | low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) | ||
271 | + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */ | ||
272 | + IBS_OP_LOW_ENABLE; | ||
273 | high = 0; | ||
274 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); | ||
275 | } | ||
276 | #endif | ||
277 | } | 322 | } |
278 | 323 | ||
279 | |||
280 | static void op_amd_stop(struct op_msrs const * const msrs) | 324 | static void op_amd_stop(struct op_msrs const * const msrs) |
281 | { | 325 | { |
282 | unsigned int low, high; | 326 | u64 val; |
283 | int i; | 327 | int i; |
284 | 328 | ||
285 | /* | 329 | /* |
286 | * Subtle: stop on all counters to avoid race with setting our | 330 | * Subtle: stop on all counters to avoid race with setting our |
287 | * pm callback | 331 | * pm callback |
288 | */ | 332 | */ |
289 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | 333 | for (i = 0; i < NUM_COUNTERS; ++i) { |
290 | if (!reset_value[i]) | 334 | if (!reset_value[op_x86_phys_to_virt(i)]) |
291 | continue; | 335 | continue; |
292 | CTRL_READ(low, high, msrs, i); | 336 | rdmsrl(msrs->controls[i].addr, val); |
293 | CTRL_SET_INACTIVE(low); | 337 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; |
294 | CTRL_WRITE(low, high, msrs, i); | 338 | wrmsrl(msrs->controls[i].addr, val); |
295 | } | ||
296 | |||
297 | #ifdef CONFIG_OPROFILE_IBS | ||
298 | if (has_ibs && ibs_config.fetch_enabled) { | ||
299 | /* clear max count and enable */ | ||
300 | low = 0; | ||
301 | high = 0; | ||
302 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); | ||
303 | } | 339 | } |
304 | 340 | ||
305 | if (has_ibs && ibs_config.op_enabled) { | 341 | op_amd_stop_ibs(); |
306 | /* clear max count and enable */ | ||
307 | low = 0; | ||
308 | high = 0; | ||
309 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); | ||
310 | } | ||
311 | #endif | ||
312 | } | 342 | } |
313 | 343 | ||
314 | static void op_amd_shutdown(struct op_msrs const * const msrs) | 344 | static void op_amd_shutdown(struct op_msrs const * const msrs) |
315 | { | 345 | { |
316 | int i; | 346 | int i; |
317 | 347 | ||
318 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | 348 | for (i = 0; i < NUM_COUNTERS; ++i) { |
319 | if (CTR_IS_RESERVED(msrs, i)) | 349 | if (msrs->counters[i].addr) |
320 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | 350 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); |
321 | } | 351 | } |
322 | for (i = 0 ; i < NUM_CONTROLS ; ++i) { | 352 | for (i = 0; i < NUM_CONTROLS; ++i) { |
323 | if (CTRL_IS_RESERVED(msrs, i)) | 353 | if (msrs->controls[i].addr) |
324 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | 354 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); |
325 | } | 355 | } |
326 | } | 356 | } |
@@ -490,15 +520,21 @@ static void op_amd_exit(void) {} | |||
490 | 520 | ||
491 | #endif /* CONFIG_OPROFILE_IBS */ | 521 | #endif /* CONFIG_OPROFILE_IBS */ |
492 | 522 | ||
493 | struct op_x86_model_spec const op_amd_spec = { | 523 | struct op_x86_model_spec op_amd_spec = { |
494 | .init = op_amd_init, | ||
495 | .exit = op_amd_exit, | ||
496 | .num_counters = NUM_COUNTERS, | 524 | .num_counters = NUM_COUNTERS, |
497 | .num_controls = NUM_CONTROLS, | 525 | .num_controls = NUM_CONTROLS, |
526 | .num_virt_counters = NUM_VIRT_COUNTERS, | ||
527 | .reserved = MSR_AMD_EVENTSEL_RESERVED, | ||
528 | .event_mask = OP_EVENT_MASK, | ||
529 | .init = op_amd_init, | ||
530 | .exit = op_amd_exit, | ||
498 | .fill_in_addresses = &op_amd_fill_in_addresses, | 531 | .fill_in_addresses = &op_amd_fill_in_addresses, |
499 | .setup_ctrs = &op_amd_setup_ctrs, | 532 | .setup_ctrs = &op_amd_setup_ctrs, |
500 | .check_ctrs = &op_amd_check_ctrs, | 533 | .check_ctrs = &op_amd_check_ctrs, |
501 | .start = &op_amd_start, | 534 | .start = &op_amd_start, |
502 | .stop = &op_amd_stop, | 535 | .stop = &op_amd_stop, |
503 | .shutdown = &op_amd_shutdown | 536 | .shutdown = &op_amd_shutdown, |
537 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
538 | .switch_ctrl = &op_mux_switch_ctrl, | ||
539 | #endif | ||
504 | }; | 540 | }; |
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 819b131fd752..ac6b354becdf 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c | |||
@@ -32,6 +32,8 @@ | |||
32 | #define NUM_CCCRS_HT2 9 | 32 | #define NUM_CCCRS_HT2 9 |
33 | #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) | 33 | #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) |
34 | 34 | ||
35 | #define OP_CTR_OVERFLOW (1ULL<<31) | ||
36 | |||
35 | static unsigned int num_counters = NUM_COUNTERS_NON_HT; | 37 | static unsigned int num_counters = NUM_COUNTERS_NON_HT; |
36 | static unsigned int num_controls = NUM_CONTROLS_NON_HT; | 38 | static unsigned int num_controls = NUM_CONTROLS_NON_HT; |
37 | 39 | ||
@@ -350,8 +352,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { | |||
350 | #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) | 352 | #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) |
351 | #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) | 353 | #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) |
352 | #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) | 354 | #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) |
353 | #define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) | ||
354 | #define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) | ||
355 | 355 | ||
356 | #define CCCR_RESERVED_BITS 0x38030FFF | 356 | #define CCCR_RESERVED_BITS 0x38030FFF |
357 | #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) | 357 | #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) |
@@ -361,17 +361,9 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { | |||
361 | #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) | 361 | #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) |
362 | #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) | 362 | #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) |
363 | #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) | 363 | #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) |
364 | #define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) | ||
365 | #define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) | ||
366 | #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) | 364 | #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) |
367 | #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) | 365 | #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) |
368 | 366 | ||
369 | #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) | ||
370 | #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) | ||
371 | #define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0) | ||
372 | #define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0) | ||
373 | #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) | ||
374 | |||
375 | 367 | ||
376 | /* this assigns a "stagger" to the current CPU, which is used throughout | 368 | /* this assigns a "stagger" to the current CPU, which is used throughout |
377 | the code in this module as an extra array offset, to select the "even" | 369 | the code in this module as an extra array offset, to select the "even" |
@@ -515,7 +507,7 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) | |||
515 | if (ev->bindings[i].virt_counter & counter_bit) { | 507 | if (ev->bindings[i].virt_counter & counter_bit) { |
516 | 508 | ||
517 | /* modify ESCR */ | 509 | /* modify ESCR */ |
518 | ESCR_READ(escr, high, ev, i); | 510 | rdmsr(ev->bindings[i].escr_address, escr, high); |
519 | ESCR_CLEAR(escr); | 511 | ESCR_CLEAR(escr); |
520 | if (stag == 0) { | 512 | if (stag == 0) { |
521 | ESCR_SET_USR_0(escr, counter_config[ctr].user); | 513 | ESCR_SET_USR_0(escr, counter_config[ctr].user); |
@@ -526,10 +518,11 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) | |||
526 | } | 518 | } |
527 | ESCR_SET_EVENT_SELECT(escr, ev->event_select); | 519 | ESCR_SET_EVENT_SELECT(escr, ev->event_select); |
528 | ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); | 520 | ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); |
529 | ESCR_WRITE(escr, high, ev, i); | 521 | wrmsr(ev->bindings[i].escr_address, escr, high); |
530 | 522 | ||
531 | /* modify CCCR */ | 523 | /* modify CCCR */ |
532 | CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); | 524 | rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, |
525 | cccr, high); | ||
533 | CCCR_CLEAR(cccr); | 526 | CCCR_CLEAR(cccr); |
534 | CCCR_SET_REQUIRED_BITS(cccr); | 527 | CCCR_SET_REQUIRED_BITS(cccr); |
535 | CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); | 528 | CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); |
@@ -537,7 +530,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) | |||
537 | CCCR_SET_PMI_OVF_0(cccr); | 530 | CCCR_SET_PMI_OVF_0(cccr); |
538 | else | 531 | else |
539 | CCCR_SET_PMI_OVF_1(cccr); | 532 | CCCR_SET_PMI_OVF_1(cccr); |
540 | CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); | 533 | wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, |
534 | cccr, high); | ||
541 | return; | 535 | return; |
542 | } | 536 | } |
543 | } | 537 | } |
@@ -548,7 +542,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) | |||
548 | } | 542 | } |
549 | 543 | ||
550 | 544 | ||
551 | static void p4_setup_ctrs(struct op_msrs const * const msrs) | 545 | static void p4_setup_ctrs(struct op_x86_model_spec const *model, |
546 | struct op_msrs const * const msrs) | ||
552 | { | 547 | { |
553 | unsigned int i; | 548 | unsigned int i; |
554 | unsigned int low, high; | 549 | unsigned int low, high; |
@@ -563,8 +558,8 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) | |||
563 | } | 558 | } |
564 | 559 | ||
565 | /* clear the cccrs we will use */ | 560 | /* clear the cccrs we will use */ |
566 | for (i = 0 ; i < num_counters ; i++) { | 561 | for (i = 0; i < num_counters; i++) { |
567 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) | 562 | if (unlikely(!msrs->controls[i].addr)) |
568 | continue; | 563 | continue; |
569 | rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); | 564 | rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); |
570 | CCCR_CLEAR(low); | 565 | CCCR_CLEAR(low); |
@@ -574,17 +569,18 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) | |||
574 | 569 | ||
575 | /* clear all escrs (including those outside our concern) */ | 570 | /* clear all escrs (including those outside our concern) */ |
576 | for (i = num_counters; i < num_controls; i++) { | 571 | for (i = num_counters; i < num_controls; i++) { |
577 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) | 572 | if (unlikely(!msrs->controls[i].addr)) |
578 | continue; | 573 | continue; |
579 | wrmsr(msrs->controls[i].addr, 0, 0); | 574 | wrmsr(msrs->controls[i].addr, 0, 0); |
580 | } | 575 | } |
581 | 576 | ||
582 | /* setup all counters */ | 577 | /* setup all counters */ |
583 | for (i = 0 ; i < num_counters ; ++i) { | 578 | for (i = 0; i < num_counters; ++i) { |
584 | if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) { | 579 | if (counter_config[i].enabled && msrs->controls[i].addr) { |
585 | reset_value[i] = counter_config[i].count; | 580 | reset_value[i] = counter_config[i].count; |
586 | pmc_setup_one_p4_counter(i); | 581 | pmc_setup_one_p4_counter(i); |
587 | CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); | 582 | wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address, |
583 | -(u64)counter_config[i].count); | ||
588 | } else { | 584 | } else { |
589 | reset_value[i] = 0; | 585 | reset_value[i] = 0; |
590 | } | 586 | } |
@@ -624,14 +620,16 @@ static int p4_check_ctrs(struct pt_regs * const regs, | |||
624 | 620 | ||
625 | real = VIRT_CTR(stag, i); | 621 | real = VIRT_CTR(stag, i); |
626 | 622 | ||
627 | CCCR_READ(low, high, real); | 623 | rdmsr(p4_counters[real].cccr_address, low, high); |
628 | CTR_READ(ctr, high, real); | 624 | rdmsr(p4_counters[real].counter_address, ctr, high); |
629 | if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { | 625 | if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) { |
630 | oprofile_add_sample(regs, i); | 626 | oprofile_add_sample(regs, i); |
631 | CTR_WRITE(reset_value[i], real); | 627 | wrmsrl(p4_counters[real].counter_address, |
628 | -(u64)reset_value[i]); | ||
632 | CCCR_CLEAR_OVF(low); | 629 | CCCR_CLEAR_OVF(low); |
633 | CCCR_WRITE(low, high, real); | 630 | wrmsr(p4_counters[real].cccr_address, low, high); |
634 | CTR_WRITE(reset_value[i], real); | 631 | wrmsrl(p4_counters[real].counter_address, |
632 | -(u64)reset_value[i]); | ||
635 | } | 633 | } |
636 | } | 634 | } |
637 | 635 | ||
@@ -653,9 +651,9 @@ static void p4_start(struct op_msrs const * const msrs) | |||
653 | for (i = 0; i < num_counters; ++i) { | 651 | for (i = 0; i < num_counters; ++i) { |
654 | if (!reset_value[i]) | 652 | if (!reset_value[i]) |
655 | continue; | 653 | continue; |
656 | CCCR_READ(low, high, VIRT_CTR(stag, i)); | 654 | rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); |
657 | CCCR_SET_ENABLE(low); | 655 | CCCR_SET_ENABLE(low); |
658 | CCCR_WRITE(low, high, VIRT_CTR(stag, i)); | 656 | wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); |
659 | } | 657 | } |
660 | } | 658 | } |
661 | 659 | ||
@@ -670,9 +668,9 @@ static void p4_stop(struct op_msrs const * const msrs) | |||
670 | for (i = 0; i < num_counters; ++i) { | 668 | for (i = 0; i < num_counters; ++i) { |
671 | if (!reset_value[i]) | 669 | if (!reset_value[i]) |
672 | continue; | 670 | continue; |
673 | CCCR_READ(low, high, VIRT_CTR(stag, i)); | 671 | rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); |
674 | CCCR_SET_DISABLE(low); | 672 | CCCR_SET_DISABLE(low); |
675 | CCCR_WRITE(low, high, VIRT_CTR(stag, i)); | 673 | wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); |
676 | } | 674 | } |
677 | } | 675 | } |
678 | 676 | ||
@@ -680,8 +678,8 @@ static void p4_shutdown(struct op_msrs const * const msrs) | |||
680 | { | 678 | { |
681 | int i; | 679 | int i; |
682 | 680 | ||
683 | for (i = 0 ; i < num_counters ; ++i) { | 681 | for (i = 0; i < num_counters; ++i) { |
684 | if (CTR_IS_RESERVED(msrs, i)) | 682 | if (msrs->counters[i].addr) |
685 | release_perfctr_nmi(msrs->counters[i].addr); | 683 | release_perfctr_nmi(msrs->counters[i].addr); |
686 | } | 684 | } |
687 | /* | 685 | /* |
@@ -689,15 +687,15 @@ static void p4_shutdown(struct op_msrs const * const msrs) | |||
689 | * conjunction with the counter registers (hence the starting offset). | 687 | * conjunction with the counter registers (hence the starting offset). |
690 | * This saves a few bits. | 688 | * This saves a few bits. |
691 | */ | 689 | */ |
692 | for (i = num_counters ; i < num_controls ; ++i) { | 690 | for (i = num_counters; i < num_controls; ++i) { |
693 | if (CTRL_IS_RESERVED(msrs, i)) | 691 | if (msrs->controls[i].addr) |
694 | release_evntsel_nmi(msrs->controls[i].addr); | 692 | release_evntsel_nmi(msrs->controls[i].addr); |
695 | } | 693 | } |
696 | } | 694 | } |
697 | 695 | ||
698 | 696 | ||
699 | #ifdef CONFIG_SMP | 697 | #ifdef CONFIG_SMP |
700 | struct op_x86_model_spec const op_p4_ht2_spec = { | 698 | struct op_x86_model_spec op_p4_ht2_spec = { |
701 | .num_counters = NUM_COUNTERS_HT2, | 699 | .num_counters = NUM_COUNTERS_HT2, |
702 | .num_controls = NUM_CONTROLS_HT2, | 700 | .num_controls = NUM_CONTROLS_HT2, |
703 | .fill_in_addresses = &p4_fill_in_addresses, | 701 | .fill_in_addresses = &p4_fill_in_addresses, |
@@ -709,7 +707,7 @@ struct op_x86_model_spec const op_p4_ht2_spec = { | |||
709 | }; | 707 | }; |
710 | #endif | 708 | #endif |
711 | 709 | ||
712 | struct op_x86_model_spec const op_p4_spec = { | 710 | struct op_x86_model_spec op_p4_spec = { |
713 | .num_counters = NUM_COUNTERS_NON_HT, | 711 | .num_counters = NUM_COUNTERS_NON_HT, |
714 | .num_controls = NUM_CONTROLS_NON_HT, | 712 | .num_controls = NUM_CONTROLS_NON_HT, |
715 | .fill_in_addresses = &p4_fill_in_addresses, | 713 | .fill_in_addresses = &p4_fill_in_addresses, |
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 4da7230b3d17..4899215999de 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
@@ -10,6 +10,7 @@ | |||
10 | * @author Philippe Elie | 10 | * @author Philippe Elie |
11 | * @author Graydon Hoare | 11 | * @author Graydon Hoare |
12 | * @author Andi Kleen | 12 | * @author Andi Kleen |
13 | * @author Robert Richter <robert.richter@amd.com> | ||
13 | */ | 14 | */ |
14 | 15 | ||
15 | #include <linux/oprofile.h> | 16 | #include <linux/oprofile.h> |
@@ -18,7 +19,6 @@ | |||
18 | #include <asm/msr.h> | 19 | #include <asm/msr.h> |
19 | #include <asm/apic.h> | 20 | #include <asm/apic.h> |
20 | #include <asm/nmi.h> | 21 | #include <asm/nmi.h> |
21 | #include <asm/perf_counter.h> | ||
22 | 22 | ||
23 | #include "op_x86_model.h" | 23 | #include "op_x86_model.h" |
24 | #include "op_counter.h" | 24 | #include "op_counter.h" |
@@ -26,20 +26,7 @@ | |||
26 | static int num_counters = 2; | 26 | static int num_counters = 2; |
27 | static int counter_width = 32; | 27 | static int counter_width = 32; |
28 | 28 | ||
29 | #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) | 29 | #define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21)) |
30 | #define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1)))) | ||
31 | |||
32 | #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) | ||
33 | #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) | ||
34 | #define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) | ||
35 | #define CTRL_SET_ACTIVE(n) (n |= (1<<22)) | ||
36 | #define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) | ||
37 | #define CTRL_CLEAR(x) (x &= (1<<21)) | ||
38 | #define CTRL_SET_ENABLE(val) (val |= 1<<20) | ||
39 | #define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) | ||
40 | #define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) | ||
41 | #define CTRL_SET_UM(val, m) (val |= (m << 8)) | ||
42 | #define CTRL_SET_EVENT(val, e) (val |= e) | ||
43 | 30 | ||
44 | static u64 *reset_value; | 31 | static u64 *reset_value; |
45 | 32 | ||
@@ -63,9 +50,10 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs) | |||
63 | } | 50 | } |
64 | 51 | ||
65 | 52 | ||
66 | static void ppro_setup_ctrs(struct op_msrs const * const msrs) | 53 | static void ppro_setup_ctrs(struct op_x86_model_spec const *model, |
54 | struct op_msrs const * const msrs) | ||
67 | { | 55 | { |
68 | unsigned int low, high; | 56 | u64 val; |
69 | int i; | 57 | int i; |
70 | 58 | ||
71 | if (!reset_value) { | 59 | if (!reset_value) { |
@@ -93,36 +81,30 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) | |||
93 | } | 81 | } |
94 | 82 | ||
95 | /* clear all counters */ | 83 | /* clear all counters */ |
96 | for (i = 0 ; i < num_counters; ++i) { | 84 | for (i = 0; i < num_counters; ++i) { |
97 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) | 85 | if (unlikely(!msrs->controls[i].addr)) |
98 | continue; | 86 | continue; |
99 | CTRL_READ(low, high, msrs, i); | 87 | rdmsrl(msrs->controls[i].addr, val); |
100 | CTRL_CLEAR(low); | 88 | val &= model->reserved; |
101 | CTRL_WRITE(low, high, msrs, i); | 89 | wrmsrl(msrs->controls[i].addr, val); |
102 | } | 90 | } |
103 | 91 | ||
104 | /* avoid a false detection of ctr overflows in NMI handler */ | 92 | /* avoid a false detection of ctr overflows in NMI handler */ |
105 | for (i = 0; i < num_counters; ++i) { | 93 | for (i = 0; i < num_counters; ++i) { |
106 | if (unlikely(!CTR_IS_RESERVED(msrs, i))) | 94 | if (unlikely(!msrs->counters[i].addr)) |
107 | continue; | 95 | continue; |
108 | wrmsrl(msrs->counters[i].addr, -1LL); | 96 | wrmsrl(msrs->counters[i].addr, -1LL); |
109 | } | 97 | } |
110 | 98 | ||
111 | /* enable active counters */ | 99 | /* enable active counters */ |
112 | for (i = 0; i < num_counters; ++i) { | 100 | for (i = 0; i < num_counters; ++i) { |
113 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { | 101 | if (counter_config[i].enabled && msrs->counters[i].addr) { |
114 | reset_value[i] = counter_config[i].count; | 102 | reset_value[i] = counter_config[i].count; |
115 | |||
116 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); | 103 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); |
117 | 104 | rdmsrl(msrs->controls[i].addr, val); | |
118 | CTRL_READ(low, high, msrs, i); | 105 | val &= model->reserved; |
119 | CTRL_CLEAR(low); | 106 | val |= op_x86_get_ctrl(model, &counter_config[i]); |
120 | CTRL_SET_ENABLE(low); | 107 | wrmsrl(msrs->controls[i].addr, val); |
121 | CTRL_SET_USR(low, counter_config[i].user); | ||
122 | CTRL_SET_KERN(low, counter_config[i].kernel); | ||
123 | CTRL_SET_UM(low, counter_config[i].unit_mask); | ||
124 | CTRL_SET_EVENT(low, counter_config[i].event); | ||
125 | CTRL_WRITE(low, high, msrs, i); | ||
126 | } else { | 108 | } else { |
127 | reset_value[i] = 0; | 109 | reset_value[i] = 0; |
128 | } | 110 | } |
@@ -143,14 +125,14 @@ static int ppro_check_ctrs(struct pt_regs * const regs, | |||
143 | if (unlikely(!reset_value)) | 125 | if (unlikely(!reset_value)) |
144 | goto out; | 126 | goto out; |
145 | 127 | ||
146 | for (i = 0 ; i < num_counters; ++i) { | 128 | for (i = 0; i < num_counters; ++i) { |
147 | if (!reset_value[i]) | 129 | if (!reset_value[i]) |
148 | continue; | 130 | continue; |
149 | rdmsrl(msrs->counters[i].addr, val); | 131 | rdmsrl(msrs->counters[i].addr, val); |
150 | if (CTR_OVERFLOWED(val)) { | 132 | if (val & (1ULL << (counter_width - 1))) |
151 | oprofile_add_sample(regs, i); | 133 | continue; |
152 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); | 134 | oprofile_add_sample(regs, i); |
153 | } | 135 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); |
154 | } | 136 | } |
155 | 137 | ||
156 | out: | 138 | out: |
@@ -171,16 +153,16 @@ out: | |||
171 | 153 | ||
172 | static void ppro_start(struct op_msrs const * const msrs) | 154 | static void ppro_start(struct op_msrs const * const msrs) |
173 | { | 155 | { |
174 | unsigned int low, high; | 156 | u64 val; |
175 | int i; | 157 | int i; |
176 | 158 | ||
177 | if (!reset_value) | 159 | if (!reset_value) |
178 | return; | 160 | return; |
179 | for (i = 0; i < num_counters; ++i) { | 161 | for (i = 0; i < num_counters; ++i) { |
180 | if (reset_value[i]) { | 162 | if (reset_value[i]) { |
181 | CTRL_READ(low, high, msrs, i); | 163 | rdmsrl(msrs->controls[i].addr, val); |
182 | CTRL_SET_ACTIVE(low); | 164 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; |
183 | CTRL_WRITE(low, high, msrs, i); | 165 | wrmsrl(msrs->controls[i].addr, val); |
184 | } | 166 | } |
185 | } | 167 | } |
186 | } | 168 | } |
@@ -188,7 +170,7 @@ static void ppro_start(struct op_msrs const * const msrs) | |||
188 | 170 | ||
189 | static void ppro_stop(struct op_msrs const * const msrs) | 171 | static void ppro_stop(struct op_msrs const * const msrs) |
190 | { | 172 | { |
191 | unsigned int low, high; | 173 | u64 val; |
192 | int i; | 174 | int i; |
193 | 175 | ||
194 | if (!reset_value) | 176 | if (!reset_value) |
@@ -196,9 +178,9 @@ static void ppro_stop(struct op_msrs const * const msrs) | |||
196 | for (i = 0; i < num_counters; ++i) { | 178 | for (i = 0; i < num_counters; ++i) { |
197 | if (!reset_value[i]) | 179 | if (!reset_value[i]) |
198 | continue; | 180 | continue; |
199 | CTRL_READ(low, high, msrs, i); | 181 | rdmsrl(msrs->controls[i].addr, val); |
200 | CTRL_SET_INACTIVE(low); | 182 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; |
201 | CTRL_WRITE(low, high, msrs, i); | 183 | wrmsrl(msrs->controls[i].addr, val); |
202 | } | 184 | } |
203 | } | 185 | } |
204 | 186 | ||
@@ -206,12 +188,12 @@ static void ppro_shutdown(struct op_msrs const * const msrs) | |||
206 | { | 188 | { |
207 | int i; | 189 | int i; |
208 | 190 | ||
209 | for (i = 0 ; i < num_counters ; ++i) { | 191 | for (i = 0; i < num_counters; ++i) { |
210 | if (CTR_IS_RESERVED(msrs, i)) | 192 | if (msrs->counters[i].addr) |
211 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); | 193 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); |
212 | } | 194 | } |
213 | for (i = 0 ; i < num_counters ; ++i) { | 195 | for (i = 0; i < num_counters; ++i) { |
214 | if (CTRL_IS_RESERVED(msrs, i)) | 196 | if (msrs->controls[i].addr) |
215 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); | 197 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); |
216 | } | 198 | } |
217 | if (reset_value) { | 199 | if (reset_value) { |
@@ -222,8 +204,9 @@ static void ppro_shutdown(struct op_msrs const * const msrs) | |||
222 | 204 | ||
223 | 205 | ||
224 | struct op_x86_model_spec op_ppro_spec = { | 206 | struct op_x86_model_spec op_ppro_spec = { |
225 | .num_counters = 2, /* can be overriden */ | 207 | .num_counters = 2, |
226 | .num_controls = 2, /* dito */ | 208 | .num_controls = 2, |
209 | .reserved = MSR_PPRO_EVENTSEL_RESERVED, | ||
227 | .fill_in_addresses = &ppro_fill_in_addresses, | 210 | .fill_in_addresses = &ppro_fill_in_addresses, |
228 | .setup_ctrs = &ppro_setup_ctrs, | 211 | .setup_ctrs = &ppro_setup_ctrs, |
229 | .check_ctrs = &ppro_check_ctrs, | 212 | .check_ctrs = &ppro_check_ctrs, |
@@ -241,7 +224,7 @@ struct op_x86_model_spec op_ppro_spec = { | |||
241 | * the specific CPU. | 224 | * the specific CPU. |
242 | */ | 225 | */ |
243 | 226 | ||
244 | void arch_perfmon_setup_counters(void) | 227 | static void arch_perfmon_setup_counters(void) |
245 | { | 228 | { |
246 | union cpuid10_eax eax; | 229 | union cpuid10_eax eax; |
247 | 230 | ||
@@ -259,11 +242,17 @@ void arch_perfmon_setup_counters(void) | |||
259 | 242 | ||
260 | op_arch_perfmon_spec.num_counters = num_counters; | 243 | op_arch_perfmon_spec.num_counters = num_counters; |
261 | op_arch_perfmon_spec.num_controls = num_counters; | 244 | op_arch_perfmon_spec.num_controls = num_counters; |
262 | op_ppro_spec.num_counters = num_counters; | 245 | } |
263 | op_ppro_spec.num_controls = num_counters; | 246 | |
247 | static int arch_perfmon_init(struct oprofile_operations *ignore) | ||
248 | { | ||
249 | arch_perfmon_setup_counters(); | ||
250 | return 0; | ||
264 | } | 251 | } |
265 | 252 | ||
266 | struct op_x86_model_spec op_arch_perfmon_spec = { | 253 | struct op_x86_model_spec op_arch_perfmon_spec = { |
254 | .reserved = MSR_PPRO_EVENTSEL_RESERVED, | ||
255 | .init = &arch_perfmon_init, | ||
267 | /* num_counters/num_controls filled in at runtime */ | 256 | /* num_counters/num_controls filled in at runtime */ |
268 | .fill_in_addresses = &ppro_fill_in_addresses, | 257 | .fill_in_addresses = &ppro_fill_in_addresses, |
269 | /* user space does the cpuid check for available events */ | 258 | /* user space does the cpuid check for available events */ |
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 825e79064d64..b83776180c7f 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h | |||
@@ -6,51 +6,66 @@ | |||
6 | * @remark Read the file COPYING | 6 | * @remark Read the file COPYING |
7 | * | 7 | * |
8 | * @author Graydon Hoare | 8 | * @author Graydon Hoare |
9 | * @author Robert Richter <robert.richter@amd.com> | ||
9 | */ | 10 | */ |
10 | 11 | ||
11 | #ifndef OP_X86_MODEL_H | 12 | #ifndef OP_X86_MODEL_H |
12 | #define OP_X86_MODEL_H | 13 | #define OP_X86_MODEL_H |
13 | 14 | ||
14 | struct op_saved_msr { | 15 | #include <asm/types.h> |
15 | unsigned int high; | 16 | #include <asm/perf_counter.h> |
16 | unsigned int low; | ||
17 | }; | ||
18 | 17 | ||
19 | struct op_msr { | 18 | struct op_msr { |
20 | unsigned long addr; | 19 | unsigned long addr; |
21 | struct op_saved_msr saved; | 20 | u64 saved; |
22 | }; | 21 | }; |
23 | 22 | ||
24 | struct op_msrs { | 23 | struct op_msrs { |
25 | struct op_msr *counters; | 24 | struct op_msr *counters; |
26 | struct op_msr *controls; | 25 | struct op_msr *controls; |
26 | struct op_msr *multiplex; | ||
27 | }; | 27 | }; |
28 | 28 | ||
29 | struct pt_regs; | 29 | struct pt_regs; |
30 | 30 | ||
31 | struct oprofile_operations; | ||
32 | |||
31 | /* The model vtable abstracts the differences between | 33 | /* The model vtable abstracts the differences between |
32 | * various x86 CPU models' perfctr support. | 34 | * various x86 CPU models' perfctr support. |
33 | */ | 35 | */ |
34 | struct op_x86_model_spec { | 36 | struct op_x86_model_spec { |
35 | int (*init)(struct oprofile_operations *ops); | 37 | unsigned int num_counters; |
36 | void (*exit)(void); | 38 | unsigned int num_controls; |
37 | unsigned int num_counters; | 39 | unsigned int num_virt_counters; |
38 | unsigned int num_controls; | 40 | u64 reserved; |
39 | void (*fill_in_addresses)(struct op_msrs * const msrs); | 41 | u16 event_mask; |
40 | void (*setup_ctrs)(struct op_msrs const * const msrs); | 42 | int (*init)(struct oprofile_operations *ops); |
41 | int (*check_ctrs)(struct pt_regs * const regs, | 43 | void (*exit)(void); |
42 | struct op_msrs const * const msrs); | 44 | void (*fill_in_addresses)(struct op_msrs * const msrs); |
43 | void (*start)(struct op_msrs const * const msrs); | 45 | void (*setup_ctrs)(struct op_x86_model_spec const *model, |
44 | void (*stop)(struct op_msrs const * const msrs); | 46 | struct op_msrs const * const msrs); |
45 | void (*shutdown)(struct op_msrs const * const msrs); | 47 | int (*check_ctrs)(struct pt_regs * const regs, |
48 | struct op_msrs const * const msrs); | ||
49 | void (*start)(struct op_msrs const * const msrs); | ||
50 | void (*stop)(struct op_msrs const * const msrs); | ||
51 | void (*shutdown)(struct op_msrs const * const msrs); | ||
52 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
53 | void (*switch_ctrl)(struct op_x86_model_spec const *model, | ||
54 | struct op_msrs const * const msrs); | ||
55 | #endif | ||
46 | }; | 56 | }; |
47 | 57 | ||
58 | struct op_counter_config; | ||
59 | |||
60 | extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, | ||
61 | struct op_counter_config *counter_config); | ||
62 | extern int op_x86_phys_to_virt(int phys); | ||
63 | extern int op_x86_virt_to_phys(int virt); | ||
64 | |||
48 | extern struct op_x86_model_spec op_ppro_spec; | 65 | extern struct op_x86_model_spec op_ppro_spec; |
49 | extern struct op_x86_model_spec const op_p4_spec; | 66 | extern struct op_x86_model_spec op_p4_spec; |
50 | extern struct op_x86_model_spec const op_p4_ht2_spec; | 67 | extern struct op_x86_model_spec op_p4_ht2_spec; |
51 | extern struct op_x86_model_spec const op_amd_spec; | 68 | extern struct op_x86_model_spec op_amd_spec; |
52 | extern struct op_x86_model_spec op_arch_perfmon_spec; | 69 | extern struct op_x86_model_spec op_arch_perfmon_spec; |
53 | 70 | ||
54 | extern void arch_perfmon_setup_counters(void); | ||
55 | |||
56 | #endif /* OP_X86_MODEL_H */ | 71 | #endif /* OP_X86_MODEL_H */ |
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index b26626dc517c..1014eb4bfc37 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -68,6 +68,10 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
68 | unsigned long flags; | 68 | unsigned long flags; |
69 | struct resource *root; | 69 | struct resource *root; |
70 | int max_root_bus_resources = PCI_BUS_NUM_RESOURCES; | 70 | int max_root_bus_resources = PCI_BUS_NUM_RESOURCES; |
71 | u64 start, end; | ||
72 | |||
73 | if (bus_has_transparent_bridge(info->bus)) | ||
74 | max_root_bus_resources -= 3; | ||
71 | 75 | ||
72 | status = resource_to_addr(acpi_res, &addr); | 76 | status = resource_to_addr(acpi_res, &addr); |
73 | if (!ACPI_SUCCESS(status)) | 77 | if (!ACPI_SUCCESS(status)) |
@@ -84,25 +88,24 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
84 | } else | 88 | } else |
85 | return AE_OK; | 89 | return AE_OK; |
86 | 90 | ||
87 | res = &info->res[info->res_num]; | 91 | start = addr.minimum + addr.translation_offset; |
88 | res->name = info->name; | 92 | end = start + addr.address_length - 1; |
89 | res->flags = flags; | ||
90 | res->start = addr.minimum + addr.translation_offset; | ||
91 | res->end = res->start + addr.address_length - 1; | ||
92 | res->child = NULL; | ||
93 | |||
94 | if (bus_has_transparent_bridge(info->bus)) | ||
95 | max_root_bus_resources -= 3; | ||
96 | if (info->res_num >= max_root_bus_resources) { | 93 | if (info->res_num >= max_root_bus_resources) { |
97 | printk(KERN_WARNING "PCI: Failed to allocate 0x%lx-0x%lx " | 94 | printk(KERN_WARNING "PCI: Failed to allocate 0x%lx-0x%lx " |
98 | "from %s for %s due to _CRS returning more than " | 95 | "from %s for %s due to _CRS returning more than " |
99 | "%d resource descriptors\n", (unsigned long) res->start, | 96 | "%d resource descriptors\n", (unsigned long) start, |
100 | (unsigned long) res->end, root->name, info->name, | 97 | (unsigned long) end, root->name, info->name, |
101 | max_root_bus_resources); | 98 | max_root_bus_resources); |
102 | info->res_num++; | ||
103 | return AE_OK; | 99 | return AE_OK; |
104 | } | 100 | } |
105 | 101 | ||
102 | res = &info->res[info->res_num]; | ||
103 | res->name = info->name; | ||
104 | res->flags = flags; | ||
105 | res->start = start; | ||
106 | res->end = end; | ||
107 | res->child = NULL; | ||
108 | |||
106 | if (insert_resource(root, res)) { | 109 | if (insert_resource(root, res)) { |
107 | printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx " | 110 | printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx " |
108 | "from %s for %s\n", (unsigned long) res->start, | 111 | "from %s for %s\n", (unsigned long) res->start, |
@@ -115,23 +118,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
115 | } | 118 | } |
116 | 119 | ||
117 | static void | 120 | static void |
118 | adjust_transparent_bridge_resources(struct pci_bus *bus) | ||
119 | { | ||
120 | struct pci_dev *dev; | ||
121 | |||
122 | list_for_each_entry(dev, &bus->devices, bus_list) { | ||
123 | int i; | ||
124 | u16 class = dev->class >> 8; | ||
125 | |||
126 | if (class == PCI_CLASS_BRIDGE_PCI && dev->transparent) { | ||
127 | for(i = 3; i < PCI_BUS_NUM_RESOURCES; i++) | ||
128 | dev->subordinate->resource[i] = | ||
129 | dev->bus->resource[i - 3]; | ||
130 | } | ||
131 | } | ||
132 | } | ||
133 | |||
134 | static void | ||
135 | get_current_resources(struct acpi_device *device, int busnum, | 121 | get_current_resources(struct acpi_device *device, int busnum, |
136 | int domain, struct pci_bus *bus) | 122 | int domain, struct pci_bus *bus) |
137 | { | 123 | { |
@@ -158,8 +144,6 @@ get_current_resources(struct acpi_device *device, int busnum, | |||
158 | info.res_num = 0; | 144 | info.res_num = 0; |
159 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, | 145 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, |
160 | &info); | 146 | &info); |
161 | if (info.res_num) | ||
162 | adjust_transparent_bridge_resources(bus); | ||
163 | 147 | ||
164 | return; | 148 | return; |
165 | 149 | ||
@@ -222,8 +206,15 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do | |||
222 | */ | 206 | */ |
223 | memcpy(bus->sysdata, sd, sizeof(*sd)); | 207 | memcpy(bus->sysdata, sd, sizeof(*sd)); |
224 | kfree(sd); | 208 | kfree(sd); |
225 | } else | 209 | } else { |
226 | bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); | 210 | bus = pci_create_bus(NULL, busnum, &pci_root_ops, sd); |
211 | if (bus) { | ||
212 | if (pci_probe & PCI_USE__CRS) | ||
213 | get_current_resources(device, busnum, domain, | ||
214 | bus); | ||
215 | bus->subordinate = pci_scan_child_bus(bus); | ||
216 | } | ||
217 | } | ||
227 | 218 | ||
228 | if (!bus) | 219 | if (!bus) |
229 | kfree(sd); | 220 | kfree(sd); |
@@ -238,8 +229,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do | |||
238 | #endif | 229 | #endif |
239 | } | 230 | } |
240 | 231 | ||
241 | if (bus && (pci_probe & PCI_USE__CRS)) | ||
242 | get_current_resources(device, busnum, domain, bus); | ||
243 | return bus; | 232 | return bus; |
244 | } | 233 | } |
245 | 234 | ||
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index f893d6a6e803..3ffa10df20b9 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -100,8 +100,9 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) | |||
100 | int j; | 100 | int j; |
101 | struct pci_root_info *info; | 101 | struct pci_root_info *info; |
102 | 102 | ||
103 | /* don't go for it if _CRS is used */ | 103 | /* don't go for it if _CRS is used already */ |
104 | if (pci_probe & PCI_USE__CRS) | 104 | if (b->resource[0] != &ioport_resource || |
105 | b->resource[1] != &iomem_resource) | ||
105 | return; | 106 | return; |
106 | 107 | ||
107 | /* if only one root bus, don't need to anything */ | 108 | /* if only one root bus, don't need to anything */ |
@@ -116,6 +117,9 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) | |||
116 | if (i == pci_root_num) | 117 | if (i == pci_root_num) |
117 | return; | 118 | return; |
118 | 119 | ||
120 | printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", | ||
121 | b->number); | ||
122 | |||
119 | info = &pci_root_info[i]; | 123 | info = &pci_root_info[i]; |
120 | for (j = 0; j < info->res_num; j++) { | 124 | for (j = 0; j < info->res_num; j++) { |
121 | struct resource *res; | 125 | struct resource *res; |
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index bd13c3e4c6db..347d882b3bb3 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c | |||
@@ -192,13 +192,14 @@ struct pci_raw_ops pci_direct_conf2 = { | |||
192 | static int __init pci_sanity_check(struct pci_raw_ops *o) | 192 | static int __init pci_sanity_check(struct pci_raw_ops *o) |
193 | { | 193 | { |
194 | u32 x = 0; | 194 | u32 x = 0; |
195 | int devfn; | 195 | int year, devfn; |
196 | 196 | ||
197 | if (pci_probe & PCI_NO_CHECKS) | 197 | if (pci_probe & PCI_NO_CHECKS) |
198 | return 1; | 198 | return 1; |
199 | /* Assume Type 1 works for newer systems. | 199 | /* Assume Type 1 works for newer systems. |
200 | This handles machines that don't have anything on PCI Bus 0. */ | 200 | This handles machines that don't have anything on PCI Bus 0. */ |
201 | if (dmi_get_year(DMI_BIOS_DATE) >= 2001) | 201 | dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL); |
202 | if (year >= 2001) | ||
202 | return 1; | 203 | return 1; |
203 | 204 | ||
204 | for (devfn = 0; devfn < 0x100; devfn++) { | 205 | for (devfn = 0; devfn < 0x100; devfn++) { |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 0fb56db16d18..52e62e57fedd 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <asm/pat.h> | 35 | #include <asm/pat.h> |
36 | #include <asm/e820.h> | 36 | #include <asm/e820.h> |
37 | #include <asm/pci_x86.h> | 37 | #include <asm/pci_x86.h> |
38 | #include <asm/io_apic.h> | ||
38 | 39 | ||
39 | 40 | ||
40 | static int | 41 | static int |
@@ -227,6 +228,12 @@ void __init pcibios_resource_survey(void) | |||
227 | pcibios_allocate_resources(1); | 228 | pcibios_allocate_resources(1); |
228 | 229 | ||
229 | e820_reserve_resources_late(); | 230 | e820_reserve_resources_late(); |
231 | /* | ||
232 | * Insert the IO APIC resources after PCI initialization has | ||
233 | * occurred to handle IO APICS that are mapped in on a BAR in | ||
234 | * PCI space, but before trying to assign unassigned pci res. | ||
235 | */ | ||
236 | ioapic_insert_resources(); | ||
230 | } | 237 | } |
231 | 238 | ||
232 | /** | 239 | /** |
diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile index de2abbd07544..a6a198c33623 100644 --- a/arch/x86/power/Makefile +++ b/arch/x86/power/Makefile | |||
@@ -1,7 +1,7 @@ | |||
1 | # __restore_processor_state() restores %gs after S3 resume and so should not | 1 | # __restore_processor_state() restores %gs after S3 resume and so should not |
2 | # itself be stack-protected | 2 | # itself be stack-protected |
3 | nostackp := $(call cc-option, -fno-stack-protector) | 3 | nostackp := $(call cc-option, -fno-stack-protector) |
4 | CFLAGS_cpu_$(BITS).o := $(nostackp) | 4 | CFLAGS_cpu.o := $(nostackp) |
5 | 5 | ||
6 | obj-$(CONFIG_PM_SLEEP) += cpu.o | 6 | obj-$(CONFIG_PM_SLEEP) += cpu.o |
7 | obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o | 7 | obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o |
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index d277ef1eea51..b3d20b9cac63 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c | |||
@@ -244,7 +244,7 @@ static void __restore_processor_state(struct saved_context *ctxt) | |||
244 | do_fpu_end(); | 244 | do_fpu_end(); |
245 | mtrr_ap_init(); | 245 | mtrr_ap_init(); |
246 | 246 | ||
247 | #ifdef CONFIG_X86_32 | 247 | #ifdef CONFIG_X86_OLD_MCE |
248 | mcheck_init(&boot_cpu_data); | 248 | mcheck_init(&boot_cpu_data); |
249 | #endif | 249 | #endif |
250 | } | 250 | } |
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 172438f86a02..7410640db173 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile | |||
@@ -5,6 +5,10 @@ CFLAGS_REMOVE_time.o = -pg | |||
5 | CFLAGS_REMOVE_irq.o = -pg | 5 | CFLAGS_REMOVE_irq.o = -pg |
6 | endif | 6 | endif |
7 | 7 | ||
8 | # Make sure early boot has no stackprotector | ||
9 | nostackp := $(call cc-option, -fno-stack-protector) | ||
10 | CFLAGS_enlighten.o := $(nostackp) | ||
11 | |||
8 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ | 12 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ |
9 | time.o xen-asm.o xen-asm_$(BITS).o \ | 13 | time.o xen-asm.o xen-asm_$(BITS).o \ |
10 | grant-table.o suspend.o | 14 | grant-table.o suspend.o |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0a1700a2be9c..b62ccb840cfb 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -215,6 +215,7 @@ static __init void xen_init_cpuid_mask(void) | |||
215 | (1 << X86_FEATURE_ACPI)); /* disable ACPI */ | 215 | (1 << X86_FEATURE_ACPI)); /* disable ACPI */ |
216 | 216 | ||
217 | ax = 1; | 217 | ax = 1; |
218 | cx = 0; | ||
218 | xen_cpuid(&ax, &bx, &cx, &dx); | 219 | xen_cpuid(&ax, &bx, &cx, &dx); |
219 | 220 | ||
220 | /* cpuid claims we support xsave; try enabling it to see what happens */ | 221 | /* cpuid claims we support xsave; try enabling it to see what happens */ |
@@ -713,7 +714,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) | |||
713 | set: | 714 | set: |
714 | base = ((u64)high << 32) | low; | 715 | base = ((u64)high << 32) | low; |
715 | if (HYPERVISOR_set_segment_base(which, base) != 0) | 716 | if (HYPERVISOR_set_segment_base(which, base) != 0) |
716 | ret = -EFAULT; | 717 | ret = -EIO; |
717 | break; | 718 | break; |
718 | #endif | 719 | #endif |
719 | 720 | ||
@@ -974,10 +975,6 @@ asmlinkage void __init xen_start_kernel(void) | |||
974 | 975 | ||
975 | xen_domain_type = XEN_PV_DOMAIN; | 976 | xen_domain_type = XEN_PV_DOMAIN; |
976 | 977 | ||
977 | BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); | ||
978 | |||
979 | xen_setup_features(); | ||
980 | |||
981 | /* Install Xen paravirt ops */ | 978 | /* Install Xen paravirt ops */ |
982 | pv_info = xen_info; | 979 | pv_info = xen_info; |
983 | pv_init_ops = xen_init_ops; | 980 | pv_init_ops = xen_init_ops; |
@@ -986,8 +983,15 @@ asmlinkage void __init xen_start_kernel(void) | |||
986 | pv_apic_ops = xen_apic_ops; | 983 | pv_apic_ops = xen_apic_ops; |
987 | pv_mmu_ops = xen_mmu_ops; | 984 | pv_mmu_ops = xen_mmu_ops; |
988 | 985 | ||
989 | xen_init_irq_ops(); | 986 | #ifdef CONFIG_X86_64 |
987 | /* | ||
988 | * Setup percpu state. We only need to do this for 64-bit | ||
989 | * because 32-bit already has %fs set properly. | ||
990 | */ | ||
991 | load_percpu_segment(0); | ||
992 | #endif | ||
990 | 993 | ||
994 | xen_init_irq_ops(); | ||
991 | xen_init_cpuid_mask(); | 995 | xen_init_cpuid_mask(); |
992 | 996 | ||
993 | #ifdef CONFIG_X86_LOCAL_APIC | 997 | #ifdef CONFIG_X86_LOCAL_APIC |
@@ -997,6 +1001,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
997 | set_xen_basic_apic_ops(); | 1001 | set_xen_basic_apic_ops(); |
998 | #endif | 1002 | #endif |
999 | 1003 | ||
1004 | xen_setup_features(); | ||
1005 | |||
1000 | if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { | 1006 | if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { |
1001 | pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; | 1007 | pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; |
1002 | pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; | 1008 | pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; |
@@ -1004,13 +1010,6 @@ asmlinkage void __init xen_start_kernel(void) | |||
1004 | 1010 | ||
1005 | machine_ops = xen_machine_ops; | 1011 | machine_ops = xen_machine_ops; |
1006 | 1012 | ||
1007 | #ifdef CONFIG_X86_64 | ||
1008 | /* | ||
1009 | * Setup percpu state. We only need to do this for 64-bit | ||
1010 | * because 32-bit already has %fs set properly. | ||
1011 | */ | ||
1012 | load_percpu_segment(0); | ||
1013 | #endif | ||
1014 | /* | 1013 | /* |
1015 | * The only reliable way to retain the initial address of the | 1014 | * The only reliable way to retain the initial address of the |
1016 | * percpu gdt_page is to remember it here, so we can go and | 1015 | * percpu gdt_page is to remember it here, so we can go and |
@@ -1061,6 +1060,7 @@ asmlinkage void __init xen_start_kernel(void) | |||
1061 | /* set up basic CPUID stuff */ | 1060 | /* set up basic CPUID stuff */ |
1062 | cpu_detect(&new_cpu_data); | 1061 | cpu_detect(&new_cpu_data); |
1063 | new_cpu_data.hard_math = 1; | 1062 | new_cpu_data.hard_math = 1; |
1063 | new_cpu_data.wp_works_ok = 1; | ||
1064 | new_cpu_data.x86_capability[0] = cpuid_edx(1); | 1064 | new_cpu_data.x86_capability[0] = cpuid_edx(1); |
1065 | #endif | 1065 | #endif |
1066 | 1066 | ||