author     H. Peter Anvin <hpa@zytor.com>  2010-04-29 19:53:17 -0400
committer  H. Peter Anvin <hpa@zytor.com>  2010-04-29 19:53:17 -0400
commit     d9c5841e22231e4e49fd0a1004164e6fce59b7a6 (patch)
tree       e1f589c46b3ff79bbe7b1b2469f6362f94576da6 /arch/x86
parent     b701a47ba48b698976fb2fe05fb285b0edc1d26a (diff)
parent     5967ed87ade85a421ef814296c3c7f182b08c225 (diff)
Merge branch 'x86/asm' into x86/atomic
Merge reason: Conflict between LOCK_PREFIX_HERE and relative alternatives pointers
Resolved Conflicts:
arch/x86/include/asm/alternative.h
arch/x86/kernel/alternative.c
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch/x86')
298 files changed, 10290 insertions, 6602 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 55298e891571..9458685902bd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -31,6 +31,7 @@ config X86
 	select ARCH_WANT_FRAME_POINTERS
 	select HAVE_DMA_ATTRS
 	select HAVE_KRETPROBES
+	select HAVE_OPTPROBES
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
@@ -45,10 +46,12 @@ config X86
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select USER_STACKTRACE_SUPPORT
+	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_DMA_API_DEBUG
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
+	select HAVE_KERNEL_LZO
 	select HAVE_HW_BREAKPOINT
 	select PERF_EVENTS
 	select ANON_INODES
@@ -99,6 +102,9 @@ config ZONE_DMA
 config SBUS
 	bool
 
+config NEED_DMA_MAP_STATE
+	def_bool (X86_64 || DMAR || DMA_API_DEBUG)
+
 config GENERIC_ISA_DMA
 	def_bool y
 
@@ -182,6 +188,9 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	def_bool y
 
+config HAVE_EARLY_RES
+	def_bool y
+
 config HAVE_INTEL_TXT
 	def_bool y
 	depends on EXPERIMENTAL && DMAR && ACPI
@@ -387,8 +396,12 @@ config X86_ELAN
 
 config X86_MRST
 	bool "Moorestown MID platform"
+	depends on PCI
+	depends on PCI_GOANY
 	depends on X86_32
 	depends on X86_EXTENDED_PLATFORM
+	depends on X86_IO_APIC
+	select APB_TIMER
 	---help---
 	  Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
 	  Internet Device(MID) platform. Moorestown consists of two chips:
@@ -423,6 +436,7 @@ config X86_32_NON_STANDARD
 config X86_NUMAQ
 	bool "NUMAQ (IBM/Sequent)"
 	depends on X86_32_NON_STANDARD
+	depends on PCI
 	select NUMA
 	select X86_MPPARSE
 	---help---
@@ -567,6 +581,18 @@ config PARAVIRT_DEBUG
 	  Enable to debug paravirt_ops internals.  Specifically, BUG if
 	  a paravirt_op is missing when it is called.
 
+config NO_BOOTMEM
+	default y
+	bool "Disable Bootmem code"
+	---help---
+	  Use early_res directly instead of bootmem before slab is ready.
+		- allocator (buddy) [generic]
+		- early allocator (bootmem) [generic]
+		- very early allocator (reserve_early*()) [x86]
+		- very very early allocator (early brk model) [x86]
+	  So reduce one layer between early allocator to final allocator
+
+
 config MEMTEST
 	bool "Memtest"
 	---help---
@@ -611,6 +637,16 @@ config HPET_EMULATE_RTC
 	def_bool y
 	depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
 
+config APB_TIMER
+	def_bool y if MRST
+	prompt "Langwell APB Timer Support" if X86_MRST
+	help
+	  APB timer is the replacement for 8254, HPET on X86 MID platforms.
+	  The APBT provides a stable time base on SMP
+	  systems, unlike the TSC, but it is more expensive to access,
+	  as it is off-chip. APB timers are always running regardless of CPU
+	  C states, they are used as per CPU clockevent device when possible.
+
 # Mark as embedded because too many people got it wrong.
 # The code disables itself when not needed.
 config DMI
@@ -626,7 +662,7 @@ config GART_IOMMU
 	bool "GART IOMMU support" if EMBEDDED
 	default y
 	select SWIOTLB
-	depends on X86_64 && PCI
+	depends on X86_64 && PCI && K8_NB
 	---help---
 	  Support for full DMA access of devices with 32bit memory access only
 	  on systems with more than 3GB. This is usually needed for USB,
@@ -988,12 +1024,6 @@ config X86_CPUID
 	  with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
 	  /dev/cpu/31/cpuid.
 
-config X86_CPU_DEBUG
-	tristate "/sys/kernel/debug/x86/cpu/* - CPU Debug support"
-	---help---
-	  If you select this option, this will provide various x86 CPUs
-	  information through debugfs.
-
 choice
 	prompt "High Memory Support"
 	default HIGHMEM4G if !X86_NUMAQ
@@ -1186,8 +1216,8 @@ config NUMA_EMU
 
 config NODES_SHIFT
 	int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
-	range 1 9
-	default "9" if MAXSMP
+	range 1 10
+	default "10" if MAXSMP
 	default "6" if X86_64
 	default "4" if X86_NUMAQ
 	default "3"
@@ -1246,6 +1276,11 @@ config ARCH_MEMORY_PROBE
 	def_bool X86_64
 	depends on MEMORY_HOTPLUG
 
+config ILLEGAL_POINTER_VALUE
+	hex
+	default 0 if X86_32
+	default 0xdead000000000000 if X86_64
+
 source "mm/Kconfig"
 
 config HIGHPTE
@@ -2026,7 +2061,7 @@ endif # X86_32
 
 config K8_NB
 	def_bool y
-	depends on AGP_AMD64 || (X86_64 && (GART_IOMMU || (PCI && NUMA)))
+	depends on CPU_SUP_AMD && PCI
 
 source "drivers/pcmcia/Kconfig"
 
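The NO_BOOTMEM help text above names four allocator layers; the new HAVE_EARLY_RES option exposes the x86 "very early" one. As a rough illustration of how that layer is driven, here is a hedged sketch built only from the reserve_early()/free_early() prototypes that appear in the e820.h hunk further down; the range and tag used here are made up:

/* Sketch: typical use of the reserve_early*() layer during early boot.
 * Prototypes match the e820.h hunk below; call-site values are
 * illustrative only.
 */
#include <linux/init.h>
#include <linux/types.h>

extern void reserve_early(u64 start, u64 end, char *name);
extern void free_early(u64 start, u64 end);

static void __init example_early_setup(void)
{
	/* Claim a range before bootmem/buddy exist, tagged for debugging. */
	reserve_early(0x100000, 0x200000, "EXAMPLE DATA");

	/* ... use the range during early boot ... */

	/* Hand it back once a later allocator can take over. */
	free_early(0x100000, 0x200000);
}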
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 08e442bc3ab9..a19829374e6a 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -319,7 +319,7 @@ config X86_L1_CACHE_SHIFT
 
 config X86_XADD
 	def_bool y
-	depends on X86_32 && !M386
+	depends on X86_64 || !M386
 
 config X86_PPRO_FENCE
 	bool "PentiumPro memory ordering errata workaround"
@@ -396,7 +396,7 @@ config X86_TSC
 
 config X86_CMPXCHG64
 	def_bool y
-	depends on !M386 && !M486
+	depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
 
 # this should be set for all -march=.. options where the compiler
 # generates cmov.
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 78b32be55e9e..0a43dc515e4c 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -135,9 +135,7 @@ drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
 # suspend and hibernation support
 drivers-$(CONFIG_PM) += arch/x86/power/
 
-ifeq ($(CONFIG_X86_32),y)
 drivers-$(CONFIG_FB) += arch/x86/video/
-endif
 
 ####
 # boot loader support. Several targets are kept for legacy purposes
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index f8ed0658404c..fbb47daf2459 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -4,11 +4,12 @@
 # create a compressed vmlinux image from the original vmlinux
 #
 
-targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma head_$(BITS).o misc.o piggy.o
+targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.lzo head_$(BITS).o misc.o piggy.o
 
 KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+cflags-$(CONFIG_X86_32) := -march=i386
 cflags-$(CONFIG_X86_64) := -mcmodel=small
 KBUILD_CFLAGS += $(cflags-y)
 KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
@@ -48,10 +49,13 @@ $(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,bzip2)
 $(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,lzma)
+$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
+	$(call if_changed,lzo)
 
 suffix-$(CONFIG_KERNEL_GZIP) := gz
 suffix-$(CONFIG_KERNEL_BZIP2) := bz2
 suffix-$(CONFIG_KERNEL_LZMA) := lzma
+suffix-$(CONFIG_KERNEL_LZO) := lzo
 
 quiet_cmd_mkpiggy = MKPIGGY $@
 cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 842b2a36174a..51e240779a44 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -19,11 +19,6 @@
 #define _ASM_X86_DESC_H 1
 #endif
 
-#ifdef CONFIG_X86_64
-#define _LINUX_STRING_H_ 1
-#define __LINUX_BITMAP_H 1
-#endif
-
 #include <linux/linkage.h>
 #include <linux/screen_info.h>
 #include <linux/elf.h>
@@ -131,8 +126,8 @@ static void error(char *m);
 static struct boot_params *real_mode;		/* Pointer to real-mode data */
 static int quiet;
 
-static void *memset(void *s, int c, unsigned n);
-void *memcpy(void *dest, const void *src, unsigned n);
+void *memset(void *s, int c, size_t n);
+void *memcpy(void *dest, const void *src, size_t n);
 
 static void __putstr(int, const char *);
 #define putstr(__x)  __putstr(0, __x)
@@ -162,6 +157,10 @@ static int lines, cols;
 #include "../../../../lib/decompress_unlzma.c"
 #endif
 
+#ifdef CONFIG_KERNEL_LZO
+#include "../../../../lib/decompress_unlzo.c"
+#endif
+
 static void scroll(void)
 {
 	int i;
@@ -181,11 +180,9 @@ static void __putstr(int error, const char *s)
 		return;
 #endif
 
-#ifdef CONFIG_X86_32
 	if (real_mode->screen_info.orig_video_mode == 0 &&
 	    lines == 0 && cols == 0)
 		return;
-#endif
 
 	x = real_mode->screen_info.orig_x;
 	y = real_mode->screen_info.orig_y;
@@ -219,7 +216,7 @@ static void __putstr(int error, const char *s)
 	outb(0xff & (pos >> 1), vidport+1);
 }
 
-static void *memset(void *s, int c, unsigned n)
+void *memset(void *s, int c, size_t n)
 {
 	int i;
 	char *ss = s;
@@ -229,7 +226,7 @@ static void *memset(void *s, int c, unsigned n)
 	return s;
 }
 
-void *memcpy(void *dest, const void *src, unsigned n)
+void *memcpy(void *dest, const void *src, size_t n)
 {
 	int i;
 	const char *s = src;
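The misc.c hunks above widen memset()/memcpy() from unsigned to size_t and drop the static from memset(). A minimal user-space sketch of why the width matters on a 64-bit target (illustrative only, not kernel code):

/* Sketch: on LP64 x86_64, size_t is 64-bit while unsigned int is
 * 32-bit, so an `unsigned n` length parameter silently truncates
 * counts of 4 GiB and above.
 */
#include <stdio.h>
#include <stddef.h>

int main(void)
{
	size_t big = (size_t)5 * 1024 * 1024 * 1024;	/* 5 GiB */
	unsigned truncated = (unsigned)big;		/* high bits lost */

	printf("size_t len:   %zu\n", big);		/* 5368709120 */
	printf("unsigned len: %u\n", truncated);	/* 1073741824 */
	return 0;
}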
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index 8ef60f20b371..919257f526f2 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -22,7 +22,7 @@ int main(void)
 	int i, j;
 	const char *str;
 
-	printf("static const char x86_cap_strs[] = \n");
+	printf("static const char x86_cap_strs[] =\n");
 
 	for (i = 0; i < NCAPINTS; i++) {
 		for (j = 0; j < 32; j++) {
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index 819caa1f2008..ed7aeff786b2 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -42,22 +42,15 @@ static u8 vga_set_basic_mode(void)
 {
 	struct biosregs ireg, oreg;
 	u16 ax;
-	u8 rows;
 	u8 mode;
 
 	initregs(&ireg);
 
+	/* Query current mode */
 	ax = 0x0f00;
 	intcall(0x10, &ireg, &oreg);
 	mode = oreg.al;
 
-	set_fs(0);
-	rows = rdfs8(0x484);	/* rows minus one */
-
-	if ((oreg.ax == 0x5003 || oreg.ax == 0x5007) &&
-	    (rows == 0 || rows == 24))
-		return mode;
-
 	if (mode != 3 && mode != 7)
 		mode = 3;
 
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index f767164cd5df..43eda284d27f 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -298,11 +298,18 @@ static void restore_screen(void)
 	}
 
 	/* Restore cursor position */
+	if (saved.curx >= xs)
+		saved.curx = xs-1;
+	if (saved.cury >= ys)
+		saved.cury = ys-1;
+
 	initregs(&ireg);
 	ireg.ah = 0x02;		/* Set cursor position */
 	ireg.dh = saved.cury;
 	ireg.dl = saved.curx;
 	intcall(0x10, &ireg, NULL);
+
+	store_cursor_position();
 }
 
 void set_video(void)
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
index daef6cd2b45d..1a8f8649c035 100644
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <asm/i387.h>
 
 struct crypto_fpu_ctx {
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S
index 39b98ed2c1b9..575331cb2a8a 100644
--- a/arch/x86/crypto/twofish-i586-asm_32.S
+++ b/arch/x86/crypto/twofish-i586-asm_32.S
@@ -22,7 +22,7 @@
 
 #include <asm/asm-offsets.h>
 
-/* return adress at 0 */
+/* return address at 0 */
 
 #define in_blk 12  /* input byte array address parameter*/
 #define out_blk 8  /* output byte array address parameter*/
@@ -230,8 +230,8 @@ twofish_enc_blk:
 	push	%edi
 
 	mov	tfm + 16(%esp),	%ebp	/* abuse the base pointer: set new base bointer to the crypto tfm */
-	add	$crypto_tfm_ctx_offset, %ebp	/* ctx adress */
-	mov	in_blk+16(%esp),%edi	/* input adress in edi */
+	add	$crypto_tfm_ctx_offset, %ebp	/* ctx address */
+	mov	in_blk+16(%esp),%edi	/* input address in edi */
 
 	mov	(%edi),		%eax
 	mov	b_offset(%edi),	%ebx
@@ -286,8 +286,8 @@ twofish_dec_blk:
 
 
 	mov	tfm + 16(%esp),	%ebp	/* abuse the base pointer: set new base bointer to the crypto tfm */
-	add	$crypto_tfm_ctx_offset, %ebp	/* ctx adress */
-	mov	in_blk+16(%esp),%edi	/* input adress in edi */
+	add	$crypto_tfm_ctx_offset, %ebp	/* ctx address */
+	mov	in_blk+16(%esp),%edi	/* input address in edi */
 
 	mov	(%edi),		%eax
 	mov	b_offset(%edi),	%ebx
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S
index 35974a586615..573aa102542e 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
@@ -221,11 +221,11 @@
 twofish_enc_blk:
 	pushq    R1
 
-	/* %rdi contains the crypto tfm adress */
-	/* %rsi contains the output adress */
-	/* %rdx contains the input adress */
-	add	$crypto_tfm_ctx_offset, %rdi	/* set ctx adress */
-	/* ctx adress is moved to free one non-rex register
+	/* %rdi contains the crypto tfm address */
+	/* %rsi contains the output address */
+	/* %rdx contains the input address */
+	add	$crypto_tfm_ctx_offset, %rdi	/* set ctx address */
+	/* ctx address is moved to free one non-rex register
 	as target for the 8bit high operations */
 	mov	%rdi, %r11
 
@@ -274,11 +274,11 @@ twofish_enc_blk:
 twofish_dec_blk:
 	pushq    R1
 
-	/* %rdi contains the crypto tfm adress */
-	/* %rsi contains the output adress */
-	/* %rdx contains the input adress */
-	add	$crypto_tfm_ctx_offset, %rdi	/* set ctx adress */
-	/* ctx adress is moved to free one non-rex register
+	/* %rdi contains the crypto tfm address */
+	/* %rsi contains the output address */
+	/* %rdx contains the input address */
+	add	$crypto_tfm_ctx_offset, %rdi	/* set ctx address */
+	/* ctx address is moved to free one non-rex register
 	as target for the 8bit high operations */
 	mov	%rdi, %r11
 
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 2a4d073d2cf1..0350311906ae 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -21,7 +21,6 @@
 #include <linux/fcntl.h>
 #include <linux/ptrace.h>
 #include <linux/user.h>
-#include <linux/slab.h>
 #include <linux/binfmts.h>
 #include <linux/personality.h>
 #include <linux/init.h>
@@ -297,7 +296,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	 * size limits imposed on them by creating programs with large
 	 * arrays in the data or bss.
 	 */
-	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	rlim = rlimit(RLIMIT_DATA);
 	if (rlim >= RLIM_INFINITY)
 		rlim = ~0;
 	if (ex.a_data + ex.a_bss > rlim)
@@ -308,14 +307,15 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	if (retval)
 		return retval;
 
-	regs->cs = __USER32_CS;
-	regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
-		regs->r13 = regs->r14 = regs->r15 = 0;
-
 	/* OK, This is the point of no return */
 	set_personality(PER_LINUX);
 	set_thread_flag(TIF_IA32);
-	clear_thread_flag(TIF_ABI_PENDING);
+
+	setup_new_exec(bprm);
+
+	regs->cs = __USER32_CS;
+	regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
+		regs->r13 = regs->r14 = regs->r15 = 0;
 
 	current->mm->end_code = ex.a_text +
 		(current->mm->start_code = N_TXTADDR(ex));
@@ -326,7 +326,6 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	current->mm->free_area_cache = TASK_UNMAPPED_BASE;
 	current->mm->cached_hole_size = 0;
 
-	current->mm->mmap = NULL;
 	install_exec_creds(bprm);
 	current->flags &= ~PF_FORKNOEXEC;
 
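The rlim change above replaces open-coded access to current->signal->rlim with the rlimit() accessor. A hedged sketch of what such an accessor boils down to (the mainline helper lives in <linux/sched.h> and reads the value more carefully; this simplified form only shows the indirection it hides):

/* Sketch: approximate shape of the rlimit() accessor used above.
 * The function name is suffixed _sketch to mark it as illustrative.
 */
static inline unsigned long rlimit_sketch(unsigned int limit)
{
	return current->signal->rlim[limit].rlim_cur;
}

/* Call sites then shrink from
 *	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
 * to
 *	rlim = rlimit(RLIMIT_DATA);
 */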
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 53147ad85b96..e790bc1fbfa3 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -563,7 +563,7 @@ ia32_sys_call_table:
 	.quad quiet_ni_syscall		/* old mpx syscall holder */
 	.quad sys_setpgid
 	.quad quiet_ni_syscall		/* old ulimit syscall holder */
-	.quad sys32_olduname
+	.quad sys_olduname
 	.quad sys_umask			/* 60 */
 	.quad sys_chroot
 	.quad compat_sys_ustat
@@ -586,7 +586,7 @@ ia32_sys_call_table:
 	.quad compat_sys_settimeofday
 	.quad sys_getgroups16		/* 80 */
 	.quad sys_setgroups16
-	.quad sys32_old_select
+	.quad compat_sys_old_select
 	.quad sys_symlink
 	.quad sys_lstat
 	.quad sys_readlink		/* 85 */
@@ -613,7 +613,7 @@ ia32_sys_call_table:
 	.quad compat_sys_newstat
 	.quad compat_sys_newlstat
 	.quad compat_sys_newfstat
-	.quad sys32_uname
+	.quad sys_uname
 	.quad stub32_iopl		/* 110 */
 	.quad sys_vhangup
 	.quad quiet_ni_syscall		/* old "idle" system call */
@@ -626,7 +626,7 @@ ia32_sys_call_table:
 	.quad stub32_sigreturn
 	.quad stub32_clone		/* 120 */
 	.quad sys_setdomainname
-	.quad sys_uname
+	.quad sys_newuname
 	.quad sys_modify_ldt
 	.quad compat_sys_adjtimex
 	.quad sys32_mprotect		/* 125 */
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 422572c77923..626be156d88d 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -40,6 +40,7 @@
 #include <linux/ptrace.h>
 #include <linux/highuid.h>
 #include <linux/sysctl.h>
+#include <linux/slab.h>
 #include <asm/mman.h>
 #include <asm/types.h>
 #include <asm/uaccess.h>
@@ -143,7 +144,7 @@ asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename,
  * block for parameter passing..
  */
 
-struct mmap_arg_struct {
+struct mmap_arg_struct32 {
 	unsigned int addr;
 	unsigned int len;
 	unsigned int prot;
@@ -152,9 +153,9 @@ struct mmap_arg_struct {
 	unsigned int offset;
 };
 
-asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg)
+asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
 {
-	struct mmap_arg_struct a;
+	struct mmap_arg_struct32 a;
 
 	if (copy_from_user(&a, arg, sizeof(a)))
 		return -EFAULT;
@@ -332,24 +333,6 @@ asmlinkage long sys32_alarm(unsigned int seconds)
 	return alarm_setitimer(seconds);
 }
 
-struct sel_arg_struct {
-	unsigned int n;
-	unsigned int inp;
-	unsigned int outp;
-	unsigned int exp;
-	unsigned int tvp;
-};
-
-asmlinkage long sys32_old_select(struct sel_arg_struct __user *arg)
-{
-	struct sel_arg_struct a;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		return -EFAULT;
-	return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
-				 compat_ptr(a.exp), compat_ptr(a.tvp));
-}
-
 asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
 			      int options)
 {
@@ -466,58 +449,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd,
 	return ret;
 }
 
-asmlinkage long sys32_olduname(struct oldold_utsname __user *name)
-{
-	char *arch = "x86_64";
-	int err;
-
-	if (!name)
-		return -EFAULT;
-	if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
-		return -EFAULT;
-
-	down_read(&uts_sem);
-
-	err = __copy_to_user(&name->sysname, &utsname()->sysname,
-			     __OLD_UTS_LEN);
-	err |= __put_user(0, name->sysname+__OLD_UTS_LEN);
-	err |= __copy_to_user(&name->nodename, &utsname()->nodename,
-			      __OLD_UTS_LEN);
-	err |= __put_user(0, name->nodename+__OLD_UTS_LEN);
-	err |= __copy_to_user(&name->release, &utsname()->release,
-			      __OLD_UTS_LEN);
-	err |= __put_user(0, name->release+__OLD_UTS_LEN);
-	err |= __copy_to_user(&name->version, &utsname()->version,
-			      __OLD_UTS_LEN);
-	err |= __put_user(0, name->version+__OLD_UTS_LEN);
-
-	if (personality(current->personality) == PER_LINUX32)
-		arch = "i686";
-
-	err |= __copy_to_user(&name->machine, arch, strlen(arch) + 1);
-
-	up_read(&uts_sem);
-
-	err = err ? -EFAULT : 0;
-
-	return err;
-}
-
-long sys32_uname(struct old_utsname __user *name)
-{
-	int err;
-
-	if (!name)
-		return -EFAULT;
-	down_read(&uts_sem);
-	err = copy_to_user(name, utsname(), sizeof(*name));
-	up_read(&uts_sem);
-	if (personality(current->personality) == PER_LINUX32)
-		err |= copy_to_user(&name->machine, "i686", 5);
-
-	return err ? -EFAULT : 0;
-}
-
 asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv,
 			     compat_uptr_t __user *envp, struct pt_regs *regs)
 {
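The deleted sys32_old_select() above is replaced in the syscall table (see the ia32entry.S hunk) by the generic compat_sys_old_select. The consolidation keeps the same shape; a sketch of the generic pattern, not the exact fs/compat.c text (field names and the compat_ types mirror the deleted sel_arg_struct only approximately):

/* Sketch: shape of the generic old-select compat handler that replaces
 * the per-arch sys32_old_select() removed above.
 */
struct compat_sel_arg_struct {
	compat_ulong_t n;
	compat_uptr_t inp;
	compat_uptr_t outp;
	compat_uptr_t exp;
	compat_uptr_t tvp;
};

asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg)
{
	struct compat_sel_arg_struct a;

	if (copy_from_user(&a, arg, sizeof(a)))
		return -EFAULT;
	return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
				 compat_ptr(a.exp), compat_ptr(a.tvp));
}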
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 9f828f87ca35..493092efaa3b 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -11,6 +11,7 @@ header-y += sigcontext32.h
 header-y += ucontext.h
 header-y += processor-flags.h
 header-y += hw_breakpoint.h
+header-y += hyperv.h
 
 unifdef-y += e820.h
 unifdef-y += ist.h
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index b97f786a48d5..a63a68be1cce 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -6,8 +6,8 @@
 .macro LOCK_PREFIX
 1:	lock
 	.section .smp_locks,"a"
-	_ASM_ALIGN
-	_ASM_PTR 1b
+	.balign 4
+	.long 1b - .
 	.previous
 .endm
 #else
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index e29a6c9bba00..92a9033c14d1 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -30,8 +30,8 @@
 #ifdef CONFIG_SMP
 #define LOCK_PREFIX_HERE \
 		".section .smp_locks,\"a\"\n"	\
-		_ASM_ALIGN "\n"			\
-		_ASM_PTR "671f\n" /* address */	\
+		".balign 4\n"			\
+		".long 671f - .\n" /* offset */	\
 		".previous\n"			\
 		"671:"
 
@@ -68,12 +68,17 @@ extern void alternatives_smp_module_add(struct module *mod, char *name,
 					void *text, void *text_end);
 extern void alternatives_smp_module_del(struct module *mod);
 extern void alternatives_smp_switch(int smp);
+extern int alternatives_text_reserved(void *start, void *end);
 #else
 static inline void alternatives_smp_module_add(struct module *mod, char *name,
 					       void *locks, void *locks_end,
 					       void *text, void *text_end) {}
 static inline void alternatives_smp_module_del(struct module *mod) {}
 static inline void alternatives_smp_switch(int smp) {}
+static inline int alternatives_text_reserved(void *start, void *end)
+{
+	return 0;
+}
 #endif	/* CONFIG_SMP */
 
 /* alternative assembly primitive: */
@@ -163,10 +168,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
  * invalid instruction possible) or if the instructions are changed from a
  * consistent state to another consistent state atomically.
  * More care must be taken when modifying code in the SMP case because of
- * Intel's errata.
+ * Intel's errata. text_poke_smp() takes care that errata, but still
+ * doesn't support NMI/MCE handler code modifying.
  * On the local CPU you need to be protected again NMI or MCE handlers seeing an
  * inconsistent instruction while you patch.
  */
extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
 
 #endif /* _ASM_X86_ALTERNATIVE_H */
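The LOCK_PREFIX_HERE hunk above is the source of the conflict named in the commit message: .smp_locks entries change from absolute pointers (_ASM_PTR) to 32-bit self-relative offsets. A sketch of how a patcher recovers the lock-prefix address from such an entry (the relative-to-absolute step follows the scheme above; the loop body and byte values are illustrative, not the exact alternatives.c code):

/* Sketch: walking relative .smp_locks entries as emitted by the new
 * LOCK_PREFIX_HERE. Each 4-byte entry holds (target - entry address),
 * so adding the entry's own address back yields the target byte.
 */
static void smp_lock_walk(s32 *start, s32 *end, int smp)
{
	s32 *poff;

	for (poff = start; poff < end; poff++) {
		u8 *ptr = (u8 *)poff + *poff;	/* relative -> absolute */

		if (smp)
			*ptr = 0xf0;	/* restore the lock prefix */
		else
			*ptr = 0x90;	/* nop it out on UP */
	}
}

The payoff of the relative form is that a 4-byte entry suffices even on 64-bit, and the section needs no relocation at load time.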
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index 4d817f9e6e77..d2544f1d705d 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -31,6 +31,7 @@ extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
 extern int amd_iommu_init_devices(void);
 extern void amd_iommu_uninit_devices(void);
 extern void amd_iommu_init_notifier(void);
+extern void amd_iommu_init_api(void);
 #ifndef CONFIG_AMD_IOMMU_STATS
 
 static inline void amd_iommu_stats_init(void) { }
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index ba19ad4c47d0..86a0ff0aeac7 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -21,6 +21,7 @@
 #define _ASM_X86_AMD_IOMMU_TYPES_H
 
 #include <linux/types.h>
+#include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 
@@ -140,6 +141,7 @@
 
 /* constants to configure the command buffer */
 #define CMD_BUFFER_SIZE    8192
+#define CMD_BUFFER_UNINITIALIZED 1
 #define CMD_BUFFER_ENTRIES 512
 #define MMIO_CMD_SIZE_SHIFT 56
 #define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)
@@ -237,6 +239,7 @@ struct protection_domain {
 	struct list_head list;  /* for list of all protection domains */
 	struct list_head dev_list; /* List of all devices in this domain */
 	spinlock_t lock;	/* mostly used to lock the page table*/
+	struct mutex api_lock;	/* protect page tables in the iommu-api path */
 	u16 id;			/* the domain id written to the device table */
 	int mode;		/* paging mode (0-6 levels) */
 	u64 *pt_root;		/* page table root pointer */
diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h
new file mode 100644
index 000000000000..c74a2eebe570
--- /dev/null
+++ b/arch/x86/include/asm/apb_timer.h
@@ -0,0 +1,70 @@
+/*
+ * apb_timer.h: Driver for Langwell APB timer based on Synopsis DesignWare
+ *
+ * (C) Copyright 2009 Intel Corporation
+ * Author: Jacob Pan (jacob.jun.pan@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Note:
+ */
+
+#ifndef ASM_X86_APBT_H
+#define ASM_X86_APBT_H
+#include <linux/sfi.h>
+
+#ifdef CONFIG_APB_TIMER
+
+/* Langwell DW APB timer registers */
+#define APBTMR_N_LOAD_COUNT    0x00
+#define APBTMR_N_CURRENT_VALUE 0x04
+#define APBTMR_N_CONTROL       0x08
+#define APBTMR_N_EOI           0x0c
+#define APBTMR_N_INT_STATUS    0x10
+
+#define APBTMRS_INT_STATUS     0xa0
+#define APBTMRS_EOI            0xa4
+#define APBTMRS_RAW_INT_STATUS 0xa8
+#define APBTMRS_COMP_VERSION   0xac
+#define APBTMRS_REG_SIZE       0x14
+
+/* register bits */
+#define APBTMR_CONTROL_ENABLE  (1<<0)
+#define APBTMR_CONTROL_MODE_PERIODIC   (1<<1) /*1: periodic 0:free running */
+#define APBTMR_CONTROL_INT     (1<<2)
+
+/* default memory mapped register base */
+#define LNW_SCU_ADDR           0xFF100000
+#define LNW_EXT_TIMER_OFFSET   0x1B800
+#define APBT_DEFAULT_BASE      (LNW_SCU_ADDR+LNW_EXT_TIMER_OFFSET)
+#define LNW_EXT_TIMER_PGOFFSET 0x800
+
+/* APBT clock speed range from PCLK to fabric base, 25-100MHz */
+#define APBT_MAX_FREQ          50
+#define APBT_MIN_FREQ          1
+#define APBT_MMAP_SIZE         1024
+
+#define APBT_DEV_USED          1
+
+extern void apbt_time_init(void);
+extern struct clock_event_device *global_clock_event;
+extern unsigned long apbt_quick_calibrate(void);
+extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu);
+extern void apbt_setup_secondary_clock(void);
+extern unsigned int boot_cpu_id;
+extern int disable_apbt_percpu;
+
+extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint);
+extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr);
+extern int sfi_mtimer_num;
+
+#else /* CONFIG_APB_TIMER */
+
+static inline unsigned long apbt_quick_calibrate(void) {return 0; }
+static inline void apbt_time_init(void) {return 0; }
+
+#endif
+#endif /* ASM_X86_APBT_H */
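The per-timer registers in the new header sit at a fixed stride, APBTMRS_REG_SIZE (0x14) bytes per timer, from the mapped base. A sketch of addressing timer n under that layout (the helper and variable names are made up; only the offset arithmetic follows the header):

/* Sketch: MMIO access to timer n's registers given the layout above. */
static void __iomem *apbt_base;	/* ioremap()ed APBT_DEFAULT_BASE */

static inline void apbt_writel_sketch(int n, unsigned long val,
				      unsigned long offset)
{
	writel(val, apbt_base + n * APBTMRS_REG_SIZE + offset);
}

/* e.g. start timer 0 in periodic mode:
 *	apbt_writel_sketch(0, APBTMR_CONTROL_ENABLE |
 *			      APBTMR_CONTROL_MODE_PERIODIC,
 *			   APBTMR_N_CONTROL);
 */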
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 9a9c7bdc923d..306160e58b48 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -8,7 +8,8 @@
 #include <linux/sched.h>
 #include <asm/user32.h>
 
-#define COMPAT_USER_HZ 100
+#define COMPAT_USER_HZ		100
+#define COMPAT_UTS_MACHINE	"i686\0\0"
 
 typedef u32 compat_size_t;
 typedef s32 compat_ssize_t;
diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h
deleted file mode 100644
index d96c1ee3a95c..000000000000
--- a/arch/x86/include/asm/cpu_debug.h
+++ /dev/null
@@ -1,127 +0,0 @@
-#ifndef _ASM_X86_CPU_DEBUG_H
-#define _ASM_X86_CPU_DEBUG_H
-
-/*
- * CPU x86 architecture debug
- *
- * Copyright(C) 2009 Jaswinder Singh Rajput
- */
-
-/* Register flags */
-enum cpu_debug_bit {
-/* Model Specific Registers (MSRs) */
-	CPU_MC_BIT,				/* Machine Check	*/
-	CPU_MONITOR_BIT,			/* Monitor		*/
-	CPU_TIME_BIT,				/* Time			*/
-	CPU_PMC_BIT,				/* Performance Monitor	*/
-	CPU_PLATFORM_BIT,			/* Platform		*/
-	CPU_APIC_BIT,				/* APIC			*/
-	CPU_POWERON_BIT,			/* Power-on		*/
-	CPU_CONTROL_BIT,			/* Control		*/
-	CPU_FEATURES_BIT,			/* Features control	*/
-	CPU_LBRANCH_BIT,			/* Last Branch		*/
-	CPU_BIOS_BIT,				/* BIOS			*/
-	CPU_FREQ_BIT,				/* Frequency		*/
-	CPU_MTTR_BIT,				/* MTRR			*/
-	CPU_PERF_BIT,				/* Performance		*/
-	CPU_CACHE_BIT,				/* Cache		*/
-	CPU_SYSENTER_BIT,			/* Sysenter		*/
-	CPU_THERM_BIT,				/* Thermal		*/
-	CPU_MISC_BIT,				/* Miscellaneous	*/
-	CPU_DEBUG_BIT,				/* Debug		*/
-	CPU_PAT_BIT,				/* PAT			*/
-	CPU_VMX_BIT,				/* VMX			*/
-	CPU_CALL_BIT,				/* System Call		*/
-	CPU_BASE_BIT,				/* BASE Address		*/
-	CPU_VER_BIT,				/* Version ID		*/
-	CPU_CONF_BIT,				/* Configuration	*/
-	CPU_SMM_BIT,				/* System mgmt mode	*/
-	CPU_SVM_BIT,				/*Secure Virtual Machine*/
-	CPU_OSVM_BIT,				/* OS-Visible Workaround*/
-/* Standard Registers */
-	CPU_TSS_BIT,				/* Task Stack Segment	*/
-	CPU_CR_BIT,				/* Control Registers	*/
-	CPU_DT_BIT,				/* Descriptor Table	*/
-/* End of Registers flags */
-	CPU_REG_ALL_BIT,			/* Select all Registers	*/
-};
-
-#define	CPU_REG_ALL		(~0)		/* Select all Registers	*/
-
-#define	CPU_MC			(1 << CPU_MC_BIT)
-#define	CPU_MONITOR		(1 << CPU_MONITOR_BIT)
-#define	CPU_TIME		(1 << CPU_TIME_BIT)
-#define	CPU_PMC			(1 << CPU_PMC_BIT)
-#define	CPU_PLATFORM		(1 << CPU_PLATFORM_BIT)
-#define	CPU_APIC		(1 << CPU_APIC_BIT)
-#define	CPU_POWERON		(1 << CPU_POWERON_BIT)
-#define	CPU_CONTROL		(1 << CPU_CONTROL_BIT)
-#define	CPU_FEATURES		(1 << CPU_FEATURES_BIT)
-#define	CPU_LBRANCH		(1 << CPU_LBRANCH_BIT)
-#define	CPU_BIOS		(1 << CPU_BIOS_BIT)
-#define	CPU_FREQ		(1 << CPU_FREQ_BIT)
-#define	CPU_MTRR		(1 << CPU_MTTR_BIT)
-#define	CPU_PERF		(1 << CPU_PERF_BIT)
-#define	CPU_CACHE		(1 << CPU_CACHE_BIT)
-#define	CPU_SYSENTER		(1 << CPU_SYSENTER_BIT)
-#define	CPU_THERM		(1 << CPU_THERM_BIT)
-#define	CPU_MISC		(1 << CPU_MISC_BIT)
-#define	CPU_DEBUG		(1 << CPU_DEBUG_BIT)
-#define	CPU_PAT			(1 << CPU_PAT_BIT)
-#define	CPU_VMX			(1 << CPU_VMX_BIT)
-#define	CPU_CALL		(1 << CPU_CALL_BIT)
-#define	CPU_BASE		(1 << CPU_BASE_BIT)
-#define	CPU_VER			(1 << CPU_VER_BIT)
-#define	CPU_CONF		(1 << CPU_CONF_BIT)
-#define	CPU_SMM			(1 << CPU_SMM_BIT)
-#define	CPU_SVM			(1 << CPU_SVM_BIT)
-#define	CPU_OSVM		(1 << CPU_OSVM_BIT)
-#define	CPU_TSS			(1 << CPU_TSS_BIT)
-#define	CPU_CR			(1 << CPU_CR_BIT)
-#define	CPU_DT			(1 << CPU_DT_BIT)
-
-/* Register file flags */
-enum cpu_file_bit {
-	CPU_INDEX_BIT,				/* index		*/
-	CPU_VALUE_BIT,				/* value		*/
-};
-
-#define	CPU_FILE_VALUE		(1 << CPU_VALUE_BIT)
-
-#define MAX_CPU_FILES		512
-
-struct cpu_private {
-	unsigned		cpu;
-	unsigned		type;
-	unsigned		reg;
-	unsigned		file;
-};
-
-struct cpu_debug_base {
-	char			*name;		/* Register name	*/
-	unsigned		flag;		/* Register flag	*/
-	unsigned		write;		/* Register write flag	*/
-};
-
-/*
- * Currently it looks similar to cpu_debug_base but once we add more files
- * cpu_file_base will go in different direction
- */
-struct cpu_file_base {
-	char			*name;		/* Register file name	*/
-	unsigned		flag;		/* Register file flag	*/
-	unsigned		write;		/* Register write flag	*/
-};
-
-struct cpu_cpuX_base {
-	struct dentry		*dentry;	/* Register dentry	*/
-	int			init;		/* Register index file	*/
-};
-
-struct cpu_debug_range {
-	unsigned		min;		/* Register range min	*/
-	unsigned		max;		/* Register range max	*/
-	unsigned		flag;		/* Supported flags	*/
-};
-
-#endif /* _ASM_X86_CPU_DEBUG_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 637e1ec963c3..0cd82d068613 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -168,6 +168,10 @@
 #define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */
 #define X86_FEATURE_EPT		(8*32+ 3) /* Intel Extended Page Table */
 #define X86_FEATURE_VPID	(8*32+ 4) /* Intel Virtual Processor ID */
+#define X86_FEATURE_NPT		(8*32+5)  /* AMD Nested Page Table support */
+#define X86_FEATURE_LBRV	(8*32+6)  /* AMD LBR Virtualization support */
+#define X86_FEATURE_SVML	(8*32+7)  /* "svm_lock" AMD SVM locking MSR */
+#define X86_FEATURE_NRIPS	(8*32+8)  /* "nrip_save" AMD SVM next_rip save */
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
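The four new bits extend cpufeature word 8 (virtualization features) with AMD SVM capabilities. They are consumed like any other feature flag; a hedged sketch of a consumer (boot_cpu_has() is the standard accessor, the surrounding function is illustrative):

/* Sketch: testing one of the new SVM feature bits defined above. */
static bool svm_has_nested_paging_sketch(void)
{
	return boot_cpu_has(X86_FEATURE_NPT);
}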
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 8240f76b531e..b81002f23614 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -14,6 +14,9 @@
    which debugging register was responsible for the trap.  The other bits
    are either reserved or not of interest to us. */
 
+/* Define reserved bits in DR6 which are always set to 1 */
+#define DR6_RESERVED	(0xFFFF0FF0)
+
 #define DR_TRAP0	(0x1)		/* db0 */
 #define DR_TRAP1	(0x2)		/* db1 */
 #define DR_TRAP2	(0x4)		/* db2 */
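DR6_RESERVED names the DR6 bits that read back as 1 no matter what triggered the #DB exception, so a handler can mask them before inspecting the trap bits. A sketch of the intended use (the handler skeleton is illustrative):

/* Sketch: filtering DR6 in a debug-trap handler using the new constant. */
static void handle_debug_sketch(unsigned long dr6)
{
	dr6 &= ~DR6_RESERVED;	/* drop the always-set reserved bits */

	if (dr6 & DR_TRAP0) {
		/* breakpoint in debug register 0 fired */
	}
	/* ... likewise for DR_TRAP1..DR_TRAP3, single-step, etc. ... */
}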
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 761249e396fe..0e22296790d3 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -111,11 +111,8 @@ extern unsigned long end_user_pfn;
 
 extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
 extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
-extern void reserve_early(u64 start, u64 end, char *name);
-extern void reserve_early_overlap_ok(u64 start, u64 end, char *name);
-extern void free_early(u64 start, u64 end);
-extern void early_res_to_bootmem(u64 start, u64 end);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
+#include <linux/early_res.h>
 
 extern unsigned long e820_end_of_ram_pfn(void);
 extern unsigned long e820_end_of_low_ram_pfn(void);
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index b4501ee223ad..f2ad2163109d 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -170,10 +170,7 @@ static inline void elf_common_init(struct thread_struct *t,
 }
 
 #define ELF_PLAT_INIT(_r, load_addr)			\
-do {							\
-	elf_common_init(&current->thread, _r, 0);	\
-	clear_thread_flag(TIF_IA32);			\
-} while (0)
+	elf_common_init(&current->thread, _r, 0)
 
 #define	COMPAT_ELF_PLAT_INIT(regs, load_addr)		\
 	elf_common_init(&current->thread, regs, __USER_DS)
@@ -181,14 +178,8 @@ do {							\
 void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp);
 #define compat_start_thread start_thread_ia32
 
-#define COMPAT_SET_PERSONALITY(ex)			\
-do {							\
-	if (test_thread_flag(TIF_IA32))			\
-		clear_thread_flag(TIF_ABI_PENDING);	\
-	else						\
-		set_thread_flag(TIF_ABI_PENDING);	\
-	current->personality |= force_personality32;	\
-} while (0)
+void set_personality_ia32(void);
+#define COMPAT_SET_PERSONALITY(ex) set_personality_ia32()
 
 #define COMPAT_ELF_PLATFORM	("i686")
 
diff --git a/arch/x86/include/asm/fb.h b/arch/x86/include/asm/fb.h
index 53018464aea6..2519d0679d99 100644
--- a/arch/x86/include/asm/fb.h
+++ b/arch/x86/include/asm/fb.h
@@ -12,10 +12,6 @@ static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
 		pgprot_val(vma->vm_page_prot) |= _PAGE_PCD;
 }
 
-#ifdef CONFIG_X86_32
 extern int fb_is_primary_device(struct fb_info *info);
-#else
-static inline int fb_is_primary_device(struct fb_info *info) { return 0; }
-#endif
 
 #endif /* _ASM_X86_FB_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 635f03bb4995..d07b44f7d1dc 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -82,6 +82,9 @@ enum fixed_addresses {
 #endif
 	FIX_DBGP_BASE,
 	FIX_EARLYCON_MEM_BASE,
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+	FIX_OHCI1394_BASE,
+#endif
 #ifdef CONFIG_X86_LOCAL_APIC
 	FIX_APIC_BASE,	/* local (CPU) APIC) -- required for SMP or not */
 #endif
@@ -132,9 +135,6 @@ enum fixed_addresses {
 			(__end_of_permanent_fixed_addresses & (TOTAL_FIX_BTMAPS - 1))
 			: __end_of_permanent_fixed_addresses,
 	FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
-	FIX_OHCI1394_BASE,
-#endif
 #ifdef CONFIG_X86_32
 	FIX_WP_TEST,
 #endif
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 0f8576427cfe..aeab29aee617 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -35,7 +35,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 
 #define __ARCH_IRQ_STAT
 
-#define inc_irq_stat(member)	percpu_add(irq_stat.member, 1)
+#define inc_irq_stat(member)	percpu_inc(irq_stat.member)
 
 #define local_softirq_pending()	percpu_read(irq_stat.__softirq_pending)
 
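The switch to percpu_inc() lets the per-CPU counter bump compile to a plain increment rather than an add with an immediate. A sketch of what that means at a call site (the asm shown in the comment is what one would expect from these percpu ops, not verified compiler output):

/* Sketch: effect of the inc_irq_stat() change at a call site.
 * Both forms are one read-modify-write on %gs-based per-CPU data:
 *
 *	percpu_add(irq_stat.member, 1)	->  addl $1, %gs:off
 *	percpu_inc(irq_stat.member)	->  incl %gs:off
 */
static inline void count_apic_timer_irq_sketch(void)
{
	inc_irq_stat(apic_timer_irqs);	/* now expands to percpu_inc() */
}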
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index 014c2b85ae45..a726650fc80f 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -66,10 +66,6 @@ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
 void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
 struct page *kmap_atomic_to_page(void *ptr);
 
-#ifndef CONFIG_PARAVIRT
-#define kmap_atomic_pte(page, type)	kmap_atomic(page, type)
-#endif
-
 #define flush_cache_kmaps()	do { } while (0)
 
 extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn,
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 5d89fd2a3690..1d5c08a1bdfd 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h | |||
@@ -67,6 +67,7 @@ extern unsigned long hpet_address; | |||
67 | extern unsigned long force_hpet_address; | 67 | extern unsigned long force_hpet_address; |
68 | extern u8 hpet_blockid; | 68 | extern u8 hpet_blockid; |
69 | extern int hpet_force_user; | 69 | extern int hpet_force_user; |
70 | extern u8 hpet_msi_disable; | ||
70 | extern int is_hpet_enabled(void); | 71 | extern int is_hpet_enabled(void); |
71 | extern int hpet_enable(void); | 72 | extern int hpet_enable(void); |
72 | extern void hpet_disable(void); | 73 | extern void hpet_disable(void); |
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 0675a7c4c20e..2a1bd8f4f23a 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h | |||
@@ -10,7 +10,6 @@ | |||
10 | * (display/resolving) | 10 | * (display/resolving) |
11 | */ | 11 | */ |
12 | struct arch_hw_breakpoint { | 12 | struct arch_hw_breakpoint { |
13 | char *name; /* Contains name of the symbol to set bkpt */ | ||
14 | unsigned long address; | 13 | unsigned long address; |
15 | u8 len; | 14 | u8 len; |
16 | u8 type; | 15 | u8 type; |
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index eeac829a0f44..46c0fe05f230 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -53,13 +53,6 @@ extern void threshold_interrupt(void); | |||
53 | extern void call_function_interrupt(void); | 53 | extern void call_function_interrupt(void); |
54 | extern void call_function_single_interrupt(void); | 54 | extern void call_function_single_interrupt(void); |
55 | 55 | ||
56 | /* PIC specific functions */ | ||
57 | extern void disable_8259A_irq(unsigned int irq); | ||
58 | extern void enable_8259A_irq(unsigned int irq); | ||
59 | extern int i8259A_irq_pending(unsigned int irq); | ||
60 | extern void make_8259A_irq(unsigned int irq); | ||
61 | extern void init_8259A(int aeoi); | ||
62 | |||
63 | /* IOAPIC */ | 56 | /* IOAPIC */ |
64 | #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) | 57 | #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) |
65 | extern unsigned long io_apic_irqs; | 58 | extern unsigned long io_apic_irqs; |
@@ -140,6 +133,7 @@ extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); | |||
140 | 133 | ||
141 | typedef int vector_irq_t[NR_VECTORS]; | 134 | typedef int vector_irq_t[NR_VECTORS]; |
142 | DECLARE_PER_CPU(vector_irq_t, vector_irq); | 135 | DECLARE_PER_CPU(vector_irq_t, vector_irq); |
136 | extern void setup_vector_irq(int cpu); | ||
143 | 137 | ||
144 | #ifdef CONFIG_X86_IO_APIC | 138 | #ifdef CONFIG_X86_IO_APIC |
145 | extern void lock_vector_lock(void); | 139 | extern void lock_vector_lock(void); |
diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/asm/hyperv.h new file mode 100644 index 000000000000..e153a2b3889a --- /dev/null +++ b/arch/x86/include/asm/hyperv.h | |||
@@ -0,0 +1,186 @@ | |||
1 | #ifndef _ASM_X86_KVM_HYPERV_H | ||
2 | #define _ASM_X86_KVM_HYPERV_H | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | |||
6 | /* | ||
7 | * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent | ||
8 | * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). | ||
9 | */ | ||
10 | #define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 | ||
11 | #define HYPERV_CPUID_INTERFACE 0x40000001 | ||
12 | #define HYPERV_CPUID_VERSION 0x40000002 | ||
13 | #define HYPERV_CPUID_FEATURES 0x40000003 | ||
14 | #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 | ||
15 | #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 | ||
16 | |||
17 | /* | ||
18 | * Feature identification. EAX indicates which features are available | ||
19 | * to the partition based upon the current partition privileges. | ||
20 | */ | ||
21 | |||
22 | /* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */ | ||
23 | #define HV_X64_MSR_VP_RUNTIME_AVAILABLE (1 << 0) | ||
24 | /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ | ||
25 | #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) | ||
26 | /* | ||
27 | * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM | ||
28 | * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available | ||
29 | */ | ||
30 | #define HV_X64_MSR_SYNIC_AVAILABLE (1 << 2) | ||
31 | /* | ||
32 | * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through | ||
33 | * HV_X64_MSR_STIMER3_COUNT) available | ||
34 | */ | ||
35 | #define HV_X64_MSR_SYNTIMER_AVAILABLE (1 << 3) | ||
36 | /* | ||
37 | * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR) | ||
38 | * are available | ||
39 | */ | ||
40 | #define HV_X64_MSR_APIC_ACCESS_AVAILABLE (1 << 4) | ||
41 | /* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/ | ||
42 | #define HV_X64_MSR_HYPERCALL_AVAILABLE (1 << 5) | ||
43 | /* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/ | ||
44 | #define HV_X64_MSR_VP_INDEX_AVAILABLE (1 << 6) | ||
45 | /* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/ | ||
46 | #define HV_X64_MSR_RESET_AVAILABLE (1 << 7) | ||
47 | /* | ||
48 | * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE, | ||
49 | * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE, | ||
50 | * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available | ||
51 | */ | ||
52 | #define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8) | ||
53 | |||
54 | /* | ||
55 | * Feature identification: EBX indicates which flags were specified at | ||
56 | * partition creation. The format is the same as the partition creation | ||
57 | * flag structure defined in section Partition Creation Flags. | ||
58 | */ | ||
59 | #define HV_X64_CREATE_PARTITIONS (1 << 0) | ||
60 | #define HV_X64_ACCESS_PARTITION_ID (1 << 1) | ||
61 | #define HV_X64_ACCESS_MEMORY_POOL (1 << 2) | ||
62 | #define HV_X64_ADJUST_MESSAGE_BUFFERS (1 << 3) | ||
63 | #define HV_X64_POST_MESSAGES (1 << 4) | ||
64 | #define HV_X64_SIGNAL_EVENTS (1 << 5) | ||
65 | #define HV_X64_CREATE_PORT (1 << 6) | ||
66 | #define HV_X64_CONNECT_PORT (1 << 7) | ||
67 | #define HV_X64_ACCESS_STATS (1 << 8) | ||
68 | #define HV_X64_DEBUGGING (1 << 11) | ||
69 | #define HV_X64_CPU_POWER_MANAGEMENT (1 << 12) | ||
70 | #define HV_X64_CONFIGURE_PROFILER (1 << 13) | ||
71 | |||
72 | /* | ||
73 | * Feature identification. EDX indicates which miscellaneous features | ||
74 | * are available to the partition. | ||
75 | */ | ||
76 | /* The MWAIT instruction is available (per section MONITOR / MWAIT) */ | ||
77 | #define HV_X64_MWAIT_AVAILABLE (1 << 0) | ||
78 | /* Guest debugging support is available */ | ||
79 | #define HV_X64_GUEST_DEBUGGING_AVAILABLE (1 << 1) | ||
80 | /* Performance Monitor support is available*/ | ||
81 | #define HV_X64_PERF_MONITOR_AVAILABLE (1 << 2) | ||
82 | /* Support for physical CPU dynamic partitioning events is available*/ | ||
83 | #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE (1 << 3) | ||
84 | /* | ||
85 | * Support for passing hypercall input parameter block via XMM | ||
86 | * registers is available | ||
87 | */ | ||
88 | #define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 << 4) | ||
89 | /* Support for a virtual guest idle state is available */ | ||
90 | #define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5) | ||
91 | |||
92 | /* | ||
93 | * Implementation recommendations. Indicates which behaviors the hypervisor | ||
94 | * recommends the OS implement for optimal performance. | ||
95 | */ | ||
96 | /* | ||
97 | * Recommend using hypercall for address space switches rather | ||
98 | * than MOV to CR3 instruction | ||
99 | */ | ||
100 | #define HV_X64_MWAIT_RECOMMENDED (1 << 0) | ||
101 | /* Recommend using hypercall for local TLB flushes rather | ||
102 | * than INVLPG or MOV to CR3 instructions */ | ||
103 | #define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED (1 << 1) | ||
104 | /* | ||
105 | * Recommend using hypercall for remote TLB flushes rather | ||
106 | * than inter-processor interrupts | ||
107 | */ | ||
108 | #define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED (1 << 2) | ||
109 | /* | ||
110 | * Recommend using MSRs for accessing APIC registers | ||
111 | * EOI, ICR and TPR rather than their memory-mapped counterparts | ||
112 | */ | ||
113 | #define HV_X64_APIC_ACCESS_RECOMMENDED (1 << 3) | ||
114 | /* Recommend using the hypervisor-provided MSR to initiate a system RESET */ | ||
115 | #define HV_X64_SYSTEM_RESET_RECOMMENDED (1 << 4) | ||
116 | /* | ||
117 | * Recommend using relaxed timing for this partition. If used, | ||
118 | * the VM should disable any watchdog timeouts that rely on the | ||
119 | * timely delivery of external interrupts | ||
120 | */ | ||
121 | #define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5) | ||
122 | |||
123 | /* MSR used to identify the guest OS. */ | ||
124 | #define HV_X64_MSR_GUEST_OS_ID 0x40000000 | ||
125 | |||
126 | /* MSR used to setup pages used to communicate with the hypervisor. */ | ||
127 | #define HV_X64_MSR_HYPERCALL 0x40000001 | ||
128 | |||
129 | /* MSR used to provide vcpu index */ | ||
130 | #define HV_X64_MSR_VP_INDEX 0x40000002 | ||
131 | |||
132 | /* Define the virtual APIC registers */ | ||
133 | #define HV_X64_MSR_EOI 0x40000070 | ||
134 | #define HV_X64_MSR_ICR 0x40000071 | ||
135 | #define HV_X64_MSR_TPR 0x40000072 | ||
136 | #define HV_X64_MSR_APIC_ASSIST_PAGE 0x40000073 | ||
137 | |||
138 | /* Define synthetic interrupt controller model specific registers. */ | ||
139 | #define HV_X64_MSR_SCONTROL 0x40000080 | ||
140 | #define HV_X64_MSR_SVERSION 0x40000081 | ||
141 | #define HV_X64_MSR_SIEFP 0x40000082 | ||
142 | #define HV_X64_MSR_SIMP 0x40000083 | ||
143 | #define HV_X64_MSR_EOM 0x40000084 | ||
144 | #define HV_X64_MSR_SINT0 0x40000090 | ||
145 | #define HV_X64_MSR_SINT1 0x40000091 | ||
146 | #define HV_X64_MSR_SINT2 0x40000092 | ||
147 | #define HV_X64_MSR_SINT3 0x40000093 | ||
148 | #define HV_X64_MSR_SINT4 0x40000094 | ||
149 | #define HV_X64_MSR_SINT5 0x40000095 | ||
150 | #define HV_X64_MSR_SINT6 0x40000096 | ||
151 | #define HV_X64_MSR_SINT7 0x40000097 | ||
152 | #define HV_X64_MSR_SINT8 0x40000098 | ||
153 | #define HV_X64_MSR_SINT9 0x40000099 | ||
154 | #define HV_X64_MSR_SINT10 0x4000009A | ||
155 | #define HV_X64_MSR_SINT11 0x4000009B | ||
156 | #define HV_X64_MSR_SINT12 0x4000009C | ||
157 | #define HV_X64_MSR_SINT13 0x4000009D | ||
158 | #define HV_X64_MSR_SINT14 0x4000009E | ||
159 | #define HV_X64_MSR_SINT15 0x4000009F | ||
160 | |||
161 | |||
162 | #define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 | ||
163 | #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12 | ||
164 | #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ | ||
165 | (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) | ||
166 | |||
167 | /* Declare the various hypercall operations. */ | ||
168 | #define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT 0x0008 | ||
169 | |||
170 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001 | ||
171 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12 | ||
172 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ | ||
173 | (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) | ||
174 | |||
175 | #define HV_PROCESSOR_POWER_STATE_C0 0 | ||
176 | #define HV_PROCESSOR_POWER_STATE_C1 1 | ||
177 | #define HV_PROCESSOR_POWER_STATE_C2 2 | ||
178 | #define HV_PROCESSOR_POWER_STATE_C3 3 | ||
179 | |||
180 | /* hypercall status code */ | ||
181 | #define HV_STATUS_SUCCESS 0 | ||
182 | #define HV_STATUS_INVALID_HYPERCALL_CODE 2 | ||
183 | #define HV_STATUS_INVALID_HYPERCALL_INPUT 3 | ||
184 | #define HV_STATUS_INVALID_ALIGNMENT 4 | ||
185 | |||
186 | #endif | ||
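These leaves live in the 0x40000000 hypervisor CPUID range, and a guest should consult them only after confirming the "Microsoft Hv" vendor signature. A minimal detection sketch, assuming the usual cpuid() helper from <asm/processor.h> (the probe itself is not part of this header):

static bool hyperv_present(void)
{
	u32 eax, ebx, ecx, edx;

	cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, &eax, &ebx, &ecx, &edx);

	/* EBX/ECX/EDX spell "Microsoft Hv" on a Hyper-V host */
	return ebx == 0x7263694d &&	/* "Micr" */
	       ecx == 0x666f736f &&	/* "osof" */
	       edx == 0x76482074 &&	/* "t Hv" */
	       eax >= HYPERV_CPUID_FEATURES;
}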
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index ebfb8a9e11f7..da2930924501 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
@@ -33,8 +33,16 @@ extern void init_thread_xstate(void); | |||
33 | extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); | 33 | extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); |
34 | 34 | ||
35 | extern user_regset_active_fn fpregs_active, xfpregs_active; | 35 | extern user_regset_active_fn fpregs_active, xfpregs_active; |
36 | extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; | 36 | extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, |
37 | extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set; | 37 | xstateregs_get; |
38 | extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, | ||
39 | xstateregs_set; | ||
40 | |||
41 | /* | ||
42 | * xstateregs_active == fpregs_active. Please refer to the comment | ||
43 | * at the definition of fpregs_active. | ||
44 | */ | ||
45 | #define xstateregs_active fpregs_active | ||
38 | 46 | ||
39 | extern struct _fpx_sw_bytes fx_sw_reserved; | 47 | extern struct _fpx_sw_bytes fx_sw_reserved; |
40 | #ifdef CONFIG_IA32_EMULATION | 48 | #ifdef CONFIG_IA32_EMULATION |
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index 58d7091eeb1f..1655147646aa 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h | |||
@@ -24,12 +24,7 @@ extern unsigned int cached_irq_mask; | |||
24 | #define SLAVE_ICW4_DEFAULT 0x01 | 24 | #define SLAVE_ICW4_DEFAULT 0x01 |
25 | #define PIC_ICW4_AEOI 2 | 25 | #define PIC_ICW4_AEOI 2 |
26 | 26 | ||
27 | extern spinlock_t i8259A_lock; | 27 | extern raw_spinlock_t i8259A_lock; |
28 | |||
29 | extern void init_8259A(int auto_eoi); | ||
30 | extern void enable_8259A_irq(unsigned int irq); | ||
31 | extern void disable_8259A_irq(unsigned int irq); | ||
32 | extern unsigned int startup_8259A_irq(unsigned int irq); | ||
33 | 28 | ||
34 | /* the PIC may need a careful delay on some platforms, hence specific calls */ | 29 | /* the PIC may need a careful delay on some platforms, hence specific calls */ |
35 | static inline unsigned char inb_pic(unsigned int port) | 30 | static inline unsigned char inb_pic(unsigned int port) |
@@ -57,7 +52,17 @@ static inline void outb_pic(unsigned char value, unsigned int port) | |||
57 | 52 | ||
58 | extern struct irq_chip i8259A_chip; | 53 | extern struct irq_chip i8259A_chip; |
59 | 54 | ||
60 | extern void mask_8259A(void); | 55 | struct legacy_pic { |
61 | extern void unmask_8259A(void); | 56 | int nr_legacy_irqs; |
57 | struct irq_chip *chip; | ||
58 | void (*mask_all)(void); | ||
59 | void (*restore_mask)(void); | ||
60 | void (*init)(int auto_eoi); | ||
61 | int (*irq_pending)(unsigned int irq); | ||
62 | void (*make_irq)(unsigned int irq); | ||
63 | }; | ||
64 | |||
65 | extern struct legacy_pic *legacy_pic; | ||
66 | extern struct legacy_pic null_legacy_pic; | ||
62 | 67 | ||
63 | #endif /* _ASM_X86_I8259_H */ | 68 | #endif /* _ASM_X86_I8259_H */ |
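The direct 8259A entry points dropped here (and from hw_irq.h above) are now reached through the legacy_pic ops table, so a platform without a PIC, such as Moorestown, can install null_legacy_pic instead of stubbing each call. A sketch of a caller converted to the indirection (the function name is illustrative, not from the patch):

static void example_reinit_legacy_pic(void)
{
	/* platforms that installed null_legacy_pic report zero IRQs */
	if (!legacy_pic->nr_legacy_irqs)
		return;

	legacy_pic->mask_all();
	legacy_pic->init(0);	/* normal EOI mode, not auto-EOI */
}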
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 73739322b6d0..a1dcfa3ab17d 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h | |||
@@ -1,8 +1,42 @@ | |||
1 | #ifndef _ASM_X86_IO_H | 1 | #ifndef _ASM_X86_IO_H |
2 | #define _ASM_X86_IO_H | 2 | #define _ASM_X86_IO_H |
3 | 3 | ||
4 | /* | ||
5 | * This file contains the definitions for the x86 IO instructions | ||
6 | * inb/inw/inl/outb/outw/outl and the "string versions" of the same | ||
7 | * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" | ||
8 | * versions of the single-IO instructions (inb_p/inw_p/..). | ||
9 | * | ||
10 | * This file is not meant to be obfuscating: it's just complicated | ||
11 | * to (a) handle it all in a way that makes gcc able to optimize it | ||
12 | * as well as possible and (b) trying to avoid writing the same thing | ||
13 | * over and over again with slight variations and possibly making a | ||
14 | * mistake somewhere. | ||
15 | */ | ||
16 | |||
17 | /* | ||
18 | * Thanks to James van Artsdalen for a better timing-fix than | ||
19 | * the two short jumps: using outb's to a nonexistent port seems | ||
20 | * to guarantee better timings even on fast machines. | ||
21 | * | ||
22 | * On the other hand, I'd like to be sure of a non-existent port: | ||
23 | * I feel a bit unsafe about using 0x80 (should be safe, though) | ||
24 | * | ||
25 | * Linus | ||
26 | */ | ||
27 | |||
28 | /* | ||
29 | * Bit simplified and optimized by Jan Hubicka | ||
30 | * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. | ||
31 | * | ||
32 | * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, | ||
33 | * isa_read[wl] and isa_write[wl] fixed | ||
34 | * - Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
35 | */ | ||
36 | |||
4 | #define ARCH_HAS_IOREMAP_WC | 37 | #define ARCH_HAS_IOREMAP_WC |
5 | 38 | ||
39 | #include <linux/string.h> | ||
6 | #include <linux/compiler.h> | 40 | #include <linux/compiler.h> |
7 | #include <asm-generic/int-ll64.h> | 41 | #include <asm-generic/int-ll64.h> |
8 | #include <asm/page.h> | 42 | #include <asm/page.h> |
@@ -173,11 +207,126 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) | |||
173 | extern void iounmap(volatile void __iomem *addr); | 207 | extern void iounmap(volatile void __iomem *addr); |
174 | 208 | ||
175 | 209 | ||
176 | #ifdef CONFIG_X86_32 | 210 | #ifdef __KERNEL__ |
177 | # include "io_32.h" | 211 | |
212 | #include <asm-generic/iomap.h> | ||
213 | |||
214 | #include <linux/vmalloc.h> | ||
215 | |||
216 | /* | ||
217 | * Convert a virtual cached pointer to an uncached pointer | ||
218 | */ | ||
219 | #define xlate_dev_kmem_ptr(p) p | ||
220 | |||
221 | static inline void | ||
222 | memset_io(volatile void __iomem *addr, unsigned char val, size_t count) | ||
223 | { | ||
224 | memset((void __force *)addr, val, count); | ||
225 | } | ||
226 | |||
227 | static inline void | ||
228 | memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count) | ||
229 | { | ||
230 | memcpy(dst, (const void __force *)src, count); | ||
231 | } | ||
232 | |||
233 | static inline void | ||
234 | memcpy_toio(volatile void __iomem *dst, const void *src, size_t count) | ||
235 | { | ||
236 | memcpy((void __force *)dst, src, count); | ||
237 | } | ||
238 | |||
239 | /* | ||
240 | * ISA space is 'always mapped' on a typical x86 system, no need to | ||
241 | * explicitly ioremap() it. The fact that the ISA IO space is mapped | ||
242 | * to PAGE_OFFSET is pure coincidence - it does not mean ISA values | ||
243 | * are physical addresses. The following constant pointer can be | ||
244 | * used as the IO-area pointer (it can be iounmapped as well, so the | ||
245 | * analogy with PCI is quite large): | ||
246 | */ | ||
247 | #define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET)) | ||
248 | |||
249 | /* | ||
250 | * Cache management | ||
251 | * | ||
252 | * This is needed in two cases | ||
253 | * 1. Out of order aware processors | ||
254 | * 2. Accidentally out of order processors (PPro errata #51) | ||
255 | */ | ||
256 | |||
257 | static inline void flush_write_buffers(void) | ||
258 | { | ||
259 | #if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) | ||
260 | asm volatile("lock; addl $0,0(%%esp)": : :"memory"); | ||
261 | #endif | ||
262 | } | ||
263 | |||
264 | #endif /* __KERNEL__ */ | ||
265 | |||
266 | extern void native_io_delay(void); | ||
267 | |||
268 | extern int io_delay_type; | ||
269 | extern void io_delay_init(void); | ||
270 | |||
271 | #if defined(CONFIG_PARAVIRT) | ||
272 | #include <asm/paravirt.h> | ||
178 | #else | 273 | #else |
179 | # include "io_64.h" | 274 | |
275 | static inline void slow_down_io(void) | ||
276 | { | ||
277 | native_io_delay(); | ||
278 | #ifdef REALLY_SLOW_IO | ||
279 | native_io_delay(); | ||
280 | native_io_delay(); | ||
281 | native_io_delay(); | ||
180 | #endif | 282 | #endif |
283 | } | ||
284 | |||
285 | #endif | ||
286 | |||
287 | #define BUILDIO(bwl, bw, type) \ | ||
288 | static inline void out##bwl(unsigned type value, int port) \ | ||
289 | { \ | ||
290 | asm volatile("out" #bwl " %" #bw "0, %w1" \ | ||
291 | : : "a"(value), "Nd"(port)); \ | ||
292 | } \ | ||
293 | \ | ||
294 | static inline unsigned type in##bwl(int port) \ | ||
295 | { \ | ||
296 | unsigned type value; \ | ||
297 | asm volatile("in" #bwl " %w1, %" #bw "0" \ | ||
298 | : "=a"(value) : "Nd"(port)); \ | ||
299 | return value; \ | ||
300 | } \ | ||
301 | \ | ||
302 | static inline void out##bwl##_p(unsigned type value, int port) \ | ||
303 | { \ | ||
304 | out##bwl(value, port); \ | ||
305 | slow_down_io(); \ | ||
306 | } \ | ||
307 | \ | ||
308 | static inline unsigned type in##bwl##_p(int port) \ | ||
309 | { \ | ||
310 | unsigned type value = in##bwl(port); \ | ||
311 | slow_down_io(); \ | ||
312 | return value; \ | ||
313 | } \ | ||
314 | \ | ||
315 | static inline void outs##bwl(int port, const void *addr, unsigned long count) \ | ||
316 | { \ | ||
317 | asm volatile("rep; outs" #bwl \ | ||
318 | : "+S"(addr), "+c"(count) : "d"(port)); \ | ||
319 | } \ | ||
320 | \ | ||
321 | static inline void ins##bwl(int port, void *addr, unsigned long count) \ | ||
322 | { \ | ||
323 | asm volatile("rep; ins" #bwl \ | ||
324 | : "+D"(addr), "+c"(count) : "d"(port)); \ | ||
325 | } | ||
326 | |||
327 | BUILDIO(b, b, char) | ||
328 | BUILDIO(w, w, short) | ||
329 | BUILDIO(l, , int) | ||
181 | 330 | ||
182 | extern void *xlate_dev_mem_ptr(unsigned long phys); | 331 | extern void *xlate_dev_mem_ptr(unsigned long phys); |
183 | extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr); | 332 | extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr); |
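The unified BUILDIO() replaces both the local/non-local variants from io_32.h and the __OUT/__IN macro maze from io_64.h, whose deletions follow below. For reference, BUILDIO(b, b, char) expands to roughly:

static inline void outb(unsigned char value, int port)
{
	asm volatile("outb %b0, %w1" : : "a"(value), "Nd"(port));
}

static inline unsigned char inb(int port)
{
	unsigned char value;
	asm volatile("inb %w1, %b0" : "=a"(value) : "Nd"(port));
	return value;
}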
diff --git a/arch/x86/include/asm/io_32.h b/arch/x86/include/asm/io_32.h deleted file mode 100644 index a299900f5920..000000000000 --- a/arch/x86/include/asm/io_32.h +++ /dev/null | |||
@@ -1,196 +0,0 @@ | |||
1 | #ifndef _ASM_X86_IO_32_H | ||
2 | #define _ASM_X86_IO_32_H | ||
3 | |||
4 | #include <linux/string.h> | ||
5 | #include <linux/compiler.h> | ||
6 | |||
7 | /* | ||
8 | * This file contains the definitions for the x86 IO instructions | ||
9 | * inb/inw/inl/outb/outw/outl and the "string versions" of the same | ||
10 | * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" | ||
11 | * versions of the single-IO instructions (inb_p/inw_p/..). | ||
12 | * | ||
13 | * This file is not meant to be obfuscating: it's just complicated | ||
14 | * to (a) handle it all in a way that makes gcc able to optimize it | ||
15 | * as well as possible and (b) trying to avoid writing the same thing | ||
16 | * over and over again with slight variations and possibly making a | ||
17 | * mistake somewhere. | ||
18 | */ | ||
19 | |||
20 | /* | ||
21 | * Thanks to James van Artsdalen for a better timing-fix than | ||
22 | * the two short jumps: using outb's to a nonexistent port seems | ||
23 | * to guarantee better timings even on fast machines. | ||
24 | * | ||
25 | * On the other hand, I'd like to be sure of a non-existent port: | ||
26 | * I feel a bit unsafe about using 0x80 (should be safe, though) | ||
27 | * | ||
28 | * Linus | ||
29 | */ | ||
30 | |||
31 | /* | ||
32 | * Bit simplified and optimized by Jan Hubicka | ||
33 | * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. | ||
34 | * | ||
35 | * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, | ||
36 | * isa_read[wl] and isa_write[wl] fixed | ||
37 | * - Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
38 | */ | ||
39 | |||
40 | #define XQUAD_PORTIO_BASE 0xfe400000 | ||
41 | #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ | ||
42 | |||
43 | #ifdef __KERNEL__ | ||
44 | |||
45 | #include <asm-generic/iomap.h> | ||
46 | |||
47 | #include <linux/vmalloc.h> | ||
48 | |||
49 | /* | ||
50 | * Convert a virtual cached pointer to an uncached pointer | ||
51 | */ | ||
52 | #define xlate_dev_kmem_ptr(p) p | ||
53 | |||
54 | static inline void | ||
55 | memset_io(volatile void __iomem *addr, unsigned char val, int count) | ||
56 | { | ||
57 | memset((void __force *)addr, val, count); | ||
58 | } | ||
59 | |||
60 | static inline void | ||
61 | memcpy_fromio(void *dst, const volatile void __iomem *src, int count) | ||
62 | { | ||
63 | __memcpy(dst, (const void __force *)src, count); | ||
64 | } | ||
65 | |||
66 | static inline void | ||
67 | memcpy_toio(volatile void __iomem *dst, const void *src, int count) | ||
68 | { | ||
69 | __memcpy((void __force *)dst, src, count); | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * ISA space is 'always mapped' on a typical x86 system, no need to | ||
74 | * explicitly ioremap() it. The fact that the ISA IO space is mapped | ||
75 | * to PAGE_OFFSET is pure coincidence - it does not mean ISA values | ||
76 | * are physical addresses. The following constant pointer can be | ||
77 | * used as the IO-area pointer (it can be iounmapped as well, so the | ||
78 | * analogy with PCI is quite large): | ||
79 | */ | ||
80 | #define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET)) | ||
81 | |||
82 | /* | ||
83 | * Cache management | ||
84 | * | ||
85 | * This is needed in two cases | ||
86 | * 1. Out of order aware processors | ||
87 | * 2. Accidentally out of order processors (PPro errata #51) | ||
88 | */ | ||
89 | |||
90 | #if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) | ||
91 | |||
92 | static inline void flush_write_buffers(void) | ||
93 | { | ||
94 | asm volatile("lock; addl $0,0(%%esp)": : :"memory"); | ||
95 | } | ||
96 | |||
97 | #else | ||
98 | |||
99 | #define flush_write_buffers() do { } while (0) | ||
100 | |||
101 | #endif | ||
102 | |||
103 | #endif /* __KERNEL__ */ | ||
104 | |||
105 | extern void native_io_delay(void); | ||
106 | |||
107 | extern int io_delay_type; | ||
108 | extern void io_delay_init(void); | ||
109 | |||
110 | #if defined(CONFIG_PARAVIRT) | ||
111 | #include <asm/paravirt.h> | ||
112 | #else | ||
113 | |||
114 | static inline void slow_down_io(void) | ||
115 | { | ||
116 | native_io_delay(); | ||
117 | #ifdef REALLY_SLOW_IO | ||
118 | native_io_delay(); | ||
119 | native_io_delay(); | ||
120 | native_io_delay(); | ||
121 | #endif | ||
122 | } | ||
123 | |||
124 | #endif | ||
125 | |||
126 | #define __BUILDIO(bwl, bw, type) \ | ||
127 | static inline void out##bwl(unsigned type value, int port) \ | ||
128 | { \ | ||
129 | out##bwl##_local(value, port); \ | ||
130 | } \ | ||
131 | \ | ||
132 | static inline unsigned type in##bwl(int port) \ | ||
133 | { \ | ||
134 | return in##bwl##_local(port); \ | ||
135 | } | ||
136 | |||
137 | #define BUILDIO(bwl, bw, type) \ | ||
138 | static inline void out##bwl##_local(unsigned type value, int port) \ | ||
139 | { \ | ||
140 | asm volatile("out" #bwl " %" #bw "0, %w1" \ | ||
141 | : : "a"(value), "Nd"(port)); \ | ||
142 | } \ | ||
143 | \ | ||
144 | static inline unsigned type in##bwl##_local(int port) \ | ||
145 | { \ | ||
146 | unsigned type value; \ | ||
147 | asm volatile("in" #bwl " %w1, %" #bw "0" \ | ||
148 | : "=a"(value) : "Nd"(port)); \ | ||
149 | return value; \ | ||
150 | } \ | ||
151 | \ | ||
152 | static inline void out##bwl##_local_p(unsigned type value, int port) \ | ||
153 | { \ | ||
154 | out##bwl##_local(value, port); \ | ||
155 | slow_down_io(); \ | ||
156 | } \ | ||
157 | \ | ||
158 | static inline unsigned type in##bwl##_local_p(int port) \ | ||
159 | { \ | ||
160 | unsigned type value = in##bwl##_local(port); \ | ||
161 | slow_down_io(); \ | ||
162 | return value; \ | ||
163 | } \ | ||
164 | \ | ||
165 | __BUILDIO(bwl, bw, type) \ | ||
166 | \ | ||
167 | static inline void out##bwl##_p(unsigned type value, int port) \ | ||
168 | { \ | ||
169 | out##bwl(value, port); \ | ||
170 | slow_down_io(); \ | ||
171 | } \ | ||
172 | \ | ||
173 | static inline unsigned type in##bwl##_p(int port) \ | ||
174 | { \ | ||
175 | unsigned type value = in##bwl(port); \ | ||
176 | slow_down_io(); \ | ||
177 | return value; \ | ||
178 | } \ | ||
179 | \ | ||
180 | static inline void outs##bwl(int port, const void *addr, unsigned long count) \ | ||
181 | { \ | ||
182 | asm volatile("rep; outs" #bwl \ | ||
183 | : "+S"(addr), "+c"(count) : "d"(port)); \ | ||
184 | } \ | ||
185 | \ | ||
186 | static inline void ins##bwl(int port, void *addr, unsigned long count) \ | ||
187 | { \ | ||
188 | asm volatile("rep; ins" #bwl \ | ||
189 | : "+D"(addr), "+c"(count) : "d"(port)); \ | ||
190 | } | ||
191 | |||
192 | BUILDIO(b, b, char) | ||
193 | BUILDIO(w, w, short) | ||
194 | BUILDIO(l, , int) | ||
195 | |||
196 | #endif /* _ASM_X86_IO_32_H */ | ||
diff --git a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h deleted file mode 100644 index 244067893af4..000000000000 --- a/arch/x86/include/asm/io_64.h +++ /dev/null | |||
@@ -1,181 +0,0 @@ | |||
1 | #ifndef _ASM_X86_IO_64_H | ||
2 | #define _ASM_X86_IO_64_H | ||
3 | |||
4 | |||
5 | /* | ||
6 | * This file contains the definitions for the x86 IO instructions | ||
7 | * inb/inw/inl/outb/outw/outl and the "string versions" of the same | ||
8 | * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" | ||
9 | * versions of the single-IO instructions (inb_p/inw_p/..). | ||
10 | * | ||
11 | * This file is not meant to be obfuscating: it's just complicated | ||
12 | * to (a) handle it all in a way that makes gcc able to optimize it | ||
13 | * as well as possible and (b) trying to avoid writing the same thing | ||
14 | * over and over again with slight variations and possibly making a | ||
15 | * mistake somewhere. | ||
16 | */ | ||
17 | |||
18 | /* | ||
19 | * Thanks to James van Artsdalen for a better timing-fix than | ||
20 | * the two short jumps: using outb's to a nonexistent port seems | ||
21 | * to guarantee better timings even on fast machines. | ||
22 | * | ||
23 | * On the other hand, I'd like to be sure of a non-existent port: | ||
24 | * I feel a bit unsafe about using 0x80 (should be safe, though) | ||
25 | * | ||
26 | * Linus | ||
27 | */ | ||
28 | |||
29 | /* | ||
30 | * Bit simplified and optimized by Jan Hubicka | ||
31 | * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. | ||
32 | * | ||
33 | * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, | ||
34 | * isa_read[wl] and isa_write[wl] fixed | ||
35 | * - Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
36 | */ | ||
37 | |||
38 | extern void native_io_delay(void); | ||
39 | |||
40 | extern int io_delay_type; | ||
41 | extern void io_delay_init(void); | ||
42 | |||
43 | #if defined(CONFIG_PARAVIRT) | ||
44 | #include <asm/paravirt.h> | ||
45 | #else | ||
46 | |||
47 | static inline void slow_down_io(void) | ||
48 | { | ||
49 | native_io_delay(); | ||
50 | #ifdef REALLY_SLOW_IO | ||
51 | native_io_delay(); | ||
52 | native_io_delay(); | ||
53 | native_io_delay(); | ||
54 | #endif | ||
55 | } | ||
56 | #endif | ||
57 | |||
58 | /* | ||
59 | * Talk about misusing macros.. | ||
60 | */ | ||
61 | #define __OUT1(s, x) \ | ||
62 | static inline void out##s(unsigned x value, unsigned short port) { | ||
63 | |||
64 | #define __OUT2(s, s1, s2) \ | ||
65 | asm volatile ("out" #s " %" s1 "0,%" s2 "1" | ||
66 | |||
67 | #ifndef REALLY_SLOW_IO | ||
68 | #define REALLY_SLOW_IO | ||
69 | #define UNSET_REALLY_SLOW_IO | ||
70 | #endif | ||
71 | |||
72 | #define __OUT(s, s1, x) \ | ||
73 | __OUT1(s, x) __OUT2(s, s1, "w") : : "a" (value), "Nd" (port)); \ | ||
74 | } \ | ||
75 | __OUT1(s##_p, x) __OUT2(s, s1, "w") : : "a" (value), "Nd" (port)); \ | ||
76 | slow_down_io(); \ | ||
77 | } | ||
78 | |||
79 | #define __IN1(s) \ | ||
80 | static inline RETURN_TYPE in##s(unsigned short port) \ | ||
81 | { \ | ||
82 | RETURN_TYPE _v; | ||
83 | |||
84 | #define __IN2(s, s1, s2) \ | ||
85 | asm volatile ("in" #s " %" s2 "1,%" s1 "0" | ||
86 | |||
87 | #define __IN(s, s1, i...) \ | ||
88 | __IN1(s) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i); \ | ||
89 | return _v; \ | ||
90 | } \ | ||
91 | __IN1(s##_p) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i); \ | ||
92 | slow_down_io(); \ | ||
93 | return _v; } | ||
94 | |||
95 | #ifdef UNSET_REALLY_SLOW_IO | ||
96 | #undef REALLY_SLOW_IO | ||
97 | #endif | ||
98 | |||
99 | #define __INS(s) \ | ||
100 | static inline void ins##s(unsigned short port, void *addr, \ | ||
101 | unsigned long count) \ | ||
102 | { \ | ||
103 | asm volatile ("rep ; ins" #s \ | ||
104 | : "=D" (addr), "=c" (count) \ | ||
105 | : "d" (port), "0" (addr), "1" (count)); \ | ||
106 | } | ||
107 | |||
108 | #define __OUTS(s) \ | ||
109 | static inline void outs##s(unsigned short port, const void *addr, \ | ||
110 | unsigned long count) \ | ||
111 | { \ | ||
112 | asm volatile ("rep ; outs" #s \ | ||
113 | : "=S" (addr), "=c" (count) \ | ||
114 | : "d" (port), "0" (addr), "1" (count)); \ | ||
115 | } | ||
116 | |||
117 | #define RETURN_TYPE unsigned char | ||
118 | __IN(b, "") | ||
119 | #undef RETURN_TYPE | ||
120 | #define RETURN_TYPE unsigned short | ||
121 | __IN(w, "") | ||
122 | #undef RETURN_TYPE | ||
123 | #define RETURN_TYPE unsigned int | ||
124 | __IN(l, "") | ||
125 | #undef RETURN_TYPE | ||
126 | |||
127 | __OUT(b, "b", char) | ||
128 | __OUT(w, "w", short) | ||
129 | __OUT(l, , int) | ||
130 | |||
131 | __INS(b) | ||
132 | __INS(w) | ||
133 | __INS(l) | ||
134 | |||
135 | __OUTS(b) | ||
136 | __OUTS(w) | ||
137 | __OUTS(l) | ||
138 | |||
139 | #if defined(__KERNEL__) && defined(__x86_64__) | ||
140 | |||
141 | #include <linux/vmalloc.h> | ||
142 | |||
143 | #include <asm-generic/iomap.h> | ||
144 | |||
145 | void __memcpy_fromio(void *, unsigned long, unsigned); | ||
146 | void __memcpy_toio(unsigned long, const void *, unsigned); | ||
147 | |||
148 | static inline void memcpy_fromio(void *to, const volatile void __iomem *from, | ||
149 | unsigned len) | ||
150 | { | ||
151 | __memcpy_fromio(to, (unsigned long)from, len); | ||
152 | } | ||
153 | |||
154 | static inline void memcpy_toio(volatile void __iomem *to, const void *from, | ||
155 | unsigned len) | ||
156 | { | ||
157 | __memcpy_toio((unsigned long)to, from, len); | ||
158 | } | ||
159 | |||
160 | void memset_io(volatile void __iomem *a, int b, size_t c); | ||
161 | |||
162 | /* | ||
163 | * ISA space is 'always mapped' on a typical x86 system, no need to | ||
164 | * explicitly ioremap() it. The fact that the ISA IO space is mapped | ||
165 | * to PAGE_OFFSET is pure coincidence - it does not mean ISA values | ||
166 | * are physical addresses. The following constant pointer can be | ||
167 | * used as the IO-area pointer (it can be iounmapped as well, so the | ||
168 | * analogy with PCI is quite large): | ||
169 | */ | ||
170 | #define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET)) | ||
171 | |||
172 | #define flush_write_buffers() | ||
173 | |||
174 | /* | ||
175 | * Convert a virtual cached pointer to an uncached pointer | ||
176 | */ | ||
177 | #define xlate_dev_kmem_ptr(p) p | ||
178 | |||
179 | #endif /* __KERNEL__ */ | ||
180 | |||
181 | #endif /* _ASM_X86_IO_64_H */ | ||
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 7c7c16cde1f8..35832a03a515 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h | |||
@@ -143,8 +143,6 @@ extern int noioapicreroute; | |||
143 | /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ | 143 | /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ |
144 | extern int timer_through_8259; | 144 | extern int timer_through_8259; |
145 | 145 | ||
146 | extern void io_apic_disable_legacy(void); | ||
147 | |||
148 | /* | 146 | /* |
149 | * If we use the IO-APIC for IRQ routing, disable automatic | 147 | * If we use the IO-APIC for IRQ routing, disable automatic |
150 | * assignment of PCI IRQ's. | 148 | * assignment of PCI IRQ's. |
@@ -160,6 +158,7 @@ extern int io_apic_get_redir_entries(int ioapic); | |||
160 | struct io_apic_irq_attr; | 158 | struct io_apic_irq_attr; |
161 | extern int io_apic_set_pci_routing(struct device *dev, int irq, | 159 | extern int io_apic_set_pci_routing(struct device *dev, int irq, |
162 | struct io_apic_irq_attr *irq_attr); | 160 | struct io_apic_irq_attr *irq_attr); |
161 | void setup_IO_APIC_irq_extra(u32 gsi); | ||
163 | extern int (*ioapic_renumber_irq)(int ioapic, int irq); | 162 | extern int (*ioapic_renumber_irq)(int ioapic, int irq); |
164 | extern void ioapic_init_mappings(void); | 163 | extern void ioapic_init_mappings(void); |
165 | extern void ioapic_insert_resources(void); | 164 | extern void ioapic_insert_resources(void); |
@@ -188,6 +187,7 @@ extern struct mp_ioapic_gsi mp_gsi_routing[]; | |||
188 | int mp_find_ioapic(int gsi); | 187 | int mp_find_ioapic(int gsi); |
189 | int mp_find_ioapic_pin(int ioapic, int gsi); | 188 | int mp_find_ioapic_pin(int ioapic, int gsi); |
190 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); | 189 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); |
190 | extern void __init pre_init_apic_IRQ0(void); | ||
191 | 191 | ||
192 | #else /* !CONFIG_X86_IO_APIC */ | 192 | #else /* !CONFIG_X86_IO_APIC */ |
193 | 193 | ||
@@ -197,7 +197,11 @@ static const int timer_through_8259 = 0; | |||
197 | static inline void ioapic_init_mappings(void) { } | 197 | static inline void ioapic_init_mappings(void) { } |
198 | static inline void ioapic_insert_resources(void) { } | 198 | static inline void ioapic_insert_resources(void) { } |
199 | static inline void probe_nr_irqs_gsi(void) { } | 199 | static inline void probe_nr_irqs_gsi(void) { } |
200 | static inline int mp_find_ioapic(int gsi) { return 0; } | ||
200 | 201 | ||
202 | struct io_apic_irq_attr; | ||
203 | static inline int io_apic_set_pci_routing(struct device *dev, int irq, | ||
204 | struct io_apic_irq_attr *irq_attr) { return 0; } | ||
201 | #endif | 205 | #endif |
202 | 206 | ||
203 | #endif /* _ASM_X86_IO_APIC_H */ | 207 | #endif /* _ASM_X86_IO_APIC_H */ |
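The two stubs added at the bottom make the !CONFIG_X86_IO_APIC configuration symmetrical: mp_find_ioapic() collapses to 0 and io_apic_set_pci_routing() becomes a no-op returning 0, so callers (the Moorestown platform code added elsewhere in this merge, for instance) can use them without wrapping each call in #ifdef CONFIG_X86_IO_APIC.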
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 4611f085cd43..8767d99c4f64 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -28,28 +28,33 @@ | |||
28 | #define MCE_VECTOR 0x12 | 28 | #define MCE_VECTOR 0x12 |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * IDT vectors usable for external interrupt sources start | 31 | * IDT vectors usable for external interrupt sources start at 0x20. |
32 | * at 0x20: | 32 | * (0x80 is the syscall vector, 0x30-0x3f are for ISA) |
33 | */ | 33 | */ |
34 | #define FIRST_EXTERNAL_VECTOR 0x20 | 34 | #define FIRST_EXTERNAL_VECTOR 0x20 |
35 | 35 | /* | |
36 | #ifdef CONFIG_X86_32 | 36 | * We start allocating at 0x21 to spread out vectors evenly between |
37 | # define SYSCALL_VECTOR 0x80 | 37 | * priority levels. (0x80 is the syscall vector) |
38 | # define IA32_SYSCALL_VECTOR 0x80 | 38 | */ |
39 | #else | 39 | #define VECTOR_OFFSET_START 1 |
40 | # define IA32_SYSCALL_VECTOR 0x80 | ||
41 | #endif | ||
42 | 40 | ||
43 | /* | 41 | /* |
44 | * Reserve the lowest usable priority level 0x20 - 0x2f for triggering | 42 | * Reserve the lowest usable vector (and hence lowest priority) 0x20 for |
45 | * cleanup after irq migration. | 43 | * triggering cleanup after irq migration. 0x21-0x2f will still be used |
44 | * for device interrupts. | ||
46 | */ | 45 | */ |
47 | #define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR | 46 | #define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR |
48 | 47 | ||
48 | #define IA32_SYSCALL_VECTOR 0x80 | ||
49 | #ifdef CONFIG_X86_32 | ||
50 | # define SYSCALL_VECTOR 0x80 | ||
51 | #endif | ||
52 | |||
49 | /* | 53 | /* |
50 | * Vectors 0x30-0x3f are used for ISA interrupts. | 54 | * Vectors 0x30-0x3f are used for ISA interrupts. |
55 | * round up to the next 16-vector boundary | ||
51 | */ | 56 | */ |
52 | #define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) | 57 | #define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15) |
53 | 58 | ||
54 | #define IRQ1_VECTOR (IRQ0_VECTOR + 1) | 59 | #define IRQ1_VECTOR (IRQ0_VECTOR + 1) |
55 | #define IRQ2_VECTOR (IRQ0_VECTOR + 2) | 60 | #define IRQ2_VECTOR (IRQ0_VECTOR + 2) |
@@ -120,13 +125,6 @@ | |||
120 | */ | 125 | */ |
121 | #define MCE_SELF_VECTOR 0xeb | 126 | #define MCE_SELF_VECTOR 0xeb |
122 | 127 | ||
123 | /* | ||
124 | * First APIC vector available to drivers: (vectors 0x30-0xee) we | ||
125 | * start at 0x31(0x41) to spread out vectors evenly between priority | ||
126 | * levels. (0x80 is the syscall vector) | ||
127 | */ | ||
128 | #define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) | ||
129 | |||
130 | #define NR_VECTORS 256 | 128 | #define NR_VECTORS 256 |
131 | 129 | ||
132 | #define FPU_IRQ 13 | 130 | #define FPU_IRQ 13 |
@@ -154,21 +152,21 @@ static inline int invalid_vm86_irq(int irq) | |||
154 | 152 | ||
155 | #define NR_IRQS_LEGACY 16 | 153 | #define NR_IRQS_LEGACY 16 |
156 | 154 | ||
157 | #define CPU_VECTOR_LIMIT ( 8 * NR_CPUS ) | ||
158 | #define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) | 155 | #define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) |
159 | 156 | ||
160 | #ifdef CONFIG_X86_IO_APIC | 157 | #ifdef CONFIG_X86_IO_APIC |
161 | # ifdef CONFIG_SPARSE_IRQ | 158 | # ifdef CONFIG_SPARSE_IRQ |
159 | # define CPU_VECTOR_LIMIT (64 * NR_CPUS) | ||
162 | # define NR_IRQS \ | 160 | # define NR_IRQS \ |
163 | (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ | 161 | (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ |
164 | (NR_VECTORS + CPU_VECTOR_LIMIT) : \ | 162 | (NR_VECTORS + CPU_VECTOR_LIMIT) : \ |
165 | (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) | 163 | (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) |
166 | # else | 164 | # else |
167 | # if NR_CPUS < MAX_IO_APICS | 165 | # define CPU_VECTOR_LIMIT (32 * NR_CPUS) |
168 | # define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) | 166 | # define NR_IRQS \ |
169 | # else | 167 | (CPU_VECTOR_LIMIT < IO_APIC_VECTOR_LIMIT ? \ |
170 | # define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) | 168 | (NR_VECTORS + CPU_VECTOR_LIMIT) : \ |
171 | # endif | 169 | (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) |
172 | # endif | 170 | # endif |
173 | #else /* !CONFIG_X86_IO_APIC: */ | 171 | #else /* !CONFIG_X86_IO_APIC: */ |
174 | # define NR_IRQS NR_IRQS_LEGACY | 172 | # define NR_IRQS NR_IRQS_LEGACY |
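The rewritten IRQ0_VECTOR is value-preserving: with FIRST_EXTERNAL_VECTOR = 0x20, ((0x20 + 16) & ~15) = 0x30, exactly the old FIRST_EXTERNAL_VECTOR + 0x10. The & ~15 documents the invariant that ISA vectors start on a 16-vector priority boundary, and keeps that true should FIRST_EXTERNAL_VECTOR ever move off a 16-aligned value.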
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 4fe681de1e76..4ffa345a8ccb 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h | |||
@@ -32,7 +32,10 @@ struct kprobe; | |||
32 | 32 | ||
33 | typedef u8 kprobe_opcode_t; | 33 | typedef u8 kprobe_opcode_t; |
34 | #define BREAKPOINT_INSTRUCTION 0xcc | 34 | #define BREAKPOINT_INSTRUCTION 0xcc |
35 | #define RELATIVEJUMP_INSTRUCTION 0xe9 | 35 | #define RELATIVEJUMP_OPCODE 0xe9 |
36 | #define RELATIVEJUMP_SIZE 5 | ||
37 | #define RELATIVECALL_OPCODE 0xe8 | ||
38 | #define RELATIVE_ADDR_SIZE 4 | ||
36 | #define MAX_INSN_SIZE 16 | 39 | #define MAX_INSN_SIZE 16 |
37 | #define MAX_STACK_SIZE 64 | 40 | #define MAX_STACK_SIZE 64 |
38 | #define MIN_STACK_SIZE(ADDR) \ | 41 | #define MIN_STACK_SIZE(ADDR) \ |
@@ -44,6 +47,17 @@ typedef u8 kprobe_opcode_t; | |||
44 | 47 | ||
45 | #define flush_insn_slot(p) do { } while (0) | 48 | #define flush_insn_slot(p) do { } while (0) |
46 | 49 | ||
50 | /* optinsn template addresses */ | ||
51 | extern kprobe_opcode_t optprobe_template_entry; | ||
52 | extern kprobe_opcode_t optprobe_template_val; | ||
53 | extern kprobe_opcode_t optprobe_template_call; | ||
54 | extern kprobe_opcode_t optprobe_template_end; | ||
55 | #define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE) | ||
56 | #define MAX_OPTINSN_SIZE \ | ||
57 | (((unsigned long)&optprobe_template_end - \ | ||
58 | (unsigned long)&optprobe_template_entry) + \ | ||
59 | MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE) | ||
60 | |||
47 | extern const int kretprobe_blacklist_size; | 61 | extern const int kretprobe_blacklist_size; |
48 | 62 | ||
49 | void arch_remove_kprobe(struct kprobe *p); | 63 | void arch_remove_kprobe(struct kprobe *p); |
@@ -64,6 +78,21 @@ struct arch_specific_insn { | |||
64 | int boostable; | 78 | int boostable; |
65 | }; | 79 | }; |
66 | 80 | ||
81 | struct arch_optimized_insn { | ||
82 | /* copy of the original instructions */ | ||
83 | kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE]; | ||
84 | /* detour code buffer */ | ||
85 | kprobe_opcode_t *insn; | ||
86 | /* the size of instructions copied to detour code buffer */ | ||
87 | size_t size; | ||
88 | }; | ||
89 | |||
90 | /* Return true (!0) if optinsn is prepared for optimization. */ | ||
91 | static inline int arch_prepared_optinsn(struct arch_optimized_insn *optinsn) | ||
92 | { | ||
93 | return optinsn->size; | ||
94 | } | ||
95 | |||
67 | struct prev_kprobe { | 96 | struct prev_kprobe { |
68 | struct kprobe *kp; | 97 | struct kprobe *kp; |
69 | unsigned long status; | 98 | unsigned long status; |
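The new constants fit together as follows. The detour jump patched over a probed instruction is RELATIVEJUMP_SIZE = 5 bytes: the 0xe9 opcode plus RELATIVE_ADDR_SIZE = 4 bytes of rel32 displacement. Those 5 bytes may split an instruction, so whole instructions are copied out until the jump is covered; in the worst case the last copied instruction starts at offset 4 and is MAX_INSN_SIZE = 16 bytes long, giving MAX_OPTIMIZED_LENGTH = 16 + 4 = 20 bytes. MAX_OPTINSN_SIZE then sizes the detour buffer as the optprobe template, plus those 20 copied bytes, plus one more 5-byte jump back into the original instruction stream.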
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 950df434763f..f46b79f6c16c 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h | |||
@@ -254,6 +254,10 @@ struct kvm_reinject_control { | |||
254 | __u8 reserved[31]; | 254 | __u8 reserved[31]; |
255 | }; | 255 | }; |
256 | 256 | ||
257 | /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ | ||
258 | #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 | ||
259 | #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 | ||
260 | |||
257 | /* for KVM_GET/SET_VCPU_EVENTS */ | 261 | /* for KVM_GET/SET_VCPU_EVENTS */ |
258 | struct kvm_vcpu_events { | 262 | struct kvm_vcpu_events { |
259 | struct { | 263 | struct { |
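Userspace sets these bits in kvm_vcpu_events.flags to tell KVM_SET_VCPU_EVENTS which optional field groups it wants applied. A hedged userspace sketch (VM and vCPU setup omitted):

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int set_pending_nmi(int vcpu_fd)
{
	struct kvm_vcpu_events events;

	if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)
		return -1;

	events.nmi.pending = 1;
	/* take only the NMI fields; leave the SIPI vector untouched */
	events.flags = KVM_VCPUEVENT_VALID_NMI_PENDING;

	return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
}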
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 7c18e1230f54..7a6f54fa13ba 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -54,13 +54,23 @@ struct x86_emulate_ctxt; | |||
54 | struct x86_emulate_ops { | 54 | struct x86_emulate_ops { |
55 | /* | 55 | /* |
56 | * read_std: Read bytes of standard (non-emulated/special) memory. | 56 | * read_std: Read bytes of standard (non-emulated/special) memory. |
57 | * Used for instruction fetch, stack operations, and others. | 57 | * Used for descriptor reading. |
58 | * @addr: [IN ] Linear address from which to read. | 58 | * @addr: [IN ] Linear address from which to read. |
59 | * @val: [OUT] Value read from memory, zero-extended to 'u_long'. | 59 | * @val: [OUT] Value read from memory, zero-extended to 'u_long'. |
60 | * @bytes: [IN ] Number of bytes to read from memory. | 60 | * @bytes: [IN ] Number of bytes to read from memory. |
61 | */ | 61 | */ |
62 | int (*read_std)(unsigned long addr, void *val, | 62 | int (*read_std)(unsigned long addr, void *val, |
63 | unsigned int bytes, struct kvm_vcpu *vcpu); | 63 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); |
64 | |||
65 | /* | ||
66 | * fetch: Read bytes of standard (non-emulated/special) memory. | ||
67 | * Used for instruction fetch. | ||
68 | * @addr: [IN ] Linear address from which to read. | ||
69 | * @val: [OUT] Value read from memory, zero-extended to 'u_long'. | ||
70 | * @bytes: [IN ] Number of bytes to read from memory. | ||
71 | */ | ||
72 | int (*fetch)(unsigned long addr, void *val, | ||
73 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); | ||
64 | 74 | ||
65 | /* | 75 | /* |
66 | * read_emulated: Read bytes from emulated/special memory area. | 76 | * read_emulated: Read bytes from emulated/special memory area. |
@@ -74,7 +84,7 @@ struct x86_emulate_ops { | |||
74 | struct kvm_vcpu *vcpu); | 84 | struct kvm_vcpu *vcpu); |
75 | 85 | ||
76 | /* | 86 | /* |
77 | * write_emulated: Read bytes from emulated/special memory area. | 87 | * write_emulated: Write bytes to emulated/special memory area. |
78 | * @addr: [IN ] Linear address to which to write. | 88 | * @addr: [IN ] Linear address to which to write. |
79 | * @val: [IN ] Value to write to memory (low-order bytes used as | 89 | * @val: [IN ] Value to write to memory (low-order bytes used as |
80 | * required). | 90 | * required). |
@@ -168,6 +178,7 @@ struct x86_emulate_ctxt { | |||
168 | 178 | ||
169 | /* Execution mode, passed to the emulator. */ | 179 | /* Execution mode, passed to the emulator. */ |
170 | #define X86EMUL_MODE_REAL 0 /* Real mode. */ | 180 | #define X86EMUL_MODE_REAL 0 /* Real mode. */ |
181 | #define X86EMUL_MODE_VM86 1 /* Virtual 8086 mode. */ | ||
171 | #define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */ | 182 | #define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */ |
172 | #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ | 183 | #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ |
173 | #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ | 184 | #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4f865e8b8540..06d9e79ca37d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -25,7 +25,7 @@ | |||
25 | #include <asm/mtrr.h> | 25 | #include <asm/mtrr.h> |
26 | #include <asm/msr-index.h> | 26 | #include <asm/msr-index.h> |
27 | 27 | ||
28 | #define KVM_MAX_VCPUS 16 | 28 | #define KVM_MAX_VCPUS 64 |
29 | #define KVM_MEMORY_SLOTS 32 | 29 | #define KVM_MEMORY_SLOTS 32 |
30 | /* memory slots that are not exposed to userspace */ | 30 | /* memory slots that are not exposed to userspace */ |
31 | #define KVM_PRIVATE_MEM_SLOTS 4 | 31 | #define KVM_PRIVATE_MEM_SLOTS 4 |
@@ -38,19 +38,6 @@ | |||
38 | #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ | 38 | #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ |
39 | 0xFFFFFF0000000000ULL) | 39 | 0xFFFFFF0000000000ULL) |
40 | 40 | ||
41 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ | ||
42 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) | ||
43 | #define KVM_GUEST_CR0_MASK \ | ||
44 | (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | ||
45 | #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \ | ||
46 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP) | ||
47 | #define KVM_VM_CR0_ALWAYS_ON \ | ||
48 | (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | ||
49 | #define KVM_GUEST_CR4_MASK \ | ||
50 | (X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE) | ||
51 | #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) | ||
52 | #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) | ||
53 | |||
54 | #define INVALID_PAGE (~(hpa_t)0) | 41 | #define INVALID_PAGE (~(hpa_t)0) |
55 | #define UNMAPPED_GVA (~(gpa_t)0) | 42 | #define UNMAPPED_GVA (~(gpa_t)0) |
56 | 43 | ||
@@ -256,7 +243,8 @@ struct kvm_mmu { | |||
256 | void (*new_cr3)(struct kvm_vcpu *vcpu); | 243 | void (*new_cr3)(struct kvm_vcpu *vcpu); |
257 | int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); | 244 | int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); |
258 | void (*free)(struct kvm_vcpu *vcpu); | 245 | void (*free)(struct kvm_vcpu *vcpu); |
259 | gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva); | 246 | gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, |
247 | u32 *error); | ||
260 | void (*prefetch_page)(struct kvm_vcpu *vcpu, | 248 | void (*prefetch_page)(struct kvm_vcpu *vcpu, |
261 | struct kvm_mmu_page *page); | 249 | struct kvm_mmu_page *page); |
262 | int (*sync_page)(struct kvm_vcpu *vcpu, | 250 | int (*sync_page)(struct kvm_vcpu *vcpu, |
@@ -282,13 +270,15 @@ struct kvm_vcpu_arch { | |||
282 | u32 regs_dirty; | 270 | u32 regs_dirty; |
283 | 271 | ||
284 | unsigned long cr0; | 272 | unsigned long cr0; |
273 | unsigned long cr0_guest_owned_bits; | ||
285 | unsigned long cr2; | 274 | unsigned long cr2; |
286 | unsigned long cr3; | 275 | unsigned long cr3; |
287 | unsigned long cr4; | 276 | unsigned long cr4; |
277 | unsigned long cr4_guest_owned_bits; | ||
288 | unsigned long cr8; | 278 | unsigned long cr8; |
289 | u32 hflags; | 279 | u32 hflags; |
290 | u64 pdptrs[4]; /* pae */ | 280 | u64 pdptrs[4]; /* pae */ |
291 | u64 shadow_efer; | 281 | u64 efer; |
292 | u64 apic_base; | 282 | u64 apic_base; |
293 | struct kvm_lapic *apic; /* kernel irqchip context */ | 283 | struct kvm_lapic *apic; /* kernel irqchip context */ |
294 | int32_t apic_arb_prio; | 284 | int32_t apic_arb_prio; |
@@ -374,17 +364,27 @@ struct kvm_vcpu_arch { | |||
374 | /* used for guest single stepping over the given code position */ | 364 | /* used for guest single stepping over the given code position */ |
375 | u16 singlestep_cs; | 365 | u16 singlestep_cs; |
376 | unsigned long singlestep_rip; | 366 | unsigned long singlestep_rip; |
367 | /* fields used by HYPER-V emulation */ | ||
368 | u64 hv_vapic; | ||
377 | }; | 369 | }; |
378 | 370 | ||
379 | struct kvm_mem_alias { | 371 | struct kvm_mem_alias { |
380 | gfn_t base_gfn; | 372 | gfn_t base_gfn; |
381 | unsigned long npages; | 373 | unsigned long npages; |
382 | gfn_t target_gfn; | 374 | gfn_t target_gfn; |
375 | #define KVM_ALIAS_INVALID 1UL | ||
376 | unsigned long flags; | ||
383 | }; | 377 | }; |
384 | 378 | ||
385 | struct kvm_arch{ | 379 | #define KVM_ARCH_HAS_UNALIAS_INSTANTIATION |
386 | int naliases; | 380 | |
381 | struct kvm_mem_aliases { | ||
387 | struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; | 382 | struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; |
383 | int naliases; | ||
384 | }; | ||
385 | |||
386 | struct kvm_arch { | ||
387 | struct kvm_mem_aliases *aliases; | ||
388 | 388 | ||
389 | unsigned int n_free_mmu_pages; | 389 | unsigned int n_free_mmu_pages; |
390 | unsigned int n_requested_mmu_pages; | 390 | unsigned int n_requested_mmu_pages; |
@@ -416,6 +416,10 @@ struct kvm_arch{ | |||
416 | s64 kvmclock_offset; | 416 | s64 kvmclock_offset; |
417 | 417 | ||
418 | struct kvm_xen_hvm_config xen_hvm_config; | 418 | struct kvm_xen_hvm_config xen_hvm_config; |
419 | |||
420 | /* fields used by HYPER-V emulation */ | ||
421 | u64 hv_guest_os_id; | ||
422 | u64 hv_hypercall; | ||
419 | }; | 423 | }; |
420 | 424 | ||
421 | struct kvm_vm_stat { | 425 | struct kvm_vm_stat { |
@@ -471,6 +475,7 @@ struct kvm_x86_ops { | |||
471 | int (*hardware_setup)(void); /* __init */ | 475 | int (*hardware_setup)(void); /* __init */ |
472 | void (*hardware_unsetup)(void); /* __exit */ | 476 | void (*hardware_unsetup)(void); /* __exit */ |
473 | bool (*cpu_has_accelerated_tpr)(void); | 477 | bool (*cpu_has_accelerated_tpr)(void); |
478 | void (*cpuid_update)(struct kvm_vcpu *vcpu); | ||
474 | 479 | ||
475 | /* Create, but do not attach this VCPU */ | 480 | /* Create, but do not attach this VCPU */ |
476 | struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); | 481 | struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); |
@@ -492,6 +497,7 @@ struct kvm_x86_ops { | |||
492 | void (*set_segment)(struct kvm_vcpu *vcpu, | 497 | void (*set_segment)(struct kvm_vcpu *vcpu, |
493 | struct kvm_segment *var, int seg); | 498 | struct kvm_segment *var, int seg); |
494 | void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); | 499 | void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); |
500 | void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); | ||
495 | void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); | 501 | void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); |
496 | void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); | 502 | void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); |
497 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); | 503 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); |
@@ -501,12 +507,13 @@ struct kvm_x86_ops { | |||
501 | void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 507 | void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); |
502 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 508 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); |
503 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 509 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); |
504 | unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr); | 510 | int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest); |
505 | void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value, | 511 | int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value); |
506 | int *exception); | ||
507 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); | 512 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); |
508 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); | 513 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); |
509 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | 514 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); |
515 | void (*fpu_activate)(struct kvm_vcpu *vcpu); | ||
516 | void (*fpu_deactivate)(struct kvm_vcpu *vcpu); | ||
510 | 517 | ||
511 | void (*tlb_flush)(struct kvm_vcpu *vcpu); | 518 | void (*tlb_flush)(struct kvm_vcpu *vcpu); |
512 | 519 | ||
@@ -531,7 +538,8 @@ struct kvm_x86_ops { | |||
531 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); | 538 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
532 | int (*get_tdp_level)(void); | 539 | int (*get_tdp_level)(void); |
533 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); | 540 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
534 | bool (*gb_page_enable)(void); | 541 | int (*get_lpage_level)(void); |
542 | bool (*rdtscp_supported)(void); | ||
535 | 543 | ||
536 | const struct trace_print_flags *exit_reasons_str; | 544 | const struct trace_print_flags *exit_reasons_str; |
537 | }; | 545 | }; |
@@ -606,8 +614,7 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, | |||
606 | unsigned long value); | 614 | unsigned long value); |
607 | 615 | ||
608 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); | 616 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
609 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 617 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); |
610 | int type_bits, int seg); | ||
611 | 618 | ||
612 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); | 619 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); |
613 | 620 | ||
@@ -653,6 +660,10 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); | |||
653 | int kvm_mmu_load(struct kvm_vcpu *vcpu); | 660 | int kvm_mmu_load(struct kvm_vcpu *vcpu); |
654 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); | 661 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); |
655 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); | 662 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); |
663 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); | ||
664 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); | ||
665 | gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); | ||
666 | gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); | ||
656 | 667 | ||
657 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); | 668 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); |
658 | 669 | ||
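[Editor's note] The four kvm_mmu_gva_to_gpa_* variants make the access intent explicit (read, instruction fetch, write, or supervisor access) so the MMU can apply the matching permission checks during translation. A hedged usage sketch for the write flavor:

	u32 error;
	gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &error);

	if (gpa == UNMAPPED_GVA)
		kvm_inject_page_fault(vcpu, gva, error); /* hand fault to guest */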
@@ -666,6 +677,7 @@ void kvm_disable_tdp(void); | |||
666 | 677 | ||
667 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); | 678 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); |
668 | int complete_pio(struct kvm_vcpu *vcpu); | 679 | int complete_pio(struct kvm_vcpu *vcpu); |
680 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); | ||
669 | 681 | ||
670 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); | 682 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); |
671 | 683 | ||
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c584076a47f4..ffae1420e7d7 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _ASM_X86_KVM_PARA_H | 2 | #define _ASM_X86_KVM_PARA_H |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | #include <asm/hyperv.h> | ||
5 | 6 | ||
6 | /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It | 7 | /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It |
7 | * should be used to determine that a VM is running under KVM. | 8 | * should be used to determine that a VM is running under KVM. |
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index ba0eed8aa1a6..b60f2924c413 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h | |||
@@ -28,22 +28,39 @@ | |||
28 | 28 | ||
29 | #ifndef __ASSEMBLY__ | 29 | #ifndef __ASSEMBLY__ |
30 | #include <asm/hw_irq.h> | 30 | #include <asm/hw_irq.h> |
31 | #include <asm/kvm_para.h> | ||
32 | 31 | ||
33 | /*G:030 | 32 | /*G:030 |
34 | * But first, how does our Guest contact the Host to ask for privileged | 33 | * But first, how does our Guest contact the Host to ask for privileged |
35 | * operations? There are two ways: the direct way is to make a "hypercall", | 34 | * operations? There are two ways: the direct way is to make a "hypercall", |
36 | * to make requests of the Host Itself. | 35 | * to make requests of the Host Itself. |
37 | * | 36 | * |
38 | * We use the KVM hypercall mechanism, though completely different hypercall | 37 | * Our hypercall mechanism uses the highest unused trap code (traps 32 and |
39 | * numbers. Seventeen hypercalls are available: the hypercall number is put in | 38 | * above are used by real hardware interrupts). Seventeen hypercalls are |
40 | * the %eax register, and the arguments (when required) are placed in %ebx, | 39 | * available: the hypercall number is put in the %eax register, and the |
41 | * %ecx, %edx and %esi. If a return value makes sense, it's returned in %eax. | 40 | * arguments (when required) are placed in %ebx, %ecx, %edx and %esi. |
41 | * If a return value makes sense, it's returned in %eax. | ||
42 | * | 42 | * |
43 | * Grossly invalid calls result in Sudden Death at the hands of the vengeful | 43 | * Grossly invalid calls result in Sudden Death at the hands of the vengeful |
44 | * Host, rather than returning failure. This reflects Winston Churchill's | 44 | * Host, rather than returning failure. This reflects Winston Churchill's |
45 | * definition of a gentleman: "someone who is only rude intentionally". | 45 | * definition of a gentleman: "someone who is only rude intentionally". |
46 | :*/ | 46 | */ |
47 | static inline unsigned long | ||
48 | hcall(unsigned long call, | ||
49 | unsigned long arg1, unsigned long arg2, unsigned long arg3, | ||
50 | unsigned long arg4) | ||
51 | { | ||
52 | /* "int" is the Intel instruction to trigger a trap. */ | ||
53 | asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY) | ||
54 | /* The call in %eax (aka "a") might be overwritten */ | ||
55 | : "=a"(call) | ||
56 | /* The arguments are in %eax, %ebx, %ecx, %edx & %esi */ | ||
57 | : "a"(call), "b"(arg1), "c"(arg2), "d"(arg3), "S"(arg4) | ||
58 | /* "memory" means this might write somewhere in memory. | ||
59 | * This isn't true for all calls, but it's safe to tell | ||
60 | * gcc that it might happen so it doesn't get clever. */ | ||
61 | : "memory"); | ||
62 | return call; | ||
63 | } | ||
47 | 64 | ||
48 | /* Can't use our min() macro here: needs to be a constant */ | 65 | /* Can't use our min() macro here: needs to be a constant */ |
49 | #define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) | 66 | #define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) |
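[Editor's note] For illustration, a guest-side call through the new hcall() stub looks like this; LHCALL_FLUSH_TLB is one of the seventeen hypercall numbers defined earlier in this header, and arg1 of 1 requests a full flush (the unused arguments are zeroed):

	/* Guest asks the Host to flush its entire TLB (illustrative call site). */
	hcall(LHCALL_FLUSH_TLB, 1, 0, 0, 0);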
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 47b9b6f19057..2e9972468a5d 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h | |||
@@ -195,41 +195,4 @@ static inline long local_sub_return(long i, local_t *l) | |||
195 | #define __local_add(i, l) local_add((i), (l)) | 195 | #define __local_add(i, l) local_add((i), (l)) |
196 | #define __local_sub(i, l) local_sub((i), (l)) | 196 | #define __local_sub(i, l) local_sub((i), (l)) |
197 | 197 | ||
198 | /* Use these for per-cpu local_t variables: on some archs they are | ||
199 | * much more efficient than these naive implementations. Note they take | ||
200 | * a variable, not an address. | ||
201 | * | ||
202 | * X86_64: This could be done better if we moved the per cpu data directly | ||
203 | * after GS. | ||
204 | */ | ||
205 | |||
206 | /* Need to disable preemption for the cpu local counters otherwise we could | ||
207 | still access a variable of a previous CPU in a non atomic way. */ | ||
208 | #define cpu_local_wrap_v(l) \ | ||
209 | ({ \ | ||
210 | local_t res__; \ | ||
211 | preempt_disable(); \ | ||
212 | res__ = (l); \ | ||
213 | preempt_enable(); \ | ||
214 | res__; \ | ||
215 | }) | ||
216 | #define cpu_local_wrap(l) \ | ||
217 | ({ \ | ||
218 | preempt_disable(); \ | ||
219 | (l); \ | ||
220 | preempt_enable(); \ | ||
221 | }) \ | ||
222 | |||
223 | #define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var((l)))) | ||
224 | #define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var((l)), (i))) | ||
225 | #define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var((l)))) | ||
226 | #define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var((l)))) | ||
227 | #define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var((l)))) | ||
228 | #define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var((l)))) | ||
229 | |||
230 | #define __cpu_local_inc(l) cpu_local_inc((l)) | ||
231 | #define __cpu_local_dec(l) cpu_local_dec((l)) | ||
232 | #define __cpu_local_add(i, l) cpu_local_add((i), (l)) | ||
233 | #define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) | ||
234 | |||
235 | #endif /* _ASM_X86_LOCAL_H */ | 198 | #endif /* _ASM_X86_LOCAL_H */ |
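[Editor's note] The removed cpu_local_* wrappers get no in-header replacement because the generic this_cpu_* operations now cover the same ground, and on x86 they compile to a single segment-prefixed instruction with no preempt_disable()/preempt_enable() bracket. A minimal before/after sketch (hits is a made-up counter):

	DEFINE_PER_CPU(unsigned long, hits);

	static void count_hit(void)
	{
		/* Was: cpu_local_inc() on a local_t inside a preempt-off region.
		 * Now: one "incl %gs:hits"-style insn, preemption-safe as is. */
		this_cpu_inc(hits);
	}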
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 858baa061cfc..6c3fdd631ed3 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -108,10 +108,11 @@ struct mce_log { | |||
108 | #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) | 108 | #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) |
109 | #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) | 109 | #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) |
110 | 110 | ||
111 | extern struct atomic_notifier_head x86_mce_decoder_chain; | ||
112 | 111 | ||
113 | #ifdef __KERNEL__ | 112 | #ifdef __KERNEL__ |
114 | 113 | ||
114 | extern struct atomic_notifier_head x86_mce_decoder_chain; | ||
115 | |||
115 | #include <linux/percpu.h> | 116 | #include <linux/percpu.h> |
116 | #include <linux/init.h> | 117 | #include <linux/init.h> |
117 | #include <asm/atomic.h> | 118 | #include <asm/atomic.h> |
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index c24ca9a56458..ef51b501e22a 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h | |||
@@ -12,8 +12,6 @@ struct device; | |||
12 | enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; | 12 | enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; |
13 | 13 | ||
14 | struct microcode_ops { | 14 | struct microcode_ops { |
15 | void (*init)(struct device *device); | ||
16 | void (*fini)(void); | ||
17 | enum ucode_state (*request_microcode_user) (int cpu, | 15 | enum ucode_state (*request_microcode_user) (int cpu, |
18 | const void __user *buf, size_t size); | 16 | const void __user *buf, size_t size); |
19 | 17 | ||
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h index a29f48c2a322..288b96f815a6 100644 --- a/arch/x86/include/asm/mmzone_64.h +++ b/arch/x86/include/asm/mmzone_64.h | |||
@@ -39,11 +39,5 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) | |||
39 | #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) | 39 | #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) |
40 | #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ | 40 | #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ |
41 | NODE_DATA(nid)->node_spanned_pages) | 41 | NODE_DATA(nid)->node_spanned_pages) |
42 | |||
43 | #ifdef CONFIG_NUMA_EMU | ||
44 | #define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024) | ||
45 | #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) | ||
46 | #endif | ||
47 | |||
48 | #endif | 42 | #endif |
49 | #endif /* _ASM_X86_MMZONE_64_H */ | 43 | #endif /* _ASM_X86_MMZONE_64_H */ |
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h new file mode 100644 index 000000000000..451d30e7f62d --- /dev/null +++ b/arch/x86/include/asm/mrst.h | |||
@@ -0,0 +1,19 @@ | |||
1 | /* | ||
2 | * mrst.h: Intel Moorestown platform specific setup code | ||
3 | * | ||
4 | * (C) Copyright 2009 Intel Corporation | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; version 2 | ||
9 | * of the License. | ||
10 | */ | ||
11 | #ifndef _ASM_X86_MRST_H | ||
12 | #define _ASM_X86_MRST_H | ||
13 | extern int pci_mrst_init(void); | ||
14 | int __init sfi_parse_mrtc(struct sfi_table_header *table); | ||
15 | |||
16 | #define SFI_MTMR_MAX_NUM 8 | ||
17 | #define SFI_MRTC_MAX 8 | ||
18 | |||
19 | #endif /* _ASM_X86_MRST_H */ | ||
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1cd58cdbc03f..4604e6a54d36 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -105,6 +105,8 @@ | |||
105 | #define MSR_AMD64_PATCH_LEVEL 0x0000008b | 105 | #define MSR_AMD64_PATCH_LEVEL 0x0000008b |
106 | #define MSR_AMD64_NB_CFG 0xc001001f | 106 | #define MSR_AMD64_NB_CFG 0xc001001f |
107 | #define MSR_AMD64_PATCH_LOADER 0xc0010020 | 107 | #define MSR_AMD64_PATCH_LOADER 0xc0010020 |
108 | #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 | ||
109 | #define MSR_AMD64_OSVW_STATUS 0xc0010141 | ||
108 | #define MSR_AMD64_IBSFETCHCTL 0xc0011030 | 110 | #define MSR_AMD64_IBSFETCHCTL 0xc0011030 |
109 | #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 | 111 | #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 |
110 | #define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 | 112 | #define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 |
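[Editor's note] The two new MSRs expose AMD's OS Visible Workaround mechanism: ID_LENGTH reports how many status bits are defined, and each set STATUS bit means the corresponding erratum applies to this part. A hedged probe sketch, assuming the OSVW CPUID feature bit was checked first:

	static bool osvw_erratum_applies(unsigned int bit)
	{
		u64 len, status;

		rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, len); /* count of valid bits */
		if (bit >= len)
			return false;                  /* bit undefined here */
		rdmsrl(MSR_AMD64_OSVW_STATUS, status);
		return status & (1ULL << bit);
	}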
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 139d4c1a33a7..93da9c3f3341 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h | |||
@@ -19,7 +19,6 @@ extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); | |||
19 | extern int check_nmi_watchdog(void); | 19 | extern int check_nmi_watchdog(void); |
20 | extern int nmi_watchdog_enabled; | 20 | extern int nmi_watchdog_enabled; |
21 | extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); | 21 | extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); |
22 | extern int avail_to_resrv_perfctr_nmi(unsigned int); | ||
23 | extern int reserve_perfctr_nmi(unsigned int); | 22 | extern int reserve_perfctr_nmi(unsigned int); |
24 | extern void release_perfctr_nmi(unsigned int); | 23 | extern void release_perfctr_nmi(unsigned int); |
25 | extern int reserve_evntsel_nmi(unsigned int); | 24 | extern int reserve_evntsel_nmi(unsigned int); |
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index c4ae822e415f..823e070e7c26 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h | |||
@@ -36,6 +36,11 @@ extern void __cpuinit numa_set_node(int cpu, int node); | |||
36 | extern void __cpuinit numa_clear_node(int cpu); | 36 | extern void __cpuinit numa_clear_node(int cpu); |
37 | extern void __cpuinit numa_add_cpu(int cpu); | 37 | extern void __cpuinit numa_add_cpu(int cpu); |
38 | extern void __cpuinit numa_remove_cpu(int cpu); | 38 | extern void __cpuinit numa_remove_cpu(int cpu); |
39 | |||
40 | #ifdef CONFIG_NUMA_EMU | ||
41 | #define FAKE_NODE_MIN_SIZE ((u64)64 << 20) | ||
42 | #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) | ||
43 | #endif /* CONFIG_NUMA_EMU */ | ||
39 | #else | 44 | #else |
40 | static inline void init_cpu_to_node(void) { } | 45 | static inline void init_cpu_to_node(void) { } |
41 | static inline void numa_set_node(int cpu, int node) { } | 46 | static inline void numa_set_node(int cpu, int node) { } |
diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h index 9f0a5f5d29ec..37c516545ec8 100644 --- a/arch/x86/include/asm/numaq.h +++ b/arch/x86/include/asm/numaq.h | |||
@@ -30,9 +30,14 @@ | |||
30 | 30 | ||
31 | extern int found_numaq; | 31 | extern int found_numaq; |
32 | extern int get_memcfg_numaq(void); | 32 | extern int get_memcfg_numaq(void); |
33 | extern int pci_numaq_init(void); | ||
33 | 34 | ||
34 | extern void *xquad_portio; | 35 | extern void *xquad_portio; |
35 | 36 | ||
37 | #define XQUAD_PORTIO_BASE 0xfe400000 | ||
38 | #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ | ||
39 | #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) | ||
40 | |||
36 | /* | 41 | /* |
37 | * SYS_CFG_DATA_PRIV_ADDR, struct eachquadmem, and struct sys_cfg_data are the | 42 | * SYS_CFG_DATA_PRIV_ADDR, struct eachquadmem, and struct sys_cfg_data are the |
38 | */ | 43 | */ |
diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h index 3a57385d9fa7..101229b0d8ed 100644 --- a/arch/x86/include/asm/olpc.h +++ b/arch/x86/include/asm/olpc.h | |||
@@ -13,7 +13,6 @@ struct olpc_platform_t { | |||
13 | 13 | ||
14 | #define OLPC_F_PRESENT 0x01 | 14 | #define OLPC_F_PRESENT 0x01 |
15 | #define OLPC_F_DCON 0x02 | 15 | #define OLPC_F_DCON 0x02 |
16 | #define OLPC_F_VSA 0x04 | ||
17 | 16 | ||
18 | #ifdef CONFIG_OLPC | 17 | #ifdef CONFIG_OLPC |
19 | 18 | ||
@@ -51,18 +50,6 @@ static inline int olpc_has_dcon(void) | |||
51 | } | 50 | } |
52 | 51 | ||
53 | /* | 52 | /* |
54 | * The VSA is software from AMD that typical Geode bioses will include. | ||
55 | * It is used to emulate the PCI bus, VGA, etc. OLPC's Open Firmware does | ||
56 | * not include the VSA; instead, PCI is emulated by the kernel. | ||
57 | * | ||
58 | * The VSA is described further in arch/x86/pci/olpc.c. | ||
59 | */ | ||
60 | static inline int olpc_has_vsa(void) | ||
61 | { | ||
62 | return (olpc_platform_info.flags & OLPC_F_VSA) ? 1 : 0; | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * The "Mass Production" version of OLPC's XO is identified as being model | 53 | * The "Mass Production" version of OLPC's XO is identified as being model |
67 | * C2. During the prototype phase, the following models (in chronological | 54 | * C2. During the prototype phase, the following models (in chronological |
68 | * order) were created: A1, B1, B2, B3, B4, C1. The A1 through B2 models | 55 | * order) were created: A1, B1, B2, B3, B4, C1. The A1 through B2 models |
@@ -87,13 +74,10 @@ static inline int olpc_has_dcon(void) | |||
87 | return 0; | 74 | return 0; |
88 | } | 75 | } |
89 | 76 | ||
90 | static inline int olpc_has_vsa(void) | ||
91 | { | ||
92 | return 0; | ||
93 | } | ||
94 | |||
95 | #endif | 77 | #endif |
96 | 78 | ||
79 | extern int pci_olpc_init(void); | ||
80 | |||
97 | /* EC related functions */ | 81 | /* EC related functions */ |
98 | 82 | ||
99 | extern int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen, | 83 | extern int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen, |
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 642fe34b36a2..a667f24c7254 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h | |||
@@ -40,7 +40,6 @@ | |||
40 | 40 | ||
41 | #ifndef __ASSEMBLY__ | 41 | #ifndef __ASSEMBLY__ |
42 | 42 | ||
43 | extern int page_is_ram(unsigned long pagenr); | ||
44 | extern int devmem_is_allowed(unsigned long pagenr); | 43 | extern int devmem_is_allowed(unsigned long pagenr); |
45 | 44 | ||
46 | extern unsigned long max_low_pfn_mapped; | 45 | extern unsigned long max_low_pfn_mapped; |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index dd59a85a918f..5653f43d90e5 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -435,15 +435,6 @@ static inline void paravirt_release_pud(unsigned long pfn) | |||
435 | PVOP_VCALL1(pv_mmu_ops.release_pud, pfn); | 435 | PVOP_VCALL1(pv_mmu_ops.release_pud, pfn); |
436 | } | 436 | } |
437 | 437 | ||
438 | #ifdef CONFIG_HIGHPTE | ||
439 | static inline void *kmap_atomic_pte(struct page *page, enum km_type type) | ||
440 | { | ||
441 | unsigned long ret; | ||
442 | ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type); | ||
443 | return (void *)ret; | ||
444 | } | ||
445 | #endif | ||
446 | |||
447 | static inline void pte_update(struct mm_struct *mm, unsigned long addr, | 438 | static inline void pte_update(struct mm_struct *mm, unsigned long addr, |
448 | pte_t *ptep) | 439 | pte_t *ptep) |
449 | { | 440 | { |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index b1e70d51e40c..db9ef5532341 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -304,10 +304,6 @@ struct pv_mmu_ops { | |||
304 | #endif /* PAGETABLE_LEVELS == 4 */ | 304 | #endif /* PAGETABLE_LEVELS == 4 */ |
305 | #endif /* PAGETABLE_LEVELS >= 3 */ | 305 | #endif /* PAGETABLE_LEVELS >= 3 */ |
306 | 306 | ||
307 | #ifdef CONFIG_HIGHPTE | ||
308 | void *(*kmap_atomic_pte)(struct page *page, enum km_type type); | ||
309 | #endif | ||
310 | |||
311 | struct pv_lazy_ops lazy_mode; | 307 | struct pv_lazy_ops lazy_mode; |
312 | 308 | ||
313 | /* dom0 ops */ | 309 | /* dom0 ops */ |
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index ada8c201d513..404a880ea325 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h | |||
@@ -45,8 +45,15 @@ static inline int pci_proc_domain(struct pci_bus *bus) | |||
45 | 45 | ||
46 | #ifdef CONFIG_PCI | 46 | #ifdef CONFIG_PCI |
47 | extern unsigned int pcibios_assign_all_busses(void); | 47 | extern unsigned int pcibios_assign_all_busses(void); |
48 | extern int pci_legacy_init(void); | ||
49 | # ifdef CONFIG_ACPI | ||
50 | # define x86_default_pci_init pci_acpi_init | ||
51 | # else | ||
52 | # define x86_default_pci_init pci_legacy_init | ||
53 | # endif | ||
48 | #else | 54 | #else |
49 | #define pcibios_assign_all_busses() 0 | 55 | # define pcibios_assign_all_busses() 0 |
56 | # define x86_default_pci_init NULL | ||
50 | #endif | 57 | #endif |
51 | 58 | ||
52 | extern unsigned long pci_mem_start; | 59 | extern unsigned long pci_mem_start; |
@@ -90,40 +97,14 @@ extern void pci_iommu_alloc(void); | |||
90 | 97 | ||
91 | #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) | 98 | #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) |
92 | 99 | ||
93 | #if defined(CONFIG_X86_64) || defined(CONFIG_DMAR) || defined(CONFIG_DMA_API_DEBUG) | ||
94 | |||
95 | #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ | ||
96 | dma_addr_t ADDR_NAME; | ||
97 | #define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ | ||
98 | __u32 LEN_NAME; | ||
99 | #define pci_unmap_addr(PTR, ADDR_NAME) \ | ||
100 | ((PTR)->ADDR_NAME) | ||
101 | #define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ | ||
102 | (((PTR)->ADDR_NAME) = (VAL)) | ||
103 | #define pci_unmap_len(PTR, LEN_NAME) \ | ||
104 | ((PTR)->LEN_NAME) | ||
105 | #define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ | ||
106 | (((PTR)->LEN_NAME) = (VAL)) | ||
107 | |||
108 | #else | ||
109 | |||
110 | #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME[0]; | ||
111 | #define DECLARE_PCI_UNMAP_LEN(LEN_NAME) unsigned LEN_NAME[0]; | ||
112 | #define pci_unmap_addr(PTR, ADDR_NAME) sizeof((PTR)->ADDR_NAME) | ||
113 | #define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ | ||
114 | do { break; } while (pci_unmap_addr(PTR, ADDR_NAME)) | ||
115 | #define pci_unmap_len(PTR, LEN_NAME) sizeof((PTR)->LEN_NAME) | ||
116 | #define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ | ||
117 | do { break; } while (pci_unmap_len(PTR, LEN_NAME)) | ||
118 | |||
119 | #endif | ||
120 | |||
121 | #endif /* __KERNEL__ */ | 100 | #endif /* __KERNEL__ */ |
122 | 101 | ||
123 | #ifdef CONFIG_X86_64 | 102 | #ifdef CONFIG_X86_64 |
124 | #include "pci_64.h" | 103 | #include "pci_64.h" |
125 | #endif | 104 | #endif |
126 | 105 | ||
106 | void dma32_reserve_bootmem(void); | ||
107 | |||
127 | /* implement the pci_ DMA API in terms of the generic device dma_ one */ | 108 | /* implement the pci_ DMA API in terms of the generic device dma_ one */ |
128 | #include <asm-generic/pci-dma-compat.h> | 109 | #include <asm-generic/pci-dma-compat.h> |
129 | 110 | ||
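[Editor's note] The DECLARE_PCI_UNMAP_* block leaves this header for a generic home keyed off a single Kconfig symbol; driver-side usage is unchanged, and the macros still expand to real fields only when unmap state is actually needed. A typical (illustrative) driver pattern:

	struct rx_ring_entry {
		struct sk_buff *skb;
		DECLARE_PCI_UNMAP_ADDR(mapping)  /* dma_addr_t, or zero-sized */
		DECLARE_PCI_UNMAP_LEN(len)       /* __u32, or zero-sized */
	};

	static void rx_entry_unmap(struct pci_dev *pdev, struct rx_ring_entry *e)
	{
		pci_unmap_single(pdev, pci_unmap_addr(e, mapping),
				 pci_unmap_len(e, len), PCI_DMA_FROMDEVICE);
	}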
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h index ae5e40f67daf..fe15cfb21b9b 100644 --- a/arch/x86/include/asm/pci_64.h +++ b/arch/x86/include/asm/pci_64.h | |||
@@ -22,8 +22,6 @@ extern int (*pci_config_read)(int seg, int bus, int dev, int fn, | |||
22 | extern int (*pci_config_write)(int seg, int bus, int dev, int fn, | 22 | extern int (*pci_config_write)(int seg, int bus, int dev, int fn, |
23 | int reg, int len, u32 value); | 23 | int reg, int len, u32 value); |
24 | 24 | ||
25 | extern void dma32_reserve_bootmem(void); | ||
26 | |||
27 | #endif /* __KERNEL__ */ | 25 | #endif /* __KERNEL__ */ |
28 | 26 | ||
29 | #endif /* _ASM_X86_PCI_64_H */ | 27 | #endif /* _ASM_X86_PCI_64_H */ |
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index b4bf9a942ed0..1a0422348d6d 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h | |||
@@ -29,6 +29,7 @@ | |||
29 | #define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 | 29 | #define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 |
30 | #define PCI_HAS_IO_ECS 0x40000 | 30 | #define PCI_HAS_IO_ECS 0x40000 |
31 | #define PCI_NOASSIGN_ROMS 0x80000 | 31 | #define PCI_NOASSIGN_ROMS 0x80000 |
32 | #define PCI_ROOT_NO_CRS 0x100000 | ||
32 | 33 | ||
33 | extern unsigned int pci_probe; | 34 | extern unsigned int pci_probe; |
34 | extern unsigned long pirq_table_addr; | 35 | extern unsigned long pirq_table_addr; |
@@ -82,7 +83,6 @@ struct irq_routing_table { | |||
82 | 83 | ||
83 | extern unsigned int pcibios_irq_mask; | 84 | extern unsigned int pcibios_irq_mask; |
84 | 85 | ||
85 | extern int pcibios_scanned; | ||
86 | extern spinlock_t pci_config_lock; | 86 | extern spinlock_t pci_config_lock; |
87 | 87 | ||
88 | extern int (*pcibios_enable_irq)(struct pci_dev *dev); | 88 | extern int (*pcibios_enable_irq)(struct pci_dev *dev); |
@@ -105,16 +105,15 @@ extern bool port_cf9_safe; | |||
105 | extern int pci_direct_probe(void); | 105 | extern int pci_direct_probe(void); |
106 | extern void pci_direct_init(int type); | 106 | extern void pci_direct_init(int type); |
107 | extern void pci_pcbios_init(void); | 107 | extern void pci_pcbios_init(void); |
108 | extern int pci_olpc_init(void); | ||
109 | extern void __init dmi_check_pciprobe(void); | 108 | extern void __init dmi_check_pciprobe(void); |
110 | extern void __init dmi_check_skip_isa_align(void); | 109 | extern void __init dmi_check_skip_isa_align(void); |
111 | 110 | ||
112 | /* some common used subsys_initcalls */ | 111 | /* some common used subsys_initcalls */ |
113 | extern int __init pci_acpi_init(void); | 112 | extern int __init pci_acpi_init(void); |
114 | extern int __init pcibios_irq_init(void); | 113 | extern void __init pcibios_irq_init(void); |
115 | extern int __init pci_visws_init(void); | ||
116 | extern int __init pci_numaq_init(void); | ||
117 | extern int __init pcibios_init(void); | 114 | extern int __init pcibios_init(void); |
115 | extern int pci_legacy_init(void); | ||
116 | extern void pcibios_fixup_irqs(void); | ||
118 | 117 | ||
119 | /* pci-mmconfig.c */ | 118 | /* pci-mmconfig.c */ |
120 | 119 | ||
@@ -182,3 +181,17 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val) | |||
182 | { | 181 | { |
183 | asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory"); | 182 | asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory"); |
184 | } | 183 | } |
184 | |||
185 | #ifdef CONFIG_PCI | ||
186 | # ifdef CONFIG_ACPI | ||
187 | # define x86_default_pci_init pci_acpi_init | ||
188 | # else | ||
189 | # define x86_default_pci_init pci_legacy_init | ||
190 | # endif | ||
191 | # define x86_default_pci_init_irq pcibios_irq_init | ||
192 | # define x86_default_pci_fixup_irqs pcibios_fixup_irqs | ||
193 | #else | ||
194 | # define x86_default_pci_init NULL | ||
195 | # define x86_default_pci_init_irq NULL | ||
196 | # define x86_default_pci_fixup_irqs NULL | ||
197 | #endif | ||
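[Editor's note] These defaults let platform code choose its PCI setup hooks without open-coded CONFIG_ACPI tests. A sketch of the intended consumption, assuming an x86_init-style ops table (the struct shown is an assumption for illustration, not part of this hunk):

	struct x86_init_pci_ops {
		int  (*init)(void);        /* bus enumeration entry point */
		void (*init_irq)(void);    /* IRQ routing setup */
		void (*fixup_irqs)(void);  /* post-scan IRQ fixups */
	};

	static struct x86_init_pci_ops pci_ops = {
		.init       = x86_default_pci_init,
		.init_irq   = x86_default_pci_init_irq,
		.fixup_irqs = x86_default_pci_fixup_irqs,
	};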
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 0c44196b78ac..0ec6d12d84e6 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -25,19 +25,18 @@ | |||
25 | */ | 25 | */ |
26 | #ifdef CONFIG_SMP | 26 | #ifdef CONFIG_SMP |
27 | #define PER_CPU(var, reg) \ | 27 | #define PER_CPU(var, reg) \ |
28 | __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \ | 28 | __percpu_mov_op %__percpu_seg:this_cpu_off, reg; \ |
29 | lea per_cpu__##var(reg), reg | 29 | lea var(reg), reg |
30 | #define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var | 30 | #define PER_CPU_VAR(var) %__percpu_seg:var |
31 | #else /* ! SMP */ | 31 | #else /* ! SMP */ |
32 | #define PER_CPU(var, reg) \ | 32 | #define PER_CPU(var, reg) __percpu_mov_op $var, reg |
33 | __percpu_mov_op $per_cpu__##var, reg | 33 | #define PER_CPU_VAR(var) var |
34 | #define PER_CPU_VAR(var) per_cpu__##var | ||
35 | #endif /* SMP */ | 34 | #endif /* SMP */ |
36 | 35 | ||
37 | #ifdef CONFIG_X86_64_SMP | 36 | #ifdef CONFIG_X86_64_SMP |
38 | #define INIT_PER_CPU_VAR(var) init_per_cpu__##var | 37 | #define INIT_PER_CPU_VAR(var) init_per_cpu__##var |
39 | #else | 38 | #else |
40 | #define INIT_PER_CPU_VAR(var) per_cpu__##var | 39 | #define INIT_PER_CPU_VAR(var) var |
41 | #endif | 40 | #endif |
42 | 41 | ||
43 | #else /* ...!ASSEMBLY */ | 42 | #else /* ...!ASSEMBLY */ |
@@ -60,12 +59,12 @@ | |||
60 | * There also must be an entry in vmlinux_64.lds.S | 59 | * There also must be an entry in vmlinux_64.lds.S |
61 | */ | 60 | */ |
62 | #define DECLARE_INIT_PER_CPU(var) \ | 61 | #define DECLARE_INIT_PER_CPU(var) \ |
63 | extern typeof(per_cpu_var(var)) init_per_cpu_var(var) | 62 | extern typeof(var) init_per_cpu_var(var) |
64 | 63 | ||
65 | #ifdef CONFIG_X86_64_SMP | 64 | #ifdef CONFIG_X86_64_SMP |
66 | #define init_per_cpu_var(var) init_per_cpu__##var | 65 | #define init_per_cpu_var(var) init_per_cpu__##var |
67 | #else | 66 | #else |
68 | #define init_per_cpu_var(var) per_cpu_var(var) | 67 | #define init_per_cpu_var(var) var |
69 | #endif | 68 | #endif |
70 | 69 | ||
71 | /* For arch-specific code, we can use direct single-insn ops (they | 70 | /* For arch-specific code, we can use direct single-insn ops (they |
@@ -104,6 +103,64 @@ do { \ | |||
104 | } \ | 103 | } \ |
105 | } while (0) | 104 | } while (0) |
106 | 105 | ||
106 | /* | ||
107 | * Generate a percpu add to memory instruction and optimize code | ||
108 | * if a one is added or subtracted. | ||
109 | */ | ||
110 | #define percpu_add_op(var, val) \ | ||
111 | do { \ | ||
112 | typedef typeof(var) pao_T__; \ | ||
113 | const int pao_ID__ = (__builtin_constant_p(val) && \ | ||
114 | ((val) == 1 || (val) == -1)) ? (val) : 0; \ | ||
115 | if (0) { \ | ||
116 | pao_T__ pao_tmp__; \ | ||
117 | pao_tmp__ = (val); \ | ||
118 | } \ | ||
119 | switch (sizeof(var)) { \ | ||
120 | case 1: \ | ||
121 | if (pao_ID__ == 1) \ | ||
122 | asm("incb "__percpu_arg(0) : "+m" (var)); \ | ||
123 | else if (pao_ID__ == -1) \ | ||
124 | asm("decb "__percpu_arg(0) : "+m" (var)); \ | ||
125 | else \ | ||
126 | asm("addb %1, "__percpu_arg(0) \ | ||
127 | : "+m" (var) \ | ||
128 | : "qi" ((pao_T__)(val))); \ | ||
129 | break; \ | ||
130 | case 2: \ | ||
131 | if (pao_ID__ == 1) \ | ||
132 | asm("incw "__percpu_arg(0) : "+m" (var)); \ | ||
133 | else if (pao_ID__ == -1) \ | ||
134 | asm("decw "__percpu_arg(0) : "+m" (var)); \ | ||
135 | else \ | ||
136 | asm("addw %1, "__percpu_arg(0) \ | ||
137 | : "+m" (var) \ | ||
138 | : "ri" ((pao_T__)(val))); \ | ||
139 | break; \ | ||
140 | case 4: \ | ||
141 | if (pao_ID__ == 1) \ | ||
142 | asm("incl "__percpu_arg(0) : "+m" (var)); \ | ||
143 | else if (pao_ID__ == -1) \ | ||
144 | asm("decl "__percpu_arg(0) : "+m" (var)); \ | ||
145 | else \ | ||
146 | asm("addl %1, "__percpu_arg(0) \ | ||
147 | : "+m" (var) \ | ||
148 | : "ri" ((pao_T__)(val))); \ | ||
149 | break; \ | ||
150 | case 8: \ | ||
151 | if (pao_ID__ == 1) \ | ||
152 | asm("incq "__percpu_arg(0) : "+m" (var)); \ | ||
153 | else if (pao_ID__ == -1) \ | ||
154 | asm("decq "__percpu_arg(0) : "+m" (var)); \ | ||
155 | else \ | ||
156 | asm("addq %1, "__percpu_arg(0) \ | ||
157 | : "+m" (var) \ | ||
158 | : "re" ((pao_T__)(val))); \ | ||
159 | break; \ | ||
160 | default: __bad_percpu_size(); \ | ||
161 | } \ | ||
162 | } while (0) | ||
163 | |||
107 | #define percpu_from_op(op, var, constraint) \ | 164 | #define percpu_from_op(op, var, constraint) \ |
108 | ({ \ | 165 | ({ \ |
109 | typeof(var) pfo_ret__; \ | 166 | typeof(var) pfo_ret__; \ |
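[Editor's note] percpu_add_op exists so that adding a compile-time constant 1 or -1 becomes an inc/dec rather than an add with an immediate, while everything else stays a single read-modify-write on the segment-prefixed address. A sketch of the expansions for a 4-byte variable on SMP:

	DEFINE_PER_CPU(int, nr_events);

	static void bump_events(void)
	{
		percpu_add(nr_events, 1);  /* emits: incl %gs:nr_events */
		percpu_sub(nr_events, 1);  /* percpu_add_op(var, -1): decl */
		percpu_add(nr_events, 5);  /* emits: addl $5, %gs:nr_events */
	}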
@@ -133,6 +190,29 @@ do { \ | |||
133 | pfo_ret__; \ | 190 | pfo_ret__; \ |
134 | }) | 191 | }) |
135 | 192 | ||
193 | #define percpu_unary_op(op, var) \ | ||
194 | ({ \ | ||
195 | switch (sizeof(var)) { \ | ||
196 | case 1: \ | ||
197 | asm(op "b "__percpu_arg(0) \ | ||
198 | : "+m" (var)); \ | ||
199 | break; \ | ||
200 | case 2: \ | ||
201 | asm(op "w "__percpu_arg(0) \ | ||
202 | : "+m" (var)); \ | ||
203 | break; \ | ||
204 | case 4: \ | ||
205 | asm(op "l "__percpu_arg(0) \ | ||
206 | : "+m" (var)); \ | ||
207 | break; \ | ||
208 | case 8: \ | ||
209 | asm(op "q "__percpu_arg(0) \ | ||
210 | : "+m" (var)); \ | ||
211 | break; \ | ||
212 | default: __bad_percpu_size(); \ | ||
213 | } \ | ||
214 | }) | ||
215 | |||
136 | /* | 216 | /* |
137 | * percpu_read() makes gcc load the percpu variable every time it is | 217 | * percpu_read() makes gcc load the percpu variable every time it is |
138 | * accessed while percpu_read_stable() allows the value to be cached. | 218 | * accessed while percpu_read_stable() allows the value to be cached. |
@@ -142,16 +222,15 @@ do { \ | |||
142 | * per-thread variables implemented as per-cpu variables and thus | 222 | * per-thread variables implemented as per-cpu variables and thus |
143 | * stable for the duration of the respective task. | 223 | * stable for the duration of the respective task. |
144 | */ | 224 | */ |
145 | #define percpu_read(var) percpu_from_op("mov", per_cpu__##var, \ | 225 | #define percpu_read(var) percpu_from_op("mov", var, "m" (var)) |
146 | "m" (per_cpu__##var)) | 226 | #define percpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var))) |
147 | #define percpu_read_stable(var) percpu_from_op("mov", per_cpu__##var, \ | 227 | #define percpu_write(var, val) percpu_to_op("mov", var, val) |
148 | "p" (&per_cpu__##var)) | 228 | #define percpu_add(var, val) percpu_add_op(var, val) |
149 | #define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) | 229 | #define percpu_sub(var, val) percpu_add_op(var, -(val)) |
150 | #define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) | 230 | #define percpu_and(var, val) percpu_to_op("and", var, val) |
151 | #define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) | 231 | #define percpu_or(var, val) percpu_to_op("or", var, val) |
152 | #define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val) | 232 | #define percpu_xor(var, val) percpu_to_op("xor", var, val) |
153 | #define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) | 233 | #define percpu_inc(var) percpu_unary_op("inc", var) |
154 | #define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) | ||
155 | 234 | ||
156 | #define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 235 | #define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
157 | #define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 236 | #define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
@@ -160,9 +239,9 @@ do { \ | |||
160 | #define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) | 239 | #define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) |
161 | #define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) | 240 | #define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) |
162 | #define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) | 241 | #define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) |
163 | #define __this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) | 242 | #define __this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) |
164 | #define __this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) | 243 | #define __this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) |
165 | #define __this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) | 244 | #define __this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) |
166 | #define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) | 245 | #define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) |
167 | #define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) | 246 | #define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) |
168 | #define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) | 247 | #define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) |
@@ -179,9 +258,9 @@ do { \ | |||
179 | #define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) | 258 | #define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) |
180 | #define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) | 259 | #define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) |
181 | #define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) | 260 | #define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) |
182 | #define this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) | 261 | #define this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) |
183 | #define this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) | 262 | #define this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) |
184 | #define this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) | 263 | #define this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) |
185 | #define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) | 264 | #define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) |
186 | #define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) | 265 | #define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) |
187 | #define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) | 266 | #define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) |
@@ -192,9 +271,9 @@ do { \ | |||
192 | #define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) | 271 | #define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) |
193 | #define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) | 272 | #define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) |
194 | 273 | ||
195 | #define irqsafe_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) | 274 | #define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val) |
196 | #define irqsafe_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) | 275 | #define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val) |
197 | #define irqsafe_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) | 276 | #define irqsafe_cpu_add_4(pcp, val) percpu_add_op((pcp), val) |
198 | #define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) | 277 | #define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) |
199 | #define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) | 278 | #define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) |
200 | #define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) | 279 | #define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) |
@@ -212,19 +291,19 @@ do { \ | |||
212 | #ifdef CONFIG_X86_64 | 291 | #ifdef CONFIG_X86_64 |
213 | #define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 292 | #define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
214 | #define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) | 293 | #define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) |
215 | #define __this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) | 294 | #define __this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) |
216 | #define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) | 295 | #define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) |
217 | #define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | 296 | #define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) |
218 | #define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 297 | #define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
219 | 298 | ||
220 | #define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 299 | #define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
221 | #define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) | 300 | #define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) |
222 | #define this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) | 301 | #define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) |
223 | #define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) | 302 | #define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) |
224 | #define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | 303 | #define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) |
225 | #define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 304 | #define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
226 | 305 | ||
227 | #define irqsafe_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) | 306 | #define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val) |
228 | #define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) | 307 | #define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) |
229 | #define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | 308 | #define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) |
230 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 309 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
@@ -236,7 +315,7 @@ do { \ | |||
236 | ({ \ | 315 | ({ \ |
237 | int old__; \ | 316 | int old__; \ |
238 | asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ | 317 | asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ |
239 | : "=r" (old__), "+m" (per_cpu__##var) \ | 318 | : "=r" (old__), "+m" (var) \ |
240 | : "dIr" (bit)); \ | 319 | : "dIr" (bit)); \ |
241 | old__; \ | 320 | old__; \ |
242 | }) | 321 | }) |
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8d9f8548a870..db6109a885a7 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -18,7 +18,8 @@ | |||
18 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 | 18 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 |
19 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 | 19 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 |
20 | 20 | ||
21 | #define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) | 21 | #define ARCH_PERFMON_EVENTSEL_ENABLE (1 << 22) |
22 | #define ARCH_PERFMON_EVENTSEL_ANY (1 << 21) | ||
22 | #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) | 23 | #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) |
23 | #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) | 24 | #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) |
24 | #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) | 25 | #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) |
@@ -26,7 +27,14 @@ | |||
26 | /* | 27 | /* |
27 | * Includes eventsel and unit mask as well: | 28 | * Includes eventsel and unit mask as well: |
28 | */ | 29 | */ |
29 | #define ARCH_PERFMON_EVENT_MASK 0xffff | 30 | |
31 | |||
32 | #define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL | ||
33 | #define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL | ||
34 | #define INTEL_ARCH_EDGE_MASK 0x00040000ULL | ||
35 | #define INTEL_ARCH_INV_MASK 0x00800000ULL | ||
36 | #define INTEL_ARCH_CNT_MASK 0xFF000000ULL | ||
37 | #define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK) | ||
30 | 38 | ||
31 | /* | 39 | /* |
32 | * filter mask to validate fixed counter events. | 40 | * filter mask to validate fixed counter events. |
@@ -37,7 +45,12 @@ | |||
37 | * The other filters are supported by fixed counters. | 45 | * The other filters are supported by fixed counters. |
38 | * The any-thread option is supported starting with v3. | 46 | * The any-thread option is supported starting with v3. |
39 | */ | 47 | */ |
40 | #define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000 | 48 | #define INTEL_ARCH_FIXED_MASK \ |
49 | (INTEL_ARCH_CNT_MASK| \ | ||
50 | INTEL_ARCH_INV_MASK| \ | ||
51 | INTEL_ARCH_EDGE_MASK|\ | ||
52 | INTEL_ARCH_UNIT_MASK|\ | ||
53 | INTEL_ARCH_EVENT_MASK) | ||
41 | 54 | ||
42 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c | 55 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c |
43 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | 56 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) |
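[Editor's note] As a worked example of the new field masks: the architectural LLC-misses event is eventsel 0x2e with unit mask 0x41, i.e. a raw config of 0x412e, and INTEL_ARCH_EVENT_MASK keeps exactly those two low bytes:

	u64 config = 0x412eULL;          /* umask 0x41 << 8 | event 0x2e */

	config & INTEL_ARCH_EVTSEL_MASK; /* == 0x2e   (event select byte) */
	config & INTEL_ARCH_UNIT_MASK;   /* == 0x4100 (unit mask byte)    */
	config & INTEL_ARCH_EVENT_MASK;  /* == 0x412e (both together)     */
	config & INTEL_ARCH_INV_MASK;    /* == 0      (bit 23 not set)    */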
@@ -104,6 +117,18 @@ union cpuid10_edx { | |||
104 | */ | 117 | */ |
105 | #define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) | 118 | #define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) |
106 | 119 | ||
120 | /* IbsFetchCtl bits/masks */ | ||
121 | #define IBS_FETCH_RAND_EN (1ULL<<57) | ||
122 | #define IBS_FETCH_VAL (1ULL<<49) | ||
123 | #define IBS_FETCH_ENABLE (1ULL<<48) | ||
124 | #define IBS_FETCH_CNT 0xFFFF0000ULL | ||
125 | #define IBS_FETCH_MAX_CNT 0x0000FFFFULL | ||
126 | |||
127 | /* IbsOpCtl bits */ | ||
128 | #define IBS_OP_CNT_CTL (1ULL<<19) | ||
129 | #define IBS_OP_VAL (1ULL<<18) | ||
130 | #define IBS_OP_ENABLE (1ULL<<17) | ||
131 | #define IBS_OP_MAX_CNT 0x0000FFFFULL | ||
107 | 132 | ||
108 | #ifdef CONFIG_PERF_EVENTS | 133 | #ifdef CONFIG_PERF_EVENTS |
109 | extern void init_hw_perf_events(void); | 134 | extern void init_hw_perf_events(void); |
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index 0e8c2a0fd922..271de94c3810 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h | |||
@@ -23,6 +23,11 @@ static inline void paravirt_release_pud(unsigned long pfn) {} | |||
23 | #endif | 23 | #endif |
24 | 24 | ||
25 | /* | 25 | /* |
26 | * Flags to use when allocating a user page table page. | ||
27 | */ | ||
28 | extern gfp_t __userpte_alloc_gfp; | ||
29 | |||
30 | /* | ||
26 | * Allocate and free page tables. | 31 | * Allocate and free page tables. |
27 | */ | 32 | */ |
28 | extern pgd_t *pgd_alloc(struct mm_struct *); | 33 | extern pgd_t *pgd_alloc(struct mm_struct *); |
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 01fd9461d323..2984a25ff383 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h | |||
@@ -19,7 +19,6 @@ | |||
19 | #include <asm/paravirt.h> | 19 | #include <asm/paravirt.h> |
20 | 20 | ||
21 | #include <linux/bitops.h> | 21 | #include <linux/bitops.h> |
22 | #include <linux/slab.h> | ||
23 | #include <linux/list.h> | 22 | #include <linux/list.h> |
24 | #include <linux/spinlock.h> | 23 | #include <linux/spinlock.h> |
25 | 24 | ||
@@ -54,10 +53,10 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); | |||
54 | in_irq() ? KM_IRQ_PTE : \ | 53 | in_irq() ? KM_IRQ_PTE : \ |
55 | KM_PTE0) | 54 | KM_PTE0) |
56 | #define pte_offset_map(dir, address) \ | 55 | #define pte_offset_map(dir, address) \ |
57 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \ | 56 | ((pte_t *)kmap_atomic(pmd_page(*(dir)), __KM_PTE) + \ |
58 | pte_index((address))) | 57 | pte_index((address))) |
59 | #define pte_offset_map_nested(dir, address) \ | 58 | #define pte_offset_map_nested(dir, address) \ |
60 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ | 59 | ((pte_t *)kmap_atomic(pmd_page(*(dir)), KM_PTE1) + \ |
61 | pte_index((address))) | 60 | pte_index((address))) |
62 | #define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE) | 61 | #define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE) |
63 | #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) | 62 | #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) |
@@ -80,7 +79,7 @@ do { \ | |||
80 | * The i386 doesn't have any external MMU info: the kernel page | 79 | * The i386 doesn't have any external MMU info: the kernel page |
81 | * tables contain all the necessary information. | 80 | * tables contain all the necessary information. |
82 | */ | 81 | */ |
83 | #define update_mmu_cache(vma, address, pte) do { } while (0) | 82 | #define update_mmu_cache(vma, address, ptep) do { } while (0) |
84 | 83 | ||
85 | #endif /* !__ASSEMBLY__ */ | 84 | #endif /* !__ASSEMBLY__ */ |
86 | 85 | ||
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index c57a30117149..181be528c612 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h | |||
@@ -129,7 +129,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; } | |||
129 | #define pte_unmap(pte) /* NOP */ | 129 | #define pte_unmap(pte) /* NOP */ |
130 | #define pte_unmap_nested(pte) /* NOP */ | 130 | #define pte_unmap_nested(pte) /* NOP */ |
131 | 131 | ||
132 | #define update_mmu_cache(vma, address, pte) do { } while (0) | 132 | #define update_mmu_cache(vma, address, ptep) do { } while (0) |
133 | 133 | ||
134 | /* Encode and de-code a swap entry */ | 134 | /* Encode and de-code a swap entry */ |
135 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE | 135 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index fc801bab1b3b..b753ea59703a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -450,6 +450,8 @@ struct thread_struct { | |||
450 | struct perf_event *ptrace_bps[HBP_NUM]; | 450 | struct perf_event *ptrace_bps[HBP_NUM]; |
451 | /* Debug status used for traps, single steps, etc... */ | 451 | /* Debug status used for traps, single steps, etc... */ |
452 | unsigned long debugreg6; | 452 | unsigned long debugreg6; |
453 | /* Keep track of the exact dr7 value set by the user */ | ||
454 | unsigned long ptrace_dr7; | ||
453 | /* Fault info: */ | 455 | /* Fault info: */ |
454 | unsigned long cr2; | 456 | unsigned long cr2; |
455 | unsigned long trap_no; | 457 | unsigned long trap_no; |
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 4009f6534f52..6f414ed88620 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h | |||
@@ -23,14 +23,4 @@ extern int reboot_force; | |||
23 | 23 | ||
24 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); | 24 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); |
25 | 25 | ||
26 | /* | ||
27 | * This looks more complex than it should be. But we need to | ||
28 | * get the type for the ~ right in round_down (it needs to be | ||
29 | * as wide as the result!), and we want to evaluate the macro | ||
30 | * arguments just once each. | ||
31 | */ | ||
32 | #define __round_mask(x,y) ((__typeof__(x))((y)-1)) | ||
33 | #define round_up(x,y) ((((x)-1) | __round_mask(x,y))+1) | ||
34 | #define round_down(x,y) ((x) & ~__round_mask(x,y)) | ||
35 | |||
36 | #endif /* _ASM_X86_PROTO_H */ | 26 | #endif /* _ASM_X86_PROTO_H */ |
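[Editor's note] round_up()/round_down() leave this header (presumably promoted to a generic one); the definitions are unchanged, and for a power-of-two y the arithmetic works out as follows:

	/* __round_mask(x, 4) == (typeof(x))3, so: */
	round_up(5, 4);   /* ((5-1) | 3) + 1 == (4 | 3) + 1 == 7 + 1 == 8 */
	round_up(8, 4);   /* ((8-1) | 3) + 1 == 7 + 1 == 8 (already aligned) */
	round_down(5, 4); /* 5 & ~3 == 4 */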
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 9d369f680321..69a686a7dff0 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -274,18 +274,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, | |||
274 | return 0; | 274 | return 0; |
275 | } | 275 | } |
276 | 276 | ||
277 | /* Get Nth argument at function call */ | ||
278 | extern unsigned long regs_get_argument_nth(struct pt_regs *regs, | ||
279 | unsigned int n); | ||
280 | |||
281 | /* | ||
282 | * These are defined as per linux/ptrace.h, which see. | ||
283 | */ | ||
284 | #define arch_has_single_step() (1) | 277 | #define arch_has_single_step() (1) |
285 | extern void user_enable_single_step(struct task_struct *); | ||
286 | extern void user_disable_single_step(struct task_struct *); | ||
287 | |||
288 | extern void user_enable_block_step(struct task_struct *); | ||
289 | #ifdef CONFIG_X86_DEBUGCTLMSR | 278 | #ifdef CONFIG_X86_DEBUGCTLMSR |
290 | #define arch_has_block_step() (1) | 279 | #define arch_has_block_step() (1) |
291 | #else | 280 | #else |
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 413620024768..606ede126972 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/list.h> | 41 | #include <linux/list.h> |
42 | #include <linux/spinlock.h> | 42 | #include <linux/spinlock.h> |
43 | #include <linux/lockdep.h> | 43 | #include <linux/lockdep.h> |
44 | #include <asm/asm.h> | ||
44 | 45 | ||
45 | struct rwsem_waiter; | 46 | struct rwsem_waiter; |
46 | 47 | ||
@@ -55,17 +56,28 @@ extern asmregparm struct rw_semaphore * | |||
55 | 56 | ||
56 | /* | 57 | /* |
57 | * the semaphore definition | 58 | * the semaphore definition |
59 | * | ||
60 | * The bias values and the counter type limits the number of | ||
61 | * potential readers/writers to 32767 for 32 bits and 2147483647 | ||
62 | * for 64 bits. | ||
58 | */ | 63 | */ |
59 | 64 | ||
60 | #define RWSEM_UNLOCKED_VALUE 0x00000000 | 65 | #ifdef CONFIG_X86_64 |
61 | #define RWSEM_ACTIVE_BIAS 0x00000001 | 66 | # define RWSEM_ACTIVE_MASK 0xffffffffL |
62 | #define RWSEM_ACTIVE_MASK 0x0000ffff | 67 | #else |
63 | #define RWSEM_WAITING_BIAS (-0x00010000) | 68 | # define RWSEM_ACTIVE_MASK 0x0000ffffL |
69 | #endif | ||
70 | |||
71 | #define RWSEM_UNLOCKED_VALUE 0x00000000L | ||
72 | #define RWSEM_ACTIVE_BIAS 0x00000001L | ||
73 | #define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) | ||
64 | #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS | 74 | #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS |
65 | #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) | 75 | #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) |
66 | 76 | ||
77 | typedef signed long rwsem_count_t; | ||
78 | |||
67 | struct rw_semaphore { | 79 | struct rw_semaphore { |
68 | signed long count; | 80 | rwsem_count_t count; |
69 | spinlock_t wait_lock; | 81 | spinlock_t wait_lock; |
70 | struct list_head wait_list; | 82 | struct list_head wait_list; |
71 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 83 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
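[Editor's note] The widened scheme keeps active lockers in the low half of the counter and waiter state in the high half, so on 64-bit RWSEM_ACTIVE_MASK is 0xffffffff and RWSEM_WAITING_BIAS is -0x100000000. Representative counter values under that reading (a hedged illustration, not from the patch):

	/* 0x0000000000000000  unlocked                                      */
	/* 0x0000000000000003  three active readers, nobody waiting          */
	/* 0xffffffff00000001  one active writer (WAITING_BIAS+ACTIVE_BIAS)  */
	/* negative count      writer active and/or waiters queued           */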
@@ -105,7 +117,7 @@ do { \ | |||
105 | static inline void __down_read(struct rw_semaphore *sem) | 117 | static inline void __down_read(struct rw_semaphore *sem) |
106 | { | 118 | { |
107 | asm volatile("# beginning down_read\n\t" | 119 | asm volatile("# beginning down_read\n\t" |
108 | LOCK_PREFIX " inc%z0 (%1)\n\t" | 120 | LOCK_PREFIX _ASM_INC "(%1)\n\t" |
109 | /* adds 0x00000001, returns the old value */ | 121 | /* adds 0x00000001, returns the old value */ |
110 | " jns 1f\n" | 122 | " jns 1f\n" |
111 | " call call_rwsem_down_read_failed\n" | 123 | " call call_rwsem_down_read_failed\n" |
@@ -121,7 +133,7 @@ static inline void __down_read(struct rw_semaphore *sem) | |||
121 | */ | 133 | */ |
122 | static inline int __down_read_trylock(struct rw_semaphore *sem) | 134 | static inline int __down_read_trylock(struct rw_semaphore *sem) |
123 | { | 135 | { |
124 | __s32 result, tmp; | 136 | rwsem_count_t result, tmp; |
125 | asm volatile("# beginning __down_read_trylock\n\t" | 137 | asm volatile("# beginning __down_read_trylock\n\t" |
126 | " mov %0,%1\n\t" | 138 | " mov %0,%1\n\t" |
127 | "1:\n\t" | 139 | "1:\n\t" |
@@ -143,7 +155,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) | |||
143 | */ | 155 | */ |
144 | static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) | 156 | static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) |
145 | { | 157 | { |
146 | int tmp; | 158 | rwsem_count_t tmp; |
147 | 159 | ||
148 | tmp = RWSEM_ACTIVE_WRITE_BIAS; | 160 | tmp = RWSEM_ACTIVE_WRITE_BIAS; |
149 | asm volatile("# beginning down_write\n\t" | 161 | asm volatile("# beginning down_write\n\t" |
@@ -170,9 +182,9 @@ static inline void __down_write(struct rw_semaphore *sem) | |||
170 | */ | 182 | */ |
171 | static inline int __down_write_trylock(struct rw_semaphore *sem) | 183 | static inline int __down_write_trylock(struct rw_semaphore *sem) |
172 | { | 184 | { |
173 | signed long ret = cmpxchg(&sem->count, | 185 | rwsem_count_t ret = cmpxchg(&sem->count, |
174 | RWSEM_UNLOCKED_VALUE, | 186 | RWSEM_UNLOCKED_VALUE, |
175 | RWSEM_ACTIVE_WRITE_BIAS); | 187 | RWSEM_ACTIVE_WRITE_BIAS); |
176 | if (ret == RWSEM_UNLOCKED_VALUE) | 188 | if (ret == RWSEM_UNLOCKED_VALUE) |
177 | return 1; | 189 | return 1; |
178 | return 0; | 190 | return 0; |
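__down_write_trylock() keeps its structure and only widens the type: cmpxchg() installs the write bias only if the count is exactly RWSEM_UNLOCKED_VALUE, and returns the value it found, so equality with the expected old value signals success. The same idiom with the GCC builtin, as a user-space sketch (the constant mirrors the 64-bit write bias above; this is not the kernel's cmpxchg()):

	#include <stdio.h>

	static long count;	/* stands in for sem->count */

	static int down_write_trylock(void)
	{
		/* atomically: if (count == 0) count = write bias; return old */
		long old = __sync_val_compare_and_swap(&count, 0L, -0xffffffffL);
		return old == 0L;
	}

	int main(void)
	{
		printf("%d\n", down_write_trylock());	/* 1: was unlocked */
		printf("%d\n", down_write_trylock());	/* 0: already held */
		return 0;
	}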
@@ -183,7 +195,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) | |||
183 | */ | 195 | */ |
184 | static inline void __up_read(struct rw_semaphore *sem) | 196 | static inline void __up_read(struct rw_semaphore *sem) |
185 | { | 197 | { |
186 | __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; | 198 | rwsem_count_t tmp = -RWSEM_ACTIVE_READ_BIAS; |
187 | asm volatile("# beginning __up_read\n\t" | 199 | asm volatile("# beginning __up_read\n\t" |
188 | LOCK_PREFIX " xadd %1,(%2)\n\t" | 200 | LOCK_PREFIX " xadd %1,(%2)\n\t" |
189 | /* subtracts 1, returns the old value */ | 201 | /* subtracts 1, returns the old value */ |
@@ -201,7 +213,7 @@ static inline void __up_read(struct rw_semaphore *sem) | |||
201 | */ | 213 | */ |
202 | static inline void __up_write(struct rw_semaphore *sem) | 214 | static inline void __up_write(struct rw_semaphore *sem) |
203 | { | 215 | { |
204 | unsigned long tmp; | 216 | rwsem_count_t tmp; |
205 | asm volatile("# beginning __up_write\n\t" | 217 | asm volatile("# beginning __up_write\n\t" |
206 | LOCK_PREFIX " xadd %1,(%2)\n\t" | 218 | LOCK_PREFIX " xadd %1,(%2)\n\t" |
207 | /* tries to transition | 219 | /* tries to transition |
@@ -221,33 +233,38 @@ static inline void __up_write(struct rw_semaphore *sem) | |||
221 | static inline void __downgrade_write(struct rw_semaphore *sem) | 233 | static inline void __downgrade_write(struct rw_semaphore *sem) |
222 | { | 234 | { |
223 | asm volatile("# beginning __downgrade_write\n\t" | 235 | asm volatile("# beginning __downgrade_write\n\t" |
224 | LOCK_PREFIX " add%z0 %2,(%1)\n\t" | 236 | LOCK_PREFIX _ASM_ADD "%2,(%1)\n\t" |
225 | /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ | 237 | /* |
238 | * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386) | ||
239 | * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64) | ||
240 | */ | ||
226 | " jns 1f\n\t" | 241 | " jns 1f\n\t" |
227 | " call call_rwsem_downgrade_wake\n" | 242 | " call call_rwsem_downgrade_wake\n" |
228 | "1:\n\t" | 243 | "1:\n\t" |
229 | "# ending __downgrade_write\n" | 244 | "# ending __downgrade_write\n" |
230 | : "+m" (sem->count) | 245 | : "+m" (sem->count) |
231 | : "a" (sem), "i" (-RWSEM_WAITING_BIAS) | 246 | : "a" (sem), "er" (-RWSEM_WAITING_BIAS) |
232 | : "memory", "cc"); | 247 | : "memory", "cc"); |
233 | } | 248 | } |
234 | 249 | ||
235 | /* | 250 | /* |
236 | * implement atomic add functionality | 251 | * implement atomic add functionality |
237 | */ | 252 | */ |
238 | static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) | 253 | static inline void rwsem_atomic_add(rwsem_count_t delta, |
254 | struct rw_semaphore *sem) | ||
239 | { | 255 | { |
240 | asm volatile(LOCK_PREFIX "add%z0 %1,%0" | 256 | asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" |
241 | : "+m" (sem->count) | 257 | : "+m" (sem->count) |
242 | : "ir" (delta)); | 258 | : "er" (delta)); |
243 | } | 259 | } |
244 | 260 | ||
245 | /* | 261 | /* |
246 | * implement exchange and add functionality | 262 | * implement exchange and add functionality |
247 | */ | 263 | */ |
248 | static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) | 264 | static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta, |
265 | struct rw_semaphore *sem) | ||
249 | { | 266 | { |
250 | int tmp = delta; | 267 | rwsem_count_t tmp = delta; |
251 | 268 | ||
252 | asm volatile(LOCK_PREFIX "xadd %0,%1" | 269 | asm volatile(LOCK_PREFIX "xadd %0,%1" |
253 | : "+r" (tmp), "+m" (sem->count) | 270 | : "+r" (tmp), "+m" (sem->count) |
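Two changes ride along with the rwsem_count_t conversion in these helpers. First, the constraint on the delta operand goes from "i"/"ir" to "er": an x86-64 add encodes at most a 32-bit sign-extended immediate, and the new 64-bit biases no longer fit (for instance -RWSEM_WAITING_BIAS is 0x100000000), so "e" restricts immediates to the representable 32-bit signed range and "r" lets larger values arrive in a register. Second, rwsem_atomic_update() is the classic "lock xadd": add delta and observe the pre-add value atomically; the remainder of the function (outside this hunk) returns old + delta, the updated count. A sketch of the semantics with a GCC builtin:

	#include <stdio.h>

	static long count;

	static long atomic_update(long delta)
	{
		long old = __sync_fetch_and_add(&count, delta);
		return old + delta;	/* the updated count */
	}

	int main(void)
	{
		printf("%ld\n", atomic_update(1));	/* 1 */
		printf("%ld\n", atomic_update(5));	/* 6 */
		return 0;
	}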
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 18e496c98ff0..86b1506f4179 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h | |||
@@ -37,10 +37,8 @@ void setup_bios_corruption_check(void); | |||
37 | 37 | ||
38 | #ifdef CONFIG_X86_VISWS | 38 | #ifdef CONFIG_X86_VISWS |
39 | extern void visws_early_detect(void); | 39 | extern void visws_early_detect(void); |
40 | extern int is_visws_box(void); | ||
41 | #else | 40 | #else |
42 | static inline void visws_early_detect(void) { } | 41 | static inline void visws_early_detect(void) { } |
43 | static inline int is_visws_box(void) { return 0; } | ||
44 | #endif | 42 | #endif |
45 | 43 | ||
46 | extern unsigned long saved_video_mode; | 44 | extern unsigned long saved_video_mode; |
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 1e796782cd7b..4cfc90824068 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -135,6 +135,8 @@ int native_cpu_disable(void); | |||
135 | void native_cpu_die(unsigned int cpu); | 135 | void native_cpu_die(unsigned int cpu); |
136 | void native_play_dead(void); | 136 | void native_play_dead(void); |
137 | void play_dead_common(void); | 137 | void play_dead_common(void); |
138 | void wbinvd_on_cpu(int cpu); | ||
139 | int wbinvd_on_all_cpus(void); | ||
138 | 140 | ||
139 | void native_send_call_func_ipi(const struct cpumask *mask); | 141 | void native_send_call_func_ipi(const struct cpumask *mask); |
140 | void native_send_call_func_single_ipi(int cpu); | 142 | void native_send_call_func_single_ipi(int cpu); |
@@ -147,6 +149,13 @@ static inline int num_booting_cpus(void) | |||
147 | { | 149 | { |
148 | return cpumask_weight(cpu_callout_mask); | 150 | return cpumask_weight(cpu_callout_mask); |
149 | } | 151 | } |
152 | #else /* !CONFIG_SMP */ | ||
153 | #define wbinvd_on_cpu(cpu) wbinvd() | ||
154 | static inline int wbinvd_on_all_cpus(void) | ||
155 | { | ||
156 | wbinvd(); | ||
157 | return 0; | ||
158 | } | ||
150 | #endif /* CONFIG_SMP */ | 159 | #endif /* CONFIG_SMP */ |
151 | 160 | ||
152 | extern unsigned disabled_cpus __cpuinitdata; | 161 | extern unsigned disabled_cpus __cpuinitdata; |
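Only the declarations and the UP stubs appear in this hunk; the SMP-side bodies land elsewhere in the series. Presumably they are thin wrappers over the generic cross-call API, along these lines (a sketch of the likely shape, not the exact patch):

	static void __wbinvd(void *dummy)
	{
		wbinvd();	/* write back and invalidate this cpu's caches */
	}

	void wbinvd_on_cpu(int cpu)
	{
		smp_call_function_single(cpu, __wbinvd, NULL, 1);
	}

	int wbinvd_on_all_cpus(void)
	{
		return on_each_cpu(__wbinvd, NULL, 1);
	}

On !CONFIG_SMP there is no other cpu to cross-call, so the stubs in the hunk simply run wbinvd() locally and report success.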
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 35e89122a42f..4dab78edbad9 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h | |||
@@ -3,8 +3,6 @@ | |||
3 | 3 | ||
4 | extern int kstack_depth_to_print; | 4 | extern int kstack_depth_to_print; |
5 | 5 | ||
6 | int x86_is_stack_id(int id, char *name); | ||
7 | |||
8 | struct thread_info; | 6 | struct thread_info; |
9 | struct stacktrace_ops; | 7 | struct stacktrace_ops; |
10 | 8 | ||
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 1fecb7e61130..38638cd2fa4c 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h | |||
@@ -313,7 +313,7 @@ struct __attribute__ ((__packed__)) vmcb { | |||
313 | 313 | ||
314 | #define SVM_EXIT_ERR -1 | 314 | #define SVM_EXIT_ERR -1 |
315 | 315 | ||
316 | #define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */ | 316 | #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) |
317 | 317 | ||
318 | #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" | 318 | #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" |
319 | #define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8" | 319 | #define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8" |
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index d5f69045c100..3ad421784ae7 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h | |||
@@ -26,8 +26,8 @@ asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *); | |||
26 | asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); | 26 | asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); |
27 | asmlinkage long sys32_fstatat(unsigned int, char __user *, | 27 | asmlinkage long sys32_fstatat(unsigned int, char __user *, |
28 | struct stat64 __user *, int); | 28 | struct stat64 __user *, int); |
29 | struct mmap_arg_struct; | 29 | struct mmap_arg_struct32; |
30 | asmlinkage long sys32_mmap(struct mmap_arg_struct __user *); | 30 | asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *); |
31 | asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); | 31 | asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); |
32 | 32 | ||
33 | struct sigaction32; | 33 | struct sigaction32; |
@@ -40,8 +40,6 @@ asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *, | |||
40 | compat_sigset_t __user *, unsigned int); | 40 | compat_sigset_t __user *, unsigned int); |
41 | asmlinkage long sys32_alarm(unsigned int); | 41 | asmlinkage long sys32_alarm(unsigned int); |
42 | 42 | ||
43 | struct sel_arg_struct; | ||
44 | asmlinkage long sys32_old_select(struct sel_arg_struct __user *); | ||
45 | asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); | 43 | asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); |
46 | asmlinkage long sys32_sysfs(int, u32, u32); | 44 | asmlinkage long sys32_sysfs(int, u32, u32); |
47 | 45 | ||
@@ -56,11 +54,6 @@ asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); | |||
56 | asmlinkage long sys32_personality(unsigned long); | 54 | asmlinkage long sys32_personality(unsigned long); |
57 | asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); | 55 | asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); |
58 | 56 | ||
59 | struct oldold_utsname; | ||
60 | struct old_utsname; | ||
61 | asmlinkage long sys32_olduname(struct oldold_utsname __user *); | ||
62 | long sys32_uname(struct old_utsname __user *); | ||
63 | |||
64 | asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *, | 57 | asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *, |
65 | compat_uptr_t __user *, struct pt_regs *); | 58 | compat_uptr_t __user *, struct pt_regs *); |
66 | asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); | 59 | asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); |
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 8d33bc5462d1..c4a348f7bd43 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h | |||
@@ -16,6 +16,8 @@ | |||
16 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
17 | #include <linux/err.h> | 17 | #include <linux/err.h> |
18 | 18 | ||
19 | extern const unsigned long sys_call_table[]; | ||
20 | |||
19 | /* | 21 | /* |
20 | * Only the low 32 bits of orig_ax are meaningful, so we return int. | 22 | * Only the low 32 bits of orig_ax are meaningful, so we return int. |
21 | * This importantly ignores the high bits on 64-bit, so comparisons | 23 | * This importantly ignores the high bits on 64-bit, so comparisons |
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 8868b9420b0e..5c044b43e9a7 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h | |||
@@ -50,18 +50,6 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, | |||
50 | struct old_sigaction __user *); | 50 | struct old_sigaction __user *); |
51 | unsigned long sys_sigreturn(struct pt_regs *); | 51 | unsigned long sys_sigreturn(struct pt_regs *); |
52 | 52 | ||
53 | /* kernel/sys_i386_32.c */ | ||
54 | struct mmap_arg_struct; | ||
55 | struct sel_arg_struct; | ||
56 | struct oldold_utsname; | ||
57 | struct old_utsname; | ||
58 | |||
59 | asmlinkage int old_mmap(struct mmap_arg_struct __user *); | ||
60 | asmlinkage int old_select(struct sel_arg_struct __user *); | ||
61 | asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); | ||
62 | asmlinkage int sys_uname(struct old_utsname __user *); | ||
63 | asmlinkage int sys_olduname(struct oldold_utsname __user *); | ||
64 | |||
65 | /* kernel/vm86_32.c */ | 53 | /* kernel/vm86_32.c */ |
66 | int sys_vm86old(struct vm86_struct __user *, struct pt_regs *); | 54 | int sys_vm86old(struct vm86_struct __user *, struct pt_regs *); |
67 | int sys_vm86(unsigned long, unsigned long, struct pt_regs *); | 55 | int sys_vm86(unsigned long, unsigned long, struct pt_regs *); |
@@ -73,11 +61,8 @@ int sys_vm86(unsigned long, unsigned long, struct pt_regs *); | |||
73 | long sys_arch_prctl(int, unsigned long); | 61 | long sys_arch_prctl(int, unsigned long); |
74 | 62 | ||
75 | /* kernel/sys_x86_64.c */ | 63 | /* kernel/sys_x86_64.c */ |
76 | struct new_utsname; | ||
77 | |||
78 | asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, | 64 | asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, |
79 | unsigned long, unsigned long, unsigned long); | 65 | unsigned long, unsigned long, unsigned long); |
80 | asmlinkage long sys_uname(struct new_utsname __user *); | ||
81 | 66 | ||
82 | #endif /* CONFIG_X86_32 */ | 67 | #endif /* CONFIG_X86_32 */ |
83 | #endif /* _ASM_X86_SYSCALLS_H */ | 68 | #endif /* _ASM_X86_SYSCALLS_H */ |
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index ecb544e65382..b8fe48ee2ed9 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h | |||
@@ -11,9 +11,9 @@ | |||
11 | #include <linux/irqflags.h> | 11 | #include <linux/irqflags.h> |
12 | 12 | ||
13 | /* entries in ARCH_DLINFO: */ | 13 | /* entries in ARCH_DLINFO: */ |
14 | #ifdef CONFIG_IA32_EMULATION | 14 | #if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64) |
15 | # define AT_VECTOR_SIZE_ARCH 2 | 15 | # define AT_VECTOR_SIZE_ARCH 2 |
16 | #else | 16 | #else /* else it's non-compat x86-64 */ |
17 | # define AT_VECTOR_SIZE_ARCH 1 | 17 | # define AT_VECTOR_SIZE_ARCH 1 |
18 | #endif | 18 | #endif |
19 | 19 | ||
@@ -32,7 +32,7 @@ extern void show_regs_common(void); | |||
32 | "movl %P[task_canary](%[next]), %%ebx\n\t" \ | 32 | "movl %P[task_canary](%[next]), %%ebx\n\t" \ |
33 | "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" | 33 | "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" |
34 | #define __switch_canary_oparam \ | 34 | #define __switch_canary_oparam \ |
35 | , [stack_canary] "=m" (per_cpu_var(stack_canary.canary)) | 35 | , [stack_canary] "=m" (stack_canary.canary) |
36 | #define __switch_canary_iparam \ | 36 | #define __switch_canary_iparam \ |
37 | , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) | 37 | , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) |
38 | #else /* CC_STACKPROTECTOR */ | 38 | #else /* CC_STACKPROTECTOR */ |
@@ -114,7 +114,7 @@ do { \ | |||
114 | "movq %P[task_canary](%%rsi),%%r8\n\t" \ | 114 | "movq %P[task_canary](%%rsi),%%r8\n\t" \ |
115 | "movq %%r8,"__percpu_arg([gs_canary])"\n\t" | 115 | "movq %%r8,"__percpu_arg([gs_canary])"\n\t" |
116 | #define __switch_canary_oparam \ | 116 | #define __switch_canary_oparam \ |
117 | , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary)) | 117 | , [gs_canary] "=m" (irq_stack_union.stack_canary) |
118 | #define __switch_canary_iparam \ | 118 | #define __switch_canary_iparam \ |
119 | , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) | 119 | , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) |
120 | #else /* CC_STACKPROTECTOR */ | 120 | #else /* CC_STACKPROTECTOR */ |
@@ -133,7 +133,7 @@ do { \ | |||
133 | __switch_canary \ | 133 | __switch_canary \ |
134 | "movq %P[thread_info](%%rsi),%%r8\n\t" \ | 134 | "movq %P[thread_info](%%rsi),%%r8\n\t" \ |
135 | "movq %%rax,%%rdi\n\t" \ | 135 | "movq %%rax,%%rdi\n\t" \ |
136 | "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ | 136 | "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ |
137 | "jnz ret_from_fork\n\t" \ | 137 | "jnz ret_from_fork\n\t" \ |
138 | RESTORE_CONTEXT \ | 138 | RESTORE_CONTEXT \ |
139 | : "=a" (last) \ | 139 | : "=a" (last) \ |
@@ -143,7 +143,7 @@ do { \ | |||
143 | [ti_flags] "i" (offsetof(struct thread_info, flags)), \ | 143 | [ti_flags] "i" (offsetof(struct thread_info, flags)), \ |
144 | [_tif_fork] "i" (_TIF_FORK), \ | 144 | [_tif_fork] "i" (_TIF_FORK), \ |
145 | [thread_info] "i" (offsetof(struct task_struct, stack)), \ | 145 | [thread_info] "i" (offsetof(struct task_struct, stack)), \ |
146 | [current_task] "m" (per_cpu_var(current_task)) \ | 146 | [current_task] "m" (current_task) \ |
147 | __switch_canary_iparam \ | 147 | __switch_canary_iparam \ |
148 | : "memory", "cc" __EXTRA_CLOBBER) | 148 | : "memory", "cc" __EXTRA_CLOBBER) |
149 | #endif | 149 | #endif |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 375c917c37d2..e0d28901e969 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -87,7 +87,6 @@ struct thread_info { | |||
87 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ | 87 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ |
88 | #define TIF_IA32 17 /* 32bit process */ | 88 | #define TIF_IA32 17 /* 32bit process */ |
89 | #define TIF_FORK 18 /* ret_from_fork */ | 89 | #define TIF_FORK 18 /* ret_from_fork */ |
90 | #define TIF_ABI_PENDING 19 | ||
91 | #define TIF_MEMDIE 20 | 90 | #define TIF_MEMDIE 20 |
92 | #define TIF_DEBUG 21 /* uses debug registers */ | 91 | #define TIF_DEBUG 21 /* uses debug registers */ |
93 | #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ | 92 | #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ |
@@ -112,7 +111,6 @@ struct thread_info { | |||
112 | #define _TIF_NOTSC (1 << TIF_NOTSC) | 111 | #define _TIF_NOTSC (1 << TIF_NOTSC) |
113 | #define _TIF_IA32 (1 << TIF_IA32) | 112 | #define _TIF_IA32 (1 << TIF_IA32) |
114 | #define _TIF_FORK (1 << TIF_FORK) | 113 | #define _TIF_FORK (1 << TIF_FORK) |
115 | #define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING) | ||
116 | #define _TIF_DEBUG (1 << TIF_DEBUG) | 114 | #define _TIF_DEBUG (1 << TIF_DEBUG) |
117 | #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) | 115 | #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) |
118 | #define _TIF_FREEZE (1 << TIF_FREEZE) | 116 | #define _TIF_FREEZE (1 << TIF_FREEZE) |
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 0c9825e97f36..088d09fb1615 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h | |||
@@ -205,14 +205,13 @@ static inline unsigned long __must_check copy_from_user(void *to, | |||
205 | unsigned long n) | 205 | unsigned long n) |
206 | { | 206 | { |
207 | int sz = __compiletime_object_size(to); | 207 | int sz = __compiletime_object_size(to); |
208 | int ret = -EFAULT; | ||
209 | 208 | ||
210 | if (likely(sz == -1 || sz >= n)) | 209 | if (likely(sz == -1 || sz >= n)) |
211 | ret = _copy_from_user(to, from, n); | 210 | n = _copy_from_user(to, from, n); |
212 | else | 211 | else |
213 | copy_from_user_overflow(); | 212 | copy_from_user_overflow(); |
214 | 213 | ||
215 | return ret; | 214 | return n; |
216 | } | 215 | } |
217 | 216 | ||
218 | long __must_check strncpy_from_user(char *dst, const char __user *src, | 217 | long __must_check strncpy_from_user(char *dst, const char __user *src, |
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index a78c40305447..316708d5af92 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h | |||
@@ -49,16 +49,15 @@ static inline unsigned long __must_check copy_from_user(void *to, | |||
49 | unsigned long n) | 49 | unsigned long n) |
50 | { | 50 | { |
51 | int sz = __compiletime_object_size(to); | 51 | int sz = __compiletime_object_size(to); |
52 | int ret = -EFAULT; | ||
53 | 52 | ||
54 | might_fault(); | 53 | might_fault(); |
55 | if (likely(sz == -1 || sz >= n)) | 54 | if (likely(sz == -1 || sz >= n)) |
56 | ret = _copy_from_user(to, from, n); | 55 | n = _copy_from_user(to, from, n); |
57 | #ifdef CONFIG_DEBUG_VM | 56 | #ifdef CONFIG_DEBUG_VM |
58 | else | 57 | else |
59 | WARN(1, "Buffer overflow detected!\n"); | 58 | WARN(1, "Buffer overflow detected!\n"); |
60 | #endif | 59 | #endif |
61 | return ret; | 60 | return n; |
62 | } | 61 | } |
63 | 62 | ||
64 | static __always_inline __must_check | 63 | static __always_inline __must_check |
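The uaccess_32.h and uaccess_64.h hunks fix the same contract violation: copy_from_user() must return the number of bytes left uncopied, never an errno, yet the compile-time overflow branch returned -EFAULT. A caller computing len - copy_from_user(...) would then see nonsense. Returning n, meaning everything uncopied, restores the documented behavior. A hypothetical user-space analogue of the corrected shape (bounded_copy() is invented for illustration):

	#include <string.h>

	/* returns bytes NOT copied: 0 on full success, n on refusal */
	static unsigned long bounded_copy(void *to, unsigned long to_size,
					  const void *from, unsigned long n)
	{
		if (n > to_size)
			return n;	/* would overflow 'to': copy nothing */
		memcpy(to, from, n);
		return 0;
	}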
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 3baf379fa840..beb9b5f8f8a4 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h | |||
@@ -354,6 +354,7 @@ | |||
354 | #define __ARCH_WANT_STAT64 | 354 | #define __ARCH_WANT_STAT64 |
355 | #define __ARCH_WANT_SYS_ALARM | 355 | #define __ARCH_WANT_SYS_ALARM |
356 | #define __ARCH_WANT_SYS_GETHOSTNAME | 356 | #define __ARCH_WANT_SYS_GETHOSTNAME |
357 | #define __ARCH_WANT_SYS_IPC | ||
357 | #define __ARCH_WANT_SYS_PAUSE | 358 | #define __ARCH_WANT_SYS_PAUSE |
358 | #define __ARCH_WANT_SYS_SGETMASK | 359 | #define __ARCH_WANT_SYS_SGETMASK |
359 | #define __ARCH_WANT_SYS_SIGNAL | 360 | #define __ARCH_WANT_SYS_SIGNAL |
@@ -366,6 +367,9 @@ | |||
366 | #define __ARCH_WANT_SYS_LLSEEK | 367 | #define __ARCH_WANT_SYS_LLSEEK |
367 | #define __ARCH_WANT_SYS_NICE | 368 | #define __ARCH_WANT_SYS_NICE |
368 | #define __ARCH_WANT_SYS_OLD_GETRLIMIT | 369 | #define __ARCH_WANT_SYS_OLD_GETRLIMIT |
370 | #define __ARCH_WANT_SYS_OLD_UNAME | ||
371 | #define __ARCH_WANT_SYS_OLD_MMAP | ||
372 | #define __ARCH_WANT_SYS_OLD_SELECT | ||
369 | #define __ARCH_WANT_SYS_OLDUMOUNT | 373 | #define __ARCH_WANT_SYS_OLDUMOUNT |
370 | #define __ARCH_WANT_SYS_SIGPENDING | 374 | #define __ARCH_WANT_SYS_SIGPENDING |
371 | #define __ARCH_WANT_SYS_SIGPROCMASK | 375 | #define __ARCH_WANT_SYS_SIGPROCMASK |
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 4843f7ba754a..ff4307b0e81e 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
@@ -146,7 +146,7 @@ __SYSCALL(__NR_wait4, sys_wait4) | |||
146 | #define __NR_kill 62 | 146 | #define __NR_kill 62 |
147 | __SYSCALL(__NR_kill, sys_kill) | 147 | __SYSCALL(__NR_kill, sys_kill) |
148 | #define __NR_uname 63 | 148 | #define __NR_uname 63 |
149 | __SYSCALL(__NR_uname, sys_uname) | 149 | __SYSCALL(__NR_uname, sys_newuname) |
150 | 150 | ||
151 | #define __NR_semget 64 | 151 | #define __NR_semget 64 |
152 | __SYSCALL(__NR_semget, sys_semget) | 152 | __SYSCALL(__NR_semget, sys_semget) |
@@ -680,6 +680,7 @@ __SYSCALL(__NR_recvmmsg, sys_recvmmsg) | |||
680 | #define __ARCH_WANT_SYS_LLSEEK | 680 | #define __ARCH_WANT_SYS_LLSEEK |
681 | #define __ARCH_WANT_SYS_NICE | 681 | #define __ARCH_WANT_SYS_NICE |
682 | #define __ARCH_WANT_SYS_OLD_GETRLIMIT | 682 | #define __ARCH_WANT_SYS_OLD_GETRLIMIT |
683 | #define __ARCH_WANT_SYS_OLD_UNAME | ||
683 | #define __ARCH_WANT_SYS_OLDUMOUNT | 684 | #define __ARCH_WANT_SYS_OLDUMOUNT |
684 | #define __ARCH_WANT_SYS_SIGPENDING | 685 | #define __ARCH_WANT_SYS_SIGPENDING |
685 | #define __ARCH_WANT_SYS_SIGPROCMASK | 686 | #define __ARCH_WANT_SYS_SIGPROCMASK |
diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h index 999873b22e7f..24532c7da3d6 100644 --- a/arch/x86/include/asm/user.h +++ b/arch/x86/include/asm/user.h | |||
@@ -1,5 +1,63 @@ | |||
1 | #ifndef _ASM_X86_USER_H | ||
2 | #define _ASM_X86_USER_H | ||
3 | |||
1 | #ifdef CONFIG_X86_32 | 4 | #ifdef CONFIG_X86_32 |
2 | # include "user_32.h" | 5 | # include "user_32.h" |
3 | #else | 6 | #else |
4 | # include "user_64.h" | 7 | # include "user_64.h" |
5 | #endif | 8 | #endif |
9 | |||
10 | #include <asm/types.h> | ||
11 | |||
12 | struct user_ymmh_regs { | ||
13 | /* 16 * 16 bytes for each YMMH-reg */ | ||
14 | __u32 ymmh_space[64]; | ||
15 | }; | ||
16 | |||
17 | struct user_xsave_hdr { | ||
18 | __u64 xstate_bv; | ||
19 | __u64 reserved1[2]; | ||
20 | __u64 reserved2[5]; | ||
21 | }; | ||
22 | |||
23 | /* | ||
24 | * The structure layout of user_xstateregs, used for exporting the | ||
25 | * extended register state through ptrace and core-dump (NT_X86_XSTATE note) | ||
26 | * interfaces, will be the same as the memory layout of xsave used by the processor | ||
27 | * (except for the bytes 464..511, which can be used by the software) and hence | ||
28 | * the size of this structure varies depending on the features supported by the | ||
29 | * processor and OS. The size of the structure that users need to use can be | ||
30 | * obtained by doing: | ||
31 | * cpuid_count(0xd, 0, &eax, &ptrace_xstateregs_struct_size, &ecx, &edx); | ||
32 | * i.e., cpuid.(eax=0xd,ecx=0).ebx will be the size that users (debuggers, etc.) | ||
33 | * need to use. | ||
34 | * | ||
35 | * For now, only the first 8 bytes of the software usable bytes[464..471] will | ||
36 | * be used and will be set to the OS-enabled xstate mask (which is the same | ||
37 | * as the 64bit mask returned by xgetbv's xCR0). Users (analyzing core dumps | ||
38 | * remotely, etc.) can use this mask as well as the mask saved in the | ||
39 | * xstate_hdr bytes and interpret what states the processor/OS supports | ||
40 | * and what states are in modified/initialized conditions for the | ||
41 | * particular process/thread. | ||
42 | * | ||
43 | * Also when the user modifies certain state FP/SSE/etc through the | ||
44 | * ptrace interface, they must ensure that the xsave_hdr.xstate_bv | ||
45 | * bytes[512..519] of the memory layout are updated correspondingly. | ||
46 | * i.e., for example when FP state is modified to a non-init state, | ||
47 | * xsave_hdr.xstate_bv's bit 0 must be set to '1', when SSE is modified to | ||
48 | * non-init state, xsave_hdr.xstate_bv's bit 1 must be set to '1', etc. | ||
49 | */ | ||
50 | #define USER_XSTATE_FX_SW_WORDS 6 | ||
51 | #define USER_XSTATE_XCR0_WORD 0 | ||
52 | |||
53 | struct user_xstateregs { | ||
54 | struct { | ||
55 | __u64 fpx_space[58]; | ||
56 | __u64 xstate_fx_sw[USER_XSTATE_FX_SW_WORDS]; | ||
57 | } i387; | ||
58 | struct user_xsave_hdr xsave_hdr; | ||
59 | struct user_ymmh_regs ymmh; | ||
60 | /* further processor state extensions go here */ | ||
61 | }; | ||
62 | |||
63 | #endif /* _ASM_X86_USER_H */ | ||
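The comment spells out how a debugger sizes the NT_X86_XSTATE note: read cpuid.(eax=0xd, ecx=0).ebx. That is easy to exercise directly; a minimal x86-only sketch, assuming gcc/clang inline asm and a CPU with XSAVE enabled:

	#include <stdio.h>

	static void cpuid_count(unsigned int leaf, unsigned int subleaf,
				unsigned int *a, unsigned int *b,
				unsigned int *c, unsigned int *d)
	{
		__asm__ volatile("cpuid"
				 : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
				 : "a" (leaf), "c" (subleaf));
	}

	int main(void)
	{
		unsigned int a, b, c, d;

		cpuid_count(0xd, 0, &a, &b, &c, &d);
		printf("xsave area size for enabled features: %u bytes\n", b);
		return 0;
	}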
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index 2751f3075d8b..71605c7d5c5c 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h | |||
@@ -18,8 +18,8 @@ | |||
18 | * along with this program; if not, write to the Free Software | 18 | * along with this program; if not, write to the Free Software |
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
20 | * | 20 | * |
21 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | 21 | * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. |
22 | * Copyright (c) Russ Anderson | 22 | * Copyright (c) Russ Anderson <rja@sgi.com> |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/rtc.h> | 25 | #include <linux/rtc.h> |
@@ -36,7 +36,8 @@ enum uv_bios_cmd { | |||
36 | UV_BIOS_WATCHLIST_ALLOC, | 36 | UV_BIOS_WATCHLIST_ALLOC, |
37 | UV_BIOS_WATCHLIST_FREE, | 37 | UV_BIOS_WATCHLIST_FREE, |
38 | UV_BIOS_MEMPROTECT, | 38 | UV_BIOS_MEMPROTECT, |
39 | UV_BIOS_GET_PARTITION_ADDR | 39 | UV_BIOS_GET_PARTITION_ADDR, |
40 | UV_BIOS_SET_LEGACY_VGA_TARGET | ||
40 | }; | 41 | }; |
41 | 42 | ||
42 | /* | 43 | /* |
@@ -89,13 +90,14 @@ extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64); | |||
89 | extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64); | 90 | extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64); |
90 | extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64); | 91 | extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64); |
91 | 92 | ||
92 | extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *); | 93 | extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *, long *); |
93 | extern s64 uv_bios_freq_base(u64, u64 *); | 94 | extern s64 uv_bios_freq_base(u64, u64 *); |
94 | extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int, | 95 | extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int, |
95 | unsigned long *); | 96 | unsigned long *); |
96 | extern int uv_bios_mq_watchlist_free(int, int); | 97 | extern int uv_bios_mq_watchlist_free(int, int); |
97 | extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); | 98 | extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); |
98 | extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *); | 99 | extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *); |
100 | extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus); | ||
99 | 101 | ||
100 | extern void uv_bios_init(void); | 102 | extern void uv_bios_init(void); |
101 | 103 | ||
@@ -104,6 +106,7 @@ extern int uv_type; | |||
104 | extern long sn_partition_id; | 106 | extern long sn_partition_id; |
105 | extern long sn_coherency_id; | 107 | extern long sn_coherency_id; |
106 | extern long sn_region_size; | 108 | extern long sn_region_size; |
109 | extern long system_serial_number; | ||
107 | #define partition_coherence_id() (sn_coherency_id) | 110 | #define partition_coherence_id() (sn_coherency_id) |
108 | 111 | ||
109 | extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ | 112 | extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ |
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index c0a01b5d985b..3bb9491b7659 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h | |||
@@ -11,6 +11,7 @@ struct mm_struct; | |||
11 | extern enum uv_system_type get_uv_system_type(void); | 11 | extern enum uv_system_type get_uv_system_type(void); |
12 | extern int is_uv_system(void); | 12 | extern int is_uv_system(void); |
13 | extern void uv_cpu_init(void); | 13 | extern void uv_cpu_init(void); |
14 | extern void uv_nmi_init(void); | ||
14 | extern void uv_system_init(void); | 15 | extern void uv_system_init(void); |
15 | extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | 16 | extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, |
16 | struct mm_struct *mm, | 17 | struct mm_struct *mm, |
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 811bfabc80b7..14cc74ba5d23 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h | |||
@@ -31,20 +31,20 @@ | |||
31 | * contiguous (although various IO spaces may punch holes in | 31 | * contiguous (although various IO spaces may punch holes in |
32 | * it).. | 32 | * it).. |
33 | * | 33 | * |
34 | * N - Number of bits in the node portion of a socket physical | 34 | * N - Number of bits in the node portion of a socket physical |
35 | * address. | 35 | * address. |
36 | * | 36 | * |
37 | * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of | 37 | * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of |
38 | * routers always have low bit of 1, C/MBricks have low bit | 38 | * routers always have low bit of 1, C/MBricks have low bit |
39 | * equal to 0. Most addressing macros that target UV hub chips | 39 | * equal to 0. Most addressing macros that target UV hub chips |
40 | * right shift the NASID by 1 to exclude the always-zero bit. | 40 | * right shift the NASID by 1 to exclude the always-zero bit. |
41 | * NASIDs contain up to 15 bits. | 41 | * NASIDs contain up to 15 bits. |
42 | * | 42 | * |
43 | * GNODE - NASID right shifted by 1 bit. Most mmrs contain gnodes instead | 43 | * GNODE - NASID right shifted by 1 bit. Most mmrs contain gnodes instead |
44 | * of nasids. | 44 | * of nasids. |
45 | * | 45 | * |
46 | * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant | 46 | * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant |
47 | * of the nasid for socket usage. | 47 | * of the nasid for socket usage. |
48 | * | 48 | * |
49 | * | 49 | * |
50 | * NumaLink Global Physical Address Format: | 50 | * NumaLink Global Physical Address Format: |
@@ -71,12 +71,12 @@ | |||
71 | * | 71 | * |
72 | * | 72 | * |
73 | * APICID format | 73 | * APICID format |
74 | * NOTE!!!!!! This is the current format of the APICID. However, code | 74 | * NOTE!!!!!! This is the current format of the APICID. However, code |
75 | * should assume that this will change in the future. Use functions | 75 | * should assume that this will change in the future. Use functions |
76 | * in this file for all APICID bit manipulations and conversion. | 76 | * in this file for all APICID bit manipulations and conversion. |
77 | * | 77 | * |
78 | * 1111110000000000 | 78 | * 1111110000000000 |
79 | * 5432109876543210 | 79 | * 5432109876543210 |
80 | * pppppppppplc0cch | 80 | * pppppppppplc0cch |
81 | * sssssssssss | 81 | * sssssssssss |
82 | * | 82 | * |
@@ -89,9 +89,9 @@ | |||
89 | * Note: Processor only supports 12 bits in the APICID register. The ACPI | 89 | * Note: Processor only supports 12 bits in the APICID register. The ACPI |
90 | * tables hold all 16 bits. Software needs to be aware of this. | 90 | * tables hold all 16 bits. Software needs to be aware of this. |
91 | * | 91 | * |
92 | * Unless otherwise specified, all references to APICID refer to | 92 | * Unless otherwise specified, all references to APICID refer to |
93 | * the FULL value contained in ACPI tables, not the subset in the | 93 | * the FULL value contained in ACPI tables, not the subset in the |
94 | * processor APICID register. | 94 | * processor APICID register. |
95 | */ | 95 | */ |
96 | 96 | ||
97 | 97 | ||
@@ -151,16 +151,16 @@ struct uv_hub_info_s { | |||
151 | }; | 151 | }; |
152 | 152 | ||
153 | DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); | 153 | DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); |
154 | #define uv_hub_info (&__get_cpu_var(__uv_hub_info)) | 154 | #define uv_hub_info (&__get_cpu_var(__uv_hub_info)) |
155 | #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) | 155 | #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) |
156 | 156 | ||
157 | /* | 157 | /* |
158 | * Local & Global MMR space macros. | 158 | * Local & Global MMR space macros. |
159 | * Note: macros are intended to be used ONLY by inline functions | 159 | * Note: macros are intended to be used ONLY by inline functions |
160 | * in this file - not by other kernel code. | 160 | * in this file - not by other kernel code. |
161 | * n - NASID (full 15-bit global nasid) | 161 | * n - NASID (full 15-bit global nasid) |
162 | * g - GNODE (full 15-bit global nasid, right shifted 1) | 162 | * g - GNODE (full 15-bit global nasid, right shifted 1) |
163 | * p - PNODE (local part of nasids, right shifted 1) | 163 | * p - PNODE (local part of nasids, right shifted 1) |
164 | */ | 164 | */ |
165 | #define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask) | 165 | #define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask) |
166 | #define UV_PNODE_TO_GNODE(p) ((p) |uv_hub_info->gnode_extra) | 166 | #define UV_PNODE_TO_GNODE(p) ((p) |uv_hub_info->gnode_extra) |
@@ -215,8 +215,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); | |||
215 | /* | 215 | /* |
216 | * Macros for converting between kernel virtual addresses, socket local physical | 216 | * Macros for converting between kernel virtual addresses, socket local physical |
217 | * addresses, and UV global physical addresses. | 217 | * addresses, and UV global physical addresses. |
218 | * Note: use the standard __pa() & __va() macros for converting | 218 | * Note: use the standard __pa() & __va() macros for converting |
219 | * between socket virtual and socket physical addresses. | 219 | * between socket virtual and socket physical addresses. |
220 | */ | 220 | */ |
221 | 221 | ||
222 | /* socket phys RAM --> UV global physical address */ | 222 | /* socket phys RAM --> UV global physical address */ |
@@ -287,21 +287,18 @@ static inline int uv_apicid_to_pnode(int apicid) | |||
287 | * Access global MMRs using the low memory MMR32 space. This region supports | 287 | * Access global MMRs using the low memory MMR32 space. This region supports |
288 | * faster MMR access but not all MMRs are accessible in this space. | 288 | * faster MMR access but not all MMRs are accessible in this space. |
289 | */ | 289 | */ |
290 | static inline unsigned long *uv_global_mmr32_address(int pnode, | 290 | static inline unsigned long *uv_global_mmr32_address(int pnode, unsigned long offset) |
291 | unsigned long offset) | ||
292 | { | 291 | { |
293 | return __va(UV_GLOBAL_MMR32_BASE | | 292 | return __va(UV_GLOBAL_MMR32_BASE | |
294 | UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset); | 293 | UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset); |
295 | } | 294 | } |
296 | 295 | ||
297 | static inline void uv_write_global_mmr32(int pnode, unsigned long offset, | 296 | static inline void uv_write_global_mmr32(int pnode, unsigned long offset, unsigned long val) |
298 | unsigned long val) | ||
299 | { | 297 | { |
300 | writeq(val, uv_global_mmr32_address(pnode, offset)); | 298 | writeq(val, uv_global_mmr32_address(pnode, offset)); |
301 | } | 299 | } |
302 | 300 | ||
303 | static inline unsigned long uv_read_global_mmr32(int pnode, | 301 | static inline unsigned long uv_read_global_mmr32(int pnode, unsigned long offset) |
304 | unsigned long offset) | ||
305 | { | 302 | { |
306 | return readq(uv_global_mmr32_address(pnode, offset)); | 303 | return readq(uv_global_mmr32_address(pnode, offset)); |
307 | } | 304 | } |
@@ -310,21 +307,18 @@ static inline unsigned long uv_read_global_mmr32(int pnode, | |||
310 | * Access Global MMR space using the MMR space located at the top of physical | 307 | * Access Global MMR space using the MMR space located at the top of physical |
311 | * memory. | 308 | * memory. |
312 | */ | 309 | */ |
313 | static inline unsigned long *uv_global_mmr64_address(int pnode, | 310 | static inline unsigned long *uv_global_mmr64_address(int pnode, unsigned long offset) |
314 | unsigned long offset) | ||
315 | { | 311 | { |
316 | return __va(UV_GLOBAL_MMR64_BASE | | 312 | return __va(UV_GLOBAL_MMR64_BASE | |
317 | UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); | 313 | UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); |
318 | } | 314 | } |
319 | 315 | ||
320 | static inline void uv_write_global_mmr64(int pnode, unsigned long offset, | 316 | static inline void uv_write_global_mmr64(int pnode, unsigned long offset, unsigned long val) |
321 | unsigned long val) | ||
322 | { | 317 | { |
323 | writeq(val, uv_global_mmr64_address(pnode, offset)); | 318 | writeq(val, uv_global_mmr64_address(pnode, offset)); |
324 | } | 319 | } |
325 | 320 | ||
326 | static inline unsigned long uv_read_global_mmr64(int pnode, | 321 | static inline unsigned long uv_read_global_mmr64(int pnode, unsigned long offset) |
327 | unsigned long offset) | ||
328 | { | 322 | { |
329 | return readq(uv_global_mmr64_address(pnode, offset)); | 323 | return readq(uv_global_mmr64_address(pnode, offset)); |
330 | } | 324 | } |
@@ -335,7 +329,18 @@ static inline unsigned long uv_read_global_mmr64(int pnode, | |||
335 | */ | 329 | */ |
336 | static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long offset) | 330 | static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long offset) |
337 | { | 331 | { |
338 | return UV_GLOBAL_GRU_MMR_BASE | offset | (pnode << uv_hub_info->m_val); | 332 | return UV_GLOBAL_GRU_MMR_BASE | offset | |
333 | ((unsigned long)pnode << uv_hub_info->m_val); | ||
334 | } | ||
335 | |||
336 | static inline void uv_write_global_mmr8(int pnode, unsigned long offset, unsigned char val) | ||
337 | { | ||
338 | writeb(val, uv_global_mmr64_address(pnode, offset)); | ||
339 | } | ||
340 | |||
341 | static inline unsigned char uv_read_global_mmr8(int pnode, unsigned long offset) | ||
342 | { | ||
343 | return readb(uv_global_mmr64_address(pnode, offset)); | ||
339 | } | 344 | } |
340 | 345 | ||
341 | /* | 346 | /* |
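The uv_global_gru_mmr_address() change is a width fix: pnode is an int, so pnode << uv_hub_info->m_val is evaluated in 32-bit arithmetic, losing any bits at or above bit 31 (and a set bit 31 would sign-extend into the upper half of the 64-bit address). Casting to unsigned long first makes the whole shift 64-bit. The effect, demonstrated with unsigned types to stay within defined behavior (illustrative values, not real UV parameters):

	#include <stdio.h>

	int main(void)
	{
		unsigned int pnode = 0x1ff;	/* 9-bit pnode */
		unsigned int m_val = 26;	/* node field starts at bit 26 */

		unsigned long narrow = pnode << m_val;              /* 32-bit shift */
		unsigned long wide = (unsigned long)pnode << m_val; /* 64-bit shift */

		printf("narrow: %#lx\n", narrow);  /* 0xfc000000: top bits lost */
		printf("wide:   %#lx\n", wide);    /* 0x7fc000000 */
		return 0;
	}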
@@ -457,11 +462,17 @@ static inline void uv_set_scir_bits(unsigned char value) | |||
457 | } | 462 | } |
458 | } | 463 | } |
459 | 464 | ||
465 | static inline unsigned long uv_scir_offset(int apicid) | ||
466 | { | ||
467 | return SCIR_LOCAL_MMR_BASE | (apicid & 0x3f); | ||
468 | } | ||
469 | |||
460 | static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) | 470 | static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) |
461 | { | 471 | { |
462 | if (uv_cpu_hub_info(cpu)->scir.state != value) { | 472 | if (uv_cpu_hub_info(cpu)->scir.state != value) { |
473 | uv_write_global_mmr8(uv_cpu_to_pnode(cpu), | ||
474 | uv_cpu_hub_info(cpu)->scir.offset, value); | ||
463 | uv_cpu_hub_info(cpu)->scir.state = value; | 475 | uv_cpu_hub_info(cpu)->scir.state = value; |
464 | uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value); | ||
465 | } | 476 | } |
466 | } | 477 | } |
467 | 478 | ||
@@ -485,5 +496,17 @@ static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) | |||
485 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); | 496 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); |
486 | } | 497 | } |
487 | 498 | ||
499 | /* | ||
500 | * Get the minimum revision number of the hub chips within the partition. | ||
501 | * 1 - initial rev 1.0 silicon | ||
502 | * 2 - rev 2.0 production silicon | ||
503 | */ | ||
504 | static inline int uv_get_min_hub_revision_id(void) | ||
505 | { | ||
506 | extern int uv_min_hub_revision_id; | ||
507 | |||
508 | return uv_min_hub_revision_id; | ||
509 | } | ||
510 | |||
488 | #endif /* CONFIG_X86_64 */ | 511 | #endif /* CONFIG_X86_64 */ |
489 | #endif /* _ASM_X86_UV_UV_HUB_H */ | 512 | #endif /* _ASM_X86_UV_UV_HUB_H */ |
diff --git a/arch/x86/include/asm/visws/cobalt.h b/arch/x86/include/asm/visws/cobalt.h index 166adf61e770..2edb37637ead 100644 --- a/arch/x86/include/asm/visws/cobalt.h +++ b/arch/x86/include/asm/visws/cobalt.h | |||
@@ -122,4 +122,6 @@ extern char visws_board_type; | |||
122 | 122 | ||
123 | extern char visws_board_rev; | 123 | extern char visws_board_rev; |
124 | 124 | ||
125 | extern int pci_visws_init(void); | ||
126 | |||
125 | #endif /* _ASM_X86_VISWS_COBALT_H */ | 127 | #endif /* _ASM_X86_VISWS_COBALT_H */ |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 2b4945419a84..fb9a080740ec 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -53,6 +53,7 @@ | |||
53 | */ | 53 | */ |
54 | #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 | 54 | #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 |
55 | #define SECONDARY_EXEC_ENABLE_EPT 0x00000002 | 55 | #define SECONDARY_EXEC_ENABLE_EPT 0x00000002 |
56 | #define SECONDARY_EXEC_RDTSCP 0x00000008 | ||
56 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 | 57 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 |
57 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 | 58 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 |
58 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 | 59 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 |
@@ -251,6 +252,7 @@ enum vmcs_field { | |||
251 | #define EXIT_REASON_MSR_READ 31 | 252 | #define EXIT_REASON_MSR_READ 31 |
252 | #define EXIT_REASON_MSR_WRITE 32 | 253 | #define EXIT_REASON_MSR_WRITE 32 |
253 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 | 254 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 |
255 | #define EXIT_REASON_MONITOR_INSTRUCTION 39 | ||
254 | #define EXIT_REASON_PAUSE_INSTRUCTION 40 | 256 | #define EXIT_REASON_PAUSE_INSTRUCTION 40 |
255 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 | 257 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 |
256 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 | 258 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 |
@@ -362,6 +364,7 @@ enum vmcs_field { | |||
362 | #define VMX_EPTP_UC_BIT (1ull << 8) | 364 | #define VMX_EPTP_UC_BIT (1ull << 8) |
363 | #define VMX_EPTP_WB_BIT (1ull << 14) | 365 | #define VMX_EPTP_WB_BIT (1ull << 14) |
364 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) | 366 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) |
367 | #define VMX_EPT_1GB_PAGE_BIT (1ull << 17) | ||
365 | #define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) | 368 | #define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) |
366 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) | 369 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) |
367 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) | 370 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) |
@@ -374,7 +377,7 @@ enum vmcs_field { | |||
374 | #define VMX_EPT_READABLE_MASK 0x1ull | 377 | #define VMX_EPT_READABLE_MASK 0x1ull |
375 | #define VMX_EPT_WRITABLE_MASK 0x2ull | 378 | #define VMX_EPT_WRITABLE_MASK 0x2ull |
376 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull | 379 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull |
377 | #define VMX_EPT_IGMT_BIT (1ull << 6) | 380 | #define VMX_EPT_IPAT_BIT (1ull << 6) |
378 | 381 | ||
379 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul | 382 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul |
380 | 383 | ||
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index ea0e8ea15e15..519b54327d75 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -99,6 +99,20 @@ struct x86_init_iommu { | |||
99 | }; | 99 | }; |
100 | 100 | ||
101 | /** | 101 | /** |
102 | * struct x86_init_pci - platform specific pci init functions | ||
103 | * @arch_init: platform specific pci arch init call | ||
104 | * @init: platform specific pci subsystem init | ||
105 | * @init_irq: platform specific pci irq init | ||
106 | * @fixup_irqs: platform specific pci irq fixup | ||
107 | */ | ||
108 | struct x86_init_pci { | ||
109 | int (*arch_init)(void); | ||
110 | int (*init)(void); | ||
111 | void (*init_irq)(void); | ||
112 | void (*fixup_irqs)(void); | ||
113 | }; | ||
114 | |||
115 | /** | ||
102 | * struct x86_init_ops - functions for platform specific setup | 116 | * struct x86_init_ops - functions for platform specific setup |
103 | * | 117 | * |
104 | */ | 118 | */ |
@@ -110,6 +124,7 @@ struct x86_init_ops { | |||
110 | struct x86_init_paging paging; | 124 | struct x86_init_paging paging; |
111 | struct x86_init_timers timers; | 125 | struct x86_init_timers timers; |
112 | struct x86_init_iommu iommu; | 126 | struct x86_init_iommu iommu; |
127 | struct x86_init_pci pci; | ||
113 | }; | 128 | }; |
114 | 129 | ||
115 | /** | 130 | /** |
@@ -126,6 +141,7 @@ struct x86_cpuinit_ops { | |||
126 | * @get_wallclock: get time from HW clock like RTC etc. | 141 | * @get_wallclock: get time from HW clock like RTC etc. |
127 | * @set_wallclock: set time back to HW clock | 142 | * @set_wallclock: set time back to HW clock |
128 | * @is_untracked_pat_range exclude from PAT logic | 143 | * @is_untracked_pat_range exclude from PAT logic |
144 | * @nmi_init enable NMI on cpus | ||
129 | */ | 145 | */ |
130 | struct x86_platform_ops { | 146 | struct x86_platform_ops { |
131 | unsigned long (*calibrate_tsc)(void); | 147 | unsigned long (*calibrate_tsc)(void); |
@@ -133,6 +149,7 @@ struct x86_platform_ops { | |||
133 | int (*set_wallclock)(unsigned long nowtime); | 149 | int (*set_wallclock)(unsigned long nowtime); |
134 | void (*iommu_shutdown)(void); | 150 | void (*iommu_shutdown)(void); |
135 | bool (*is_untracked_pat_range)(u64 start, u64 end); | 151 | bool (*is_untracked_pat_range)(u64 start, u64 end); |
152 | void (*nmi_init)(void); | ||
136 | }; | 153 | }; |
137 | 154 | ||
138 | extern struct x86_init_ops x86_init; | 155 | extern struct x86_init_ops x86_init; |
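The new x86_init_pci slot follows the established ops-table pattern: x86_init carries safe defaults chosen at build time, and a platform replaces individual hooks before generic code calls through the table; acpi_boot_init() later in this patch does exactly that with x86_init.pci.init = pci_acpi_init. Reduced to a sketch (the names below are illustrative, not the kernel's actual defaults):

	struct pci_init_ops {
		int  (*arch_init)(void);
		int  (*init)(void);
		void (*init_irq)(void);
		void (*fixup_irqs)(void);
	};

	static int  noop_init(void) { return 0; }
	static void noop_void(void) { }

	static struct pci_init_ops pci = {
		.arch_init  = noop_init,
		.init       = noop_init,
		.init_irq   = noop_void,
		.fixup_irqs = noop_void,
	};

	static int acpi_pci_init(void) { return 0; }	/* platform hook */

	static void platform_setup(void)
	{
		pci.init = acpi_pci_init;	/* override before first use */
	}

	static void pci_subsys_init(void)
	{
		pci.init();	/* generic code only ever calls the table */
	}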
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 727acc152344..ddc04ccad03b 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h | |||
@@ -27,9 +27,11 @@ | |||
27 | extern unsigned int xstate_size; | 27 | extern unsigned int xstate_size; |
28 | extern u64 pcntxt_mask; | 28 | extern u64 pcntxt_mask; |
29 | extern struct xsave_struct *init_xstate_buf; | 29 | extern struct xsave_struct *init_xstate_buf; |
30 | extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; | ||
30 | 31 | ||
31 | extern void xsave_cntxt_init(void); | 32 | extern void xsave_cntxt_init(void); |
32 | extern void xsave_init(void); | 33 | extern void xsave_init(void); |
34 | extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); | ||
33 | extern int init_fpu(struct task_struct *child); | 35 | extern int init_fpu(struct task_struct *child); |
34 | extern int check_for_xstate(struct i387_fxsave_struct __user *buf, | 36 | extern int check_for_xstate(struct i387_fxsave_struct __user *buf, |
35 | void __user *fpstate, | 37 | void __user *fpstate, |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d87f09bc5a52..4c58352209e0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -87,6 +87,7 @@ obj-$(CONFIG_VM86) += vm86_32.o | |||
87 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | 87 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o |
88 | 88 | ||
89 | obj-$(CONFIG_HPET_TIMER) += hpet.o | 89 | obj-$(CONFIG_HPET_TIMER) += hpet.o |
90 | obj-$(CONFIG_APB_TIMER) += apb_timer.o | ||
90 | 91 | ||
91 | obj-$(CONFIG_K8_NB) += k8.o | 92 | obj-$(CONFIG_K8_NB) += k8.o |
92 | obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o | 93 | obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index fb1035cd9a6a..cd40aba6aa95 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -31,10 +31,12 @@ | |||
31 | #include <linux/module.h> | 31 | #include <linux/module.h> |
32 | #include <linux/dmi.h> | 32 | #include <linux/dmi.h> |
33 | #include <linux/irq.h> | 33 | #include <linux/irq.h> |
34 | #include <linux/slab.h> | ||
34 | #include <linux/bootmem.h> | 35 | #include <linux/bootmem.h> |
35 | #include <linux/ioport.h> | 36 | #include <linux/ioport.h> |
36 | #include <linux/pci.h> | 37 | #include <linux/pci.h> |
37 | 38 | ||
39 | #include <asm/pci_x86.h> | ||
38 | #include <asm/pgtable.h> | 40 | #include <asm/pgtable.h> |
39 | #include <asm/io_apic.h> | 41 | #include <asm/io_apic.h> |
40 | #include <asm/apic.h> | 42 | #include <asm/apic.h> |
@@ -49,6 +51,7 @@ EXPORT_SYMBOL(acpi_disabled); | |||
49 | 51 | ||
50 | #ifdef CONFIG_X86_64 | 52 | #ifdef CONFIG_X86_64 |
51 | # include <asm/proto.h> | 53 | # include <asm/proto.h> |
54 | # include <asm/numa_64.h> | ||
52 | #endif /* X86 */ | 55 | #endif /* X86 */ |
53 | 56 | ||
54 | #define BAD_MADT_ENTRY(entry, end) ( \ | 57 | #define BAD_MADT_ENTRY(entry, end) ( \ |
@@ -446,6 +449,12 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) | |||
446 | int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) | 449 | int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) |
447 | { | 450 | { |
448 | *irq = gsi; | 451 | *irq = gsi; |
452 | |||
453 | #ifdef CONFIG_X86_IO_APIC | ||
454 | if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) | ||
455 | setup_IO_APIC_irq_extra(gsi); | ||
456 | #endif | ||
457 | |||
449 | return 0; | 458 | return 0; |
450 | } | 459 | } |
451 | 460 | ||
@@ -473,7 +482,8 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
473 | plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); | 482 | plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); |
474 | } | 483 | } |
475 | #endif | 484 | #endif |
476 | acpi_gsi_to_irq(plat_gsi, &irq); | 485 | irq = plat_gsi; |
486 | |||
477 | return irq; | 487 | return irq; |
478 | } | 488 | } |
479 | 489 | ||
@@ -481,6 +491,26 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
481 | * ACPI based hotplug support for CPU | 491 | * ACPI based hotplug support for CPU |
482 | */ | 492 | */ |
483 | #ifdef CONFIG_ACPI_HOTPLUG_CPU | 493 | #ifdef CONFIG_ACPI_HOTPLUG_CPU |
494 | #include <acpi/processor.h> | ||
495 | |||
496 | static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) | ||
497 | { | ||
498 | #ifdef CONFIG_ACPI_NUMA | ||
499 | int nid; | ||
500 | |||
501 | nid = acpi_get_node(handle); | ||
502 | if (nid == -1 || !node_online(nid)) | ||
503 | return; | ||
504 | #ifdef CONFIG_X86_64 | ||
505 | apicid_to_node[physid] = nid; | ||
506 | numa_set_node(cpu, nid); | ||
507 | #else /* CONFIG_X86_32 */ | ||
508 | apicid_2_node[physid] = nid; | ||
509 | cpu_to_node_map[cpu] = nid; | ||
510 | #endif | ||
511 | |||
512 | #endif | ||
513 | } | ||
484 | 514 | ||
485 | static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) | 515 | static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) |
486 | { | 516 | { |
@@ -539,7 +569,10 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) | |||
539 | goto free_new_map; | 569 | goto free_new_map; |
540 | } | 570 | } |
541 | 571 | ||
572 | acpi_processor_set_pdc(handle); | ||
573 | |||
542 | cpu = cpumask_first(new_map); | 574 | cpu = cpumask_first(new_map); |
575 | acpi_map_cpu2node(handle, cpu, physid); | ||
543 | 576 | ||
544 | *pcpu = cpu; | 577 | *pcpu = cpu; |
545 | retval = 0; | 578 | retval = 0; |
@@ -1185,9 +1218,6 @@ static void __init acpi_process_madt(void) | |||
1185 | if (!error) { | 1218 | if (!error) { |
1186 | acpi_lapic = 1; | 1219 | acpi_lapic = 1; |
1187 | 1220 | ||
1188 | #ifdef CONFIG_X86_BIGSMP | ||
1189 | generic_bigsmp_probe(); | ||
1190 | #endif | ||
1191 | /* | 1221 | /* |
1192 | * Parse MADT IO-APIC entries | 1222 | * Parse MADT IO-APIC entries |
1193 | */ | 1223 | */ |
@@ -1197,8 +1227,6 @@ static void __init acpi_process_madt(void) | |||
1197 | acpi_ioapic = 1; | 1227 | acpi_ioapic = 1; |
1198 | 1228 | ||
1199 | smp_found_config = 1; | 1229 | smp_found_config = 1; |
1200 | if (apic->setup_apic_routing) | ||
1201 | apic->setup_apic_routing(); | ||
1202 | } | 1230 | } |
1203 | } | 1231 | } |
1204 | if (error == -EINVAL) { | 1232 | if (error == -EINVAL) { |
@@ -1269,23 +1297,6 @@ static int __init dmi_disable_acpi(const struct dmi_system_id *d) | |||
1269 | } | 1297 | } |
1270 | 1298 | ||
1271 | /* | 1299 | /* |
1272 | * Limit ACPI to CPU enumeration for HT | ||
1273 | */ | ||
1274 | static int __init force_acpi_ht(const struct dmi_system_id *d) | ||
1275 | { | ||
1276 | if (!acpi_force) { | ||
1277 | printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", | ||
1278 | d->ident); | ||
1279 | disable_acpi(); | ||
1280 | acpi_ht = 1; | ||
1281 | } else { | ||
1282 | printk(KERN_NOTICE | ||
1283 | "Warning: acpi=force overrules DMI blacklist: acpi=ht\n"); | ||
1284 | } | ||
1285 | return 0; | ||
1286 | } | ||
1287 | |||
1288 | /* | ||
1289 | * Force ignoring BIOS IRQ0 pin2 override | 1300 | * Force ignoring BIOS IRQ0 pin2 override |
1290 | */ | 1301 | */ |
1291 | static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) | 1302 | static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) |
@@ -1321,90 +1332,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { | |||
1321 | }, | 1332 | }, |
1322 | 1333 | ||
1323 | /* | 1334 | /* |
1324 | * Boxes that need acpi=ht | ||
1325 | */ | ||
1326 | { | ||
1327 | .callback = force_acpi_ht, | ||
1328 | .ident = "FSC Primergy T850", | ||
1329 | .matches = { | ||
1330 | DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), | ||
1331 | DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"), | ||
1332 | }, | ||
1333 | }, | ||
1334 | { | ||
1335 | .callback = force_acpi_ht, | ||
1336 | .ident = "HP VISUALIZE NT Workstation", | ||
1337 | .matches = { | ||
1338 | DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), | ||
1339 | DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"), | ||
1340 | }, | ||
1341 | }, | ||
1342 | { | ||
1343 | .callback = force_acpi_ht, | ||
1344 | .ident = "Compaq Workstation W8000", | ||
1345 | .matches = { | ||
1346 | DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), | ||
1347 | DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), | ||
1348 | }, | ||
1349 | }, | ||
1350 | { | ||
1351 | .callback = force_acpi_ht, | ||
1352 | .ident = "ASUS P2B-DS", | ||
1353 | .matches = { | ||
1354 | DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), | ||
1355 | DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"), | ||
1356 | }, | ||
1357 | }, | ||
1358 | { | ||
1359 | .callback = force_acpi_ht, | ||
1360 | .ident = "ASUS CUR-DLS", | ||
1361 | .matches = { | ||
1362 | DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), | ||
1363 | DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"), | ||
1364 | }, | ||
1365 | }, | ||
1366 | { | ||
1367 | .callback = force_acpi_ht, | ||
1368 | .ident = "ABIT i440BX-W83977", | ||
1369 | .matches = { | ||
1370 | DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"), | ||
1371 | DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), | ||
1372 | }, | ||
1373 | }, | ||
1374 | { | ||
1375 | .callback = force_acpi_ht, | ||
1376 | .ident = "IBM Bladecenter", | ||
1377 | .matches = { | ||
1378 | DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
1379 | DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"), | ||
1380 | }, | ||
1381 | }, | ||
1382 | { | ||
1383 | .callback = force_acpi_ht, | ||
1384 | .ident = "IBM eServer xSeries 360", | ||
1385 | .matches = { | ||
1386 | DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
1387 | DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"), | ||
1388 | }, | ||
1389 | }, | ||
1390 | { | ||
1391 | .callback = force_acpi_ht, | ||
1392 | .ident = "IBM eserver xSeries 330", | ||
1393 | .matches = { | ||
1394 | DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
1395 | DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"), | ||
1396 | }, | ||
1397 | }, | ||
1398 | { | ||
1399 | .callback = force_acpi_ht, | ||
1400 | .ident = "IBM eserver xSeries 440", | ||
1401 | .matches = { | ||
1402 | DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
1403 | DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"), | ||
1404 | }, | ||
1405 | }, | ||
1406 | |||
1407 | /* | ||
1408 | * Boxes that need ACPI PCI IRQ routing disabled | 1335 | * Boxes that need ACPI PCI IRQ routing disabled |
1409 | */ | 1336 | */ |
1410 | { | 1337 | { |
@@ -1529,16 +1456,10 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { | |||
1529 | * if acpi_blacklisted() acpi_disabled = 1; | 1456 | * if acpi_blacklisted() acpi_disabled = 1; |
1530 | * acpi_irq_model=... | 1457 | * acpi_irq_model=... |
1531 | * ... | 1458 | * ... |
1532 | * | ||
1533 | * return value: (currently ignored) | ||
1534 | * 0: success | ||
1535 | * !0: failure | ||
1536 | */ | 1459 | */ |
1537 | 1460 | ||
1538 | int __init acpi_boot_table_init(void) | 1461 | void __init acpi_boot_table_init(void) |
1539 | { | 1462 | { |
1540 | int error; | ||
1541 | |||
1542 | dmi_check_system(acpi_dmi_table); | 1463 | dmi_check_system(acpi_dmi_table); |
1543 | 1464 | ||
1544 | /* | 1465 | /* |
@@ -1546,15 +1467,14 @@ int __init acpi_boot_table_init(void) | |||
1546 | * One exception: acpi=ht continues far enough to enumerate LAPICs | 1467 | * One exception: acpi=ht continues far enough to enumerate LAPICs |
1547 | */ | 1468 | */ |
1548 | if (acpi_disabled && !acpi_ht) | 1469 | if (acpi_disabled && !acpi_ht) |
1549 | return 1; | 1470 | return; |
1550 | 1471 | ||
1551 | /* | 1472 | /* |
1552 | * Initialize the ACPI boot-time table parser. | 1473 | * Initialize the ACPI boot-time table parser. |
1553 | */ | 1474 | */ |
1554 | error = acpi_table_init(); | 1475 | if (acpi_table_init()) { |
1555 | if (error) { | ||
1556 | disable_acpi(); | 1476 | disable_acpi(); |
1557 | return error; | 1477 | return; |
1558 | } | 1478 | } |
1559 | 1479 | ||
1560 | acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); | 1480 | acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); |
@@ -1562,18 +1482,15 @@ int __init acpi_boot_table_init(void) | |||
1562 | /* | 1482 | /* |
1563 | * blacklist may disable ACPI entirely | 1483 | * blacklist may disable ACPI entirely |
1564 | */ | 1484 | */ |
1565 | error = acpi_blacklisted(); | 1485 | if (acpi_blacklisted()) { |
1566 | if (error) { | ||
1567 | if (acpi_force) { | 1486 | if (acpi_force) { |
1568 | printk(KERN_WARNING PREFIX "acpi=force override\n"); | 1487 | printk(KERN_WARNING PREFIX "acpi=force override\n"); |
1569 | } else { | 1488 | } else { |
1570 | printk(KERN_WARNING PREFIX "Disabling ACPI support\n"); | 1489 | printk(KERN_WARNING PREFIX "Disabling ACPI support\n"); |
1571 | disable_acpi(); | 1490 | disable_acpi(); |
1572 | return error; | 1491 | return; |
1573 | } | 1492 | } |
1574 | } | 1493 | } |
1575 | |||
1576 | return 0; | ||
1577 | } | 1494 | } |
1578 | 1495 | ||
1579 | int __init early_acpi_boot_init(void) | 1496 | int __init early_acpi_boot_init(void) |
@@ -1619,6 +1536,9 @@ int __init acpi_boot_init(void) | |||
1619 | 1536 | ||
1620 | acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet); | 1537 | acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet); |
1621 | 1538 | ||
1539 | if (!acpi_noirq) | ||
1540 | x86_init.pci.init = pci_acpi_init; | ||
1541 | |||
1622 | return 0; | 1542 | return 0; |
1623 | } | 1543 | } |
1624 | 1544 | ||
@@ -1643,8 +1563,10 @@ static int __init parse_acpi(char *arg) | |||
1643 | } | 1563 | } |
1644 | /* Limit ACPI just to boot-time to enable HT */ | 1564 | /* Limit ACPI just to boot-time to enable HT */ |
1645 | else if (strcmp(arg, "ht") == 0) { | 1565 | else if (strcmp(arg, "ht") == 0) { |
1646 | if (!acpi_force) | 1566 | if (!acpi_force) { |
1567 | printk(KERN_WARNING "acpi=ht will be removed in Linux-2.6.35\n"); | ||
1647 | disable_acpi(); | 1568 | disable_acpi(); |
1569 | } | ||
1648 | acpi_ht = 1; | 1570 | acpi_ht = 1; |
1649 | } | 1571 | } |
1650 | /* acpi=rsdt use RSDT instead of XSDT */ | 1572 | /* acpi=rsdt use RSDT instead of XSDT */ |
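All of the acpi=ht quirk entries removed above share one shape: a dmi_system_id match table whose callback fires when the vendor/board strings match. A minimal sketch of that pattern — the ident and match strings here are hypothetical, not from any real machine:

#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/kernel.h>

static int __init example_quirk(const struct dmi_system_id *d)
{
	printk(KERN_NOTICE "%s detected: applying example quirk\n", d->ident);
	return 0;	/* non-zero would stop further table processing */
}

static struct dmi_system_id __initdata example_dmi_table[] = {
	{
		.callback = example_quirk,
		.ident = "Example Vendor Board",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Example Vendor"),
			DMI_MATCH(DMI_PRODUCT_NAME, "Example Board"),
		},
	},
	{ }	/* terminating empty entry */
};

/* typically run once during setup:  dmi_check_system(example_dmi_table); */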
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 82e508677b91..f9961034e557 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -162,6 +162,8 @@ static int __init acpi_sleep_setup(char *str) | |||
162 | #endif | 162 | #endif |
163 | if (strncmp(str, "old_ordering", 12) == 0) | 163 | if (strncmp(str, "old_ordering", 12) == 0) |
164 | acpi_old_suspend_ordering(); | 164 | acpi_old_suspend_ordering(); |
165 | if (strncmp(str, "sci_force_enable", 16) == 0) | ||
166 | acpi_set_sci_en_on_resume(); | ||
165 | str = strchr(str, ','); | 167 | str = strchr(str, ','); |
166 | if (str != NULL) | 168 | if (str != NULL) |
167 | str += strspn(str, ", \t"); | 169 | str += strspn(str, ", \t"); |
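The new sci_force_enable hook slots into acpi_sleep_setup()'s token walk, which scans a comma-separated option string with strncmp/strchr/strspn. A condensed sketch of the same walk, with hypothetical option names:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/string.h>

static int __init example_setup(char *str)
{
	while (str && *str) {
		if (strncmp(str, "opt_a", 5) == 0)
			pr_info("opt_a selected\n");
		if (strncmp(str, "opt_b", 5) == 0)
			pr_info("opt_b selected\n");
		str = strchr(str, ',');		/* advance to next token */
		if (str)
			str += strspn(str, ", \t");	/* skip separators */
	}
	return 1;
}
__setup("example=", example_setup);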
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 80b222ea4cf6..70237732a6c7 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -7,6 +7,8 @@ | |||
7 | #include <linux/mm.h> | 7 | #include <linux/mm.h> |
8 | #include <linux/vmalloc.h> | 8 | #include <linux/vmalloc.h> |
9 | #include <linux/memory.h> | 9 | #include <linux/memory.h> |
10 | #include <linux/stop_machine.h> | ||
11 | #include <linux/slab.h> | ||
10 | #include <asm/alternative.h> | 12 | #include <asm/alternative.h> |
11 | #include <asm/sections.h> | 13 | #include <asm/sections.h> |
12 | #include <asm/pgtable.h> | 14 | #include <asm/pgtable.h> |
@@ -192,7 +194,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) | |||
192 | } | 194 | } |
193 | 195 | ||
194 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | 196 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; |
195 | extern u8 *__smp_locks[], *__smp_locks_end[]; | 197 | extern s32 __smp_locks[], __smp_locks_end[]; |
196 | static void *text_poke_early(void *addr, const void *opcode, size_t len); | 198 | static void *text_poke_early(void *addr, const void *opcode, size_t len); |
197 | 199 | ||
198 | /* Replace instructions with better alternatives for this CPU type. | 200 | /* Replace instructions with better alternatives for this CPU type. |
@@ -233,39 +235,41 @@ void __init_or_module apply_alternatives(struct alt_instr *start, | |||
233 | 235 | ||
234 | #ifdef CONFIG_SMP | 236 | #ifdef CONFIG_SMP |
235 | 237 | ||
236 | static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) | 238 | static void alternatives_smp_lock(const s32 *start, const s32 *end, |
239 | u8 *text, u8 *text_end) | ||
237 | { | 240 | { |
238 | u8 **ptr; | 241 | const s32 *poff; |
239 | 242 | ||
240 | mutex_lock(&text_mutex); | 243 | mutex_lock(&text_mutex); |
241 | for (ptr = start; ptr < end; ptr++) { | 244 | for (poff = start; poff < end; poff++) { |
242 | if (*ptr < text) | 245 | u8 *ptr = (u8 *)poff + *poff; |
243 | continue; | 246 | |
244 | if (*ptr > text_end) | 247 | if (!*poff || ptr < text || ptr >= text_end) |
245 | continue; | 248 | continue; |
246 | /* turn DS segment override prefix into lock prefix */ | 249 | /* turn DS segment override prefix into lock prefix */ |
247 | if (**ptr == 0x3e) | 250 | if (*ptr == 0x3e) |
248 | text_poke(*ptr, ((unsigned char []){0xf0}), 1); | 251 | text_poke(ptr, ((unsigned char []){0xf0}), 1); |
249 | }; | 252 | }; |
250 | mutex_unlock(&text_mutex); | 253 | mutex_unlock(&text_mutex); |
251 | } | 254 | } |
252 | 255 | ||
253 | static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) | 256 | static void alternatives_smp_unlock(const s32 *start, const s32 *end, |
257 | u8 *text, u8 *text_end) | ||
254 | { | 258 | { |
255 | u8 **ptr; | 259 | const s32 *poff; |
256 | 260 | ||
257 | if (noreplace_smp) | 261 | if (noreplace_smp) |
258 | return; | 262 | return; |
259 | 263 | ||
260 | mutex_lock(&text_mutex); | 264 | mutex_lock(&text_mutex); |
261 | for (ptr = start; ptr < end; ptr++) { | 265 | for (poff = start; poff < end; poff++) { |
262 | if (*ptr < text) | 266 | u8 *ptr = (u8 *)poff + *poff; |
263 | continue; | 267 | |
264 | if (*ptr > text_end) | 268 | if (!*poff || ptr < text || ptr >= text_end) |
265 | continue; | 269 | continue; |
266 | /* turn lock prefix into DS segment override prefix */ | 270 | /* turn lock prefix into DS segment override prefix */ |
267 | if (**ptr == 0xf0) | 271 | if (*ptr == 0xf0) |
268 | text_poke(*ptr, ((unsigned char []){0x3E}), 1); | 272 | text_poke(ptr, ((unsigned char []){0x3E}), 1); |
269 | }; | 273 | }; |
270 | mutex_unlock(&text_mutex); | 274 | mutex_unlock(&text_mutex); |
271 | } | 275 | } |
@@ -276,8 +280,8 @@ struct smp_alt_module { | |||
276 | char *name; | 280 | char *name; |
277 | 281 | ||
278 | /* ptrs to lock prefixes */ | 282 | /* ptrs to lock prefixes */ |
279 | u8 **locks; | 283 | const s32 *locks; |
280 | u8 **locks_end; | 284 | const s32 *locks_end; |
281 | 285 | ||
282 | /* .text segment, needed to avoid patching init code ;) */ | 286 | /* .text segment, needed to avoid patching init code ;) */ |
283 | u8 *text; | 287 | u8 *text; |
@@ -394,6 +398,27 @@ void alternatives_smp_switch(int smp) | |||
394 | mutex_unlock(&smp_alt); | 398 | mutex_unlock(&smp_alt); |
395 | } | 399 | } |
396 | 400 | ||
401 | /* Return 1 if the address range is reserved for smp-alternatives */ | ||
402 | int alternatives_text_reserved(void *start, void *end) | ||
403 | { | ||
404 | struct smp_alt_module *mod; | ||
405 | const s32 *poff; | ||
406 | u8 *text_start = start; | ||
407 | u8 *text_end = end; | ||
408 | |||
409 | list_for_each_entry(mod, &smp_alt_modules, next) { | ||
410 | if (mod->text > text_end || mod->text_end < text_start) | ||
411 | continue; | ||
412 | for (poff = mod->locks; poff < mod->locks_end; poff++) { | ||
413 | const u8 *ptr = (const u8 *)poff + *poff; | ||
414 | |||
415 | if (text_start <= ptr && text_end > ptr) | ||
416 | return 1; | ||
417 | } | ||
418 | } | ||
419 | |||
420 | return 0; | ||
421 | } | ||
397 | #endif | 422 | #endif |
398 | 423 | ||
399 | #ifdef CONFIG_PARAVIRT | 424 | #ifdef CONFIG_PARAVIRT |
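The smp_locks table switches from absolute u8 * pointers to self-relative s32 offsets, which halves the table on 64-bit and needs no relocation. Recovering the absolute address, and the range test used by the new alternatives_text_reserved(), each reduce to a line — a sketch, assuming the same conventions as the code above:

#include <linux/types.h>

/* each table entry stores its target as a 32-bit offset from the
 * entry's own location */
static inline u8 *rel_to_abs(const s32 *entry)
{
	return (u8 *)entry + *entry;
}

/* two ranges overlap unless one ends before the other begins -- the
 * test alternatives_text_reserved() uses to skip unrelated modules */
static inline int ranges_overlap(u8 *a, u8 *a_end, u8 *b, u8 *b_end)
{
	return !(a > b_end || a_end < b);
}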
@@ -556,3 +581,62 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) | |||
556 | local_irq_restore(flags); | 581 | local_irq_restore(flags); |
557 | return addr; | 582 | return addr; |
558 | } | 583 | } |
584 | |||
585 | /* | ||
586 | * Cross-modifying kernel text with stop_machine(). | ||
587 | * This code originally comes from immediate value. | ||
588 | */ | ||
589 | static atomic_t stop_machine_first; | ||
590 | static int wrote_text; | ||
591 | |||
592 | struct text_poke_params { | ||
593 | void *addr; | ||
594 | const void *opcode; | ||
595 | size_t len; | ||
596 | }; | ||
597 | |||
598 | static int __kprobes stop_machine_text_poke(void *data) | ||
599 | { | ||
600 | struct text_poke_params *tpp = data; | ||
601 | |||
602 | if (atomic_dec_and_test(&stop_machine_first)) { | ||
603 | text_poke(tpp->addr, tpp->opcode, tpp->len); | ||
604 | smp_wmb(); /* Make sure other cpus see that this has run */ | ||
605 | wrote_text = 1; | ||
606 | } else { | ||
607 | while (!wrote_text) | ||
608 | cpu_relax(); | ||
609 | smp_mb(); /* Load wrote_text before following execution */ | ||
610 | } | ||
611 | |||
612 | flush_icache_range((unsigned long)tpp->addr, | ||
613 | (unsigned long)tpp->addr + tpp->len); | ||
614 | return 0; | ||
615 | } | ||
616 | |||
617 | /** | ||
618 | * text_poke_smp - Update instructions on a live kernel on SMP | ||
619 | * @addr: address to modify | ||
620 | * @opcode: source of the copy | ||
621 | * @len: length to copy | ||
622 | * | ||
623 | * Modify a multi-byte instruction by using stop_machine() on SMP. This allows | ||
624 | * a user to poke/set multi-byte text on SMP. Only modifying non-NMI/MCE code | ||
625 | * should be allowed, since stop_machine() does _not_ protect code against | ||
626 | * NMI and MCE. | ||
627 | * | ||
628 | * Note: Must be called under get_online_cpus() and text_mutex. | ||
629 | */ | ||
630 | void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) | ||
631 | { | ||
632 | struct text_poke_params tpp; | ||
633 | |||
634 | tpp.addr = addr; | ||
635 | tpp.opcode = opcode; | ||
636 | tpp.len = len; | ||
637 | atomic_set(&stop_machine_first, 1); | ||
638 | wrote_text = 0; | ||
639 | stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); | ||
640 | return addr; | ||
641 | } | ||
642 | |||
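Per the kernel-doc above, text_poke_smp() must run under get_online_cpus() and text_mutex. A hypothetical caller — the address and the five replacement bytes are placeholders, not from the patch:

#include <linux/cpu.h>
#include <linux/memory.h>
#include <linux/mutex.h>
#include <asm/alternative.h>

static void patch_call_site(void *addr, const u8 insn[5])
{
	get_online_cpus();
	mutex_lock(&text_mutex);
	text_poke_smp(addr, insn, 5);
	mutex_unlock(&text_mutex);
	put_online_cpus();
}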
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 23824fef789c..f854d89b7edf 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -18,8 +18,8 @@ | |||
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include <linux/pci.h> | 20 | #include <linux/pci.h> |
21 | #include <linux/gfp.h> | ||
22 | #include <linux/bitmap.h> | 21 | #include <linux/bitmap.h> |
22 | #include <linux/slab.h> | ||
23 | #include <linux/debugfs.h> | 23 | #include <linux/debugfs.h> |
24 | #include <linux/scatterlist.h> | 24 | #include <linux/scatterlist.h> |
25 | #include <linux/dma-mapping.h> | 25 | #include <linux/dma-mapping.h> |
@@ -118,7 +118,7 @@ static bool check_device(struct device *dev) | |||
118 | return false; | 118 | return false; |
119 | 119 | ||
120 | /* No device or no PCI device */ | 120 | /* No device or no PCI device */ |
121 | if (!dev || dev->bus != &pci_bus_type) | 121 | if (dev->bus != &pci_bus_type) |
122 | return false; | 122 | return false; |
123 | 123 | ||
124 | devid = get_device_id(dev); | 124 | devid = get_device_id(dev); |
@@ -392,6 +392,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | |||
392 | u32 tail, head; | 392 | u32 tail, head; |
393 | u8 *target; | 393 | u8 *target; |
394 | 394 | ||
395 | WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); | ||
395 | tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | 396 | tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); |
396 | target = iommu->cmd_buf + tail; | 397 | target = iommu->cmd_buf + tail; |
397 | memcpy_toio(target, cmd, sizeof(*cmd)); | 398 | memcpy_toio(target, cmd, sizeof(*cmd)); |
@@ -980,7 +981,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, | |||
980 | { | 981 | { |
981 | int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; | 982 | int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; |
982 | struct amd_iommu *iommu; | 983 | struct amd_iommu *iommu; |
983 | int i; | 984 | unsigned long i; |
984 | 985 | ||
985 | #ifdef CONFIG_IOMMU_STRESS | 986 | #ifdef CONFIG_IOMMU_STRESS |
986 | populate = false; | 987 | populate = false; |
@@ -1489,11 +1490,14 @@ static void __detach_device(struct device *dev) | |||
1489 | { | 1490 | { |
1490 | struct iommu_dev_data *dev_data = get_dev_data(dev); | 1491 | struct iommu_dev_data *dev_data = get_dev_data(dev); |
1491 | struct iommu_dev_data *alias_data; | 1492 | struct iommu_dev_data *alias_data; |
1493 | struct protection_domain *domain; | ||
1492 | unsigned long flags; | 1494 | unsigned long flags; |
1493 | 1495 | ||
1494 | BUG_ON(!dev_data->domain); | 1496 | BUG_ON(!dev_data->domain); |
1495 | 1497 | ||
1496 | spin_lock_irqsave(&dev_data->domain->lock, flags); | 1498 | domain = dev_data->domain; |
1499 | |||
1500 | spin_lock_irqsave(&domain->lock, flags); | ||
1497 | 1501 | ||
1498 | if (dev_data->alias != dev) { | 1502 | if (dev_data->alias != dev) { |
1499 | alias_data = get_dev_data(dev_data->alias); | 1503 | alias_data = get_dev_data(dev_data->alias); |
@@ -1504,13 +1508,15 @@ static void __detach_device(struct device *dev) | |||
1504 | if (atomic_dec_and_test(&dev_data->bind)) | 1508 | if (atomic_dec_and_test(&dev_data->bind)) |
1505 | do_detach(dev); | 1509 | do_detach(dev); |
1506 | 1510 | ||
1507 | spin_unlock_irqrestore(&dev_data->domain->lock, flags); | 1511 | spin_unlock_irqrestore(&domain->lock, flags); |
1508 | 1512 | ||
1509 | /* | 1513 | /* |
1510 | * If we run in passthrough mode the device must be assigned to the | 1514 | * If we run in passthrough mode the device must be assigned to the |
1511 | * passthrough domain if it is detached from any other domain | 1515 | * passthrough domain if it is detached from any other domain. |
1516 | * Make sure we can deassign from the pt_domain itself. | ||
1512 | */ | 1517 | */ |
1513 | if (iommu_pass_through && dev_data->domain == NULL) | 1518 | if (iommu_pass_through && |
1519 | (dev_data->domain == NULL && domain != pt_domain)) | ||
1514 | __attach_device(dev, pt_domain); | 1520 | __attach_device(dev, pt_domain); |
1515 | } | 1521 | } |
1516 | 1522 | ||
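The __detach_device() change guards against a use-after-clear: do_detach() NULLs dev_data->domain while the domain lock is held, so the old code unlocked through a stale pointer. A distilled sketch of the hazard and the fix, with deliberately simplified types:

#include <linux/spinlock.h>

struct dom { spinlock_t lock; };
struct dev_data { struct dom *domain; };

static void detach(struct dev_data *dd)
{
	struct dom *domain = dd->domain;	/* cache before it is cleared */
	unsigned long flags;

	spin_lock_irqsave(&domain->lock, flags);
	dd->domain = NULL;			/* what do_detach() does */
	spin_unlock_irqrestore(&domain->lock, flags);	/* safe: uses the cached pointer */
}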
@@ -2181,7 +2187,7 @@ static void prealloc_protection_domains(void) | |||
2181 | struct dma_ops_domain *dma_dom; | 2187 | struct dma_ops_domain *dma_dom; |
2182 | u16 devid; | 2188 | u16 devid; |
2183 | 2189 | ||
2184 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 2190 | for_each_pci_dev(dev) { |
2185 | 2191 | ||
2186 | /* Do we handle this device? */ | 2192 | /* Do we handle this device? */ |
2187 | if (!check_device(&dev->dev)) | 2193 | if (!check_device(&dev->dev)) |
@@ -2218,6 +2224,12 @@ static struct dma_map_ops amd_iommu_dma_ops = { | |||
2218 | /* | 2224 | /* |
2219 | * The function which clues the AMD IOMMU driver into dma_ops. | 2225 | * The function which clues the AMD IOMMU driver into dma_ops. |
2220 | */ | 2226 | */ |
2227 | |||
2228 | void __init amd_iommu_init_api(void) | ||
2229 | { | ||
2230 | register_iommu(&amd_iommu_ops); | ||
2231 | } | ||
2232 | |||
2221 | int __init amd_iommu_init_dma_ops(void) | 2233 | int __init amd_iommu_init_dma_ops(void) |
2222 | { | 2234 | { |
2223 | struct amd_iommu *iommu; | 2235 | struct amd_iommu *iommu; |
@@ -2253,8 +2265,6 @@ int __init amd_iommu_init_dma_ops(void) | |||
2253 | /* Make the driver finally visible to the drivers */ | 2265 | /* Make the driver finally visible to the drivers */ |
2254 | dma_ops = &amd_iommu_dma_ops; | 2266 | dma_ops = &amd_iommu_dma_ops; |
2255 | 2267 | ||
2256 | register_iommu(&amd_iommu_ops); | ||
2257 | |||
2258 | amd_iommu_stats_init(); | 2268 | amd_iommu_stats_init(); |
2259 | 2269 | ||
2260 | return 0; | 2270 | return 0; |
@@ -2289,7 +2299,7 @@ static void cleanup_domain(struct protection_domain *domain) | |||
2289 | list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { | 2299 | list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { |
2290 | struct device *dev = dev_data->dev; | 2300 | struct device *dev = dev_data->dev; |
2291 | 2301 | ||
2292 | do_detach(dev); | 2302 | __detach_device(dev); |
2293 | atomic_set(&dev_data->bind, 0); | 2303 | atomic_set(&dev_data->bind, 0); |
2294 | } | 2304 | } |
2295 | 2305 | ||
@@ -2318,6 +2328,7 @@ static struct protection_domain *protection_domain_alloc(void) | |||
2318 | return NULL; | 2328 | return NULL; |
2319 | 2329 | ||
2320 | spin_lock_init(&domain->lock); | 2330 | spin_lock_init(&domain->lock); |
2331 | mutex_init(&domain->api_lock); | ||
2321 | domain->id = domain_id_alloc(); | 2332 | domain->id = domain_id_alloc(); |
2322 | if (!domain->id) | 2333 | if (!domain->id) |
2323 | goto out_err; | 2334 | goto out_err; |
@@ -2370,9 +2381,7 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom) | |||
2370 | 2381 | ||
2371 | free_pagetable(domain); | 2382 | free_pagetable(domain); |
2372 | 2383 | ||
2373 | domain_id_free(domain->id); | 2384 | protection_domain_free(domain); |
2374 | |||
2375 | kfree(domain); | ||
2376 | 2385 | ||
2377 | dom->priv = NULL; | 2386 | dom->priv = NULL; |
2378 | } | 2387 | } |
@@ -2447,6 +2456,8 @@ static int amd_iommu_map_range(struct iommu_domain *dom, | |||
2447 | iova &= PAGE_MASK; | 2456 | iova &= PAGE_MASK; |
2448 | paddr &= PAGE_MASK; | 2457 | paddr &= PAGE_MASK; |
2449 | 2458 | ||
2459 | mutex_lock(&domain->api_lock); | ||
2460 | |||
2450 | for (i = 0; i < npages; ++i) { | 2461 | for (i = 0; i < npages; ++i) { |
2451 | ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); | 2462 | ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); |
2452 | if (ret) | 2463 | if (ret) |
@@ -2456,6 +2467,8 @@ static int amd_iommu_map_range(struct iommu_domain *dom, | |||
2456 | paddr += PAGE_SIZE; | 2467 | paddr += PAGE_SIZE; |
2457 | } | 2468 | } |
2458 | 2469 | ||
2470 | mutex_unlock(&domain->api_lock); | ||
2471 | |||
2459 | return 0; | 2472 | return 0; |
2460 | } | 2473 | } |
2461 | 2474 | ||
@@ -2468,12 +2481,16 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, | |||
2468 | 2481 | ||
2469 | iova &= PAGE_MASK; | 2482 | iova &= PAGE_MASK; |
2470 | 2483 | ||
2484 | mutex_lock(&domain->api_lock); | ||
2485 | |||
2471 | for (i = 0; i < npages; ++i) { | 2486 | for (i = 0; i < npages; ++i) { |
2472 | iommu_unmap_page(domain, iova, PM_MAP_4k); | 2487 | iommu_unmap_page(domain, iova, PM_MAP_4k); |
2473 | iova += PAGE_SIZE; | 2488 | iova += PAGE_SIZE; |
2474 | } | 2489 | } |
2475 | 2490 | ||
2476 | iommu_flush_tlb_pde(domain); | 2491 | iommu_flush_tlb_pde(domain); |
2492 | |||
2493 | mutex_unlock(&domain->api_lock); | ||
2477 | } | 2494 | } |
2478 | 2495 | ||
2479 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, | 2496 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, |
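amd_iommu_map_range()/amd_iommu_unmap_range() now take a per-domain api_lock, so concurrent IOMMU-API callers cannot interleave page-table updates with the TLB flush. The shape of that locking, with a stand-in for iommu_map_page():

#include <linux/mutex.h>

struct prot_domain {
	struct mutex api_lock;
	/* ... page-table state ... */
};

static int example_map_page(struct prot_domain *d) { return 0; }	/* stand-in */

static int map_pages(struct prot_domain *d, int npages)
{
	int i, ret = 0;

	mutex_lock(&d->api_lock);
	for (i = 0; i < npages; i++) {
		ret = example_map_page(d);
		if (ret)
			break;
	}
	mutex_unlock(&d->api_lock);	/* unmap path flushes the TLB before unlocking */
	return ret;
}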
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 1dca9c34eaeb..6360abf993d4 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -19,8 +19,8 @@ | |||
19 | 19 | ||
20 | #include <linux/pci.h> | 20 | #include <linux/pci.h> |
21 | #include <linux/acpi.h> | 21 | #include <linux/acpi.h> |
22 | #include <linux/gfp.h> | ||
23 | #include <linux/list.h> | 22 | #include <linux/list.h> |
23 | #include <linux/slab.h> | ||
24 | #include <linux/sysdev.h> | 24 | #include <linux/sysdev.h> |
25 | #include <linux/interrupt.h> | 25 | #include <linux/interrupt.h> |
26 | #include <linux/msi.h> | 26 | #include <linux/msi.h> |
@@ -138,6 +138,11 @@ int amd_iommus_present; | |||
138 | bool amd_iommu_np_cache __read_mostly; | 138 | bool amd_iommu_np_cache __read_mostly; |
139 | 139 | ||
140 | /* | 140 | /* |
141 | * The ACPI table parsing functions set this variable on an error | ||
142 | */ | ||
143 | static int __initdata amd_iommu_init_err; | ||
144 | |||
145 | /* | ||
141 | * List of protection domains - used during resume | 146 | * List of protection domains - used during resume |
142 | */ | 147 | */ |
143 | LIST_HEAD(amd_iommu_pd_list); | 148 | LIST_HEAD(amd_iommu_pd_list); |
@@ -386,9 +391,11 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) | |||
386 | */ | 391 | */ |
387 | for (i = 0; i < table->length; ++i) | 392 | for (i = 0; i < table->length; ++i) |
388 | checksum += p[i]; | 393 | checksum += p[i]; |
389 | if (checksum != 0) | 394 | if (checksum != 0) { |
390 | /* ACPI table corrupt */ | 395 | /* ACPI table corrupt */ |
391 | return -ENODEV; | 396 | amd_iommu_init_err = -ENODEV; |
397 | return 0; | ||
398 | } | ||
392 | 399 | ||
393 | p += IVRS_HEADER_LENGTH; | 400 | p += IVRS_HEADER_LENGTH; |
394 | 401 | ||
@@ -431,7 +438,7 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | |||
431 | if (cmd_buf == NULL) | 438 | if (cmd_buf == NULL) |
432 | return NULL; | 439 | return NULL; |
433 | 440 | ||
434 | iommu->cmd_buf_size = CMD_BUFFER_SIZE; | 441 | iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED; |
435 | 442 | ||
436 | return cmd_buf; | 443 | return cmd_buf; |
437 | } | 444 | } |
@@ -467,12 +474,13 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) | |||
467 | &entry, sizeof(entry)); | 474 | &entry, sizeof(entry)); |
468 | 475 | ||
469 | amd_iommu_reset_cmd_buffer(iommu); | 476 | amd_iommu_reset_cmd_buffer(iommu); |
477 | iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED); | ||
470 | } | 478 | } |
471 | 479 | ||
472 | static void __init free_command_buffer(struct amd_iommu *iommu) | 480 | static void __init free_command_buffer(struct amd_iommu *iommu) |
473 | { | 481 | { |
474 | free_pages((unsigned long)iommu->cmd_buf, | 482 | free_pages((unsigned long)iommu->cmd_buf, |
475 | get_order(iommu->cmd_buf_size)); | 483 | get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED))); |
476 | } | 484 | } |
477 | 485 | ||
478 | /* allocates the memory where the IOMMU will log its events to */ | 486 | /* allocates the memory where the IOMMU will log its events to */ |
@@ -915,11 +923,16 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
915 | h->mmio_phys); | 923 | h->mmio_phys); |
916 | 924 | ||
917 | iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); | 925 | iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); |
918 | if (iommu == NULL) | 926 | if (iommu == NULL) { |
919 | return -ENOMEM; | 927 | amd_iommu_init_err = -ENOMEM; |
928 | return 0; | ||
929 | } | ||
930 | |||
920 | ret = init_iommu_one(iommu, h); | 931 | ret = init_iommu_one(iommu, h); |
921 | if (ret) | 932 | if (ret) { |
922 | return ret; | 933 | amd_iommu_init_err = ret; |
934 | return 0; | ||
935 | } | ||
923 | break; | 936 | break; |
924 | default: | 937 | default: |
925 | break; | 938 | break; |
@@ -1204,6 +1217,10 @@ static int __init amd_iommu_init(void) | |||
1204 | if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) | 1217 | if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) |
1205 | return -ENODEV; | 1218 | return -ENODEV; |
1206 | 1219 | ||
1220 | ret = amd_iommu_init_err; | ||
1221 | if (ret) | ||
1222 | goto out; | ||
1223 | |||
1207 | dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); | 1224 | dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); |
1208 | alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); | 1225 | alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); |
1209 | rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); | 1226 | rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); |
@@ -1263,9 +1280,19 @@ static int __init amd_iommu_init(void) | |||
1263 | if (acpi_table_parse("IVRS", init_iommu_all) != 0) | 1280 | if (acpi_table_parse("IVRS", init_iommu_all) != 0) |
1264 | goto free; | 1281 | goto free; |
1265 | 1282 | ||
1283 | if (amd_iommu_init_err) { | ||
1284 | ret = amd_iommu_init_err; | ||
1285 | goto free; | ||
1286 | } | ||
1287 | |||
1266 | if (acpi_table_parse("IVRS", init_memory_definitions) != 0) | 1288 | if (acpi_table_parse("IVRS", init_memory_definitions) != 0) |
1267 | goto free; | 1289 | goto free; |
1268 | 1290 | ||
1291 | if (amd_iommu_init_err) { | ||
1292 | ret = amd_iommu_init_err; | ||
1293 | goto free; | ||
1294 | } | ||
1295 | |||
1269 | ret = sysdev_class_register(&amd_iommu_sysdev_class); | 1296 | ret = sysdev_class_register(&amd_iommu_sysdev_class); |
1270 | if (ret) | 1297 | if (ret) |
1271 | goto free; | 1298 | goto free; |
@@ -1278,16 +1305,19 @@ static int __init amd_iommu_init(void) | |||
1278 | if (ret) | 1305 | if (ret) |
1279 | goto free; | 1306 | goto free; |
1280 | 1307 | ||
1308 | enable_iommus(); | ||
1309 | |||
1281 | if (iommu_pass_through) | 1310 | if (iommu_pass_through) |
1282 | ret = amd_iommu_init_passthrough(); | 1311 | ret = amd_iommu_init_passthrough(); |
1283 | else | 1312 | else |
1284 | ret = amd_iommu_init_dma_ops(); | 1313 | ret = amd_iommu_init_dma_ops(); |
1314 | |||
1285 | if (ret) | 1315 | if (ret) |
1286 | goto free; | 1316 | goto free; |
1287 | 1317 | ||
1288 | amd_iommu_init_notifier(); | 1318 | amd_iommu_init_api(); |
1289 | 1319 | ||
1290 | enable_iommus(); | 1320 | amd_iommu_init_notifier(); |
1291 | 1321 | ||
1292 | if (iommu_pass_through) | 1322 | if (iommu_pass_through) |
1293 | goto out; | 1323 | goto out; |
@@ -1302,6 +1332,7 @@ out: | |||
1302 | return ret; | 1332 | return ret; |
1303 | 1333 | ||
1304 | free: | 1334 | free: |
1335 | disable_iommus(); | ||
1305 | 1336 | ||
1306 | amd_iommu_uninit_devices(); | 1337 | amd_iommu_uninit_devices(); |
1307 | 1338 | ||
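The IVRS callbacks above now return 0 unconditionally and stash the real error in the static amd_iommu_init_err, which amd_iommu_init() checks after each pass; returning the error directly would be conflated with acpi_table_parse()'s own found/not-found result. A reduced sketch of the pattern, with hypothetical names and a simplified callback signature:

#include <linux/init.h>
#include <linux/errno.h>

static int __initdata example_init_err;

static int __init example_parse_cb(void *table)
{
	if (!table) {
		example_init_err = -ENODEV;	/* stash it, don't return it */
		return 0;
	}
	/* ... parse the table ... */
	return 0;
}

static int __init example_init(void)
{
	/* acpi_table_parse("XXXX", example_parse_cb); */
	if (example_init_err)
		return example_init_err;	/* propagate the stashed error */
	return 0;
}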
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c new file mode 100644 index 000000000000..ff469e470059 --- /dev/null +++ b/arch/x86/kernel/apb_timer.c | |||
@@ -0,0 +1,785 @@ | |||
1 | /* | ||
2 | * apb_timer.c: Driver for Langwell APB timers | ||
3 | * | ||
4 | * (C) Copyright 2009 Intel Corporation | ||
5 | * Author: Jacob Pan (jacob.jun.pan@intel.com) | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; version 2 | ||
10 | * of the License. | ||
11 | * | ||
12 | * Note: | ||
13 | * Langwell is the south complex of Intel Moorestown MID platform. There are | ||
14 | * eight external timers in total that can be used by the operating system. | ||
15 | * The timer information, such as frequency and addresses, is provided to the | ||
16 | * OS via SFI tables. | ||
17 | * Timer interrupts are routed via FW/HW emulated IOAPIC independently via | ||
18 | * individual redirection table entries (RTE). | ||
19 | * Unlike HPET, there is no master counter; therefore one of the timers is | ||
20 | * used as the clocksource. The overall allocation looks like: | ||
21 | * - timers 0 .. NR_CPUS-1 for the per-CPU timers | ||
22 | * - one timer for the clocksource | ||
23 | * - one timer for the watchdog driver. | ||
24 | * It is also worth noting that the APB timer does not support true one-shot | ||
25 | * mode; free-running mode is used here to emulate it. The APB timer can also | ||
26 | * be used as a broadcast timer along with the per-CPU local APIC timer, but | ||
27 | * by default the APB timer has a higher rating than the local APIC timers. | ||
28 | */ | ||
29 | |||
30 | #include <linux/clocksource.h> | ||
31 | #include <linux/clockchips.h> | ||
32 | #include <linux/delay.h> | ||
33 | #include <linux/errno.h> | ||
34 | #include <linux/init.h> | ||
35 | #include <linux/sysdev.h> | ||
36 | #include <linux/slab.h> | ||
37 | #include <linux/pm.h> | ||
38 | #include <linux/pci.h> | ||
39 | #include <linux/sfi.h> | ||
40 | #include <linux/interrupt.h> | ||
41 | #include <linux/cpu.h> | ||
42 | #include <linux/irq.h> | ||
43 | |||
44 | #include <asm/fixmap.h> | ||
45 | #include <asm/apb_timer.h> | ||
46 | |||
47 | #define APBT_MASK CLOCKSOURCE_MASK(32) | ||
48 | #define APBT_SHIFT 22 | ||
49 | #define APBT_CLOCKEVENT_RATING 150 | ||
50 | #define APBT_CLOCKSOURCE_RATING 250 | ||
51 | #define APBT_MIN_DELTA_USEC 200 | ||
52 | |||
53 | #define EVT_TO_APBT_DEV(evt) container_of(evt, struct apbt_dev, evt) | ||
54 | #define APBT_CLOCKEVENT0_NUM (0) | ||
55 | #define APBT_CLOCKEVENT1_NUM (1) | ||
56 | #define APBT_CLOCKSOURCE_NUM (2) | ||
57 | |||
58 | static unsigned long apbt_address; | ||
59 | static int apb_timer_block_enabled; | ||
60 | static void __iomem *apbt_virt_address; | ||
61 | static int phy_cs_timer_id; | ||
62 | |||
63 | /* | ||
64 | * Common DW APB timer info | ||
65 | */ | ||
66 | static uint64_t apbt_freq; | ||
67 | |||
68 | static void apbt_set_mode(enum clock_event_mode mode, | ||
69 | struct clock_event_device *evt); | ||
70 | static int apbt_next_event(unsigned long delta, | ||
71 | struct clock_event_device *evt); | ||
72 | static cycle_t apbt_read_clocksource(struct clocksource *cs); | ||
73 | static void apbt_restart_clocksource(struct clocksource *cs); | ||
74 | |||
75 | struct apbt_dev { | ||
76 | struct clock_event_device evt; | ||
77 | unsigned int num; | ||
78 | int cpu; | ||
79 | unsigned int irq; | ||
80 | unsigned int tick; | ||
81 | unsigned int count; | ||
82 | unsigned int flags; | ||
83 | char name[10]; | ||
84 | }; | ||
85 | |||
86 | int disable_apbt_percpu __cpuinitdata; | ||
87 | |||
88 | static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev); | ||
89 | |||
90 | #ifdef CONFIG_SMP | ||
91 | static unsigned int apbt_num_timers_used; | ||
92 | static struct apbt_dev *apbt_devs; | ||
93 | #endif | ||
94 | |||
95 | static inline unsigned long apbt_readl_reg(unsigned long a) | ||
96 | { | ||
97 | return readl(apbt_virt_address + a); | ||
98 | } | ||
99 | |||
100 | static inline void apbt_writel_reg(unsigned long d, unsigned long a) | ||
101 | { | ||
102 | writel(d, apbt_virt_address + a); | ||
103 | } | ||
104 | |||
105 | static inline unsigned long apbt_readl(int n, unsigned long a) | ||
106 | { | ||
107 | return readl(apbt_virt_address + a + n * APBTMRS_REG_SIZE); | ||
108 | } | ||
109 | |||
110 | static inline void apbt_writel(int n, unsigned long d, unsigned long a) | ||
111 | { | ||
112 | writel(d, apbt_virt_address + a + n * APBTMRS_REG_SIZE); | ||
113 | } | ||
114 | |||
115 | static inline void apbt_set_mapping(void) | ||
116 | { | ||
117 | struct sfi_timer_table_entry *mtmr; | ||
118 | |||
119 | if (apbt_virt_address) { | ||
120 | pr_debug("APBT base already mapped\n"); | ||
121 | return; | ||
122 | } | ||
123 | mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM); | ||
124 | if (mtmr == NULL) { | ||
125 | printk(KERN_ERR "Failed to get MTMR %d from SFI\n", | ||
126 | APBT_CLOCKEVENT0_NUM); | ||
127 | return; | ||
128 | } | ||
129 | apbt_address = (unsigned long)mtmr->phys_addr; | ||
130 | if (!apbt_address) { | ||
131 | printk(KERN_WARNING "No timer base from SFI, use default\n"); | ||
132 | apbt_address = APBT_DEFAULT_BASE; | ||
133 | } | ||
134 | apbt_virt_address = ioremap_nocache(apbt_address, APBT_MMAP_SIZE); | ||
135 | if (apbt_virt_address) { | ||
136 | pr_debug("Mapped APBT physical addr %p at virtual addr %p\n",\ | ||
137 | (void *)apbt_address, (void *)apbt_virt_address); | ||
138 | } else { | ||
139 | pr_debug("Failed mapping APBT phy address at %p\n",\ | ||
140 | (void *)apbt_address); | ||
141 | goto panic_noapbt; | ||
142 | } | ||
143 | apbt_freq = mtmr->freq_hz / USEC_PER_SEC; | ||
144 | sfi_free_mtmr(mtmr); | ||
145 | |||
146 | /* Now figure out the physical timer id for clocksource device */ | ||
147 | mtmr = sfi_get_mtmr(APBT_CLOCKSOURCE_NUM); | ||
148 | if (mtmr == NULL) | ||
149 | goto panic_noapbt; | ||
150 | |||
151 | /* Now figure out the physical timer id */ | ||
152 | phy_cs_timer_id = (unsigned int)(mtmr->phys_addr & 0xff) | ||
153 | / APBTMRS_REG_SIZE; | ||
154 | pr_debug("Use timer %d for clocksource\n", phy_cs_timer_id); | ||
155 | return; | ||
156 | |||
157 | panic_noapbt: | ||
158 | panic("Failed to setup APB system timer\n"); | ||
159 | |||
160 | } | ||
161 | |||
162 | static inline void apbt_clear_mapping(void) | ||
163 | { | ||
164 | iounmap(apbt_virt_address); | ||
165 | apbt_virt_address = NULL; | ||
166 | } | ||
167 | |||
168 | /* | ||
169 | * APBT timer interrupt enable / disable | ||
170 | */ | ||
171 | static inline int is_apbt_capable(void) | ||
172 | { | ||
173 | return apbt_virt_address ? 1 : 0; | ||
174 | } | ||
175 | |||
176 | static struct clocksource clocksource_apbt = { | ||
177 | .name = "apbt", | ||
178 | .rating = APBT_CLOCKSOURCE_RATING, | ||
179 | .read = apbt_read_clocksource, | ||
180 | .mask = APBT_MASK, | ||
181 | .shift = APBT_SHIFT, | ||
182 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
183 | .resume = apbt_restart_clocksource, | ||
184 | }; | ||
185 | |||
186 | /* boot APB clock event device */ | ||
187 | static struct clock_event_device apbt_clockevent = { | ||
188 | .name = "apbt0", | ||
189 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | ||
190 | .set_mode = apbt_set_mode, | ||
191 | .set_next_event = apbt_next_event, | ||
192 | .shift = APBT_SHIFT, | ||
193 | .irq = 0, | ||
194 | .rating = APBT_CLOCKEVENT_RATING, | ||
195 | }; | ||
196 | |||
197 | /* | ||
198 | * If the user does not want to use the per-CPU APB timer, just give it a lower | ||
199 | * rating than the local APIC timer and skip the late per-CPU timer init. | ||
200 | */ | ||
201 | static inline int __init setup_x86_mrst_timer(char *arg) | ||
202 | { | ||
203 | if (!arg) | ||
204 | return -EINVAL; | ||
205 | |||
206 | if (strcmp("apbt_only", arg) == 0) | ||
207 | disable_apbt_percpu = 0; | ||
208 | else if (strcmp("lapic_and_apbt", arg) == 0) | ||
209 | disable_apbt_percpu = 1; | ||
210 | else { | ||
211 | pr_warning("X86 MRST timer option %s not recognised" | ||
212 | " use x86_mrst_timer=apbt_only or lapic_and_apbt\n", | ||
213 | arg); | ||
214 | return -EINVAL; | ||
215 | } | ||
216 | return 0; | ||
217 | } | ||
218 | __setup("x86_mrst_timer=", setup_x86_mrst_timer); | ||
219 | |||
220 | /* | ||
221 | * Start counting down from 0xffff_ffff. This is done by toggling the enable | ||
222 | * bit and then loading the initial load count of ~0. | ||
223 | */ | ||
224 | static void apbt_start_counter(int n) | ||
225 | { | ||
226 | unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); | ||
227 | |||
228 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
229 | apbt_writel(n, ctrl, APBTMR_N_CONTROL); | ||
230 | apbt_writel(n, ~0, APBTMR_N_LOAD_COUNT); | ||
231 | /* enable, mask interrupt */ | ||
232 | ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC; | ||
233 | ctrl |= (APBTMR_CONTROL_ENABLE | APBTMR_CONTROL_INT); | ||
234 | apbt_writel(n, ctrl, APBTMR_N_CONTROL); | ||
235 | /* read it once to get cached counter value initialized */ | ||
236 | apbt_read_clocksource(&clocksource_apbt); | ||
237 | } | ||
238 | |||
239 | static irqreturn_t apbt_interrupt_handler(int irq, void *data) | ||
240 | { | ||
241 | struct apbt_dev *dev = (struct apbt_dev *)data; | ||
242 | struct clock_event_device *aevt = &dev->evt; | ||
243 | |||
244 | if (!aevt->event_handler) { | ||
245 | printk(KERN_INFO "Spurious APBT timer interrupt on %d\n", | ||
246 | dev->num); | ||
247 | return IRQ_NONE; | ||
248 | } | ||
249 | aevt->event_handler(aevt); | ||
250 | return IRQ_HANDLED; | ||
251 | } | ||
252 | |||
253 | static void apbt_restart_clocksource(struct clocksource *cs) | ||
254 | { | ||
255 | apbt_start_counter(phy_cs_timer_id); | ||
256 | } | ||
257 | |||
258 | /* Setup IRQ routing via IOAPIC */ | ||
259 | #ifdef CONFIG_SMP | ||
260 | static void apbt_setup_irq(struct apbt_dev *adev) | ||
261 | { | ||
262 | struct irq_chip *chip; | ||
263 | struct irq_desc *desc; | ||
264 | |||
265 | /* timer0 irq has been setup early */ | ||
266 | if (adev->irq == 0) | ||
267 | return; | ||
268 | desc = irq_to_desc(adev->irq); | ||
269 | chip = get_irq_chip(adev->irq); | ||
270 | disable_irq(adev->irq); | ||
271 | desc->status |= IRQ_MOVE_PCNTXT; | ||
272 | irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); | ||
273 | /* APB timer irqs are set up as mp_irqs, timer is edge triggered */ | ||
274 | set_irq_chip_and_handler_name(adev->irq, chip, handle_edge_irq, "edge"); | ||
275 | enable_irq(adev->irq); | ||
276 | if (system_state == SYSTEM_BOOTING) | ||
277 | if (request_irq(adev->irq, apbt_interrupt_handler, | ||
278 | IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, | ||
279 | adev->name, adev)) { | ||
280 | printk(KERN_ERR "Failed to request IRQ for APBT%d\n", | ||
281 | adev->num); | ||
282 | } | ||
283 | } | ||
284 | #endif | ||
285 | |||
286 | static void apbt_enable_int(int n) | ||
287 | { | ||
288 | unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); | ||
289 | /* clear pending intr */ | ||
290 | apbt_readl(n, APBTMR_N_EOI); | ||
291 | ctrl &= ~APBTMR_CONTROL_INT; | ||
292 | apbt_writel(n, ctrl, APBTMR_N_CONTROL); | ||
293 | } | ||
294 | |||
295 | static void apbt_disable_int(int n) | ||
296 | { | ||
297 | unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); | ||
298 | |||
299 | ctrl |= APBTMR_CONTROL_INT; | ||
300 | apbt_writel(n, ctrl, APBTMR_N_CONTROL); | ||
301 | } | ||
302 | |||
303 | |||
304 | static int __init apbt_clockevent_register(void) | ||
305 | { | ||
306 | struct sfi_timer_table_entry *mtmr; | ||
307 | struct apbt_dev *adev = &__get_cpu_var(cpu_apbt_dev); | ||
308 | |||
309 | mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM); | ||
310 | if (mtmr == NULL) { | ||
311 | printk(KERN_ERR "Failed to get MTMR %d from SFI\n", | ||
312 | APBT_CLOCKEVENT0_NUM); | ||
313 | return -ENODEV; | ||
314 | } | ||
315 | |||
316 | /* | ||
317 | * We need to calculate the scaled math multiplication factor for | ||
318 | * nanosecond to apbt tick conversion. | ||
319 | * mult = (nsec/cycle)*2^APBT_SHIFT | ||
320 | */ | ||
321 | apbt_clockevent.mult = div_sc((unsigned long) mtmr->freq_hz | ||
322 | , NSEC_PER_SEC, APBT_SHIFT); | ||
323 | |||
324 | /* Calculate the min / max delta */ | ||
325 | apbt_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, | ||
326 | &apbt_clockevent); | ||
327 | apbt_clockevent.min_delta_ns = clockevent_delta2ns( | ||
328 | APBT_MIN_DELTA_USEC*apbt_freq, | ||
329 | &apbt_clockevent); | ||
330 | /* | ||
331 | * Start apbt with the boot cpu mask and make it | ||
332 | * global if not used for per cpu timer. | ||
333 | */ | ||
334 | apbt_clockevent.cpumask = cpumask_of(smp_processor_id()); | ||
335 | adev->num = smp_processor_id(); | ||
336 | memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device)); | ||
337 | |||
338 | if (disable_apbt_percpu) { | ||
339 | apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100; | ||
340 | global_clock_event = &adev->evt; | ||
341 | printk(KERN_DEBUG "%s clockevent registered as global\n", | ||
342 | global_clock_event->name); | ||
343 | } | ||
344 | |||
345 | if (request_irq(apbt_clockevent.irq, apbt_interrupt_handler, | ||
346 | IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, | ||
347 | apbt_clockevent.name, adev)) { | ||
348 | printk(KERN_ERR "Failed to request IRQ for APBT%d\n", | ||
349 | apbt_clockevent.irq); | ||
350 | } | ||
351 | |||
352 | clockevents_register_device(&adev->evt); | ||
353 | /* Start APBT 0 interrupts */ | ||
354 | apbt_enable_int(APBT_CLOCKEVENT0_NUM); | ||
355 | |||
356 | sfi_free_mtmr(mtmr); | ||
357 | return 0; | ||
358 | } | ||
359 | |||
360 | #ifdef CONFIG_SMP | ||
361 | /* Should be called with per cpu */ | ||
362 | void apbt_setup_secondary_clock(void) | ||
363 | { | ||
364 | struct apbt_dev *adev; | ||
365 | struct clock_event_device *aevt; | ||
366 | int cpu; | ||
367 | |||
368 | /* Don't register boot CPU clockevent */ | ||
369 | cpu = smp_processor_id(); | ||
370 | if (cpu == boot_cpu_id) | ||
371 | return; | ||
372 | /* | ||
373 | * We need to calculate the scaled math multiplication factor for | ||
374 | * nanosecond to apbt tick conversion. | ||
375 | * mult = (nsec/cycle)*2^APBT_SHIFT | ||
376 | */ | ||
377 | printk(KERN_INFO "Init per CPU clockevent %d\n", cpu); | ||
378 | adev = &per_cpu(cpu_apbt_dev, cpu); | ||
379 | aevt = &adev->evt; | ||
380 | |||
381 | memcpy(aevt, &apbt_clockevent, sizeof(*aevt)); | ||
382 | aevt->cpumask = cpumask_of(cpu); | ||
383 | aevt->name = adev->name; | ||
384 | aevt->mode = CLOCK_EVT_MODE_UNUSED; | ||
385 | |||
386 | printk(KERN_INFO "Registering CPU %d clockevent device %s, mask %08x\n", | ||
387 | cpu, aevt->name, *(u32 *)aevt->cpumask); | ||
388 | |||
389 | apbt_setup_irq(adev); | ||
390 | |||
391 | clockevents_register_device(aevt); | ||
392 | |||
393 | apbt_enable_int(cpu); | ||
394 | |||
395 | return; | ||
396 | } | ||
397 | |||
398 | /* | ||
399 | * This notify handler processes CPU hotplug events. In case of S0i3, nonboot | ||
400 | * cpus are disabled/enabled frequently; for performance reasons, we keep the | ||
401 | * per cpu timer irq registered so that we do not need to free_irq/request_irq. | ||
402 | * | ||
403 | * TODO: It might be more reliable to directly disable the percpu clockevent | ||
404 | * device without the notifier chain. Currently, cpu 0 may get interrupts from | ||
405 | * other cpu timers during the offline process due to the ordering of | ||
406 | * notification. The extra interrupt is harmless. | ||
407 | */ | ||
408 | static int apbt_cpuhp_notify(struct notifier_block *n, | ||
409 | unsigned long action, void *hcpu) | ||
410 | { | ||
411 | unsigned long cpu = (unsigned long)hcpu; | ||
412 | struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu); | ||
413 | |||
414 | switch (action & 0xf) { | ||
415 | case CPU_DEAD: | ||
416 | apbt_disable_int(cpu); | ||
417 | if (system_state == SYSTEM_RUNNING) | ||
418 | pr_debug("skipping APBT CPU %lu offline\n", cpu); | ||
419 | else if (adev) { | ||
420 | pr_debug("APBT clockevent for cpu %lu offline\n", cpu); | ||
421 | free_irq(adev->irq, adev); | ||
422 | } | ||
423 | break; | ||
424 | default: | ||
425 | pr_debug("APBT notified %lu, no action\n", action); | ||
426 | } | ||
427 | return NOTIFY_OK; | ||
428 | } | ||
429 | |||
430 | static __init int apbt_late_init(void) | ||
431 | { | ||
432 | if (disable_apbt_percpu) | ||
433 | return 0; | ||
434 | /* This notifier should be called after workqueue is ready */ | ||
435 | hotcpu_notifier(apbt_cpuhp_notify, -20); | ||
436 | return 0; | ||
437 | } | ||
438 | fs_initcall(apbt_late_init); | ||
439 | #else | ||
440 | |||
441 | void apbt_setup_secondary_clock(void) {} | ||
442 | |||
443 | #endif /* CONFIG_SMP */ | ||
444 | |||
445 | static void apbt_set_mode(enum clock_event_mode mode, | ||
446 | struct clock_event_device *evt) | ||
447 | { | ||
448 | unsigned long ctrl; | ||
449 | uint64_t delta; | ||
450 | int timer_num; | ||
451 | struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); | ||
452 | |||
453 | timer_num = adev->num; | ||
454 | pr_debug("%s CPU %d timer %d mode=%d\n", | ||
455 | __func__, first_cpu(*evt->cpumask), timer_num, mode); | ||
456 | |||
457 | switch (mode) { | ||
458 | case CLOCK_EVT_MODE_PERIODIC: | ||
459 | delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * apbt_clockevent.mult; | ||
460 | delta >>= apbt_clockevent.shift; | ||
461 | ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); | ||
462 | ctrl |= APBTMR_CONTROL_MODE_PERIODIC; | ||
463 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
464 | /* | ||
465 | * DW APB p. 46, have to disable timer before load counter, | ||
466 | * may cause sync problem. | ||
467 | */ | ||
468 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
469 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
470 | udelay(1); | ||
471 | pr_debug("Setting clock period %d for HZ %d\n", (int)delta, HZ); | ||
472 | apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT); | ||
473 | ctrl |= APBTMR_CONTROL_ENABLE; | ||
474 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
475 | break; | ||
476 | /* APB timer does not have one-shot mode, use free running mode */ | ||
477 | case CLOCK_EVT_MODE_ONESHOT: | ||
478 | ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); | ||
479 | /* | ||
480 | * Set free-running mode. This mode lets the timer reload the max | ||
481 | * timeout, which gives time (3 min on a 25 MHz clock) to rearm | ||
482 | * the next event, thereby emulating one-shot mode. | ||
483 | */ | ||
484 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
485 | ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC; | ||
486 | |||
487 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
488 | /* write again to set free running mode */ | ||
489 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
490 | |||
491 | /* | ||
492 | * DW APB p. 46, load counter with all 1s before starting free | ||
493 | * running mode. | ||
494 | */ | ||
495 | apbt_writel(timer_num, ~0, APBTMR_N_LOAD_COUNT); | ||
496 | ctrl &= ~APBTMR_CONTROL_INT; | ||
497 | ctrl |= APBTMR_CONTROL_ENABLE; | ||
498 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
499 | break; | ||
500 | |||
501 | case CLOCK_EVT_MODE_UNUSED: | ||
502 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
503 | apbt_disable_int(timer_num); | ||
504 | ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); | ||
505 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
506 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
507 | break; | ||
508 | |||
509 | case CLOCK_EVT_MODE_RESUME: | ||
510 | apbt_enable_int(timer_num); | ||
511 | break; | ||
512 | } | ||
513 | } | ||
514 | |||
515 | static int apbt_next_event(unsigned long delta, | ||
516 | struct clock_event_device *evt) | ||
517 | { | ||
518 | unsigned long ctrl; | ||
519 | int timer_num; | ||
520 | |||
521 | struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); | ||
522 | |||
523 | timer_num = adev->num; | ||
524 | /* Disable timer */ | ||
525 | ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); | ||
526 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
527 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
528 | /* write new count */ | ||
529 | apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT); | ||
530 | ctrl |= APBTMR_CONTROL_ENABLE; | ||
531 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
532 | return 0; | ||
533 | } | ||
534 | |||
535 | /* | ||
536 | * APB timer clock is not in sync with pclk on Langwell, which translates to | ||
537 | * unreliable read value caused by sampling error. the error does not add up | ||
538 | * overtime and only happens when sampling a 0 as a 1 by mistake. so the time | ||
539 | * would go backwards. the following code is trying to prevent time traveling | ||
540 | * backwards. little bit paranoid. | ||
541 | */ | ||
542 | static cycle_t apbt_read_clocksource(struct clocksource *cs) | ||
543 | { | ||
544 | unsigned long t0, t1, t2; | ||
545 | static unsigned long last_read; | ||
546 | |||
547 | bad_count: | ||
548 | t1 = apbt_readl(phy_cs_timer_id, | ||
549 | APBTMR_N_CURRENT_VALUE); | ||
550 | t2 = apbt_readl(phy_cs_timer_id, | ||
551 | APBTMR_N_CURRENT_VALUE); | ||
552 | if (unlikely(t1 < t2)) { | ||
553 | pr_debug("APBT: read current count error %lx:%lx:%lx\n", | ||
554 | t1, t2, t2 - t1); | ||
555 | goto bad_count; | ||
556 | } | ||
557 | /* | ||
558 | * Check against the cached last read to make sure time does not go back. | ||
559 | * It could be a normal rollover, but we will do a triple check anyway. | ||
560 | */ | ||
561 | if (unlikely(t2 > last_read)) { | ||
562 | /* check if we have a normal rollover */ | ||
563 | unsigned long raw_intr_status = | ||
564 | apbt_readl_reg(APBTMRS_RAW_INT_STATUS); | ||
565 | /* | ||
566 | * cs timer interrupt is masked but raw intr bit is set if | ||
567 | * rollover occurs. then we read EOI reg to clear it. | ||
568 | */ | ||
569 | if (raw_intr_status & (1 << phy_cs_timer_id)) { | ||
570 | apbt_readl(phy_cs_timer_id, APBTMR_N_EOI); | ||
571 | goto out; | ||
572 | } | ||
573 | pr_debug("APB CS going back %lx:%lx:%lx ", | ||
574 | t2, last_read, t2 - last_read); | ||
575 | bad_count_x3: | ||
576 | pr_debug(KERN_INFO "tripple check enforced\n"); | ||
577 | t0 = apbt_readl(phy_cs_timer_id, | ||
578 | APBTMR_N_CURRENT_VALUE); | ||
579 | udelay(1); | ||
580 | t1 = apbt_readl(phy_cs_timer_id, | ||
581 | APBTMR_N_CURRENT_VALUE); | ||
582 | udelay(1); | ||
583 | t2 = apbt_readl(phy_cs_timer_id, | ||
584 | APBTMR_N_CURRENT_VALUE); | ||
585 | if ((t2 > t1) || (t1 > t0)) { | ||
586 | printk(KERN_ERR "Error: APB CS tripple check failed\n"); | ||
587 | goto bad_count_x3; | ||
588 | } | ||
589 | } | ||
590 | out: | ||
591 | last_read = t2; | ||
592 | return (cycle_t)~t2; | ||
593 | } | ||
594 | |||
595 | static int apbt_clocksource_register(void) | ||
596 | { | ||
597 | u64 start, now; | ||
598 | cycle_t t1; | ||
599 | |||
600 | /* Start the counter, use timer 2 as source, timer 0/1 for event */ | ||
601 | apbt_start_counter(phy_cs_timer_id); | ||
602 | |||
603 | /* Verify whether apbt counter works */ | ||
604 | t1 = apbt_read_clocksource(&clocksource_apbt); | ||
605 | rdtscll(start); | ||
606 | |||
607 | /* | ||
608 | * We don't know the TSC frequency yet, but waiting for | ||
609 | * 200000 TSC cycles is safe: | ||
610 | * 4 GHz == 50us | ||
611 | * 1 GHz == 200us | ||
612 | */ | ||
613 | do { | ||
614 | rep_nop(); | ||
615 | rdtscll(now); | ||
616 | } while ((now - start) < 200000UL); | ||
617 | |||
618 | /* APBT is the only always on clocksource, it has to work! */ | ||
619 | if (t1 == apbt_read_clocksource(&clocksource_apbt)) | ||
620 | panic("APBT counter not counting. APBT disabled\n"); | ||
621 | |||
622 | /* | ||
623 | * initialize and register APBT clocksource | ||
624 | * convert that to ns/clock cycle | ||
625 | * mult = (ns/c) * 2^APBT_SHIFT | ||
626 | */ | ||
627 | clocksource_apbt.mult = div_sc(MSEC_PER_SEC, | ||
628 | (unsigned long) apbt_freq, APBT_SHIFT); | ||
629 | clocksource_register(&clocksource_apbt); | ||
630 | |||
631 | return 0; | ||
632 | } | ||
633 | |||
634 | /* | ||
635 | * Early setup of the APB timer: only timer 0 is used for booting, then we | ||
636 | * switch to per-CPU timers if possible. | ||
637 | * Per-CPU timers are chosen when enough timers exist for all possible CPUs; | ||
638 | * otherwise a single timer serves as the global clockevent. | ||
639 | * Panics if setup fails, since this is the only platform timer on Moorestown. | ||
640 | */ | ||
641 | void __init apbt_time_init(void) | ||
642 | { | ||
643 | #ifdef CONFIG_SMP | ||
644 | int i; | ||
645 | struct sfi_timer_table_entry *p_mtmr; | ||
646 | unsigned int percpu_timer; | ||
647 | struct apbt_dev *adev; | ||
648 | #endif | ||
649 | |||
650 | if (apb_timer_block_enabled) | ||
651 | return; | ||
652 | apbt_set_mapping(); | ||
653 | if (apbt_virt_address) { | ||
654 | pr_debug("Found APBT version 0x%lx\n",\ | ||
655 | apbt_readl_reg(APBTMRS_COMP_VERSION)); | ||
656 | } else | ||
657 | goto out_noapbt; | ||
658 | /* | ||
659 | * Read the frequency and check for a sane value, for ESL model | ||
660 | * we extend the possible clock range to allow time scaling. | ||
661 | */ | ||
662 | |||
663 | if (apbt_freq < APBT_MIN_FREQ || apbt_freq > APBT_MAX_FREQ) { | ||
664 | pr_debug("APBT has invalid freq 0x%llx\n", apbt_freq); | ||
665 | goto out_noapbt; | ||
666 | } | ||
667 | if (apbt_clocksource_register()) { | ||
668 | pr_debug("APBT has failed to register clocksource\n"); | ||
669 | goto out_noapbt; | ||
670 | } | ||
671 | if (!apbt_clockevent_register()) | ||
672 | apb_timer_block_enabled = 1; | ||
673 | else { | ||
674 | pr_debug("APBT has failed to register clockevent\n"); | ||
675 | goto out_noapbt; | ||
676 | } | ||
677 | #ifdef CONFIG_SMP | ||
678 | /* the kernel cmdline disabled the per-CPU apb timer, so we use lapic timers */ | ||
679 | if (disable_apbt_percpu) { | ||
680 | printk(KERN_INFO "apbt: disabled per cpu timer\n"); | ||
681 | return; | ||
682 | } | ||
683 | pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus()); | ||
684 | if (num_possible_cpus() <= sfi_mtimer_num) { | ||
685 | percpu_timer = 1; | ||
686 | apbt_num_timers_used = num_possible_cpus(); | ||
687 | } else { | ||
688 | percpu_timer = 0; | ||
689 | apbt_num_timers_used = 1; | ||
690 | adev = &per_cpu(cpu_apbt_dev, 0); | ||
691 | adev->flags &= ~APBT_DEV_USED; | ||
692 | } | ||
693 | pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used); | ||
694 | |||
695 | /* here we set up per CPU timer data structure */ | ||
696 | apbt_devs = kzalloc(sizeof(struct apbt_dev) * apbt_num_timers_used, | ||
697 | GFP_KERNEL); | ||
698 | if (!apbt_devs) { | ||
699 | printk(KERN_ERR "Failed to allocate APB timer devices\n"); | ||
700 | return; | ||
701 | } | ||
702 | for (i = 0; i < apbt_num_timers_used; i++) { | ||
703 | adev = &per_cpu(cpu_apbt_dev, i); | ||
704 | adev->num = i; | ||
705 | adev->cpu = i; | ||
706 | p_mtmr = sfi_get_mtmr(i); | ||
707 | if (p_mtmr) { | ||
708 | adev->tick = p_mtmr->freq_hz; | ||
709 | adev->irq = p_mtmr->irq; | ||
710 | } else | ||
711 | printk(KERN_ERR "Failed to get timer for cpu %d\n", i); | ||
712 | adev->count = 0; | ||
713 | sprintf(adev->name, "apbt%d", i); | ||
714 | } | ||
715 | #endif | ||
716 | |||
717 | return; | ||
718 | |||
719 | out_noapbt: | ||
720 | apbt_clear_mapping(); | ||
721 | apb_timer_block_enabled = 0; | ||
722 | panic("failed to enable APB timer\n"); | ||
723 | } | ||
724 | |||
725 | static inline void apbt_disable(int n) | ||
726 | { | ||
727 | if (is_apbt_capable()) { | ||
728 | unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); | ||
729 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
730 | apbt_writel(n, ctrl, APBTMR_N_CONTROL); | ||
731 | } | ||
732 | } | ||
733 | |||
734 | /* called before apb_timer_enable, use early map */ | ||
735 | unsigned long apbt_quick_calibrate(void) | ||
736 | { | ||
737 | int i, scale; | ||
738 | u64 old, new; | ||
739 | cycle_t t1, t2; | ||
740 | unsigned long khz = 0; | ||
741 | u32 loop, shift; | ||
742 | |||
743 | apbt_set_mapping(); | ||
744 | apbt_start_counter(phy_cs_timer_id); | ||
745 | |||
746 | /* check if the timer can count down, otherwise return */ | ||
747 | old = apbt_read_clocksource(&clocksource_apbt); | ||
748 | i = 10000; | ||
749 | while (--i) { | ||
750 | if (old != apbt_read_clocksource(&clocksource_apbt)) | ||
751 | break; | ||
752 | } | ||
753 | if (!i) | ||
754 | goto failed; | ||
755 | |||
756 | /* count 16 ms */ | ||
757 | loop = (apbt_freq * 1000) << 4; | ||
758 | |||
759 | /* restart the timer to ensure it won't get to 0 in the calibration */ | ||
760 | apbt_start_counter(phy_cs_timer_id); | ||
761 | |||
762 | old = apbt_read_clocksource(&clocksource_apbt); | ||
763 | old += loop; | ||
764 | |||
765 | t1 = __native_read_tsc(); | ||
766 | |||
767 | do { | ||
768 | new = apbt_read_clocksource(&clocksource_apbt); | ||
769 | } while (new < old); | ||
770 | |||
771 | t2 = __native_read_tsc(); | ||
772 | |||
773 | shift = 5; | ||
774 | if (unlikely(loop >> shift == 0)) { | ||
775 | printk(KERN_INFO | ||
776 | "APBT TSC calibration failed, not enough resolution\n"); | ||
777 | return 0; | ||
778 | } | ||
779 | scale = (int)div_u64((t2 - t1), loop >> shift); | ||
780 | khz = (scale * apbt_freq * 1000) >> shift; | ||
781 | printk(KERN_INFO "TSC freq calculated by APB timer is %lu khz\n", khz); | ||
782 | return khz; | ||
783 | failed: | ||
784 | return 0; | ||
785 | } | ||
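The calibration above packs a lot into a few shifts, so here is the same arithmetic spelled out. This is a sketch of the math only, assuming apbt_freq is the APB timer frequency in MHz (so apbt_freq * 1000 ticks elapse per millisecond), which is what the "count 16 ms" comment implies:

    loop  = (apbt_freq * 1000) << 4;          /* APB ticks in 16 ms */
    scale = (t2 - t1) / (loop >> 5);          /* TSC cycles per loop/32 APB ticks */
    khz   = (scale * apbt_freq * 1000) >> 5;  /* TSC ticks per millisecond */

which reduces algebraically to

    khz = (t2 - t1) * (apbt_freq * 1000) / loop

i.e. elapsed TSC cycles divided by the elapsed time in milliseconds. The intermediate shift by 5 keeps the div_u64() divisor large enough for useful resolution, which is exactly what the loop >> shift == 0 guard checks before dividing.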
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 3704997e8b25..b5d8b0bcf235 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -393,6 +393,7 @@ void __init gart_iommu_hole_init(void) | |||
393 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { | 393 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { |
394 | int bus; | 394 | int bus; |
395 | int dev_base, dev_limit; | 395 | int dev_base, dev_limit; |
396 | u32 ctl; | ||
396 | 397 | ||
397 | bus = bus_dev_ranges[i].bus; | 398 | bus = bus_dev_ranges[i].bus; |
398 | dev_base = bus_dev_ranges[i].dev_base; | 399 | dev_base = bus_dev_ranges[i].dev_base; |
@@ -406,7 +407,19 @@ void __init gart_iommu_hole_init(void) | |||
406 | gart_iommu_aperture = 1; | 407 | gart_iommu_aperture = 1; |
407 | x86_init.iommu.iommu_init = gart_iommu_init; | 408 | x86_init.iommu.iommu_init = gart_iommu_init; |
408 | 409 | ||
409 | aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; | 410 | ctl = read_pci_config(bus, slot, 3, |
411 | AMD64_GARTAPERTURECTL); | ||
412 | |||
413 | /* | ||
414 | * Before we do anything else disable the GART. It may | ||
415 | * still be enabled if we boot into a crash-kernel here. | ||
416 | * Reconfiguring the GART while it is enabled could have | ||
417 | * unknown side-effects. | ||
418 | */ | ||
419 | ctl &= ~GARTEN; | ||
420 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); | ||
421 | |||
422 | aper_order = (ctl >> 1) & 7; | ||
410 | aper_size = (32 * 1024 * 1024) << aper_order; | 423 | aper_size = (32 * 1024 * 1024) << aper_order; |
411 | aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; | 424 | aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; |
412 | aper_base <<= 25; | 425 | aper_base <<= 25; |
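The disable-before-reconfigure rule introduced above is an ordinary PCI read-modify-write; a minimal sketch using the same early config-space helpers the file already uses, with GARTEN being the aperture-enable bit in AMD64_GARTAPERTURECTL:

    u32 ctl;

    /* Never reprogram a live GART: a crash kernel may inherit one. */
    ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
    ctl &= ~GARTEN;
    write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);

    /* Only now is it safe to decode the aperture order and base. */
    aper_order = (ctl >> 1) & 7;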
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index aa57c079c98f..e5a4a1e01618 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -61,12 +61,6 @@ unsigned int boot_cpu_physical_apicid = -1U; | |||
61 | 61 | ||
62 | /* | 62 | /* |
63 | * The highest APIC ID seen during enumeration. | 63 | * The highest APIC ID seen during enumeration. |
64 | * | ||
65 | * On AMD, this determines the messaging protocol we can use: if all APIC IDs | ||
66 | * are in the 0 ... 7 range, then we can use logical addressing which | ||
67 | * has some performance advantages (better broadcasting). | ||
68 | * | ||
69 | * If there's an APIC ID above 8, we use physical addressing. | ||
70 | */ | 64 | */ |
71 | unsigned int max_physical_apicid; | 65 | unsigned int max_physical_apicid; |
72 | 66 | ||
@@ -587,7 +581,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) | |||
587 | res = (((u64)(*deltatsc)) * pm_100ms); | 581 | res = (((u64)(*deltatsc)) * pm_100ms); |
588 | do_div(res, deltapm); | 582 | do_div(res, deltapm); |
589 | apic_printk(APIC_VERBOSE, "TSC delta adjusted to " | 583 | apic_printk(APIC_VERBOSE, "TSC delta adjusted to " |
590 | "PM-Timer: %lu (%ld) \n", | 584 | "PM-Timer: %lu (%ld)\n", |
591 | (unsigned long)res, *deltatsc); | 585 | (unsigned long)res, *deltatsc); |
592 | *deltatsc = (long)res; | 586 | *deltatsc = (long)res; |
593 | } | 587 | } |
@@ -1396,7 +1390,7 @@ void __init enable_IR_x2apic(void) | |||
1396 | } | 1390 | } |
1397 | 1391 | ||
1398 | local_irq_save(flags); | 1392 | local_irq_save(flags); |
1399 | mask_8259A(); | 1393 | legacy_pic->mask_all(); |
1400 | mask_IO_APIC_setup(ioapic_entries); | 1394 | mask_IO_APIC_setup(ioapic_entries); |
1401 | 1395 | ||
1402 | if (dmar_table_init_ret) | 1396 | if (dmar_table_init_ret) |
@@ -1428,7 +1422,7 @@ void __init enable_IR_x2apic(void) | |||
1428 | nox2apic: | 1422 | nox2apic: |
1429 | if (!ret) /* IR enabling failed */ | 1423 | if (!ret) /* IR enabling failed */ |
1430 | restore_IO_APIC_setup(ioapic_entries); | 1424 | restore_IO_APIC_setup(ioapic_entries); |
1431 | unmask_8259A(); | 1425 | legacy_pic->restore_mask(); |
1432 | local_irq_restore(flags); | 1426 | local_irq_restore(flags); |
1433 | 1427 | ||
1434 | out: | 1428 | out: |
@@ -1646,8 +1640,8 @@ int __init APIC_init_uniprocessor(void) | |||
1646 | } | 1640 | } |
1647 | #endif | 1641 | #endif |
1648 | 1642 | ||
1643 | #ifndef CONFIG_SMP | ||
1649 | enable_IR_x2apic(); | 1644 | enable_IR_x2apic(); |
1650 | #ifdef CONFIG_X86_64 | ||
1651 | default_setup_apic_routing(); | 1645 | default_setup_apic_routing(); |
1652 | #endif | 1646 | #endif |
1653 | 1647 | ||
@@ -1897,18 +1891,6 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
1897 | if (apicid > max_physical_apicid) | 1891 | if (apicid > max_physical_apicid) |
1898 | max_physical_apicid = apicid; | 1892 | max_physical_apicid = apicid; |
1899 | 1893 | ||
1900 | #ifdef CONFIG_X86_32 | ||
1901 | switch (boot_cpu_data.x86_vendor) { | ||
1902 | case X86_VENDOR_INTEL: | ||
1903 | if (num_processors > 8) | ||
1904 | def_to_bigsmp = 1; | ||
1905 | break; | ||
1906 | case X86_VENDOR_AMD: | ||
1907 | if (max_physical_apicid >= 8) | ||
1908 | def_to_bigsmp = 1; | ||
1909 | } | ||
1910 | #endif | ||
1911 | |||
1912 | #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) | 1894 | #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) |
1913 | early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; | 1895 | early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; |
1914 | early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; | 1896 | early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; |
@@ -2038,7 +2020,7 @@ static int lapic_resume(struct sys_device *dev) | |||
2038 | } | 2020 | } |
2039 | 2021 | ||
2040 | mask_IO_APIC_setup(ioapic_entries); | 2022 | mask_IO_APIC_setup(ioapic_entries); |
2041 | mask_8259A(); | 2023 | legacy_pic->mask_all(); |
2042 | } | 2024 | } |
2043 | 2025 | ||
2044 | if (x2apic_mode) | 2026 | if (x2apic_mode) |
@@ -2082,7 +2064,7 @@ static int lapic_resume(struct sys_device *dev) | |||
2082 | 2064 | ||
2083 | if (intr_remapping_enabled) { | 2065 | if (intr_remapping_enabled) { |
2084 | reenable_intr_remapping(x2apic_mode); | 2066 | reenable_intr_remapping(x2apic_mode); |
2085 | unmask_8259A(); | 2067 | legacy_pic->restore_mask(); |
2086 | restore_IO_APIC_setup(ioapic_entries); | 2068 | restore_IO_APIC_setup(ioapic_entries); |
2087 | free_ioapic_entries(ioapic_entries); | 2069 | free_ioapic_entries(ioapic_entries); |
2088 | } | 2070 | } |
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index eacbd2b31d27..09d3b17ce0c2 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c | |||
@@ -223,7 +223,7 @@ struct apic apic_flat = { | |||
223 | }; | 223 | }; |
224 | 224 | ||
225 | /* | 225 | /* |
226 | * Physflat mode is used when there are more than 8 CPUs on a AMD system. | 226 | * Physflat mode is used when there are more than 8 CPUs on a system. |
227 | * We cannot use logical delivery in this case because the mask | 227 | * We cannot use logical delivery in this case because the mask |
228 | * overflows, so use physical mode. | 228 | * overflows, so use physical mode. |
229 | */ | 229 | */ |
@@ -240,6 +240,11 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
240 | printk(KERN_DEBUG "system APIC only can use physical flat"); | 240 | printk(KERN_DEBUG "system APIC only can use physical flat"); |
241 | return 1; | 241 | return 1; |
242 | } | 242 | } |
243 | |||
244 | if (!strncmp(oem_id, "IBM", 3) && !strncmp(oem_table_id, "EXA", 3)) { | ||
245 | printk(KERN_DEBUG "IBM Summit detected, will use physical APIC mode\n"); | ||
246 | return 1; | ||
247 | } | ||
243 | #endif | 248 | #endif |
244 | 249 | ||
245 | return 0; | 250 | return 0; |
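The 8-CPU limit comes from the flat logical destination format: each CPU owns one bit of an 8-bit mask, so a broadcast can address at most 8 CPUs. An illustration (not kernel code):

    /* Flat logical mode: one CPU per bit of the 8-bit logical ID. */
    static inline unsigned int flat_logical_dest(int cpu)
    {
        return 1u << cpu;        /* only meaningful for cpu 0..7 */
    }

    /* Physical mode instead addresses each CPU by its full APIC ID,
     * one IPI per target, so any CPU count works. */

The new IBM EXA check forces the same physical fallback on Summit/EXA-class boxes regardless of CPU count.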
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index dd2b5f264643..03ba1b895f5e 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <linux/errno.h> | 42 | #include <linux/errno.h> |
43 | #include <linux/acpi.h> | 43 | #include <linux/acpi.h> |
44 | #include <linux/init.h> | 44 | #include <linux/init.h> |
45 | #include <linux/gfp.h> | ||
45 | #include <linux/nmi.h> | 46 | #include <linux/nmi.h> |
46 | #include <linux/smp.h> | 47 | #include <linux/smp.h> |
47 | #include <linux/io.h> | 48 | #include <linux/io.h> |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index de00c4619a55..127b8718abfb 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/freezer.h> | 36 | #include <linux/freezer.h> |
37 | #include <linux/kthread.h> | 37 | #include <linux/kthread.h> |
38 | #include <linux/jiffies.h> /* time_after() */ | 38 | #include <linux/jiffies.h> /* time_after() */ |
39 | #include <linux/slab.h> | ||
39 | #ifdef CONFIG_ACPI | 40 | #ifdef CONFIG_ACPI |
40 | #include <acpi/acpi_bus.h> | 41 | #include <acpi/acpi_bus.h> |
41 | #endif | 42 | #endif |
@@ -73,8 +74,8 @@ | |||
73 | */ | 74 | */ |
74 | int sis_apic_bug = -1; | 75 | int sis_apic_bug = -1; |
75 | 76 | ||
76 | static DEFINE_SPINLOCK(ioapic_lock); | 77 | static DEFINE_RAW_SPINLOCK(ioapic_lock); |
77 | static DEFINE_SPINLOCK(vector_lock); | 78 | static DEFINE_RAW_SPINLOCK(vector_lock); |
78 | 79 | ||
79 | /* | 80 | /* |
80 | * # of IRQ routing registers | 81 | * # of IRQ routing registers |
@@ -94,8 +95,6 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; | |||
94 | /* # of MP IRQ source entries */ | 95 | /* # of MP IRQ source entries */ |
95 | int mp_irq_entries; | 96 | int mp_irq_entries; |
96 | 97 | ||
97 | /* Number of legacy interrupts */ | ||
98 | static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY; | ||
99 | /* GSI interrupts */ | 98 | /* GSI interrupts */ |
100 | static int nr_irqs_gsi = NR_IRQS_LEGACY; | 99 | static int nr_irqs_gsi = NR_IRQS_LEGACY; |
101 | 100 | ||
@@ -140,33 +139,10 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) | |||
140 | 139 | ||
141 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ | 140 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ |
142 | #ifdef CONFIG_SPARSE_IRQ | 141 | #ifdef CONFIG_SPARSE_IRQ |
143 | static struct irq_cfg irq_cfgx[] = { | 142 | static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; |
144 | #else | 143 | #else |
145 | static struct irq_cfg irq_cfgx[NR_IRQS] = { | 144 | static struct irq_cfg irq_cfgx[NR_IRQS]; |
146 | #endif | 145 | #endif |
147 | [0] = { .vector = IRQ0_VECTOR, }, | ||
148 | [1] = { .vector = IRQ1_VECTOR, }, | ||
149 | [2] = { .vector = IRQ2_VECTOR, }, | ||
150 | [3] = { .vector = IRQ3_VECTOR, }, | ||
151 | [4] = { .vector = IRQ4_VECTOR, }, | ||
152 | [5] = { .vector = IRQ5_VECTOR, }, | ||
153 | [6] = { .vector = IRQ6_VECTOR, }, | ||
154 | [7] = { .vector = IRQ7_VECTOR, }, | ||
155 | [8] = { .vector = IRQ8_VECTOR, }, | ||
156 | [9] = { .vector = IRQ9_VECTOR, }, | ||
157 | [10] = { .vector = IRQ10_VECTOR, }, | ||
158 | [11] = { .vector = IRQ11_VECTOR, }, | ||
159 | [12] = { .vector = IRQ12_VECTOR, }, | ||
160 | [13] = { .vector = IRQ13_VECTOR, }, | ||
161 | [14] = { .vector = IRQ14_VECTOR, }, | ||
162 | [15] = { .vector = IRQ15_VECTOR, }, | ||
163 | }; | ||
164 | |||
165 | void __init io_apic_disable_legacy(void) | ||
166 | { | ||
167 | nr_legacy_irqs = 0; | ||
168 | nr_irqs_gsi = 0; | ||
169 | } | ||
170 | 146 | ||
171 | int __init arch_early_irq_init(void) | 147 | int __init arch_early_irq_init(void) |
172 | { | 148 | { |
@@ -176,6 +152,11 @@ int __init arch_early_irq_init(void) | |||
176 | int node; | 152 | int node; |
177 | int i; | 153 | int i; |
178 | 154 | ||
155 | if (!legacy_pic->nr_legacy_irqs) { | ||
156 | nr_irqs_gsi = 0; | ||
157 | io_apic_irqs = ~0UL; | ||
158 | } | ||
159 | |||
179 | cfg = irq_cfgx; | 160 | cfg = irq_cfgx; |
180 | count = ARRAY_SIZE(irq_cfgx); | 161 | count = ARRAY_SIZE(irq_cfgx); |
181 | node= cpu_to_node(boot_cpu_id); | 162 | node= cpu_to_node(boot_cpu_id); |
@@ -185,8 +166,14 @@ int __init arch_early_irq_init(void) | |||
185 | desc->chip_data = &cfg[i]; | 166 | desc->chip_data = &cfg[i]; |
186 | zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); | 167 | zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); |
187 | zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); | 168 | zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); |
188 | if (i < nr_legacy_irqs) | 169 | /* |
189 | cpumask_setall(cfg[i].domain); | 170 | * For legacy IRQs, start by assigning irq0 through irq15 to
171 | * IRQ0_VECTOR through IRQ15_VECTOR on cpu 0. | ||
172 | */ | ||
173 | if (i < legacy_pic->nr_legacy_irqs) { | ||
174 | cfg[i].vector = IRQ0_VECTOR + i; | ||
175 | cpumask_set_cpu(0, cfg[i].domain); | ||
176 | } | ||
190 | } | 177 | } |
191 | 178 | ||
192 | return 0; | 179 | return 0; |
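The removed static irq_cfgx initializers survive as the loop body above: legacy IRQ i still starts life on vector IRQ0_VECTOR + i, but is now bound to cpu 0 only (cpumask_set_cpu) rather than to every cpu (cpumask_setall), so the vector can be re-allocated cleanly once the IO-APIC takes the line over. In sketch form:

    for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) {
        cfg[i].vector = IRQ0_VECTOR + i;    /* 1:1 boot-time mapping */
        cpumask_set_cpu(0, cfg[i].domain);  /* owned by cpu 0 for now */
    }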
@@ -406,7 +393,7 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) | |||
406 | struct irq_pin_list *entry; | 393 | struct irq_pin_list *entry; |
407 | unsigned long flags; | 394 | unsigned long flags; |
408 | 395 | ||
409 | spin_lock_irqsave(&ioapic_lock, flags); | 396 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
410 | for_each_irq_pin(entry, cfg->irq_2_pin) { | 397 | for_each_irq_pin(entry, cfg->irq_2_pin) { |
411 | unsigned int reg; | 398 | unsigned int reg; |
412 | int pin; | 399 | int pin; |
@@ -415,11 +402,11 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) | |||
415 | reg = io_apic_read(entry->apic, 0x10 + pin*2); | 402 | reg = io_apic_read(entry->apic, 0x10 + pin*2); |
416 | /* Is the remote IRR bit set? */ | 403 | /* Is the remote IRR bit set? */ |
417 | if (reg & IO_APIC_REDIR_REMOTE_IRR) { | 404 | if (reg & IO_APIC_REDIR_REMOTE_IRR) { |
418 | spin_unlock_irqrestore(&ioapic_lock, flags); | 405 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
419 | return true; | 406 | return true; |
420 | } | 407 | } |
421 | } | 408 | } |
422 | spin_unlock_irqrestore(&ioapic_lock, flags); | 409 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
423 | 410 | ||
424 | return false; | 411 | return false; |
425 | } | 412 | } |
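The spinlock_t to raw_spinlock_t conversions running through this file matter on PREEMPT_RT, where plain spinlock_t becomes a sleeping lock; ioapic_lock and vector_lock protect low-level hardware accesses from contexts that must never sleep, so they have to stay truly spinning. The usage pattern is unchanged apart from the raw_ prefix:

    static DEFINE_RAW_SPINLOCK(ioapic_lock);

    unsigned long flags;

    raw_spin_lock_irqsave(&ioapic_lock, flags);
    /* ... IO-APIC register window access ... */
    raw_spin_unlock_irqrestore(&ioapic_lock, flags);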
@@ -433,10 +420,10 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) | |||
433 | { | 420 | { |
434 | union entry_union eu; | 421 | union entry_union eu; |
435 | unsigned long flags; | 422 | unsigned long flags; |
436 | spin_lock_irqsave(&ioapic_lock, flags); | 423 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
437 | eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); | 424 | eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); |
438 | eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); | 425 | eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); |
439 | spin_unlock_irqrestore(&ioapic_lock, flags); | 426 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
440 | return eu.entry; | 427 | return eu.entry; |
441 | } | 428 | } |
442 | 429 | ||
@@ -459,9 +446,9 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | |||
459 | void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | 446 | void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) |
460 | { | 447 | { |
461 | unsigned long flags; | 448 | unsigned long flags; |
462 | spin_lock_irqsave(&ioapic_lock, flags); | 449 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
463 | __ioapic_write_entry(apic, pin, e); | 450 | __ioapic_write_entry(apic, pin, e); |
464 | spin_unlock_irqrestore(&ioapic_lock, flags); | 451 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
465 | } | 452 | } |
466 | 453 | ||
467 | /* | 454 | /* |
@@ -474,10 +461,10 @@ static void ioapic_mask_entry(int apic, int pin) | |||
474 | unsigned long flags; | 461 | unsigned long flags; |
475 | union entry_union eu = { .entry.mask = 1 }; | 462 | union entry_union eu = { .entry.mask = 1 }; |
476 | 463 | ||
477 | spin_lock_irqsave(&ioapic_lock, flags); | 464 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
478 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); | 465 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); |
479 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); | 466 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); |
480 | spin_unlock_irqrestore(&ioapic_lock, flags); | 467 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
481 | } | 468 | } |
482 | 469 | ||
483 | /* | 470 | /* |
@@ -604,9 +591,9 @@ static void mask_IO_APIC_irq_desc(struct irq_desc *desc) | |||
604 | 591 | ||
605 | BUG_ON(!cfg); | 592 | BUG_ON(!cfg); |
606 | 593 | ||
607 | spin_lock_irqsave(&ioapic_lock, flags); | 594 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
608 | __mask_IO_APIC_irq(cfg); | 595 | __mask_IO_APIC_irq(cfg); |
609 | spin_unlock_irqrestore(&ioapic_lock, flags); | 596 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
610 | } | 597 | } |
611 | 598 | ||
612 | static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) | 599 | static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) |
@@ -614,9 +601,9 @@ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) | |||
614 | struct irq_cfg *cfg = desc->chip_data; | 601 | struct irq_cfg *cfg = desc->chip_data; |
615 | unsigned long flags; | 602 | unsigned long flags; |
616 | 603 | ||
617 | spin_lock_irqsave(&ioapic_lock, flags); | 604 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
618 | __unmask_IO_APIC_irq(cfg); | 605 | __unmask_IO_APIC_irq(cfg); |
619 | spin_unlock_irqrestore(&ioapic_lock, flags); | 606 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
620 | } | 607 | } |
621 | 608 | ||
622 | static void mask_IO_APIC_irq(unsigned int irq) | 609 | static void mask_IO_APIC_irq(unsigned int irq) |
@@ -865,7 +852,7 @@ static int __init find_isa_irq_apic(int irq, int type) | |||
865 | */ | 852 | */ |
866 | static int EISA_ELCR(unsigned int irq) | 853 | static int EISA_ELCR(unsigned int irq) |
867 | { | 854 | { |
868 | if (irq < nr_legacy_irqs) { | 855 | if (irq < legacy_pic->nr_legacy_irqs) { |
869 | unsigned int port = 0x4d0 + (irq >> 3); | 856 | unsigned int port = 0x4d0 + (irq >> 3); |
870 | return (inb(port) >> (irq & 7)) & 1; | 857 | return (inb(port) >> (irq & 7)) & 1; |
871 | } | 858 | } |
@@ -1140,12 +1127,12 @@ void lock_vector_lock(void) | |||
1140 | /* Used so that the online set of cpus does not change | 1127 | /* Used so that the online set of cpus does not change
1141 | * during assign_irq_vector. | 1128 | * during assign_irq_vector. |
1142 | */ | 1129 | */ |
1143 | spin_lock(&vector_lock); | 1130 | raw_spin_lock(&vector_lock); |
1144 | } | 1131 | } |
1145 | 1132 | ||
1146 | void unlock_vector_lock(void) | 1133 | void unlock_vector_lock(void) |
1147 | { | 1134 | { |
1148 | spin_unlock(&vector_lock); | 1135 | raw_spin_unlock(&vector_lock); |
1149 | } | 1136 | } |
1150 | 1137 | ||
1151 | static int | 1138 | static int |
@@ -1162,7 +1149,8 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | |||
1162 | * Also, we've got to be careful not to trash gate | 1149 | * Also, we've got to be careful not to trash gate |
1163 | * 0x80, because int 0x80 is hm, kind of importantish. ;) | 1150 | * 0x80, because int 0x80 is hm, kind of importantish. ;) |
1164 | */ | 1151 | */ |
1165 | static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; | 1152 | static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; |
1153 | static int current_offset = VECTOR_OFFSET_START % 8; | ||
1166 | unsigned int old_vector; | 1154 | unsigned int old_vector; |
1167 | int cpu, err; | 1155 | int cpu, err; |
1168 | cpumask_var_t tmp_mask; | 1156 | cpumask_var_t tmp_mask; |
@@ -1198,7 +1186,7 @@ next: | |||
1198 | if (vector >= first_system_vector) { | 1186 | if (vector >= first_system_vector) { |
1199 | /* If out of vectors on large boxen, must share them. */ | 1187 | /* If out of vectors on large boxen, must share them. */ |
1200 | offset = (offset + 1) % 8; | 1188 | offset = (offset + 1) % 8; |
1201 | vector = FIRST_DEVICE_VECTOR + offset; | 1189 | vector = FIRST_EXTERNAL_VECTOR + offset; |
1202 | } | 1190 | } |
1203 | if (unlikely(current_vector == vector)) | 1191 | if (unlikely(current_vector == vector)) |
1204 | continue; | 1192 | continue; |
@@ -1232,9 +1220,9 @@ int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | |||
1232 | int err; | 1220 | int err; |
1233 | unsigned long flags; | 1221 | unsigned long flags; |
1234 | 1222 | ||
1235 | spin_lock_irqsave(&vector_lock, flags); | 1223 | raw_spin_lock_irqsave(&vector_lock, flags); |
1236 | err = __assign_irq_vector(irq, cfg, mask); | 1224 | err = __assign_irq_vector(irq, cfg, mask); |
1237 | spin_unlock_irqrestore(&vector_lock, flags); | 1225 | raw_spin_unlock_irqrestore(&vector_lock, flags); |
1238 | return err; | 1226 | return err; |
1239 | } | 1227 | } |
1240 | 1228 | ||
@@ -1268,14 +1256,27 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) | |||
1268 | void __setup_vector_irq(int cpu) | 1256 | void __setup_vector_irq(int cpu) |
1269 | { | 1257 | { |
1270 | /* Initialize vector_irq on a new cpu */ | 1258 | /* Initialize vector_irq on a new cpu */ |
1271 | /* This function must be called with vector_lock held */ | ||
1272 | int irq, vector; | 1259 | int irq, vector; |
1273 | struct irq_cfg *cfg; | 1260 | struct irq_cfg *cfg; |
1274 | struct irq_desc *desc; | 1261 | struct irq_desc *desc; |
1275 | 1262 | ||
1263 | /* | ||
1264 | * vector_lock will make sure that we don't run into irq vector | ||
1265 | * assignments that might be happening on another cpu in parallel, | ||
1266 | * while we setup our initial vector to irq mappings. | ||
1267 | */ | ||
1268 | raw_spin_lock(&vector_lock); | ||
1276 | /* Mark the inuse vectors */ | 1269 | /* Mark the inuse vectors */ |
1277 | for_each_irq_desc(irq, desc) { | 1270 | for_each_irq_desc(irq, desc) { |
1278 | cfg = desc->chip_data; | 1271 | cfg = desc->chip_data; |
1272 | |||
1273 | /* | ||
1274 | * If it is a legacy IRQ handled by the legacy PIC, this cpu | ||
1275 | * will be part of the irq_cfg's domain. | ||
1276 | */ | ||
1277 | if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq)) | ||
1278 | cpumask_set_cpu(cpu, cfg->domain); | ||
1279 | |||
1279 | if (!cpumask_test_cpu(cpu, cfg->domain)) | 1280 | if (!cpumask_test_cpu(cpu, cfg->domain)) |
1280 | continue; | 1281 | continue; |
1281 | vector = cfg->vector; | 1282 | vector = cfg->vector; |
@@ -1291,6 +1292,7 @@ void __setup_vector_irq(int cpu) | |||
1291 | if (!cpumask_test_cpu(cpu, cfg->domain)) | 1292 | if (!cpumask_test_cpu(cpu, cfg->domain)) |
1292 | per_cpu(vector_irq, cpu)[vector] = -1; | 1293 | per_cpu(vector_irq, cpu)[vector] = -1; |
1293 | } | 1294 | } |
1295 | raw_spin_unlock(&vector_lock); | ||
1294 | } | 1296 | } |
1295 | 1297 | ||
1296 | static struct irq_chip ioapic_chip; | 1298 | static struct irq_chip ioapic_chip; |
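__setup_vector_irq() now takes vector_lock itself (the caller no longer has to pre-hold it, per the deleted comment) while it fills this cpu's vector-to-irq translation table. Condensed, the protected work is:

    raw_spin_lock(&vector_lock);
    for_each_irq_desc(irq, desc) {
        cfg = desc->chip_data;
        if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq))
            cpumask_set_cpu(cpu, cfg->domain);  /* PIC-driven legacy irq */
        if (cpumask_test_cpu(cpu, cfg->domain))
            per_cpu(vector_irq, cpu)[cfg->vector] = irq;
    }
    raw_spin_unlock(&vector_lock);

Holding vector_lock here keeps a cpu coming online from racing against __assign_irq_vector() running concurrently on another cpu.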
@@ -1440,6 +1442,14 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq | |||
1440 | 1442 | ||
1441 | cfg = desc->chip_data; | 1443 | cfg = desc->chip_data; |
1442 | 1444 | ||
1445 | /* | ||
1446 | * For legacy irqs, cfg->domain starts with cpu 0 for legacy | ||
1447 | * controllers like 8259. Now that IO-APIC can handle this irq, update | ||
1448 | * the cfg->domain. | ||
1449 | */ | ||
1450 | if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) | ||
1451 | apic->vector_allocation_domain(0, cfg->domain); | ||
1452 | |||
1443 | if (assign_irq_vector(irq, cfg, apic->target_cpus())) | 1453 | if (assign_irq_vector(irq, cfg, apic->target_cpus())) |
1444 | return; | 1454 | return; |
1445 | 1455 | ||
@@ -1461,8 +1471,8 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq | |||
1461 | } | 1471 | } |
1462 | 1472 | ||
1463 | ioapic_register_intr(irq, desc, trigger); | 1473 | ioapic_register_intr(irq, desc, trigger); |
1464 | if (irq < nr_legacy_irqs) | 1474 | if (irq < legacy_pic->nr_legacy_irqs) |
1465 | disable_8259A_irq(irq); | 1475 | legacy_pic->chip->mask(irq); |
1466 | 1476 | ||
1467 | ioapic_write_entry(apic_id, pin, entry); | 1477 | ioapic_write_entry(apic_id, pin, entry); |
1468 | } | 1478 | } |
@@ -1473,7 +1483,7 @@ static struct { | |||
1473 | 1483 | ||
1474 | static void __init setup_IO_APIC_irqs(void) | 1484 | static void __init setup_IO_APIC_irqs(void) |
1475 | { | 1485 | { |
1476 | int apic_id = 0, pin, idx, irq; | 1486 | int apic_id, pin, idx, irq; |
1477 | int notcon = 0; | 1487 | int notcon = 0; |
1478 | struct irq_desc *desc; | 1488 | struct irq_desc *desc; |
1479 | struct irq_cfg *cfg; | 1489 | struct irq_cfg *cfg; |
@@ -1481,14 +1491,7 @@ static void __init setup_IO_APIC_irqs(void) | |||
1481 | 1491 | ||
1482 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | 1492 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); |
1483 | 1493 | ||
1484 | #ifdef CONFIG_ACPI | 1494 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) |
1485 | if (!acpi_disabled && acpi_ioapic) { | ||
1486 | apic_id = mp_find_ioapic(0); | ||
1487 | if (apic_id < 0) | ||
1488 | apic_id = 0; | ||
1489 | } | ||
1490 | #endif | ||
1491 | |||
1492 | for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { | 1495 | for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { |
1493 | idx = find_irq_entry(apic_id, pin, mp_INT); | 1496 | idx = find_irq_entry(apic_id, pin, mp_INT); |
1494 | if (idx == -1) { | 1497 | if (idx == -1) { |
@@ -1510,6 +1513,9 @@ static void __init setup_IO_APIC_irqs(void) | |||
1510 | 1513 | ||
1511 | irq = pin_2_irq(idx, apic_id, pin); | 1514 | irq = pin_2_irq(idx, apic_id, pin); |
1512 | 1515 | ||
1516 | if ((apic_id > 0) && (irq > 16)) | ||
1517 | continue; | ||
1518 | |||
1513 | /* | 1519 | /* |
1514 | * Skip the timer IRQ if there's a quirk handler | 1520 | * Skip the timer IRQ if there's a quirk handler |
1515 | * installed and if it returns 1: | 1521 | * installed and if it returns 1: |
@@ -1539,6 +1545,56 @@ static void __init setup_IO_APIC_irqs(void) | |||
1539 | } | 1545 | } |
1540 | 1546 | ||
1541 | /* | 1547 | /* |
1548 | * For a GSI that is not on the first IO-APIC and | ||
1549 | * cannot go through acpi_register_gsi(), such as | ||
1550 | * the special SCI on the IBM x3330. | ||
1551 | */ | ||
1552 | void setup_IO_APIC_irq_extra(u32 gsi) | ||
1553 | { | ||
1554 | int apic_id = 0, pin, idx, irq; | ||
1555 | int node = cpu_to_node(boot_cpu_id); | ||
1556 | struct irq_desc *desc; | ||
1557 | struct irq_cfg *cfg; | ||
1558 | |||
1559 | /* | ||
1560 | * Convert 'gsi' to 'ioapic.pin'. | ||
1561 | */ | ||
1562 | apic_id = mp_find_ioapic(gsi); | ||
1563 | if (apic_id < 0) | ||
1564 | return; | ||
1565 | |||
1566 | pin = mp_find_ioapic_pin(apic_id, gsi); | ||
1567 | idx = find_irq_entry(apic_id, pin, mp_INT); | ||
1568 | if (idx == -1) | ||
1569 | return; | ||
1570 | |||
1571 | irq = pin_2_irq(idx, apic_id, pin); | ||
1572 | #ifdef CONFIG_SPARSE_IRQ | ||
1573 | desc = irq_to_desc(irq); | ||
1574 | if (desc) | ||
1575 | return; | ||
1576 | #endif | ||
1577 | desc = irq_to_desc_alloc_node(irq, node); | ||
1578 | if (!desc) { | ||
1579 | printk(KERN_INFO "cannot get irq_desc for %d\n", irq); | ||
1580 | return; | ||
1581 | } | ||
1582 | |||
1583 | cfg = desc->chip_data; | ||
1584 | add_pin_to_irq_node(cfg, node, apic_id, pin); | ||
1585 | |||
1586 | if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { | ||
1587 | pr_debug("Pin %d-%d already programmed\n", | ||
1588 | mp_ioapics[apic_id].apicid, pin); | ||
1589 | return; | ||
1590 | } | ||
1591 | set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); | ||
1592 | |||
1593 | setup_IO_APIC_irq(apic_id, pin, irq, desc, | ||
1594 | irq_trigger(idx), irq_polarity(idx)); | ||
1595 | } | ||
1596 | |||
1597 | /* | ||
1542 | * Set up the timer pin, possibly with the 8259A-master behind. | 1598 | * Set up the timer pin, possibly with the 8259A-master behind. |
1543 | */ | 1599 | */ |
1544 | static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, | 1600 | static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, |
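setup_IO_APIC_irq_extra() walks the same GSI -> (apic, pin) -> irq chain as the normal path, but guards itself with the pin_programmed bitmap so it stays idempotent. A hedged sketch of a caller, assuming the ACPI SCI turns out to live on a secondary IO-APIC (the actual call site is outside this diff):

    /* hypothetical caller: route a stray SCI gsi by hand */
    if (mp_find_ioapic(gsi) > 0)        /* not on the first IO-APIC */
        setup_IO_APIC_irq_extra(gsi);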
@@ -1601,14 +1657,14 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1601 | 1657 | ||
1602 | for (apic = 0; apic < nr_ioapics; apic++) { | 1658 | for (apic = 0; apic < nr_ioapics; apic++) { |
1603 | 1659 | ||
1604 | spin_lock_irqsave(&ioapic_lock, flags); | 1660 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
1605 | reg_00.raw = io_apic_read(apic, 0); | 1661 | reg_00.raw = io_apic_read(apic, 0); |
1606 | reg_01.raw = io_apic_read(apic, 1); | 1662 | reg_01.raw = io_apic_read(apic, 1); |
1607 | if (reg_01.bits.version >= 0x10) | 1663 | if (reg_01.bits.version >= 0x10) |
1608 | reg_02.raw = io_apic_read(apic, 2); | 1664 | reg_02.raw = io_apic_read(apic, 2); |
1609 | if (reg_01.bits.version >= 0x20) | 1665 | if (reg_01.bits.version >= 0x20) |
1610 | reg_03.raw = io_apic_read(apic, 3); | 1666 | reg_03.raw = io_apic_read(apic, 3); |
1611 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1667 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
1612 | 1668 | ||
1613 | printk("\n"); | 1669 | printk("\n"); |
1614 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); | 1670 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); |
@@ -1647,7 +1703,7 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1647 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); | 1703 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); |
1648 | 1704 | ||
1649 | printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" | 1705 | printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" |
1650 | " Stat Dmod Deli Vect: \n"); | 1706 | " Stat Dmod Deli Vect:\n"); |
1651 | 1707 | ||
1652 | for (i = 0; i <= reg_01.bits.entries; i++) { | 1708 | for (i = 0; i <= reg_01.bits.entries; i++) { |
1653 | struct IO_APIC_route_entry entry; | 1709 | struct IO_APIC_route_entry entry; |
@@ -1825,12 +1881,12 @@ __apicdebuginit(void) print_PIC(void) | |||
1825 | unsigned int v; | 1881 | unsigned int v; |
1826 | unsigned long flags; | 1882 | unsigned long flags; |
1827 | 1883 | ||
1828 | if (!nr_legacy_irqs) | 1884 | if (!legacy_pic->nr_legacy_irqs) |
1829 | return; | 1885 | return; |
1830 | 1886 | ||
1831 | printk(KERN_DEBUG "\nprinting PIC contents\n"); | 1887 | printk(KERN_DEBUG "\nprinting PIC contents\n"); |
1832 | 1888 | ||
1833 | spin_lock_irqsave(&i8259A_lock, flags); | 1889 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
1834 | 1890 | ||
1835 | v = inb(0xa1) << 8 | inb(0x21); | 1891 | v = inb(0xa1) << 8 | inb(0x21); |
1836 | printk(KERN_DEBUG "... PIC IMR: %04x\n", v); | 1892 | printk(KERN_DEBUG "... PIC IMR: %04x\n", v); |
@@ -1844,7 +1900,7 @@ __apicdebuginit(void) print_PIC(void) | |||
1844 | outb(0x0a,0xa0); | 1900 | outb(0x0a,0xa0); |
1845 | outb(0x0a,0x20); | 1901 | outb(0x0a,0x20); |
1846 | 1902 | ||
1847 | spin_unlock_irqrestore(&i8259A_lock, flags); | 1903 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
1848 | 1904 | ||
1849 | printk(KERN_DEBUG "... PIC ISR: %04x\n", v); | 1905 | printk(KERN_DEBUG "... PIC ISR: %04x\n", v); |
1850 | 1906 | ||
@@ -1903,13 +1959,13 @@ void __init enable_IO_APIC(void) | |||
1903 | * The number of IO-APIC IRQ registers (== #pins): | 1959 | * The number of IO-APIC IRQ registers (== #pins): |
1904 | */ | 1960 | */ |
1905 | for (apic = 0; apic < nr_ioapics; apic++) { | 1961 | for (apic = 0; apic < nr_ioapics; apic++) { |
1906 | spin_lock_irqsave(&ioapic_lock, flags); | 1962 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
1907 | reg_01.raw = io_apic_read(apic, 1); | 1963 | reg_01.raw = io_apic_read(apic, 1); |
1908 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1964 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
1909 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; | 1965 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; |
1910 | } | 1966 | } |
1911 | 1967 | ||
1912 | if (!nr_legacy_irqs) | 1968 | if (!legacy_pic->nr_legacy_irqs) |
1913 | return; | 1969 | return; |
1914 | 1970 | ||
1915 | for(apic = 0; apic < nr_ioapics; apic++) { | 1971 | for(apic = 0; apic < nr_ioapics; apic++) { |
@@ -1966,7 +2022,7 @@ void disable_IO_APIC(void) | |||
1966 | */ | 2022 | */ |
1967 | clear_IO_APIC(); | 2023 | clear_IO_APIC(); |
1968 | 2024 | ||
1969 | if (!nr_legacy_irqs) | 2025 | if (!legacy_pic->nr_legacy_irqs) |
1970 | return; | 2026 | return; |
1971 | 2027 | ||
1972 | /* | 2028 | /* |
@@ -2045,9 +2101,9 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
2045 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { | 2101 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { |
2046 | 2102 | ||
2047 | /* Read the register 0 value */ | 2103 | /* Read the register 0 value */ |
2048 | spin_lock_irqsave(&ioapic_lock, flags); | 2104 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2049 | reg_00.raw = io_apic_read(apic_id, 0); | 2105 | reg_00.raw = io_apic_read(apic_id, 0); |
2050 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2106 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2051 | 2107 | ||
2052 | old_id = mp_ioapics[apic_id].apicid; | 2108 | old_id = mp_ioapics[apic_id].apicid; |
2053 | 2109 | ||
@@ -2106,16 +2162,16 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
2106 | mp_ioapics[apic_id].apicid); | 2162 | mp_ioapics[apic_id].apicid); |
2107 | 2163 | ||
2108 | reg_00.bits.ID = mp_ioapics[apic_id].apicid; | 2164 | reg_00.bits.ID = mp_ioapics[apic_id].apicid; |
2109 | spin_lock_irqsave(&ioapic_lock, flags); | 2165 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2110 | io_apic_write(apic_id, 0, reg_00.raw); | 2166 | io_apic_write(apic_id, 0, reg_00.raw); |
2111 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2167 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2112 | 2168 | ||
2113 | /* | 2169 | /* |
2114 | * Sanity check | 2170 | * Sanity check |
2115 | */ | 2171 | */ |
2116 | spin_lock_irqsave(&ioapic_lock, flags); | 2172 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2117 | reg_00.raw = io_apic_read(apic_id, 0); | 2173 | reg_00.raw = io_apic_read(apic_id, 0); |
2118 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2174 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2119 | if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) | 2175 | if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) |
2120 | printk("could not set ID!\n"); | 2176 | printk("could not set ID!\n"); |
2121 | else | 2177 | else |
@@ -2198,15 +2254,15 @@ static unsigned int startup_ioapic_irq(unsigned int irq) | |||
2198 | unsigned long flags; | 2254 | unsigned long flags; |
2199 | struct irq_cfg *cfg; | 2255 | struct irq_cfg *cfg; |
2200 | 2256 | ||
2201 | spin_lock_irqsave(&ioapic_lock, flags); | 2257 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2202 | if (irq < nr_legacy_irqs) { | 2258 | if (irq < legacy_pic->nr_legacy_irqs) { |
2203 | disable_8259A_irq(irq); | 2259 | legacy_pic->chip->mask(irq); |
2204 | if (i8259A_irq_pending(irq)) | 2260 | if (legacy_pic->irq_pending(irq)) |
2205 | was_pending = 1; | 2261 | was_pending = 1; |
2206 | } | 2262 | } |
2207 | cfg = irq_cfg(irq); | 2263 | cfg = irq_cfg(irq); |
2208 | __unmask_IO_APIC_irq(cfg); | 2264 | __unmask_IO_APIC_irq(cfg); |
2209 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2265 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2210 | 2266 | ||
2211 | return was_pending; | 2267 | return was_pending; |
2212 | } | 2268 | } |
@@ -2217,9 +2273,9 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
2217 | struct irq_cfg *cfg = irq_cfg(irq); | 2273 | struct irq_cfg *cfg = irq_cfg(irq); |
2218 | unsigned long flags; | 2274 | unsigned long flags; |
2219 | 2275 | ||
2220 | spin_lock_irqsave(&vector_lock, flags); | 2276 | raw_spin_lock_irqsave(&vector_lock, flags); |
2221 | apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); | 2277 | apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); |
2222 | spin_unlock_irqrestore(&vector_lock, flags); | 2278 | raw_spin_unlock_irqrestore(&vector_lock, flags); |
2223 | 2279 | ||
2224 | return 1; | 2280 | return 1; |
2225 | } | 2281 | } |
@@ -2312,14 +2368,14 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | |||
2312 | irq = desc->irq; | 2368 | irq = desc->irq; |
2313 | cfg = desc->chip_data; | 2369 | cfg = desc->chip_data; |
2314 | 2370 | ||
2315 | spin_lock_irqsave(&ioapic_lock, flags); | 2371 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2316 | ret = set_desc_affinity(desc, mask, &dest); | 2372 | ret = set_desc_affinity(desc, mask, &dest); |
2317 | if (!ret) { | 2373 | if (!ret) { |
2318 | /* Only the high 8 bits are valid. */ | 2374 | /* Only the high 8 bits are valid. */ |
2319 | dest = SET_APIC_LOGICAL_ID(dest); | 2375 | dest = SET_APIC_LOGICAL_ID(dest); |
2320 | __target_IO_APIC_irq(irq, dest, cfg); | 2376 | __target_IO_APIC_irq(irq, dest, cfg); |
2321 | } | 2377 | } |
2322 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2378 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2323 | 2379 | ||
2324 | return ret; | 2380 | return ret; |
2325 | } | 2381 | } |
@@ -2434,6 +2490,13 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2434 | cfg = irq_cfg(irq); | 2490 | cfg = irq_cfg(irq); |
2435 | raw_spin_lock(&desc->lock); | 2491 | raw_spin_lock(&desc->lock); |
2436 | 2492 | ||
2493 | /* | ||
2494 | * Check if the irq migration is in progress. If so, we | ||
2495 | * haven't received the cleanup request yet for this irq. | ||
2496 | */ | ||
2497 | if (cfg->move_in_progress) | ||
2498 | goto unlock; | ||
2499 | |||
2437 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) | 2500 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) |
2438 | goto unlock; | 2501 | goto unlock; |
2439 | 2502 | ||
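The new move_in_progress test closes a window in irq migration: the cpu that initiates a move sends the cleanup IPI only after the move completes, so a cleanup vector observed while cfg->move_in_progress is still set must belong to an older, stale request. Skipping it is safe because the genuine cleanup interrupt is still on its way:

    if (cfg->move_in_progress)
        goto unlock;    /* premature: the real cleanup IPI will follow */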
@@ -2547,9 +2610,9 @@ static void eoi_ioapic_irq(struct irq_desc *desc) | |||
2547 | irq = desc->irq; | 2610 | irq = desc->irq; |
2548 | cfg = desc->chip_data; | 2611 | cfg = desc->chip_data; |
2549 | 2612 | ||
2550 | spin_lock_irqsave(&ioapic_lock, flags); | 2613 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2551 | __eoi_ioapic_irq(irq, cfg); | 2614 | __eoi_ioapic_irq(irq, cfg); |
2552 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2615 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2553 | } | 2616 | } |
2554 | 2617 | ||
2555 | static void ack_apic_level(unsigned int irq) | 2618 | static void ack_apic_level(unsigned int irq) |
@@ -2727,8 +2790,8 @@ static inline void init_IO_APIC_traps(void) | |||
2727 | * so default to an old-fashioned 8259 | 2790 | * so default to an old-fashioned 8259 |
2728 | * interrupt if we can.. | 2791 | * interrupt if we can.. |
2729 | */ | 2792 | */ |
2730 | if (irq < nr_legacy_irqs) | 2793 | if (irq < legacy_pic->nr_legacy_irqs) |
2731 | make_8259A_irq(irq); | 2794 | legacy_pic->make_irq(irq); |
2732 | else | 2795 | else |
2733 | /* Strange. Oh, well.. */ | 2796 | /* Strange. Oh, well.. */ |
2734 | desc->chip = &no_irq_chip; | 2797 | desc->chip = &no_irq_chip; |
@@ -2885,7 +2948,7 @@ static inline void __init check_timer(void) | |||
2885 | /* | 2948 | /* |
2886 | * get/set the timer IRQ vector: | 2949 | * get/set the timer IRQ vector: |
2887 | */ | 2950 | */ |
2888 | disable_8259A_irq(0); | 2951 | legacy_pic->chip->mask(0); |
2889 | assign_irq_vector(0, cfg, apic->target_cpus()); | 2952 | assign_irq_vector(0, cfg, apic->target_cpus()); |
2890 | 2953 | ||
2891 | /* | 2954 | /* |
@@ -2898,7 +2961,7 @@ static inline void __init check_timer(void) | |||
2898 | * automatically. | 2961 | * automatically. |
2899 | */ | 2962 | */ |
2900 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | 2963 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); |
2901 | init_8259A(1); | 2964 | legacy_pic->init(1); |
2902 | #ifdef CONFIG_X86_32 | 2965 | #ifdef CONFIG_X86_32 |
2903 | { | 2966 | { |
2904 | unsigned int ver; | 2967 | unsigned int ver; |
@@ -2957,7 +3020,7 @@ static inline void __init check_timer(void) | |||
2957 | if (timer_irq_works()) { | 3020 | if (timer_irq_works()) { |
2958 | if (nmi_watchdog == NMI_IO_APIC) { | 3021 | if (nmi_watchdog == NMI_IO_APIC) { |
2959 | setup_nmi(); | 3022 | setup_nmi(); |
2960 | enable_8259A_irq(0); | 3023 | legacy_pic->chip->unmask(0); |
2961 | } | 3024 | } |
2962 | if (disable_timer_pin_1 > 0) | 3025 | if (disable_timer_pin_1 > 0) |
2963 | clear_IO_APIC_pin(0, pin1); | 3026 | clear_IO_APIC_pin(0, pin1); |
@@ -2980,14 +3043,14 @@ static inline void __init check_timer(void) | |||
2980 | */ | 3043 | */ |
2981 | replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); | 3044 | replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); |
2982 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); | 3045 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); |
2983 | enable_8259A_irq(0); | 3046 | legacy_pic->chip->unmask(0); |
2984 | if (timer_irq_works()) { | 3047 | if (timer_irq_works()) { |
2985 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); | 3048 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); |
2986 | timer_through_8259 = 1; | 3049 | timer_through_8259 = 1; |
2987 | if (nmi_watchdog == NMI_IO_APIC) { | 3050 | if (nmi_watchdog == NMI_IO_APIC) { |
2988 | disable_8259A_irq(0); | 3051 | legacy_pic->chip->mask(0); |
2989 | setup_nmi(); | 3052 | setup_nmi(); |
2990 | enable_8259A_irq(0); | 3053 | legacy_pic->chip->unmask(0); |
2991 | } | 3054 | } |
2992 | goto out; | 3055 | goto out; |
2993 | } | 3056 | } |
@@ -2995,7 +3058,7 @@ static inline void __init check_timer(void) | |||
2995 | * Cleanup, just in case ... | 3058 | * Cleanup, just in case ... |
2996 | */ | 3059 | */ |
2997 | local_irq_disable(); | 3060 | local_irq_disable(); |
2998 | disable_8259A_irq(0); | 3061 | legacy_pic->chip->mask(0); |
2999 | clear_IO_APIC_pin(apic2, pin2); | 3062 | clear_IO_APIC_pin(apic2, pin2); |
3000 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); | 3063 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); |
3001 | } | 3064 | } |
@@ -3014,22 +3077,22 @@ static inline void __init check_timer(void) | |||
3014 | 3077 | ||
3015 | lapic_register_intr(0, desc); | 3078 | lapic_register_intr(0, desc); |
3016 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ | 3079 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ |
3017 | enable_8259A_irq(0); | 3080 | legacy_pic->chip->unmask(0); |
3018 | 3081 | ||
3019 | if (timer_irq_works()) { | 3082 | if (timer_irq_works()) { |
3020 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); | 3083 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
3021 | goto out; | 3084 | goto out; |
3022 | } | 3085 | } |
3023 | local_irq_disable(); | 3086 | local_irq_disable(); |
3024 | disable_8259A_irq(0); | 3087 | legacy_pic->chip->mask(0); |
3025 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); | 3088 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); |
3026 | apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); | 3089 | apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); |
3027 | 3090 | ||
3028 | apic_printk(APIC_QUIET, KERN_INFO | 3091 | apic_printk(APIC_QUIET, KERN_INFO |
3029 | "...trying to set up timer as ExtINT IRQ...\n"); | 3092 | "...trying to set up timer as ExtINT IRQ...\n"); |
3030 | 3093 | ||
3031 | init_8259A(0); | 3094 | legacy_pic->init(0); |
3032 | make_8259A_irq(0); | 3095 | legacy_pic->make_irq(0); |
3033 | apic_write(APIC_LVT0, APIC_DM_EXTINT); | 3096 | apic_write(APIC_LVT0, APIC_DM_EXTINT); |
3034 | 3097 | ||
3035 | unlock_ExtINT_logic(); | 3098 | unlock_ExtINT_logic(); |
@@ -3071,7 +3134,7 @@ void __init setup_IO_APIC(void) | |||
3071 | /* | 3134 | /* |
3072 | * calling enable_IO_APIC() is moved to setup_local_APIC for BP | 3135 | * calling enable_IO_APIC() is moved to setup_local_APIC for BP |
3073 | */ | 3136 | */ |
3074 | io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL; | 3137 | io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL; |
3075 | 3138 | ||
3076 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); | 3139 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); |
3077 | /* | 3140 | /* |
@@ -3082,7 +3145,7 @@ void __init setup_IO_APIC(void) | |||
3082 | sync_Arb_IDs(); | 3145 | sync_Arb_IDs(); |
3083 | setup_IO_APIC_irqs(); | 3146 | setup_IO_APIC_irqs(); |
3084 | init_IO_APIC_traps(); | 3147 | init_IO_APIC_traps(); |
3085 | if (nr_legacy_irqs) | 3148 | if (legacy_pic->nr_legacy_irqs) |
3086 | check_timer(); | 3149 | check_timer(); |
3087 | } | 3150 | } |
3088 | 3151 | ||
@@ -3131,13 +3194,13 @@ static int ioapic_resume(struct sys_device *dev) | |||
3131 | data = container_of(dev, struct sysfs_ioapic_data, dev); | 3194 | data = container_of(dev, struct sysfs_ioapic_data, dev); |
3132 | entry = data->entry; | 3195 | entry = data->entry; |
3133 | 3196 | ||
3134 | spin_lock_irqsave(&ioapic_lock, flags); | 3197 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
3135 | reg_00.raw = io_apic_read(dev->id, 0); | 3198 | reg_00.raw = io_apic_read(dev->id, 0); |
3136 | if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { | 3199 | if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { |
3137 | reg_00.bits.ID = mp_ioapics[dev->id].apicid; | 3200 | reg_00.bits.ID = mp_ioapics[dev->id].apicid; |
3138 | io_apic_write(dev->id, 0, reg_00.raw); | 3201 | io_apic_write(dev->id, 0, reg_00.raw); |
3139 | } | 3202 | } |
3140 | spin_unlock_irqrestore(&ioapic_lock, flags); | 3203 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
3141 | for (i = 0; i < nr_ioapic_registers[dev->id]; i++) | 3204 | for (i = 0; i < nr_ioapic_registers[dev->id]; i++) |
3142 | ioapic_write_entry(dev->id, i, entry[i]); | 3205 | ioapic_write_entry(dev->id, i, entry[i]); |
3143 | 3206 | ||
@@ -3200,7 +3263,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) | |||
3200 | if (irq_want < nr_irqs_gsi) | 3263 | if (irq_want < nr_irqs_gsi) |
3201 | irq_want = nr_irqs_gsi; | 3264 | irq_want = nr_irqs_gsi; |
3202 | 3265 | ||
3203 | spin_lock_irqsave(&vector_lock, flags); | 3266 | raw_spin_lock_irqsave(&vector_lock, flags); |
3204 | for (new = irq_want; new < nr_irqs; new++) { | 3267 | for (new = irq_want; new < nr_irqs; new++) { |
3205 | desc_new = irq_to_desc_alloc_node(new, node); | 3268 | desc_new = irq_to_desc_alloc_node(new, node); |
3206 | if (!desc_new) { | 3269 | if (!desc_new) { |
@@ -3219,14 +3282,11 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) | |||
3219 | irq = new; | 3282 | irq = new; |
3220 | break; | 3283 | break; |
3221 | } | 3284 | } |
3222 | spin_unlock_irqrestore(&vector_lock, flags); | 3285 | raw_spin_unlock_irqrestore(&vector_lock, flags); |
3286 | |||
3287 | if (irq > 0) | ||
3288 | dynamic_irq_init_keep_chip_data(irq); | ||
3223 | 3289 | ||
3224 | if (irq > 0) { | ||
3225 | dynamic_irq_init(irq); | ||
3226 | /* restore it, in case dynamic_irq_init clear it */ | ||
3227 | if (desc_new) | ||
3228 | desc_new->chip_data = cfg_new; | ||
3229 | } | ||
3230 | return irq; | 3290 | return irq; |
3231 | } | 3291 | } |
3232 | 3292 | ||
@@ -3248,20 +3308,13 @@ int create_irq(void) | |||
3248 | void destroy_irq(unsigned int irq) | 3308 | void destroy_irq(unsigned int irq) |
3249 | { | 3309 | { |
3250 | unsigned long flags; | 3310 | unsigned long flags; |
3251 | struct irq_cfg *cfg; | ||
3252 | struct irq_desc *desc; | ||
3253 | 3311 | ||
3254 | /* store it, in case dynamic_irq_cleanup clear it */ | 3312 | dynamic_irq_cleanup_keep_chip_data(irq); |
3255 | desc = irq_to_desc(irq); | ||
3256 | cfg = desc->chip_data; | ||
3257 | dynamic_irq_cleanup(irq); | ||
3258 | /* connect back irq_cfg */ | ||
3259 | desc->chip_data = cfg; | ||
3260 | 3313 | ||
3261 | free_irte(irq); | 3314 | free_irte(irq); |
3262 | spin_lock_irqsave(&vector_lock, flags); | 3315 | raw_spin_lock_irqsave(&vector_lock, flags); |
3263 | __clear_irq_vector(irq, cfg); | 3316 | __clear_irq_vector(irq, get_irq_chip_data(irq)); |
3264 | spin_unlock_irqrestore(&vector_lock, flags); | 3317 | raw_spin_unlock_irqrestore(&vector_lock, flags); |
3265 | } | 3318 | } |
3266 | 3319 | ||
3267 | /* | 3320 | /* |
@@ -3798,9 +3851,9 @@ int __init io_apic_get_redir_entries (int ioapic) | |||
3798 | union IO_APIC_reg_01 reg_01; | 3851 | union IO_APIC_reg_01 reg_01; |
3799 | unsigned long flags; | 3852 | unsigned long flags; |
3800 | 3853 | ||
3801 | spin_lock_irqsave(&ioapic_lock, flags); | 3854 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
3802 | reg_01.raw = io_apic_read(ioapic, 1); | 3855 | reg_01.raw = io_apic_read(ioapic, 1); |
3803 | spin_unlock_irqrestore(&ioapic_lock, flags); | 3856 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
3804 | 3857 | ||
3805 | return reg_01.bits.entries; | 3858 | return reg_01.bits.entries; |
3806 | } | 3859 | } |
@@ -3883,7 +3936,7 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq, | |||
3883 | /* | 3936 | /* |
3884 | * IRQs < 16 are already in the irq_2_pin[] map | 3937 | * IRQs < 16 are already in the irq_2_pin[] map |
3885 | */ | 3938 | */ |
3886 | if (irq >= nr_legacy_irqs) { | 3939 | if (irq >= legacy_pic->nr_legacy_irqs) { |
3887 | cfg = desc->chip_data; | 3940 | cfg = desc->chip_data; |
3888 | if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { | 3941 | if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { |
3889 | printk(KERN_INFO "can not add pin %d for irq %d\n", | 3942 | printk(KERN_INFO "can not add pin %d for irq %d\n", |
@@ -3962,9 +4015,9 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3962 | if (physids_empty(apic_id_map)) | 4015 | if (physids_empty(apic_id_map)) |
3963 | apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); | 4016 | apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); |
3964 | 4017 | ||
3965 | spin_lock_irqsave(&ioapic_lock, flags); | 4018 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
3966 | reg_00.raw = io_apic_read(ioapic, 0); | 4019 | reg_00.raw = io_apic_read(ioapic, 0); |
3967 | spin_unlock_irqrestore(&ioapic_lock, flags); | 4020 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
3968 | 4021 | ||
3969 | if (apic_id >= get_physical_broadcast()) { | 4022 | if (apic_id >= get_physical_broadcast()) { |
3970 | printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " | 4023 | printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " |
@@ -3998,10 +4051,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3998 | if (reg_00.bits.ID != apic_id) { | 4051 | if (reg_00.bits.ID != apic_id) { |
3999 | reg_00.bits.ID = apic_id; | 4052 | reg_00.bits.ID = apic_id; |
4000 | 4053 | ||
4001 | spin_lock_irqsave(&ioapic_lock, flags); | 4054 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
4002 | io_apic_write(ioapic, 0, reg_00.raw); | 4055 | io_apic_write(ioapic, 0, reg_00.raw); |
4003 | reg_00.raw = io_apic_read(ioapic, 0); | 4056 | reg_00.raw = io_apic_read(ioapic, 0); |
4004 | spin_unlock_irqrestore(&ioapic_lock, flags); | 4057 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
4005 | 4058 | ||
4006 | /* Sanity check */ | 4059 | /* Sanity check */ |
4007 | if (reg_00.bits.ID != apic_id) { | 4060 | if (reg_00.bits.ID != apic_id) { |
@@ -4022,9 +4075,9 @@ int __init io_apic_get_version(int ioapic) | |||
4022 | union IO_APIC_reg_01 reg_01; | 4075 | union IO_APIC_reg_01 reg_01; |
4023 | unsigned long flags; | 4076 | unsigned long flags; |
4024 | 4077 | ||
4025 | spin_lock_irqsave(&ioapic_lock, flags); | 4078 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
4026 | reg_01.raw = io_apic_read(ioapic, 1); | 4079 | reg_01.raw = io_apic_read(ioapic, 1); |
4027 | spin_unlock_irqrestore(&ioapic_lock, flags); | 4080 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
4028 | 4081 | ||
4029 | return reg_01.bits.version; | 4082 | return reg_01.bits.version; |
4030 | } | 4083 | } |
@@ -4056,27 +4109,23 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | |||
4056 | #ifdef CONFIG_SMP | 4109 | #ifdef CONFIG_SMP |
4057 | void __init setup_ioapic_dest(void) | 4110 | void __init setup_ioapic_dest(void) |
4058 | { | 4111 | { |
4059 | int pin, ioapic = 0, irq, irq_entry; | 4112 | int pin, ioapic, irq, irq_entry; |
4060 | struct irq_desc *desc; | 4113 | struct irq_desc *desc; |
4061 | const struct cpumask *mask; | 4114 | const struct cpumask *mask; |
4062 | 4115 | ||
4063 | if (skip_ioapic_setup == 1) | 4116 | if (skip_ioapic_setup == 1) |
4064 | return; | 4117 | return; |
4065 | 4118 | ||
4066 | #ifdef CONFIG_ACPI | 4119 | for (ioapic = 0; ioapic < nr_ioapics; ioapic++) |
4067 | if (!acpi_disabled && acpi_ioapic) { | ||
4068 | ioapic = mp_find_ioapic(0); | ||
4069 | if (ioapic < 0) | ||
4070 | ioapic = 0; | ||
4071 | } | ||
4072 | #endif | ||
4073 | |||
4074 | for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { | 4120 | for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { |
4075 | irq_entry = find_irq_entry(ioapic, pin, mp_INT); | 4121 | irq_entry = find_irq_entry(ioapic, pin, mp_INT); |
4076 | if (irq_entry == -1) | 4122 | if (irq_entry == -1) |
4077 | continue; | 4123 | continue; |
4078 | irq = pin_2_irq(irq_entry, ioapic, pin); | 4124 | irq = pin_2_irq(irq_entry, ioapic, pin); |
4079 | 4125 | ||
4126 | if ((ioapic > 0) && (irq > 16)) | ||
4127 | continue; | ||
4128 | |||
4080 | desc = irq_to_desc(irq); | 4129 | desc = irq_to_desc(irq); |
4081 | 4130 | ||
4082 | /* | 4131 | /* |
@@ -4261,3 +4310,24 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | |||
4261 | 4310 | ||
4262 | nr_ioapics++; | 4311 | nr_ioapics++; |
4263 | } | 4312 | } |
4313 | |||
4314 | /* Enable IOAPIC early just for system timer */ | ||
4315 | void __init pre_init_apic_IRQ0(void) | ||
4316 | { | ||
4317 | struct irq_cfg *cfg; | ||
4318 | struct irq_desc *desc; | ||
4319 | |||
4320 | printk(KERN_INFO "Early APIC setup for system timer0\n"); | ||
4321 | #ifndef CONFIG_SMP | ||
4322 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); | ||
4323 | #endif | ||
4324 | desc = irq_to_desc_alloc_node(0, 0); | ||
4325 | |||
4326 | setup_local_APIC(); | ||
4327 | |||
4328 | cfg = irq_cfg(0); | ||
4329 | add_pin_to_irq_node(cfg, 0, 0, 0); | ||
4330 | set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); | ||
4331 | |||
4332 | setup_IO_APIC_irq(0, 0, 0, desc, 0, 0); | ||
4333 | } | ||
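pre_init_apic_IRQ0() exists for Moorestown-class platforms that have no PIT/8259 pair: the APB system timer must arrive through the IO-APIC before the normal interrupt bring-up runs. The ordering constraints, inferred from the function body above:

    /*
     * 1. irq_to_desc_alloc_node(0, 0)  - irq 0 descriptor must exist
     * 2. setup_local_APIC()            - boot cpu LAPIC live first
     * 3. add_pin_to_irq_node(...)      - bind irq 0 to ioapic 0, pin 0
     * 4. setup_IO_APIC_irq(0,0,0,...)  - trigger 0 / polarity 0, i.e.
     *                                    edge triggered, active high
     */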
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 0159a69396cb..1edaf15c0b8e 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/delay.h> | 18 | #include <linux/delay.h> |
19 | #include <linux/interrupt.h> | 19 | #include <linux/interrupt.h> |
20 | #include <linux/module.h> | 20 | #include <linux/module.h> |
21 | #include <linux/slab.h> | ||
21 | #include <linux/sysdev.h> | 22 | #include <linux/sysdev.h> |
22 | #include <linux/sysctl.h> | 23 | #include <linux/sysctl.h> |
23 | #include <linux/percpu.h> | 24 | #include <linux/percpu.h> |
@@ -177,7 +178,7 @@ int __init check_nmi_watchdog(void) | |||
177 | error: | 178 | error: |
178 | if (nmi_watchdog == NMI_IO_APIC) { | 179 | if (nmi_watchdog == NMI_IO_APIC) { |
179 | if (!timer_through_8259) | 180 | if (!timer_through_8259) |
180 | disable_8259A_irq(0); | 181 | legacy_pic->chip->mask(0); |
181 | on_each_cpu(__acpi_nmi_disable, NULL, 1); | 182 | on_each_cpu(__acpi_nmi_disable, NULL, 1); |
182 | } | 183 | } |
183 | 184 | ||
@@ -416,13 +417,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
416 | 417 | ||
417 | /* We can be called before check_nmi_watchdog, hence NULL check. */ | 418 | /* We can be called before check_nmi_watchdog, hence NULL check. */ |
418 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { | 419 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { |
419 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ | 420 | static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */ |
420 | 421 | ||
421 | spin_lock(&lock); | 422 | raw_spin_lock(&lock); |
422 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); | 423 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); |
423 | show_regs(regs); | 424 | show_regs(regs); |
424 | dump_stack(); | 425 | dump_stack(); |
425 | spin_unlock(&lock); | 426 | raw_spin_unlock(&lock); |
426 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); | 427 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); |
427 | 428 | ||
428 | rc = 1; | 429 | rc = 1; |
@@ -438,8 +439,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
438 | * Ayiee, looks like this CPU is stuck ... | 439 | * Ayiee, looks like this CPU is stuck ... |
439 | * wait a few IRQs (5 seconds) before doing the oops ... | 440 | * wait a few IRQs (5 seconds) before doing the oops ... |
440 | */ | 441 | */ |
441 | __this_cpu_inc(per_cpu_var(alert_counter)); | 442 | __this_cpu_inc(alert_counter); |
442 | if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz) | 443 | if (__this_cpu_read(alert_counter) == 5 * nmi_hz) |
443 | /* | 444 | /* |
444 | * die_nmi will return ONLY if NOTIFY_STOP happens.. | 445 | * die_nmi will return ONLY if NOTIFY_STOP happens.. |
445 | */ | 446 | */ |
@@ -447,7 +448,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
447 | regs, panic_on_timeout); | 448 | regs, panic_on_timeout); |
448 | } else { | 449 | } else { |
449 | __get_cpu_var(last_irq_sum) = sum; | 450 | __get_cpu_var(last_irq_sum) = sum; |
450 | __this_cpu_write(per_cpu_var(alert_counter), 0); | 451 | __this_cpu_write(alert_counter, 0); |
451 | } | 452 | } |
452 | 453 | ||
453 | /* see if the nmi watchdog went off */ | 454 | /* see if the nmi watchdog went off */ |
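The three changes above share a theme: code that can run in NMI context must not take anything that might sleep. The printk-serialising lock becomes a raw spinlock, which stays a plain spinning lock under every preemption model; the 8259 mask goes through the new legacy_pic abstraction; and the per-cpu alert counter drops the per_cpu_var() name-mangling that the percpu rework made unnecessary. A minimal standalone sketch of the resulting lock/counter pattern (names are illustrative, not the file's exact code):

    #include <linux/kernel.h>
    #include <linux/percpu.h>
    #include <linux/sched.h>        /* show_regs() */
    #include <linux/spinlock.h>

    /* Sketch only: mirrors the nmi_watchdog_tick() changes above. */
    static DEFINE_RAW_SPINLOCK(nmi_print_lock);  /* spins even on RT; NMI-safe */
    static DEFINE_PER_CPU(unsigned int, alert_counter);

    static void nmi_backtrace_one(struct pt_regs *regs, int cpu)
    {
            raw_spin_lock(&nmi_print_lock);      /* serialise the printks */
            printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
            show_regs(regs);
            dump_stack();
            raw_spin_unlock(&nmi_print_lock);
    }

    static bool cpu_seems_stuck(unsigned int nmi_hz)
    {
            __this_cpu_inc(alert_counter);       /* no per_cpu_var() wrapper */
            return __this_cpu_read(alert_counter) >= 5 * nmi_hz;
    }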
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 98c4665f251c..3e28401f161c 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c | |||
@@ -225,7 +225,7 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc) | |||
225 | 225 | ||
226 | mpc_record = 0; | 226 | mpc_record = 0; |
227 | printk(KERN_INFO | 227 | printk(KERN_INFO |
228 | "Found an OEM MPC table at %8p - parsing it ... \n", oemtable); | 228 | "Found an OEM MPC table at %8p - parsing it...\n", oemtable); |
229 | 229 | ||
230 | if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { | 230 | if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { |
231 | printk(KERN_WARNING | 231 | printk(KERN_WARNING |
@@ -277,6 +277,7 @@ static __init void early_check_numaq(void) | |||
277 | x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; | 277 | x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; |
278 | x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; | 278 | x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; |
279 | x86_init.timers.tsc_pre_init = numaq_tsc_init; | 279 | x86_init.timers.tsc_pre_init = numaq_tsc_init; |
280 | x86_init.pci.init = pci_numaq_init; | ||
280 | } | 281 | } |
281 | } | 282 | } |
282 | 283 | ||
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 1a6559f6768c..99d2fe016084 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c | |||
@@ -52,7 +52,32 @@ static int __init print_ipi_mode(void) | |||
52 | } | 52 | } |
53 | late_initcall(print_ipi_mode); | 53 | late_initcall(print_ipi_mode); |
54 | 54 | ||
55 | void default_setup_apic_routing(void) | 55 | void __init default_setup_apic_routing(void) |
56 | { | ||
57 | int version = apic_version[boot_cpu_physical_apicid]; | ||
58 | |||
59 | if (num_possible_cpus() > 8) { | ||
60 | switch (boot_cpu_data.x86_vendor) { | ||
61 | case X86_VENDOR_INTEL: | ||
62 | if (!APIC_XAPIC(version)) { | ||
63 | def_to_bigsmp = 0; | ||
64 | break; | ||
65 | } | ||
66 | /* If P4 and above fall through */ | ||
67 | case X86_VENDOR_AMD: | ||
68 | def_to_bigsmp = 1; | ||
69 | } | ||
70 | } | ||
71 | |||
72 | #ifdef CONFIG_X86_BIGSMP | ||
73 | generic_bigsmp_probe(); | ||
74 | #endif | ||
75 | |||
76 | if (apic->setup_apic_routing) | ||
77 | apic->setup_apic_routing(); | ||
78 | } | ||
79 | |||
80 | static void setup_apic_flat_routing(void) | ||
56 | { | 81 | { |
57 | #ifdef CONFIG_X86_IO_APIC | 82 | #ifdef CONFIG_X86_IO_APIC |
58 | printk(KERN_INFO | 83 | printk(KERN_INFO |
@@ -103,7 +128,7 @@ struct apic apic_default = { | |||
103 | .init_apic_ldr = default_init_apic_ldr, | 128 | .init_apic_ldr = default_init_apic_ldr, |
104 | 129 | ||
105 | .ioapic_phys_id_map = default_ioapic_phys_id_map, | 130 | .ioapic_phys_id_map = default_ioapic_phys_id_map, |
106 | .setup_apic_routing = default_setup_apic_routing, | 131 | .setup_apic_routing = setup_apic_flat_routing, |
107 | .multi_timer_check = NULL, | 132 | .multi_timer_check = NULL, |
108 | .apicid_to_node = default_apicid_to_node, | 133 | .apicid_to_node = default_apicid_to_node, |
109 | .cpu_to_logical_apicid = default_cpu_to_logical_apicid, | 134 | .cpu_to_logical_apicid = default_cpu_to_logical_apicid, |
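On 32-bit, default_setup_apic_routing() now owns the "more than 8 CPUs" decision: flat logical delivery can address at most 8 CPUs, so xAPIC-capable Intel parts (the deliberate switch fall-through) and AMD parts set def_to_bigsmp, while pre-xAPIC Intel clears it. The old function body lives on as setup_apic_flat_routing(), which apic_default now points at, and the 64-bit probe in the next file makes the matching simplification. The decision, restated as a sketch (vendors other than Intel/AMD are left untouched, as in the switch):

    /* Sketch of the >8 CPU decision above; other vendors are unchanged. */
    if (num_possible_cpus() > 8) {
            int version = apic_version[boot_cpu_physical_apicid];

            switch (boot_cpu_data.x86_vendor) {
            case X86_VENDOR_INTEL:
                    def_to_bigsmp = APIC_XAPIC(version); /* pre-xAPIC: stay flat */
                    break;
            case X86_VENDOR_AMD:
                    def_to_bigsmp = 1;
                    break;
            }
    }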
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index c4cbd3080c1c..83e9be4778e2 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c | |||
@@ -67,17 +67,8 @@ void __init default_setup_apic_routing(void) | |||
67 | } | 67 | } |
68 | #endif | 68 | #endif |
69 | 69 | ||
70 | if (apic == &apic_flat) { | 70 | if (apic == &apic_flat && num_possible_cpus() > 8) |
71 | switch (boot_cpu_data.x86_vendor) { | 71 | apic = &apic_physflat; |
72 | case X86_VENDOR_INTEL: | ||
73 | if (num_processors > 8) | ||
74 | apic = &apic_physflat; | ||
75 | break; | ||
76 | case X86_VENDOR_AMD: | ||
77 | if (max_physical_apicid >= 8) | ||
78 | apic = &apic_physflat; | ||
79 | } | ||
80 | } | ||
81 | 72 | ||
82 | printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); | 73 | printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); |
83 | 74 | ||
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d56b0efb2057..c085d52dbaf2 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * | 5 | * |
6 | * SGI UV APIC functions (note: not an Intel compatible APIC) | 6 | * SGI UV APIC functions (note: not an Intel compatible APIC) |
7 | * | 7 | * |
8 | * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. | 8 | * Copyright (C) 2007-2009 Silicon Graphics, Inc. All rights reserved. |
9 | */ | 9 | */ |
10 | #include <linux/cpumask.h> | 10 | #include <linux/cpumask.h> |
11 | #include <linux/hardirq.h> | 11 | #include <linux/hardirq.h> |
@@ -17,9 +17,12 @@ | |||
17 | #include <linux/ctype.h> | 17 | #include <linux/ctype.h> |
18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
19 | #include <linux/timer.h> | 19 | #include <linux/timer.h> |
20 | #include <linux/slab.h> | ||
20 | #include <linux/cpu.h> | 21 | #include <linux/cpu.h> |
21 | #include <linux/init.h> | 22 | #include <linux/init.h> |
22 | #include <linux/io.h> | 23 | #include <linux/io.h> |
24 | #include <linux/pci.h> | ||
25 | #include <linux/kdebug.h> | ||
23 | 26 | ||
24 | #include <asm/uv/uv_mmrs.h> | 27 | #include <asm/uv/uv_mmrs.h> |
25 | #include <asm/uv/uv_hub.h> | 28 | #include <asm/uv/uv_hub.h> |
@@ -34,8 +37,13 @@ | |||
34 | 37 | ||
35 | DEFINE_PER_CPU(int, x2apic_extra_bits); | 38 | DEFINE_PER_CPU(int, x2apic_extra_bits); |
36 | 39 | ||
40 | #define PR_DEVEL(fmt, args...) pr_devel("%s: " fmt, __func__, args) | ||
41 | |||
37 | static enum uv_system_type uv_system_type; | 42 | static enum uv_system_type uv_system_type; |
38 | static u64 gru_start_paddr, gru_end_paddr; | 43 | static u64 gru_start_paddr, gru_end_paddr; |
44 | int uv_min_hub_revision_id; | ||
45 | EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); | ||
46 | static DEFINE_SPINLOCK(uv_nmi_lock); | ||
39 | 47 | ||
40 | static inline bool is_GRU_range(u64 start, u64 end) | 48 | static inline bool is_GRU_range(u64 start, u64 end) |
41 | { | 49 | { |
@@ -55,20 +63,28 @@ static int early_get_nodeid(void) | |||
55 | mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr)); | 63 | mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr)); |
56 | node_id.v = *mmr; | 64 | node_id.v = *mmr; |
57 | early_iounmap(mmr, sizeof(*mmr)); | 65 | early_iounmap(mmr, sizeof(*mmr)); |
66 | |||
67 | /* Currently, all blades have the same revision number */ | ||
68 | uv_min_hub_revision_id = node_id.s.revision; | ||
69 | |||
58 | return node_id.s.node_id; | 70 | return node_id.s.node_id; |
59 | } | 71 | } |
60 | 72 | ||
61 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 73 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
62 | { | 74 | { |
75 | int nodeid; | ||
76 | |||
63 | if (!strcmp(oem_id, "SGI")) { | 77 | if (!strcmp(oem_id, "SGI")) { |
78 | nodeid = early_get_nodeid(); | ||
64 | x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; | 79 | x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; |
80 | x86_platform.nmi_init = uv_nmi_init; | ||
65 | if (!strcmp(oem_table_id, "UVL")) | 81 | if (!strcmp(oem_table_id, "UVL")) |
66 | uv_system_type = UV_LEGACY_APIC; | 82 | uv_system_type = UV_LEGACY_APIC; |
67 | else if (!strcmp(oem_table_id, "UVX")) | 83 | else if (!strcmp(oem_table_id, "UVX")) |
68 | uv_system_type = UV_X2APIC; | 84 | uv_system_type = UV_X2APIC; |
69 | else if (!strcmp(oem_table_id, "UVH")) { | 85 | else if (!strcmp(oem_table_id, "UVH")) { |
70 | __get_cpu_var(x2apic_extra_bits) = | 86 | __get_cpu_var(x2apic_extra_bits) = |
71 | early_get_nodeid() << (UV_APIC_PNODE_SHIFT - 1); | 87 | nodeid << (UV_APIC_PNODE_SHIFT - 1); |
72 | uv_system_type = UV_NON_UNIQUE_APIC; | 88 | uv_system_type = UV_NON_UNIQUE_APIC; |
73 | return 1; | 89 | return 1; |
74 | } | 90 | } |
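The hunk reads the node id once into a local instead of calling early_get_nodeid() at the point of use, and early_get_nodeid() itself now also records the hub revision in uv_min_hub_revision_id. Its early-boot MMR access follows a standard map/read/unmap pattern, sketched here as a hypothetical generic helper:

    /* Hypothetical helper showing the early map/read/unmap pattern used
     * by early_get_nodeid() above: the normal ioremap machinery is not
     * available yet, so the register is mapped only for one read. */
    static __init u64 early_read_mmr(unsigned long offset)
    {
            u64 val, *mmr;

            mmr = early_ioremap(UV_LOCAL_MMR_BASE | offset, sizeof(*mmr));
            val = *mmr;
            early_iounmap(mmr, sizeof(*mmr));

            return val;
    }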
@@ -105,11 +121,9 @@ EXPORT_SYMBOL_GPL(uv_possible_blades); | |||
105 | unsigned long sn_rtc_cycles_per_second; | 121 | unsigned long sn_rtc_cycles_per_second; |
106 | EXPORT_SYMBOL(sn_rtc_cycles_per_second); | 122 | EXPORT_SYMBOL(sn_rtc_cycles_per_second); |
107 | 123 | ||
108 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | ||
109 | |||
110 | static const struct cpumask *uv_target_cpus(void) | 124 | static const struct cpumask *uv_target_cpus(void) |
111 | { | 125 | { |
112 | return cpumask_of(0); | 126 | return cpu_online_mask; |
113 | } | 127 | } |
114 | 128 | ||
115 | static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) | 129 | static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) |
@@ -374,13 +388,13 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) | |||
374 | 388 | ||
375 | enum map_type {map_wb, map_uc}; | 389 | enum map_type {map_wb, map_uc}; |
376 | 390 | ||
377 | static __init void map_high(char *id, unsigned long base, int shift, | 391 | static __init void map_high(char *id, unsigned long base, int pshift, |
378 | int max_pnode, enum map_type map_type) | 392 | int bshift, int max_pnode, enum map_type map_type) |
379 | { | 393 | { |
380 | unsigned long bytes, paddr; | 394 | unsigned long bytes, paddr; |
381 | 395 | ||
382 | paddr = base << shift; | 396 | paddr = base << pshift; |
383 | bytes = (1UL << shift) * (max_pnode + 1); | 397 | bytes = (1UL << bshift) * (max_pnode + 1); |
384 | printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, | 398 | printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, |
385 | paddr + bytes); | 399 | paddr + bytes); |
386 | if (map_type == map_uc) | 400 | if (map_type == map_uc) |
@@ -396,7 +410,7 @@ static __init void map_gru_high(int max_pnode) | |||
396 | 410 | ||
397 | gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); | 411 | gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); |
398 | if (gru.s.enable) { | 412 | if (gru.s.enable) { |
399 | map_high("GRU", gru.s.base, shift, max_pnode, map_wb); | 413 | map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb); |
400 | gru_start_paddr = ((u64)gru.s.base << shift); | 414 | gru_start_paddr = ((u64)gru.s.base << shift); |
401 | gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); | 415 | gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); |
402 | 416 | ||
@@ -410,7 +424,7 @@ static __init void map_mmr_high(int max_pnode) | |||
410 | 424 | ||
411 | mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); | 425 | mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); |
412 | if (mmr.s.enable) | 426 | if (mmr.s.enable) |
413 | map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); | 427 | map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc); |
414 | } | 428 | } |
415 | 429 | ||
416 | static __init void map_mmioh_high(int max_pnode) | 430 | static __init void map_mmioh_high(int max_pnode) |
@@ -420,7 +434,8 @@ static __init void map_mmioh_high(int max_pnode) | |||
420 | 434 | ||
421 | mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); | 435 | mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); |
422 | if (mmioh.s.enable) | 436 | if (mmioh.s.enable) |
423 | map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); | 437 | map_high("MMIOH", mmioh.s.base, shift, mmioh.s.m_io, |
438 | max_pnode, map_uc); | ||
424 | } | 439 | } |
425 | 440 | ||
426 | static __init void map_low_mmrs(void) | 441 | static __init void map_low_mmrs(void) |
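map_high() previously derived both the physical base and the mapping size from one shift. Splitting it into pshift (how far the base field is shifted to form an address) and bshift (log2 of each pnode's block size) lets map_mmioh_high() size its window from the hardware's m_io field, while the GRU and MMR callers simply pass the same shift twice. A worked example with made-up values shows why the two shifts can differ:

    /* Made-up numbers: the base granularity and the per-pnode window
     * size no longer have to match. */
    unsigned long base = 0x40;      /* base field from the overlay config MMR */
    int pshift = 28;                /* base is in 256 MB units */
    int bshift = 26;                /* m_io says each pnode decodes 64 MB */
    int max_pnode = 3;

    unsigned long paddr = base << pshift;                    /* 0x400000000 */
    unsigned long bytes = (1UL << bshift) * (max_pnode + 1); /* 4 * 64 MB */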
@@ -472,7 +487,7 @@ static void uv_heartbeat(unsigned long ignored) | |||
472 | 487 | ||
473 | static void __cpuinit uv_heartbeat_enable(int cpu) | 488 | static void __cpuinit uv_heartbeat_enable(int cpu) |
474 | { | 489 | { |
475 | if (!uv_cpu_hub_info(cpu)->scir.enabled) { | 490 | while (!uv_cpu_hub_info(cpu)->scir.enabled) { |
476 | struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; | 491 | struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; |
477 | 492 | ||
478 | uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); | 493 | uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); |
@@ -480,11 +495,10 @@ static void __cpuinit uv_heartbeat_enable(int cpu) | |||
480 | timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; | 495 | timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; |
481 | add_timer_on(timer, cpu); | 496 | add_timer_on(timer, cpu); |
482 | uv_cpu_hub_info(cpu)->scir.enabled = 1; | 497 | uv_cpu_hub_info(cpu)->scir.enabled = 1; |
483 | } | ||
484 | 498 | ||
485 | /* check boot cpu */ | 499 | /* also ensure that boot cpu is enabled */ |
486 | if (!uv_cpu_hub_info(0)->scir.enabled) | 500 | cpu = 0; |
487 | uv_heartbeat_enable(0); | 501 | } |
488 | } | 502 | } |
489 | 503 | ||
490 | #ifdef CONFIG_HOTPLUG_CPU | 504 | #ifdef CONFIG_HOTPLUG_CPU |
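uv_heartbeat_enable() used to enable the requested CPU and then recurse once to cover the boot CPU. The rewrite turns that tail call into iteration: the if becomes a while, cpu is reset to 0 at the bottom of the body, and the loop terminates as soon as the (now boot) CPU is already enabled. The control-flow shape, with hypothetical helpers standing in for the SCIR bookkeeping:

    /* is_enabled()/enable_one() are hypothetical stand-ins for the SCIR
     * timer setup in uv_heartbeat_enable(). */
    static void heartbeat_enable(int cpu)
    {
            while (!is_enabled(cpu)) {
                    enable_one(cpu);  /* timer + heartbeat bits for this cpu */
                    cpu = 0;          /* next pass checks the boot cpu, then stops */
            }
    }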
@@ -543,6 +557,30 @@ late_initcall(uv_init_heartbeat); | |||
543 | 557 | ||
544 | #endif /* !CONFIG_HOTPLUG_CPU */ | 558 | #endif /* !CONFIG_HOTPLUG_CPU */ |
545 | 559 | ||
560 | /* Direct Legacy VGA I/O traffic to designated IOH */ | ||
561 | int uv_set_vga_state(struct pci_dev *pdev, bool decode, | ||
562 | unsigned int command_bits, bool change_bridge) | ||
563 | { | ||
564 | int domain, bus, rc; | ||
565 | |||
566 | PR_DEVEL("devfn %x decode %d cmd %x chg_brdg %d\n", | ||
567 | pdev->devfn, decode, command_bits, change_bridge); | ||
568 | |||
569 | if (!change_bridge) | ||
570 | return 0; | ||
571 | |||
572 | if ((command_bits & PCI_COMMAND_IO) == 0) | ||
573 | return 0; | ||
574 | |||
575 | domain = pci_domain_nr(pdev->bus); | ||
576 | bus = pdev->bus->number; | ||
577 | |||
578 | rc = uv_bios_set_legacy_vga_target(decode, domain, bus); | ||
579 | PR_DEVEL("vga decode %d %x:%x, rc: %d\n", decode, domain, bus, rc); | ||
580 | |||
581 | return rc; | ||
582 | } | ||
583 | |||
546 | /* | 584 | /* |
547 | * Called on each cpu to initialize the per_cpu UV data area. | 585 | * Called on each cpu to initialize the per_cpu UV data area. |
548 | * FIXME: hotplug not supported yet | 586 | * FIXME: hotplug not supported yet |
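uv_set_vga_state() is the arch-side callback that the PCI VGA-arbitration code invokes when legacy VGA routing changes; it ignores everything except bridge changes that involve I/O decoding and then asks the BIOS to retarget legacy VGA I/O to the right IOH. A skeleton of a hook with that signature (my_set_vga_state is hypothetical; the filtering mirrors the function above):

    #include <linux/pci.h>

    /* Hypothetical hook with the same signature and filtering as
     * uv_set_vga_state() above. */
    static int my_set_vga_state(struct pci_dev *pdev, bool decode,
                                unsigned int command_bits, bool change_bridge)
    {
            if (!change_bridge)                     /* device-level change: ignore */
                    return 0;
            if (!(command_bits & PCI_COMMAND_IO))   /* no I/O decoding involved */
                    return 0;

            /* firmware call to steer legacy VGA I/O to this bus goes here */
            return 0;
    }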
@@ -559,6 +597,46 @@ void __cpuinit uv_cpu_init(void) | |||
559 | set_x2apic_extra_bits(uv_hub_info->pnode); | 597 | set_x2apic_extra_bits(uv_hub_info->pnode); |
560 | } | 598 | } |
561 | 599 | ||
600 | /* | ||
601 | * When NMI is received, print a stack trace. | ||
602 | */ | ||
603 | int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) | ||
604 | { | ||
605 | if (reason != DIE_NMI_IPI) | ||
606 | return NOTIFY_OK; | ||
607 | /* | ||
608 | * Use a lock so only one cpu prints at a time | ||
609 | * to prevent intermixed output. | ||
610 | */ | ||
611 | spin_lock(&uv_nmi_lock); | ||
612 | pr_info("NMI stack dump cpu %u:\n", smp_processor_id()); | ||
613 | dump_stack(); | ||
614 | spin_unlock(&uv_nmi_lock); | ||
615 | |||
616 | return NOTIFY_STOP; | ||
617 | } | ||
618 | |||
619 | static struct notifier_block uv_dump_stack_nmi_nb = { | ||
620 | .notifier_call = uv_handle_nmi | ||
621 | }; | ||
622 | |||
623 | void uv_register_nmi_notifier(void) | ||
624 | { | ||
625 | if (register_die_notifier(&uv_dump_stack_nmi_nb)) | ||
626 | printk(KERN_WARNING "UV NMI handler failed to register\n"); | ||
627 | } | ||
628 | |||
629 | void uv_nmi_init(void) | ||
630 | { | ||
631 | unsigned int value; | ||
632 | |||
633 | /* | ||
634 | * Unmask NMI on all cpus | ||
635 | */ | ||
636 | value = apic_read(APIC_LVT1) | APIC_DM_NMI; | ||
637 | value &= ~APIC_LVT_MASKED; | ||
638 | apic_write(APIC_LVT1, value); | ||
639 | } | ||
562 | 640 | ||
563 | void __init uv_system_init(void) | 641 | void __init uv_system_init(void) |
564 | { | 642 | { |
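The new NMI support comes in three parts: uv_handle_nmi() is a die-chain callback that serialises one stack dump per CPU, uv_register_nmi_notifier() hooks it into the chain, and uv_nmi_init() unmasks LVT1 with delivery mode NMI so the hub's NMI actually reaches the CPU. The die-notifier pattern in isolation, with hypothetical names:

    #include <linux/kdebug.h>
    #include <linux/kernel.h>
    #include <linux/notifier.h>

    static int my_nmi_handler(struct notifier_block *self,
                              unsigned long reason, void *data)
    {
            if (reason != DIE_NMI_IPI)
                    return NOTIFY_OK;   /* not ours: keep walking the chain */

            dump_stack();
            return NOTIFY_STOP;         /* consumed: stop further handling */
    }

    static struct notifier_block my_nmi_nb = {
            .notifier_call = my_nmi_handler,
    };

    static int __init my_nmi_register(void)
    {
            return register_die_notifier(&my_nmi_nb);
    }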
@@ -624,13 +702,15 @@ void __init uv_system_init(void) | |||
624 | } | 702 | } |
625 | 703 | ||
626 | uv_bios_init(); | 704 | uv_bios_init(); |
627 | uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, | 705 | uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id, |
628 | &sn_coherency_id, &sn_region_size); | 706 | &sn_region_size, &system_serial_number); |
629 | uv_rtc_init(); | 707 | uv_rtc_init(); |
630 | 708 | ||
631 | for_each_present_cpu(cpu) { | 709 | for_each_present_cpu(cpu) { |
710 | int apicid = per_cpu(x86_cpu_to_apicid, cpu); | ||
711 | |||
632 | nid = cpu_to_node(cpu); | 712 | nid = cpu_to_node(cpu); |
633 | pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); | 713 | pnode = uv_apicid_to_pnode(apicid); |
634 | blade = boot_pnode_to_blade(pnode); | 714 | blade = boot_pnode_to_blade(pnode); |
635 | lcpu = uv_blade_info[blade].nr_possible_cpus; | 715 | lcpu = uv_blade_info[blade].nr_possible_cpus; |
636 | uv_blade_info[blade].nr_possible_cpus++; | 716 | uv_blade_info[blade].nr_possible_cpus++; |
@@ -651,15 +731,13 @@ void __init uv_system_init(void) | |||
651 | uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; | 731 | uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; |
652 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; | 732 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; |
653 | uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; | 733 | uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; |
654 | uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; | 734 | uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); |
655 | uv_node_to_blade[nid] = blade; | 735 | uv_node_to_blade[nid] = blade; |
656 | uv_cpu_to_blade[cpu] = blade; | 736 | uv_cpu_to_blade[cpu] = blade; |
657 | max_pnode = max(pnode, max_pnode); | 737 | max_pnode = max(pnode, max_pnode); |
658 | 738 | ||
659 | printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, " | 739 | printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, lcpu %d, blade %d\n", |
660 | "lcpu %d, blade %d\n", | 740 | cpu, apicid, pnode, nid, lcpu, blade); |
661 | cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid, | ||
662 | lcpu, blade); | ||
663 | } | 741 | } |
664 | 742 | ||
665 | /* Add blade/pnode info for nodes without cpus */ | 743 | /* Add blade/pnode info for nodes without cpus */ |
@@ -680,5 +758,9 @@ void __init uv_system_init(void) | |||
680 | 758 | ||
681 | uv_cpu_init(); | 759 | uv_cpu_init(); |
682 | uv_scir_register_cpu_notifier(); | 760 | uv_scir_register_cpu_notifier(); |
761 | uv_register_nmi_notifier(); | ||
683 | proc_mkdir("sgi_uv", NULL); | 762 | proc_mkdir("sgi_uv", NULL); |
763 | |||
764 | /* register Legacy VGA I/O redirection handler */ | ||
765 | pci_register_set_vga_state(uv_set_vga_state); | ||
684 | } | 766 | } |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index b5b6b23bce53..031aa887b0eb 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -1992,8 +1992,8 @@ static int __init apm_is_horked_d850md(const struct dmi_system_id *d) | |||
1992 | apm_info.disabled = 1; | 1992 | apm_info.disabled = 1; |
1993 | printk(KERN_INFO "%s machine detected. " | 1993 | printk(KERN_INFO "%s machine detected. " |
1994 | "Disabling APM.\n", d->ident); | 1994 | "Disabling APM.\n", d->ident); |
1995 | printk(KERN_INFO "This bug is fixed in bios P15 which is available for \n"); | 1995 | printk(KERN_INFO "This bug is fixed in bios P15 which is available for\n"); |
1996 | printk(KERN_INFO "download from support.intel.com \n"); | 1996 | printk(KERN_INFO "download from support.intel.com\n"); |
1997 | } | 1997 | } |
1998 | return 0; | 1998 | return 0; |
1999 | } | 1999 | } |
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index b0206a211b09..8bc57baaa9ad 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c | |||
@@ -15,8 +15,8 @@ | |||
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | * | 17 | * |
18 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | 18 | * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. |
19 | * Copyright (c) Russ Anderson | 19 | * Copyright (c) Russ Anderson <rja@sgi.com> |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/efi.h> | 22 | #include <linux/efi.h> |
@@ -30,6 +30,7 @@ static struct uv_systab uv_systab; | |||
30 | s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) | 30 | s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) |
31 | { | 31 | { |
32 | struct uv_systab *tab = &uv_systab; | 32 | struct uv_systab *tab = &uv_systab; |
33 | s64 ret; | ||
33 | 34 | ||
34 | if (!tab->function) | 35 | if (!tab->function) |
35 | /* | 36 | /* |
@@ -37,9 +38,11 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) | |||
37 | */ | 38 | */ |
38 | return BIOS_STATUS_UNIMPLEMENTED; | 39 | return BIOS_STATUS_UNIMPLEMENTED; |
39 | 40 | ||
40 | return efi_call6((void *)__va(tab->function), | 41 | ret = efi_call6((void *)__va(tab->function), (u64)which, |
41 | (u64)which, a1, a2, a3, a4, a5); | 42 | a1, a2, a3, a4, a5); |
43 | return ret; | ||
42 | } | 44 | } |
45 | EXPORT_SYMBOL_GPL(uv_bios_call); | ||
43 | 46 | ||
44 | s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, | 47 | s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, |
45 | u64 a4, u64 a5) | 48 | u64 a4, u64 a5) |
@@ -73,11 +76,14 @@ long sn_coherency_id; | |||
73 | EXPORT_SYMBOL_GPL(sn_coherency_id); | 76 | EXPORT_SYMBOL_GPL(sn_coherency_id); |
74 | long sn_region_size; | 77 | long sn_region_size; |
75 | EXPORT_SYMBOL_GPL(sn_region_size); | 78 | EXPORT_SYMBOL_GPL(sn_region_size); |
79 | long system_serial_number; | ||
80 | EXPORT_SYMBOL_GPL(system_serial_number); | ||
76 | int uv_type; | 81 | int uv_type; |
82 | EXPORT_SYMBOL_GPL(uv_type); | ||
77 | 83 | ||
78 | 84 | ||
79 | s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, | 85 | s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, |
80 | long *region) | 86 | long *region, long *ssn) |
81 | { | 87 | { |
82 | s64 ret; | 88 | s64 ret; |
83 | u64 v0, v1; | 89 | u64 v0, v1; |
@@ -97,8 +103,11 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, | |||
97 | *coher = part.coherence_id; | 103 | *coher = part.coherence_id; |
98 | if (region) | 104 | if (region) |
99 | *region = part.region_size; | 105 | *region = part.region_size; |
106 | if (ssn) | ||
107 | *ssn = v1; | ||
100 | return ret; | 108 | return ret; |
101 | } | 109 | } |
110 | EXPORT_SYMBOL_GPL(uv_bios_get_sn_info); | ||
102 | 111 | ||
103 | int | 112 | int |
104 | uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size, | 113 | uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size, |
@@ -154,6 +163,25 @@ s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) | |||
154 | } | 163 | } |
155 | EXPORT_SYMBOL_GPL(uv_bios_freq_base); | 164 | EXPORT_SYMBOL_GPL(uv_bios_freq_base); |
156 | 165 | ||
166 | /* | ||
167 | * uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target | ||
168 | * @decode: true to enable target, false to disable target | ||
169 | * @domain: PCI domain number | ||
170 | * @bus: PCI bus number | ||
171 | * | ||
172 | * Returns: | ||
173 | * 0: Success | ||
174 | * -EINVAL: Invalid domain or bus number | ||
175 | * -ENOSYS: Capability not available | ||
176 | * -EBUSY: Legacy VGA I/O cannot be retargeted at this time | ||
177 | */ | ||
178 | int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus) | ||
179 | { | ||
180 | return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET, | ||
181 | (u64)decode, (u64)domain, (u64)bus, 0, 0); | ||
182 | } | ||
183 | EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target); | ||
184 | |||
157 | 185 | ||
158 | #ifdef CONFIG_EFI | 186 | #ifdef CONFIG_EFI |
159 | void uv_bios_init(void) | 187 | void uv_bios_init(void) |
@@ -185,4 +213,3 @@ void uv_bios_init(void) | |||
185 | 213 | ||
186 | void uv_bios_init(void) { } | 214 | void uv_bios_init(void) { } |
187 | #endif | 215 | #endif |
188 | |||
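Besides widening uv_bios_get_sn_info() to also return the system serial number, bios_uv.c now exports its entry points for modular callers and adds uv_bios_set_legacy_vga_target(), a thin, documented wrapper that forwards a single command through uv_bios_call(). An illustrative caller (domain and bus values are made up):

    /* Illustrative only: retarget legacy VGA I/O to PCI domain 0, bus 3. */
    static int vga_retarget_example(void)
    {
            int rc = uv_bios_set_legacy_vga_target(true, 0, 3);

            if (rc)
                    printk(KERN_WARNING "VGA retarget failed: %d\n", rc);
            return rc;
    }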
diff --git a/arch/x86/kernel/bootflag.c b/arch/x86/kernel/bootflag.c index 30f25a75fe28..5de7f4c56971 100644 --- a/arch/x86/kernel/bootflag.c +++ b/arch/x86/kernel/bootflag.c | |||
@@ -5,7 +5,6 @@ | |||
5 | #include <linux/kernel.h> | 5 | #include <linux/kernel.h> |
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/string.h> | 7 | #include <linux/string.h> |
8 | #include <linux/slab.h> | ||
9 | #include <linux/spinlock.h> | 8 | #include <linux/spinlock.h> |
10 | #include <linux/acpi.h> | 9 | #include <linux/acpi.h> |
11 | #include <asm/io.h> | 10 | #include <asm/io.h> |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 1d2cb383410e..c202b62f3671 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -19,8 +19,6 @@ obj-y += vmware.o hypervisor.o sched.o | |||
19 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 19 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o |
20 | obj-$(CONFIG_X86_64) += bugs_64.o | 20 | obj-$(CONFIG_X86_64) += bugs_64.o |
21 | 21 | ||
22 | obj-$(CONFIG_X86_CPU_DEBUG) += cpu_debug.o | ||
23 | |||
24 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o | 22 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o |
25 | obj-$(CONFIG_CPU_SUP_AMD) += amd.o | 23 | obj-$(CONFIG_CPU_SUP_AMD) += amd.o |
26 | obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o | 24 | obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o |
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 468489b57aae..97ad79cdf688 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c | |||
@@ -32,6 +32,10 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | |||
32 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { | 32 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { |
33 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, | 33 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, |
34 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, | 34 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, |
35 | { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a }, | ||
36 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a }, | ||
37 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a }, | ||
38 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a }, | ||
35 | { 0, 0, 0, 0 } | 39 | { 0, 0, 0, 0 } |
36 | }; | 40 | }; |
37 | 41 | ||
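The four new rows publish the AMD SVM sub-features from CPUID leaf 0x8000000A EDX: bit 0 (NPT, nested paging), bit 1 (LBRV), bit 2 (SVML) and bit 3 (NRIPS). Each row is { synthetic feature, register, bit, leaf }, and the scattered-feature scan turns a set hardware bit into set_cpu_cap() on the CPU being initialised. Roughly what the NPT row amounts to, as a sketch (the real scan also validates the maximum supported leaf first):

    /* Sketch: the effect of the NPT row for the cpuinfo being filled in. */
    static void __cpuinit read_npt_bit(struct cpuinfo_x86 *c)
    {
            u32 eax, ebx, ecx, edx;

            cpuid(0x8000000a, &eax, &ebx, &ecx, &edx);
            if (edx & (1 << 0))     /* CR_EDX, bit 0, leaf 0x8000000a */
                    set_cpu_cap(c, X86_FEATURE_NPT);
    }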
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c deleted file mode 100644 index b368cd862997..000000000000 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ /dev/null | |||
@@ -1,688 +0,0 @@ | |||
1 | /* | ||
2 | * CPU x86 architecture debug code | ||
3 | * | ||
4 | * Copyright(C) 2009 Jaswinder Singh Rajput | ||
5 | * | ||
6 | * For licencing details see kernel-base/COPYING | ||
7 | */ | ||
8 | |||
9 | #include <linux/interrupt.h> | ||
10 | #include <linux/compiler.h> | ||
11 | #include <linux/seq_file.h> | ||
12 | #include <linux/debugfs.h> | ||
13 | #include <linux/kprobes.h> | ||
14 | #include <linux/uaccess.h> | ||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/percpu.h> | ||
18 | #include <linux/signal.h> | ||
19 | #include <linux/errno.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/types.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/slab.h> | ||
24 | #include <linux/smp.h> | ||
25 | |||
26 | #include <asm/cpu_debug.h> | ||
27 | #include <asm/paravirt.h> | ||
28 | #include <asm/system.h> | ||
29 | #include <asm/traps.h> | ||
30 | #include <asm/apic.h> | ||
31 | #include <asm/desc.h> | ||
32 | |||
33 | static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpud_arr); | ||
34 | static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], cpud_priv_arr); | ||
35 | static DEFINE_PER_CPU(int, cpud_priv_count); | ||
36 | |||
37 | static DEFINE_MUTEX(cpu_debug_lock); | ||
38 | |||
39 | static struct dentry *cpu_debugfs_dir; | ||
40 | |||
41 | static struct cpu_debug_base cpu_base[] = { | ||
42 | { "mc", CPU_MC, 0 }, | ||
43 | { "monitor", CPU_MONITOR, 0 }, | ||
44 | { "time", CPU_TIME, 0 }, | ||
45 | { "pmc", CPU_PMC, 1 }, | ||
46 | { "platform", CPU_PLATFORM, 0 }, | ||
47 | { "apic", CPU_APIC, 0 }, | ||
48 | { "poweron", CPU_POWERON, 0 }, | ||
49 | { "control", CPU_CONTROL, 0 }, | ||
50 | { "features", CPU_FEATURES, 0 }, | ||
51 | { "lastbranch", CPU_LBRANCH, 0 }, | ||
52 | { "bios", CPU_BIOS, 0 }, | ||
53 | { "freq", CPU_FREQ, 0 }, | ||
54 | { "mtrr", CPU_MTRR, 0 }, | ||
55 | { "perf", CPU_PERF, 0 }, | ||
56 | { "cache", CPU_CACHE, 0 }, | ||
57 | { "sysenter", CPU_SYSENTER, 0 }, | ||
58 | { "therm", CPU_THERM, 0 }, | ||
59 | { "misc", CPU_MISC, 0 }, | ||
60 | { "debug", CPU_DEBUG, 0 }, | ||
61 | { "pat", CPU_PAT, 0 }, | ||
62 | { "vmx", CPU_VMX, 0 }, | ||
63 | { "call", CPU_CALL, 0 }, | ||
64 | { "base", CPU_BASE, 0 }, | ||
65 | { "ver", CPU_VER, 0 }, | ||
66 | { "conf", CPU_CONF, 0 }, | ||
67 | { "smm", CPU_SMM, 0 }, | ||
68 | { "svm", CPU_SVM, 0 }, | ||
69 | { "osvm", CPU_OSVM, 0 }, | ||
70 | { "tss", CPU_TSS, 0 }, | ||
71 | { "cr", CPU_CR, 0 }, | ||
72 | { "dt", CPU_DT, 0 }, | ||
73 | { "registers", CPU_REG_ALL, 0 }, | ||
74 | }; | ||
75 | |||
76 | static struct cpu_file_base cpu_file[] = { | ||
77 | { "index", CPU_REG_ALL, 0 }, | ||
78 | { "value", CPU_REG_ALL, 1 }, | ||
79 | }; | ||
80 | |||
81 | /* CPU Registers Range */ | ||
82 | static struct cpu_debug_range cpu_reg_range[] = { | ||
83 | { 0x00000000, 0x00000001, CPU_MC, }, | ||
84 | { 0x00000006, 0x00000007, CPU_MONITOR, }, | ||
85 | { 0x00000010, 0x00000010, CPU_TIME, }, | ||
86 | { 0x00000011, 0x00000013, CPU_PMC, }, | ||
87 | { 0x00000017, 0x00000017, CPU_PLATFORM, }, | ||
88 | { 0x0000001B, 0x0000001B, CPU_APIC, }, | ||
89 | { 0x0000002A, 0x0000002B, CPU_POWERON, }, | ||
90 | { 0x0000002C, 0x0000002C, CPU_FREQ, }, | ||
91 | { 0x0000003A, 0x0000003A, CPU_CONTROL, }, | ||
92 | { 0x00000040, 0x00000047, CPU_LBRANCH, }, | ||
93 | { 0x00000060, 0x00000067, CPU_LBRANCH, }, | ||
94 | { 0x00000079, 0x00000079, CPU_BIOS, }, | ||
95 | { 0x00000088, 0x0000008A, CPU_CACHE, }, | ||
96 | { 0x0000008B, 0x0000008B, CPU_BIOS, }, | ||
97 | { 0x0000009B, 0x0000009B, CPU_MONITOR, }, | ||
98 | { 0x000000C1, 0x000000C4, CPU_PMC, }, | ||
99 | { 0x000000CD, 0x000000CD, CPU_FREQ, }, | ||
100 | { 0x000000E7, 0x000000E8, CPU_PERF, }, | ||
101 | { 0x000000FE, 0x000000FE, CPU_MTRR, }, | ||
102 | |||
103 | { 0x00000116, 0x0000011E, CPU_CACHE, }, | ||
104 | { 0x00000174, 0x00000176, CPU_SYSENTER, }, | ||
105 | { 0x00000179, 0x0000017B, CPU_MC, }, | ||
106 | { 0x00000186, 0x00000189, CPU_PMC, }, | ||
107 | { 0x00000198, 0x00000199, CPU_PERF, }, | ||
108 | { 0x0000019A, 0x0000019A, CPU_TIME, }, | ||
109 | { 0x0000019B, 0x0000019D, CPU_THERM, }, | ||
110 | { 0x000001A0, 0x000001A0, CPU_MISC, }, | ||
111 | { 0x000001C9, 0x000001C9, CPU_LBRANCH, }, | ||
112 | { 0x000001D7, 0x000001D8, CPU_LBRANCH, }, | ||
113 | { 0x000001D9, 0x000001D9, CPU_DEBUG, }, | ||
114 | { 0x000001DA, 0x000001E0, CPU_LBRANCH, }, | ||
115 | |||
116 | { 0x00000200, 0x0000020F, CPU_MTRR, }, | ||
117 | { 0x00000250, 0x00000250, CPU_MTRR, }, | ||
118 | { 0x00000258, 0x00000259, CPU_MTRR, }, | ||
119 | { 0x00000268, 0x0000026F, CPU_MTRR, }, | ||
120 | { 0x00000277, 0x00000277, CPU_PAT, }, | ||
121 | { 0x000002FF, 0x000002FF, CPU_MTRR, }, | ||
122 | |||
123 | { 0x00000300, 0x00000311, CPU_PMC, }, | ||
124 | { 0x00000345, 0x00000345, CPU_PMC, }, | ||
125 | { 0x00000360, 0x00000371, CPU_PMC, }, | ||
126 | { 0x0000038D, 0x00000390, CPU_PMC, }, | ||
127 | { 0x000003A0, 0x000003BE, CPU_PMC, }, | ||
128 | { 0x000003C0, 0x000003CD, CPU_PMC, }, | ||
129 | { 0x000003E0, 0x000003E1, CPU_PMC, }, | ||
130 | { 0x000003F0, 0x000003F2, CPU_PMC, }, | ||
131 | |||
132 | { 0x00000400, 0x00000417, CPU_MC, }, | ||
133 | { 0x00000480, 0x0000048B, CPU_VMX, }, | ||
134 | |||
135 | { 0x00000600, 0x00000600, CPU_DEBUG, }, | ||
136 | { 0x00000680, 0x0000068F, CPU_LBRANCH, }, | ||
137 | { 0x000006C0, 0x000006CF, CPU_LBRANCH, }, | ||
138 | |||
139 | { 0x000107CC, 0x000107D3, CPU_PMC, }, | ||
140 | |||
141 | { 0xC0000080, 0xC0000080, CPU_FEATURES, }, | ||
142 | { 0xC0000081, 0xC0000084, CPU_CALL, }, | ||
143 | { 0xC0000100, 0xC0000102, CPU_BASE, }, | ||
144 | { 0xC0000103, 0xC0000103, CPU_TIME, }, | ||
145 | |||
146 | { 0xC0010000, 0xC0010007, CPU_PMC, }, | ||
147 | { 0xC0010010, 0xC0010010, CPU_CONF, }, | ||
148 | { 0xC0010015, 0xC0010015, CPU_CONF, }, | ||
149 | { 0xC0010016, 0xC001001A, CPU_MTRR, }, | ||
150 | { 0xC001001D, 0xC001001D, CPU_MTRR, }, | ||
151 | { 0xC001001F, 0xC001001F, CPU_CONF, }, | ||
152 | { 0xC0010030, 0xC0010035, CPU_BIOS, }, | ||
153 | { 0xC0010044, 0xC0010048, CPU_MC, }, | ||
154 | { 0xC0010050, 0xC0010056, CPU_SMM, }, | ||
155 | { 0xC0010058, 0xC0010058, CPU_CONF, }, | ||
156 | { 0xC0010060, 0xC0010060, CPU_CACHE, }, | ||
157 | { 0xC0010061, 0xC0010068, CPU_SMM, }, | ||
158 | { 0xC0010069, 0xC001006B, CPU_SMM, }, | ||
159 | { 0xC0010070, 0xC0010071, CPU_SMM, }, | ||
160 | { 0xC0010111, 0xC0010113, CPU_SMM, }, | ||
161 | { 0xC0010114, 0xC0010118, CPU_SVM, }, | ||
162 | { 0xC0010140, 0xC0010141, CPU_OSVM, }, | ||
163 | { 0xC0011022, 0xC0011023, CPU_CONF, }, | ||
164 | }; | ||
165 | |||
166 | static int is_typeflag_valid(unsigned cpu, unsigned flag) | ||
167 | { | ||
168 | int i; | ||
169 | |||
170 | /* Standard Registers should be always valid */ | ||
171 | if (flag >= CPU_TSS) | ||
172 | return 1; | ||
173 | |||
174 | for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { | ||
175 | if (cpu_reg_range[i].flag == flag) | ||
176 | return 1; | ||
177 | } | ||
178 | |||
179 | /* Invalid */ | ||
180 | return 0; | ||
181 | } | ||
182 | |||
183 | static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max, | ||
184 | int index, unsigned flag) | ||
185 | { | ||
186 | if (cpu_reg_range[index].flag == flag) { | ||
187 | *min = cpu_reg_range[index].min; | ||
188 | *max = cpu_reg_range[index].max; | ||
189 | } else | ||
190 | *max = 0; | ||
191 | |||
192 | return *max; | ||
193 | } | ||
194 | |||
195 | /* This function can also be called with seq = NULL for printk */ | ||
196 | static void print_cpu_data(struct seq_file *seq, unsigned type, | ||
197 | u32 low, u32 high) | ||
198 | { | ||
199 | struct cpu_private *priv; | ||
200 | u64 val = high; | ||
201 | |||
202 | if (seq) { | ||
203 | priv = seq->private; | ||
204 | if (priv->file) { | ||
205 | val = (val << 32) | low; | ||
206 | seq_printf(seq, "0x%llx\n", val); | ||
207 | } else | ||
208 | seq_printf(seq, " %08x: %08x_%08x\n", | ||
209 | type, high, low); | ||
210 | } else | ||
211 | printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low); | ||
212 | } | ||
213 | |||
214 | /* This function can also be called with seq = NULL for printk */ | ||
215 | static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag) | ||
216 | { | ||
217 | unsigned msr, msr_min, msr_max; | ||
218 | struct cpu_private *priv; | ||
219 | u32 low, high; | ||
220 | int i; | ||
221 | |||
222 | if (seq) { | ||
223 | priv = seq->private; | ||
224 | if (priv->file) { | ||
225 | if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg, | ||
226 | &low, &high)) | ||
227 | print_cpu_data(seq, priv->reg, low, high); | ||
228 | return; | ||
229 | } | ||
230 | } | ||
231 | |||
232 | for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { | ||
233 | if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag)) | ||
234 | continue; | ||
235 | |||
236 | for (msr = msr_min; msr <= msr_max; msr++) { | ||
237 | if (rdmsr_safe_on_cpu(cpu, msr, &low, &high)) | ||
238 | continue; | ||
239 | print_cpu_data(seq, msr, low, high); | ||
240 | } | ||
241 | } | ||
242 | } | ||
243 | |||
244 | static void print_tss(void *arg) | ||
245 | { | ||
246 | struct pt_regs *regs = task_pt_regs(current); | ||
247 | struct seq_file *seq = arg; | ||
248 | unsigned int seg; | ||
249 | |||
250 | seq_printf(seq, " RAX\t: %016lx\n", regs->ax); | ||
251 | seq_printf(seq, " RBX\t: %016lx\n", regs->bx); | ||
252 | seq_printf(seq, " RCX\t: %016lx\n", regs->cx); | ||
253 | seq_printf(seq, " RDX\t: %016lx\n", regs->dx); | ||
254 | |||
255 | seq_printf(seq, " RSI\t: %016lx\n", regs->si); | ||
256 | seq_printf(seq, " RDI\t: %016lx\n", regs->di); | ||
257 | seq_printf(seq, " RBP\t: %016lx\n", regs->bp); | ||
258 | seq_printf(seq, " ESP\t: %016lx\n", regs->sp); | ||
259 | |||
260 | #ifdef CONFIG_X86_64 | ||
261 | seq_printf(seq, " R08\t: %016lx\n", regs->r8); | ||
262 | seq_printf(seq, " R09\t: %016lx\n", regs->r9); | ||
263 | seq_printf(seq, " R10\t: %016lx\n", regs->r10); | ||
264 | seq_printf(seq, " R11\t: %016lx\n", regs->r11); | ||
265 | seq_printf(seq, " R12\t: %016lx\n", regs->r12); | ||
266 | seq_printf(seq, " R13\t: %016lx\n", regs->r13); | ||
267 | seq_printf(seq, " R14\t: %016lx\n", regs->r14); | ||
268 | seq_printf(seq, " R15\t: %016lx\n", regs->r15); | ||
269 | #endif | ||
270 | |||
271 | asm("movl %%cs,%0" : "=r" (seg)); | ||
272 | seq_printf(seq, " CS\t: %04x\n", seg); | ||
273 | asm("movl %%ds,%0" : "=r" (seg)); | ||
274 | seq_printf(seq, " DS\t: %04x\n", seg); | ||
275 | seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff); | ||
276 | asm("movl %%es,%0" : "=r" (seg)); | ||
277 | seq_printf(seq, " ES\t: %04x\n", seg); | ||
278 | asm("movl %%fs,%0" : "=r" (seg)); | ||
279 | seq_printf(seq, " FS\t: %04x\n", seg); | ||
280 | asm("movl %%gs,%0" : "=r" (seg)); | ||
281 | seq_printf(seq, " GS\t: %04x\n", seg); | ||
282 | |||
283 | seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags); | ||
284 | |||
285 | seq_printf(seq, " EIP\t: %016lx\n", regs->ip); | ||
286 | } | ||
287 | |||
288 | static void print_cr(void *arg) | ||
289 | { | ||
290 | struct seq_file *seq = arg; | ||
291 | |||
292 | seq_printf(seq, " cr0\t: %016lx\n", read_cr0()); | ||
293 | seq_printf(seq, " cr2\t: %016lx\n", read_cr2()); | ||
294 | seq_printf(seq, " cr3\t: %016lx\n", read_cr3()); | ||
295 | seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe()); | ||
296 | #ifdef CONFIG_X86_64 | ||
297 | seq_printf(seq, " cr8\t: %016lx\n", read_cr8()); | ||
298 | #endif | ||
299 | } | ||
300 | |||
301 | static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt) | ||
302 | { | ||
303 | seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size)); | ||
304 | } | ||
305 | |||
306 | static void print_dt(void *seq) | ||
307 | { | ||
308 | struct desc_ptr dt; | ||
309 | unsigned long ldt; | ||
310 | |||
311 | /* IDT */ | ||
312 | store_idt((struct desc_ptr *)&dt); | ||
313 | print_desc_ptr("IDT", seq, dt); | ||
314 | |||
315 | /* GDT */ | ||
316 | store_gdt((struct desc_ptr *)&dt); | ||
317 | print_desc_ptr("GDT", seq, dt); | ||
318 | |||
319 | /* LDT */ | ||
320 | store_ldt(ldt); | ||
321 | seq_printf(seq, " LDT\t: %016lx\n", ldt); | ||
322 | |||
323 | /* TR */ | ||
324 | store_tr(ldt); | ||
325 | seq_printf(seq, " TR\t: %016lx\n", ldt); | ||
326 | } | ||
327 | |||
328 | static void print_dr(void *arg) | ||
329 | { | ||
330 | struct seq_file *seq = arg; | ||
331 | unsigned long dr; | ||
332 | int i; | ||
333 | |||
334 | for (i = 0; i < 8; i++) { | ||
335 | /* Ignore db4, db5 */ | ||
336 | if ((i == 4) || (i == 5)) | ||
337 | continue; | ||
338 | get_debugreg(dr, i); | ||
339 | seq_printf(seq, " dr%d\t: %016lx\n", i, dr); | ||
340 | } | ||
341 | |||
342 | seq_printf(seq, "\n MSR\t:\n"); | ||
343 | } | ||
344 | |||
345 | static void print_apic(void *arg) | ||
346 | { | ||
347 | struct seq_file *seq = arg; | ||
348 | |||
349 | #ifdef CONFIG_X86_LOCAL_APIC | ||
350 | seq_printf(seq, " LAPIC\t:\n"); | ||
351 | seq_printf(seq, " ID\t\t: %08x\n", apic_read(APIC_ID) >> 24); | ||
352 | seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR)); | ||
353 | seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI)); | ||
354 | seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI)); | ||
355 | seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI)); | ||
356 | seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR)); | ||
357 | seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR)); | ||
358 | seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV)); | ||
359 | seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR)); | ||
360 | seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR)); | ||
361 | seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR)); | ||
362 | seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2)); | ||
363 | seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT)); | ||
364 | seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR)); | ||
365 | seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC)); | ||
366 | seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0)); | ||
367 | seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1)); | ||
368 | seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR)); | ||
369 | seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT)); | ||
370 | seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT)); | ||
371 | seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR)); | ||
372 | if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { | ||
373 | unsigned int i, v, maxeilvt; | ||
374 | |||
375 | v = apic_read(APIC_EFEAT); | ||
376 | maxeilvt = (v >> 16) & 0xff; | ||
377 | seq_printf(seq, " EFEAT\t\t: %08x\n", v); | ||
378 | seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL)); | ||
379 | |||
380 | for (i = 0; i < maxeilvt; i++) { | ||
381 | v = apic_read(APIC_EILVTn(i)); | ||
382 | seq_printf(seq, " EILVT%d\t\t: %08x\n", i, v); | ||
383 | } | ||
384 | } | ||
385 | #endif /* CONFIG_X86_LOCAL_APIC */ | ||
386 | seq_printf(seq, "\n MSR\t:\n"); | ||
387 | } | ||
388 | |||
389 | static int cpu_seq_show(struct seq_file *seq, void *v) | ||
390 | { | ||
391 | struct cpu_private *priv = seq->private; | ||
392 | |||
393 | if (priv == NULL) | ||
394 | return -EINVAL; | ||
395 | |||
396 | switch (cpu_base[priv->type].flag) { | ||
397 | case CPU_TSS: | ||
398 | smp_call_function_single(priv->cpu, print_tss, seq, 1); | ||
399 | break; | ||
400 | case CPU_CR: | ||
401 | smp_call_function_single(priv->cpu, print_cr, seq, 1); | ||
402 | break; | ||
403 | case CPU_DT: | ||
404 | smp_call_function_single(priv->cpu, print_dt, seq, 1); | ||
405 | break; | ||
406 | case CPU_DEBUG: | ||
407 | if (priv->file == CPU_INDEX_BIT) | ||
408 | smp_call_function_single(priv->cpu, print_dr, seq, 1); | ||
409 | print_msr(seq, priv->cpu, cpu_base[priv->type].flag); | ||
410 | break; | ||
411 | case CPU_APIC: | ||
412 | if (priv->file == CPU_INDEX_BIT) | ||
413 | smp_call_function_single(priv->cpu, print_apic, seq, 1); | ||
414 | print_msr(seq, priv->cpu, cpu_base[priv->type].flag); | ||
415 | break; | ||
416 | |||
417 | default: | ||
418 | print_msr(seq, priv->cpu, cpu_base[priv->type].flag); | ||
419 | break; | ||
420 | } | ||
421 | seq_printf(seq, "\n"); | ||
422 | |||
423 | return 0; | ||
424 | } | ||
425 | |||
426 | static void *cpu_seq_start(struct seq_file *seq, loff_t *pos) | ||
427 | { | ||
428 | if (*pos == 0) /* One time is enough ;-) */ | ||
429 | return seq; | ||
430 | |||
431 | return NULL; | ||
432 | } | ||
433 | |||
434 | static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
435 | { | ||
436 | (*pos)++; | ||
437 | |||
438 | return cpu_seq_start(seq, pos); | ||
439 | } | ||
440 | |||
441 | static void cpu_seq_stop(struct seq_file *seq, void *v) | ||
442 | { | ||
443 | } | ||
444 | |||
445 | static const struct seq_operations cpu_seq_ops = { | ||
446 | .start = cpu_seq_start, | ||
447 | .next = cpu_seq_next, | ||
448 | .stop = cpu_seq_stop, | ||
449 | .show = cpu_seq_show, | ||
450 | }; | ||
451 | |||
452 | static int cpu_seq_open(struct inode *inode, struct file *file) | ||
453 | { | ||
454 | struct cpu_private *priv = inode->i_private; | ||
455 | struct seq_file *seq; | ||
456 | int err; | ||
457 | |||
458 | err = seq_open(file, &cpu_seq_ops); | ||
459 | if (!err) { | ||
460 | seq = file->private_data; | ||
461 | seq->private = priv; | ||
462 | } | ||
463 | |||
464 | return err; | ||
465 | } | ||
466 | |||
467 | static int write_msr(struct cpu_private *priv, u64 val) | ||
468 | { | ||
469 | u32 low, high; | ||
470 | |||
471 | high = (val >> 32) & 0xffffffff; | ||
472 | low = val & 0xffffffff; | ||
473 | |||
474 | if (!wrmsr_safe_on_cpu(priv->cpu, priv->reg, low, high)) | ||
475 | return 0; | ||
476 | |||
477 | return -EPERM; | ||
478 | } | ||
479 | |||
480 | static int write_cpu_register(struct cpu_private *priv, const char *buf) | ||
481 | { | ||
482 | int ret = -EPERM; | ||
483 | u64 val; | ||
484 | |||
485 | ret = strict_strtoull(buf, 0, &val); | ||
486 | if (ret < 0) | ||
487 | return ret; | ||
488 | |||
489 | /* Supporting only MSRs */ | ||
490 | if (priv->type < CPU_TSS_BIT) | ||
491 | return write_msr(priv, val); | ||
492 | |||
493 | return ret; | ||
494 | } | ||
495 | |||
496 | static ssize_t cpu_write(struct file *file, const char __user *ubuf, | ||
497 | size_t count, loff_t *off) | ||
498 | { | ||
499 | struct seq_file *seq = file->private_data; | ||
500 | struct cpu_private *priv = seq->private; | ||
501 | char buf[19]; | ||
502 | |||
503 | if ((priv == NULL) || (count >= sizeof(buf))) | ||
504 | return -EINVAL; | ||
505 | |||
506 | if (copy_from_user(&buf, ubuf, count)) | ||
507 | return -EFAULT; | ||
508 | |||
509 | buf[count] = 0; | ||
510 | |||
511 | if ((cpu_base[priv->type].write) && (cpu_file[priv->file].write)) | ||
512 | if (!write_cpu_register(priv, buf)) | ||
513 | return count; | ||
514 | |||
515 | return -EACCES; | ||
516 | } | ||
517 | |||
518 | static const struct file_operations cpu_fops = { | ||
519 | .owner = THIS_MODULE, | ||
520 | .open = cpu_seq_open, | ||
521 | .read = seq_read, | ||
522 | .write = cpu_write, | ||
523 | .llseek = seq_lseek, | ||
524 | .release = seq_release, | ||
525 | }; | ||
526 | |||
527 | static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg, | ||
528 | unsigned file, struct dentry *dentry) | ||
529 | { | ||
530 | struct cpu_private *priv = NULL; | ||
531 | |||
532 | /* Already intialized */ | ||
533 | if (file == CPU_INDEX_BIT) | ||
534 | if (per_cpu(cpud_arr[type].init, cpu)) | ||
535 | return 0; | ||
536 | |||
537 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | ||
538 | if (priv == NULL) | ||
539 | return -ENOMEM; | ||
540 | |||
541 | priv->cpu = cpu; | ||
542 | priv->type = type; | ||
543 | priv->reg = reg; | ||
544 | priv->file = file; | ||
545 | mutex_lock(&cpu_debug_lock); | ||
546 | per_cpu(cpud_priv_arr[type], cpu) = priv; | ||
547 | per_cpu(cpud_priv_count, cpu)++; | ||
548 | mutex_unlock(&cpu_debug_lock); | ||
549 | |||
550 | if (file) | ||
551 | debugfs_create_file(cpu_file[file].name, S_IRUGO, | ||
552 | dentry, (void *)priv, &cpu_fops); | ||
553 | else { | ||
554 | debugfs_create_file(cpu_base[type].name, S_IRUGO, | ||
555 | per_cpu(cpud_arr[type].dentry, cpu), | ||
556 | (void *)priv, &cpu_fops); | ||
557 | mutex_lock(&cpu_debug_lock); | ||
558 | per_cpu(cpud_arr[type].init, cpu) = 1; | ||
559 | mutex_unlock(&cpu_debug_lock); | ||
560 | } | ||
561 | |||
562 | return 0; | ||
563 | } | ||
564 | |||
565 | static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg, | ||
566 | struct dentry *dentry) | ||
567 | { | ||
568 | unsigned file; | ||
569 | int err = 0; | ||
570 | |||
571 | for (file = 0; file < ARRAY_SIZE(cpu_file); file++) { | ||
572 | err = cpu_create_file(cpu, type, reg, file, dentry); | ||
573 | if (err) | ||
574 | return err; | ||
575 | } | ||
576 | |||
577 | return err; | ||
578 | } | ||
579 | |||
580 | static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry) | ||
581 | { | ||
582 | struct dentry *cpu_dentry = NULL; | ||
583 | unsigned reg, reg_min, reg_max; | ||
584 | int i, err = 0; | ||
585 | char reg_dir[12]; | ||
586 | u32 low, high; | ||
587 | |||
588 | for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { | ||
589 | if (!get_cpu_range(cpu, ®_min, ®_max, i, | ||
590 | cpu_base[type].flag)) | ||
591 | continue; | ||
592 | |||
593 | for (reg = reg_min; reg <= reg_max; reg++) { | ||
594 | if (rdmsr_safe_on_cpu(cpu, reg, &low, &high)) | ||
595 | continue; | ||
596 | |||
597 | sprintf(reg_dir, "0x%x", reg); | ||
598 | cpu_dentry = debugfs_create_dir(reg_dir, dentry); | ||
599 | err = cpu_init_regfiles(cpu, type, reg, cpu_dentry); | ||
600 | if (err) | ||
601 | return err; | ||
602 | } | ||
603 | } | ||
604 | |||
605 | return err; | ||
606 | } | ||
607 | |||
608 | static int cpu_init_allreg(unsigned cpu, struct dentry *dentry) | ||
609 | { | ||
610 | struct dentry *cpu_dentry = NULL; | ||
611 | unsigned type; | ||
612 | int err = 0; | ||
613 | |||
614 | for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) { | ||
615 | if (!is_typeflag_valid(cpu, cpu_base[type].flag)) | ||
616 | continue; | ||
617 | cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry); | ||
618 | per_cpu(cpud_arr[type].dentry, cpu) = cpu_dentry; | ||
619 | |||
620 | if (type < CPU_TSS_BIT) | ||
621 | err = cpu_init_msr(cpu, type, cpu_dentry); | ||
622 | else | ||
623 | err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT, | ||
624 | cpu_dentry); | ||
625 | if (err) | ||
626 | return err; | ||
627 | } | ||
628 | |||
629 | return err; | ||
630 | } | ||
631 | |||
632 | static int cpu_init_cpu(void) | ||
633 | { | ||
634 | struct dentry *cpu_dentry = NULL; | ||
635 | struct cpuinfo_x86 *cpui; | ||
636 | char cpu_dir[12]; | ||
637 | unsigned cpu; | ||
638 | int err = 0; | ||
639 | |||
640 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) { | ||
641 | cpui = &cpu_data(cpu); | ||
642 | if (!cpu_has(cpui, X86_FEATURE_MSR)) | ||
643 | continue; | ||
644 | |||
645 | sprintf(cpu_dir, "cpu%d", cpu); | ||
646 | cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir); | ||
647 | err = cpu_init_allreg(cpu, cpu_dentry); | ||
648 | |||
649 | pr_info("cpu%d(%d) debug files %d\n", | ||
650 | cpu, nr_cpu_ids, per_cpu(cpud_priv_count, cpu)); | ||
651 | if (per_cpu(cpud_priv_count, cpu) > MAX_CPU_FILES) { | ||
652 | pr_err("Register files count %d exceeds limit %d\n", | ||
653 | per_cpu(cpud_priv_count, cpu), MAX_CPU_FILES); | ||
654 | per_cpu(cpud_priv_count, cpu) = MAX_CPU_FILES; | ||
655 | err = -ENFILE; | ||
656 | } | ||
657 | if (err) | ||
658 | return err; | ||
659 | } | ||
660 | |||
661 | return err; | ||
662 | } | ||
663 | |||
664 | static int __init cpu_debug_init(void) | ||
665 | { | ||
666 | cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir); | ||
667 | |||
668 | return cpu_init_cpu(); | ||
669 | } | ||
670 | |||
671 | static void __exit cpu_debug_exit(void) | ||
672 | { | ||
673 | int i, cpu; | ||
674 | |||
675 | if (cpu_debugfs_dir) | ||
676 | debugfs_remove_recursive(cpu_debugfs_dir); | ||
677 | |||
678 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) | ||
679 | for (i = 0; i < per_cpu(cpud_priv_count, cpu); i++) | ||
680 | kfree(per_cpu(cpud_priv_arr[i], cpu)); | ||
681 | } | ||
682 | |||
683 | module_init(cpu_debug_init); | ||
684 | module_exit(cpu_debug_exit); | ||
685 | |||
686 | MODULE_AUTHOR("Jaswinder Singh Rajput"); | ||
687 | MODULE_DESCRIPTION("CPU Debug module"); | ||
688 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index f138c6c389b9..870e6cc6ad28 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig | |||
@@ -10,6 +10,20 @@ if CPU_FREQ | |||
10 | 10 | ||
11 | comment "CPUFreq processor drivers" | 11 | comment "CPUFreq processor drivers" |
12 | 12 | ||
13 | config X86_PCC_CPUFREQ | ||
14 | tristate "Processor Clocking Control interface driver" | ||
15 | depends on ACPI && ACPI_PROCESSOR | ||
16 | help | ||
17 | This driver adds support for the Processor Clocking Control (PCC) interface. | ||
18 | |||
19 | For details, take a look at: | ||
20 | <file:Documentation/cpu-freq/pcc-cpufreq.txt>. | ||
21 | |||
22 | To compile this driver as a module, choose M here: the | ||
23 | module will be called pcc-cpufreq. | ||
24 | |||
25 | If in doubt, say N. | ||
26 | |||
13 | config X86_ACPI_CPUFREQ | 27 | config X86_ACPI_CPUFREQ |
14 | tristate "ACPI Processor P-States driver" | 28 | tristate "ACPI Processor P-States driver" |
15 | select CPU_FREQ_TABLE | 29 | select CPU_FREQ_TABLE |
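PCC (Processor Clocking Control) is a firmware-mediated interface: the OS writes a command into a shared memory region, rings a doorbell described by an ACPI generic address, and polls a status word until firmware signals completion. Judging from the constants and structures in the driver added later in this patch (pcch_hdr, doorbell, CMD_COMPLETE, POLL_LOOPS), the handshake plausibly looks like the sketch below; pcc_do_cmd() is a hypothetical helper, not the driver's actual code, and it assumes the acpi_read()/acpi_write() generic-address helpers:

    /* Hypothetical helper: issue one PCC command and wait for firmware. */
    static int pcc_do_cmd(u16 cmd)
    {
            u64 db;
            int i;

            iowrite16(cmd, &pcch_hdr->command);

            acpi_read(&db, &doorbell);              /* ring the doorbell */
            acpi_write((db & doorbell_preserve) | doorbell_write, &doorbell);

            for (i = 0; i < POLL_LOOPS; i++) {
                    if (ioread16(&pcch_hdr->status) & CMD_COMPLETE)
                            return 0;               /* firmware done */
            }
            return -EIO;                            /* timed out */
    }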
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile index 509296df294d..1840c0a5170b 100644 --- a/arch/x86/kernel/cpu/cpufreq/Makefile +++ b/arch/x86/kernel/cpu/cpufreq/Makefile | |||
@@ -4,6 +4,7 @@ | |||
4 | 4 | ||
5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o | 5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o |
6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o | 6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o |
7 | obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o | ||
7 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o | 8 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o |
8 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o | 9 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o |
9 | obj-$(CONFIG_X86_LONGHAUL) += longhaul.o | 10 | obj-$(CONFIG_X86_LONGHAUL) += longhaul.o |
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 1b1920fa7c80..459168083b77 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/cpufreq.h> | 33 | #include <linux/cpufreq.h> |
34 | #include <linux/compiler.h> | 34 | #include <linux/compiler.h> |
35 | #include <linux/dmi.h> | 35 | #include <linux/dmi.h> |
36 | #include <linux/slab.h> | ||
36 | #include <trace/events/power.h> | 37 | #include <trace/events/power.h> |
37 | 38 | ||
38 | #include <linux/acpi.h> | 39 | #include <linux/acpi.h> |
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index 006b278b0d5d..c587db472a75 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/module.h> | 20 | #include <linux/module.h> |
21 | #include <linux/init.h> | 21 | #include <linux/init.h> |
22 | 22 | ||
23 | #include <linux/slab.h> | ||
24 | #include <linux/delay.h> | 23 | #include <linux/delay.h> |
25 | #include <linux/cpufreq.h> | 24 | #include <linux/cpufreq.h> |
26 | 25 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c index ac27ec2264d5..16e3483be9e3 100644 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c | |||
@@ -80,6 +80,7 @@ | |||
80 | #include <linux/cpufreq.h> | 80 | #include <linux/cpufreq.h> |
81 | #include <linux/pci.h> | 81 | #include <linux/pci.h> |
82 | #include <linux/errno.h> | 82 | #include <linux/errno.h> |
83 | #include <linux/slab.h> | ||
83 | 84 | ||
84 | #include <asm/processor-cyrix.h> | 85 | #include <asm/processor-cyrix.h> |
85 | 86 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index da5f70fcb766..e7b559d74c52 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c | |||
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
12 | #include <linux/slab.h> | ||
13 | #include <linux/cpufreq.h> | 12 | #include <linux/cpufreq.h> |
14 | #include <linux/timex.h> | 13 | #include <linux/timex.h> |
15 | 14 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 869615193720..7b8a8ba67b07 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | |||
@@ -25,7 +25,6 @@ | |||
25 | #include <linux/init.h> | 25 | #include <linux/init.h> |
26 | #include <linux/smp.h> | 26 | #include <linux/smp.h> |
27 | #include <linux/cpufreq.h> | 27 | #include <linux/cpufreq.h> |
28 | #include <linux/slab.h> | ||
29 | #include <linux/cpumask.h> | 28 | #include <linux/cpumask.h> |
30 | #include <linux/timex.h> | 29 | #include <linux/timex.h> |
31 | 30 | ||
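The remaining include churn in these cpufreq drivers (<linux/slab.h> gained by acpi-cpufreq.c and gx-suspmod.c, dropped by elanfreq.c, longrun.c and p4-clockmod.c) applies one rule from the slab.h/percpu.h dependency cleanup: a file includes slab.h exactly when it calls the slab allocator, rather than picking it up transitively. In practice:

    #include <linux/slab.h>   /* needed: this file calls kzalloc()/kfree() */

    static int alloc_example(void)
    {
            char *buf = kzalloc(64, GFP_KERNEL);

            if (!buf)
                    return -ENOMEM;
            kfree(buf);
            return 0;
    }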
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c new file mode 100644 index 000000000000..ce7cde713e71 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c | |||
@@ -0,0 +1,621 @@ | |||
1 | /* | ||
2 | * pcc-cpufreq.c - Processor Clocking Control firmware cpufreq interface | ||
3 | * | ||
4 | * Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com> | ||
5 | * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. | ||
6 | * Nagananda Chumbalkar <nagananda.chumbalkar@hp.com> | ||
7 | * | ||
8 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; version 2 of the License. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, but | ||
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON | ||
17 | * INFRINGEMENT. See the GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write to the Free Software Foundation, Inc., | ||
21 | * 675 Mass Ave, Cambridge, MA 02139, USA. | ||
22 | * | ||
23 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
24 | */ | ||
25 | |||
26 | #include <linux/kernel.h> | ||
27 | #include <linux/module.h> | ||
28 | #include <linux/init.h> | ||
29 | #include <linux/smp.h> | ||
30 | #include <linux/sched.h> | ||
31 | #include <linux/cpufreq.h> | ||
32 | #include <linux/compiler.h> | ||
33 | #include <linux/slab.h> | ||
34 | |||
35 | #include <linux/acpi.h> | ||
36 | #include <linux/io.h> | ||
37 | #include <linux/spinlock.h> | ||
38 | #include <linux/uaccess.h> | ||
39 | |||
40 | #include <acpi/processor.h> | ||
41 | |||
42 | #define PCC_VERSION "1.00.00" | ||
43 | #define POLL_LOOPS 300 | ||
44 | |||
45 | #define CMD_COMPLETE 0x1 | ||
46 | #define CMD_GET_FREQ 0x0 | ||
47 | #define CMD_SET_FREQ 0x1 | ||
48 | |||
49 | #define BUF_SZ 4 | ||
50 | |||
51 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
52 | "pcc-cpufreq", msg) | ||
53 | |||
54 | struct pcc_register_resource { | ||
55 | u8 descriptor; | ||
56 | u16 length; | ||
57 | u8 space_id; | ||
58 | u8 bit_width; | ||
59 | u8 bit_offset; | ||
60 | u8 access_size; | ||
61 | u64 address; | ||
62 | } __attribute__ ((packed)); | ||
63 | |||
64 | struct pcc_memory_resource { | ||
65 | u8 descriptor; | ||
66 | u16 length; | ||
67 | u8 space_id; | ||
68 | u8 resource_usage; | ||
69 | u8 type_specific; | ||
70 | u64 granularity; | ||
71 | u64 minimum; | ||
72 | u64 maximum; | ||
73 | u64 translation_offset; | ||
74 | u64 address_length; | ||
75 | } __attribute__ ((packed)); | ||
76 | |||
77 | static struct cpufreq_driver pcc_cpufreq_driver; | ||
78 | |||
79 | struct pcc_header { | ||
80 | u32 signature; | ||
81 | u16 length; | ||
82 | u8 major; | ||
83 | u8 minor; | ||
84 | u32 features; | ||
85 | u16 command; | ||
86 | u16 status; | ||
87 | u32 latency; | ||
88 | u32 minimum_time; | ||
89 | u32 maximum_time; | ||
90 | u32 nominal; | ||
91 | u32 throttled_frequency; | ||
92 | u32 minimum_frequency; | ||
93 | }; | ||
94 | |||
95 | static void __iomem *pcch_virt_addr; | ||
96 | static struct pcc_header __iomem *pcch_hdr; | ||
97 | |||
98 | static DEFINE_SPINLOCK(pcc_lock); | ||
99 | |||
100 | static struct acpi_generic_address doorbell; | ||
101 | |||
102 | static u64 doorbell_preserve; | ||
103 | static u64 doorbell_write; | ||
104 | |||
105 | static u8 OSC_UUID[16] = {0x63, 0x9B, 0x2C, 0x9F, 0x70, 0x91, 0x49, 0x1f, | ||
106 | 0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46}; | ||
107 | |||
108 | struct pcc_cpu { | ||
109 | u32 input_offset; | ||
110 | u32 output_offset; | ||
111 | }; | ||
112 | |||
113 | static struct pcc_cpu *pcc_cpu_info; | ||
114 | |||
115 | static int pcc_cpufreq_verify(struct cpufreq_policy *policy) | ||
116 | { | ||
117 | cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, | ||
118 | policy->cpuinfo.max_freq); | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | static inline void pcc_cmd(void) | ||
123 | { | ||
124 | u64 doorbell_value; | ||
125 | int i; | ||
126 | |||
127 | acpi_read(&doorbell_value, &doorbell); | ||
128 | acpi_write((doorbell_value & doorbell_preserve) | doorbell_write, | ||
129 | &doorbell); | ||
130 | |||
131 | for (i = 0; i < POLL_LOOPS; i++) { | ||
132 | if (ioread16(&pcch_hdr->status) & CMD_COMPLETE) | ||
133 | break; | ||
134 | } | ||
135 | } | ||
136 | |||
137 | static inline void pcc_clear_mapping(void) | ||
138 | { | ||
139 | if (pcch_virt_addr) | ||
140 | iounmap(pcch_virt_addr); | ||
141 | pcch_virt_addr = NULL; | ||
142 | } | ||
143 | |||
144 | static unsigned int pcc_get_freq(unsigned int cpu) | ||
145 | { | ||
146 | struct pcc_cpu *pcc_cpu_data; | ||
147 | unsigned int curr_freq; | ||
148 | unsigned int freq_limit; | ||
149 | u16 status; | ||
150 | u32 input_buffer; | ||
151 | u32 output_buffer; | ||
152 | |||
153 | spin_lock(&pcc_lock); | ||
154 | |||
155 | dprintk("get: get_freq for CPU %d\n", cpu); | ||
156 | pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); | ||
157 | |||
158 | input_buffer = 0x1; | ||
159 | iowrite32(input_buffer, | ||
160 | (pcch_virt_addr + pcc_cpu_data->input_offset)); | ||
161 | iowrite16(CMD_GET_FREQ, &pcch_hdr->command); | ||
162 | |||
163 | pcc_cmd(); | ||
164 | |||
165 | output_buffer = | ||
166 | ioread32(pcch_virt_addr + pcc_cpu_data->output_offset); | ||
167 | |||
168 | /* Clear the input buffer - we are done with the current command */ | ||
169 | memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); | ||
170 | |||
171 | status = ioread16(&pcch_hdr->status); | ||
172 | if (status != CMD_COMPLETE) { | ||
173 | dprintk("get: FAILED: for CPU %d, status is %d\n", | ||
174 | cpu, status); | ||
175 | goto cmd_incomplete; | ||
176 | } | ||
177 | iowrite16(0, &pcch_hdr->status); | ||
178 | curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff)) | ||
179 | / 100) * 1000); | ||
180 | |||
181 | dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is " | ||
182 | "0x%x, contains a value of: 0x%x. Speed is: %d MHz\n", | ||
183 | cpu, (pcch_virt_addr + pcc_cpu_data->output_offset), | ||
184 | output_buffer, curr_freq); | ||
185 | |||
186 | freq_limit = (output_buffer >> 8) & 0xff; | ||
187 | if (freq_limit != 0xff) { | ||
188 | dprintk("get: frequency for cpu %d is being temporarily" | ||
189 | " capped at %d\n", cpu, curr_freq); | ||
190 | } | ||
191 | |||
192 | spin_unlock(&pcc_lock); | ||
193 | return curr_freq; | ||
194 | |||
195 | cmd_incomplete: | ||
196 | iowrite16(0, &pcch_hdr->status); | ||
197 | spin_unlock(&pcc_lock); | ||
198 | return 0; | ||
199 | } | ||
200 | |||
201 | static int pcc_cpufreq_target(struct cpufreq_policy *policy, | ||
202 | unsigned int target_freq, | ||
203 | unsigned int relation) | ||
204 | { | ||
205 | struct pcc_cpu *pcc_cpu_data; | ||
206 | struct cpufreq_freqs freqs; | ||
207 | u16 status; | ||
208 | u32 input_buffer; | ||
209 | int cpu; | ||
210 | |||
211 | spin_lock(&pcc_lock); | ||
212 | cpu = policy->cpu; | ||
213 | pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); | ||
214 | |||
215 | dprintk("target: CPU %d should go to target freq: %d " | ||
216 | "(virtual) input_offset is 0x%x\n", | ||
217 | cpu, target_freq, | ||
218 | (pcch_virt_addr + pcc_cpu_data->input_offset)); | ||
219 | |||
220 | freqs.new = target_freq; | ||
221 | freqs.cpu = cpu; | ||
222 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
223 | |||
224 | input_buffer = 0x1 | (((target_freq * 100) | ||
225 | / (ioread32(&pcch_hdr->nominal) * 1000)) << 8); | ||
226 | iowrite32(input_buffer, | ||
227 | (pcch_virt_addr + pcc_cpu_data->input_offset)); | ||
228 | iowrite16(CMD_SET_FREQ, &pcch_hdr->command); | ||
229 | |||
230 | pcc_cmd(); | ||
231 | |||
232 | /* Clear the input buffer - we are done with the current command */ | ||
233 | memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); | ||
234 | |||
235 | status = ioread16(&pcch_hdr->status); | ||
236 | if (status != CMD_COMPLETE) { | ||
237 | dprintk("target: FAILED for cpu %d, with status: 0x%x\n", | ||
238 | cpu, status); | ||
239 | goto cmd_incomplete; | ||
240 | } | ||
241 | iowrite16(0, &pcch_hdr->status); | ||
242 | |||
243 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
244 | dprintk("target: was SUCCESSFUL for cpu %d\n", cpu); | ||
245 | spin_unlock(&pcc_lock); | ||
246 | |||
247 | return 0; | ||
248 | |||
249 | cmd_incomplete: | ||
250 | iowrite16(0, &pcch_hdr->status); | ||
251 | spin_unlock(&pcc_lock); | ||
252 | return -EINVAL; | ||
253 | } | ||
254 | |||
255 | static int pcc_get_offset(int cpu) | ||
256 | { | ||
257 | acpi_status status; | ||
258 | struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; | ||
259 | union acpi_object *pccp, *offset; | ||
260 | struct pcc_cpu *pcc_cpu_data; | ||
261 | struct acpi_processor *pr; | ||
262 | int ret = 0; | ||
263 | |||
264 | pr = per_cpu(processors, cpu); | ||
265 | pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); | ||
266 | |||
267 | status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer); | ||
268 | if (ACPI_FAILURE(status)) | ||
269 | return -ENODEV; | ||
270 | |||
271 | pccp = buffer.pointer; | ||
272 | if (!pccp || pccp->type != ACPI_TYPE_PACKAGE) { | ||
273 | ret = -ENODEV; | ||
274 | goto out_free; | ||
275 | } | ||
276 | |||
277 | offset = &(pccp->package.elements[0]); | ||
278 | if (!offset || offset->type != ACPI_TYPE_INTEGER) { | ||
279 | ret = -ENODEV; | ||
280 | goto out_free; | ||
281 | } | ||
282 | |||
283 | pcc_cpu_data->input_offset = offset->integer.value; | ||
284 | |||
285 | offset = &(pccp->package.elements[1]); | ||
286 | if (!offset || offset->type != ACPI_TYPE_INTEGER) { | ||
287 | ret = -ENODEV; | ||
288 | goto out_free; | ||
289 | } | ||
290 | |||
291 | pcc_cpu_data->output_offset = offset->integer.value; | ||
292 | |||
293 | memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); | ||
294 | memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ); | ||
295 | |||
296 | dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data " | ||
297 | "input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n", | ||
298 | cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset); | ||
299 | out_free: | ||
300 | kfree(buffer.pointer); | ||
301 | return ret; | ||
302 | } | ||
303 | |||
304 | static int __init pcc_cpufreq_do_osc(acpi_handle *handle) | ||
305 | { | ||
306 | acpi_status status; | ||
307 | struct acpi_object_list input; | ||
308 | struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; | ||
309 | union acpi_object in_params[4]; | ||
310 | union acpi_object *out_obj; | ||
311 | u32 capabilities[2]; | ||
312 | u32 errors; | ||
313 | u32 supported; | ||
314 | int ret = 0; | ||
315 | |||
316 | input.count = 4; | ||
317 | input.pointer = in_params; | ||
320 | in_params[0].type = ACPI_TYPE_BUFFER; | ||
321 | in_params[0].buffer.length = 16; | ||
322 | in_params[0].buffer.pointer = OSC_UUID; | ||
323 | in_params[1].type = ACPI_TYPE_INTEGER; | ||
324 | in_params[1].integer.value = 1; | ||
325 | in_params[2].type = ACPI_TYPE_INTEGER; | ||
326 | in_params[2].integer.value = 2; | ||
327 | in_params[3].type = ACPI_TYPE_BUFFER; | ||
328 | in_params[3].buffer.length = 8; | ||
329 | in_params[3].buffer.pointer = (u8 *)&capabilities; | ||
330 | |||
331 | capabilities[0] = OSC_QUERY_ENABLE; | ||
332 | capabilities[1] = 0x1; | ||
333 | |||
334 | status = acpi_evaluate_object(*handle, "_OSC", &input, &output); | ||
335 | if (ACPI_FAILURE(status)) | ||
336 | return -ENODEV; | ||
337 | |||
338 | if (!output.length) | ||
339 | return -ENODEV; | ||
340 | |||
341 | out_obj = output.pointer; | ||
342 | if (out_obj->type != ACPI_TYPE_BUFFER) { | ||
343 | ret = -ENODEV; | ||
344 | goto out_free; | ||
345 | } | ||
346 | |||
347 | errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); | ||
348 | if (errors) { | ||
349 | ret = -ENODEV; | ||
350 | goto out_free; | ||
351 | } | ||
352 | |||
353 | supported = *((u32 *)(out_obj->buffer.pointer + 4)); | ||
354 | if (!(supported & 0x1)) { | ||
355 | ret = -ENODEV; | ||
356 | goto out_free; | ||
357 | } | ||
358 | |||
359 | kfree(output.pointer); | ||
360 | capabilities[0] = 0x0; | ||
361 | capabilities[1] = 0x1; | ||
362 | |||
363 | status = acpi_evaluate_object(*handle, "_OSC", &input, &output); | ||
364 | if (ACPI_FAILURE(status)) | ||
365 | return -ENODEV; | ||
366 | |||
367 | if (!output.length) | ||
368 | return -ENODEV; | ||
369 | |||
370 | out_obj = output.pointer; | ||
371 | if (out_obj->type != ACPI_TYPE_BUFFER) { | ||
372 | ret = -ENODEV; | ||
373 | goto out_free; | ||
374 | } | ||
375 | |||
376 | errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); | ||
377 | if (errors) { | ||
378 | ret = -ENODEV; | ||
379 | goto out_free; | ||
380 | } | ||
381 | |||
382 | supported = *((u32 *)(out_obj->buffer.pointer + 4)); | ||
383 | if (!(supported & 0x1)) { | ||
384 | ret = -ENODEV; | ||
385 | goto out_free; | ||
386 | } | ||
387 | |||
388 | out_free: | ||
389 | kfree(output.pointer); | ||
390 | return ret; | ||
391 | } | ||
392 | |||
393 | static int __init pcc_cpufreq_probe(void) | ||
394 | { | ||
395 | acpi_status status; | ||
396 | struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; | ||
397 | struct pcc_memory_resource *mem_resource; | ||
398 | struct pcc_register_resource *reg_resource; | ||
399 | union acpi_object *out_obj, *member; | ||
400 | acpi_handle handle, osc_handle; | ||
401 | int ret = 0; | ||
402 | |||
403 | status = acpi_get_handle(NULL, "\\_SB", &handle); | ||
404 | if (ACPI_FAILURE(status)) | ||
405 | return -ENODEV; | ||
406 | |||
407 | status = acpi_get_handle(handle, "_OSC", &osc_handle); | ||
408 | if (ACPI_SUCCESS(status)) { | ||
409 | ret = pcc_cpufreq_do_osc(&osc_handle); | ||
410 | if (ret) | ||
411 | dprintk("probe: _OSC evaluation did not succeed\n"); | ||
412 | /* Firmware's use of _OSC is optional */ | ||
413 | ret = 0; | ||
414 | } | ||
415 | |||
416 | status = acpi_evaluate_object(handle, "PCCH", NULL, &output); | ||
417 | if (ACPI_FAILURE(status)) | ||
418 | return -ENODEV; | ||
419 | |||
420 | out_obj = output.pointer; | ||
421 | if (out_obj->type != ACPI_TYPE_PACKAGE) { | ||
422 | ret = -ENODEV; | ||
423 | goto out_free; | ||
424 | } | ||
425 | |||
426 | member = &out_obj->package.elements[0]; | ||
427 | if (member->type != ACPI_TYPE_BUFFER) { | ||
428 | ret = -ENODEV; | ||
429 | goto out_free; | ||
430 | } | ||
431 | |||
432 | mem_resource = (struct pcc_memory_resource *)member->buffer.pointer; | ||
433 | |||
434 | dprintk("probe: mem_resource descriptor: 0x%x," | ||
435 | " length: %d, space_id: %d, resource_usage: %d," | ||
436 | " type_specific: %d, granularity: 0x%llx," | ||
437 | " minimum: 0x%llx, maximum: 0x%llx," | ||
438 | " translation_offset: 0x%llx, address_length: 0x%llx\n", | ||
439 | mem_resource->descriptor, mem_resource->length, | ||
440 | mem_resource->space_id, mem_resource->resource_usage, | ||
441 | mem_resource->type_specific, mem_resource->granularity, | ||
442 | mem_resource->minimum, mem_resource->maximum, | ||
443 | mem_resource->translation_offset, | ||
444 | mem_resource->address_length); | ||
445 | |||
446 | if (mem_resource->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) { | ||
447 | ret = -ENODEV; | ||
448 | goto out_free; | ||
449 | } | ||
450 | |||
451 | pcch_virt_addr = ioremap_nocache(mem_resource->minimum, | ||
452 | mem_resource->address_length); | ||
453 | if (pcch_virt_addr == NULL) { | ||
454 | dprintk("probe: could not map shared mem region\n"); | ||
455 | goto out_free; | ||
456 | } | ||
457 | pcch_hdr = pcch_virt_addr; | ||
458 | |||
459 | dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr); | ||
460 | dprintk("probe: PCCH header is at physical address: 0x%llx," | ||
461 | " signature: 0x%x, length: %d bytes, major: %d, minor: %d," | ||
462 | " supported features: 0x%x, command field: 0x%x," | ||
463 | " status field: 0x%x, nominal latency: %d us\n", | ||
464 | mem_resource->minimum, ioread32(&pcch_hdr->signature), | ||
465 | ioread16(&pcch_hdr->length), ioread8(&pcch_hdr->major), | ||
466 | ioread8(&pcch_hdr->minor), ioread32(&pcch_hdr->features), | ||
467 | ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status), | ||
468 | ioread32(&pcch_hdr->latency)); | ||
469 | |||
470 | dprintk("probe: min time between commands: %d us," | ||
471 | " max time between commands: %d us," | ||
472 | " nominal CPU frequency: %d MHz," | ||
473 | " minimum CPU frequency: %d MHz," | ||
474 | " minimum CPU frequency without throttling: %d MHz\n", | ||
475 | ioread32(&pcch_hdr->minimum_time), | ||
476 | ioread32(&pcch_hdr->maximum_time), | ||
477 | ioread32(&pcch_hdr->nominal), | ||
478 | ioread32(&pcch_hdr->throttled_frequency), | ||
479 | ioread32(&pcch_hdr->minimum_frequency)); | ||
480 | |||
481 | member = &out_obj->package.elements[1]; | ||
482 | if (member->type != ACPI_TYPE_BUFFER) { | ||
483 | ret = -ENODEV; | ||
484 | goto pcch_free; | ||
485 | } | ||
486 | |||
487 | reg_resource = (struct pcc_register_resource *)member->buffer.pointer; | ||
488 | |||
489 | doorbell.space_id = reg_resource->space_id; | ||
490 | doorbell.bit_width = reg_resource->bit_width; | ||
491 | doorbell.bit_offset = reg_resource->bit_offset; | ||
492 | doorbell.access_width = 64; | ||
493 | doorbell.address = reg_resource->address; | ||
494 | |||
495 | dprintk("probe: doorbell: space_id is %d, bit_width is %d, " | ||
496 | "bit_offset is %d, access_width is %d, address is 0x%llx\n", | ||
497 | doorbell.space_id, doorbell.bit_width, doorbell.bit_offset, | ||
498 | doorbell.access_width, reg_resource->address); | ||
499 | |||
500 | member = &out_obj->package.elements[2]; | ||
501 | if (member->type != ACPI_TYPE_INTEGER) { | ||
502 | ret = -ENODEV; | ||
503 | goto pcch_free; | ||
504 | } | ||
505 | |||
506 | doorbell_preserve = member->integer.value; | ||
507 | |||
508 | member = &out_obj->package.elements[3]; | ||
509 | if (member->type != ACPI_TYPE_INTEGER) { | ||
510 | ret = -ENODEV; | ||
511 | goto pcch_free; | ||
512 | } | ||
513 | |||
514 | doorbell_write = member->integer.value; | ||
515 | |||
516 | dprintk("probe: doorbell_preserve: 0x%llx," | ||
517 | " doorbell_write: 0x%llx\n", | ||
518 | doorbell_preserve, doorbell_write); | ||
519 | |||
520 | pcc_cpu_info = alloc_percpu(struct pcc_cpu); | ||
521 | if (!pcc_cpu_info) { | ||
522 | ret = -ENOMEM; | ||
523 | goto pcch_free; | ||
524 | } | ||
525 | |||
526 | printk(KERN_DEBUG "pcc-cpufreq: (v%s) driver loaded with frequency" | ||
527 | " limits: %d MHz, %d MHz\n", PCC_VERSION, | ||
528 | ioread32(&pcch_hdr->minimum_frequency), | ||
529 | ioread32(&pcch_hdr->nominal)); | ||
530 | kfree(output.pointer); | ||
531 | return ret; | ||
532 | pcch_free: | ||
533 | pcc_clear_mapping(); | ||
534 | out_free: | ||
535 | kfree(output.pointer); | ||
536 | return ret; | ||
537 | } | ||
538 | |||
539 | static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) | ||
540 | { | ||
541 | unsigned int cpu = policy->cpu; | ||
542 | int result = 0; | ||
543 | |||
544 | if (!pcch_virt_addr) { | ||
545 | result = -ENODEV; | ||
546 | goto pcch_null; | ||
547 | } | ||
548 | |||
549 | result = pcc_get_offset(cpu); | ||
550 | if (result) { | ||
551 | dprintk("init: PCCP evaluation failed\n"); | ||
552 | goto free; | ||
553 | } | ||
554 | |||
555 | policy->max = policy->cpuinfo.max_freq = | ||
556 | ioread32(&pcch_hdr->nominal) * 1000; | ||
557 | policy->min = policy->cpuinfo.min_freq = | ||
558 | ioread32(&pcch_hdr->minimum_frequency) * 1000; | ||
559 | policy->cur = pcc_get_freq(cpu); | ||
560 | |||
561 | dprintk("init: policy->max is %d, policy->min is %d\n", | ||
562 | policy->max, policy->min); | ||
563 | |||
564 | return 0; | ||
565 | free: | ||
566 | pcc_clear_mapping(); | ||
567 | free_percpu(pcc_cpu_info); | ||
568 | pcch_null: | ||
569 | return result; | ||
570 | } | ||
571 | |||
572 | static int pcc_cpufreq_cpu_exit(struct cpufreq_policy *policy) | ||
573 | { | ||
574 | return 0; | ||
575 | } | ||
576 | |||
577 | static struct cpufreq_driver pcc_cpufreq_driver = { | ||
578 | .flags = CPUFREQ_CONST_LOOPS, | ||
579 | .get = pcc_get_freq, | ||
580 | .verify = pcc_cpufreq_verify, | ||
581 | .target = pcc_cpufreq_target, | ||
582 | .init = pcc_cpufreq_cpu_init, | ||
583 | .exit = pcc_cpufreq_cpu_exit, | ||
584 | .name = "pcc-cpufreq", | ||
585 | .owner = THIS_MODULE, | ||
586 | }; | ||
587 | |||
588 | static int __init pcc_cpufreq_init(void) | ||
589 | { | ||
590 | int ret; | ||
591 | |||
592 | if (acpi_disabled) | ||
593 | return 0; | ||
594 | |||
595 | ret = pcc_cpufreq_probe(); | ||
596 | if (ret) { | ||
597 | dprintk("pcc_cpufreq_init: PCCH evaluation failed\n"); | ||
598 | return ret; | ||
599 | } | ||
600 | |||
601 | ret = cpufreq_register_driver(&pcc_cpufreq_driver); | ||
602 | |||
603 | return ret; | ||
604 | } | ||
605 | |||
606 | static void __exit pcc_cpufreq_exit(void) | ||
607 | { | ||
608 | cpufreq_unregister_driver(&pcc_cpufreq_driver); | ||
609 | |||
610 | pcc_clear_mapping(); | ||
611 | |||
612 | free_percpu(pcc_cpu_info); | ||
613 | } | ||
614 | |||
615 | MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar"); | ||
616 | MODULE_VERSION(PCC_VERSION); | ||
617 | MODULE_DESCRIPTION("Processor Clocking Control interface driver"); | ||
618 | MODULE_LICENSE("GPL"); | ||
619 | |||
620 | late_initcall(pcc_cpufreq_init); | ||
621 | module_exit(pcc_cpufreq_exit); | ||
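The new driver talks to firmware in percentages of the nominal frequency: pcc_cpufreq_target() packs bit 0 (command valid) plus the requested percentage in bits 8 and up of the per-CPU input buffer, and pcc_get_freq() scales the low byte of the output buffer back to kHz. A standalone sketch of just that arithmetic, in plain userspace C with an example nominal value (the real driver reads it via ioread32(&pcch_hdr->nominal)):

#include <stdio.h>

/* Mirror of the driver's input/output buffer arithmetic. */
static unsigned int pcc_encode_input(unsigned int target_khz,
				     unsigned int nominal_mhz)
{
	/* bit 0: command valid; higher bits: percent of nominal */
	return 0x1 | (((target_khz * 100) / (nominal_mhz * 1000)) << 8);
}

static unsigned int pcc_decode_output(unsigned int output,
				      unsigned int nominal_mhz)
{
	/* low byte of the output buffer is the granted percentage */
	return nominal_mhz * (output & 0xff) / 100 * 1000;	/* kHz */
}

int main(void)
{
	unsigned int nominal_mhz = 2666;	/* example value */
	unsigned int in = pcc_encode_input(2000000, nominal_mhz);
	unsigned int pct = (in >> 8) & 0xff;

	printf("input buffer: 0x%x (%u%% of nominal)\n", in, pct);
	printf("low output byte %u decodes to %u kHz\n",
	       pct, pcc_decode_output(pct, nominal_mhz));
	return 0;
}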
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index cb01dac267d3..b3379d6a5c57 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
14 | #include <linux/cpufreq.h> | 14 | #include <linux/cpufreq.h> |
15 | #include <linux/ioport.h> | 15 | #include <linux/ioport.h> |
16 | #include <linux/slab.h> | ||
17 | #include <linux/timex.h> | 16 | #include <linux/timex.h> |
18 | #include <linux/io.h> | 17 | #include <linux/io.h> |
19 | 18 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index f125e5c551c0..d360b56e9825 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -806,7 +806,7 @@ static int find_psb_table(struct powernow_k8_data *data) | |||
806 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, | 806 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, |
807 | unsigned int index) | 807 | unsigned int index) |
808 | { | 808 | { |
809 | acpi_integer control; | 809 | u64 control; |
810 | 810 | ||
811 | if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) | 811 | if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) |
812 | return; | 812 | return; |
@@ -824,7 +824,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | |||
824 | { | 824 | { |
825 | struct cpufreq_frequency_table *powernow_table; | 825 | struct cpufreq_frequency_table *powernow_table; |
826 | int ret_val = -ENODEV; | 826 | int ret_val = -ENODEV; |
827 | acpi_integer control, status; | 827 | u64 control, status; |
828 | 828 | ||
829 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { | 829 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { |
830 | dprintk("register performance failed: bad ACPI data\n"); | 830 | dprintk("register performance failed: bad ACPI data\n"); |
@@ -948,7 +948,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, | |||
948 | u32 fid; | 948 | u32 fid; |
949 | u32 vid; | 949 | u32 vid; |
950 | u32 freq, index; | 950 | u32 freq, index; |
951 | acpi_integer status, control; | 951 | u64 status, control; |
952 | 952 | ||
953 | if (data->exttype) { | 953 | if (data->exttype) { |
954 | status = data->acpi_data.states[i].status; | 954 | status = data->acpi_data.states[i].status; |
@@ -1356,6 +1356,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) | |||
1356 | 1356 | ||
1357 | kfree(data->powernow_table); | 1357 | kfree(data->powernow_table); |
1358 | kfree(data); | 1358 | kfree(data); |
1359 | per_cpu(powernow_data, pol->cpu) = NULL; | ||
1359 | 1360 | ||
1360 | return 0; | 1361 | return 0; |
1361 | } | 1362 | } |
@@ -1375,7 +1376,7 @@ static unsigned int powernowk8_get(unsigned int cpu) | |||
1375 | int err; | 1376 | int err; |
1376 | 1377 | ||
1377 | if (!data) | 1378 | if (!data) |
1378 | return -EINVAL; | 1379 | return 0; |
1379 | 1380 | ||
1380 | smp_call_function_single(cpu, query_values_on_cpu, &err, true); | 1381 | smp_call_function_single(cpu, query_values_on_cpu, &err, true); |
1381 | if (err) | 1382 | if (err) |
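The powernowk8_get() hunk fixes a classic signed/unsigned bug: a cpufreq ->get() handler returns unsigned int, so the old `return -EINVAL` was silently converted into a huge bogus frequency, while 0 is the conventional "unknown" value. A two-line demonstration in standalone C:

#include <stdio.h>

#define EINVAL 22

/* Why returning a negative errno from a function declared to return
 * unsigned int is a bug: the value wraps to a huge "frequency". */
static unsigned int bogus_get(void)
{
	return -EINVAL;		/* implicitly converted */
}

int main(void)
{
	printf("caller sees %u kHz\n", bogus_get());	/* 4294967274 */
	return 0;
}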
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 8d672ef162ce..9b1ff37de46a 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/sched.h> /* current */ | 20 | #include <linux/sched.h> /* current */ |
21 | #include <linux/delay.h> | 21 | #include <linux/delay.h> |
22 | #include <linux/compiler.h> | 22 | #include <linux/compiler.h> |
23 | #include <linux/gfp.h> | ||
23 | 24 | ||
24 | #include <asm/msr.h> | 25 | #include <asm/msr.h> |
25 | #include <asm/processor.h> | 26 | #include <asm/processor.h> |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 2ce8e0b5cc54..561758e95180 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/init.h> | 23 | #include <linux/init.h> |
24 | #include <linux/cpufreq.h> | 24 | #include <linux/cpufreq.h> |
25 | #include <linux/pci.h> | 25 | #include <linux/pci.h> |
26 | #include <linux/slab.h> | ||
27 | #include <linux/sched.h> | 26 | #include <linux/sched.h> |
28 | 27 | ||
29 | #include "speedstep-lib.h" | 28 | #include "speedstep-lib.h" |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index ad0083abfa23..a94ec6be69fa 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <linux/moduleparam.h> | 13 | #include <linux/moduleparam.h> |
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/cpufreq.h> | 15 | #include <linux/cpufreq.h> |
16 | #include <linux/slab.h> | ||
17 | 16 | ||
18 | #include <asm/msr.h> | 17 | #include <asm/msr.h> |
19 | #include <asm/tsc.h> | 18 | #include <asm/tsc.h> |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index 04d73c114e49..8abd869baabf 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/moduleparam.h> | 17 | #include <linux/moduleparam.h> |
18 | #include <linux/init.h> | 18 | #include <linux/init.h> |
19 | #include <linux/cpufreq.h> | 19 | #include <linux/cpufreq.h> |
20 | #include <linux/slab.h> | ||
21 | #include <linux/delay.h> | 20 | #include <linux/delay.h> |
22 | #include <linux/io.h> | 21 | #include <linux/io.h> |
23 | #include <asm/ist.h> | 22 | #include <asm/ist.h> |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 879666f4d871..7e1cca13af35 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -70,7 +70,8 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
70 | if (c->x86_power & (1 << 8)) { | 70 | if (c->x86_power & (1 << 8)) { |
71 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 71 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
72 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | 72 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); |
73 | sched_clock_stable = 1; | 73 | if (!check_tsc_unstable()) |
74 | sched_clock_stable = 1; | ||
74 | } | 75 | } |
75 | 76 | ||
76 | /* | 77 | /* |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index c2b722d5a722..b3eeb66c0a51 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
19 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
20 | #include <asm/k8.h> | 20 | #include <asm/k8.h> |
21 | #include <asm/smp.h> | ||
21 | 22 | ||
22 | #define LVL_1_INST 1 | 23 | #define LVL_1_INST 1 |
23 | #define LVL_1_DATA 2 | 24 | #define LVL_1_DATA 2 |
@@ -152,7 +153,8 @@ struct _cpuid4_info { | |||
152 | union _cpuid4_leaf_ebx ebx; | 153 | union _cpuid4_leaf_ebx ebx; |
153 | union _cpuid4_leaf_ecx ecx; | 154 | union _cpuid4_leaf_ecx ecx; |
154 | unsigned long size; | 155 | unsigned long size; |
155 | unsigned long can_disable; | 156 | bool can_disable; |
157 | unsigned int l3_indices; | ||
156 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); | 158 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); |
157 | }; | 159 | }; |
158 | 160 | ||
@@ -162,7 +164,8 @@ struct _cpuid4_info_regs { | |||
162 | union _cpuid4_leaf_ebx ebx; | 164 | union _cpuid4_leaf_ebx ebx; |
163 | union _cpuid4_leaf_ecx ecx; | 165 | union _cpuid4_leaf_ecx ecx; |
164 | unsigned long size; | 166 | unsigned long size; |
165 | unsigned long can_disable; | 167 | bool can_disable; |
168 | unsigned int l3_indices; | ||
166 | }; | 169 | }; |
167 | 170 | ||
168 | unsigned short num_cache_leaves; | 171 | unsigned short num_cache_leaves; |
@@ -292,6 +295,36 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
292 | (ebx->split.ways_of_associativity + 1) - 1; | 295 | (ebx->split.ways_of_associativity + 1) - 1; |
293 | } | 296 | } |
294 | 297 | ||
298 | struct _cache_attr { | ||
299 | struct attribute attr; | ||
300 | ssize_t (*show)(struct _cpuid4_info *, char *); | ||
301 | ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); | ||
302 | }; | ||
303 | |||
304 | #ifdef CONFIG_CPU_SUP_AMD | ||
305 | static unsigned int __cpuinit amd_calc_l3_indices(void) | ||
306 | { | ||
307 | /* | ||
308 | * We're called via smp_call_function_single() and therefore run | ||
309 | * on the correct cpu. | ||
310 | */ | ||
311 | int cpu = smp_processor_id(); | ||
312 | int node = cpu_to_node(cpu); | ||
313 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
314 | unsigned int sc0, sc1, sc2, sc3; | ||
315 | u32 val = 0; | ||
316 | |||
317 | pci_read_config_dword(dev, 0x1C4, &val); | ||
318 | |||
319 | /* calculate subcache sizes */ | ||
320 | sc0 = !(val & BIT(0)); | ||
321 | sc1 = !(val & BIT(4)); | ||
322 | sc2 = !(val & BIT(8)) + !(val & BIT(9)); | ||
323 | sc3 = !(val & BIT(12)) + !(val & BIT(13)); | ||
324 | |||
325 | return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; | ||
326 | } | ||
327 | |||
295 | static void __cpuinit | 328 | static void __cpuinit |
296 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | 329 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) |
297 | { | 330 | { |
@@ -301,12 +334,103 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | |||
301 | if (boot_cpu_data.x86 == 0x11) | 334 | if (boot_cpu_data.x86 == 0x11) |
302 | return; | 335 | return; |
303 | 336 | ||
304 | /* see erratum #382 */ | 337 | /* see errata #382 and #388 */ |
305 | if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8)) | 338 | if ((boot_cpu_data.x86 == 0x10) && |
339 | ((boot_cpu_data.x86_model < 0x8) || | ||
340 | (boot_cpu_data.x86_mask < 0x1))) | ||
306 | return; | 341 | return; |
307 | 342 | ||
308 | this_leaf->can_disable = 1; | 343 | this_leaf->can_disable = true; |
344 | this_leaf->l3_indices = amd_calc_l3_indices(); | ||
345 | } | ||
346 | |||
347 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | ||
348 | unsigned int index) | ||
349 | { | ||
350 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | ||
351 | int node = amd_get_nb_id(cpu); | ||
352 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
353 | unsigned int reg = 0; | ||
354 | |||
355 | if (!this_leaf->can_disable) | ||
356 | return -EINVAL; | ||
357 | |||
358 | if (!dev) | ||
359 | return -EINVAL; | ||
360 | |||
361 | pci_read_config_dword(dev, 0x1BC + index * 4, ®); | ||
362 | return sprintf(buf, "0x%08x\n", reg); | ||
363 | } | ||
364 | |||
365 | #define SHOW_CACHE_DISABLE(index) \ | ||
366 | static ssize_t \ | ||
367 | show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \ | ||
368 | { \ | ||
369 | return show_cache_disable(this_leaf, buf, index); \ | ||
370 | } | ||
371 | SHOW_CACHE_DISABLE(0) | ||
372 | SHOW_CACHE_DISABLE(1) | ||
373 | |||
374 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | ||
375 | const char *buf, size_t count, unsigned int index) | ||
376 | { | ||
377 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | ||
378 | int node = amd_get_nb_id(cpu); | ||
379 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
380 | unsigned long val = 0; | ||
381 | |||
382 | #define SUBCACHE_MASK (3UL << 20) | ||
383 | #define SUBCACHE_INDEX 0xfff | ||
384 | |||
385 | if (!this_leaf->can_disable) | ||
386 | return -EINVAL; | ||
387 | |||
388 | if (!capable(CAP_SYS_ADMIN)) | ||
389 | return -EPERM; | ||
390 | |||
391 | if (!dev) | ||
392 | return -EINVAL; | ||
393 | |||
394 | if (strict_strtoul(buf, 10, &val) < 0) | ||
395 | return -EINVAL; | ||
396 | |||
397 | /* do not allow writes outside of allowed bits */ | ||
398 | if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || | ||
399 | ((val & SUBCACHE_INDEX) > this_leaf->l3_indices)) | ||
400 | return -EINVAL; | ||
401 | |||
402 | val |= BIT(30); | ||
403 | pci_write_config_dword(dev, 0x1BC + index * 4, val); | ||
404 | /* | ||
405 | * We need to WBINVD on a core on the node containing the L3 cache | ||
406 | * whose indices we are disabling; a simple wbinvd() is not sufficient. | ||
407 | */ | ||
408 | wbinvd_on_cpu(cpu); | ||
409 | pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31)); | ||
410 | return count; | ||
411 | } | ||
412 | |||
413 | #define STORE_CACHE_DISABLE(index) \ | ||
414 | static ssize_t \ | ||
415 | store_cache_disable_##index(struct _cpuid4_info *this_leaf, \ | ||
416 | const char *buf, size_t count) \ | ||
417 | { \ | ||
418 | return store_cache_disable(this_leaf, buf, count, index); \ | ||
309 | } | 419 | } |
420 | STORE_CACHE_DISABLE(0) | ||
421 | STORE_CACHE_DISABLE(1) | ||
422 | |||
423 | static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, | ||
424 | show_cache_disable_0, store_cache_disable_0); | ||
425 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | ||
426 | show_cache_disable_1, store_cache_disable_1); | ||
427 | |||
428 | #else /* CONFIG_CPU_SUP_AMD */ | ||
429 | static void __cpuinit | ||
430 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | ||
431 | { | ||
432 | } | ||
433 | #endif /* CONFIG_CPU_SUP_AMD */ | ||
310 | 434 | ||
311 | static int | 435 | static int |
312 | __cpuinit cpuid4_cache_lookup_regs(int index, | 436 | __cpuinit cpuid4_cache_lookup_regs(int index, |
@@ -713,82 +837,6 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) | |||
713 | #define to_object(k) container_of(k, struct _index_kobject, kobj) | 837 | #define to_object(k) container_of(k, struct _index_kobject, kobj) |
714 | #define to_attr(a) container_of(a, struct _cache_attr, attr) | 838 | #define to_attr(a) container_of(a, struct _cache_attr, attr) |
715 | 839 | ||
716 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | ||
717 | unsigned int index) | ||
718 | { | ||
719 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | ||
720 | int node = cpu_to_node(cpu); | ||
721 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
722 | unsigned int reg = 0; | ||
723 | |||
724 | if (!this_leaf->can_disable) | ||
725 | return -EINVAL; | ||
726 | |||
727 | if (!dev) | ||
728 | return -EINVAL; | ||
729 | |||
730 | pci_read_config_dword(dev, 0x1BC + index * 4, ®); | ||
731 | return sprintf(buf, "%x\n", reg); | ||
732 | } | ||
733 | |||
734 | #define SHOW_CACHE_DISABLE(index) \ | ||
735 | static ssize_t \ | ||
736 | show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \ | ||
737 | { \ | ||
738 | return show_cache_disable(this_leaf, buf, index); \ | ||
739 | } | ||
740 | SHOW_CACHE_DISABLE(0) | ||
741 | SHOW_CACHE_DISABLE(1) | ||
742 | |||
743 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | ||
744 | const char *buf, size_t count, unsigned int index) | ||
745 | { | ||
746 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | ||
747 | int node = cpu_to_node(cpu); | ||
748 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
749 | unsigned long val = 0; | ||
750 | unsigned int scrubber = 0; | ||
751 | |||
752 | if (!this_leaf->can_disable) | ||
753 | return -EINVAL; | ||
754 | |||
755 | if (!capable(CAP_SYS_ADMIN)) | ||
756 | return -EPERM; | ||
757 | |||
758 | if (!dev) | ||
759 | return -EINVAL; | ||
760 | |||
761 | if (strict_strtoul(buf, 10, &val) < 0) | ||
762 | return -EINVAL; | ||
763 | |||
764 | val |= 0xc0000000; | ||
765 | |||
766 | pci_read_config_dword(dev, 0x58, &scrubber); | ||
767 | scrubber &= ~0x1f000000; | ||
768 | pci_write_config_dword(dev, 0x58, scrubber); | ||
769 | |||
770 | pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); | ||
771 | wbinvd(); | ||
772 | pci_write_config_dword(dev, 0x1BC + index * 4, val); | ||
773 | return count; | ||
774 | } | ||
775 | |||
776 | #define STORE_CACHE_DISABLE(index) \ | ||
777 | static ssize_t \ | ||
778 | store_cache_disable_##index(struct _cpuid4_info *this_leaf, \ | ||
779 | const char *buf, size_t count) \ | ||
780 | { \ | ||
781 | return store_cache_disable(this_leaf, buf, count, index); \ | ||
782 | } | ||
783 | STORE_CACHE_DISABLE(0) | ||
784 | STORE_CACHE_DISABLE(1) | ||
785 | |||
786 | struct _cache_attr { | ||
787 | struct attribute attr; | ||
788 | ssize_t (*show)(struct _cpuid4_info *, char *); | ||
789 | ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); | ||
790 | }; | ||
791 | |||
792 | #define define_one_ro(_name) \ | 840 | #define define_one_ro(_name) \ |
793 | static struct _cache_attr _name = \ | 841 | static struct _cache_attr _name = \ |
794 | __ATTR(_name, 0444, show_##_name, NULL) | 842 | __ATTR(_name, 0444, show_##_name, NULL) |
@@ -803,23 +851,28 @@ define_one_ro(size); | |||
803 | define_one_ro(shared_cpu_map); | 851 | define_one_ro(shared_cpu_map); |
804 | define_one_ro(shared_cpu_list); | 852 | define_one_ro(shared_cpu_list); |
805 | 853 | ||
806 | static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, | 854 | #define DEFAULT_SYSFS_CACHE_ATTRS \ |
807 | show_cache_disable_0, store_cache_disable_0); | 855 | &type.attr, \ |
808 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | 856 | &level.attr, \ |
809 | show_cache_disable_1, store_cache_disable_1); | 857 | &coherency_line_size.attr, \ |
858 | &physical_line_partition.attr, \ | ||
859 | &ways_of_associativity.attr, \ | ||
860 | &number_of_sets.attr, \ | ||
861 | &size.attr, \ | ||
862 | &shared_cpu_map.attr, \ | ||
863 | &shared_cpu_list.attr | ||
810 | 864 | ||
811 | static struct attribute *default_attrs[] = { | 865 | static struct attribute *default_attrs[] = { |
812 | &type.attr, | 866 | DEFAULT_SYSFS_CACHE_ATTRS, |
813 | &level.attr, | 867 | NULL |
814 | &coherency_line_size.attr, | 868 | }; |
815 | &physical_line_partition.attr, | 869 | |
816 | &ways_of_associativity.attr, | 870 | static struct attribute *default_l3_attrs[] = { |
817 | &number_of_sets.attr, | 871 | DEFAULT_SYSFS_CACHE_ATTRS, |
818 | &size.attr, | 872 | #ifdef CONFIG_CPU_SUP_AMD |
819 | &shared_cpu_map.attr, | ||
820 | &shared_cpu_list.attr, | ||
821 | &cache_disable_0.attr, | 873 | &cache_disable_0.attr, |
822 | &cache_disable_1.attr, | 874 | &cache_disable_1.attr, |
875 | #endif | ||
823 | NULL | 876 | NULL |
824 | }; | 877 | }; |
825 | 878 | ||
@@ -850,7 +903,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, | |||
850 | return ret; | 903 | return ret; |
851 | } | 904 | } |
852 | 905 | ||
853 | static struct sysfs_ops sysfs_ops = { | 906 | static const struct sysfs_ops sysfs_ops = { |
854 | .show = show, | 907 | .show = show, |
855 | .store = store, | 908 | .store = store, |
856 | }; | 909 | }; |
@@ -910,6 +963,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
910 | unsigned int cpu = sys_dev->id; | 963 | unsigned int cpu = sys_dev->id; |
911 | unsigned long i, j; | 964 | unsigned long i, j; |
912 | struct _index_kobject *this_object; | 965 | struct _index_kobject *this_object; |
966 | struct _cpuid4_info *this_leaf; | ||
913 | int retval; | 967 | int retval; |
914 | 968 | ||
915 | retval = cpuid4_cache_sysfs_init(cpu); | 969 | retval = cpuid4_cache_sysfs_init(cpu); |
@@ -928,6 +982,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
928 | this_object = INDEX_KOBJECT_PTR(cpu, i); | 982 | this_object = INDEX_KOBJECT_PTR(cpu, i); |
929 | this_object->cpu = cpu; | 983 | this_object->cpu = cpu; |
930 | this_object->index = i; | 984 | this_object->index = i; |
985 | |||
986 | this_leaf = CPUID4_INFO_IDX(cpu, i); | ||
987 | |||
988 | if (this_leaf->can_disable) | ||
989 | ktype_cache.default_attrs = default_l3_attrs; | ||
990 | else | ||
991 | ktype_cache.default_attrs = default_attrs; | ||
992 | |||
931 | retval = kobject_init_and_add(&(this_object->kobj), | 993 | retval = kobject_init_and_add(&(this_object->kobj), |
932 | &ktype_cache, | 994 | &ktype_cache, |
933 | per_cpu(ici_cache_kobject, cpu), | 995 | per_cpu(ici_cache_kobject, cpu), |
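amd_calc_l3_indices() above derives the number of usable L3 indices from northbridge config register 0x1C4: each clear bit in the listed positions means the corresponding subcache is present, and the largest subcache size, in units of 1024 indices, minus one becomes the limit that store_cache_disable() enforces. A userspace sketch of the same bit arithmetic, with a sample register value standing in for the PCI config read:

#include <stdio.h>

#define BIT(n) (1U << (n))

/* Same subcache computation as amd_calc_l3_indices(); a clear bit
 * means the corresponding subcache is present. */
static unsigned int calc_l3_indices(unsigned int val)
{
	unsigned int sc0, sc1, sc2, sc3, max;

	sc0 = !(val & BIT(0));
	sc1 = !(val & BIT(4));
	sc2 = !(val & BIT(8))  + !(val & BIT(9));
	sc3 = !(val & BIT(12)) + !(val & BIT(13));

	max = sc0;
	if (sc1 > max) max = sc1;
	if (sc2 > max) max = sc2;
	if (sc3 > max) max = sc3;

	return (max << 10) - 1;	/* units of 1024 indices, minus one */
}

int main(void)
{
	/* all subcaches present -> max = 2 -> 0x7ff valid indices */
	printf("l3_indices = 0x%x\n", calc_l3_indices(0x0));
	return 0;
}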
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 73734baa50f2..e7dbde7bfedb 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/kdebug.h> | 22 | #include <linux/kdebug.h> |
23 | #include <linux/cpu.h> | 23 | #include <linux/cpu.h> |
24 | #include <linux/sched.h> | 24 | #include <linux/sched.h> |
25 | #include <linux/gfp.h> | ||
25 | #include <asm/mce.h> | 26 | #include <asm/mce.h> |
26 | #include <asm/apic.h> | 27 | #include <asm/apic.h> |
27 | 28 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a8aacd4b513c..7a355ddcc64b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/sched.h> | 26 | #include <linux/sched.h> |
27 | #include <linux/sysfs.h> | 27 | #include <linux/sysfs.h> |
28 | #include <linux/types.h> | 28 | #include <linux/types.h> |
29 | #include <linux/slab.h> | ||
29 | #include <linux/init.h> | 30 | #include <linux/init.h> |
30 | #include <linux/kmod.h> | 31 | #include <linux/kmod.h> |
31 | #include <linux/poll.h> | 32 | #include <linux/poll.h> |
@@ -46,6 +47,13 @@ | |||
46 | 47 | ||
47 | #include "mce-internal.h" | 48 | #include "mce-internal.h" |
48 | 49 | ||
50 | static DEFINE_MUTEX(mce_read_mutex); | ||
51 | |||
52 | #define rcu_dereference_check_mce(p) \ | ||
53 | rcu_dereference_check((p), \ | ||
54 | rcu_read_lock_sched_held() || \ | ||
55 | lockdep_is_held(&mce_read_mutex)) | ||
56 | |||
49 | #define CREATE_TRACE_POINTS | 57 | #define CREATE_TRACE_POINTS |
50 | #include <trace/events/mce.h> | 58 | #include <trace/events/mce.h> |
51 | 59 | ||
@@ -158,7 +166,7 @@ void mce_log(struct mce *mce) | |||
158 | mce->finished = 0; | 166 | mce->finished = 0; |
159 | wmb(); | 167 | wmb(); |
160 | for (;;) { | 168 | for (;;) { |
161 | entry = rcu_dereference(mcelog.next); | 169 | entry = rcu_dereference_check_mce(mcelog.next); |
162 | for (;;) { | 170 | for (;;) { |
163 | /* | 171 | /* |
164 | * When the buffer fills up discard new entries. | 172 | * When the buffer fills up discard new entries. |
@@ -531,7 +539,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
531 | struct mce m; | 539 | struct mce m; |
532 | int i; | 540 | int i; |
533 | 541 | ||
534 | __get_cpu_var(mce_poll_count)++; | 542 | percpu_inc(mce_poll_count); |
535 | 543 | ||
536 | mce_setup(&m); | 544 | mce_setup(&m); |
537 | 545 | ||
@@ -926,7 +934,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
926 | 934 | ||
927 | atomic_inc(&mce_entry); | 935 | atomic_inc(&mce_entry); |
928 | 936 | ||
929 | __get_cpu_var(mce_exception_count)++; | 937 | percpu_inc(mce_exception_count); |
930 | 938 | ||
931 | if (notify_die(DIE_NMI, "machine check", regs, error_code, | 939 | if (notify_die(DIE_NMI, "machine check", regs, error_code, |
932 | 18, SIGKILL) == NOTIFY_STOP) | 940 | 18, SIGKILL) == NOTIFY_STOP) |
@@ -1485,8 +1493,6 @@ static void collect_tscs(void *data) | |||
1485 | rdtscll(cpu_tsc[smp_processor_id()]); | 1493 | rdtscll(cpu_tsc[smp_processor_id()]); |
1486 | } | 1494 | } |
1487 | 1495 | ||
1488 | static DEFINE_MUTEX(mce_read_mutex); | ||
1489 | |||
1490 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | 1496 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, |
1491 | loff_t *off) | 1497 | loff_t *off) |
1492 | { | 1498 | { |
@@ -1500,7 +1506,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
1500 | return -ENOMEM; | 1506 | return -ENOMEM; |
1501 | 1507 | ||
1502 | mutex_lock(&mce_read_mutex); | 1508 | mutex_lock(&mce_read_mutex); |
1503 | next = rcu_dereference(mcelog.next); | 1509 | next = rcu_dereference_check_mce(mcelog.next); |
1504 | 1510 | ||
1505 | /* Only supports full reads right now */ | 1511 | /* Only supports full reads right now */ |
1506 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { | 1512 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { |
@@ -1565,7 +1571,7 @@ timeout: | |||
1565 | static unsigned int mce_poll(struct file *file, poll_table *wait) | 1571 | static unsigned int mce_poll(struct file *file, poll_table *wait) |
1566 | { | 1572 | { |
1567 | poll_wait(file, &mce_wait, wait); | 1573 | poll_wait(file, &mce_wait, wait); |
1568 | if (rcu_dereference(mcelog.next)) | 1574 | if (rcu_dereference_check_mce(mcelog.next)) |
1569 | return POLLIN | POLLRDNORM; | 1575 | return POLLIN | POLLRDNORM; |
1570 | return 0; | 1576 | return 0; |
1571 | } | 1577 | } |
@@ -2044,6 +2050,7 @@ static __init void mce_init_banks(void) | |||
2044 | struct mce_bank *b = &mce_banks[i]; | 2050 | struct mce_bank *b = &mce_banks[i]; |
2045 | struct sysdev_attribute *a = &b->attr; | 2051 | struct sysdev_attribute *a = &b->attr; |
2046 | 2052 | ||
2053 | sysfs_attr_init(&a->attr); | ||
2047 | a->attr.name = b->attrname; | 2054 | a->attr.name = b->attrname; |
2048 | snprintf(b->attrname, ATTR_LEN, "bank%d", i); | 2055 | snprintf(b->attrname, ATTR_LEN, "bank%d", i); |
2049 | 2056 | ||
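rcu_dereference_check_mce() is the lockdep-aware form of rcu_dereference(): readers of mcelog.next are legal either inside an RCU read-side critical section or while holding mce_read_mutex, and CONFIG_PROVE_RCU builds verify exactly that condition. A generic kernel-style sketch of the pattern, with hypothetical names and assuming the pointer has been published elsewhere with rcu_assign_pointer():

#include <linux/mutex.h>
#include <linux/rcupdate.h>

struct cfg {
	int val;
};

static struct cfg *cur_cfg;	/* published via rcu_assign_pointer() */
static DEFINE_MUTEX(cfg_mutex);

/* Legal under rcu_read_lock() or while holding cfg_mutex; with
 * CONFIG_PROVE_RCU, any other caller triggers a lockdep splat. */
#define cfg_dereference() \
	rcu_dereference_check(cur_cfg, lockdep_is_held(&cfg_mutex))

static int read_val_locked(void)
{
	int v;

	mutex_lock(&cfg_mutex);
	v = cfg_dereference()->val;	/* no rcu_read_lock() needed here */
	mutex_unlock(&cfg_mutex);
	return v;
}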
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 83a3d1f4efca..224392d8fe8c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/errno.h> | 21 | #include <linux/errno.h> |
22 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
23 | #include <linux/sysfs.h> | 23 | #include <linux/sysfs.h> |
24 | #include <linux/slab.h> | ||
24 | #include <linux/init.h> | 25 | #include <linux/init.h> |
25 | #include <linux/cpu.h> | 26 | #include <linux/cpu.h> |
26 | #include <linux/smp.h> | 27 | #include <linux/smp.h> |
@@ -388,7 +389,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, | |||
388 | return ret; | 389 | return ret; |
389 | } | 390 | } |
390 | 391 | ||
391 | static struct sysfs_ops threshold_ops = { | 392 | static const struct sysfs_ops threshold_ops = { |
392 | .show = show, | 393 | .show = show, |
393 | .store = store, | 394 | .store = store, |
394 | }; | 395 | }; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 7c785634af2b..62b48e40920a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * Author: Andi Kleen | 5 | * Author: Andi Kleen |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/gfp.h> | ||
8 | #include <linux/init.h> | 9 | #include <linux/init.h> |
9 | #include <linux/interrupt.h> | 10 | #include <linux/interrupt.h> |
10 | #include <linux/percpu.h> | 11 | #include <linux/percpu.h> |
@@ -95,7 +96,7 @@ static void cmci_discover(int banks, int boot) | |||
95 | 96 | ||
96 | /* Already owned by someone else? */ | 97 | /* Already owned by someone else? */ |
97 | if (val & CMCI_EN) { | 98 | if (val & CMCI_EN) { |
98 | if (test_and_clear_bit(i, owned) || boot) | 99 | if (test_and_clear_bit(i, owned) && !boot) |
99 | print_update("SHD", &hdr, i); | 100 | print_update("SHD", &hdr, i); |
100 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 101 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
101 | continue; | 102 | continue; |
@@ -107,7 +108,7 @@ static void cmci_discover(int banks, int boot) | |||
107 | 108 | ||
108 | /* Did the enable bit stick? -- the bank supports CMCI */ | 109 | /* Did the enable bit stick? -- the bank supports CMCI */ |
109 | if (val & CMCI_EN) { | 110 | if (val & CMCI_EN) { |
110 | if (!test_and_set_bit(i, owned) || boot) | 111 | if (!test_and_set_bit(i, owned) && !boot) |
111 | print_update("CMCI", &hdr, i); | 112 | print_update("CMCI", &hdr, i); |
112 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 113 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
113 | } else { | 114 | } else { |
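The cmci_discover() change hinges on test_and_set_bit()/test_and_clear_bit() returning the bit's previous value: `!test_and_set_bit(i, owned)` is true only when this CPU just acquired the bank, and the switch from `|| boot` to `&& !boot` keeps the initial boot-time discovery pass quiet instead of printing an update for every bank. A userspace stand-in for the semantics (minus atomicity):

#include <stdbool.h>
#include <stdio.h>

/* Userspace stand-in: returns the old bit, then sets it, as the
 * kernel's test_and_set_bit() does (without the atomicity). */
static bool test_and_set(unsigned long *word, int bit)
{
	bool old = *word & (1UL << bit);

	*word |= 1UL << bit;
	return old;
}

int main(void)
{
	unsigned long owned = 0;
	bool boot = false;

	/* first claim succeeds -> message printed */
	if (!test_and_set(&owned, 3) && !boot)
		puts("CMCI bank 3: newly owned");

	/* second claim sees the old bit already set -> stays silent */
	if (!test_and_set(&owned, 3) && !boot)
		puts("CMCI bank 3: newly owned (not reached)");

	return 0;
}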
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile index f4361b56f8e9..ad9e5ed81181 100644 --- a/arch/x86/kernel/cpu/mtrr/Makefile +++ b/arch/x86/kernel/cpu/mtrr/Makefile | |||
@@ -1,3 +1,3 @@ | |||
1 | obj-y := main.o if.o generic.o state.o cleanup.o | 1 | obj-y := main.o if.o generic.o cleanup.o |
2 | obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o | 2 | obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o |
3 | 3 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c index 33af14110dfd..92ba9cd31c9a 100644 --- a/arch/x86/kernel/cpu/mtrr/amd.c +++ b/arch/x86/kernel/cpu/mtrr/amd.c | |||
@@ -108,7 +108,7 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) | |||
108 | return 0; | 108 | return 0; |
109 | } | 109 | } |
110 | 110 | ||
111 | static struct mtrr_ops amd_mtrr_ops = { | 111 | static const struct mtrr_ops amd_mtrr_ops = { |
112 | .vendor = X86_VENDOR_AMD, | 112 | .vendor = X86_VENDOR_AMD, |
113 | .set = amd_set_mtrr, | 113 | .set = amd_set_mtrr, |
114 | .get = amd_get_mtrr, | 114 | .get = amd_get_mtrr, |
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c index de89f14eff3a..316fe3e60a97 100644 --- a/arch/x86/kernel/cpu/mtrr/centaur.c +++ b/arch/x86/kernel/cpu/mtrr/centaur.c | |||
@@ -110,7 +110,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t | |||
110 | return 0; | 110 | return 0; |
111 | } | 111 | } |
112 | 112 | ||
113 | static struct mtrr_ops centaur_mtrr_ops = { | 113 | static const struct mtrr_ops centaur_mtrr_ops = { |
114 | .vendor = X86_VENDOR_CENTAUR, | 114 | .vendor = X86_VENDOR_CENTAUR, |
115 | .set = centaur_set_mcr, | 115 | .set = centaur_set_mcr, |
116 | .get = centaur_get_mcr, | 116 | .get = centaur_get_mcr, |
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 09b1698e0466..06130b52f012 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
@@ -22,10 +22,10 @@ | |||
22 | #include <linux/pci.h> | 22 | #include <linux/pci.h> |
23 | #include <linux/smp.h> | 23 | #include <linux/smp.h> |
24 | #include <linux/cpu.h> | 24 | #include <linux/cpu.h> |
25 | #include <linux/sort.h> | ||
26 | #include <linux/mutex.h> | 25 | #include <linux/mutex.h> |
27 | #include <linux/uaccess.h> | 26 | #include <linux/uaccess.h> |
28 | #include <linux/kvm_para.h> | 27 | #include <linux/kvm_para.h> |
28 | #include <linux/range.h> | ||
29 | 29 | ||
30 | #include <asm/processor.h> | 30 | #include <asm/processor.h> |
31 | #include <asm/e820.h> | 31 | #include <asm/e820.h> |
@@ -34,11 +34,6 @@ | |||
34 | 34 | ||
35 | #include "mtrr.h" | 35 | #include "mtrr.h" |
36 | 36 | ||
37 | struct res_range { | ||
38 | unsigned long start; | ||
39 | unsigned long end; | ||
40 | }; | ||
41 | |||
42 | struct var_mtrr_range_state { | 37 | struct var_mtrr_range_state { |
43 | unsigned long base_pfn; | 38 | unsigned long base_pfn; |
44 | unsigned long size_pfn; | 39 | unsigned long size_pfn; |
@@ -56,7 +51,7 @@ struct var_mtrr_state { | |||
56 | /* Should be related to MTRR_VAR_RANGES nums */ | 51 | /* Should be related to MTRR_VAR_RANGES nums */ |
57 | #define RANGE_NUM 256 | 52 | #define RANGE_NUM 256 |
58 | 53 | ||
59 | static struct res_range __initdata range[RANGE_NUM]; | 54 | static struct range __initdata range[RANGE_NUM]; |
60 | static int __initdata nr_range; | 55 | static int __initdata nr_range; |
61 | 56 | ||
62 | static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; | 57 | static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; |
@@ -64,152 +59,11 @@ static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; | |||
64 | static int __initdata debug_print; | 59 | static int __initdata debug_print; |
65 | #define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) | 60 | #define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) |
66 | 61 | ||
67 | |||
68 | static int __init | ||
69 | add_range(struct res_range *range, int nr_range, | ||
70 | unsigned long start, unsigned long end) | ||
71 | { | ||
72 | /* Out of slots: */ | ||
73 | if (nr_range >= RANGE_NUM) | ||
74 | return nr_range; | ||
75 | |||
76 | range[nr_range].start = start; | ||
77 | range[nr_range].end = end; | ||
78 | |||
79 | nr_range++; | ||
80 | |||
81 | return nr_range; | ||
82 | } | ||
83 | |||
84 | static int __init | ||
85 | add_range_with_merge(struct res_range *range, int nr_range, | ||
86 | unsigned long start, unsigned long end) | ||
87 | { | ||
88 | int i; | ||
89 | |||
90 | /* Try to merge it with old one: */ | ||
91 | for (i = 0; i < nr_range; i++) { | ||
92 | unsigned long final_start, final_end; | ||
93 | unsigned long common_start, common_end; | ||
94 | |||
95 | if (!range[i].end) | ||
96 | continue; | ||
97 | |||
98 | common_start = max(range[i].start, start); | ||
99 | common_end = min(range[i].end, end); | ||
100 | if (common_start > common_end + 1) | ||
101 | continue; | ||
102 | |||
103 | final_start = min(range[i].start, start); | ||
104 | final_end = max(range[i].end, end); | ||
105 | |||
106 | range[i].start = final_start; | ||
107 | range[i].end = final_end; | ||
108 | return nr_range; | ||
109 | } | ||
110 | |||
111 | /* Need to add it: */ | ||
112 | return add_range(range, nr_range, start, end); | ||
113 | } | ||
114 | |||
115 | static void __init | ||
116 | subtract_range(struct res_range *range, unsigned long start, unsigned long end) | ||
117 | { | ||
118 | int i, j; | ||
119 | |||
120 | for (j = 0; j < RANGE_NUM; j++) { | ||
121 | if (!range[j].end) | ||
122 | continue; | ||
123 | |||
124 | if (start <= range[j].start && end >= range[j].end) { | ||
125 | range[j].start = 0; | ||
126 | range[j].end = 0; | ||
127 | continue; | ||
128 | } | ||
129 | |||
130 | if (start <= range[j].start && end < range[j].end && | ||
131 | range[j].start < end + 1) { | ||
132 | range[j].start = end + 1; | ||
133 | continue; | ||
134 | } | ||
135 | |||
136 | |||
137 | if (start > range[j].start && end >= range[j].end && | ||
138 | range[j].end > start - 1) { | ||
139 | range[j].end = start - 1; | ||
140 | continue; | ||
141 | } | ||
142 | |||
143 | if (start > range[j].start && end < range[j].end) { | ||
144 | /* Find the new spare: */ | ||
145 | for (i = 0; i < RANGE_NUM; i++) { | ||
146 | if (range[i].end == 0) | ||
147 | break; | ||
148 | } | ||
149 | if (i < RANGE_NUM) { | ||
150 | range[i].end = range[j].end; | ||
151 | range[i].start = end + 1; | ||
152 | } else { | ||
153 | printk(KERN_ERR "run of slot in ranges\n"); | ||
154 | } | ||
155 | range[j].end = start - 1; | ||
156 | continue; | ||
157 | } | ||
158 | } | ||
159 | } | ||
160 | |||
161 | static int __init cmp_range(const void *x1, const void *x2) | ||
162 | { | ||
163 | const struct res_range *r1 = x1; | ||
164 | const struct res_range *r2 = x2; | ||
165 | long start1, start2; | ||
166 | |||
167 | start1 = r1->start; | ||
168 | start2 = r2->start; | ||
169 | |||
170 | return start1 - start2; | ||
171 | } | ||
172 | |||
173 | static int __init clean_sort_range(struct res_range *range, int az) | ||
174 | { | ||
175 | int i, j, k = az - 1, nr_range = 0; | ||
176 | |||
177 | for (i = 0; i < k; i++) { | ||
178 | if (range[i].end) | ||
179 | continue; | ||
180 | for (j = k; j > i; j--) { | ||
181 | if (range[j].end) { | ||
182 | k = j; | ||
183 | break; | ||
184 | } | ||
185 | } | ||
186 | if (j == i) | ||
187 | break; | ||
188 | range[i].start = range[k].start; | ||
189 | range[i].end = range[k].end; | ||
190 | range[k].start = 0; | ||
191 | range[k].end = 0; | ||
192 | k--; | ||
193 | } | ||
194 | /* count it */ | ||
195 | for (i = 0; i < az; i++) { | ||
196 | if (!range[i].end) { | ||
197 | nr_range = i; | ||
198 | break; | ||
199 | } | ||
200 | } | ||
201 | |||
202 | /* sort them */ | ||
203 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | ||
204 | |||
205 | return nr_range; | ||
206 | } | ||
207 | |||
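The five helpers deleted above kept inclusive end values (end = start + size - 1), which forces "+ 1" corrections into every merge, subtraction, and adjacency test. The generic replacements used in the hunks below (add_range_with_merge(), subtract_range(), sort_range(), all taking an explicit array size) switch to half-open [start, end) ranges. A minimal sketch of why the half-open convention is simpler; this is an illustration, not the kernel's generic range code:

	/* Illustration only: with half-open [start, end) ranges,
	 * "a touches or overlaps b" needs no +1 adjacency fix-up. */
	struct range { unsigned long long start, end; };	/* [start, end) */

	static int ranges_mergeable(struct range a, struct range b)
	{
		return a.start <= b.end && b.start <= a.end;
	}

	static struct range range_union(struct range a, struct range b)
	{
		struct range r;

		r.start = a.start < b.start ? a.start : b.start;
		r.end = a.end > b.end ? a.end : b.end;
		return r;
	}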
208 | #define BIOS_BUG_MSG KERN_WARNING \ | 62 | #define BIOS_BUG_MSG KERN_WARNING \ |
209 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" | 63 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" |
210 | 64 | ||
211 | static int __init | 65 | static int __init |
212 | x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | 66 | x86_get_mtrr_mem_range(struct range *range, int nr_range, |
213 | unsigned long extra_remove_base, | 67 | unsigned long extra_remove_base, |
214 | unsigned long extra_remove_size) | 68 | unsigned long extra_remove_size) |
215 | { | 69 | { |
@@ -223,14 +77,14 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
223 | continue; | 77 | continue; |
224 | base = range_state[i].base_pfn; | 78 | base = range_state[i].base_pfn; |
225 | size = range_state[i].size_pfn; | 79 | size = range_state[i].size_pfn; |
226 | nr_range = add_range_with_merge(range, nr_range, base, | 80 | nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, |
227 | base + size - 1); | 81 | base, base + size); |
228 | } | 82 | } |
229 | if (debug_print) { | 83 | if (debug_print) { |
230 | printk(KERN_DEBUG "After WB checking\n"); | 84 | printk(KERN_DEBUG "After WB checking\n"); |
231 | for (i = 0; i < nr_range; i++) | 85 | for (i = 0; i < nr_range; i++) |
232 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 86 | printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", |
233 | range[i].start, range[i].end + 1); | 87 | range[i].start, range[i].end); |
234 | } | 88 | } |
235 | 89 | ||
236 | /* Take out UC ranges: */ | 90 | /* Take out UC ranges: */ |
@@ -252,19 +106,19 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
252 | size -= (1<<(20-PAGE_SHIFT)) - base; | 106 | size -= (1<<(20-PAGE_SHIFT)) - base; |
253 | base = 1<<(20-PAGE_SHIFT); | 107 | base = 1<<(20-PAGE_SHIFT); |
254 | } | 108 | } |
255 | subtract_range(range, base, base + size - 1); | 109 | subtract_range(range, RANGE_NUM, base, base + size); |
256 | } | 110 | } |
257 | if (extra_remove_size) | 111 | if (extra_remove_size) |
258 | subtract_range(range, extra_remove_base, | 112 | subtract_range(range, RANGE_NUM, extra_remove_base, |
259 | extra_remove_base + extra_remove_size - 1); | 113 | extra_remove_base + extra_remove_size); |
260 | 114 | ||
261 | if (debug_print) { | 115 | if (debug_print) { |
262 | printk(KERN_DEBUG "After UC checking\n"); | 116 | printk(KERN_DEBUG "After UC checking\n"); |
263 | for (i = 0; i < RANGE_NUM; i++) { | 117 | for (i = 0; i < RANGE_NUM; i++) { |
264 | if (!range[i].end) | 118 | if (!range[i].end) |
265 | continue; | 119 | continue; |
266 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 120 | printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", |
267 | range[i].start, range[i].end + 1); | 121 | range[i].start, range[i].end); |
268 | } | 122 | } |
269 | } | 123 | } |
270 | 124 | ||
@@ -273,26 +127,22 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
273 | if (debug_print) { | 127 | if (debug_print) { |
274 | printk(KERN_DEBUG "After sorting\n"); | 128 | printk(KERN_DEBUG "After sorting\n"); |
275 | for (i = 0; i < nr_range; i++) | 129 | for (i = 0; i < nr_range; i++) |
276 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 130 | printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", |
277 | range[i].start, range[i].end + 1); | 131 | range[i].start, range[i].end); |
278 | } | 132 | } |
279 | 133 | ||
280 | /* clear those that are not used */ | ||
281 | for (i = nr_range; i < RANGE_NUM; i++) | ||
282 | memset(&range[i], 0, sizeof(range[i])); | ||
283 | |||
284 | return nr_range; | 134 | return nr_range; |
285 | } | 135 | } |
286 | 136 | ||
287 | #ifdef CONFIG_MTRR_SANITIZER | 137 | #ifdef CONFIG_MTRR_SANITIZER |
288 | 138 | ||
289 | static unsigned long __init sum_ranges(struct res_range *range, int nr_range) | 139 | static unsigned long __init sum_ranges(struct range *range, int nr_range) |
290 | { | 140 | { |
291 | unsigned long sum = 0; | 141 | unsigned long sum = 0; |
292 | int i; | 142 | int i; |
293 | 143 | ||
294 | for (i = 0; i < nr_range; i++) | 144 | for (i = 0; i < nr_range; i++) |
295 | sum += range[i].end + 1 - range[i].start; | 145 | sum += range[i].end - range[i].start; |
296 | 146 | ||
297 | return sum; | 147 | return sum; |
298 | } | 148 | } |
@@ -621,7 +471,7 @@ static int __init parse_mtrr_spare_reg(char *arg) | |||
621 | early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); | 471 | early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); |
622 | 472 | ||
623 | static int __init | 473 | static int __init |
624 | x86_setup_var_mtrrs(struct res_range *range, int nr_range, | 474 | x86_setup_var_mtrrs(struct range *range, int nr_range, |
625 | u64 chunk_size, u64 gran_size) | 475 | u64 chunk_size, u64 gran_size) |
626 | { | 476 | { |
627 | struct var_mtrr_state var_state; | 477 | struct var_mtrr_state var_state; |
@@ -639,7 +489,7 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, | |||
639 | /* Write the range: */ | 489 | /* Write the range: */ |
640 | for (i = 0; i < nr_range; i++) { | 490 | for (i = 0; i < nr_range; i++) { |
641 | set_var_mtrr_range(&var_state, range[i].start, | 491 | set_var_mtrr_range(&var_state, range[i].start, |
642 | range[i].end - range[i].start + 1); | 492 | range[i].end - range[i].start); |
643 | } | 493 | } |
644 | 494 | ||
645 | /* Write the last range: */ | 495 | /* Write the last range: */ |
@@ -742,7 +592,7 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size, | |||
742 | unsigned long x_remove_base, | 592 | unsigned long x_remove_base, |
743 | unsigned long x_remove_size, int i) | 593 | unsigned long x_remove_size, int i) |
744 | { | 594 | { |
745 | static struct res_range range_new[RANGE_NUM]; | 595 | static struct range range_new[RANGE_NUM]; |
746 | unsigned long range_sums_new; | 596 | unsigned long range_sums_new; |
747 | static int nr_range_new; | 597 | static int nr_range_new; |
748 | int num_reg; | 598 | int num_reg; |
@@ -869,10 +719,10 @@ int __init mtrr_cleanup(unsigned address_bits) | |||
869 | * [0, 1M) should always be covered by var mtrr with WB | 719 | * [0, 1M) should always be covered by var mtrr with WB |
870 | * and fixed mtrrs should take effect before var mtrr for it: | 720 | * and fixed mtrrs should take effect before var mtrr for it: |
871 | */ | 721 | */ |
872 | nr_range = add_range_with_merge(range, nr_range, 0, | 722 | nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0, |
873 | (1ULL<<(20 - PAGE_SHIFT)) - 1); | 723 | 1ULL<<(20 - PAGE_SHIFT)); |
874 | /* Sort the ranges: */ | 724 | /* Sort the ranges: */ |
875 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | 725 | sort_range(range, nr_range); |
876 | 726 | ||
877 | range_sums = sum_ranges(range, nr_range); | 727 | range_sums = sum_ranges(range, nr_range); |
878 | printk(KERN_INFO "total RAM covered: %ldM\n", | 728 | printk(KERN_INFO "total RAM covered: %ldM\n", |
@@ -1089,9 +939,9 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1089 | nr_range = 0; | 939 | nr_range = 0; |
1090 | if (mtrr_tom2) { | 940 | if (mtrr_tom2) { |
1091 | range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); | 941 | range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); |
1092 | range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; | 942 | range[nr_range].end = mtrr_tom2 >> PAGE_SHIFT; |
1093 | if (highest_pfn < range[nr_range].end + 1) | 943 | if (highest_pfn < range[nr_range].end) |
1094 | highest_pfn = range[nr_range].end + 1; | 944 | highest_pfn = range[nr_range].end; |
1095 | nr_range++; | 945 | nr_range++; |
1096 | } | 946 | } |
1097 | nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); | 947 | nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); |
@@ -1103,15 +953,15 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1103 | 953 | ||
1104 | /* Check the holes: */ | 954 | /* Check the holes: */ |
1105 | for (i = 0; i < nr_range - 1; i++) { | 955 | for (i = 0; i < nr_range - 1; i++) { |
1106 | if (range[i].end + 1 < range[i+1].start) | 956 | if (range[i].end < range[i+1].start) |
1107 | total_trim_size += real_trim_memory(range[i].end + 1, | 957 | total_trim_size += real_trim_memory(range[i].end, |
1108 | range[i+1].start); | 958 | range[i+1].start); |
1109 | } | 959 | } |
1110 | 960 | ||
1111 | /* Check the top: */ | 961 | /* Check the top: */ |
1112 | i = nr_range - 1; | 962 | i = nr_range - 1; |
1113 | if (range[i].end + 1 < end_pfn) | 963 | if (range[i].end < end_pfn) |
1114 | total_trim_size += real_trim_memory(range[i].end + 1, | 964 | total_trim_size += real_trim_memory(range[i].end, |
1115 | end_pfn); | 965 | end_pfn); |
1116 | 966 | ||
1117 | if (total_trim_size) { | 967 | if (total_trim_size) { |
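With exclusive ends, the hole between two sorted ranges is simply [range[i].end, range[i+1].start) and exists exactly when range[i].end < range[i+1].start, which is what the rewritten trim loop above now tests without any "+ 1" terms. A compact standalone model of that walk (trim_cb is a hypothetical stand-in for real_trim_memory(), and struct range is the half-open range from the sketch above):

	struct range { unsigned long long start, end; };	/* [start, end) */

	static unsigned long long
	walk_holes(const struct range *range, int nr_range,
		   unsigned long long end_pfn,
		   unsigned long long (*trim_cb)(unsigned long long start,
						 unsigned long long end))
	{
		unsigned long long trimmed = 0;
		int i;

		/* holes between consecutive sorted ranges */
		for (i = 0; i < nr_range - 1; i++)
			if (range[i].end < range[i + 1].start)
				trimmed += trim_cb(range[i].end,
						   range[i + 1].start);

		/* anything above the last range, up to end_pfn */
		if (nr_range && range[nr_range - 1].end < end_pfn)
			trimmed += trim_cb(range[nr_range - 1].end, end_pfn);

		return trimmed;
	}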
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 228d982ce09c..68a3343e5798 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c | |||
@@ -265,7 +265,7 @@ static void cyrix_set_all(void) | |||
265 | post_set(); | 265 | post_set(); |
266 | } | 266 | } |
267 | 267 | ||
268 | static struct mtrr_ops cyrix_mtrr_ops = { | 268 | static const struct mtrr_ops cyrix_mtrr_ops = { |
269 | .vendor = X86_VENDOR_CYRIX, | 269 | .vendor = X86_VENDOR_CYRIX, |
270 | .set_all = cyrix_set_all, | 270 | .set_all = cyrix_set_all, |
271 | .set = cyrix_set_arr, | 271 | .set = cyrix_set_arr, |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 55da0c5f68dd..fd31a441c61c 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -6,7 +6,6 @@ | |||
6 | 6 | ||
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/slab.h> | ||
10 | #include <linux/io.h> | 9 | #include <linux/io.h> |
11 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
12 | 11 | ||
@@ -464,7 +463,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
464 | tmp |= ~((1<<(hi - 1)) - 1); | 463 | tmp |= ~((1<<(hi - 1)) - 1); |
465 | 464 | ||
466 | if (tmp != mask_lo) { | 465 | if (tmp != mask_lo) { |
467 | WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); | 466 | printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); |
468 | mask_lo = tmp; | 467 | mask_lo = tmp; |
469 | } | 468 | } |
470 | } | 469 | } |
@@ -570,7 +569,7 @@ static unsigned long set_mtrr_state(void) | |||
570 | 569 | ||
571 | 570 | ||
572 | static unsigned long cr4; | 571 | static unsigned long cr4; |
573 | static DEFINE_SPINLOCK(set_atomicity_lock); | 572 | static DEFINE_RAW_SPINLOCK(set_atomicity_lock); |
574 | 573 | ||
575 | /* | 574 | /* |
576 | * Since we are disabling the cache don't allow any interrupts, | 575 | * Since we are disabling the cache don't allow any interrupts, |
@@ -590,7 +589,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) | |||
590 | * changes to the way the kernel boots | 589 | * changes to the way the kernel boots |
591 | */ | 590 | */ |
592 | 591 | ||
593 | spin_lock(&set_atomicity_lock); | 592 | raw_spin_lock(&set_atomicity_lock); |
594 | 593 | ||
595 | /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ | 594 | /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ |
596 | cr0 = read_cr0() | X86_CR0_CD; | 595 | cr0 = read_cr0() | X86_CR0_CD; |
@@ -627,7 +626,7 @@ static void post_set(void) __releases(set_atomicity_lock) | |||
627 | /* Restore value of CR4 */ | 626 | /* Restore value of CR4 */ |
628 | if (cpu_has_pge) | 627 | if (cpu_has_pge) |
629 | write_cr4(cr4); | 628 | write_cr4(cr4); |
630 | spin_unlock(&set_atomicity_lock); | 629 | raw_spin_unlock(&set_atomicity_lock); |
631 | } | 630 | } |
632 | 631 | ||
633 | static void generic_set_all(void) | 632 | static void generic_set_all(void) |
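set_atomicity_lock becomes a raw spinlock because prepare_set()/post_set() spin with caches disabled and interrupts off on every CPU; on PREEMPT_RT an ordinary spinlock turns into a sleeping lock, which would be illegal in this context. The usage pattern, reduced to a minimal sketch:

	/* Minimal sketch of the raw-spinlock pattern used above:
	 * raw_spinlock_t never becomes a sleeping lock on PREEMPT_RT,
	 * so it is safe in sections that must truly spin. */
	#include <linux/spinlock.h>

	static DEFINE_RAW_SPINLOCK(demo_lock);

	static void critical_section(void)
	{
		raw_spin_lock(&demo_lock);
		/* ... hardware state that must not be preempted ... */
		raw_spin_unlock(&demo_lock);
	}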
@@ -752,7 +751,7 @@ int positive_have_wrcomb(void) | |||
752 | /* | 751 | /* |
753 | * Generic structure... | 752 | * Generic structure... |
754 | */ | 753 | */ |
755 | struct mtrr_ops generic_mtrr_ops = { | 754 | const struct mtrr_ops generic_mtrr_ops = { |
756 | .use_intel_if = 1, | 755 | .use_intel_if = 1, |
757 | .set_all = generic_set_all, | 756 | .set_all = generic_set_all, |
758 | .get = generic_get_mtrr, | 757 | .get = generic_get_mtrr, |
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index e006e56f699c..79289632cb27 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/ctype.h> | 6 | #include <linux/ctype.h> |
7 | #include <linux/string.h> | 7 | #include <linux/string.h> |
8 | #include <linux/slab.h> | ||
8 | #include <linux/init.h> | 9 | #include <linux/init.h> |
9 | 10 | ||
10 | #define LINE_SIZE 80 | 11 | #define LINE_SIZE 80 |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 84e83de54575..79556bd9b602 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -60,14 +60,14 @@ static DEFINE_MUTEX(mtrr_mutex); | |||
60 | u64 size_or_mask, size_and_mask; | 60 | u64 size_or_mask, size_and_mask; |
61 | static bool mtrr_aps_delayed_init; | 61 | static bool mtrr_aps_delayed_init; |
62 | 62 | ||
63 | static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; | 63 | static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; |
64 | 64 | ||
65 | struct mtrr_ops *mtrr_if; | 65 | const struct mtrr_ops *mtrr_if; |
66 | 66 | ||
67 | static void set_mtrr(unsigned int reg, unsigned long base, | 67 | static void set_mtrr(unsigned int reg, unsigned long base, |
68 | unsigned long size, mtrr_type type); | 68 | unsigned long size, mtrr_type type); |
69 | 69 | ||
70 | void set_mtrr_ops(struct mtrr_ops *ops) | 70 | void set_mtrr_ops(const struct mtrr_ops *ops) |
71 | { | 71 | { |
72 | if (ops->vendor && ops->vendor < X86_VENDOR_NUM) | 72 | if (ops->vendor && ops->vendor < X86_VENDOR_NUM) |
73 | mtrr_ops[ops->vendor] = ops; | 73 | mtrr_ops[ops->vendor] = ops; |
@@ -145,6 +145,7 @@ struct set_mtrr_data { | |||
145 | 145 | ||
146 | /** | 146 | /** |
147 | * ipi_handler - Synchronisation handler. Executed by "other" CPUs. | 147 | * ipi_handler - Synchronisation handler. Executed by "other" CPUs. |
148 | * @info: pointer to mtrr configuration data | ||
148 | * | 149 | * |
149 | * Returns nothing. | 150 | * Returns nothing. |
150 | */ | 151 | */ |
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index a501dee9a87a..df5e41f31a27 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h | |||
@@ -32,7 +32,7 @@ extern int generic_get_free_region(unsigned long base, unsigned long size, | |||
32 | extern int generic_validate_add_page(unsigned long base, unsigned long size, | 32 | extern int generic_validate_add_page(unsigned long base, unsigned long size, |
33 | unsigned int type); | 33 | unsigned int type); |
34 | 34 | ||
35 | extern struct mtrr_ops generic_mtrr_ops; | 35 | extern const struct mtrr_ops generic_mtrr_ops; |
36 | 36 | ||
37 | extern int positive_have_wrcomb(void); | 37 | extern int positive_have_wrcomb(void); |
38 | 38 | ||
@@ -53,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index, | |||
53 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); | 53 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); |
54 | void get_mtrr_state(void); | 54 | void get_mtrr_state(void); |
55 | 55 | ||
56 | extern void set_mtrr_ops(struct mtrr_ops *ops); | 56 | extern void set_mtrr_ops(const struct mtrr_ops *ops); |
57 | 57 | ||
58 | extern u64 size_or_mask, size_and_mask; | 58 | extern u64 size_or_mask, size_and_mask; |
59 | extern struct mtrr_ops *mtrr_if; | 59 | extern const struct mtrr_ops *mtrr_if; |
60 | 60 | ||
61 | #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) | 61 | #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) |
62 | #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) | 62 | #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) |
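Constifying every mtrr_ops instance lets the vendor dispatch tables live in read-only data; the cost is that each pointer along the dispatch path (mtrr_ops[], mtrr_if, set_mtrr_ops()) must become pointer-to-const, which is what the cyrix.c, generic.c, main.c and mtrr.h hunks above do in lockstep. The shape of the pattern, as a small sketch with abbreviated, hypothetical fields rather than the full struct mtrr_ops:

	struct ops {
		int vendor;
		void (*set_all)(void);
	};

	static const struct ops *registry[8];	/* indexed by vendor id */

	static void register_ops(const struct ops *ops)
	{
		if (ops->vendor && ops->vendor < 8)
			registry[ops->vendor] = ops;	/* table data itself stays read-only */
	}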
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c deleted file mode 100644 index dfc80b4e6b0d..000000000000 --- a/arch/x86/kernel/cpu/mtrr/state.c +++ /dev/null | |||
@@ -1,94 +0,0 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/io.h> | ||
3 | #include <linux/mm.h> | ||
4 | |||
5 | #include <asm/processor-cyrix.h> | ||
6 | #include <asm/processor-flags.h> | ||
7 | #include <asm/mtrr.h> | ||
8 | #include <asm/msr.h> | ||
9 | |||
10 | #include "mtrr.h" | ||
11 | |||
12 | /* Put the processor into a state where MTRRs can be safely set */ | ||
13 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) | ||
14 | { | ||
15 | unsigned int cr0; | ||
16 | |||
17 | /* Disable interrupts locally */ | ||
18 | local_irq_save(ctxt->flags); | ||
19 | |||
20 | if (use_intel() || is_cpu(CYRIX)) { | ||
21 | |||
22 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ | ||
23 | if (cpu_has_pge) { | ||
24 | ctxt->cr4val = read_cr4(); | ||
25 | write_cr4(ctxt->cr4val & ~X86_CR4_PGE); | ||
26 | } | ||
27 | |||
28 | /* | ||
29 | * Disable and flush caches. Note that wbinvd flushes the TLBs | ||
30 | * as a side-effect | ||
31 | */ | ||
32 | cr0 = read_cr0() | X86_CR0_CD; | ||
33 | wbinvd(); | ||
34 | write_cr0(cr0); | ||
35 | wbinvd(); | ||
36 | |||
37 | if (use_intel()) { | ||
38 | /* Save MTRR state */ | ||
39 | rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); | ||
40 | } else { | ||
41 | /* | ||
42 | * Cyrix ARRs - | ||
43 | * everything else was excluded at the top | ||
44 | */ | ||
45 | ctxt->ccr3 = getCx86(CX86_CCR3); | ||
46 | } | ||
47 | } | ||
48 | } | ||
49 | |||
50 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) | ||
51 | { | ||
52 | if (use_intel()) { | ||
53 | /* Disable MTRRs, and set the default type to uncached */ | ||
54 | mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL, | ||
55 | ctxt->deftype_hi); | ||
56 | } else { | ||
57 | if (is_cpu(CYRIX)) { | ||
58 | /* Cyrix ARRs - everything else was excluded at the top */ | ||
59 | setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); | ||
60 | } | ||
61 | } | ||
62 | } | ||
63 | |||
64 | /* Restore the processor after a set_mtrr_prepare */ | ||
65 | void set_mtrr_done(struct set_mtrr_context *ctxt) | ||
66 | { | ||
67 | if (use_intel() || is_cpu(CYRIX)) { | ||
68 | |||
69 | /* Flush caches and TLBs */ | ||
70 | wbinvd(); | ||
71 | |||
72 | /* Restore MTRRdefType */ | ||
73 | if (use_intel()) { | ||
74 | /* Intel (P6) standard MTRRs */ | ||
75 | mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, | ||
76 | ctxt->deftype_hi); | ||
77 | } else { | ||
78 | /* | ||
79 | * Cyrix ARRs - | ||
80 | * everything else was excluded at the top | ||
81 | */ | ||
82 | setCx86(CX86_CCR3, ctxt->ccr3); | ||
83 | } | ||
84 | |||
85 | /* Enable caches */ | ||
86 | write_cr0(read_cr0() & 0xbfffffff); | ||
87 | |||
88 | /* Restore value of CR4 */ | ||
89 | if (cpu_has_pge) | ||
90 | write_cr4(ctxt->cr4val); | ||
91 | } | ||
92 | /* Re-enable interrupts locally (if enabled previously) */ | ||
93 | local_irq_restore(ctxt->flags); | ||
94 | } | ||
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c223b7e895d9..db5bdc8addf8 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -7,6 +7,7 @@ | |||
7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | 7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter |
8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> |
9 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> | 9 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> |
10 | * Copyright (C) 2009 Google, Inc., Stephane Eranian | ||
10 | * | 11 | * |
11 | * For licencing details see kernel-base/COPYING | 12 | * For licencing details see kernel-base/COPYING |
12 | */ | 13 | */ |
@@ -20,12 +21,15 @@ | |||
20 | #include <linux/kdebug.h> | 21 | #include <linux/kdebug.h> |
21 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
22 | #include <linux/uaccess.h> | 23 | #include <linux/uaccess.h> |
24 | #include <linux/slab.h> | ||
23 | #include <linux/highmem.h> | 25 | #include <linux/highmem.h> |
24 | #include <linux/cpu.h> | 26 | #include <linux/cpu.h> |
27 | #include <linux/bitops.h> | ||
25 | 28 | ||
26 | #include <asm/apic.h> | 29 | #include <asm/apic.h> |
27 | #include <asm/stacktrace.h> | 30 | #include <asm/stacktrace.h> |
28 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
32 | #include <asm/compat.h> | ||
29 | 33 | ||
30 | static u64 perf_event_mask __read_mostly; | 34 | static u64 perf_event_mask __read_mostly; |
31 | 35 | ||
@@ -68,26 +72,59 @@ struct debug_store { | |||
68 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | 72 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; |
69 | }; | 73 | }; |
70 | 74 | ||
75 | struct event_constraint { | ||
76 | union { | ||
77 | unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
78 | u64 idxmsk64; | ||
79 | }; | ||
80 | u64 code; | ||
81 | u64 cmask; | ||
82 | int weight; | ||
83 | }; | ||
84 | |||
85 | struct amd_nb { | ||
86 | int nb_id; /* NorthBridge id */ | ||
87 | int refcnt; /* reference count */ | ||
88 | struct perf_event *owners[X86_PMC_IDX_MAX]; | ||
89 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; | ||
90 | }; | ||
91 | |||
71 | struct cpu_hw_events { | 92 | struct cpu_hw_events { |
72 | struct perf_event *events[X86_PMC_IDX_MAX]; | 93 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ |
73 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
74 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 94 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
75 | unsigned long interrupts; | 95 | unsigned long interrupts; |
76 | int enabled; | 96 | int enabled; |
77 | struct debug_store *ds; | 97 | struct debug_store *ds; |
78 | }; | ||
79 | 98 | ||
80 | struct event_constraint { | 99 | int n_events; |
81 | unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 100 | int n_added; |
82 | int code; | 101 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ |
102 | u64 tags[X86_PMC_IDX_MAX]; | ||
103 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ | ||
104 | struct amd_nb *amd_nb; | ||
83 | }; | 105 | }; |
84 | 106 | ||
85 | #define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } | 107 | #define __EVENT_CONSTRAINT(c, n, m, w) {\ |
86 | #define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } | 108 | { .idxmsk64 = (n) }, \ |
109 | .code = (c), \ | ||
110 | .cmask = (m), \ | ||
111 | .weight = (w), \ | ||
112 | } | ||
113 | |||
114 | #define EVENT_CONSTRAINT(c, n, m) \ | ||
115 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) | ||
87 | 116 | ||
88 | #define for_each_event_constraint(e, c) \ | 117 | #define INTEL_EVENT_CONSTRAINT(c, n) \ |
89 | for ((e) = (c); (e)->idxmsk[0]; (e)++) | 118 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) |
90 | 119 | ||
120 | #define FIXED_EVENT_CONSTRAINT(c, n) \ | ||
121 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) | ||
122 | |||
123 | #define EVENT_CONSTRAINT_END \ | ||
124 | EVENT_CONSTRAINT(0, 0, 0) | ||
125 | |||
126 | #define for_each_event_constraint(e, c) \ | ||
127 | for ((e) = (c); (e)->cmask; (e)++) | ||
91 | 128 | ||
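Each constraint now carries both the bitmask of counters the event may use (idxmsk64) and that mask's population count (weight), computed at build time by HWEIGHT() so the scheduler can place the most constrained events first. For a hypothetical Intel event 0x51 restricted to counters 0-1, the macro chain expands roughly to:

	/* INTEL_EVENT_CONSTRAINT(0x51, 0x3) ==>
	 * EVENT_CONSTRAINT(0x51, 0x3, INTEL_ARCH_EVTSEL_MASK) ==> */
	{
		{ .idxmsk64 = 0x3 },		/* may run on PMC0 or PMC1 */
		.code	= 0x51,
		.cmask	= INTEL_ARCH_EVTSEL_MASK,
		.weight	= 2,			/* HWEIGHT(0x3): two eligible counters */
	}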
92 | /* | 129 | /* |
93 | * struct x86_pmu - generic x86 pmu | 130 | * struct x86_pmu - generic x86 pmu |
@@ -98,8 +135,8 @@ struct x86_pmu { | |||
98 | int (*handle_irq)(struct pt_regs *); | 135 | int (*handle_irq)(struct pt_regs *); |
99 | void (*disable_all)(void); | 136 | void (*disable_all)(void); |
100 | void (*enable_all)(void); | 137 | void (*enable_all)(void); |
101 | void (*enable)(struct hw_perf_event *, int); | 138 | void (*enable)(struct perf_event *); |
102 | void (*disable)(struct hw_perf_event *, int); | 139 | void (*disable)(struct perf_event *); |
103 | unsigned eventsel; | 140 | unsigned eventsel; |
104 | unsigned perfctr; | 141 | unsigned perfctr; |
105 | u64 (*event_map)(int); | 142 | u64 (*event_map)(int); |
@@ -114,121 +151,28 @@ struct x86_pmu { | |||
114 | u64 intel_ctrl; | 151 | u64 intel_ctrl; |
115 | void (*enable_bts)(u64 config); | 152 | void (*enable_bts)(u64 config); |
116 | void (*disable_bts)(void); | 153 | void (*disable_bts)(void); |
117 | int (*get_event_idx)(struct cpu_hw_events *cpuc, | ||
118 | struct hw_perf_event *hwc); | ||
119 | }; | ||
120 | 154 | ||
121 | static struct x86_pmu x86_pmu __read_mostly; | 155 | struct event_constraint * |
156 | (*get_event_constraints)(struct cpu_hw_events *cpuc, | ||
157 | struct perf_event *event); | ||
122 | 158 | ||
123 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { | 159 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
124 | .enabled = 1, | 160 | struct perf_event *event); |
125 | }; | 161 | struct event_constraint *event_constraints; |
126 | 162 | ||
127 | static const struct event_constraint *event_constraints; | 163 | int (*cpu_prepare)(int cpu); |
128 | 164 | void (*cpu_starting)(int cpu); | |
129 | /* | 165 | void (*cpu_dying)(int cpu); |
130 | * Not sure about some of these | 166 | void (*cpu_dead)(int cpu); |
131 | */ | ||
132 | static const u64 p6_perfmon_event_map[] = | ||
133 | { | ||
134 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, | ||
135 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
136 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, | ||
137 | [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, | ||
138 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
139 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
140 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, | ||
141 | }; | ||
142 | |||
143 | static u64 p6_pmu_event_map(int hw_event) | ||
144 | { | ||
145 | return p6_perfmon_event_map[hw_event]; | ||
146 | } | ||
147 | |||
148 | /* | ||
149 | * Event setting that is specified not to count anything. | ||
150 | * We use this to effectively disable a counter. | ||
151 | * | ||
152 | * L2_RQSTS with 0 MESI unit mask. | ||
153 | */ | ||
154 | #define P6_NOP_EVENT 0x0000002EULL | ||
155 | |||
156 | static u64 p6_pmu_raw_event(u64 hw_event) | ||
157 | { | ||
158 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
159 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
160 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
161 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
162 | #define P6_EVNTSEL_REG_MASK 0xFF000000ULL | ||
163 | |||
164 | #define P6_EVNTSEL_MASK \ | ||
165 | (P6_EVNTSEL_EVENT_MASK | \ | ||
166 | P6_EVNTSEL_UNIT_MASK | \ | ||
167 | P6_EVNTSEL_EDGE_MASK | \ | ||
168 | P6_EVNTSEL_INV_MASK | \ | ||
169 | P6_EVNTSEL_REG_MASK) | ||
170 | |||
171 | return hw_event & P6_EVNTSEL_MASK; | ||
172 | } | ||
173 | |||
174 | static const struct event_constraint intel_p6_event_constraints[] = | ||
175 | { | ||
176 | EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | ||
177 | EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
178 | EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ | ||
179 | EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
180 | EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
181 | EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
182 | EVENT_CONSTRAINT_END | ||
183 | }; | ||
184 | |||
185 | /* | ||
186 | * Intel PerfMon v3. Used on Core2 and later. | ||
187 | */ | ||
188 | static const u64 intel_perfmon_event_map[] = | ||
189 | { | ||
190 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, | ||
191 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
192 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, | ||
193 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, | ||
194 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
195 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
196 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | ||
197 | }; | 167 | }; |
198 | 168 | ||
199 | static const struct event_constraint intel_core_event_constraints[] = | 169 | static struct x86_pmu x86_pmu __read_mostly; |
200 | { | ||
201 | EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
202 | EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
203 | EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
204 | EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
205 | EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
206 | EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ | ||
207 | EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
208 | EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ | ||
209 | EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ | ||
210 | EVENT_CONSTRAINT_END | ||
211 | }; | ||
212 | 170 | ||
213 | static const struct event_constraint intel_nehalem_event_constraints[] = | 171 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { |
214 | { | 172 | .enabled = 1, |
215 | EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ | ||
216 | EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ | ||
217 | EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ | ||
218 | EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ | ||
219 | EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ | ||
220 | EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ | ||
221 | EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
222 | EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ | ||
223 | EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ | ||
224 | EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ | ||
225 | EVENT_CONSTRAINT_END | ||
226 | }; | 173 | }; |
227 | 174 | ||
228 | static u64 intel_pmu_event_map(int hw_event) | 175 | static int x86_perf_event_set_period(struct perf_event *event); |
229 | { | ||
230 | return intel_perfmon_event_map[hw_event]; | ||
231 | } | ||
232 | 176 | ||
233 | /* | 177 | /* |
234 | * Generalized hw caching related hw_event table, filled | 178 | * Generalized hw caching related hw_event table, filled |
@@ -245,435 +189,18 @@ static u64 __read_mostly hw_cache_event_ids | |||
245 | [PERF_COUNT_HW_CACHE_OP_MAX] | 189 | [PERF_COUNT_HW_CACHE_OP_MAX] |
246 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | 190 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; |
247 | 191 | ||
248 | static __initconst u64 nehalem_hw_cache_event_ids | ||
249 | [PERF_COUNT_HW_CACHE_MAX] | ||
250 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
251 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
252 | { | ||
253 | [ C(L1D) ] = { | ||
254 | [ C(OP_READ) ] = { | ||
255 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
256 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
257 | }, | ||
258 | [ C(OP_WRITE) ] = { | ||
259 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
260 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
261 | }, | ||
262 | [ C(OP_PREFETCH) ] = { | ||
263 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
264 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
265 | }, | ||
266 | }, | ||
267 | [ C(L1I ) ] = { | ||
268 | [ C(OP_READ) ] = { | ||
269 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
270 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
271 | }, | ||
272 | [ C(OP_WRITE) ] = { | ||
273 | [ C(RESULT_ACCESS) ] = -1, | ||
274 | [ C(RESULT_MISS) ] = -1, | ||
275 | }, | ||
276 | [ C(OP_PREFETCH) ] = { | ||
277 | [ C(RESULT_ACCESS) ] = 0x0, | ||
278 | [ C(RESULT_MISS) ] = 0x0, | ||
279 | }, | ||
280 | }, | ||
281 | [ C(LL ) ] = { | ||
282 | [ C(OP_READ) ] = { | ||
283 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
284 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
285 | }, | ||
286 | [ C(OP_WRITE) ] = { | ||
287 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
288 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
289 | }, | ||
290 | [ C(OP_PREFETCH) ] = { | ||
291 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
292 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
293 | }, | ||
294 | }, | ||
295 | [ C(DTLB) ] = { | ||
296 | [ C(OP_READ) ] = { | ||
297 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
298 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
299 | }, | ||
300 | [ C(OP_WRITE) ] = { | ||
301 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
302 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
303 | }, | ||
304 | [ C(OP_PREFETCH) ] = { | ||
305 | [ C(RESULT_ACCESS) ] = 0x0, | ||
306 | [ C(RESULT_MISS) ] = 0x0, | ||
307 | }, | ||
308 | }, | ||
309 | [ C(ITLB) ] = { | ||
310 | [ C(OP_READ) ] = { | ||
311 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
312 | [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ | ||
313 | }, | ||
314 | [ C(OP_WRITE) ] = { | ||
315 | [ C(RESULT_ACCESS) ] = -1, | ||
316 | [ C(RESULT_MISS) ] = -1, | ||
317 | }, | ||
318 | [ C(OP_PREFETCH) ] = { | ||
319 | [ C(RESULT_ACCESS) ] = -1, | ||
320 | [ C(RESULT_MISS) ] = -1, | ||
321 | }, | ||
322 | }, | ||
323 | [ C(BPU ) ] = { | ||
324 | [ C(OP_READ) ] = { | ||
325 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
326 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
327 | }, | ||
328 | [ C(OP_WRITE) ] = { | ||
329 | [ C(RESULT_ACCESS) ] = -1, | ||
330 | [ C(RESULT_MISS) ] = -1, | ||
331 | }, | ||
332 | [ C(OP_PREFETCH) ] = { | ||
333 | [ C(RESULT_ACCESS) ] = -1, | ||
334 | [ C(RESULT_MISS) ] = -1, | ||
335 | }, | ||
336 | }, | ||
337 | }; | ||
338 | |||
339 | static __initconst u64 core2_hw_cache_event_ids | ||
340 | [PERF_COUNT_HW_CACHE_MAX] | ||
341 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
342 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
343 | { | ||
344 | [ C(L1D) ] = { | ||
345 | [ C(OP_READ) ] = { | ||
346 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
347 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
348 | }, | ||
349 | [ C(OP_WRITE) ] = { | ||
350 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
351 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
352 | }, | ||
353 | [ C(OP_PREFETCH) ] = { | ||
354 | [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ | ||
355 | [ C(RESULT_MISS) ] = 0, | ||
356 | }, | ||
357 | }, | ||
358 | [ C(L1I ) ] = { | ||
359 | [ C(OP_READ) ] = { | ||
360 | [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ | ||
361 | [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ | ||
362 | }, | ||
363 | [ C(OP_WRITE) ] = { | ||
364 | [ C(RESULT_ACCESS) ] = -1, | ||
365 | [ C(RESULT_MISS) ] = -1, | ||
366 | }, | ||
367 | [ C(OP_PREFETCH) ] = { | ||
368 | [ C(RESULT_ACCESS) ] = 0, | ||
369 | [ C(RESULT_MISS) ] = 0, | ||
370 | }, | ||
371 | }, | ||
372 | [ C(LL ) ] = { | ||
373 | [ C(OP_READ) ] = { | ||
374 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
375 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
376 | }, | ||
377 | [ C(OP_WRITE) ] = { | ||
378 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
379 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
380 | }, | ||
381 | [ C(OP_PREFETCH) ] = { | ||
382 | [ C(RESULT_ACCESS) ] = 0, | ||
383 | [ C(RESULT_MISS) ] = 0, | ||
384 | }, | ||
385 | }, | ||
386 | [ C(DTLB) ] = { | ||
387 | [ C(OP_READ) ] = { | ||
388 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
389 | [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ | ||
390 | }, | ||
391 | [ C(OP_WRITE) ] = { | ||
392 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
393 | [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ | ||
394 | }, | ||
395 | [ C(OP_PREFETCH) ] = { | ||
396 | [ C(RESULT_ACCESS) ] = 0, | ||
397 | [ C(RESULT_MISS) ] = 0, | ||
398 | }, | ||
399 | }, | ||
400 | [ C(ITLB) ] = { | ||
401 | [ C(OP_READ) ] = { | ||
402 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
403 | [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ | ||
404 | }, | ||
405 | [ C(OP_WRITE) ] = { | ||
406 | [ C(RESULT_ACCESS) ] = -1, | ||
407 | [ C(RESULT_MISS) ] = -1, | ||
408 | }, | ||
409 | [ C(OP_PREFETCH) ] = { | ||
410 | [ C(RESULT_ACCESS) ] = -1, | ||
411 | [ C(RESULT_MISS) ] = -1, | ||
412 | }, | ||
413 | }, | ||
414 | [ C(BPU ) ] = { | ||
415 | [ C(OP_READ) ] = { | ||
416 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
417 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
418 | }, | ||
419 | [ C(OP_WRITE) ] = { | ||
420 | [ C(RESULT_ACCESS) ] = -1, | ||
421 | [ C(RESULT_MISS) ] = -1, | ||
422 | }, | ||
423 | [ C(OP_PREFETCH) ] = { | ||
424 | [ C(RESULT_ACCESS) ] = -1, | ||
425 | [ C(RESULT_MISS) ] = -1, | ||
426 | }, | ||
427 | }, | ||
428 | }; | ||
429 | |||
430 | static __initconst u64 atom_hw_cache_event_ids | ||
431 | [PERF_COUNT_HW_CACHE_MAX] | ||
432 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
433 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
434 | { | ||
435 | [ C(L1D) ] = { | ||
436 | [ C(OP_READ) ] = { | ||
437 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ | ||
438 | [ C(RESULT_MISS) ] = 0, | ||
439 | }, | ||
440 | [ C(OP_WRITE) ] = { | ||
441 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ | ||
442 | [ C(RESULT_MISS) ] = 0, | ||
443 | }, | ||
444 | [ C(OP_PREFETCH) ] = { | ||
445 | [ C(RESULT_ACCESS) ] = 0x0, | ||
446 | [ C(RESULT_MISS) ] = 0, | ||
447 | }, | ||
448 | }, | ||
449 | [ C(L1I ) ] = { | ||
450 | [ C(OP_READ) ] = { | ||
451 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
452 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
453 | }, | ||
454 | [ C(OP_WRITE) ] = { | ||
455 | [ C(RESULT_ACCESS) ] = -1, | ||
456 | [ C(RESULT_MISS) ] = -1, | ||
457 | }, | ||
458 | [ C(OP_PREFETCH) ] = { | ||
459 | [ C(RESULT_ACCESS) ] = 0, | ||
460 | [ C(RESULT_MISS) ] = 0, | ||
461 | }, | ||
462 | }, | ||
463 | [ C(LL ) ] = { | ||
464 | [ C(OP_READ) ] = { | ||
465 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
466 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
467 | }, | ||
468 | [ C(OP_WRITE) ] = { | ||
469 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
470 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
471 | }, | ||
472 | [ C(OP_PREFETCH) ] = { | ||
473 | [ C(RESULT_ACCESS) ] = 0, | ||
474 | [ C(RESULT_MISS) ] = 0, | ||
475 | }, | ||
476 | }, | ||
477 | [ C(DTLB) ] = { | ||
478 | [ C(OP_READ) ] = { | ||
479 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ | ||
480 | [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ | ||
481 | }, | ||
482 | [ C(OP_WRITE) ] = { | ||
483 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ | ||
484 | [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ | ||
485 | }, | ||
486 | [ C(OP_PREFETCH) ] = { | ||
487 | [ C(RESULT_ACCESS) ] = 0, | ||
488 | [ C(RESULT_MISS) ] = 0, | ||
489 | }, | ||
490 | }, | ||
491 | [ C(ITLB) ] = { | ||
492 | [ C(OP_READ) ] = { | ||
493 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
494 | [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ | ||
495 | }, | ||
496 | [ C(OP_WRITE) ] = { | ||
497 | [ C(RESULT_ACCESS) ] = -1, | ||
498 | [ C(RESULT_MISS) ] = -1, | ||
499 | }, | ||
500 | [ C(OP_PREFETCH) ] = { | ||
501 | [ C(RESULT_ACCESS) ] = -1, | ||
502 | [ C(RESULT_MISS) ] = -1, | ||
503 | }, | ||
504 | }, | ||
505 | [ C(BPU ) ] = { | ||
506 | [ C(OP_READ) ] = { | ||
507 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
508 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
509 | }, | ||
510 | [ C(OP_WRITE) ] = { | ||
511 | [ C(RESULT_ACCESS) ] = -1, | ||
512 | [ C(RESULT_MISS) ] = -1, | ||
513 | }, | ||
514 | [ C(OP_PREFETCH) ] = { | ||
515 | [ C(RESULT_ACCESS) ] = -1, | ||
516 | [ C(RESULT_MISS) ] = -1, | ||
517 | }, | ||
518 | }, | ||
519 | }; | ||
520 | |||
521 | static u64 intel_pmu_raw_event(u64 hw_event) | ||
522 | { | ||
523 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
524 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
525 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
526 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
527 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | ||
528 | |||
529 | #define CORE_EVNTSEL_MASK \ | ||
530 | (CORE_EVNTSEL_EVENT_MASK | \ | ||
531 | CORE_EVNTSEL_UNIT_MASK | \ | ||
532 | CORE_EVNTSEL_EDGE_MASK | \ | ||
533 | CORE_EVNTSEL_INV_MASK | \ | ||
534 | CORE_EVNTSEL_REG_MASK) | ||
535 | |||
536 | return hw_event & CORE_EVNTSEL_MASK; | ||
537 | } | ||
538 | |||
539 | static __initconst u64 amd_hw_cache_event_ids | ||
540 | [PERF_COUNT_HW_CACHE_MAX] | ||
541 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
542 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
543 | { | ||
544 | [ C(L1D) ] = { | ||
545 | [ C(OP_READ) ] = { | ||
546 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
547 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ | ||
548 | }, | ||
549 | [ C(OP_WRITE) ] = { | ||
550 | [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ | ||
551 | [ C(RESULT_MISS) ] = 0, | ||
552 | }, | ||
553 | [ C(OP_PREFETCH) ] = { | ||
554 | [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ | ||
555 | [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ | ||
556 | }, | ||
557 | }, | ||
558 | [ C(L1I ) ] = { | ||
559 | [ C(OP_READ) ] = { | ||
560 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ | ||
561 | [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ | ||
562 | }, | ||
563 | [ C(OP_WRITE) ] = { | ||
564 | [ C(RESULT_ACCESS) ] = -1, | ||
565 | [ C(RESULT_MISS) ] = -1, | ||
566 | }, | ||
567 | [ C(OP_PREFETCH) ] = { | ||
568 | [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ | ||
569 | [ C(RESULT_MISS) ] = 0, | ||
570 | }, | ||
571 | }, | ||
572 | [ C(LL ) ] = { | ||
573 | [ C(OP_READ) ] = { | ||
574 | [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ | ||
575 | [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ | ||
576 | }, | ||
577 | [ C(OP_WRITE) ] = { | ||
578 | [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ | ||
579 | [ C(RESULT_MISS) ] = 0, | ||
580 | }, | ||
581 | [ C(OP_PREFETCH) ] = { | ||
582 | [ C(RESULT_ACCESS) ] = 0, | ||
583 | [ C(RESULT_MISS) ] = 0, | ||
584 | }, | ||
585 | }, | ||
586 | [ C(DTLB) ] = { | ||
587 | [ C(OP_READ) ] = { | ||
588 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
589 | [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */ | ||
590 | }, | ||
591 | [ C(OP_WRITE) ] = { | ||
592 | [ C(RESULT_ACCESS) ] = 0, | ||
593 | [ C(RESULT_MISS) ] = 0, | ||
594 | }, | ||
595 | [ C(OP_PREFETCH) ] = { | ||
596 | [ C(RESULT_ACCESS) ] = 0, | ||
597 | [ C(RESULT_MISS) ] = 0, | ||
598 | }, | ||
599 | }, | ||
600 | [ C(ITLB) ] = { | ||
601 | [ C(OP_READ) ] = { | ||
602 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */ | ||
603 | [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ | ||
604 | }, | ||
605 | [ C(OP_WRITE) ] = { | ||
606 | [ C(RESULT_ACCESS) ] = -1, | ||
607 | [ C(RESULT_MISS) ] = -1, | ||
608 | }, | ||
609 | [ C(OP_PREFETCH) ] = { | ||
610 | [ C(RESULT_ACCESS) ] = -1, | ||
611 | [ C(RESULT_MISS) ] = -1, | ||
612 | }, | ||
613 | }, | ||
614 | [ C(BPU ) ] = { | ||
615 | [ C(OP_READ) ] = { | ||
616 | [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ | ||
617 | [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ | ||
618 | }, | ||
619 | [ C(OP_WRITE) ] = { | ||
620 | [ C(RESULT_ACCESS) ] = -1, | ||
621 | [ C(RESULT_MISS) ] = -1, | ||
622 | }, | ||
623 | [ C(OP_PREFETCH) ] = { | ||
624 | [ C(RESULT_ACCESS) ] = -1, | ||
625 | [ C(RESULT_MISS) ] = -1, | ||
626 | }, | ||
627 | }, | ||
628 | }; | ||
629 | |||
630 | /* | ||
631 | * AMD Performance Monitor K7 and later. | ||
632 | */ | ||
633 | static const u64 amd_perfmon_event_map[] = | ||
634 | { | ||
635 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, | ||
636 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
637 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, | ||
638 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, | ||
639 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
640 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
641 | }; | ||
642 | |||
643 | static u64 amd_pmu_event_map(int hw_event) | ||
644 | { | ||
645 | return amd_perfmon_event_map[hw_event]; | ||
646 | } | ||
647 | |||
648 | static u64 amd_pmu_raw_event(u64 hw_event) | ||
649 | { | ||
650 | #define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL | ||
651 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | ||
652 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | ||
653 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | ||
654 | #define K7_EVNTSEL_REG_MASK 0x0FF000000ULL | ||
655 | |||
656 | #define K7_EVNTSEL_MASK \ | ||
657 | (K7_EVNTSEL_EVENT_MASK | \ | ||
658 | K7_EVNTSEL_UNIT_MASK | \ | ||
659 | K7_EVNTSEL_EDGE_MASK | \ | ||
660 | K7_EVNTSEL_INV_MASK | \ | ||
661 | K7_EVNTSEL_REG_MASK) | ||
662 | |||
663 | return hw_event & K7_EVNTSEL_MASK; | ||
664 | } | ||
665 | |||
666 | /* | 192 | /* |
667 | * Propagate event elapsed time into the generic event. | 193 | * Propagate event elapsed time into the generic event. |
668 | * Can only be executed on the CPU where the event is active. | 194 | * Can only be executed on the CPU where the event is active. |
669 | * Returns the delta events processed. | 195 | * Returns the delta events processed. |
670 | */ | 196 | */ |
671 | static u64 | 197 | static u64 |
672 | x86_perf_event_update(struct perf_event *event, | 198 | x86_perf_event_update(struct perf_event *event) |
673 | struct hw_perf_event *hwc, int idx) | ||
674 | { | 199 | { |
200 | struct hw_perf_event *hwc = &event->hw; | ||
675 | int shift = 64 - x86_pmu.event_bits; | 201 | int shift = 64 - x86_pmu.event_bits; |
676 | u64 prev_raw_count, new_raw_count; | 202 | u64 prev_raw_count, new_raw_count; |
203 | int idx = hwc->idx; | ||
677 | s64 delta; | 204 | s64 delta; |
678 | 205 | ||
679 | if (idx == X86_PMC_IDX_FIXED_BTS) | 206 | if (idx == X86_PMC_IDX_FIXED_BTS) |
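x86_perf_event_update() now derives hwc and idx from the event itself instead of taking them as parameters. The shift of 64 - event_bits seen above implements the usual width-limited counter delta: both raw samples are shifted up so the counter's top bit becomes bit 63, subtracted, then shifted back down, which makes wrap-around come out right. The idea in isolation, as a sketch (the kernel wraps this in an atomic cmpxchg loop not shown in this hunk):

	/* Sketch: delta of two samples from an event_bits-wide counter. */
	static long long counter_delta(unsigned long long prev,
				       unsigned long long cur, int event_bits)
	{
		int shift = 64 - event_bits;
		long long delta;

		delta = (cur << shift) - (prev << shift);
		return delta >> shift;	/* arithmetic shift restores sign */
	}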
@@ -773,7 +300,7 @@ static inline bool bts_available(void) | |||
773 | return x86_pmu.enable_bts != NULL; | 300 | return x86_pmu.enable_bts != NULL; |
774 | } | 301 | } |
775 | 302 | ||
776 | static inline void init_debug_store_on_cpu(int cpu) | 303 | static void init_debug_store_on_cpu(int cpu) |
777 | { | 304 | { |
778 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | 305 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; |
779 | 306 | ||
@@ -785,7 +312,7 @@ static inline void init_debug_store_on_cpu(int cpu) | |||
785 | (u32)((u64)(unsigned long)ds >> 32)); | 312 | (u32)((u64)(unsigned long)ds >> 32)); |
786 | } | 313 | } |
787 | 314 | ||
788 | static inline void fini_debug_store_on_cpu(int cpu) | 315 | static void fini_debug_store_on_cpu(int cpu) |
789 | { | 316 | { |
790 | if (!per_cpu(cpu_hw_events, cpu).ds) | 317 | if (!per_cpu(cpu_hw_events, cpu).ds) |
791 | return; | 318 | return; |
@@ -914,42 +441,6 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) | |||
914 | return 0; | 441 | return 0; |
915 | } | 442 | } |
916 | 443 | ||
917 | static void intel_pmu_enable_bts(u64 config) | ||
918 | { | ||
919 | unsigned long debugctlmsr; | ||
920 | |||
921 | debugctlmsr = get_debugctlmsr(); | ||
922 | |||
923 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
924 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
925 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
926 | |||
927 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
928 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
929 | |||
930 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
931 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
932 | |||
933 | update_debugctlmsr(debugctlmsr); | ||
934 | } | ||
935 | |||
936 | static void intel_pmu_disable_bts(void) | ||
937 | { | ||
938 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
939 | unsigned long debugctlmsr; | ||
940 | |||
941 | if (!cpuc->ds) | ||
942 | return; | ||
943 | |||
944 | debugctlmsr = get_debugctlmsr(); | ||
945 | |||
946 | debugctlmsr &= | ||
947 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
948 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
949 | |||
950 | update_debugctlmsr(debugctlmsr); | ||
951 | } | ||
952 | |||
953 | /* | 444 | /* |
954 | * Setup the hardware configuration for a given attr_type | 445 | * Setup the hardware configuration for a given attr_type |
955 | */ | 446 | */ |
@@ -988,6 +479,8 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
988 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | 479 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; |
989 | 480 | ||
990 | hwc->idx = -1; | 481 | hwc->idx = -1; |
482 | hwc->last_cpu = -1; | ||
483 | hwc->last_tag = ~0ULL; | ||
991 | 484 | ||
992 | /* | 485 | /* |
993 | * Count user and OS events unless requested not to. | 486 | * Count user and OS events unless requested not to. |
@@ -1017,6 +510,9 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
1017 | */ | 510 | */ |
1018 | if (attr->type == PERF_TYPE_RAW) { | 511 | if (attr->type == PERF_TYPE_RAW) { |
1019 | hwc->config |= x86_pmu.raw_event(attr->config); | 512 | hwc->config |= x86_pmu.raw_event(attr->config); |
513 | if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) && | ||
514 | perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
515 | return -EACCES; | ||
1020 | return 0; | 516 | return 0; |
1021 | } | 517 | } |
1022 | 518 | ||
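The new check above rejects raw configs that set ARCH_PERFMON_EVENTSEL_ANY (the AnyThread bit, bit 21 of the Intel event-select MSR) from unprivileged callers, since an AnyThread event also observes the sibling hyperthread. The gate, as a freestanding sketch where paranoid and is_privileged are placeholders for perf_paranoid_cpu() and capable(CAP_SYS_ADMIN):

	#include <stdbool.h>
	#include <errno.h>

	#define ANY_BIT (1ULL << 21)	/* AnyThread bit position */

	static int check_raw_config(unsigned long long config, bool paranoid,
				    bool is_privileged)
	{
		if ((config & ANY_BIT) && paranoid && !is_privileged)
			return -EACCES;
		return 0;
	}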
@@ -1056,216 +552,314 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
1056 | return 0; | 552 | return 0; |
1057 | } | 553 | } |
1058 | 554 | ||
1059 | static void p6_pmu_disable_all(void) | 555 | static void x86_pmu_disable_all(void) |
1060 | { | 556 | { |
1061 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 557 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1062 | u64 val; | 558 | int idx; |
1063 | |||
1064 | if (!cpuc->enabled) | ||
1065 | return; | ||
1066 | 559 | ||
1067 | cpuc->enabled = 0; | 560 | for (idx = 0; idx < x86_pmu.num_events; idx++) { |
1068 | barrier(); | 561 | u64 val; |
1069 | 562 | ||
1070 | /* p6 only has one enable register */ | 563 | if (!test_bit(idx, cpuc->active_mask)) |
1071 | rdmsrl(MSR_P6_EVNTSEL0, val); | 564 | continue; |
1072 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | 565 | rdmsrl(x86_pmu.eventsel + idx, val); |
1073 | wrmsrl(MSR_P6_EVNTSEL0, val); | 566 | if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) |
567 | continue; | ||
568 | val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; | ||
569 | wrmsrl(x86_pmu.eventsel + idx, val); | ||
570 | } | ||
1074 | } | 571 | } |
1075 | 572 | ||
1076 | static void intel_pmu_disable_all(void) | 573 | void hw_perf_disable(void) |
1077 | { | 574 | { |
1078 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 575 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1079 | 576 | ||
577 | if (!x86_pmu_initialized()) | ||
578 | return; | ||
579 | |||
1080 | if (!cpuc->enabled) | 580 | if (!cpuc->enabled) |
1081 | return; | 581 | return; |
1082 | 582 | ||
583 | cpuc->n_added = 0; | ||
1083 | cpuc->enabled = 0; | 584 | cpuc->enabled = 0; |
1084 | barrier(); | 585 | barrier(); |
1085 | 586 | ||
1086 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | 587 | x86_pmu.disable_all(); |
1087 | |||
1088 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | ||
1089 | intel_pmu_disable_bts(); | ||
1090 | } | 588 | } |
1091 | 589 | ||
1092 | static void amd_pmu_disable_all(void) | 590 | static void x86_pmu_enable_all(void) |
1093 | { | 591 | { |
1094 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 592 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1095 | int idx; | 593 | int idx; |
1096 | 594 | ||
1097 | if (!cpuc->enabled) | ||
1098 | return; | ||
1099 | |||
1100 | cpuc->enabled = 0; | ||
1101 | /* | ||
1102 | * ensure we write the disable before we start disabling the | ||
1103 | * events proper, so that amd_pmu_enable_event() does the | ||
1104 | * right thing. | ||
1105 | */ | ||
1106 | barrier(); | ||
1107 | |||
1108 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 595 | for (idx = 0; idx < x86_pmu.num_events; idx++) { |
596 | struct perf_event *event = cpuc->events[idx]; | ||
1109 | u64 val; | 597 | u64 val; |
1110 | 598 | ||
1111 | if (!test_bit(idx, cpuc->active_mask)) | 599 | if (!test_bit(idx, cpuc->active_mask)) |
1112 | continue; | 600 | continue; |
1113 | rdmsrl(MSR_K7_EVNTSEL0 + idx, val); | 601 | |
1114 | if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) | 602 | val = event->hw.config; |
1115 | continue; | 603 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; |
1116 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | 604 | wrmsrl(x86_pmu.eventsel + idx, val); |
1117 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
1118 | } | 605 | } |
1119 | } | 606 | } |
1120 | 607 | ||
1121 | void hw_perf_disable(void) | 608 | static const struct pmu pmu; |
609 | |||
610 | static inline int is_x86_event(struct perf_event *event) | ||
1122 | { | 611 | { |
1123 | if (!x86_pmu_initialized()) | 612 | return event->pmu == &pmu; |
1124 | return; | ||
1125 | return x86_pmu.disable_all(); | ||
1126 | } | 613 | } |
1127 | 614 | ||
1128 | static void p6_pmu_enable_all(void) | 615 | static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) |
1129 | { | 616 | { |
1130 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 617 | struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; |
1131 | unsigned long val; | 618 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
619 | int i, j, w, wmax, num = 0; | ||
620 | struct hw_perf_event *hwc; | ||
1132 | 621 | ||
1133 | if (cpuc->enabled) | 622 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); |
1134 | return; | ||
1135 | 623 | ||
1136 | cpuc->enabled = 1; | 624 | for (i = 0; i < n; i++) { |
1137 | barrier(); | 625 | c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); |
626 | constraints[i] = c; | ||
627 | } | ||
1138 | 628 | ||
1139 | /* p6 only has one enable register */ | 629 | /* |
1140 | rdmsrl(MSR_P6_EVNTSEL0, val); | 630 | * fastpath, try to reuse previous register |
1141 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 631 | */ |
1142 | wrmsrl(MSR_P6_EVNTSEL0, val); | 632 | for (i = 0; i < n; i++) { |
1143 | } | 633 | hwc = &cpuc->event_list[i]->hw; |
634 | c = constraints[i]; | ||
1144 | 635 | ||
1145 | static void intel_pmu_enable_all(void) | 636 | /* never assigned */ |
1146 | { | 637 | if (hwc->idx == -1) |
1147 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 638 | break; |
1148 | 639 | ||
1149 | if (cpuc->enabled) | 640 | /* constraint still honored */ |
1150 | return; | 641 | if (!test_bit(hwc->idx, c->idxmsk)) |
642 | break; | ||
1151 | 643 | ||
1152 | cpuc->enabled = 1; | 644 | /* not already used */ |
1153 | barrier(); | 645 | if (test_bit(hwc->idx, used_mask)) |
646 | break; | ||
1154 | 647 | ||
1155 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 648 | __set_bit(hwc->idx, used_mask); |
649 | if (assign) | ||
650 | assign[i] = hwc->idx; | ||
651 | } | ||
652 | if (i == n) | ||
653 | goto done; | ||
1156 | 654 | ||
1157 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | 655 | /* |
1158 | struct perf_event *event = | 656 | * begin slow path |
1159 | cpuc->events[X86_PMC_IDX_FIXED_BTS]; | 657 | */ |
1160 | 658 | ||
1161 | if (WARN_ON_ONCE(!event)) | 659 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); |
1162 | return; | ||
1163 | 660 | ||
1164 | intel_pmu_enable_bts(event->hw.config); | 661 | /* |
1165 | } | 662 | * weight = number of possible counters |
1166 | } | 663 | * |
664 | * 1 = most constrained, only works on one counter | ||
665 | * wmax = least constrained, works on any counter | ||
666 | * | ||
667 | * assign events to counters starting with most | ||
668 | * constrained events. | ||
669 | */ | ||
670 | wmax = x86_pmu.num_events; | ||
1167 | 671 | ||
1168 | static void amd_pmu_enable_all(void) | 672 | /* |
1169 | { | 673 | * when fixed event counters are present, |
1170 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 674 | * wmax is incremented by 1 to account |
1171 | int idx; | 675 | * for one more choice |
676 | */ | ||
677 | if (x86_pmu.num_events_fixed) | ||
678 | wmax++; | ||
1172 | 679 | ||
1173 | if (cpuc->enabled) | 680 | for (w = 1, num = n; num && w <= wmax; w++) { |
1174 | return; | 681 | /* for each event */ |
682 | for (i = 0; num && i < n; i++) { | ||
683 | c = constraints[i]; | ||
684 | hwc = &cpuc->event_list[i]->hw; | ||
1175 | 685 | ||
1176 | cpuc->enabled = 1; | 686 | if (c->weight != w) |
1177 | barrier(); | 687 | continue; |
1178 | 688 | ||
1179 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 689 | for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { |
1180 | struct perf_event *event = cpuc->events[idx]; | 690 | if (!test_bit(j, used_mask)) |
1181 | u64 val; | 691 | break; |
692 | } | ||
1182 | 693 | ||
1183 | if (!test_bit(idx, cpuc->active_mask)) | 694 | if (j == X86_PMC_IDX_MAX) |
1184 | continue; | 695 | break; |
1185 | 696 | ||
1186 | val = event->hw.config; | 697 | __set_bit(j, used_mask); |
1187 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 698 | |
1188 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | 699 | if (assign) |
700 | assign[i] = j; | ||
701 | num--; | ||
702 | } | ||
1189 | } | 703 | } |
704 | done: | ||
705 | /* | ||
706 | * scheduling failed or is just a simulation, | ||
707 | * free resources if necessary | ||
708 | */ | ||
709 | if (!assign || num) { | ||
710 | for (i = 0; i < n; i++) { | ||
711 | if (x86_pmu.put_event_constraints) | ||
712 | x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]); | ||
713 | } | ||
714 | } | ||
715 | return num ? -ENOSPC : 0; | ||
1190 | } | 716 | } |
1191 | 717 | ||
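
The weight-ordered slow path above is easier to see outside the kernel. Below is a minimal userspace sketch of the same greedy idea (all names invented, none of the kernel's bitmap helpers): events that fit on fewer counters are placed first, so flexible events cannot starve constrained ones.

#include <stdio.h>

#define NCTR 4				/* pretend 4 generic counters */

struct toy_event {
	unsigned int idxmsk;		/* bitmask of counters this event may use */
};

/* Greedy, most-constrained-first assignment, as in x86_schedule_events(). */
static int toy_schedule(const struct toy_event *ev, int n, int *assign)
{
	unsigned int used = 0;
	int num = n;			/* events still unplaced */

	for (int w = 1; num && w <= NCTR; w++) {	/* weight pass */
		for (int i = 0; num && i < n; i++) {
			if (__builtin_popcount(ev[i].idxmsk) != w)
				continue;
			for (int j = 0; j < NCTR; j++) {
				if ((ev[i].idxmsk & (1u << j)) && !(used & (1u << j))) {
					used |= 1u << j;
					assign[i] = j;
					num--;
					break;
				}
			}
		}
	}
	return num ? -1 : 0;		/* -1 plays the role of -ENOSPC */
}

int main(void)
{
	struct toy_event ev[] = { { 0xf }, { 0x1 }, { 0x3 } };
	int assign[3];

	if (!toy_schedule(ev, 3, assign))
		printf("%d %d %d\n", assign[0], assign[1], assign[2]);
	return 0;
}
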
1192 | void hw_perf_enable(void) | 718 | /* |
719 | * dogrp: true if we must collect sibling events (group) | ||
720 | * returns total number of events and error code | ||
721 | */ | ||
722 | static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp) | ||
1193 | { | 723 | { |
1194 | if (!x86_pmu_initialized()) | 724 | struct perf_event *event; |
1195 | return; | 725 | int n, max_count; |
1196 | x86_pmu.enable_all(); | ||
1197 | } | ||
1198 | 726 | ||
1199 | static inline u64 intel_pmu_get_status(void) | 727 | max_count = x86_pmu.num_events + x86_pmu.num_events_fixed; |
1200 | { | ||
1201 | u64 status; | ||
1202 | 728 | ||
1203 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | 729 | /* current number of events already accepted */ |
730 | n = cpuc->n_events; | ||
1204 | 731 | ||
1205 | return status; | 732 | if (is_x86_event(leader)) { |
1206 | } | 733 | if (n >= max_count) |
734 | return -ENOSPC; | ||
735 | cpuc->event_list[n] = leader; | ||
736 | n++; | ||
737 | } | ||
738 | if (!dogrp) | ||
739 | return n; | ||
1207 | 740 | ||
1208 | static inline void intel_pmu_ack_status(u64 ack) | 741 | list_for_each_entry(event, &leader->sibling_list, group_entry) { |
1209 | { | 742 | if (!is_x86_event(event) || |
1210 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | 743 | event->state <= PERF_EVENT_STATE_OFF) |
1211 | } | 744 | continue; |
1212 | 745 | ||
1213 | static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) | 746 | if (n >= max_count) |
1214 | { | 747 | return -ENOSPC; |
1215 | (void)checking_wrmsrl(hwc->config_base + idx, | ||
1216 | hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); | ||
1217 | } | ||
1218 | 748 | ||
1219 | static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) | 749 | cpuc->event_list[n] = event; |
1220 | { | 750 | n++; |
1221 | (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); | 751 | } |
752 | return n; | ||
1222 | } | 753 | } |
1223 | 754 | ||
1224 | static inline void | 755 | static inline void x86_assign_hw_event(struct perf_event *event, |
1225 | intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) | 756 | struct cpu_hw_events *cpuc, int i) |
1226 | { | 757 | { |
1227 | int idx = __idx - X86_PMC_IDX_FIXED; | 758 | struct hw_perf_event *hwc = &event->hw; |
1228 | u64 ctrl_val, mask; | ||
1229 | 759 | ||
1230 | mask = 0xfULL << (idx * 4); | 760 | hwc->idx = cpuc->assign[i]; |
761 | hwc->last_cpu = smp_processor_id(); | ||
762 | hwc->last_tag = ++cpuc->tags[i]; | ||
1231 | 763 | ||
1232 | rdmsrl(hwc->config_base, ctrl_val); | 764 | if (hwc->idx == X86_PMC_IDX_FIXED_BTS) { |
1233 | ctrl_val &= ~mask; | 765 | hwc->config_base = 0; |
1234 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | 766 | hwc->event_base = 0; |
767 | } else if (hwc->idx >= X86_PMC_IDX_FIXED) { | ||
768 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; | ||
769 | /* | ||
770 | * We set it so that event_base + idx in wrmsr/rdmsr maps to | ||
771 | * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: | ||
772 | */ | ||
773 | hwc->event_base = | ||
774 | MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; | ||
775 | } else { | ||
776 | hwc->config_base = x86_pmu.eventsel; | ||
777 | hwc->event_base = x86_pmu.perfctr; | ||
778 | } | ||
1235 | } | 779 | } |
1236 | 780 | ||
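
The event_base arithmetic in x86_assign_hw_event() is worth unpacking with the architectural values (X86_PMC_IDX_FIXED is 32, MSR_ARCH_PERFMON_FIXED_CTR0 is 0x309):

/*
 * hwc->event_base = 0x309 - 32;
 *
 * For fixed counter 1, hwc->idx = 33, so event_base + idx =
 * 0x309 - 32 + 33 = 0x30a, i.e. MSR_ARCH_PERFMON_FIXED_CTR1.
 * The generic rdmsr/wrmsr paths therefore need no fixed-counter
 * special case.
 */
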
1237 | static inline void | 781 | static inline int match_prev_assignment(struct hw_perf_event *hwc, |
1238 | p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) | 782 | struct cpu_hw_events *cpuc, |
783 | int i) | ||
1239 | { | 784 | { |
1240 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 785 | return hwc->idx == cpuc->assign[i] && |
1241 | u64 val = P6_NOP_EVENT; | 786 | hwc->last_cpu == smp_processor_id() && |
1242 | 787 | hwc->last_tag == cpuc->tags[i]; | |
1243 | if (cpuc->enabled) | ||
1244 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1245 | |||
1246 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
1247 | } | 788 | } |
1248 | 789 | ||
1249 | static inline void | 790 | static int x86_pmu_start(struct perf_event *event); |
1250 | intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) | 791 | static void x86_pmu_stop(struct perf_event *event); |
792 | |||
793 | void hw_perf_enable(void) | ||
1251 | { | 794 | { |
1252 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | 795 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1253 | intel_pmu_disable_bts(); | 796 | struct perf_event *event; |
797 | struct hw_perf_event *hwc; | ||
798 | int i; | ||
799 | |||
800 | if (!x86_pmu_initialized()) | ||
1254 | return; | 801 | return; |
1255 | } | ||
1256 | 802 | ||
1257 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 803 | if (cpuc->enabled) |
1258 | intel_pmu_disable_fixed(hwc, idx); | ||
1259 | return; | 804 | return; |
805 | |||
806 | if (cpuc->n_added) { | ||
807 | int n_running = cpuc->n_events - cpuc->n_added; | ||
808 | /* | ||
809 | * apply assignment obtained either from | ||
810 | * hw_perf_group_sched_in() or x86_pmu_enable() | ||
811 | * | ||
812 | * step1: save events moving to new counters | ||
813 | * step2: reprogram moved events into new counters | ||
814 | */ | ||
815 | for (i = 0; i < n_running; i++) { | ||
816 | event = cpuc->event_list[i]; | ||
817 | hwc = &event->hw; | ||
818 | |||
819 | /* | ||
820 | * we can avoid reprogramming the counter if: | ||
821 | * - assigned same counter as last time | ||
822 | * - running on same CPU as last time | ||
823 | * - no other event has used the counter since | ||
824 | */ | ||
825 | if (hwc->idx == -1 || | ||
826 | match_prev_assignment(hwc, cpuc, i)) | ||
827 | continue; | ||
828 | |||
829 | x86_pmu_stop(event); | ||
830 | } | ||
831 | |||
832 | for (i = 0; i < cpuc->n_events; i++) { | ||
833 | event = cpuc->event_list[i]; | ||
834 | hwc = &event->hw; | ||
835 | |||
836 | if (!match_prev_assignment(hwc, cpuc, i)) | ||
837 | x86_assign_hw_event(event, cpuc, i); | ||
838 | else if (i < n_running) | ||
839 | continue; | ||
840 | |||
841 | x86_pmu_start(event); | ||
842 | } | ||
843 | cpuc->n_added = 0; | ||
844 | perf_events_lapic_init(); | ||
1260 | } | 845 | } |
1261 | 846 | ||
1262 | x86_pmu_disable_event(hwc, idx); | 847 | cpuc->enabled = 1; |
848 | barrier(); | ||
849 | |||
850 | x86_pmu.enable_all(); | ||
1263 | } | 851 | } |
1264 | 852 | ||
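
The step1/step2 split in hw_perf_enable() matters because a rescheduling pass can swap counters between live events. Here is a freestanding sketch of why the two passes cannot be merged, with invented toy_* helpers standing in for x86_pmu_stop()/x86_pmu_start():

#include <stdio.h>

static void toy_stop_counter(int idx)  { printf("stop  counter %d\n", idx); }
static void toy_start_counter(int idx) { printf("start counter %d\n", idx); }

/*
 * Step 1 vacates every counter that is changing hands before step 2
 * reprograms anything. A single combined pass could start an event
 * on a counter its previous owner has not stopped using yet.
 */
static void toy_reprogram(int *old_idx, const int *new_idx, int n)
{
	for (int i = 0; i < n; i++)		/* step 1: vacate movers */
		if (old_idx[i] != new_idx[i])
			toy_stop_counter(old_idx[i]);

	for (int i = 0; i < n; i++) {		/* step 2: reprogram movers */
		if (old_idx[i] != new_idx[i])
			toy_start_counter(new_idx[i]);
		old_idx[i] = new_idx[i];
	}
}

int main(void)
{
	int old_idx[] = { 0, 1 };		/* two events swap counters */
	const int new_idx[] = { 1, 0 };

	toy_reprogram(old_idx, new_idx, 2);	/* all stops precede all starts */
	return 0;
}
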
1265 | static inline void | 853 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) |
1266 | amd_pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
1267 | { | 854 | { |
1268 | x86_pmu_disable_event(hwc, idx); | 855 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, |
856 | hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
857 | } | ||
858 | |||
859 | static inline void x86_pmu_disable_event(struct perf_event *event) | ||
860 | { | ||
861 | struct hw_perf_event *hwc = &event->hw; | ||
862 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config); | ||
1269 | } | 863 | } |
1270 | 864 | ||
1271 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | 865 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
@@ -1275,12 +869,12 @@ static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | |||
1275 | * To be called with the event disabled in hw: | 869 | * To be called with the event disabled in hw: |
1276 | */ | 870 | */ |
1277 | static int | 871 | static int |
1278 | x86_perf_event_set_period(struct perf_event *event, | 872 | x86_perf_event_set_period(struct perf_event *event) |
1279 | struct hw_perf_event *hwc, int idx) | ||
1280 | { | 873 | { |
874 | struct hw_perf_event *hwc = &event->hw; | ||
1281 | s64 left = atomic64_read(&hwc->period_left); | 875 | s64 left = atomic64_read(&hwc->period_left); |
1282 | s64 period = hwc->sample_period; | 876 | s64 period = hwc->sample_period; |
1283 | int err, ret = 0; | 877 | int err, ret = 0, idx = hwc->idx; |
1284 | 878 | ||
1285 | if (idx == X86_PMC_IDX_FIXED_BTS) | 879 | if (idx == X86_PMC_IDX_FIXED_BTS) |
1286 | return 0; | 880 | return 0; |
@@ -1326,212 +920,63 @@ x86_perf_event_set_period(struct perf_event *event, | |||
1326 | return ret; | 920 | return ret; |
1327 | } | 921 | } |
1328 | 922 | ||
1329 | static inline void | 923 | static void x86_pmu_enable_event(struct perf_event *event) |
1330 | intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) | ||
1331 | { | ||
1332 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
1333 | u64 ctrl_val, bits, mask; | ||
1334 | int err; | ||
1335 | |||
1336 | /* | ||
1337 | * Enable IRQ generation (0x8), | ||
1338 | * and enable ring-3 counting (0x2) and ring-0 counting (0x1) | ||
1339 | * if requested: | ||
1340 | */ | ||
1341 | bits = 0x8ULL; | ||
1342 | if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) | ||
1343 | bits |= 0x2; | ||
1344 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
1345 | bits |= 0x1; | ||
1346 | bits <<= (idx * 4); | ||
1347 | mask = 0xfULL << (idx * 4); | ||
1348 | |||
1349 | rdmsrl(hwc->config_base, ctrl_val); | ||
1350 | ctrl_val &= ~mask; | ||
1351 | ctrl_val |= bits; | ||
1352 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
1353 | } | ||
1354 | |||
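
For reference, the encoding the removed intel_pmu_enable_fixed() wrote: each fixed counter owns a 4-bit field in MSR_ARCH_PERFMON_FIXED_CTR_CTRL. Worked through for fixed counter 2 with PMI, ring-3 and ring-0 counting all enabled:

/*
 * bits = 0x8 | 0x2 | 0x1 = 0xb;           // PMI, USR, OS
 * bits <<= 2 * 4;                         // counter 2's field
 * mask  = 0xfULL << (2 * 4);              // 0xf00
 * ctrl_val = (ctrl_val & ~mask) | bits;   // field now holds 0xb
 */
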
1355 | static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1356 | { | 924 | { |
1357 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 925 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1358 | u64 val; | ||
1359 | |||
1360 | val = hwc->config; | ||
1361 | if (cpuc->enabled) | 926 | if (cpuc->enabled) |
1362 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 927 | __x86_pmu_enable_event(&event->hw); |
1363 | |||
1364 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
1365 | } | 928 | } |
1366 | 929 | ||
1367 | 930 | /* | |
1368 | static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) | 931 | * activate a single event |
1369 | { | 932 | * |
1370 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | 933 | * The event is added to the group of enabled events |
1371 | if (!__get_cpu_var(cpu_hw_events).enabled) | 934 | * but only if it can be scheduled with existing events. |
1372 | return; | 935 | * |
1373 | 936 | * Called with PMU disabled. If successful and the return value is 1, |
1374 | intel_pmu_enable_bts(hwc->config); | 937 | * the caller is then guaranteed to call perf_enable() and hw_perf_enable() |
1375 | return; | 938 | */ |
1376 | } | 939 | static int x86_pmu_enable(struct perf_event *event) |
1377 | |||
1378 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
1379 | intel_pmu_enable_fixed(hwc, idx); | ||
1380 | return; | ||
1381 | } | ||
1382 | |||
1383 | x86_pmu_enable_event(hwc, idx); | ||
1384 | } | ||
1385 | |||
1386 | static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1387 | { | 940 | { |
1388 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 941 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
942 | struct hw_perf_event *hwc; | ||
943 | int assign[X86_PMC_IDX_MAX]; | ||
944 | int n, n0, ret; | ||
1389 | 945 | ||
1390 | if (cpuc->enabled) | 946 | hwc = &event->hw; |
1391 | x86_pmu_enable_event(hwc, idx); | ||
1392 | } | ||
1393 | |||
1394 | static int fixed_mode_idx(struct hw_perf_event *hwc) | ||
1395 | { | ||
1396 | unsigned int hw_event; | ||
1397 | |||
1398 | hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK; | ||
1399 | |||
1400 | if (unlikely((hw_event == | ||
1401 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | ||
1402 | (hwc->sample_period == 1))) | ||
1403 | return X86_PMC_IDX_FIXED_BTS; | ||
1404 | 947 | ||
1405 | if (!x86_pmu.num_events_fixed) | 948 | n0 = cpuc->n_events; |
1406 | return -1; | 949 | n = collect_events(cpuc, event, false); |
950 | if (n < 0) | ||
951 | return n; | ||
1407 | 952 | ||
953 | ret = x86_schedule_events(cpuc, n, assign); | ||
954 | if (ret) | ||
955 | return ret; | ||
1408 | /* | 956 | /* |
1409 | * fixed counters do not take all possible filters | 957 | * copy new assignment, now we know it is possible |
958 | * will be used by hw_perf_enable() | ||
1410 | */ | 959 | */ |
1411 | if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) | 960 | memcpy(cpuc->assign, assign, n*sizeof(int)); |
1412 | return -1; | ||
1413 | |||
1414 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) | ||
1415 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; | ||
1416 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) | ||
1417 | return X86_PMC_IDX_FIXED_CPU_CYCLES; | ||
1418 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) | ||
1419 | return X86_PMC_IDX_FIXED_BUS_CYCLES; | ||
1420 | |||
1421 | return -1; | ||
1422 | } | ||
1423 | |||
1424 | /* | ||
1425 | * generic counter allocator: get next free counter | ||
1426 | */ | ||
1427 | static int | ||
1428 | gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | ||
1429 | { | ||
1430 | int idx; | ||
1431 | |||
1432 | idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events); | ||
1433 | return idx == x86_pmu.num_events ? -1 : idx; | ||
1434 | } | ||
1435 | 961 | ||
1436 | /* | 962 | cpuc->n_events = n; |
1437 | * intel-specific counter allocator: check event constraints | 963 | cpuc->n_added += n - n0; |
1438 | */ | ||
1439 | static int | ||
1440 | intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | ||
1441 | { | ||
1442 | const struct event_constraint *event_constraint; | ||
1443 | int i, code; | ||
1444 | 964 | ||
1445 | if (!event_constraints) | 965 | return 0; |
1446 | goto skip; | ||
1447 | |||
1448 | code = hwc->config & CORE_EVNTSEL_EVENT_MASK; | ||
1449 | |||
1450 | for_each_event_constraint(event_constraint, event_constraints) { | ||
1451 | if (code == event_constraint->code) { | ||
1452 | for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) { | ||
1453 | if (!test_and_set_bit(i, cpuc->used_mask)) | ||
1454 | return i; | ||
1455 | } | ||
1456 | return -1; | ||
1457 | } | ||
1458 | } | ||
1459 | skip: | ||
1460 | return gen_get_event_idx(cpuc, hwc); | ||
1461 | } | ||
1462 | |||
1463 | static int | ||
1464 | x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | ||
1465 | { | ||
1466 | int idx; | ||
1467 | |||
1468 | idx = fixed_mode_idx(hwc); | ||
1469 | if (idx == X86_PMC_IDX_FIXED_BTS) { | ||
1470 | /* BTS is already occupied. */ | ||
1471 | if (test_and_set_bit(idx, cpuc->used_mask)) | ||
1472 | return -EAGAIN; | ||
1473 | |||
1474 | hwc->config_base = 0; | ||
1475 | hwc->event_base = 0; | ||
1476 | hwc->idx = idx; | ||
1477 | } else if (idx >= 0) { | ||
1478 | /* | ||
1479 | * Try to get the fixed event, if that is already taken | ||
1480 | * then try to get a generic event: | ||
1481 | */ | ||
1482 | if (test_and_set_bit(idx, cpuc->used_mask)) | ||
1483 | goto try_generic; | ||
1484 | |||
1485 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; | ||
1486 | /* | ||
1487 | * We set it so that event_base + idx in wrmsr/rdmsr maps to | ||
1488 | * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: | ||
1489 | */ | ||
1490 | hwc->event_base = | ||
1491 | MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; | ||
1492 | hwc->idx = idx; | ||
1493 | } else { | ||
1494 | idx = hwc->idx; | ||
1495 | /* Try to get the previous generic event again */ | ||
1496 | if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) { | ||
1497 | try_generic: | ||
1498 | idx = x86_pmu.get_event_idx(cpuc, hwc); | ||
1499 | if (idx == -1) | ||
1500 | return -EAGAIN; | ||
1501 | |||
1502 | set_bit(idx, cpuc->used_mask); | ||
1503 | hwc->idx = idx; | ||
1504 | } | ||
1505 | hwc->config_base = x86_pmu.eventsel; | ||
1506 | hwc->event_base = x86_pmu.perfctr; | ||
1507 | } | ||
1508 | |||
1509 | return idx; | ||
1510 | } | 966 | } |
1511 | 967 | ||
1512 | /* | 968 | static int x86_pmu_start(struct perf_event *event) |
1513 | * Find a PMC slot for the freshly enabled / scheduled in event: | ||
1514 | */ | ||
1515 | static int x86_pmu_enable(struct perf_event *event) | ||
1516 | { | 969 | { |
1517 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 970 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1518 | struct hw_perf_event *hwc = &event->hw; | 971 | int idx = event->hw.idx; |
1519 | int idx; | ||
1520 | |||
1521 | idx = x86_schedule_event(cpuc, hwc); | ||
1522 | if (idx < 0) | ||
1523 | return idx; | ||
1524 | |||
1525 | perf_events_lapic_init(); | ||
1526 | 972 | ||
1527 | x86_pmu.disable(hwc, idx); | 973 | if (idx == -1) |
974 | return -EAGAIN; | ||
1528 | 975 | ||
976 | x86_perf_event_set_period(event); | ||
1529 | cpuc->events[idx] = event; | 977 | cpuc->events[idx] = event; |
1530 | set_bit(idx, cpuc->active_mask); | 978 | __set_bit(idx, cpuc->active_mask); |
1531 | 979 | x86_pmu.enable(event); | |
1532 | x86_perf_event_set_period(event, hwc, idx); | ||
1533 | x86_pmu.enable(hwc, idx); | ||
1534 | |||
1535 | perf_event_update_userpage(event); | 980 | perf_event_update_userpage(event); |
1536 | 981 | ||
1537 | return 0; | 982 | return 0; |
@@ -1539,14 +984,8 @@ static int x86_pmu_enable(struct perf_event *event) | |||
1539 | 984 | ||
1540 | static void x86_pmu_unthrottle(struct perf_event *event) | 985 | static void x86_pmu_unthrottle(struct perf_event *event) |
1541 | { | 986 | { |
1542 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 987 | int ret = x86_pmu_start(event); |
1543 | struct hw_perf_event *hwc = &event->hw; | 988 | WARN_ON_ONCE(ret); |
1544 | |||
1545 | if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || | ||
1546 | cpuc->events[hwc->idx] != event)) | ||
1547 | return; | ||
1548 | |||
1549 | x86_pmu.enable(hwc, hwc->idx); | ||
1550 | } | 989 | } |
1551 | 990 | ||
1552 | void perf_event_print_debug(void) | 991 | void perf_event_print_debug(void) |
@@ -1576,7 +1015,7 @@ void perf_event_print_debug(void) | |||
1576 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); | 1015 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); |
1577 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); | 1016 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); |
1578 | } | 1017 | } |
1579 | pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); | 1018 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); |
1580 | 1019 | ||
1581 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1020 | for (idx = 0; idx < x86_pmu.num_events; idx++) { |
1582 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | 1021 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); |
@@ -1600,257 +1039,50 @@ void perf_event_print_debug(void) | |||
1600 | local_irq_restore(flags); | 1039 | local_irq_restore(flags); |
1601 | } | 1040 | } |
1602 | 1041 | ||
1603 | static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) | 1042 | static void x86_pmu_stop(struct perf_event *event) |
1604 | { | ||
1605 | struct debug_store *ds = cpuc->ds; | ||
1606 | struct bts_record { | ||
1607 | u64 from; | ||
1608 | u64 to; | ||
1609 | u64 flags; | ||
1610 | }; | ||
1611 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
1612 | struct bts_record *at, *top; | ||
1613 | struct perf_output_handle handle; | ||
1614 | struct perf_event_header header; | ||
1615 | struct perf_sample_data data; | ||
1616 | struct pt_regs regs; | ||
1617 | |||
1618 | if (!event) | ||
1619 | return; | ||
1620 | |||
1621 | if (!ds) | ||
1622 | return; | ||
1623 | |||
1624 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
1625 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
1626 | |||
1627 | if (top <= at) | ||
1628 | return; | ||
1629 | |||
1630 | ds->bts_index = ds->bts_buffer_base; | ||
1631 | |||
1632 | |||
1633 | data.period = event->hw.last_period; | ||
1634 | data.addr = 0; | ||
1635 | data.raw = NULL; | ||
1636 | regs.ip = 0; | ||
1637 | |||
1638 | /* | ||
1639 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
1640 | * We will overwrite the from and to address before we output | ||
1641 | * the sample. | ||
1642 | */ | ||
1643 | perf_prepare_sample(&header, &data, event, ®s); | ||
1644 | |||
1645 | if (perf_output_begin(&handle, event, | ||
1646 | header.size * (top - at), 1, 1)) | ||
1647 | return; | ||
1648 | |||
1649 | for (; at < top; at++) { | ||
1650 | data.ip = at->from; | ||
1651 | data.addr = at->to; | ||
1652 | |||
1653 | perf_output_sample(&handle, &header, &data, event); | ||
1654 | } | ||
1655 | |||
1656 | perf_output_end(&handle); | ||
1657 | |||
1658 | /* There's new data available. */ | ||
1659 | event->hw.interrupts++; | ||
1660 | event->pending_kill = POLL_IN; | ||
1661 | } | ||
1662 | |||
1663 | static void x86_pmu_disable(struct perf_event *event) | ||
1664 | { | 1043 | { |
1665 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1044 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1666 | struct hw_perf_event *hwc = &event->hw; | 1045 | struct hw_perf_event *hwc = &event->hw; |
1667 | int idx = hwc->idx; | 1046 | int idx = hwc->idx; |
1668 | 1047 | ||
1669 | /* | 1048 | if (!__test_and_clear_bit(idx, cpuc->active_mask)) |
1670 | * Must be done before we disable, otherwise the nmi handler | 1049 | return; |
1671 | * could reenable again: | ||
1672 | */ | ||
1673 | clear_bit(idx, cpuc->active_mask); | ||
1674 | x86_pmu.disable(hwc, idx); | ||
1675 | 1050 | ||
1676 | /* | 1051 | x86_pmu.disable(event); |
1677 | * Make sure the cleared pointer becomes visible before we | ||
1678 | * (potentially) free the event: | ||
1679 | */ | ||
1680 | barrier(); | ||
1681 | 1052 | ||
1682 | /* | 1053 | /* |
1683 | * Drain the remaining delta count out of an event | 1054 | * Drain the remaining delta count out of an event |
1684 | * that we are disabling: | 1055 | * that we are disabling: |
1685 | */ | 1056 | */ |
1686 | x86_perf_event_update(event, hwc, idx); | 1057 | x86_perf_event_update(event); |
1687 | |||
1688 | /* Drain the remaining BTS records. */ | ||
1689 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) | ||
1690 | intel_pmu_drain_bts_buffer(cpuc); | ||
1691 | 1058 | ||
1692 | cpuc->events[idx] = NULL; | 1059 | cpuc->events[idx] = NULL; |
1693 | clear_bit(idx, cpuc->used_mask); | ||
1694 | |||
1695 | perf_event_update_userpage(event); | ||
1696 | } | ||
1697 | |||
1698 | /* | ||
1699 | * Save and restart an expired event. Called by NMI contexts, | ||
1700 | * so it has to be careful about preempting normal event ops: | ||
1701 | */ | ||
1702 | static int intel_pmu_save_and_restart(struct perf_event *event) | ||
1703 | { | ||
1704 | struct hw_perf_event *hwc = &event->hw; | ||
1705 | int idx = hwc->idx; | ||
1706 | int ret; | ||
1707 | |||
1708 | x86_perf_event_update(event, hwc, idx); | ||
1709 | ret = x86_perf_event_set_period(event, hwc, idx); | ||
1710 | |||
1711 | if (event->state == PERF_EVENT_STATE_ACTIVE) | ||
1712 | intel_pmu_enable_event(hwc, idx); | ||
1713 | |||
1714 | return ret; | ||
1715 | } | ||
1716 | |||
1717 | static void intel_pmu_reset(void) | ||
1718 | { | ||
1719 | struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; | ||
1720 | unsigned long flags; | ||
1721 | int idx; | ||
1722 | |||
1723 | if (!x86_pmu.num_events) | ||
1724 | return; | ||
1725 | |||
1726 | local_irq_save(flags); | ||
1727 | |||
1728 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | ||
1729 | |||
1730 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
1731 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | ||
1732 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | ||
1733 | } | ||
1734 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | ||
1735 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | ||
1736 | } | ||
1737 | if (ds) | ||
1738 | ds->bts_index = ds->bts_buffer_base; | ||
1739 | |||
1740 | local_irq_restore(flags); | ||
1741 | } | ||
1742 | |||
1743 | static int p6_pmu_handle_irq(struct pt_regs *regs) | ||
1744 | { | ||
1745 | struct perf_sample_data data; | ||
1746 | struct cpu_hw_events *cpuc; | ||
1747 | struct perf_event *event; | ||
1748 | struct hw_perf_event *hwc; | ||
1749 | int idx, handled = 0; | ||
1750 | u64 val; | ||
1751 | |||
1752 | data.addr = 0; | ||
1753 | data.raw = NULL; | ||
1754 | |||
1755 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
1756 | |||
1757 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
1758 | if (!test_bit(idx, cpuc->active_mask)) | ||
1759 | continue; | ||
1760 | |||
1761 | event = cpuc->events[idx]; | ||
1762 | hwc = &event->hw; | ||
1763 | |||
1764 | val = x86_perf_event_update(event, hwc, idx); | ||
1765 | if (val & (1ULL << (x86_pmu.event_bits - 1))) | ||
1766 | continue; | ||
1767 | |||
1768 | /* | ||
1769 | * event overflow | ||
1770 | */ | ||
1771 | handled = 1; | ||
1772 | data.period = event->hw.last_period; | ||
1773 | |||
1774 | if (!x86_perf_event_set_period(event, hwc, idx)) | ||
1775 | continue; | ||
1776 | |||
1777 | if (perf_event_overflow(event, 1, &data, regs)) | ||
1778 | p6_pmu_disable_event(hwc, idx); | ||
1779 | } | ||
1780 | |||
1781 | if (handled) | ||
1782 | inc_irq_stat(apic_perf_irqs); | ||
1783 | |||
1784 | return handled; | ||
1785 | } | 1060 | } |
1786 | 1061 | ||
1787 | /* | 1062 | static void x86_pmu_disable(struct perf_event *event) |
1788 | * This handler is triggered by the local APIC, so the APIC IRQ handling | ||
1789 | * rules apply: | ||
1790 | */ | ||
1791 | static int intel_pmu_handle_irq(struct pt_regs *regs) | ||
1792 | { | 1063 | { |
1793 | struct perf_sample_data data; | 1064 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1794 | struct cpu_hw_events *cpuc; | 1065 | int i; |
1795 | int bit, loops; | ||
1796 | u64 ack, status; | ||
1797 | |||
1798 | data.addr = 0; | ||
1799 | data.raw = NULL; | ||
1800 | |||
1801 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
1802 | 1066 | ||
1803 | perf_disable(); | 1067 | x86_pmu_stop(event); |
1804 | intel_pmu_drain_bts_buffer(cpuc); | ||
1805 | status = intel_pmu_get_status(); | ||
1806 | if (!status) { | ||
1807 | perf_enable(); | ||
1808 | return 0; | ||
1809 | } | ||
1810 | 1068 | ||
1811 | loops = 0; | 1069 | for (i = 0; i < cpuc->n_events; i++) { |
1812 | again: | 1070 | if (event == cpuc->event_list[i]) { |
1813 | if (++loops > 100) { | ||
1814 | WARN_ONCE(1, "perfevents: irq loop stuck!\n"); | ||
1815 | perf_event_print_debug(); | ||
1816 | intel_pmu_reset(); | ||
1817 | perf_enable(); | ||
1818 | return 1; | ||
1819 | } | ||
1820 | |||
1821 | inc_irq_stat(apic_perf_irqs); | ||
1822 | ack = status; | ||
1823 | for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | ||
1824 | struct perf_event *event = cpuc->events[bit]; | ||
1825 | 1071 | ||
1826 | clear_bit(bit, (unsigned long *) &status); | 1072 | if (x86_pmu.put_event_constraints) |
1827 | if (!test_bit(bit, cpuc->active_mask)) | 1073 | x86_pmu.put_event_constraints(cpuc, event); |
1828 | continue; | ||
1829 | 1074 | ||
1830 | if (!intel_pmu_save_and_restart(event)) | 1075 | while (++i < cpuc->n_events) |
1831 | continue; | 1076 | cpuc->event_list[i-1] = cpuc->event_list[i]; |
1832 | 1077 | ||
1833 | data.period = event->hw.last_period; | 1078 | --cpuc->n_events; |
1834 | 1079 | break; | |
1835 | if (perf_event_overflow(event, 1, &data, regs)) | 1080 | } |
1836 | intel_pmu_disable_event(&event->hw, bit); | ||
1837 | } | 1081 | } |
1838 | 1082 | perf_event_update_userpage(event); | |
1839 | intel_pmu_ack_status(ack); | ||
1840 | |||
1841 | /* | ||
1842 | * Repeat if there is more work to be done: | ||
1843 | */ | ||
1844 | status = intel_pmu_get_status(); | ||
1845 | if (status) | ||
1846 | goto again; | ||
1847 | |||
1848 | perf_enable(); | ||
1849 | |||
1850 | return 1; | ||
1851 | } | 1083 | } |
1852 | 1084 | ||
1853 | static int amd_pmu_handle_irq(struct pt_regs *regs) | 1085 | static int x86_pmu_handle_irq(struct pt_regs *regs) |
1854 | { | 1086 | { |
1855 | struct perf_sample_data data; | 1087 | struct perf_sample_data data; |
1856 | struct cpu_hw_events *cpuc; | 1088 | struct cpu_hw_events *cpuc; |
@@ -1859,8 +1091,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) | |||
1859 | int idx, handled = 0; | 1091 | int idx, handled = 0; |
1860 | u64 val; | 1092 | u64 val; |
1861 | 1093 | ||
1862 | data.addr = 0; | 1094 | perf_sample_data_init(&data, 0); |
1863 | data.raw = NULL; | ||
1864 | 1095 | ||
1865 | cpuc = &__get_cpu_var(cpu_hw_events); | 1096 | cpuc = &__get_cpu_var(cpu_hw_events); |
1866 | 1097 | ||
@@ -1871,7 +1102,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) | |||
1871 | event = cpuc->events[idx]; | 1102 | event = cpuc->events[idx]; |
1872 | hwc = &event->hw; | 1103 | hwc = &event->hw; |
1873 | 1104 | ||
1874 | val = x86_perf_event_update(event, hwc, idx); | 1105 | val = x86_perf_event_update(event); |
1875 | if (val & (1ULL << (x86_pmu.event_bits - 1))) | 1106 | if (val & (1ULL << (x86_pmu.event_bits - 1))) |
1876 | continue; | 1107 | continue; |
1877 | 1108 | ||
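
The sign-bit test above detects overflow without reading any status register. With AMD's 48-bit counters (event_bits = 48):

/*
 * x86_perf_event_set_period() arms the counter with -left, i.e.
 * 2^48 - left, which has bit 47 set for any period below 2^47.
 * The counter counts up and wraps to a small value on overflow,
 * clearing bit 47:
 *
 *   val & (1ULL << 47) set    -> still counting, skip it
 *   val & (1ULL << 47) clear  -> this counter overflowed, handle it
 */
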
@@ -1881,11 +1112,11 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) | |||
1881 | handled = 1; | 1112 | handled = 1; |
1882 | data.period = event->hw.last_period; | 1113 | data.period = event->hw.last_period; |
1883 | 1114 | ||
1884 | if (!x86_perf_event_set_period(event, hwc, idx)) | 1115 | if (!x86_perf_event_set_period(event)) |
1885 | continue; | 1116 | continue; |
1886 | 1117 | ||
1887 | if (perf_event_overflow(event, 1, &data, regs)) | 1118 | if (perf_event_overflow(event, 1, &data, regs)) |
1888 | amd_pmu_disable_event(hwc, idx); | 1119 | x86_pmu_stop(event); |
1889 | } | 1120 | } |
1890 | 1121 | ||
1891 | if (handled) | 1122 | if (handled) |
@@ -1968,193 +1199,171 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { | |||
1968 | .priority = 1 | 1199 | .priority = 1 |
1969 | }; | 1200 | }; |
1970 | 1201 | ||
1971 | static __initconst struct x86_pmu p6_pmu = { | 1202 | static struct event_constraint unconstrained; |
1972 | .name = "p6", | 1203 | static struct event_constraint emptyconstraint; |
1973 | .handle_irq = p6_pmu_handle_irq, | ||
1974 | .disable_all = p6_pmu_disable_all, | ||
1975 | .enable_all = p6_pmu_enable_all, | ||
1976 | .enable = p6_pmu_enable_event, | ||
1977 | .disable = p6_pmu_disable_event, | ||
1978 | .eventsel = MSR_P6_EVNTSEL0, | ||
1979 | .perfctr = MSR_P6_PERFCTR0, | ||
1980 | .event_map = p6_pmu_event_map, | ||
1981 | .raw_event = p6_pmu_raw_event, | ||
1982 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | ||
1983 | .apic = 1, | ||
1984 | .max_period = (1ULL << 31) - 1, | ||
1985 | .version = 0, | ||
1986 | .num_events = 2, | ||
1987 | /* | ||
1988 | * Events have 40 bits implemented. However they are designed such | ||
1989 | * that bits [32-39] are sign extensions of bit 31. As such the | ||
1990 | * effective width of an event for a P6-like PMU is 32 bits only. | ||
1991 | * | ||
1992 | * See IA-32 Intel Architecture Software developer manual Vol 3B | ||
1993 | */ | ||
1994 | .event_bits = 32, | ||
1995 | .event_mask = (1ULL << 32) - 1, | ||
1996 | .get_event_idx = intel_get_event_idx, | ||
1997 | }; | ||
1998 | 1204 | ||
1999 | static __initconst struct x86_pmu intel_pmu = { | 1205 | static struct event_constraint * |
2000 | .name = "Intel", | 1206 | x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) |
2001 | .handle_irq = intel_pmu_handle_irq, | 1207 | { |
2002 | .disable_all = intel_pmu_disable_all, | 1208 | struct event_constraint *c; |
2003 | .enable_all = intel_pmu_enable_all, | ||
2004 | .enable = intel_pmu_enable_event, | ||
2005 | .disable = intel_pmu_disable_event, | ||
2006 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
2007 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
2008 | .event_map = intel_pmu_event_map, | ||
2009 | .raw_event = intel_pmu_raw_event, | ||
2010 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
2011 | .apic = 1, | ||
2012 | /* | ||
2013 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
2014 | * so we install an artificial 1<<31 period regardless of | ||
2015 | * the generic event period: | ||
2016 | */ | ||
2017 | .max_period = (1ULL << 31) - 1, | ||
2018 | .enable_bts = intel_pmu_enable_bts, | ||
2019 | .disable_bts = intel_pmu_disable_bts, | ||
2020 | .get_event_idx = intel_get_event_idx, | ||
2021 | }; | ||
2022 | 1209 | ||
2023 | static __initconst struct x86_pmu amd_pmu = { | 1210 | if (x86_pmu.event_constraints) { |
2024 | .name = "AMD", | 1211 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
2025 | .handle_irq = amd_pmu_handle_irq, | 1212 | if ((event->hw.config & c->cmask) == c->code) |
2026 | .disable_all = amd_pmu_disable_all, | 1213 | return c; |
2027 | .enable_all = amd_pmu_enable_all, | 1214 | } |
2028 | .enable = amd_pmu_enable_event, | 1215 | } |
2029 | .disable = amd_pmu_disable_event, | 1216 | |
2030 | .eventsel = MSR_K7_EVNTSEL0, | 1217 | return &unconstrained; |
2031 | .perfctr = MSR_K7_PERFCTR0, | 1218 | } |
2032 | .event_map = amd_pmu_event_map, | ||
2033 | .raw_event = amd_pmu_raw_event, | ||
2034 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | ||
2035 | .num_events = 4, | ||
2036 | .event_bits = 48, | ||
2037 | .event_mask = (1ULL << 48) - 1, | ||
2038 | .apic = 1, | ||
2039 | /* use highest bit to detect overflow */ | ||
2040 | .max_period = (1ULL << 47) - 1, | ||
2041 | .get_event_idx = gen_get_event_idx, | ||
2042 | }; | ||
2043 | 1219 | ||
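
x86_get_event_constraints() above is a simple masked match. By example (values illustrative, not taken from a real constraint table):

/*
 * A constraint with .code = 0x51 and .cmask = 0xff matches every
 * event whose low event-select byte is 0x51, whatever its unit
 * mask, because the test is (config & cmask) == code. Events that
 * match no table entry fall back to "unconstrained", which permits
 * any generic counter.
 */
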
2044 | static __init int p6_pmu_init(void) | 1220 | static int x86_event_sched_in(struct perf_event *event, |
1221 | struct perf_cpu_context *cpuctx) | ||
2045 | { | 1222 | { |
2046 | switch (boot_cpu_data.x86_model) { | 1223 | int ret = 0; |
2047 | case 1: | ||
2048 | case 3: /* Pentium Pro */ | ||
2049 | case 5: | ||
2050 | case 6: /* Pentium II */ | ||
2051 | case 7: | ||
2052 | case 8: | ||
2053 | case 11: /* Pentium III */ | ||
2054 | event_constraints = intel_p6_event_constraints; | ||
2055 | break; | ||
2056 | case 9: | ||
2057 | case 13: | ||
2058 | /* Pentium M */ | ||
2059 | event_constraints = intel_p6_event_constraints; | ||
2060 | break; | ||
2061 | default: | ||
2062 | pr_cont("unsupported p6 CPU model %d ", | ||
2063 | boot_cpu_data.x86_model); | ||
2064 | return -ENODEV; | ||
2065 | } | ||
2066 | 1224 | ||
2067 | x86_pmu = p6_pmu; | 1225 | event->state = PERF_EVENT_STATE_ACTIVE; |
1226 | event->oncpu = smp_processor_id(); | ||
1227 | event->tstamp_running += event->ctx->time - event->tstamp_stopped; | ||
2068 | 1228 | ||
2069 | return 0; | 1229 | if (!is_x86_event(event)) |
1230 | ret = event->pmu->enable(event); | ||
1231 | |||
1232 | if (!ret && !is_software_event(event)) | ||
1233 | cpuctx->active_oncpu++; | ||
1234 | |||
1235 | if (!ret && event->attr.exclusive) | ||
1236 | cpuctx->exclusive = 1; | ||
1237 | |||
1238 | return ret; | ||
2070 | } | 1239 | } |
2071 | 1240 | ||
2072 | static __init int intel_pmu_init(void) | 1241 | static void x86_event_sched_out(struct perf_event *event, |
1242 | struct perf_cpu_context *cpuctx) | ||
2073 | { | 1243 | { |
2074 | union cpuid10_edx edx; | 1244 | event->state = PERF_EVENT_STATE_INACTIVE; |
2075 | union cpuid10_eax eax; | 1245 | event->oncpu = -1; |
2076 | unsigned int unused; | ||
2077 | unsigned int ebx; | ||
2078 | int version; | ||
2079 | |||
2080 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
2081 | /* check for P6 processor family */ | ||
2082 | if (boot_cpu_data.x86 == 6) { | ||
2083 | return p6_pmu_init(); | ||
2084 | } else { | ||
2085 | return -ENODEV; | ||
2086 | } | ||
2087 | } | ||
2088 | 1246 | ||
2089 | /* | 1247 | if (!is_x86_event(event)) |
2090 | * Check whether the Architectural PerfMon supports | 1248 | event->pmu->disable(event); |
2091 | * Branch Misses Retired hw_event or not. | ||
2092 | */ | ||
2093 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | ||
2094 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | ||
2095 | return -ENODEV; | ||
2096 | 1249 | ||
2097 | version = eax.split.version_id; | 1250 | event->tstamp_running -= event->ctx->time - event->tstamp_stopped; |
2098 | if (version < 2) | ||
2099 | return -ENODEV; | ||
2100 | 1251 | ||
2101 | x86_pmu = intel_pmu; | 1252 | if (!is_software_event(event)) |
2102 | x86_pmu.version = version; | 1253 | cpuctx->active_oncpu--; |
2103 | x86_pmu.num_events = eax.split.num_events; | ||
2104 | x86_pmu.event_bits = eax.split.bit_width; | ||
2105 | x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; | ||
2106 | 1254 | ||
1255 | if (event->attr.exclusive || !cpuctx->active_oncpu) | ||
1256 | cpuctx->exclusive = 0; | ||
1257 | } | ||
1258 | |||
1259 | /* | ||
1260 | * Called to enable a whole group of events. | ||
1261 | * Returns 1 if the group was enabled, or -EAGAIN if it could not be. | ||
1262 | * Assumes the caller has disabled interrupts and has | ||
1263 | * frozen the PMU with hw_perf_save_disable. | ||
1264 | * | ||
1265 | * called with PMU disabled. If successful and the return value is 1, | ||
1266 | * the caller is then guaranteed to call perf_enable() and hw_perf_enable() | ||
1267 | */ | ||
1268 | int hw_perf_group_sched_in(struct perf_event *leader, | ||
1269 | struct perf_cpu_context *cpuctx, | ||
1270 | struct perf_event_context *ctx) | ||
1271 | { | ||
1272 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1273 | struct perf_event *sub; | ||
1274 | int assign[X86_PMC_IDX_MAX]; | ||
1275 | int n0, n1, ret; | ||
1276 | |||
1277 | /* n0 = total number of events */ | ||
1278 | n0 = collect_events(cpuc, leader, true); | ||
1279 | if (n0 < 0) | ||
1280 | return n0; | ||
1281 | |||
1282 | ret = x86_schedule_events(cpuc, n0, assign); | ||
1283 | if (ret) | ||
1284 | return ret; | ||
1285 | |||
1286 | ret = x86_event_sched_in(leader, cpuctx); | ||
1287 | if (ret) | ||
1288 | return ret; | ||
1289 | |||
1290 | n1 = 1; | ||
1291 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | ||
1292 | if (sub->state > PERF_EVENT_STATE_OFF) { | ||
1293 | ret = x86_event_sched_in(sub, cpuctx); | ||
1294 | if (ret) | ||
1295 | goto undo; | ||
1296 | ++n1; | ||
1297 | } | ||
1298 | } | ||
2107 | /* | 1299 | /* |
2108 | * Quirk: v2 perfmon does not report fixed-purpose events, so | 1300 | * copy new assignment, now we know it is possible |
2109 | * assume at least 3 events: | 1301 | * will be used by hw_perf_enable() |
2110 | */ | 1302 | */ |
2111 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | 1303 | memcpy(cpuc->assign, assign, n0*sizeof(int)); |
1304 | |||
1305 | cpuc->n_events = n0; | ||
1306 | cpuc->n_added += n1; | ||
1307 | ctx->nr_active += n1; | ||
2112 | 1308 | ||
2113 | /* | 1309 | /* |
2114 | * Install the hw-cache-events table: | 1310 | * 1 means successful and events are active |
1311 | * This is not quite true because we defer | ||
1312 | * actual activation until hw_perf_enable() but | ||
1313 | * this way we ensure the caller won't try to enable | ||
1314 | * individual events | ||
2115 | */ | 1315 | */ |
2116 | switch (boot_cpu_data.x86_model) { | 1316 | return 1; |
2117 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | 1317 | undo: |
2118 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | 1318 | x86_event_sched_out(leader, cpuctx); |
2119 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | 1319 | n0 = 1; |
2120 | case 29: /* six-core 45 nm xeon "Dunnington" */ | 1320 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { |
2121 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | 1321 | if (sub->state == PERF_EVENT_STATE_ACTIVE) { |
2122 | sizeof(hw_cache_event_ids)); | 1322 | x86_event_sched_out(sub, cpuctx); |
2123 | 1323 | if (++n0 == n1) | |
2124 | pr_cont("Core2 events, "); | 1324 | break; |
2125 | event_constraints = intel_core_event_constraints; | 1325 | } |
2126 | break; | 1326 | } |
2127 | default: | 1327 | return ret; |
2128 | case 26: | 1328 | } |
2129 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | 1329 | |
2130 | sizeof(hw_cache_event_ids)); | 1330 | #include "perf_event_amd.c" |
1331 | #include "perf_event_p6.c" | ||
1332 | #include "perf_event_intel.c" | ||
2131 | 1333 | ||
2132 | event_constraints = intel_nehalem_event_constraints; | 1334 | static int __cpuinit |
2133 | pr_cont("Nehalem/Corei7 events, "); | 1335 | x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) |
1336 | { | ||
1337 | unsigned int cpu = (long)hcpu; | ||
1338 | int ret = NOTIFY_OK; | ||
1339 | |||
1340 | switch (action & ~CPU_TASKS_FROZEN) { | ||
1341 | case CPU_UP_PREPARE: | ||
1342 | if (x86_pmu.cpu_prepare) | ||
1343 | ret = x86_pmu.cpu_prepare(cpu); | ||
2134 | break; | 1344 | break; |
2135 | case 28: | ||
2136 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | ||
2137 | sizeof(hw_cache_event_ids)); | ||
2138 | 1345 | ||
2139 | pr_cont("Atom events, "); | 1346 | case CPU_STARTING: |
1347 | if (x86_pmu.cpu_starting) | ||
1348 | x86_pmu.cpu_starting(cpu); | ||
2140 | break; | 1349 | break; |
2141 | } | ||
2142 | return 0; | ||
2143 | } | ||
2144 | 1350 | ||
2145 | static __init int amd_pmu_init(void) | 1351 | case CPU_DYING: |
2146 | { | 1352 | if (x86_pmu.cpu_dying) |
2147 | /* Performance-monitoring supported from K7 and later: */ | 1353 | x86_pmu.cpu_dying(cpu); |
2148 | if (boot_cpu_data.x86 < 6) | 1354 | break; |
2149 | return -ENODEV; | ||
2150 | 1355 | ||
2151 | x86_pmu = amd_pmu; | 1356 | case CPU_UP_CANCELED: |
1357 | case CPU_DEAD: | ||
1358 | if (x86_pmu.cpu_dead) | ||
1359 | x86_pmu.cpu_dead(cpu); | ||
1360 | break; | ||
2152 | 1361 | ||
2153 | /* Events are common for all AMDs */ | 1362 | default: |
2154 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, | 1363 | break; |
2155 | sizeof(hw_cache_event_ids)); | 1364 | } |
2156 | 1365 | ||
2157 | return 0; | 1366 | return ret; |
2158 | } | 1367 | } |
2159 | 1368 | ||
2160 | static void __init pmu_check_apic(void) | 1369 | static void __init pmu_check_apic(void) |
@@ -2169,6 +1378,7 @@ static void __init pmu_check_apic(void) | |||
2169 | 1378 | ||
2170 | void __init init_hw_perf_events(void) | 1379 | void __init init_hw_perf_events(void) |
2171 | { | 1380 | { |
1381 | struct event_constraint *c; | ||
2172 | int err; | 1382 | int err; |
2173 | 1383 | ||
2174 | pr_info("Performance Events: "); | 1384 | pr_info("Performance Events: "); |
@@ -2213,6 +1423,20 @@ void __init init_hw_perf_events(void) | |||
2213 | perf_events_lapic_init(); | 1423 | perf_events_lapic_init(); |
2214 | register_die_notifier(&perf_event_nmi_notifier); | 1424 | register_die_notifier(&perf_event_nmi_notifier); |
2215 | 1425 | ||
1426 | unconstrained = (struct event_constraint) | ||
1427 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, | ||
1428 | 0, x86_pmu.num_events); | ||
1429 | |||
1430 | if (x86_pmu.event_constraints) { | ||
1431 | for_each_event_constraint(c, x86_pmu.event_constraints) { | ||
1432 | if (c->cmask != INTEL_ARCH_FIXED_MASK) | ||
1433 | continue; | ||
1434 | |||
1435 | c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1; | ||
1436 | c->weight += x86_pmu.num_events; | ||
1437 | } | ||
1438 | } | ||
1439 | |||
2216 | pr_info("... version: %d\n", x86_pmu.version); | 1440 | pr_info("... version: %d\n", x86_pmu.version); |
2217 | pr_info("... bit width: %d\n", x86_pmu.event_bits); | 1441 | pr_info("... bit width: %d\n", x86_pmu.event_bits); |
2218 | pr_info("... generic registers: %d\n", x86_pmu.num_events); | 1442 | pr_info("... generic registers: %d\n", x86_pmu.num_events); |
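
To make the init-time constraint fixups above concrete: on a PMU with four generic counters,

/*
 * unconstrained.idxmsk64 = (1ULL << 4) - 1 = 0xf    (weight 4)
 *
 * and each fixed-counter constraint gets those four generic bits
 * OR-ed into its idxmsk (weight += 4), so an event eligible for a
 * fixed counter can still fall back to a generic counter when the
 * fixed one is already taken.
 */
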
@@ -2220,60 +1444,91 @@ void __init init_hw_perf_events(void) | |||
2220 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); | 1444 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); |
2221 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); | 1445 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); |
2222 | pr_info("... event mask: %016Lx\n", perf_event_mask); | 1446 | pr_info("... event mask: %016Lx\n", perf_event_mask); |
1447 | |||
1448 | perf_cpu_notifier(x86_pmu_notifier); | ||
2223 | } | 1449 | } |
2224 | 1450 | ||
2225 | static inline void x86_pmu_read(struct perf_event *event) | 1451 | static inline void x86_pmu_read(struct perf_event *event) |
2226 | { | 1452 | { |
2227 | x86_perf_event_update(event, &event->hw, event->hw.idx); | 1453 | x86_perf_event_update(event); |
2228 | } | 1454 | } |
2229 | 1455 | ||
2230 | static const struct pmu pmu = { | 1456 | static const struct pmu pmu = { |
2231 | .enable = x86_pmu_enable, | 1457 | .enable = x86_pmu_enable, |
2232 | .disable = x86_pmu_disable, | 1458 | .disable = x86_pmu_disable, |
1459 | .start = x86_pmu_start, | ||
1460 | .stop = x86_pmu_stop, | ||
2233 | .read = x86_pmu_read, | 1461 | .read = x86_pmu_read, |
2234 | .unthrottle = x86_pmu_unthrottle, | 1462 | .unthrottle = x86_pmu_unthrottle, |
2235 | }; | 1463 | }; |
2236 | 1464 | ||
2237 | static int | 1465 | /* |
2238 | validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) | 1466 | * validate a single event group |
2239 | { | 1467 | * |
2240 | struct hw_perf_event fake_event = event->hw; | 1468 | * validation includes: |
2241 | 1469 | * - check events are compatible with each other |
2242 | if (event->pmu && event->pmu != &pmu) | 1470 | * - events do not compete for the same counter |
2243 | return 0; | 1471 | * - number of events <= number of counters |
2244 | 1472 | * | |
2245 | return x86_schedule_event(cpuc, &fake_event) >= 0; | 1473 | * validation ensures the group can be loaded onto the |
2246 | } | 1474 | * PMU if it was the only group available. |
2247 | 1475 | */ | |
2248 | static int validate_group(struct perf_event *event) | 1476 | static int validate_group(struct perf_event *event) |
2249 | { | 1477 | { |
2250 | struct perf_event *sibling, *leader = event->group_leader; | 1478 | struct perf_event *leader = event->group_leader; |
2251 | struct cpu_hw_events fake_pmu; | 1479 | struct cpu_hw_events *fake_cpuc; |
1480 | int ret, n; | ||
2252 | 1481 | ||
2253 | memset(&fake_pmu, 0, sizeof(fake_pmu)); | 1482 | ret = -ENOMEM; |
1483 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | ||
1484 | if (!fake_cpuc) | ||
1485 | goto out; | ||
2254 | 1486 | ||
2255 | if (!validate_event(&fake_pmu, leader)) | 1487 | /* |
2256 | return -ENOSPC; | 1488 | * the event is not yet connected with its |
1489 | * siblings, therefore we must first collect | ||
1490 | * existing siblings, then add the new event | ||
1491 | * before we can simulate the scheduling | ||
1492 | */ | ||
1493 | ret = -ENOSPC; | ||
1494 | n = collect_events(fake_cpuc, leader, true); | ||
1495 | if (n < 0) | ||
1496 | goto out_free; | ||
2257 | 1497 | ||
2258 | list_for_each_entry(sibling, &leader->sibling_list, group_entry) { | 1498 | fake_cpuc->n_events = n; |
2259 | if (!validate_event(&fake_pmu, sibling)) | 1499 | n = collect_events(fake_cpuc, event, false); |
2260 | return -ENOSPC; | 1500 | if (n < 0) |
2261 | } | 1501 | goto out_free; |
2262 | 1502 | ||
2263 | if (!validate_event(&fake_pmu, event)) | 1503 | fake_cpuc->n_events = n; |
2264 | return -ENOSPC; | ||
2265 | 1504 | ||
2266 | return 0; | 1505 | ret = x86_schedule_events(fake_cpuc, n, NULL); |
1506 | |||
1507 | out_free: | ||
1508 | kfree(fake_cpuc); | ||
1509 | out: | ||
1510 | return ret; | ||
2267 | } | 1511 | } |
2268 | 1512 | ||
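
Note that validate_group() passes a NULL assign array to x86_schedule_events(); that is the "just a simulation" mode mentioned in the scheduler's own comment: constraints are evaluated and counters tentatively tried, but nothing is committed, so a heap-allocated fake cpu_hw_events is all the state the dry run needs.
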
2269 | const struct pmu *hw_perf_event_init(struct perf_event *event) | 1513 | const struct pmu *hw_perf_event_init(struct perf_event *event) |
2270 | { | 1514 | { |
1515 | const struct pmu *tmp; | ||
2271 | int err; | 1516 | int err; |
2272 | 1517 | ||
2273 | err = __hw_perf_event_init(event); | 1518 | err = __hw_perf_event_init(event); |
2274 | if (!err) { | 1519 | if (!err) { |
1520 | /* | ||
1521 | * we temporarily connect event to its pmu | ||
1522 | * such that validate_group() can classify | ||
1523 | * it as an x86 event using is_x86_event() | ||
1524 | */ | ||
1525 | tmp = event->pmu; | ||
1526 | event->pmu = &pmu; | ||
1527 | |||
2275 | if (event->group_leader != event) | 1528 | if (event->group_leader != event) |
2276 | err = validate_group(event); | 1529 | err = validate_group(event); |
1530 | |||
1531 | event->pmu = tmp; | ||
2277 | } | 1532 | } |
2278 | if (err) { | 1533 | if (err) { |
2279 | if (event->destroy) | 1534 | if (event->destroy) |
@@ -2297,7 +1552,6 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) | |||
2297 | 1552 | ||
2298 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); | 1553 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); |
2299 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); | 1554 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); |
2300 | static DEFINE_PER_CPU(int, in_ignored_frame); | ||
2301 | 1555 | ||
2302 | 1556 | ||
2303 | static void | 1557 | static void |
@@ -2313,10 +1567,6 @@ static void backtrace_warning(void *data, char *msg) | |||
2313 | 1567 | ||
2314 | static int backtrace_stack(void *data, char *name) | 1568 | static int backtrace_stack(void *data, char *name) |
2315 | { | 1569 | { |
2316 | per_cpu(in_ignored_frame, smp_processor_id()) = | ||
2317 | x86_is_stack_id(NMI_STACK, name) || | ||
2318 | x86_is_stack_id(DEBUG_STACK, name); | ||
2319 | |||
2320 | return 0; | 1570 | return 0; |
2321 | } | 1571 | } |
2322 | 1572 | ||
@@ -2324,9 +1574,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
2324 | { | 1574 | { |
2325 | struct perf_callchain_entry *entry = data; | 1575 | struct perf_callchain_entry *entry = data; |
2326 | 1576 | ||
2327 | if (per_cpu(in_ignored_frame, smp_processor_id())) | ||
2328 | return; | ||
2329 | |||
2330 | if (reliable) | 1577 | if (reliable) |
2331 | callchain_store(entry, addr); | 1578 | callchain_store(entry, addr); |
2332 | } | 1579 | } |
@@ -2347,7 +1594,7 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
2347 | callchain_store(entry, PERF_CONTEXT_KERNEL); | 1594 | callchain_store(entry, PERF_CONTEXT_KERNEL); |
2348 | callchain_store(entry, regs->ip); | 1595 | callchain_store(entry, regs->ip); |
2349 | 1596 | ||
2350 | dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); | 1597 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); |
2351 | } | 1598 | } |
2352 | 1599 | ||
2353 | /* | 1600 | /* |
@@ -2385,14 +1632,42 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | |||
2385 | return len; | 1632 | return len; |
2386 | } | 1633 | } |
2387 | 1634 | ||
2388 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | 1635 | #ifdef CONFIG_COMPAT |
1636 | static inline int | ||
1637 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
2389 | { | 1638 | { |
2390 | unsigned long bytes; | 1639 | /* 32-bit process in 64-bit kernel. */ |
1640 | struct stack_frame_ia32 frame; | ||
1641 | const void __user *fp; | ||
1642 | |||
1643 | if (!test_thread_flag(TIF_IA32)) | ||
1644 | return 0; | ||
1645 | |||
1646 | fp = compat_ptr(regs->bp); | ||
1647 | while (entry->nr < PERF_MAX_STACK_DEPTH) { | ||
1648 | unsigned long bytes; | ||
1649 | frame.next_frame = 0; | ||
1650 | frame.return_address = 0; | ||
2391 | 1651 | ||
2392 | bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); | 1652 | bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); |
1653 | if (bytes != sizeof(frame)) | ||
1654 | break; | ||
1655 | |||
1656 | if (fp < compat_ptr(regs->sp)) | ||
1657 | break; | ||
2393 | 1658 | ||
2394 | return bytes == sizeof(*frame); | 1659 | callchain_store(entry, frame.return_address); |
1660 | fp = compat_ptr(frame.next_frame); | ||
1661 | } | ||
1662 | return 1; | ||
1663 | } | ||
1664 | #else | ||
1665 | static inline int | ||
1666 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
1667 | { | ||
1668 | return 0; | ||
2395 | } | 1669 | } |
1670 | #endif | ||
2396 | 1671 | ||
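
The compat unwinder added above walks 8-byte records through 32-bit frame pointers. The layout it assumes, shown here as a self-contained userspace equivalent of the kernel's struct stack_frame_ia32:

#include <stdint.h>

struct stack_frame_ia32 {
	uint32_t next_frame;		/* saved 32-bit frame pointer (EBP) */
	uint32_t return_address;	/* saved return EIP */
};
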
2397 | static void | 1672 | static void |
2398 | perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1673 | perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) |
@@ -2408,11 +1683,16 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
2408 | callchain_store(entry, PERF_CONTEXT_USER); | 1683 | callchain_store(entry, PERF_CONTEXT_USER); |
2409 | callchain_store(entry, regs->ip); | 1684 | callchain_store(entry, regs->ip); |
2410 | 1685 | ||
1686 | if (perf_callchain_user32(regs, entry)) | ||
1687 | return; | ||
1688 | |||
2411 | while (entry->nr < PERF_MAX_STACK_DEPTH) { | 1689 | while (entry->nr < PERF_MAX_STACK_DEPTH) { |
1690 | unsigned long bytes; | ||
2412 | frame.next_frame = NULL; | 1691 | frame.next_frame = NULL; |
2413 | frame.return_address = 0; | 1692 | frame.return_address = 0; |
2414 | 1693 | ||
2415 | if (!copy_stack_frame(fp, &frame)) | 1694 | bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); |
1695 | if (bytes != sizeof(frame)) | ||
2416 | break; | 1696 | break; |
2417 | 1697 | ||
2418 | if ((unsigned long)fp < regs->sp) | 1698 | if ((unsigned long)fp < regs->sp) |
@@ -2433,9 +1713,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
2433 | 1713 | ||
2434 | is_user = user_mode(regs); | 1714 | is_user = user_mode(regs); |
2435 | 1715 | ||
2436 | if (!current || current->pid == 0) | ||
2437 | return; | ||
2438 | |||
2439 | if (is_user && current->state != TASK_RUNNING) | 1716 | if (is_user && current->state != TASK_RUNNING) |
2440 | return; | 1717 | return; |
2441 | 1718 | ||
@@ -2462,7 +1739,14 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
2462 | return entry; | 1739 | return entry; |
2463 | } | 1740 | } |
2464 | 1741 | ||
2465 | void hw_perf_event_setup_online(int cpu) | 1742 | void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) |
2466 | { | 1743 | { |
2467 | init_debug_store_on_cpu(cpu); | 1744 | regs->ip = ip; |
1745 | /* | ||
1746 | * perf_arch_fetch_caller_regs adds another call, so we need to increment | ||
1747 | * the skip level | ||
1748 | */ | ||
1749 | regs->bp = rewind_frame_pointer(skip + 1); | ||
1750 | regs->cs = __KERNEL_CS; | ||
1751 | local_save_flags(regs->flags); | ||
2468 | } | 1752 | } |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c new file mode 100644 index 000000000000..db6f7d4056e1 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -0,0 +1,422 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_AMD | ||
2 | |||
3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); | ||
4 | |||
5 | static __initconst u64 amd_hw_cache_event_ids | ||
6 | [PERF_COUNT_HW_CACHE_MAX] | ||
7 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
8 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
9 | { | ||
10 | [ C(L1D) ] = { | ||
11 | [ C(OP_READ) ] = { | ||
12 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
13 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ | ||
14 | }, | ||
15 | [ C(OP_WRITE) ] = { | ||
16 | [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ | ||
17 | [ C(RESULT_MISS) ] = 0, | ||
18 | }, | ||
19 | [ C(OP_PREFETCH) ] = { | ||
20 | [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ | ||
21 | [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ | ||
22 | }, | ||
23 | }, | ||
24 | [ C(L1I ) ] = { | ||
25 | [ C(OP_READ) ] = { | ||
26 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ | ||
27 | [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ | ||
28 | }, | ||
29 | [ C(OP_WRITE) ] = { | ||
30 | [ C(RESULT_ACCESS) ] = -1, | ||
31 | [ C(RESULT_MISS) ] = -1, | ||
32 | }, | ||
33 | [ C(OP_PREFETCH) ] = { | ||
34 | [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ | ||
35 | [ C(RESULT_MISS) ] = 0, | ||
36 | }, | ||
37 | }, | ||
38 | [ C(LL ) ] = { | ||
39 | [ C(OP_READ) ] = { | ||
40 | [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ | ||
41 | [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ | ||
42 | }, | ||
43 | [ C(OP_WRITE) ] = { | ||
44 | [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ | ||
45 | [ C(RESULT_MISS) ] = 0, | ||
46 | }, | ||
47 | [ C(OP_PREFETCH) ] = { | ||
48 | [ C(RESULT_ACCESS) ] = 0, | ||
49 | [ C(RESULT_MISS) ] = 0, | ||
50 | }, | ||
51 | }, | ||
52 | [ C(DTLB) ] = { | ||
53 | [ C(OP_READ) ] = { | ||
54 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
55 | [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */ ||
56 | }, | ||
57 | [ C(OP_WRITE) ] = { | ||
58 | [ C(RESULT_ACCESS) ] = 0, | ||
59 | [ C(RESULT_MISS) ] = 0, | ||
60 | }, | ||
61 | [ C(OP_PREFETCH) ] = { | ||
62 | [ C(RESULT_ACCESS) ] = 0, | ||
63 | [ C(RESULT_MISS) ] = 0, | ||
64 | }, | ||
65 | }, | ||
66 | [ C(ITLB) ] = { | ||
67 | [ C(OP_READ) ] = { | ||
68 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */ ||
69 | [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ | ||
70 | }, | ||
71 | [ C(OP_WRITE) ] = { | ||
72 | [ C(RESULT_ACCESS) ] = -1, | ||
73 | [ C(RESULT_MISS) ] = -1, | ||
74 | }, | ||
75 | [ C(OP_PREFETCH) ] = { | ||
76 | [ C(RESULT_ACCESS) ] = -1, | ||
77 | [ C(RESULT_MISS) ] = -1, | ||
78 | }, | ||
79 | }, | ||
80 | [ C(BPU ) ] = { | ||
81 | [ C(OP_READ) ] = { | ||
82 | [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ | ||
83 | [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ | ||
84 | }, | ||
85 | [ C(OP_WRITE) ] = { | ||
86 | [ C(RESULT_ACCESS) ] = -1, | ||
87 | [ C(RESULT_MISS) ] = -1, | ||
88 | }, | ||
89 | [ C(OP_PREFETCH) ] = { | ||
90 | [ C(RESULT_ACCESS) ] = -1, | ||
91 | [ C(RESULT_MISS) ] = -1, | ||
92 | }, | ||
93 | }, | ||
94 | }; | ||
95 | |||
96 | /* | ||
97 | * AMD Performance Monitor K7 and later. | ||
98 | */ | ||
99 | static const u64 amd_perfmon_event_map[] = | ||
100 | { | ||
101 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, | ||
102 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
103 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, | ||
104 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, | ||
105 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
106 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
107 | }; | ||
108 | |||
109 | static u64 amd_pmu_event_map(int hw_event) | ||
110 | { | ||
111 | return amd_perfmon_event_map[hw_event]; | ||
112 | } | ||
113 | |||
114 | static u64 amd_pmu_raw_event(u64 hw_event) | ||
115 | { | ||
116 | #define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL | ||
117 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | ||
118 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | ||
119 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | ||
120 | #define K7_EVNTSEL_REG_MASK 0x0FF000000ULL | ||
121 | |||
122 | #define K7_EVNTSEL_MASK \ | ||
123 | (K7_EVNTSEL_EVENT_MASK | \ | ||
124 | K7_EVNTSEL_UNIT_MASK | \ | ||
125 | K7_EVNTSEL_EDGE_MASK | \ | ||
126 | K7_EVNTSEL_INV_MASK | \ | ||
127 | K7_EVNTSEL_REG_MASK) | ||
128 | |||
129 | return hw_event & K7_EVNTSEL_MASK; | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * AMD64 events are detected based on their event codes. | ||
134 | */ | ||
135 | static inline int amd_is_nb_event(struct hw_perf_event *hwc) | ||
136 | { | ||
137 | return (hwc->config & 0xe0) == 0xe0; | ||
138 | } | ||
139 | |||
140 | static inline int amd_has_nb(struct cpu_hw_events *cpuc) | ||
141 | { | ||
142 | struct amd_nb *nb = cpuc->amd_nb; | ||
143 | |||
144 | return nb && nb->nb_id != -1; | ||
145 | } | ||
146 | |||
147 | static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | ||
148 | struct perf_event *event) | ||
149 | { | ||
150 | struct hw_perf_event *hwc = &event->hw; | ||
151 | struct amd_nb *nb = cpuc->amd_nb; | ||
152 | int i; | ||
153 | |||
154 | /* | ||
155 | * only care about NB events | ||
156 | */ | ||
157 | if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) | ||
158 | return; | ||
159 | |||
160 | /* | ||
161 | * need to scan whole list because event may not have | ||
162 | * been assigned during scheduling | ||
163 | * | ||
164 | * no race condition possible because event can only | ||
165 | * be removed on one CPU at a time AND PMU is disabled | ||
166 | * when we come here | ||
167 | */ | ||
168 | for (i = 0; i < x86_pmu.num_events; i++) { | ||
169 | if (nb->owners[i] == event) { | ||
170 | cmpxchg(nb->owners+i, event, NULL); | ||
171 | break; | ||
172 | } | ||
173 | } | ||
174 | } | ||
175 | |||
176 | /* | ||
177 | * AMD64 NorthBridge events need special treatment because | ||
178 | * counter access needs to be synchronized across all cores | ||
179 | * of a package. Refer to BKDG section 3.12 | ||
180 | * | ||
181 | * NB events are events measuring L3 cache, Hypertransport | ||
182 | * traffic. They are identified by an event code >= 0xe00. | ||
183 | * They measure events on the NorthBridge, which is shared ||
184 | * by all cores on a package. NB events are counted on a | ||
185 | * shared set of counters. When a NB event is programmed | ||
186 | * in a counter, the data actually comes from a shared | ||
187 | * counter. Thus, access to those counters needs to be | ||
188 | * synchronized. | ||
189 | * | ||
190 | * We implement the synchronization such that no two cores | ||
191 | * can be measuring NB events using the same counters. Thus, | ||
192 | * we maintain a per-NB allocation table. The available slot | ||
193 | * is propagated using the event_constraint structure. | ||
194 | * | ||
195 | * We provide only one choice for each NB event based on | ||
196 | * the fact that only NB events have restrictions. Consequently, | ||
197 | * if a counter is available, there is a guarantee the NB event | ||
198 | * will be assigned to it. If no slot is available, an empty | ||
199 | * constraint is returned and scheduling will eventually fail | ||
200 | * for this event. | ||
201 | * | ||
202 | * Note that all cores attached to the same NB compete for the same ||
203 | * counters to host NB events, this is why we use atomic ops. Some | ||
204 | * multi-chip CPUs may have more than one NB. | ||
205 | * | ||
206 | * Given that resources are allocated (cmpxchg), they must be | ||
207 | * eventually freed for others to use. This is accomplished by | ||
208 | * calling amd_put_event_constraints(). | ||
209 | * | ||
210 | * Non NB events are not impacted by this restriction. | ||
211 | */ | ||
212 | static struct event_constraint * | ||
213 | amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
214 | { | ||
215 | struct hw_perf_event *hwc = &event->hw; | ||
216 | struct amd_nb *nb = cpuc->amd_nb; | ||
217 | struct perf_event *old = NULL; | ||
218 | int max = x86_pmu.num_events; | ||
219 | int i, j, k = -1; | ||
220 | |||
221 | /* | ||
222 | * if not NB event or no NB, then no constraints | ||
223 | */ | ||
224 | if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) | ||
225 | return &unconstrained; | ||
226 | |||
227 | /* | ||
228 | * detect if already present, if so reuse | ||
229 | * | ||
230 | * cannot merge with actual allocation | ||
231 | * because of possible holes | ||
232 | * | ||
233 | * event can already be present yet not assigned (in hwc->idx) | ||
234 | * because of successive calls to x86_schedule_events() from | ||
235 | * hw_perf_group_sched_in() without hw_perf_enable() | ||
236 | */ | ||
237 | for (i = 0; i < max; i++) { | ||
238 | /* | ||
239 | * keep track of first free slot | ||
240 | */ | ||
241 | if (k == -1 && !nb->owners[i]) | ||
242 | k = i; | ||
243 | |||
244 | /* already present, reuse */ | ||
245 | if (nb->owners[i] == event) | ||
246 | goto done; | ||
247 | } | ||
248 | /* | ||
249 | * not present, so grab a new slot | ||
250 | * starting either at: | ||
251 | */ | ||
252 | if (hwc->idx != -1) { | ||
253 | /* previous assignment */ | ||
254 | i = hwc->idx; | ||
255 | } else if (k != -1) { | ||
256 | /* start from free slot found */ | ||
257 | i = k; | ||
258 | } else { | ||
259 | /* | ||
260 | * event not found, no slot found in | ||
261 | * first pass, try again from the | ||
262 | * beginning | ||
263 | */ | ||
264 | i = 0; | ||
265 | } | ||
266 | j = i; | ||
267 | do { | ||
268 | old = cmpxchg(nb->owners+i, NULL, event); | ||
269 | if (!old) | ||
270 | break; | ||
271 | if (++i == max) | ||
272 | i = 0; | ||
273 | } while (i != j); | ||
274 | done: | ||
275 | if (!old) | ||
276 | return &nb->event_constraints[i]; | ||
277 | |||
278 | return &emptyconstraint; | ||
279 | } | ||
280 | |||
281 | static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | ||
282 | { | ||
283 | struct amd_nb *nb; | ||
284 | int i; | ||
285 | |||
286 | nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL); | ||
287 | if (!nb) | ||
288 | return NULL; | ||
289 | |||
290 | memset(nb, 0, sizeof(*nb)); | ||
291 | nb->nb_id = nb_id; | ||
292 | |||
293 | /* | ||
294 | * initialize all possible NB constraints | ||
295 | */ | ||
296 | for (i = 0; i < x86_pmu.num_events; i++) { | ||
297 | __set_bit(i, nb->event_constraints[i].idxmsk); | ||
298 | nb->event_constraints[i].weight = 1; | ||
299 | } | ||
300 | return nb; | ||
301 | } | ||
302 | |||
303 | static int amd_pmu_cpu_prepare(int cpu) | ||
304 | { | ||
305 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | ||
306 | |||
307 | WARN_ON_ONCE(cpuc->amd_nb); | ||
308 | |||
309 | if (boot_cpu_data.x86_max_cores < 2) | ||
310 | return NOTIFY_OK; | ||
311 | |||
312 | cpuc->amd_nb = amd_alloc_nb(cpu, -1); | ||
313 | if (!cpuc->amd_nb) | ||
314 | return NOTIFY_BAD; | ||
315 | |||
316 | return NOTIFY_OK; | ||
317 | } | ||
318 | |||
319 | static void amd_pmu_cpu_starting(int cpu) | ||
320 | { | ||
321 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | ||
322 | struct amd_nb *nb; | ||
323 | int i, nb_id; | ||
324 | |||
325 | if (boot_cpu_data.x86_max_cores < 2) | ||
326 | return; | ||
327 | |||
328 | nb_id = amd_get_nb_id(cpu); | ||
329 | WARN_ON_ONCE(nb_id == BAD_APICID); | ||
330 | |||
331 | raw_spin_lock(&amd_nb_lock); | ||
332 | |||
333 | for_each_online_cpu(i) { | ||
334 | nb = per_cpu(cpu_hw_events, i).amd_nb; | ||
335 | if (WARN_ON_ONCE(!nb)) | ||
336 | continue; | ||
337 | |||
338 | if (nb->nb_id == nb_id) { | ||
339 | kfree(cpuc->amd_nb); | ||
340 | cpuc->amd_nb = nb; | ||
341 | break; | ||
342 | } | ||
343 | } | ||
344 | |||
345 | cpuc->amd_nb->nb_id = nb_id; | ||
346 | cpuc->amd_nb->refcnt++; | ||
347 | |||
348 | raw_spin_unlock(&amd_nb_lock); | ||
349 | } | ||
350 | |||
351 | static void amd_pmu_cpu_dead(int cpu) | ||
352 | { | ||
353 | struct cpu_hw_events *cpuhw; | ||
354 | |||
355 | if (boot_cpu_data.x86_max_cores < 2) | ||
356 | return; | ||
357 | |||
358 | cpuhw = &per_cpu(cpu_hw_events, cpu); | ||
359 | |||
360 | raw_spin_lock(&amd_nb_lock); | ||
361 | |||
362 | if (cpuhw->amd_nb) { | ||
363 | struct amd_nb *nb = cpuhw->amd_nb; | ||
364 | |||
365 | if (nb->nb_id == -1 || --nb->refcnt == 0) | ||
366 | kfree(nb); | ||
367 | |||
368 | cpuhw->amd_nb = NULL; | ||
369 | } | ||
370 | |||
371 | raw_spin_unlock(&amd_nb_lock); | ||
372 | } | ||
373 | |||
374 | static __initconst struct x86_pmu amd_pmu = { | ||
375 | .name = "AMD", | ||
376 | .handle_irq = x86_pmu_handle_irq, | ||
377 | .disable_all = x86_pmu_disable_all, | ||
378 | .enable_all = x86_pmu_enable_all, | ||
379 | .enable = x86_pmu_enable_event, | ||
380 | .disable = x86_pmu_disable_event, | ||
381 | .eventsel = MSR_K7_EVNTSEL0, | ||
382 | .perfctr = MSR_K7_PERFCTR0, | ||
383 | .event_map = amd_pmu_event_map, | ||
384 | .raw_event = amd_pmu_raw_event, | ||
385 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | ||
386 | .num_events = 4, | ||
387 | .event_bits = 48, | ||
388 | .event_mask = (1ULL << 48) - 1, | ||
389 | .apic = 1, | ||
390 | /* use highest bit to detect overflow */ | ||
391 | .max_period = (1ULL << 47) - 1, | ||
392 | .get_event_constraints = amd_get_event_constraints, | ||
393 | .put_event_constraints = amd_put_event_constraints, | ||
394 | |||
395 | .cpu_prepare = amd_pmu_cpu_prepare, | ||
396 | .cpu_starting = amd_pmu_cpu_starting, | ||
397 | .cpu_dead = amd_pmu_cpu_dead, | ||
398 | }; | ||
399 | |||
400 | static __init int amd_pmu_init(void) | ||
401 | { | ||
402 | /* Performance-monitoring supported from K7 and later: */ | ||
403 | if (boot_cpu_data.x86 < 6) | ||
404 | return -ENODEV; | ||
405 | |||
406 | x86_pmu = amd_pmu; | ||
407 | |||
408 | /* Events are common for all AMDs */ | ||
409 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, | ||
410 | sizeof(hw_cache_event_ids)); | ||
411 | |||
412 | return 0; | ||
413 | } | ||
414 | |||
415 | #else /* CONFIG_CPU_SUP_AMD */ | ||
416 | |||
417 | static int amd_pmu_init(void) | ||
418 | { | ||
419 | return 0; | ||
420 | } | ||
421 | |||
422 | #endif | ||
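The NB slot claim in amd_get_event_constraints and the release in amd_put_event_constraints form a small lock-free protocol over the shared owners[] array. A standalone userspace sketch of that protocol, assuming GCC's __sync_val_compare_and_swap in place of the kernel's cmpxchg (the names and the fixed slot count are illustrative only):

	#include <stddef.h>

	#define NUM_SLOTS 4

	static void *owners[NUM_SLOTS];	/* shared per-NB ownership table */

	/* Try to claim a slot for 'event', scanning from 'start'; -1 if full. */
	static int nb_claim_slot(void *event, int start)
	{
		int i = start, j = start;

		do {
			/* atomically install 'event' iff the slot is free */
			if (__sync_val_compare_and_swap(&owners[i], NULL, event) == NULL)
				return i;
			if (++i == NUM_SLOTS)
				i = 0;
		} while (i != j);
		return -1;		/* no free slot: scheduling fails */
	}

	/* Release a claimed slot; the cmpxchg tolerates concurrent claimers. */
	static void nb_release_slot(void *event)
	{
		int i;

		for (i = 0; i < NUM_SLOTS; i++) {
			if (owners[i] == event) {
				__sync_val_compare_and_swap(&owners[i], event, NULL);
				break;
			}
		}
	}

Because every transition of a slot goes through a compare-and-swap, two cores can never both believe they own the same NB counter, which is exactly the guarantee the comment block in the file describes.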
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c new file mode 100644 index 000000000000..9c794ac87837 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -0,0 +1,980 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | /* | ||
4 | * Intel PerfMon, used on Core and later. | ||
5 | */ | ||
6 | static const u64 intel_perfmon_event_map[] = | ||
7 | { | ||
8 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, | ||
9 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
10 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, | ||
11 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, | ||
12 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
13 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
14 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | ||
15 | }; | ||
16 | |||
17 | static struct event_constraint intel_core_event_constraints[] = | ||
18 | { | ||
19 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
20 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
21 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
22 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
23 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
24 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ | ||
25 | EVENT_CONSTRAINT_END | ||
26 | }; | ||
27 | |||
28 | static struct event_constraint intel_core2_event_constraints[] = | ||
29 | { | ||
30 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | ||
31 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | ||
32 | /* | ||
33 | * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event | ||
34 | * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed | ||
35 | * ratio between these counters. | ||
36 | */ | ||
37 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | ||
38 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
39 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
40 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
41 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
42 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
43 | INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ | ||
44 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
45 | INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ | ||
46 | INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */ | ||
47 | INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ | ||
48 | EVENT_CONSTRAINT_END | ||
49 | }; | ||
50 | |||
51 | static struct event_constraint intel_nehalem_event_constraints[] = | ||
52 | { | ||
53 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | ||
54 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | ||
55 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | ||
56 | INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ | ||
57 | INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ | ||
58 | INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ | ||
59 | INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ | ||
60 | INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ | ||
61 | INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ | ||
62 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
63 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | ||
64 | EVENT_CONSTRAINT_END | ||
65 | }; | ||
66 | |||
67 | static struct event_constraint intel_westmere_event_constraints[] = | ||
68 | { | ||
69 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | ||
70 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | ||
71 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | ||
72 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
73 | INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ | ||
74 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | ||
75 | EVENT_CONSTRAINT_END | ||
76 | }; | ||
77 | |||
78 | static struct event_constraint intel_gen_event_constraints[] = | ||
79 | { | ||
80 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | ||
81 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | ||
82 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | ||
83 | EVENT_CONSTRAINT_END | ||
84 | }; | ||
85 | |||
86 | static u64 intel_pmu_event_map(int hw_event) | ||
87 | { | ||
88 | return intel_perfmon_event_map[hw_event]; | ||
89 | } | ||
90 | |||
91 | static __initconst u64 westmere_hw_cache_event_ids | ||
92 | [PERF_COUNT_HW_CACHE_MAX] | ||
93 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
94 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
95 | { | ||
96 | [ C(L1D) ] = { | ||
97 | [ C(OP_READ) ] = { | ||
98 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ | ||
99 | [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ | ||
100 | }, | ||
101 | [ C(OP_WRITE) ] = { | ||
102 | [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */ ||
103 | [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ | ||
104 | }, | ||
105 | [ C(OP_PREFETCH) ] = { | ||
106 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
107 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
108 | }, | ||
109 | }, | ||
110 | [ C(L1I ) ] = { | ||
111 | [ C(OP_READ) ] = { | ||
112 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
113 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
114 | }, | ||
115 | [ C(OP_WRITE) ] = { | ||
116 | [ C(RESULT_ACCESS) ] = -1, | ||
117 | [ C(RESULT_MISS) ] = -1, | ||
118 | }, | ||
119 | [ C(OP_PREFETCH) ] = { | ||
120 | [ C(RESULT_ACCESS) ] = 0x0, | ||
121 | [ C(RESULT_MISS) ] = 0x0, | ||
122 | }, | ||
123 | }, | ||
124 | [ C(LL ) ] = { | ||
125 | [ C(OP_READ) ] = { | ||
126 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
127 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
128 | }, | ||
129 | [ C(OP_WRITE) ] = { | ||
130 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
131 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
132 | }, | ||
133 | [ C(OP_PREFETCH) ] = { | ||
134 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
135 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
136 | }, | ||
137 | }, | ||
138 | [ C(DTLB) ] = { | ||
139 | [ C(OP_READ) ] = { | ||
140 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ | ||
141 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
142 | }, | ||
143 | [ C(OP_WRITE) ] = { | ||
144 | [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */ ||
145 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
146 | }, | ||
147 | [ C(OP_PREFETCH) ] = { | ||
148 | [ C(RESULT_ACCESS) ] = 0x0, | ||
149 | [ C(RESULT_MISS) ] = 0x0, | ||
150 | }, | ||
151 | }, | ||
152 | [ C(ITLB) ] = { | ||
153 | [ C(OP_READ) ] = { | ||
154 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
155 | [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ | ||
156 | }, | ||
157 | [ C(OP_WRITE) ] = { | ||
158 | [ C(RESULT_ACCESS) ] = -1, | ||
159 | [ C(RESULT_MISS) ] = -1, | ||
160 | }, | ||
161 | [ C(OP_PREFETCH) ] = { | ||
162 | [ C(RESULT_ACCESS) ] = -1, | ||
163 | [ C(RESULT_MISS) ] = -1, | ||
164 | }, | ||
165 | }, | ||
166 | [ C(BPU ) ] = { | ||
167 | [ C(OP_READ) ] = { | ||
168 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
169 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
170 | }, | ||
171 | [ C(OP_WRITE) ] = { | ||
172 | [ C(RESULT_ACCESS) ] = -1, | ||
173 | [ C(RESULT_MISS) ] = -1, | ||
174 | }, | ||
175 | [ C(OP_PREFETCH) ] = { | ||
176 | [ C(RESULT_ACCESS) ] = -1, | ||
177 | [ C(RESULT_MISS) ] = -1, | ||
178 | }, | ||
179 | }, | ||
180 | }; | ||
181 | |||
182 | static __initconst u64 nehalem_hw_cache_event_ids | ||
183 | [PERF_COUNT_HW_CACHE_MAX] | ||
184 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
185 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
186 | { | ||
187 | [ C(L1D) ] = { | ||
188 | [ C(OP_READ) ] = { | ||
189 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
190 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
191 | }, | ||
192 | [ C(OP_WRITE) ] = { | ||
193 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
194 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
195 | }, | ||
196 | [ C(OP_PREFETCH) ] = { | ||
197 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
198 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
199 | }, | ||
200 | }, | ||
201 | [ C(L1I ) ] = { | ||
202 | [ C(OP_READ) ] = { | ||
203 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
204 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
205 | }, | ||
206 | [ C(OP_WRITE) ] = { | ||
207 | [ C(RESULT_ACCESS) ] = -1, | ||
208 | [ C(RESULT_MISS) ] = -1, | ||
209 | }, | ||
210 | [ C(OP_PREFETCH) ] = { | ||
211 | [ C(RESULT_ACCESS) ] = 0x0, | ||
212 | [ C(RESULT_MISS) ] = 0x0, | ||
213 | }, | ||
214 | }, | ||
215 | [ C(LL ) ] = { | ||
216 | [ C(OP_READ) ] = { | ||
217 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
218 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
219 | }, | ||
220 | [ C(OP_WRITE) ] = { | ||
221 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
222 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
223 | }, | ||
224 | [ C(OP_PREFETCH) ] = { | ||
225 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
226 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
227 | }, | ||
228 | }, | ||
229 | [ C(DTLB) ] = { | ||
230 | [ C(OP_READ) ] = { | ||
231 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
232 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
233 | }, | ||
234 | [ C(OP_WRITE) ] = { | ||
235 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
236 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
237 | }, | ||
238 | [ C(OP_PREFETCH) ] = { | ||
239 | [ C(RESULT_ACCESS) ] = 0x0, | ||
240 | [ C(RESULT_MISS) ] = 0x0, | ||
241 | }, | ||
242 | }, | ||
243 | [ C(ITLB) ] = { | ||
244 | [ C(OP_READ) ] = { | ||
245 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
246 | [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ | ||
247 | }, | ||
248 | [ C(OP_WRITE) ] = { | ||
249 | [ C(RESULT_ACCESS) ] = -1, | ||
250 | [ C(RESULT_MISS) ] = -1, | ||
251 | }, | ||
252 | [ C(OP_PREFETCH) ] = { | ||
253 | [ C(RESULT_ACCESS) ] = -1, | ||
254 | [ C(RESULT_MISS) ] = -1, | ||
255 | }, | ||
256 | }, | ||
257 | [ C(BPU ) ] = { | ||
258 | [ C(OP_READ) ] = { | ||
259 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
260 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
261 | }, | ||
262 | [ C(OP_WRITE) ] = { | ||
263 | [ C(RESULT_ACCESS) ] = -1, | ||
264 | [ C(RESULT_MISS) ] = -1, | ||
265 | }, | ||
266 | [ C(OP_PREFETCH) ] = { | ||
267 | [ C(RESULT_ACCESS) ] = -1, | ||
268 | [ C(RESULT_MISS) ] = -1, | ||
269 | }, | ||
270 | }, | ||
271 | }; | ||
272 | |||
273 | static __initconst u64 core2_hw_cache_event_ids | ||
274 | [PERF_COUNT_HW_CACHE_MAX] | ||
275 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
276 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
277 | { | ||
278 | [ C(L1D) ] = { | ||
279 | [ C(OP_READ) ] = { | ||
280 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
281 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
282 | }, | ||
283 | [ C(OP_WRITE) ] = { | ||
284 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
285 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
286 | }, | ||
287 | [ C(OP_PREFETCH) ] = { | ||
288 | [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ | ||
289 | [ C(RESULT_MISS) ] = 0, | ||
290 | }, | ||
291 | }, | ||
292 | [ C(L1I ) ] = { | ||
293 | [ C(OP_READ) ] = { | ||
294 | [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ | ||
295 | [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ | ||
296 | }, | ||
297 | [ C(OP_WRITE) ] = { | ||
298 | [ C(RESULT_ACCESS) ] = -1, | ||
299 | [ C(RESULT_MISS) ] = -1, | ||
300 | }, | ||
301 | [ C(OP_PREFETCH) ] = { | ||
302 | [ C(RESULT_ACCESS) ] = 0, | ||
303 | [ C(RESULT_MISS) ] = 0, | ||
304 | }, | ||
305 | }, | ||
306 | [ C(LL ) ] = { | ||
307 | [ C(OP_READ) ] = { | ||
308 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
309 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
310 | }, | ||
311 | [ C(OP_WRITE) ] = { | ||
312 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
313 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
314 | }, | ||
315 | [ C(OP_PREFETCH) ] = { | ||
316 | [ C(RESULT_ACCESS) ] = 0, | ||
317 | [ C(RESULT_MISS) ] = 0, | ||
318 | }, | ||
319 | }, | ||
320 | [ C(DTLB) ] = { | ||
321 | [ C(OP_READ) ] = { | ||
322 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
323 | [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ | ||
324 | }, | ||
325 | [ C(OP_WRITE) ] = { | ||
326 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
327 | [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ | ||
328 | }, | ||
329 | [ C(OP_PREFETCH) ] = { | ||
330 | [ C(RESULT_ACCESS) ] = 0, | ||
331 | [ C(RESULT_MISS) ] = 0, | ||
332 | }, | ||
333 | }, | ||
334 | [ C(ITLB) ] = { | ||
335 | [ C(OP_READ) ] = { | ||
336 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
337 | [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ | ||
338 | }, | ||
339 | [ C(OP_WRITE) ] = { | ||
340 | [ C(RESULT_ACCESS) ] = -1, | ||
341 | [ C(RESULT_MISS) ] = -1, | ||
342 | }, | ||
343 | [ C(OP_PREFETCH) ] = { | ||
344 | [ C(RESULT_ACCESS) ] = -1, | ||
345 | [ C(RESULT_MISS) ] = -1, | ||
346 | }, | ||
347 | }, | ||
348 | [ C(BPU ) ] = { | ||
349 | [ C(OP_READ) ] = { | ||
350 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
351 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
352 | }, | ||
353 | [ C(OP_WRITE) ] = { | ||
354 | [ C(RESULT_ACCESS) ] = -1, | ||
355 | [ C(RESULT_MISS) ] = -1, | ||
356 | }, | ||
357 | [ C(OP_PREFETCH) ] = { | ||
358 | [ C(RESULT_ACCESS) ] = -1, | ||
359 | [ C(RESULT_MISS) ] = -1, | ||
360 | }, | ||
361 | }, | ||
362 | }; | ||
363 | |||
364 | static __initconst u64 atom_hw_cache_event_ids | ||
365 | [PERF_COUNT_HW_CACHE_MAX] | ||
366 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
367 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
368 | { | ||
369 | [ C(L1D) ] = { | ||
370 | [ C(OP_READ) ] = { | ||
371 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ | ||
372 | [ C(RESULT_MISS) ] = 0, | ||
373 | }, | ||
374 | [ C(OP_WRITE) ] = { | ||
375 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ | ||
376 | [ C(RESULT_MISS) ] = 0, | ||
377 | }, | ||
378 | [ C(OP_PREFETCH) ] = { | ||
379 | [ C(RESULT_ACCESS) ] = 0x0, | ||
380 | [ C(RESULT_MISS) ] = 0, | ||
381 | }, | ||
382 | }, | ||
383 | [ C(L1I ) ] = { | ||
384 | [ C(OP_READ) ] = { | ||
385 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
386 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
387 | }, | ||
388 | [ C(OP_WRITE) ] = { | ||
389 | [ C(RESULT_ACCESS) ] = -1, | ||
390 | [ C(RESULT_MISS) ] = -1, | ||
391 | }, | ||
392 | [ C(OP_PREFETCH) ] = { | ||
393 | [ C(RESULT_ACCESS) ] = 0, | ||
394 | [ C(RESULT_MISS) ] = 0, | ||
395 | }, | ||
396 | }, | ||
397 | [ C(LL ) ] = { | ||
398 | [ C(OP_READ) ] = { | ||
399 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
400 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
401 | }, | ||
402 | [ C(OP_WRITE) ] = { | ||
403 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
404 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
405 | }, | ||
406 | [ C(OP_PREFETCH) ] = { | ||
407 | [ C(RESULT_ACCESS) ] = 0, | ||
408 | [ C(RESULT_MISS) ] = 0, | ||
409 | }, | ||
410 | }, | ||
411 | [ C(DTLB) ] = { | ||
412 | [ C(OP_READ) ] = { | ||
413 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ | ||
414 | [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ | ||
415 | }, | ||
416 | [ C(OP_WRITE) ] = { | ||
417 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ | ||
418 | [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ | ||
419 | }, | ||
420 | [ C(OP_PREFETCH) ] = { | ||
421 | [ C(RESULT_ACCESS) ] = 0, | ||
422 | [ C(RESULT_MISS) ] = 0, | ||
423 | }, | ||
424 | }, | ||
425 | [ C(ITLB) ] = { | ||
426 | [ C(OP_READ) ] = { | ||
427 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
428 | [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ | ||
429 | }, | ||
430 | [ C(OP_WRITE) ] = { | ||
431 | [ C(RESULT_ACCESS) ] = -1, | ||
432 | [ C(RESULT_MISS) ] = -1, | ||
433 | }, | ||
434 | [ C(OP_PREFETCH) ] = { | ||
435 | [ C(RESULT_ACCESS) ] = -1, | ||
436 | [ C(RESULT_MISS) ] = -1, | ||
437 | }, | ||
438 | }, | ||
439 | [ C(BPU ) ] = { | ||
440 | [ C(OP_READ) ] = { | ||
441 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
442 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
443 | }, | ||
444 | [ C(OP_WRITE) ] = { | ||
445 | [ C(RESULT_ACCESS) ] = -1, | ||
446 | [ C(RESULT_MISS) ] = -1, | ||
447 | }, | ||
448 | [ C(OP_PREFETCH) ] = { | ||
449 | [ C(RESULT_ACCESS) ] = -1, | ||
450 | [ C(RESULT_MISS) ] = -1, | ||
451 | }, | ||
452 | }, | ||
453 | }; | ||
454 | |||
455 | static u64 intel_pmu_raw_event(u64 hw_event) | ||
456 | { | ||
457 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
458 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
459 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
460 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
461 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | ||
462 | |||
463 | #define CORE_EVNTSEL_MASK \ | ||
464 | (INTEL_ARCH_EVTSEL_MASK | \ | ||
465 | INTEL_ARCH_UNIT_MASK | \ | ||
466 | INTEL_ARCH_EDGE_MASK | \ | ||
467 | INTEL_ARCH_INV_MASK | \ | ||
468 | INTEL_ARCH_CNT_MASK) | ||
469 | |||
470 | return hw_event & CORE_EVNTSEL_MASK; | ||
471 | } | ||
472 | |||
473 | static void intel_pmu_enable_bts(u64 config) | ||
474 | { | ||
475 | unsigned long debugctlmsr; | ||
476 | |||
477 | debugctlmsr = get_debugctlmsr(); | ||
478 | |||
479 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
480 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
481 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
482 | |||
483 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
484 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
485 | |||
486 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
487 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
488 | |||
489 | update_debugctlmsr(debugctlmsr); | ||
490 | } | ||
491 | |||
492 | static void intel_pmu_disable_bts(void) | ||
493 | { | ||
494 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
495 | unsigned long debugctlmsr; | ||
496 | |||
497 | if (!cpuc->ds) | ||
498 | return; | ||
499 | |||
500 | debugctlmsr = get_debugctlmsr(); | ||
501 | |||
502 | debugctlmsr &= | ||
503 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
504 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
505 | |||
506 | update_debugctlmsr(debugctlmsr); | ||
507 | } | ||
508 | |||
509 | static void intel_pmu_disable_all(void) | ||
510 | { | ||
511 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
512 | |||
513 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | ||
514 | |||
515 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | ||
516 | intel_pmu_disable_bts(); | ||
517 | } | ||
518 | |||
519 | static void intel_pmu_enable_all(void) | ||
520 | { | ||
521 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
522 | |||
523 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | ||
524 | |||
525 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | ||
526 | struct perf_event *event = | ||
527 | cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
528 | |||
529 | if (WARN_ON_ONCE(!event)) | ||
530 | return; | ||
531 | |||
532 | intel_pmu_enable_bts(event->hw.config); | ||
533 | } | ||
534 | } | ||
535 | |||
536 | static inline u64 intel_pmu_get_status(void) | ||
537 | { | ||
538 | u64 status; | ||
539 | |||
540 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | ||
541 | |||
542 | return status; | ||
543 | } | ||
544 | |||
545 | static inline void intel_pmu_ack_status(u64 ack) | ||
546 | { | ||
547 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | ||
548 | } | ||
549 | |||
550 | static inline void | ||
551 | intel_pmu_disable_fixed(struct hw_perf_event *hwc) | ||
552 | { | ||
553 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | ||
554 | u64 ctrl_val, mask; | ||
555 | |||
556 | mask = 0xfULL << (idx * 4); | ||
557 | |||
558 | rdmsrl(hwc->config_base, ctrl_val); | ||
559 | ctrl_val &= ~mask; | ||
560 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | ||
561 | } | ||
562 | |||
563 | static void intel_pmu_drain_bts_buffer(void) | ||
564 | { | ||
565 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
566 | struct debug_store *ds = cpuc->ds; | ||
567 | struct bts_record { | ||
568 | u64 from; | ||
569 | u64 to; | ||
570 | u64 flags; | ||
571 | }; | ||
572 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
573 | struct bts_record *at, *top; | ||
574 | struct perf_output_handle handle; | ||
575 | struct perf_event_header header; | ||
576 | struct perf_sample_data data; | ||
577 | struct pt_regs regs; | ||
578 | |||
579 | if (!event) | ||
580 | return; | ||
581 | |||
582 | if (!ds) | ||
583 | return; | ||
584 | |||
585 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
586 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
587 | |||
588 | if (top <= at) | ||
589 | return; | ||
590 | |||
591 | ds->bts_index = ds->bts_buffer_base; | ||
592 | |||
593 | perf_sample_data_init(&data, 0); | ||
594 | |||
595 | data.period = event->hw.last_period; | ||
596 | regs.ip = 0; | ||
597 | |||
598 | /* | ||
599 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
600 | * We will overwrite the from and to address before we output | ||
601 | * the sample. | ||
602 | */ | ||
603 | perf_prepare_sample(&header, &data, event, ®s); | ||
604 | |||
605 | if (perf_output_begin(&handle, event, | ||
606 | header.size * (top - at), 1, 1)) | ||
607 | return; | ||
608 | |||
609 | for (; at < top; at++) { | ||
610 | data.ip = at->from; | ||
611 | data.addr = at->to; | ||
612 | |||
613 | perf_output_sample(&handle, &header, &data, event); | ||
614 | } | ||
615 | |||
616 | perf_output_end(&handle); | ||
617 | |||
618 | /* There's new data available. */ | ||
619 | event->hw.interrupts++; | ||
620 | event->pending_kill = POLL_IN; | ||
621 | } | ||
622 | |||
623 | static inline void | ||
624 | intel_pmu_disable_event(struct perf_event *event) | ||
625 | { | ||
626 | struct hw_perf_event *hwc = &event->hw; | ||
627 | |||
628 | if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { | ||
629 | intel_pmu_disable_bts(); | ||
630 | intel_pmu_drain_bts_buffer(); | ||
631 | return; | ||
632 | } | ||
633 | |||
634 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
635 | intel_pmu_disable_fixed(hwc); | ||
636 | return; | ||
637 | } | ||
638 | |||
639 | x86_pmu_disable_event(event); | ||
640 | } | ||
641 | |||
642 | static inline void | ||
643 | intel_pmu_enable_fixed(struct hw_perf_event *hwc) | ||
644 | { | ||
645 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | ||
646 | u64 ctrl_val, bits, mask; | ||
647 | int err; | ||
648 | |||
649 | /* | ||
650 | * Enable IRQ generation (0x8), | ||
651 | * and enable ring-3 counting (0x2) and ring-0 counting (0x1) | ||
652 | * if requested: | ||
653 | */ | ||
654 | bits = 0x8ULL; | ||
655 | if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) | ||
656 | bits |= 0x2; | ||
657 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
658 | bits |= 0x1; | ||
659 | |||
660 | /* | ||
661 | * ANY bit is supported in v3 and up | ||
662 | */ | ||
663 | if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) | ||
664 | bits |= 0x4; | ||
665 | |||
666 | bits <<= (idx * 4); | ||
667 | mask = 0xfULL << (idx * 4); | ||
668 | |||
669 | rdmsrl(hwc->config_base, ctrl_val); | ||
670 | ctrl_val &= ~mask; | ||
671 | ctrl_val |= bits; | ||
672 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
673 | } | ||
674 | |||
675 | static void intel_pmu_enable_event(struct perf_event *event) | ||
676 | { | ||
677 | struct hw_perf_event *hwc = &event->hw; | ||
678 | |||
679 | if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { | ||
680 | if (!__get_cpu_var(cpu_hw_events).enabled) | ||
681 | return; | ||
682 | |||
683 | intel_pmu_enable_bts(hwc->config); | ||
684 | return; | ||
685 | } | ||
686 | |||
687 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
688 | intel_pmu_enable_fixed(hwc); | ||
689 | return; | ||
690 | } | ||
691 | |||
692 | __x86_pmu_enable_event(hwc); | ||
693 | } | ||
694 | |||
695 | /* | ||
696 | * Save and restart an expired event. Called by NMI contexts, | ||
697 | * so it has to be careful about preempting normal event ops: | ||
698 | */ | ||
699 | static int intel_pmu_save_and_restart(struct perf_event *event) | ||
700 | { | ||
701 | x86_perf_event_update(event); | ||
702 | return x86_perf_event_set_period(event); | ||
703 | } | ||
704 | |||
705 | static void intel_pmu_reset(void) | ||
706 | { | ||
707 | struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; | ||
708 | unsigned long flags; | ||
709 | int idx; | ||
710 | |||
711 | if (!x86_pmu.num_events) | ||
712 | return; | ||
713 | |||
714 | local_irq_save(flags); | ||
715 | |||
716 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | ||
717 | |||
718 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
719 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | ||
720 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | ||
721 | } | ||
722 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | ||
723 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | ||
724 | } | ||
725 | if (ds) | ||
726 | ds->bts_index = ds->bts_buffer_base; | ||
727 | |||
728 | local_irq_restore(flags); | ||
729 | } | ||
730 | |||
731 | /* | ||
732 | * This handler is triggered by the local APIC, so the APIC IRQ handling | ||
733 | * rules apply: | ||
734 | */ | ||
735 | static int intel_pmu_handle_irq(struct pt_regs *regs) | ||
736 | { | ||
737 | struct perf_sample_data data; | ||
738 | struct cpu_hw_events *cpuc; | ||
739 | int bit, loops; | ||
740 | u64 ack, status; | ||
741 | |||
742 | perf_sample_data_init(&data, 0); | ||
743 | |||
744 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
745 | |||
746 | intel_pmu_disable_all(); | ||
747 | intel_pmu_drain_bts_buffer(); | ||
748 | status = intel_pmu_get_status(); | ||
749 | if (!status) { | ||
750 | intel_pmu_enable_all(); | ||
751 | return 0; | ||
752 | } | ||
753 | |||
754 | loops = 0; | ||
755 | again: | ||
756 | if (++loops > 100) { | ||
757 | WARN_ONCE(1, "perfevents: irq loop stuck!\n"); | ||
758 | perf_event_print_debug(); | ||
759 | intel_pmu_reset(); | ||
760 | goto done; | ||
761 | } | ||
762 | |||
763 | inc_irq_stat(apic_perf_irqs); | ||
764 | ack = status; | ||
765 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | ||
766 | struct perf_event *event = cpuc->events[bit]; | ||
767 | |||
768 | if (!test_bit(bit, cpuc->active_mask)) | ||
769 | continue; | ||
770 | |||
771 | if (!intel_pmu_save_and_restart(event)) | ||
772 | continue; | ||
773 | |||
774 | data.period = event->hw.last_period; | ||
775 | |||
776 | if (perf_event_overflow(event, 1, &data, regs)) | ||
777 | x86_pmu_stop(event); | ||
778 | } | ||
779 | |||
780 | intel_pmu_ack_status(ack); | ||
781 | |||
782 | /* | ||
783 | * Repeat if there is more work to be done: | ||
784 | */ | ||
785 | status = intel_pmu_get_status(); | ||
786 | if (status) | ||
787 | goto again; | ||
788 | |||
789 | done: | ||
790 | intel_pmu_enable_all(); | ||
791 | return 1; | ||
792 | } | ||
793 | |||
794 | static struct event_constraint bts_constraint = | ||
795 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
796 | |||
797 | static struct event_constraint * | ||
798 | intel_special_constraints(struct perf_event *event) | ||
799 | { | ||
800 | unsigned int hw_event; | ||
801 | |||
802 | hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; | ||
803 | |||
804 | if (unlikely((hw_event == | ||
805 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | ||
806 | (event->hw.sample_period == 1))) { | ||
807 | |||
808 | return &bts_constraint; | ||
809 | } | ||
810 | return NULL; | ||
811 | } | ||
812 | |||
813 | static struct event_constraint * | ||
814 | intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
815 | { | ||
816 | struct event_constraint *c; | ||
817 | |||
818 | c = intel_special_constraints(event); | ||
819 | if (c) | ||
820 | return c; | ||
821 | |||
822 | return x86_get_event_constraints(cpuc, event); | ||
823 | } | ||
824 | |||
825 | static __initconst struct x86_pmu core_pmu = { | ||
826 | .name = "core", | ||
827 | .handle_irq = x86_pmu_handle_irq, | ||
828 | .disable_all = x86_pmu_disable_all, | ||
829 | .enable_all = x86_pmu_enable_all, | ||
830 | .enable = x86_pmu_enable_event, | ||
831 | .disable = x86_pmu_disable_event, | ||
832 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
833 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
834 | .event_map = intel_pmu_event_map, | ||
835 | .raw_event = intel_pmu_raw_event, | ||
836 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
837 | .apic = 1, | ||
838 | /* | ||
839 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
840 | * so we install an artificial 1<<31 period regardless of | ||
841 | * the generic event period: | ||
842 | */ | ||
843 | .max_period = (1ULL << 31) - 1, | ||
844 | .get_event_constraints = intel_get_event_constraints, | ||
845 | .event_constraints = intel_core_event_constraints, | ||
846 | }; | ||
847 | |||
848 | static __initconst struct x86_pmu intel_pmu = { | ||
849 | .name = "Intel", | ||
850 | .handle_irq = intel_pmu_handle_irq, | ||
851 | .disable_all = intel_pmu_disable_all, | ||
852 | .enable_all = intel_pmu_enable_all, | ||
853 | .enable = intel_pmu_enable_event, | ||
854 | .disable = intel_pmu_disable_event, | ||
855 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
856 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
857 | .event_map = intel_pmu_event_map, | ||
858 | .raw_event = intel_pmu_raw_event, | ||
859 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
860 | .apic = 1, | ||
861 | /* | ||
862 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
863 | * so we install an artificial 1<<31 period regardless of | ||
864 | * the generic event period: | ||
865 | */ | ||
866 | .max_period = (1ULL << 31) - 1, | ||
867 | .enable_bts = intel_pmu_enable_bts, | ||
868 | .disable_bts = intel_pmu_disable_bts, | ||
869 | .get_event_constraints = intel_get_event_constraints, | ||
870 | |||
871 | .cpu_starting = init_debug_store_on_cpu, | ||
872 | .cpu_dying = fini_debug_store_on_cpu, | ||
873 | }; | ||
874 | |||
875 | static __init int intel_pmu_init(void) | ||
876 | { | ||
877 | union cpuid10_edx edx; | ||
878 | union cpuid10_eax eax; | ||
879 | unsigned int unused; | ||
880 | unsigned int ebx; | ||
881 | int version; | ||
882 | |||
883 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
884 | /* check for P6 processor family */ | ||
885 | if (boot_cpu_data.x86 == 6) { | ||
886 | return p6_pmu_init(); | ||
887 | } else { | ||
888 | return -ENODEV; | ||
889 | } | ||
890 | } | ||
891 | |||
892 | /* | ||
893 | * Check whether the Architectural PerfMon supports | ||
894 | * Branch Misses Retired hw_event or not. | ||
895 | */ | ||
896 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | ||
897 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | ||
898 | return -ENODEV; | ||
899 | |||
900 | version = eax.split.version_id; | ||
901 | if (version < 2) | ||
902 | x86_pmu = core_pmu; | ||
903 | else | ||
904 | x86_pmu = intel_pmu; | ||
905 | |||
906 | x86_pmu.version = version; | ||
907 | x86_pmu.num_events = eax.split.num_events; | ||
908 | x86_pmu.event_bits = eax.split.bit_width; | ||
909 | x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; | ||
910 | |||
911 | /* | ||
912 | * Quirk: v2 perfmon does not report fixed-purpose events, so | ||
913 | * assume at least 3 events: | ||
914 | */ | ||
915 | if (version > 1) | ||
916 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | ||
917 | |||
918 | /* | ||
919 | * Install the hw-cache-events table: | ||
920 | */ | ||
921 | switch (boot_cpu_data.x86_model) { | ||
922 | case 14: /* 65 nm core solo/duo, "Yonah" */ | ||
923 | pr_cont("Core events, "); | ||
924 | break; | ||
925 | |||
926 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | ||
927 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | ||
928 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | ||
929 | case 29: /* six-core 45 nm xeon "Dunnington" */ | ||
930 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | ||
931 | sizeof(hw_cache_event_ids)); | ||
932 | |||
933 | x86_pmu.event_constraints = intel_core2_event_constraints; | ||
934 | pr_cont("Core2 events, "); | ||
935 | break; | ||
936 | |||
937 | case 26: /* 45 nm nehalem, "Bloomfield" */ | ||
938 | case 30: /* 45 nm nehalem, "Lynnfield" */ | ||
939 | case 46: /* 45 nm nehalem-ex, "Beckton" */ | ||
940 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | ||
941 | sizeof(hw_cache_event_ids)); | ||
942 | |||
943 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | ||
944 | pr_cont("Nehalem/Corei7 events, "); | ||
945 | break; | ||
946 | case 28: /* Atom */ | ||
947 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | ||
948 | sizeof(hw_cache_event_ids)); | ||
949 | |||
950 | x86_pmu.event_constraints = intel_gen_event_constraints; | ||
951 | pr_cont("Atom events, "); | ||
952 | break; | ||
953 | |||
954 | case 37: /* 32 nm nehalem, "Clarkdale" */ | ||
955 | case 44: /* 32 nm nehalem, "Gulftown" */ | ||
956 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, | ||
957 | sizeof(hw_cache_event_ids)); | ||
958 | |||
959 | x86_pmu.event_constraints = intel_westmere_event_constraints; | ||
960 | pr_cont("Westmere events, "); | ||
961 | break; | ||
962 | |||
963 | default: | ||
964 | /* | ||
965 | * default constraints for v2 and up | ||
966 | */ | ||
967 | x86_pmu.event_constraints = intel_gen_event_constraints; | ||
968 | pr_cont("generic architected perfmon, "); | ||
969 | } | ||
970 | return 0; | ||
971 | } | ||
972 | |||
973 | #else /* CONFIG_CPU_SUP_INTEL */ | ||
974 | |||
975 | static int intel_pmu_init(void) | ||
976 | { | ||
977 | return 0; | ||
978 | } | ||
979 | |||
980 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
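intel_pmu_enable_fixed above packs one 4-bit control field per fixed counter into MSR_ARCH_PERFMON_FIXED_CTR_CTRL. The bit arithmetic can be shown in isolation; this is a hedged sketch operating on a plain variable rather than the MSR, with hypothetical helper and parameter names:

	#include <stdint.h>

	/*
	 * Per-fixed-counter control nibble, as used by the code above:
	 * bit 0 = count in ring 0, bit 1 = count in ring 3,
	 * bit 2 = any-thread (perfmon v3+), bit 3 = PMI on overflow.
	 */
	static uint64_t fixed_ctrl_set(uint64_t ctrl_val, int idx,
				       int usr, int os, int any)
	{
		uint64_t bits = 0x8ULL;			/* always enable the PMI */

		if (usr)
			bits |= 0x2;
		if (os)
			bits |= 0x1;
		if (any)
			bits |= 0x4;

		ctrl_val &= ~(0xfULL << (idx * 4));	/* clear this counter's nibble */
		ctrl_val |= bits << (idx * 4);		/* install the new bits */
		return ctrl_val;
	}

intel_pmu_disable_fixed is then just the clearing step without the install.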
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c new file mode 100644 index 000000000000..a330485d14da --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
@@ -0,0 +1,159 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | /* | ||
4 | * Not sure about some of these | ||
5 | */ | ||
6 | static const u64 p6_perfmon_event_map[] = | ||
7 | { | ||
8 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, | ||
9 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
10 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, | ||
11 | [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, | ||
12 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
13 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
14 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, | ||
15 | }; | ||
16 | |||
17 | static u64 p6_pmu_event_map(int hw_event) | ||
18 | { | ||
19 | return p6_perfmon_event_map[hw_event]; | ||
20 | } | ||
21 | |||
22 | /* | ||
23 | * Event setting that is specified not to count anything. | ||
24 | * We use this to effectively disable a counter. | ||
25 | * | ||
26 | * L2_RQSTS with 0 MESI unit mask. | ||
27 | */ | ||
28 | #define P6_NOP_EVENT 0x0000002EULL | ||
29 | |||
30 | static u64 p6_pmu_raw_event(u64 hw_event) | ||
31 | { | ||
32 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
33 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
34 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
35 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
36 | #define P6_EVNTSEL_REG_MASK 0xFF000000ULL | ||
37 | |||
38 | #define P6_EVNTSEL_MASK \ | ||
39 | (P6_EVNTSEL_EVENT_MASK | \ | ||
40 | P6_EVNTSEL_UNIT_MASK | \ | ||
41 | P6_EVNTSEL_EDGE_MASK | \ | ||
42 | P6_EVNTSEL_INV_MASK | \ | ||
43 | P6_EVNTSEL_REG_MASK) | ||
44 | |||
45 | return hw_event & P6_EVNTSEL_MASK; | ||
46 | } | ||
47 | |||
48 | static struct event_constraint p6_event_constraints[] = | ||
49 | { | ||
50 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | ||
51 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
52 | INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ | ||
53 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
54 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
55 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
56 | EVENT_CONSTRAINT_END | ||
57 | }; | ||
58 | |||
59 | static void p6_pmu_disable_all(void) | ||
60 | { | ||
61 | u64 val; | ||
62 | |||
63 | /* p6 only has one enable register */ | ||
64 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
65 | val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; | ||
66 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
67 | } | ||
68 | |||
69 | static void p6_pmu_enable_all(void) | ||
70 | { | ||
71 | unsigned long val; | ||
72 | |||
73 | /* p6 only has one enable register */ | ||
74 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
75 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
76 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
77 | } | ||
78 | |||
79 | static inline void | ||
80 | p6_pmu_disable_event(struct perf_event *event) | ||
81 | { | ||
82 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
83 | struct hw_perf_event *hwc = &event->hw; | ||
84 | u64 val = P6_NOP_EVENT; | ||
85 | |||
86 | if (cpuc->enabled) | ||
87 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
88 | |||
89 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); | ||
90 | } | ||
91 | |||
92 | static void p6_pmu_enable_event(struct perf_event *event) | ||
93 | { | ||
94 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
95 | struct hw_perf_event *hwc = &event->hw; | ||
96 | u64 val; | ||
97 | |||
98 | val = hwc->config; | ||
99 | if (cpuc->enabled) | ||
100 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
101 | |||
102 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); | ||
103 | } | ||
104 | |||
105 | static __initconst struct x86_pmu p6_pmu = { | ||
106 | .name = "p6", | ||
107 | .handle_irq = x86_pmu_handle_irq, | ||
108 | .disable_all = p6_pmu_disable_all, | ||
109 | .enable_all = p6_pmu_enable_all, | ||
110 | .enable = p6_pmu_enable_event, | ||
111 | .disable = p6_pmu_disable_event, | ||
112 | .eventsel = MSR_P6_EVNTSEL0, | ||
113 | .perfctr = MSR_P6_PERFCTR0, | ||
114 | .event_map = p6_pmu_event_map, | ||
115 | .raw_event = p6_pmu_raw_event, | ||
116 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | ||
117 | .apic = 1, | ||
118 | .max_period = (1ULL << 31) - 1, | ||
119 | .version = 0, | ||
120 | .num_events = 2, | ||
121 | /* | ||
122 | * Events have 40 bits implemented. However they are designed such | ||
123 | * that bits [32-39] are sign extensions of bit 31. As such the | ||
124 | * effective width of an event for a P6-like PMU is 32 bits only. ||
125 | * ||
126 | * See the IA-32 Intel Architecture Software Developer's Manual Vol 3B ||
127 | */ | ||
128 | .event_bits = 32, | ||
129 | .event_mask = (1ULL << 32) - 1, | ||
130 | .get_event_constraints = x86_get_event_constraints, | ||
131 | .event_constraints = p6_event_constraints, | ||
132 | }; | ||
133 | |||
134 | static __init int p6_pmu_init(void) | ||
135 | { | ||
136 | switch (boot_cpu_data.x86_model) { | ||
137 | case 1: | ||
138 | case 3: /* Pentium Pro */ | ||
139 | case 5: | ||
140 | case 6: /* Pentium II */ | ||
141 | case 7: | ||
142 | case 8: | ||
143 | case 11: /* Pentium III */ | ||
144 | case 9: | ||
145 | case 13: | ||
146 | /* Pentium M */ | ||
147 | break; | ||
148 | default: | ||
149 | pr_cont("unsupported p6 CPU model %d ", | ||
150 | boot_cpu_data.x86_model); | ||
151 | return -ENODEV; | ||
152 | } | ||
153 | |||
154 | x86_pmu = p6_pmu; | ||
155 | |||
156 | return 0; | ||
157 | } | ||
158 | |||
159 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
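The p6_pmu comment about a 32-bit effective counter width is why the struct carries event_bits and event_mask: counter deltas have to be computed modulo the implemented width, or a wraparound is misread as a huge jump. A minimal sketch of that delta computation, mirroring the shift trick the generic x86 code uses (a standalone illustration, not the kernel function):

	#include <stdint.h>

	/* Advance between two raw reads of a counter only 'width' bits wide. */
	static uint64_t counter_delta(uint64_t prev, uint64_t now, int width)
	{
		int shift = 64 - width;

		/*
		 * Shift both reads so the counter's top bit lands at bit 63;
		 * the unsigned subtraction then absorbs a single wraparound.
		 */
		return ((now << shift) - (prev << shift)) >> shift;
	}

With width = 32, counter_delta(0xfffffff0, 0x10, 32) correctly yields 0x20 rather than a near-2^64 value.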
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 898df9719afb..fb329e9f8494 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -115,17 +115,6 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) | |||
115 | 115 | ||
116 | return !test_bit(counter, perfctr_nmi_owner); | 116 | return !test_bit(counter, perfctr_nmi_owner); |
117 | } | 117 | } |
118 | |||
119 | /* checks an msr for availability */ ||
120 | int avail_to_resrv_perfctr_nmi(unsigned int msr) | ||
121 | { | ||
122 | unsigned int counter; | ||
123 | |||
124 | counter = nmi_perfctr_msr_to_bit(msr); | ||
125 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
126 | |||
127 | return !test_bit(counter, perfctr_nmi_owner); | ||
128 | } | ||
129 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); | 118 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); |
130 | 119 | ||
131 | int reserve_perfctr_nmi(unsigned int msr) | 120 | int reserve_perfctr_nmi(unsigned int msr) |
@@ -691,7 +680,7 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) | |||
691 | cpu_nmi_set_wd_enabled(); | 680 | cpu_nmi_set_wd_enabled(); |
692 | 681 | ||
693 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 682 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
694 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 683 | evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE; |
695 | wrmsr(evntsel_msr, evntsel, 0); | 684 | wrmsr(evntsel_msr, evntsel, 0); |
696 | intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); | 685 | intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); |
697 | return 1; | 686 | return 1; |
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 1cbed97b59cf..dfdb4dba2320 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -22,6 +22,7 @@ | |||
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/dmi.h> | 24 | #include <linux/dmi.h> |
25 | #include <linux/module.h> | ||
25 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
26 | #include <asm/vmware.h> | 27 | #include <asm/vmware.h> |
27 | #include <asm/x86_init.h> | 28 | #include <asm/x86_init.h> |
@@ -101,6 +102,7 @@ int vmware_platform(void) | |||
101 | 102 | ||
102 | return 0; | 103 | return 0; |
103 | } | 104 | } |
105 | EXPORT_SYMBOL(vmware_platform); | ||
104 | 106 | ||
105 | /* | 107 | /* |
106 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. | 108 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. |
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index cb27fd6136c9..8b862d5900fe 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/cpu.h> | 40 | #include <linux/cpu.h> |
41 | #include <linux/notifier.h> | 41 | #include <linux/notifier.h> |
42 | #include <linux/uaccess.h> | 42 | #include <linux/uaccess.h> |
43 | #include <linux/gfp.h> | ||
43 | 44 | ||
44 | #include <asm/processor.h> | 45 | #include <asm/processor.h> |
45 | #include <asm/msr.h> | 46 | #include <asm/msr.h> |
@@ -229,7 +230,7 @@ static void __exit cpuid_exit(void) | |||
229 | for_each_online_cpu(cpu) | 230 | for_each_online_cpu(cpu) |
230 | cpuid_device_destroy(cpu); | 231 | cpuid_device_destroy(cpu); |
231 | class_destroy(cpuid_class); | 232 | class_destroy(cpuid_class); |
232 | unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); | 233 | __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); |
233 | unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); | 234 | unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); |
234 | } | 235 | } |
235 | 236 | ||
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index a4849c10a77e..ebd4c51d096a 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -27,7 +27,6 @@ | |||
27 | #include <asm/cpu.h> | 27 | #include <asm/cpu.h> |
28 | #include <asm/reboot.h> | 28 | #include <asm/reboot.h> |
29 | #include <asm/virtext.h> | 29 | #include <asm/virtext.h> |
30 | #include <asm/x86_init.h> | ||
31 | 30 | ||
32 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) | 31 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) |
33 | 32 | ||
@@ -103,10 +102,5 @@ void native_machine_crash_shutdown(struct pt_regs *regs) | |||
103 | #ifdef CONFIG_HPET_TIMER | 102 | #ifdef CONFIG_HPET_TIMER |
104 | hpet_disable(); | 103 | hpet_disable(); |
105 | #endif | 104 | #endif |
106 | |||
107 | #ifdef CONFIG_X86_64 | ||
108 | x86_platform.iommu_shutdown(); | ||
109 | #endif | ||
110 | |||
111 | crash_save_cpu(regs, safe_smp_processor_id()); | 105 | crash_save_cpu(regs, safe_smp_processor_id()); |
112 | } | 106 | } |
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c index cd97ce18c29d..67414550c3cc 100644 --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * Copyright (C) IBM Corporation, 2004. All rights reserved | 5 | * Copyright (C) IBM Corporation, 2004. All rights reserved |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/slab.h> | ||
8 | #include <linux/errno.h> | 9 | #include <linux/errno.h> |
9 | #include <linux/highmem.h> | 10 | #include <linux/highmem.h> |
10 | #include <linux/crash_dump.h> | 11 | #include <linux/crash_dump.h> |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index c56bc2873030..6d817554780a 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -123,13 +123,15 @@ print_context_stack_bp(struct thread_info *tinfo, | |||
123 | while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) { | 123 | while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) { |
124 | unsigned long addr = *ret_addr; | 124 | unsigned long addr = *ret_addr; |
125 | 125 | ||
126 | if (__kernel_text_address(addr)) { | 126 | if (!__kernel_text_address(addr)) |
127 | ops->address(data, addr, 1); | 127 | break; |
128 | frame = frame->next_frame; | 128 | |
129 | ret_addr = &frame->return_address; | 129 | ops->address(data, addr, 1); |
130 | print_ftrace_graph_addr(addr, data, ops, tinfo, graph); | 130 | frame = frame->next_frame; |
131 | } | 131 | ret_addr = &frame->return_address; |
132 | print_ftrace_graph_addr(addr, data, ops, tinfo, graph); | ||
132 | } | 133 | } |
134 | |||
133 | return (unsigned long)frame; | 135 | return (unsigned long)frame; |
134 | } | 136 | } |
135 | EXPORT_SYMBOL_GPL(print_context_stack_bp); | 137 | EXPORT_SYMBOL_GPL(print_context_stack_bp); |
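The print_context_stack_bp() change above is more than cosmetic: the old code only advanced the frame when the return address looked like kernel text, so a bogus entry could make the walker spin on the same frame forever; the new code treats the first non-text address as the end of the reliable chain and breaks out. A minimal sketch of the resulting early-exit walk (is_text() and emit() are stand-ins for __kernel_text_address() and ops->address(), not kernel API):

	struct frame {
		struct frame *next_frame;
		unsigned long return_address;
	};

	/* Follow frame-pointer-linked frames; stop at the first return
	 * address that is not kernel text and report the last frame. */
	static unsigned long walk_frames(struct frame *frame, void *stack_end,
					 int (*is_text)(unsigned long),
					 void (*emit)(unsigned long))
	{
		unsigned long *ret_addr = &frame->return_address;

		while ((void *)(ret_addr + 1) <= stack_end) {
			unsigned long addr = *ret_addr;

			if (!is_text(addr))
				break;		/* end of the trusted chain */

			emit(addr);
			frame = frame->next_frame;
			ret_addr = &frame->return_address;
		}
		return (unsigned long)frame;
	}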
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h index 4fd1420faffa..e1a93be4fd44 100644 --- a/arch/x86/kernel/dumpstack.h +++ b/arch/x86/kernel/dumpstack.h | |||
@@ -14,6 +14,8 @@ | |||
14 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | 14 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) |
15 | #endif | 15 | #endif |
16 | 16 | ||
17 | #include <linux/uaccess.h> | ||
18 | |||
17 | extern void | 19 | extern void |
18 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 20 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
19 | unsigned long *stack, unsigned long bp, char *log_lvl); | 21 | unsigned long *stack, unsigned long bp, char *log_lvl); |
@@ -29,4 +31,26 @@ struct stack_frame { | |||
29 | struct stack_frame *next_frame; | 31 | struct stack_frame *next_frame; |
30 | unsigned long return_address; | 32 | unsigned long return_address; |
31 | }; | 33 | }; |
34 | |||
35 | struct stack_frame_ia32 { | ||
36 | u32 next_frame; | ||
37 | u32 return_address; | ||
38 | }; | ||
39 | |||
40 | static inline unsigned long rewind_frame_pointer(int n) | ||
41 | { | ||
42 | struct stack_frame *frame; | ||
43 | |||
44 | get_bp(frame); | ||
45 | |||
46 | #ifdef CONFIG_FRAME_POINTER | ||
47 | while (n--) { | ||
48 | if (probe_kernel_address(&frame->next_frame, frame)) | ||
49 | break; | ||
50 | } | ||
32 | #endif | 51 | #endif |
52 | |||
53 | return (unsigned long)frame; | ||
54 | } | ||
55 | |||
56 | #endif /* DUMPSTACK_H */ | ||
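rewind_frame_pointer() gives the stack dumpers a safe way to skip a known number of their own frames: each hop dereferences the saved frame pointer through probe_kernel_address(), so a corrupt chain stops the rewind instead of faulting. A hypothetical caller, purely for illustration:

	/* Frame pointer of our caller's caller: skip this helper's frame
	 * and the immediate caller's frame. Depends on CONFIG_FRAME_POINTER;
	 * without it, rewind_frame_pointer() returns the current frame. */
	static unsigned long callers_caller_bp(void)
	{
		return rewind_frame_pointer(2);
	}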
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index ae775ca47b25..11540a189d93 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -18,11 +18,6 @@ | |||
18 | 18 | ||
19 | #include "dumpstack.h" | 19 | #include "dumpstack.h" |
20 | 20 | ||
21 | /* Just a stub for now */ | ||
22 | int x86_is_stack_id(int id, char *name) | ||
23 | { | ||
24 | return 0; | ||
25 | } | ||
26 | 21 | ||
27 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 22 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
28 | unsigned long *stack, unsigned long bp, | 23 | unsigned long *stack, unsigned long bp, |
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 0ad9597073f5..272c9f1f05f3 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -33,11 +33,6 @@ static char x86_stack_ids[][8] = { | |||
33 | #endif | 33 | #endif |
34 | }; | 34 | }; |
35 | 35 | ||
36 | int x86_is_stack_id(int id, char *name) | ||
37 | { | ||
38 | return x86_stack_ids[id - 1] == name; | ||
39 | } | ||
40 | |||
41 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 36 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
42 | unsigned *usedp, char **idp) | 37 | unsigned *usedp, char **idp) |
43 | { | 38 | { |
@@ -125,9 +120,15 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack, | |||
125 | { | 120 | { |
126 | #ifdef CONFIG_FRAME_POINTER | 121 | #ifdef CONFIG_FRAME_POINTER |
127 | struct stack_frame *frame = (struct stack_frame *)bp; | 122 | struct stack_frame *frame = (struct stack_frame *)bp; |
123 | unsigned long next; | ||
128 | 124 | ||
129 | if (!in_irq_stack(stack, irq_stack, irq_stack_end)) | 125 | if (!in_irq_stack(stack, irq_stack, irq_stack_end)) { |
130 | return (unsigned long)frame->next_frame; | 126 | if (!probe_kernel_address(&frame->next_frame, next)) |
127 | return next; | ||
128 | else | ||
129 | WARN_ONCE(1, "Perf: bad frame pointer = %p in " | ||
130 | "callchain\n", &frame->next_frame); | ||
131 | } | ||
131 | #endif | 132 | #endif |
132 | return bp; | 133 | return bp; |
133 | } | 134 | } |
@@ -207,7 +208,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
207 | if (in_irq_stack(stack, irq_stack, irq_stack_end)) { | 208 | if (in_irq_stack(stack, irq_stack, irq_stack_end)) { |
208 | if (ops->stack(data, "IRQ") < 0) | 209 | if (ops->stack(data, "IRQ") < 0) |
209 | break; | 210 | break; |
210 | bp = print_context_stack(tinfo, stack, bp, | 211 | bp = ops->walk_stack(tinfo, stack, bp, |
211 | ops, data, irq_stack_end, &graph); | 212 | ops, data, irq_stack_end, &graph); |
212 | /* | 213 | /* |
213 | * We link to the next stack (which would be | 214 | * We link to the next stack (which would be |
@@ -228,7 +229,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
228 | /* | 229 | /* |
229 | * This handles the process stack: | 230 | * This handles the process stack: |
230 | */ | 231 | */ |
231 | bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph); | 232 | bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph); |
232 | put_cpu(); | 233 | put_cpu(); |
233 | } | 234 | } |
234 | EXPORT_SYMBOL(dump_trace); | 235 | EXPORT_SYMBOL(dump_trace); |
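Routing both walks through ops->walk_stack instead of a hard call to print_context_stack() lets each stacktrace consumer choose its walker. A consumer that wants frame-pointer-accurate traces would wire it up roughly like this (sketch; the remaining stacktrace_ops callbacks are elided):

	static const struct stacktrace_ops example_ops = {
		/* .warning, .stack, .address, ... elided for brevity */
		.walk_stack	= print_context_stack_bp,
	};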
@@ -291,6 +292,7 @@ void show_registers(struct pt_regs *regs) | |||
291 | 292 | ||
292 | sp = regs->sp; | 293 | sp = regs->sp; |
293 | printk("CPU %d ", cpu); | 294 | printk("CPU %d ", cpu); |
295 | print_modules(); | ||
294 | __show_regs(regs, 1); | 296 | __show_regs(regs, 1); |
295 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", | 297 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", |
296 | cur->comm, cur->pid, task_thread_info(cur), cur); | 298 | cur->comm, cur->pid, task_thread_info(cur), cur); |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 05ed7ab2ca48..7bca3c6a02fb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -12,21 +12,13 @@ | |||
12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
14 | #include <linux/bootmem.h> | 14 | #include <linux/bootmem.h> |
15 | #include <linux/ioport.h> | ||
16 | #include <linux/string.h> | ||
17 | #include <linux/kexec.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/mm.h> | ||
20 | #include <linux/pfn.h> | 15 | #include <linux/pfn.h> |
21 | #include <linux/suspend.h> | 16 | #include <linux/suspend.h> |
22 | #include <linux/firmware-map.h> | 17 | #include <linux/firmware-map.h> |
23 | 18 | ||
24 | #include <asm/pgtable.h> | ||
25 | #include <asm/page.h> | ||
26 | #include <asm/e820.h> | 19 | #include <asm/e820.h> |
27 | #include <asm/proto.h> | 20 | #include <asm/proto.h> |
28 | #include <asm/setup.h> | 21 | #include <asm/setup.h> |
29 | #include <asm/trampoline.h> | ||
30 | 22 | ||
31 | /* | 23 | /* |
32 | * The e820 map is the map that gets modified e.g. with command line parameters | 24 | * The e820 map is the map that gets modified e.g. with command line parameters |
@@ -517,31 +509,55 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, | |||
517 | int checktype) | 509 | int checktype) |
518 | { | 510 | { |
519 | int i; | 511 | int i; |
512 | u64 end; | ||
520 | u64 real_removed_size = 0; | 513 | u64 real_removed_size = 0; |
521 | 514 | ||
522 | if (size > (ULLONG_MAX - start)) | 515 | if (size > (ULLONG_MAX - start)) |
523 | size = ULLONG_MAX - start; | 516 | size = ULLONG_MAX - start; |
524 | 517 | ||
518 | end = start + size; | ||
519 | printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ", | ||
520 | (unsigned long long) start, | ||
521 | (unsigned long long) end); | ||
522 | if (checktype) | ||
523 | e820_print_type(old_type); | ||
524 | printk(KERN_CONT "\n"); | ||
525 | |||
525 | for (i = 0; i < e820.nr_map; i++) { | 526 | for (i = 0; i < e820.nr_map; i++) { |
526 | struct e820entry *ei = &e820.map[i]; | 527 | struct e820entry *ei = &e820.map[i]; |
527 | u64 final_start, final_end; | 528 | u64 final_start, final_end; |
529 | u64 ei_end; | ||
528 | 530 | ||
529 | if (checktype && ei->type != old_type) | 531 | if (checktype && ei->type != old_type) |
530 | continue; | 532 | continue; |
533 | |||
534 | ei_end = ei->addr + ei->size; | ||
531 | /* totally covered? */ | 535 | /* totally covered? */ |
532 | if (ei->addr >= start && | 536 | if (ei->addr >= start && ei_end <= end) { |
533 | (ei->addr + ei->size) <= (start + size)) { | ||
534 | real_removed_size += ei->size; | 537 | real_removed_size += ei->size; |
535 | memset(ei, 0, sizeof(struct e820entry)); | 538 | memset(ei, 0, sizeof(struct e820entry)); |
536 | continue; | 539 | continue; |
537 | } | 540 | } |
541 | |||
542 | /* new range is totally covered? */ | ||
543 | if (ei->addr < start && ei_end > end) { | ||
544 | e820_add_region(end, ei_end - end, ei->type); | ||
545 | ei->size = start - ei->addr; | ||
546 | real_removed_size += size; | ||
547 | continue; | ||
548 | } | ||
549 | |||
538 | /* partially covered */ | 550 | /* partially covered */ |
539 | final_start = max(start, ei->addr); | 551 | final_start = max(start, ei->addr); |
540 | final_end = min(start + size, ei->addr + ei->size); | 552 | final_end = min(end, ei_end); |
541 | if (final_start >= final_end) | 553 | if (final_start >= final_end) |
542 | continue; | 554 | continue; |
543 | real_removed_size += final_end - final_start; | 555 | real_removed_size += final_end - final_start; |
544 | 556 | ||
557 | /* | ||
558 | * The leftover range could be the head or the tail, so the | ||
559 | * size must be updated first. | ||
560 | */ | ||
545 | ei->size -= final_end - final_start; | 561 | ei->size -= final_end - final_start; |
546 | if (ei->addr < final_start) | 562 | if (ei->addr < final_start) |
547 | continue; | 563 | continue; |
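The rewritten removal loop now distinguishes three overlap cases where the old code only knew two: an entry fully inside the removed range is dropped, a removed range strictly inside an entry splits it via e820_add_region(), and a head or tail overlap shrinks the entry. The case analysis on plain [start, end) intervals, as a sketch (names are illustrative, not kernel API):

	enum overlap {
		COVERS_ENTRY,	/* entry inside removed range: drop it      */
		SPLITS_ENTRY,	/* removed range inside entry: split in two */
		PARTIAL,	/* head or tail overlap: shrink the entry   */
		DISJOINT,	/* no overlap: leave the entry alone        */
	};

	static enum overlap classify(u64 ei_start, u64 ei_end,
				     u64 start, u64 end)
	{
		if (ei_start >= start && ei_end <= end)
			return COVERS_ENTRY;
		if (ei_start < start && ei_end > end)
			return SPLITS_ENTRY;
		if (ei_start < end && ei_end > start)
			return PARTIAL;
		return DISJOINT;
	}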
@@ -722,319 +738,44 @@ core_initcall(e820_mark_nvs_memory); | |||
722 | #endif | 738 | #endif |
723 | 739 | ||
724 | /* | 740 | /* |
725 | * Early reserved memory areas. | 741 | * Find a free area with specified alignment in a specific range. |
726 | */ | ||
727 | #define MAX_EARLY_RES 32 | ||
728 | |||
729 | struct early_res { | ||
730 | u64 start, end; | ||
731 | char name[16]; | ||
732 | char overlap_ok; | ||
733 | }; | ||
734 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { | ||
735 | { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */ | ||
736 | #ifdef CONFIG_X86_32 | ||
737 | /* | ||
738 | * But first pinch a few for the stack/trampoline stuff | ||
739 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
740 | * trampoline before removing it. (see the GDT stuff) | ||
741 | */ | ||
742 | { PAGE_SIZE, PAGE_SIZE, "EX TRAMPOLINE", 1 }, | ||
743 | #endif | ||
744 | |||
745 | {} | ||
746 | }; | ||
747 | |||
748 | static int __init find_overlapped_early(u64 start, u64 end) | ||
749 | { | ||
750 | int i; | ||
751 | struct early_res *r; | ||
752 | |||
753 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
754 | r = &early_res[i]; | ||
755 | if (end > r->start && start < r->end) | ||
756 | break; | ||
757 | } | ||
758 | |||
759 | return i; | ||
760 | } | ||
761 | |||
762 | /* | ||
763 | * Drop the i-th range from the early reservation map, | ||
764 | * by copying any higher ranges down one over it, and | ||
765 | * clearing what had been the last slot. | ||
766 | */ | ||
767 | static void __init drop_range(int i) | ||
768 | { | ||
769 | int j; | ||
770 | |||
771 | for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) | ||
772 | ; | ||
773 | |||
774 | memmove(&early_res[i], &early_res[i + 1], | ||
775 | (j - 1 - i) * sizeof(struct early_res)); | ||
776 | |||
777 | early_res[j - 1].end = 0; | ||
778 | } | ||
779 | |||
780 | /* | ||
781 | * Split any existing ranges that: | ||
782 | * 1) are marked 'overlap_ok', and | ||
783 | * 2) overlap with the stated range [start, end) | ||
784 | * into whatever portion (if any) of the existing range is entirely | ||
785 | * below or entirely above the stated range. Drop the portion | ||
786 | * of the existing range that overlaps with the stated range, | ||
787 | * which will allow the caller of this routine to then add that | ||
788 | * stated range without conflicting with any existing range. | ||
789 | */ | 742 | */ |
790 | static void __init drop_overlaps_that_are_ok(u64 start, u64 end) | 743 | u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) |
791 | { | 744 | { |
792 | int i; | 745 | int i; |
793 | struct early_res *r; | ||
794 | u64 lower_start, lower_end; | ||
795 | u64 upper_start, upper_end; | ||
796 | char name[16]; | ||
797 | 746 | ||
798 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | 747 | for (i = 0; i < e820.nr_map; i++) { |
799 | r = &early_res[i]; | 748 | struct e820entry *ei = &e820.map[i]; |
749 | u64 addr; | ||
750 | u64 ei_start, ei_last; | ||
800 | 751 | ||
801 | /* Continue past non-overlapping ranges */ | 752 | if (ei->type != E820_RAM) |
802 | if (end <= r->start || start >= r->end) | ||
803 | continue; | 753 | continue; |
804 | 754 | ||
805 | /* | 755 | ei_last = ei->addr + ei->size; |
806 | * Leave non-ok overlaps as is; let caller | 756 | ei_start = ei->addr; |
807 | * panic "Overlapping early reservations" | 757 | addr = find_early_area(ei_start, ei_last, start, end, |
808 | * when it hits this overlap. | 758 | size, align); |
809 | */ | ||
810 | if (!r->overlap_ok) | ||
811 | return; | ||
812 | |||
813 | /* | ||
814 | * We have an ok overlap. We will drop it from the early | ||
815 | * reservation map, and add back in any non-overlapping | ||
816 | * portions (lower or upper) as separate, overlap_ok, | ||
817 | * non-overlapping ranges. | ||
818 | */ | ||
819 | |||
820 | /* 1. Note any non-overlapping (lower or upper) ranges. */ | ||
821 | strncpy(name, r->name, sizeof(name) - 1); | ||
822 | |||
823 | lower_start = lower_end = 0; | ||
824 | upper_start = upper_end = 0; | ||
825 | if (r->start < start) { | ||
826 | lower_start = r->start; | ||
827 | lower_end = start; | ||
828 | } | ||
829 | if (r->end > end) { | ||
830 | upper_start = end; | ||
831 | upper_end = r->end; | ||
832 | } | ||
833 | |||
834 | /* 2. Drop the original ok overlapping range */ | ||
835 | drop_range(i); | ||
836 | |||
837 | i--; /* resume for-loop on copied down entry */ | ||
838 | |||
839 | /* 3. Add back in any non-overlapping ranges. */ | ||
840 | if (lower_end) | ||
841 | reserve_early_overlap_ok(lower_start, lower_end, name); | ||
842 | if (upper_end) | ||
843 | reserve_early_overlap_ok(upper_start, upper_end, name); | ||
844 | } | ||
845 | } | ||
846 | |||
847 | static void __init __reserve_early(u64 start, u64 end, char *name, | ||
848 | int overlap_ok) | ||
849 | { | ||
850 | int i; | ||
851 | struct early_res *r; | ||
852 | |||
853 | i = find_overlapped_early(start, end); | ||
854 | if (i >= MAX_EARLY_RES) | ||
855 | panic("Too many early reservations"); | ||
856 | r = &early_res[i]; | ||
857 | if (r->end) | ||
858 | panic("Overlapping early reservations " | ||
859 | "%llx-%llx %s to %llx-%llx %s\n", | ||
860 | start, end - 1, name?name:"", r->start, | ||
861 | r->end - 1, r->name); | ||
862 | r->start = start; | ||
863 | r->end = end; | ||
864 | r->overlap_ok = overlap_ok; | ||
865 | if (name) | ||
866 | strncpy(r->name, name, sizeof(r->name) - 1); | ||
867 | } | ||
868 | |||
869 | /* | ||
870 | * A few early reservtations come here. | ||
871 | * | ||
872 | * The 'overlap_ok' in the name of this routine does -not- mean it | ||
873 | * is ok for these reservations to overlap an earlier reservation. | ||
874 | * Rather it means that it is ok for subsequent reservations to | ||
875 | * overlap this one. | ||
876 | * | ||
877 | * Use this entry point to reserve early ranges when you are doing | ||
878 | * so out of "Paranoia", reserving perhaps more memory than you need, | ||
879 | * just in case, and don't mind a subsequent overlapping reservation | ||
880 | * that is known to be needed. | ||
881 | * | ||
882 | * The drop_overlaps_that_are_ok() call here isn't really needed. | ||
883 | * It would be needed if we had two colliding 'overlap_ok' | ||
884 | * reservations, so that the second such would not panic on the | ||
885 | * overlap with the first. We don't have any such as of this | ||
886 | * writing, but might as well tolerate such if it happens in | ||
887 | * the future. | ||
888 | */ | ||
889 | void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) | ||
890 | { | ||
891 | drop_overlaps_that_are_ok(start, end); | ||
892 | __reserve_early(start, end, name, 1); | ||
893 | } | ||
894 | |||
895 | /* | ||
896 | * Most early reservations come here. | ||
897 | * | ||
898 | * We first have drop_overlaps_that_are_ok() drop any pre-existing | ||
899 | * 'overlap_ok' ranges, so that we can then reserve this memory | ||
900 | * range without risk of panic'ing on an overlapping overlap_ok | ||
901 | * early reservation. | ||
902 | */ | ||
903 | void __init reserve_early(u64 start, u64 end, char *name) | ||
904 | { | ||
905 | if (start >= end) | ||
906 | return; | ||
907 | |||
908 | drop_overlaps_that_are_ok(start, end); | ||
909 | __reserve_early(start, end, name, 0); | ||
910 | } | ||
911 | |||
912 | void __init free_early(u64 start, u64 end) | ||
913 | { | ||
914 | struct early_res *r; | ||
915 | int i; | ||
916 | |||
917 | i = find_overlapped_early(start, end); | ||
918 | r = &early_res[i]; | ||
919 | if (i >= MAX_EARLY_RES || r->end != end || r->start != start) | ||
920 | panic("free_early on not reserved area: %llx-%llx!", | ||
921 | start, end - 1); | ||
922 | |||
923 | drop_range(i); | ||
924 | } | ||
925 | |||
926 | void __init early_res_to_bootmem(u64 start, u64 end) | ||
927 | { | ||
928 | int i, count; | ||
929 | u64 final_start, final_end; | ||
930 | |||
931 | count = 0; | ||
932 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) | ||
933 | count++; | ||
934 | |||
935 | printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n", | ||
936 | count, start, end); | ||
937 | for (i = 0; i < count; i++) { | ||
938 | struct early_res *r = &early_res[i]; | ||
939 | printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, | ||
940 | r->start, r->end, r->name); | ||
941 | final_start = max(start, r->start); | ||
942 | final_end = min(end, r->end); | ||
943 | if (final_start >= final_end) { | ||
944 | printk(KERN_CONT "\n"); | ||
945 | continue; | ||
946 | } | ||
947 | printk(KERN_CONT " ==> [%010llx - %010llx]\n", | ||
948 | final_start, final_end); | ||
949 | reserve_bootmem_generic(final_start, final_end - final_start, | ||
950 | BOOTMEM_DEFAULT); | ||
951 | } | ||
952 | } | ||
953 | 759 | ||
954 | /* Check for already reserved areas */ | 760 | if (addr != -1ULL) |
955 | static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) | 761 | return addr; |
956 | { | ||
957 | int i; | ||
958 | u64 addr = *addrp; | ||
959 | int changed = 0; | ||
960 | struct early_res *r; | ||
961 | again: | ||
962 | i = find_overlapped_early(addr, addr + size); | ||
963 | r = &early_res[i]; | ||
964 | if (i < MAX_EARLY_RES && r->end) { | ||
965 | *addrp = addr = round_up(r->end, align); | ||
966 | changed = 1; | ||
967 | goto again; | ||
968 | } | 762 | } |
969 | return changed; | 763 | return -1ULL; |
970 | } | 764 | } |
971 | 765 | ||
972 | /* Check for already reserved areas */ | 766 | u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align) |
973 | static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) | ||
974 | { | 767 | { |
975 | int i; | 768 | return find_e820_area(start, end, size, align); |
976 | u64 addr = *addrp, last; | ||
977 | u64 size = *sizep; | ||
978 | int changed = 0; | ||
979 | again: | ||
980 | last = addr + size; | ||
981 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
982 | struct early_res *r = &early_res[i]; | ||
983 | if (last > r->start && addr < r->start) { | ||
984 | size = r->start - addr; | ||
985 | changed = 1; | ||
986 | goto again; | ||
987 | } | ||
988 | if (last > r->end && addr < r->end) { | ||
989 | addr = round_up(r->end, align); | ||
990 | size = last - addr; | ||
991 | changed = 1; | ||
992 | goto again; | ||
993 | } | ||
994 | if (last <= r->end && addr >= r->start) { | ||
995 | (*sizep)++; | ||
996 | return 0; | ||
997 | } | ||
998 | } | ||
999 | if (changed) { | ||
1000 | *addrp = addr; | ||
1001 | *sizep = size; | ||
1002 | } | ||
1003 | return changed; | ||
1004 | } | 769 | } |
1005 | 770 | ||
1006 | /* | 771 | u64 __init get_max_mapped(void) |
1007 | * Find a free area with specified alignment in a specific range. | ||
1008 | */ | ||
1009 | u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) | ||
1010 | { | 772 | { |
1011 | int i; | 773 | u64 end = max_pfn_mapped; |
1012 | 774 | ||
1013 | for (i = 0; i < e820.nr_map; i++) { | 775 | end <<= PAGE_SHIFT; |
1014 | struct e820entry *ei = &e820.map[i]; | ||
1015 | u64 addr, last; | ||
1016 | u64 ei_last; | ||
1017 | 776 | ||
1018 | if (ei->type != E820_RAM) | 777 | return end; |
1019 | continue; | ||
1020 | addr = round_up(ei->addr, align); | ||
1021 | ei_last = ei->addr + ei->size; | ||
1022 | if (addr < start) | ||
1023 | addr = round_up(start, align); | ||
1024 | if (addr >= ei_last) | ||
1025 | continue; | ||
1026 | while (bad_addr(&addr, size, align) && addr+size <= ei_last) | ||
1027 | ; | ||
1028 | last = addr + size; | ||
1029 | if (last > ei_last) | ||
1030 | continue; | ||
1031 | if (last > end) | ||
1032 | continue; | ||
1033 | return addr; | ||
1034 | } | ||
1035 | return -1ULL; | ||
1036 | } | 778 | } |
1037 | |||
1038 | /* | 779 | /* |
1039 | * Find next free range after *start | 780 | * Find next free range after *start |
1040 | */ | 781 | */ |
@@ -1044,25 +785,19 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) | |||
1044 | 785 | ||
1045 | for (i = 0; i < e820.nr_map; i++) { | 786 | for (i = 0; i < e820.nr_map; i++) { |
1046 | struct e820entry *ei = &e820.map[i]; | 787 | struct e820entry *ei = &e820.map[i]; |
1047 | u64 addr, last; | 788 | u64 addr; |
1048 | u64 ei_last; | 789 | u64 ei_start, ei_last; |
1049 | 790 | ||
1050 | if (ei->type != E820_RAM) | 791 | if (ei->type != E820_RAM) |
1051 | continue; | 792 | continue; |
1052 | addr = round_up(ei->addr, align); | 793 | |
1053 | ei_last = ei->addr + ei->size; | 794 | ei_last = ei->addr + ei->size; |
1054 | if (addr < start) | 795 | ei_start = ei->addr; |
1055 | addr = round_up(start, align); | 796 | addr = find_early_area_size(ei_start, ei_last, start, |
1056 | if (addr >= ei_last) | 797 | sizep, align); |
1057 | continue; | 798 | |
1058 | *sizep = ei_last - addr; | 799 | if (addr != -1ULL) |
1059 | while (bad_addr_size(&addr, sizep, align) && | 800 | return addr; |
1060 | addr + *sizep <= ei_last) | ||
1061 | ; | ||
1062 | last = addr + *sizep; | ||
1063 | if (last > ei_last) | ||
1064 | continue; | ||
1065 | return addr; | ||
1066 | } | 801 | } |
1067 | 802 | ||
1068 | return -1ULL; | 803 | return -1ULL; |
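Both find_e820_area() and find_e820_area_size() now just iterate the e820 map and delegate the per-entry search to find_early_area()/find_early_area_size(), part of the generic early-res code this series splits out of e820.c. A simplified model of what the per-entry helper has to do, assuming round_up()/min()/max() from kernel.h; the real helper additionally steps over already-reserved early ranges:

	static u64 model_find_area(u64 ei_start, u64 ei_last, u64 start,
				   u64 end, u64 size, u64 align)
	{
		/* clamp the e820 entry to the requested window, then align */
		u64 addr  = round_up(max(ei_start, start), align);
		u64 limit = min(ei_last, end);

		if (addr >= limit || limit - addr < size)
			return -1ULL;	/* aligned block does not fit */
		return addr;
	}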
@@ -1421,6 +1156,8 @@ void __init e820_reserve_resources_late(void) | |||
1421 | end = MAX_RESOURCE_SIZE; | 1156 | end = MAX_RESOURCE_SIZE; |
1422 | if (start >= end) | 1157 | if (start >= end) |
1423 | continue; | 1158 | continue; |
1159 | printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ", | ||
1160 | start, end); | ||
1424 | reserve_region_with_split(&iomem_resource, start, end, | 1161 | reserve_region_with_split(&iomem_resource, start, end, |
1425 | "RAM buffer"); | 1162 | "RAM buffer"); |
1426 | } | 1163 | } |
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index cdcfb122f256..c2fa9b8b497e 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
@@ -362,7 +362,7 @@ void __init efi_init(void) | |||
362 | printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); | 362 | printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); |
363 | early_iounmap(tmp, 2); | 363 | early_iounmap(tmp, 2); |
364 | 364 | ||
365 | printk(KERN_INFO "EFI v%u.%.02u by %s \n", | 365 | printk(KERN_INFO "EFI v%u.%.02u by %s\n", |
366 | efi.systab->hdr.revision >> 16, | 366 | efi.systab->hdr.revision >> 16, |
367 | efi.systab->hdr.revision & 0xffff, vendor); | 367 | efi.systab->hdr.revision & 0xffff, vendor); |
368 | 368 | ||
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 309689245431..cd37469b54ee 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -30,14 +30,32 @@ | |||
30 | 30 | ||
31 | #ifdef CONFIG_DYNAMIC_FTRACE | 31 | #ifdef CONFIG_DYNAMIC_FTRACE |
32 | 32 | ||
33 | /* | ||
34 | * modifying_code is set to notify NMIs that they need to use | ||
35 | * memory barriers when entering or exiting. But we don't want | ||
36 | * to burden NMIs with unnecessary memory barriers when code | ||
37 | * modification is not being done (which is most of the time). | ||
38 | * | ||
39 | * A mutex is already held when ftrace_arch_code_modify_prepare | ||
40 | * and post_process are called. No locks need to be taken here. | ||
41 | * | ||
42 | * Stop machine will make sure currently running NMIs are done | ||
43 | * and new NMIs will see the updated variable before we need | ||
44 | * to worry about NMIs doing memory barriers. | ||
45 | */ | ||
46 | static int modifying_code __read_mostly; | ||
47 | static DEFINE_PER_CPU(int, save_modifying_code); | ||
48 | |||
33 | int ftrace_arch_code_modify_prepare(void) | 49 | int ftrace_arch_code_modify_prepare(void) |
34 | { | 50 | { |
35 | set_kernel_text_rw(); | 51 | set_kernel_text_rw(); |
52 | modifying_code = 1; | ||
36 | return 0; | 53 | return 0; |
37 | } | 54 | } |
38 | 55 | ||
39 | int ftrace_arch_code_modify_post_process(void) | 56 | int ftrace_arch_code_modify_post_process(void) |
40 | { | 57 | { |
58 | modifying_code = 0; | ||
41 | set_kernel_text_ro(); | 59 | set_kernel_text_ro(); |
42 | return 0; | 60 | return 0; |
43 | } | 61 | } |
@@ -149,6 +167,11 @@ static void ftrace_mod_code(void) | |||
149 | 167 | ||
150 | void ftrace_nmi_enter(void) | 168 | void ftrace_nmi_enter(void) |
151 | { | 169 | { |
170 | __get_cpu_var(save_modifying_code) = modifying_code; | ||
171 | |||
172 | if (!__get_cpu_var(save_modifying_code)) | ||
173 | return; | ||
174 | |||
152 | if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { | 175 | if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { |
153 | smp_rmb(); | 176 | smp_rmb(); |
154 | ftrace_mod_code(); | 177 | ftrace_mod_code(); |
@@ -160,6 +183,9 @@ void ftrace_nmi_enter(void) | |||
160 | 183 | ||
161 | void ftrace_nmi_exit(void) | 184 | void ftrace_nmi_exit(void) |
162 | { | 185 | { |
186 | if (!__get_cpu_var(save_modifying_code)) | ||
187 | return; | ||
188 | |||
163 | /* Finish all executions before clearing nmi_running */ | 189 | /* Finish all executions before clearing nmi_running */ |
164 | smp_mb(); | 190 | smp_mb(); |
165 | atomic_dec(&nmi_running); | 191 | atomic_dec(&nmi_running); |
@@ -484,13 +510,3 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, | |||
484 | } | 510 | } |
485 | } | 511 | } |
486 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | 512 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ |
487 | |||
488 | #ifdef CONFIG_FTRACE_SYSCALLS | ||
489 | |||
490 | extern unsigned long *sys_call_table; | ||
491 | |||
492 | unsigned long __init arch_syscall_addr(int nr) | ||
493 | { | ||
494 | return (unsigned long)(&sys_call_table)[nr]; | ||
495 | } | ||
496 | #endif | ||
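The new modifying_code handshake only works if ftrace_nmi_enter() and ftrace_nmi_exit() take the same branch, even when the flag flips while the NMI runs; that is what the per-cpu save_modifying_code snapshot guarantees. Stripped to its shape (a sketch; enter_work()/exit_work() stand in for the barrier-and-fixup code in the real functions):

	void nmi_enter_hook(void)
	{
		/* latch the global flag exactly once, on entry */
		__get_cpu_var(save_modifying_code) = modifying_code;
		if (!__get_cpu_var(save_modifying_code))
			return;			/* fast path: no patching */
		enter_work();
	}

	void nmi_exit_hook(void)
	{
		/* test the latched copy, never the possibly-changed global */
		if (!__get_cpu_var(save_modifying_code))
			return;
		exit_work();
	}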
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 5051b94c9069..b2e246037392 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -7,6 +7,7 @@ | |||
7 | 7 | ||
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/start_kernel.h> | 9 | #include <linux/start_kernel.h> |
10 | #include <linux/mm.h> | ||
10 | 11 | ||
11 | #include <asm/setup.h> | 12 | #include <asm/setup.h> |
12 | #include <asm/sections.h> | 13 | #include <asm/sections.h> |
@@ -29,14 +30,25 @@ static void __init i386_default_early_setup(void) | |||
29 | 30 | ||
30 | void __init i386_start_kernel(void) | 31 | void __init i386_start_kernel(void) |
31 | { | 32 | { |
33 | #ifdef CONFIG_X86_TRAMPOLINE | ||
34 | /* | ||
35 | * But first pinch a few for the stack/trampoline stuff | ||
36 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
37 | * trampoline before removing it. (see the GDT stuff) | ||
38 | */ | ||
39 | reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, | ||
40 | "EX TRAMPOLINE"); | ||
41 | #endif | ||
42 | |||
32 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | 43 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); |
33 | 44 | ||
34 | #ifdef CONFIG_BLK_DEV_INITRD | 45 | #ifdef CONFIG_BLK_DEV_INITRD |
35 | /* Reserve INITRD */ | 46 | /* Reserve INITRD */ |
36 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | 47 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { |
48 | /* Assume only end is not page aligned */ | ||
37 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 49 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
38 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 50 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
39 | u64 ramdisk_end = ramdisk_image + ramdisk_size; | 51 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
40 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); | 52 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); |
41 | } | 53 | } |
42 | #endif | 54 | #endif |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b5a9896ca1e7..7147143fd614 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -103,9 +103,10 @@ void __init x86_64_start_reservations(char *real_mode_data) | |||
103 | #ifdef CONFIG_BLK_DEV_INITRD | 103 | #ifdef CONFIG_BLK_DEV_INITRD |
104 | /* Reserve INITRD */ | 104 | /* Reserve INITRD */ |
105 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | 105 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { |
106 | /* Assume only end is not page aligned */ | ||
106 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | 107 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; |
107 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | 108 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; |
108 | unsigned long ramdisk_end = ramdisk_image + ramdisk_size; | 109 | unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
109 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); | 110 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); |
110 | } | 111 | } |
111 | #endif | 112 | #endif |
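Both ramdisk hunks lean on the stated assumption that only the end may be unaligned. A quick worked example with 4K pages: an initrd at 0x1000000 with size 0x1800 used to reserve [0x1000000, 0x1001800), so the page holding the initrd tail could be handed out while still partly in use; PAGE_ALIGN() rounds the end to 0x1002000 and both pages are reserved whole.

	/* PAGE_ALIGN() as used above, spelled out for PAGE_SIZE == 4096: */
	#define EXAMPLE_PAGE_ALIGN(a)	(((a) + 4096 - 1) & ~4095UL)

	/* EXAMPLE_PAGE_ALIGN(0x1000000 + 0x1800) == 0x1002000 */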
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 7fd318bac59c..37c3d4b17d85 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -442,8 +442,8 @@ is386: movl $2,%ecx # set MP | |||
442 | */ | 442 | */ |
443 | cmpb $0,ready | 443 | cmpb $0,ready |
444 | jne 1f | 444 | jne 1f |
445 | movl $per_cpu__gdt_page,%eax | 445 | movl $gdt_page,%eax |
446 | movl $per_cpu__stack_canary,%ecx | 446 | movl $stack_canary,%ecx |
447 | movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) | 447 | movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) |
448 | shrl $16, %ecx | 448 | shrl $16, %ecx |
449 | movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) | 449 | movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) |
@@ -706,7 +706,7 @@ idt_descr: | |||
706 | .word 0 # 32 bit align gdt_desc.address | 706 | .word 0 # 32 bit align gdt_desc.address |
707 | ENTRY(early_gdt_descr) | 707 | ENTRY(early_gdt_descr) |
708 | .word GDT_ENTRIES*8-1 | 708 | .word GDT_ENTRIES*8-1 |
709 | .long per_cpu__gdt_page /* Overwritten for secondary CPUs */ | 709 | .long gdt_page /* Overwritten for secondary CPUs */ |
710 | 710 | ||
711 | /* | 711 | /* |
712 | * The boot_gdt must mirror the equivalent in setup.S and is | 712 | * The boot_gdt must mirror the equivalent in setup.S and is |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 2d8b5035371c..3d1e6f16b7a6 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -27,7 +27,7 @@ | |||
27 | #define GET_CR2_INTO_RCX movq %cr2, %rcx | 27 | #define GET_CR2_INTO_RCX movq %cr2, %rcx |
28 | #endif | 28 | #endif |
29 | 29 | ||
30 | /* we are not able to switch in one step to the final KERNEL ADRESS SPACE | 30 | /* we are not able to switch in one step to the final KERNEL ADDRESS SPACE |
31 | * because we need identity-mapped pages. | 31 | * because we need identity-mapped pages. |
32 | * | 32 | * |
33 | */ | 33 | */ |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ba6e65884603..23b4ecdffa9b 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/sysdev.h> | 4 | #include <linux/sysdev.h> |
5 | #include <linux/delay.h> | 5 | #include <linux/delay.h> |
6 | #include <linux/errno.h> | 6 | #include <linux/errno.h> |
7 | #include <linux/slab.h> | ||
7 | #include <linux/hpet.h> | 8 | #include <linux/hpet.h> |
8 | #include <linux/init.h> | 9 | #include <linux/init.h> |
9 | #include <linux/cpu.h> | 10 | #include <linux/cpu.h> |
@@ -34,6 +35,8 @@ | |||
34 | */ | 35 | */ |
35 | unsigned long hpet_address; | 36 | unsigned long hpet_address; |
36 | u8 hpet_blockid; /* OS timer block num */ | 37 | u8 hpet_blockid; /* OS timer block num */ |
38 | u8 hpet_msi_disable; | ||
39 | |||
37 | #ifdef CONFIG_PCI_MSI | 40 | #ifdef CONFIG_PCI_MSI |
38 | static unsigned long hpet_num_timers; | 41 | static unsigned long hpet_num_timers; |
39 | #endif | 42 | #endif |
@@ -264,7 +267,7 @@ static void hpet_resume_device(void) | |||
264 | force_hpet_resume(); | 267 | force_hpet_resume(); |
265 | } | 268 | } |
266 | 269 | ||
267 | static void hpet_resume_counter(void) | 270 | static void hpet_resume_counter(struct clocksource *cs) |
268 | { | 271 | { |
269 | hpet_resume_device(); | 272 | hpet_resume_device(); |
270 | hpet_restart_counter(); | 273 | hpet_restart_counter(); |
@@ -397,9 +400,15 @@ static int hpet_next_event(unsigned long delta, | |||
397 | * then we might have a real hardware problem. We can not do | 400 | * then we might have a real hardware problem. We can not do |
398 | * much about it here, but at least alert the user/admin with | 401 | * much about it here, but at least alert the user/admin with |
399 | * a prominent warning. | 402 | * a prominent warning. |
403 | * An erratum on some chipsets (ICH9, ...) results in the comparator | ||
404 | * read immediately following a write returning the old value. The | ||
405 | * workaround is to read the comparator a second time when the | ||
406 | * first read returns the old value. | ||
400 | */ | 407 | */ |
401 | WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt, | 408 | if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) { |
409 | WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt, | ||
402 | KERN_WARNING "hpet: compare register read back failed.\n"); | 410 | KERN_WARNING "hpet: compare register read back failed.\n"); |
411 | } | ||
403 | 412 | ||
404 | return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; | 413 | return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; |
405 | } | 414 | } |
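The added unlikely() read converts the readback check into one retry: the comparator is read again inside WARN_ONCE() only when the first read disagrees, which absorbs the ICH9-class erratum described in the comment. The generic shape of that workaround, as a sketch (not the HPET API; readl()/writel()/WARN_ONCE() as in mainline):

	/* Write a device register whose first readback may return the
	 * stale pre-write value on buggy chipsets; warn only if a second
	 * read still disagrees. */
	static void write_checked(u32 __iomem *reg, u32 val)
	{
		writel(val, reg);
		if (unlikely(readl(reg) != val))
			WARN_ONCE(readl(reg) != val,
				  "register readback failed\n");
	}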
@@ -596,6 +605,9 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) | |||
596 | unsigned int num_timers_used = 0; | 605 | unsigned int num_timers_used = 0; |
597 | int i; | 606 | int i; |
598 | 607 | ||
608 | if (hpet_msi_disable) | ||
609 | return; | ||
610 | |||
599 | if (boot_cpu_has(X86_FEATURE_ARAT)) | 611 | if (boot_cpu_has(X86_FEATURE_ARAT)) |
600 | return; | 612 | return; |
601 | id = hpet_readl(HPET_ID); | 613 | id = hpet_readl(HPET_ID); |
@@ -928,6 +940,9 @@ static __init int hpet_late_init(void) | |||
928 | hpet_reserve_platform_timers(hpet_readl(HPET_ID)); | 940 | hpet_reserve_platform_timers(hpet_readl(HPET_ID)); |
929 | hpet_print_config(); | 941 | hpet_print_config(); |
930 | 942 | ||
943 | if (hpet_msi_disable) | ||
944 | return 0; | ||
945 | |||
931 | if (boot_cpu_has(X86_FEATURE_ARAT)) | 946 | if (boot_cpu_has(X86_FEATURE_ARAT)) |
932 | return 0; | 947 | return 0; |
933 | 948 | ||
@@ -1135,6 +1150,7 @@ int hpet_set_periodic_freq(unsigned long freq) | |||
1135 | do_div(clc, freq); | 1150 | do_div(clc, freq); |
1136 | clc >>= hpet_clockevent.shift; | 1151 | clc >>= hpet_clockevent.shift; |
1137 | hpet_pie_delta = clc; | 1152 | hpet_pie_delta = clc; |
1153 | hpet_pie_limit = 0; | ||
1138 | } | 1154 | } |
1139 | return 1; | 1155 | return 1; |
1140 | } | 1156 | } |
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 05d5fec64a94..d6cc065f519f 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c | |||
@@ -212,25 +212,6 @@ static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) | |||
212 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); | 212 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); |
213 | } | 213 | } |
214 | 214 | ||
215 | /* | ||
216 | * Store a breakpoint's encoded address, length, and type. | ||
217 | */ | ||
218 | static int arch_store_info(struct perf_event *bp) | ||
219 | { | ||
220 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
221 | /* | ||
222 | * For kernel-addresses, either the address or symbol name can be | ||
223 | * specified. | ||
224 | */ | ||
225 | if (info->name) | ||
226 | info->address = (unsigned long) | ||
227 | kallsyms_lookup_name(info->name); | ||
228 | if (info->address) | ||
229 | return 0; | ||
230 | |||
231 | return -EINVAL; | ||
232 | } | ||
233 | |||
234 | int arch_bp_generic_fields(int x86_len, int x86_type, | 215 | int arch_bp_generic_fields(int x86_len, int x86_type, |
235 | int *gen_len, int *gen_type) | 216 | int *gen_len, int *gen_type) |
236 | { | 217 | { |
@@ -362,10 +343,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, | |||
362 | return ret; | 343 | return ret; |
363 | } | 344 | } |
364 | 345 | ||
365 | ret = arch_store_info(bp); | ||
366 | |||
367 | if (ret < 0) | ||
368 | return ret; | ||
369 | /* | 346 | /* |
370 | * Check that the low-order bits of the address are appropriate | 347 | * Check that the low-order bits of the address are appropriate |
371 | * for the alignment implied by len. | 348 | * for the alignment implied by len. |
@@ -502,8 +479,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) | |||
502 | rcu_read_lock(); | 479 | rcu_read_lock(); |
503 | 480 | ||
504 | bp = per_cpu(bp_per_reg[i], cpu); | 481 | bp = per_cpu(bp_per_reg[i], cpu); |
505 | if (bp) | ||
506 | rc = NOTIFY_DONE; | ||
507 | /* | 482 | /* |
508 | * Reset the 'i'th TRAP bit in dr6 to denote completion of | 483 | * Reset the 'i'th TRAP bit in dr6 to denote completion of |
509 | * exception handling | 484 | * exception handling |
@@ -522,7 +497,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) | |||
522 | 497 | ||
523 | rcu_read_unlock(); | 498 | rcu_read_unlock(); |
524 | } | 499 | } |
525 | if (dr6 & (~DR_TRAP_BITS)) | 500 | /* |
501 | * Further processing in do_debug() is needed for a) user-space | ||
502 | * breakpoints (to generate signals) and b) when the system has | ||
503 | * taken exception due to multiple causes | ||
504 | */ | ||
505 | if ((current->thread.debugreg6 & DR_TRAP_BITS) || | ||
506 | (dr6 & (~DR_TRAP_BITS))) | ||
526 | rc = NOTIFY_DONE; | 507 | rc = NOTIFY_DONE; |
527 | 508 | ||
528 | set_debugreg(dr7, 7); | 509 | set_debugreg(dr7, 7); |
@@ -547,8 +528,3 @@ void hw_breakpoint_pmu_read(struct perf_event *bp) | |||
547 | { | 528 | { |
548 | /* TODO */ | 529 | /* TODO */ |
549 | } | 530 | } |
550 | |||
551 | void hw_breakpoint_pmu_unthrottle(struct perf_event *bp) | ||
552 | { | ||
553 | /* TODO */ | ||
554 | } | ||
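The replacement condition is easier to audit as a predicate: the handler claims the exception (NOTIFY_STOP) only when it consumed every cause, and defers to do_debug() otherwise. A sketch of that decision (rc is initialized to NOTIFY_STOP earlier in the real handler):

	/* NOTIFY_DONE: do_debug() must still run, either to raise signals
	 * for user-space breakpoints or to handle non-breakpoint causes. */
	static int verdict(unsigned long user_dr6, unsigned long dr6)
	{
		if ((user_dr6 & DR_TRAP_BITS) || (dr6 & ~DR_TRAP_BITS))
			return NOTIFY_DONE;
		return NOTIFY_STOP;	/* fully handled here */
	}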
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index f2f8540a7f3d..54c31c285488 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/regset.h> | 9 | #include <linux/regset.h> |
10 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
11 | #include <linux/slab.h> | ||
11 | 12 | ||
12 | #include <asm/sigcontext.h> | 13 | #include <asm/sigcontext.h> |
13 | #include <asm/processor.h> | 14 | #include <asm/processor.h> |
@@ -164,6 +165,11 @@ int init_fpu(struct task_struct *tsk) | |||
164 | return 0; | 165 | return 0; |
165 | } | 166 | } |
166 | 167 | ||
168 | /* | ||
169 | * The xstateregs_active() routine is the same as the fpregs_active() routine, | ||
170 | * as the "regset->n" for the xstate regset will be updated based on the feature | ||
171 | * capabilities supported by xsave. | ||
172 | */ | ||
167 | int fpregs_active(struct task_struct *target, const struct user_regset *regset) | 173 | int fpregs_active(struct task_struct *target, const struct user_regset *regset) |
168 | { | 174 | { |
169 | return tsk_used_math(target) ? regset->n : 0; | 175 | return tsk_used_math(target) ? regset->n : 0; |
@@ -204,8 +210,6 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
204 | if (ret) | 210 | if (ret) |
205 | return ret; | 211 | return ret; |
206 | 212 | ||
207 | set_stopped_child_used_math(target); | ||
208 | |||
209 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 213 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
210 | &target->thread.xstate->fxsave, 0, -1); | 214 | &target->thread.xstate->fxsave, 0, -1); |
211 | 215 | ||
@@ -224,6 +228,68 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
224 | return ret; | 228 | return ret; |
225 | } | 229 | } |
226 | 230 | ||
231 | int xstateregs_get(struct task_struct *target, const struct user_regset *regset, | ||
232 | unsigned int pos, unsigned int count, | ||
233 | void *kbuf, void __user *ubuf) | ||
234 | { | ||
235 | int ret; | ||
236 | |||
237 | if (!cpu_has_xsave) | ||
238 | return -ENODEV; | ||
239 | |||
240 | ret = init_fpu(target); | ||
241 | if (ret) | ||
242 | return ret; | ||
243 | |||
244 | /* | ||
245 | * Copy the 48bytes defined by the software first into the xstate | ||
246 | * memory layout in the thread struct, so that we can copy the entire | ||
247 | * xstateregs to the user using one user_regset_copyout(). | ||
248 | */ | ||
249 | memcpy(&target->thread.xstate->fxsave.sw_reserved, | ||
250 | xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); | ||
251 | |||
252 | /* | ||
253 | * Copy the xstate memory layout. | ||
254 | */ | ||
255 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, | ||
256 | &target->thread.xstate->xsave, 0, -1); | ||
257 | return ret; | ||
258 | } | ||
259 | |||
260 | int xstateregs_set(struct task_struct *target, const struct user_regset *regset, | ||
261 | unsigned int pos, unsigned int count, | ||
262 | const void *kbuf, const void __user *ubuf) | ||
263 | { | ||
264 | int ret; | ||
265 | struct xsave_hdr_struct *xsave_hdr; | ||
266 | |||
267 | if (!cpu_has_xsave) | ||
268 | return -ENODEV; | ||
269 | |||
270 | ret = init_fpu(target); | ||
271 | if (ret) | ||
272 | return ret; | ||
273 | |||
274 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | ||
275 | &target->thread.xstate->xsave, 0, -1); | ||
276 | |||
277 | /* | ||
278 | * mxcsr reserved bits must be masked to zero for security reasons. | ||
279 | */ | ||
280 | target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | ||
281 | |||
282 | xsave_hdr = &target->thread.xstate->xsave.xsave_hdr; | ||
283 | |||
284 | xsave_hdr->xstate_bv &= pcntxt_mask; | ||
285 | /* | ||
286 | * These bits must be zero. | ||
287 | */ | ||
288 | xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; | ||
289 | |||
290 | return ret; | ||
291 | } | ||
292 | |||
227 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 293 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
228 | 294 | ||
229 | /* | 295 | /* |
@@ -404,8 +470,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
404 | if (ret) | 470 | if (ret) |
405 | return ret; | 471 | return ret; |
406 | 472 | ||
407 | set_stopped_child_used_math(target); | ||
408 | |||
409 | if (!HAVE_HWFP) | 473 | if (!HAVE_HWFP) |
410 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); | 474 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); |
411 | 475 | ||
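xstateregs_get()/xstateregs_set() follow the ordinary user_regset contract: a copyout of the whole xsave image on the read side, a copyin plus sanitizing (mxcsr mask, xstate_bv clamp, reserved bits zeroed) on the write side. They get exposed through a regset table entry; a hypothetical entry for illustration (the real tables live in ptrace.c, and .n is computed from xstate_size at boot rather than hard-coded):

	static const struct user_regset xstate_regset_sketch = {
		.core_note_type	= NT_X86_XSTATE,
		.size		= sizeof(u64),
		.align		= sizeof(u64),
		.active		= xstateregs_active,	/* == fpregs_active */
		.get		= xstateregs_get,
		.set		= xstateregs_set,
	};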
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index df89102bef80..7c9f02c130f3 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c | |||
@@ -5,7 +5,6 @@ | |||
5 | #include <linux/ioport.h> | 5 | #include <linux/ioport.h> |
6 | #include <linux/interrupt.h> | 6 | #include <linux/interrupt.h> |
7 | #include <linux/timex.h> | 7 | #include <linux/timex.h> |
8 | #include <linux/slab.h> | ||
9 | #include <linux/random.h> | 8 | #include <linux/random.h> |
10 | #include <linux/init.h> | 9 | #include <linux/init.h> |
11 | #include <linux/kernel_stat.h> | 10 | #include <linux/kernel_stat.h> |
@@ -32,8 +31,14 @@ | |||
32 | */ | 31 | */ |
33 | 32 | ||
34 | static int i8259A_auto_eoi; | 33 | static int i8259A_auto_eoi; |
35 | DEFINE_SPINLOCK(i8259A_lock); | 34 | DEFINE_RAW_SPINLOCK(i8259A_lock); |
36 | static void mask_and_ack_8259A(unsigned int); | 35 | static void mask_and_ack_8259A(unsigned int); |
36 | static void mask_8259A(void); | ||
37 | static void unmask_8259A(void); | ||
38 | static void disable_8259A_irq(unsigned int irq); | ||
39 | static void enable_8259A_irq(unsigned int irq); | ||
40 | static void init_8259A(int auto_eoi); | ||
41 | static int i8259A_irq_pending(unsigned int irq); | ||
37 | 42 | ||
38 | struct irq_chip i8259A_chip = { | 43 | struct irq_chip i8259A_chip = { |
39 | .name = "XT-PIC", | 44 | .name = "XT-PIC", |
@@ -63,51 +68,51 @@ unsigned int cached_irq_mask = 0xffff; | |||
63 | */ | 68 | */ |
64 | unsigned long io_apic_irqs; | 69 | unsigned long io_apic_irqs; |
65 | 70 | ||
66 | void disable_8259A_irq(unsigned int irq) | 71 | static void disable_8259A_irq(unsigned int irq) |
67 | { | 72 | { |
68 | unsigned int mask = 1 << irq; | 73 | unsigned int mask = 1 << irq; |
69 | unsigned long flags; | 74 | unsigned long flags; |
70 | 75 | ||
71 | spin_lock_irqsave(&i8259A_lock, flags); | 76 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
72 | cached_irq_mask |= mask; | 77 | cached_irq_mask |= mask; |
73 | if (irq & 8) | 78 | if (irq & 8) |
74 | outb(cached_slave_mask, PIC_SLAVE_IMR); | 79 | outb(cached_slave_mask, PIC_SLAVE_IMR); |
75 | else | 80 | else |
76 | outb(cached_master_mask, PIC_MASTER_IMR); | 81 | outb(cached_master_mask, PIC_MASTER_IMR); |
77 | spin_unlock_irqrestore(&i8259A_lock, flags); | 82 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
78 | } | 83 | } |
79 | 84 | ||
80 | void enable_8259A_irq(unsigned int irq) | 85 | static void enable_8259A_irq(unsigned int irq) |
81 | { | 86 | { |
82 | unsigned int mask = ~(1 << irq); | 87 | unsigned int mask = ~(1 << irq); |
83 | unsigned long flags; | 88 | unsigned long flags; |
84 | 89 | ||
85 | spin_lock_irqsave(&i8259A_lock, flags); | 90 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
86 | cached_irq_mask &= mask; | 91 | cached_irq_mask &= mask; |
87 | if (irq & 8) | 92 | if (irq & 8) |
88 | outb(cached_slave_mask, PIC_SLAVE_IMR); | 93 | outb(cached_slave_mask, PIC_SLAVE_IMR); |
89 | else | 94 | else |
90 | outb(cached_master_mask, PIC_MASTER_IMR); | 95 | outb(cached_master_mask, PIC_MASTER_IMR); |
91 | spin_unlock_irqrestore(&i8259A_lock, flags); | 96 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
92 | } | 97 | } |
93 | 98 | ||
94 | int i8259A_irq_pending(unsigned int irq) | 99 | static int i8259A_irq_pending(unsigned int irq) |
95 | { | 100 | { |
96 | unsigned int mask = 1<<irq; | 101 | unsigned int mask = 1<<irq; |
97 | unsigned long flags; | 102 | unsigned long flags; |
98 | int ret; | 103 | int ret; |
99 | 104 | ||
100 | spin_lock_irqsave(&i8259A_lock, flags); | 105 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
101 | if (irq < 8) | 106 | if (irq < 8) |
102 | ret = inb(PIC_MASTER_CMD) & mask; | 107 | ret = inb(PIC_MASTER_CMD) & mask; |
103 | else | 108 | else |
104 | ret = inb(PIC_SLAVE_CMD) & (mask >> 8); | 109 | ret = inb(PIC_SLAVE_CMD) & (mask >> 8); |
105 | spin_unlock_irqrestore(&i8259A_lock, flags); | 110 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
106 | 111 | ||
107 | return ret; | 112 | return ret; |
108 | } | 113 | } |
109 | 114 | ||
110 | void make_8259A_irq(unsigned int irq) | 115 | static void make_8259A_irq(unsigned int irq) |
111 | { | 116 | { |
112 | disable_irq_nosync(irq); | 117 | disable_irq_nosync(irq); |
113 | io_apic_irqs &= ~(1<<irq); | 118 | io_apic_irqs &= ~(1<<irq); |
@@ -150,7 +155,7 @@ static void mask_and_ack_8259A(unsigned int irq) | |||
150 | unsigned int irqmask = 1 << irq; | 155 | unsigned int irqmask = 1 << irq; |
151 | unsigned long flags; | 156 | unsigned long flags; |
152 | 157 | ||
153 | spin_lock_irqsave(&i8259A_lock, flags); | 158 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
154 | /* | 159 | /* |
155 | * Lightweight spurious IRQ detection. We do not want | 160 | * Lightweight spurious IRQ detection. We do not want |
156 | * to overdo spurious IRQ handling - it's usually a sign | 161 | * to overdo spurious IRQ handling - it's usually a sign |
@@ -183,7 +188,7 @@ handle_real_irq: | |||
183 | outb(cached_master_mask, PIC_MASTER_IMR); | 188 | outb(cached_master_mask, PIC_MASTER_IMR); |
184 | outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */ | 189 | outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */ |
185 | } | 190 | } |
186 | spin_unlock_irqrestore(&i8259A_lock, flags); | 191 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
187 | return; | 192 | return; |
188 | 193 | ||
189 | spurious_8259A_irq: | 194 | spurious_8259A_irq: |
@@ -281,37 +286,37 @@ static int __init i8259A_init_sysfs(void) | |||
281 | 286 | ||
282 | device_initcall(i8259A_init_sysfs); | 287 | device_initcall(i8259A_init_sysfs); |
283 | 288 | ||
284 | void mask_8259A(void) | 289 | static void mask_8259A(void) |
285 | { | 290 | { |
286 | unsigned long flags; | 291 | unsigned long flags; |
287 | 292 | ||
288 | spin_lock_irqsave(&i8259A_lock, flags); | 293 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
289 | 294 | ||
290 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ | 295 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ |
291 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ | 296 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ |
292 | 297 | ||
293 | spin_unlock_irqrestore(&i8259A_lock, flags); | 298 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
294 | } | 299 | } |
295 | 300 | ||
296 | void unmask_8259A(void) | 301 | static void unmask_8259A(void) |
297 | { | 302 | { |
298 | unsigned long flags; | 303 | unsigned long flags; |
299 | 304 | ||
300 | spin_lock_irqsave(&i8259A_lock, flags); | 305 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
301 | 306 | ||
302 | outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ | 307 | outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ |
303 | outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ | 308 | outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ |
304 | 309 | ||
305 | spin_unlock_irqrestore(&i8259A_lock, flags); | 310 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
306 | } | 311 | } |
307 | 312 | ||
308 | void init_8259A(int auto_eoi) | 313 | static void init_8259A(int auto_eoi) |
309 | { | 314 | { |
310 | unsigned long flags; | 315 | unsigned long flags; |
311 | 316 | ||
312 | i8259A_auto_eoi = auto_eoi; | 317 | i8259A_auto_eoi = auto_eoi; |
313 | 318 | ||
314 | spin_lock_irqsave(&i8259A_lock, flags); | 319 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
315 | 320 | ||
316 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ | 321 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ |
317 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ | 322 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ |
@@ -356,5 +361,49 @@ void init_8259A(int auto_eoi) | |||
356 | outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ | 361 | outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ |
357 | outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ | 362 | outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ |
358 | 363 | ||
359 | spin_unlock_irqrestore(&i8259A_lock, flags); | 364 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
360 | } | 365 | } |
366 | |||
367 | /* | ||
368 | * Make the i8259 a driver so that PIC functions can be selected at run | ||
369 | * time. The goal is to keep one x86 binary compatible across PC-compatible | ||
370 | * and non-PC-compatible platforms, such as x86 MID. | ||
371 | */ | ||
372 | |||
373 | static void legacy_pic_noop(void) { }; | ||
374 | static void legacy_pic_uint_noop(unsigned int unused) { }; | ||
375 | static void legacy_pic_int_noop(int unused) { }; | ||
376 | |||
377 | static struct irq_chip dummy_pic_chip = { | ||
378 | .name = "dummy pic", | ||
379 | .mask = legacy_pic_uint_noop, | ||
380 | .unmask = legacy_pic_uint_noop, | ||
381 | .disable = legacy_pic_uint_noop, | ||
382 | .mask_ack = legacy_pic_uint_noop, | ||
383 | }; | ||
384 | static int legacy_pic_irq_pending_noop(unsigned int irq) | ||
385 | { | ||
386 | return 0; | ||
387 | } | ||
388 | |||
389 | struct legacy_pic null_legacy_pic = { | ||
390 | .nr_legacy_irqs = 0, | ||
391 | .chip = &dummy_pic_chip, | ||
392 | .mask_all = legacy_pic_noop, | ||
393 | .restore_mask = legacy_pic_noop, | ||
394 | .init = legacy_pic_int_noop, | ||
395 | .irq_pending = legacy_pic_irq_pending_noop, | ||
396 | .make_irq = legacy_pic_uint_noop, | ||
397 | }; | ||
398 | |||
399 | struct legacy_pic default_legacy_pic = { | ||
400 | .nr_legacy_irqs = NR_IRQS_LEGACY, | ||
401 | .chip = &i8259A_chip, | ||
402 | .mask_all = mask_8259A, | ||
403 | .restore_mask = unmask_8259A, | ||
404 | .init = init_8259A, | ||
405 | .irq_pending = i8259A_irq_pending, | ||
406 | .make_irq = make_8259A_irq, | ||
407 | }; | ||
408 | |||
409 | struct legacy_pic *legacy_pic = &default_legacy_pic; | ||
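With the indirection in place, choosing a PIC implementation is a one-line platform decision made before init_IRQ() runs. A hypothetical early-setup hook for a PIC-less MID platform, for illustration:

	static void __init example_mid_early_setup(void)
	{
		/* no 8259 on this board: generic code such as
		 * init_ISA_irqs() then sees nr_legacy_irqs == 0 */
		legacy_pic = &null_legacy_pic;
	}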
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index d5932226614f..0ed2d300cd46 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -5,7 +5,6 @@ | |||
5 | #include <linux/ioport.h> | 5 | #include <linux/ioport.h> |
6 | #include <linux/interrupt.h> | 6 | #include <linux/interrupt.h> |
7 | #include <linux/timex.h> | 7 | #include <linux/timex.h> |
8 | #include <linux/slab.h> | ||
9 | #include <linux/random.h> | 8 | #include <linux/random.h> |
10 | #include <linux/kprobes.h> | 9 | #include <linux/kprobes.h> |
11 | #include <linux/init.h> | 10 | #include <linux/init.h> |
@@ -84,24 +83,7 @@ static struct irqaction irq2 = { | |||
84 | }; | 83 | }; |
85 | 84 | ||
86 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | 85 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { |
87 | [0 ... IRQ0_VECTOR - 1] = -1, | 86 | [0 ... NR_VECTORS - 1] = -1, |
88 | [IRQ0_VECTOR] = 0, | ||
89 | [IRQ1_VECTOR] = 1, | ||
90 | [IRQ2_VECTOR] = 2, | ||
91 | [IRQ3_VECTOR] = 3, | ||
92 | [IRQ4_VECTOR] = 4, | ||
93 | [IRQ5_VECTOR] = 5, | ||
94 | [IRQ6_VECTOR] = 6, | ||
95 | [IRQ7_VECTOR] = 7, | ||
96 | [IRQ8_VECTOR] = 8, | ||
97 | [IRQ9_VECTOR] = 9, | ||
98 | [IRQ10_VECTOR] = 10, | ||
99 | [IRQ11_VECTOR] = 11, | ||
100 | [IRQ12_VECTOR] = 12, | ||
101 | [IRQ13_VECTOR] = 13, | ||
102 | [IRQ14_VECTOR] = 14, | ||
103 | [IRQ15_VECTOR] = 15, | ||
104 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 | ||
105 | }; | 87 | }; |
106 | 88 | ||
107 | int vector_used_by_percpu_irq(unsigned int vector) | 89 | int vector_used_by_percpu_irq(unsigned int vector) |
@@ -123,12 +105,12 @@ void __init init_ISA_irqs(void) | |||
123 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) | 105 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) |
124 | init_bsp_APIC(); | 106 | init_bsp_APIC(); |
125 | #endif | 107 | #endif |
126 | init_8259A(0); | 108 | legacy_pic->init(0); |
127 | 109 | ||
128 | /* | 110 | /* |
129 | * 16 old-style INTA-cycle interrupts: | 111 | * 16 old-style INTA-cycle interrupts: |
130 | */ | 112 | */ |
131 | for (i = 0; i < NR_IRQS_LEGACY; i++) { | 113 | for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) { |
132 | struct irq_desc *desc = irq_to_desc(i); | 114 | struct irq_desc *desc = irq_to_desc(i); |
133 | 115 | ||
134 | desc->status = IRQ_DISABLED; | 116 | desc->status = IRQ_DISABLED; |
@@ -142,9 +124,44 @@ void __init init_ISA_irqs(void) | |||
142 | 124 | ||
143 | void __init init_IRQ(void) | 125 | void __init init_IRQ(void) |
144 | { | 126 | { |
127 | int i; | ||
128 | |||
129 | /* | ||
130 | * On CPU 0, assign IRQ0_VECTOR..IRQ15_VECTOR to IRQs 0..15. | ||
131 | * If these IRQs are handled by legacy interrupt controllers like the | ||
132 | * PIC, then this configuration will likely be static after boot. If | ||
133 | * these IRQs are handled by more modern controllers like the IO-APIC, | ||
134 | * then this vector space can be freed and re-used dynamically as the | ||
135 | * IRQs migrate etc. | ||
136 | */ | ||
137 | for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) | ||
138 | per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; | ||
139 | |||
145 | x86_init.irqs.intr_init(); | 140 | x86_init.irqs.intr_init(); |
146 | } | 141 | } |
147 | 142 | ||
143 | /* | ||
144 | * Set up the vector to IRQ mappings. | ||
145 | */ | ||
146 | void setup_vector_irq(int cpu) | ||
147 | { | ||
148 | #ifndef CONFIG_X86_IO_APIC | ||
149 | int irq; | ||
150 | |||
151 | /* | ||
152 | * On most platforms, the legacy PIC delivers its interrupts on the | ||
153 | * boot CPU. But there are certain platforms where PIC interrupts are | ||
154 | * delivered to multiple CPUs. If the legacy IRQs are handled by the | ||
155 | * legacy PIC, set up the static legacy vector to IRQ mapping for each | ||
156 | * new CPU that comes online: | ||
157 | */ | ||
158 | for (irq = 0; irq < legacy_pic->nr_legacy_irqs; irq++) | ||
159 | per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq; | ||
160 | #endif | ||
161 | |||
162 | __setup_vector_irq(cpu); | ||
163 | } | ||
164 | |||
148 | static void __init smp_intr_init(void) | 165 | static void __init smp_intr_init(void) |
149 | { | 166 | { |
150 | #ifdef CONFIG_SMP | 167 | #ifdef CONFIG_SMP |
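For context, a sketch of the consumer of this per-cpu table: on interrupt entry the hardware vector is translated back to an IRQ number through vector_irq, with -1 marking an unassigned vector (simplified from this era's do_IRQ; not part of the patch):

    unsigned int vector = ~regs->orig_ax;        /* vector pushed at entry */
    int irq = __get_cpu_var(vector_irq)[vector]; /* -1 if unassigned */

    if (irq != -1)
            handle_irq(irq, regs);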
diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c index cbc4332a77b2..0f7bc20cfcde 100644 --- a/arch/x86/kernel/k8.c +++ b/arch/x86/kernel/k8.c | |||
@@ -2,8 +2,8 @@ | |||
2 | * Shared support code for AMD K8 northbridges and derivatives. | 2 | * Shared support code for AMD K8 northbridges and derivatives. |
3 | * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. | 3 | * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. |
4 | */ | 4 | */ |
5 | #include <linux/gfp.h> | ||
6 | #include <linux/types.h> | 5 | #include <linux/types.h> |
6 | #include <linux/slab.h> | ||
7 | #include <linux/init.h> | 7 | #include <linux/init.h> |
8 | #include <linux/errno.h> | 8 | #include <linux/errno.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
@@ -121,3 +121,17 @@ void k8_flush_garts(void) | |||
121 | } | 121 | } |
122 | EXPORT_SYMBOL_GPL(k8_flush_garts); | 122 | EXPORT_SYMBOL_GPL(k8_flush_garts); |
123 | 123 | ||
124 | static __init int init_k8_nbs(void) | ||
125 | { | ||
126 | int err = 0; | ||
127 | |||
128 | err = cache_k8_northbridges(); | ||
129 | |||
130 | if (err < 0) | ||
131 | printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n"); | ||
132 | |||
133 | return err; | ||
134 | } | ||
135 | |||
136 | /* This has to go after the PCI subsystem */ | ||
137 | fs_initcall(init_k8_nbs); | ||
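A note on the fs_initcall() above: initcall levels run in order, and fs_initcall (level 5) runs after subsys_initcall (level 4), where the PCI core registers, so cache_k8_northbridges() can safely enumerate PCI devices. A hedged sketch of a later consumer relying on that ordering (my_gart_init is hypothetical):

    static __init int my_gart_init(void)
    {
            if (num_k8_northbridges == 0)   /* filled in by init_k8_nbs() */
                    return -ENODEV;
            /* ... safe to walk k8_northbridges[] here ... */
            return 0;
    }
    fs_initcall_sync(my_gart_init);         /* ordered after fs_initcall */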
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index e444357375ce..8afd9f321f10 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/debugfs.h> | 9 | #include <linux/debugfs.h> |
10 | #include <linux/uaccess.h> | 10 | #include <linux/uaccess.h> |
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/slab.h> | ||
12 | #include <linux/init.h> | 13 | #include <linux/init.h> |
13 | #include <linux/stat.h> | 14 | #include <linux/stat.h> |
14 | #include <linux/io.h> | 15 | #include <linux/io.h> |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index dd74fe7273b1..b2258ca91003 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <linux/init.h> | 42 | #include <linux/init.h> |
43 | #include <linux/smp.h> | 43 | #include <linux/smp.h> |
44 | #include <linux/nmi.h> | 44 | #include <linux/nmi.h> |
45 | #include <linux/hw_breakpoint.h> | ||
45 | 46 | ||
46 | #include <asm/debugreg.h> | 47 | #include <asm/debugreg.h> |
47 | #include <asm/apicdef.h> | 48 | #include <asm/apicdef.h> |
@@ -204,40 +205,81 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
204 | 205 | ||
205 | static struct hw_breakpoint { | 206 | static struct hw_breakpoint { |
206 | unsigned enabled; | 207 | unsigned enabled; |
207 | unsigned type; | ||
208 | unsigned len; | ||
209 | unsigned long addr; | 208 | unsigned long addr; |
209 | int len; | ||
210 | int type; | ||
211 | struct perf_event **pev; | ||
210 | } breakinfo[4]; | 212 | } breakinfo[4]; |
211 | 213 | ||
212 | static void kgdb_correct_hw_break(void) | 214 | static void kgdb_correct_hw_break(void) |
213 | { | 215 | { |
214 | unsigned long dr7; | ||
215 | int correctit = 0; | ||
216 | int breakbit; | ||
217 | int breakno; | 216 | int breakno; |
218 | 217 | ||
219 | get_debugreg(dr7, 7); | ||
220 | for (breakno = 0; breakno < 4; breakno++) { | 218 | for (breakno = 0; breakno < 4; breakno++) { |
221 | breakbit = 2 << (breakno << 1); | 219 | struct perf_event *bp; |
222 | if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { | 220 | struct arch_hw_breakpoint *info; |
223 | correctit = 1; | 221 | int val; |
224 | dr7 |= breakbit; | 222 | int cpu = raw_smp_processor_id(); |
225 | dr7 &= ~(0xf0000 << (breakno << 2)); | 223 | if (!breakinfo[breakno].enabled) |
226 | dr7 |= ((breakinfo[breakno].len << 2) | | 224 | continue; |
227 | breakinfo[breakno].type) << | 225 | bp = *per_cpu_ptr(breakinfo[breakno].pev, cpu); |
228 | ((breakno << 2) + 16); | 226 | info = counter_arch_bp(bp); |
229 | set_debugreg(breakinfo[breakno].addr, breakno); | 227 | if (bp->attr.disabled != 1) |
230 | 228 | continue; | |
231 | } else { | 229 | bp->attr.bp_addr = breakinfo[breakno].addr; |
232 | if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { | 230 | bp->attr.bp_len = breakinfo[breakno].len; |
233 | correctit = 1; | 231 | bp->attr.bp_type = breakinfo[breakno].type; |
234 | dr7 &= ~breakbit; | 232 | info->address = breakinfo[breakno].addr; |
235 | dr7 &= ~(0xf0000 << (breakno << 2)); | 233 | info->len = breakinfo[breakno].len; |
236 | } | 234 | info->type = breakinfo[breakno].type; |
237 | } | 235 | val = arch_install_hw_breakpoint(bp); |
236 | if (!val) | ||
237 | bp->attr.disabled = 0; | ||
238 | } | ||
239 | hw_breakpoint_restore(); | ||
240 | } | ||
241 | |||
242 | static int hw_break_reserve_slot(int breakno) | ||
243 | { | ||
244 | int cpu; | ||
245 | int cnt = 0; | ||
246 | struct perf_event **pevent; | ||
247 | |||
248 | for_each_online_cpu(cpu) { | ||
249 | cnt++; | ||
250 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); | ||
251 | if (dbg_reserve_bp_slot(*pevent)) | ||
252 | goto fail; | ||
253 | } | ||
254 | |||
255 | return 0; | ||
256 | |||
257 | fail: | ||
258 | for_each_online_cpu(cpu) { | ||
259 | cnt--; | ||
260 | if (!cnt) | ||
261 | break; | ||
262 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); | ||
263 | dbg_release_bp_slot(*pevent); | ||
238 | } | 264 | } |
239 | if (correctit) | 265 | return -1; |
240 | set_debugreg(dr7, 7); | 266 | } |
267 | |||
268 | static int hw_break_release_slot(int breakno) | ||
269 | { | ||
270 | struct perf_event **pevent; | ||
271 | int cpu; | ||
272 | |||
273 | for_each_online_cpu(cpu) { | ||
274 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); | ||
275 | if (dbg_release_bp_slot(*pevent)) | ||
276 | /* | ||
277 | * The debugger is responsible for handling the retry on | ||
278 | * remove failure. | ||
279 | */ | ||
280 | return -1; | ||
281 | } | ||
282 | return 0; | ||
241 | } | 283 | } |
242 | 284 | ||
243 | static int | 285 | static int |
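The reserve/release pair above is the bookkeeping that replaces raw debug-register writes: a slot must be reserved on every online CPU before the breakpoint is armed anywhere, and a failed reservation is rolled back before returning. A sketch of the intended pairing (breakno is a slot index 0..3):

    if (hw_break_reserve_slot(breakno))
            return -1;      /* partial reservations already rolled back */
    /* ... armed later via kgdb_correct_hw_break() ... */
    if (hw_break_release_slot(breakno))
            return -1;      /* the debugger is expected to retry removal */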
@@ -251,6 +293,10 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) | |||
251 | if (i == 4) | 293 | if (i == 4) |
252 | return -1; | 294 | return -1; |
253 | 295 | ||
296 | if (hw_break_release_slot(i)) { | ||
297 | printk(KERN_ERR "Cannot remove hw breakpoint at %lx\n", addr); | ||
298 | return -1; | ||
299 | } | ||
254 | breakinfo[i].enabled = 0; | 300 | breakinfo[i].enabled = 0; |
255 | 301 | ||
256 | return 0; | 302 | return 0; |
@@ -259,15 +305,23 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) | |||
259 | static void kgdb_remove_all_hw_break(void) | 305 | static void kgdb_remove_all_hw_break(void) |
260 | { | 306 | { |
261 | int i; | 307 | int i; |
308 | int cpu = raw_smp_processor_id(); | ||
309 | struct perf_event *bp; | ||
262 | 310 | ||
263 | for (i = 0; i < 4; i++) | 311 | for (i = 0; i < 4; i++) { |
264 | memset(&breakinfo[i], 0, sizeof(struct hw_breakpoint)); | 312 | if (!breakinfo[i].enabled) |
313 | continue; | ||
314 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); | ||
315 | if (bp->attr.disabled == 1) | ||
316 | continue; | ||
317 | arch_uninstall_hw_breakpoint(bp); | ||
318 | bp->attr.disabled = 1; | ||
319 | } | ||
265 | } | 320 | } |
266 | 321 | ||
267 | static int | 322 | static int |
268 | kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) | 323 | kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) |
269 | { | 324 | { |
270 | unsigned type; | ||
271 | int i; | 325 | int i; |
272 | 326 | ||
273 | for (i = 0; i < 4; i++) | 327 | for (i = 0; i < 4; i++) |
@@ -278,27 +332,42 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) | |||
278 | 332 | ||
279 | switch (bptype) { | 333 | switch (bptype) { |
280 | case BP_HARDWARE_BREAKPOINT: | 334 | case BP_HARDWARE_BREAKPOINT: |
281 | type = 0; | 335 | len = 1; |
282 | len = 1; | 336 | breakinfo[i].type = X86_BREAKPOINT_EXECUTE; |
283 | break; | 337 | break; |
284 | case BP_WRITE_WATCHPOINT: | 338 | case BP_WRITE_WATCHPOINT: |
285 | type = 1; | 339 | breakinfo[i].type = X86_BREAKPOINT_WRITE; |
286 | break; | 340 | break; |
287 | case BP_ACCESS_WATCHPOINT: | 341 | case BP_ACCESS_WATCHPOINT: |
288 | type = 3; | 342 | breakinfo[i].type = X86_BREAKPOINT_RW; |
289 | break; | 343 | break; |
290 | default: | 344 | default: |
291 | return -1; | 345 | return -1; |
292 | } | 346 | } |
293 | 347 | switch (len) { | |
294 | if (len == 1 || len == 2 || len == 4) | 348 | case 1: |
295 | breakinfo[i].len = len - 1; | 349 | breakinfo[i].len = X86_BREAKPOINT_LEN_1; |
296 | else | 350 | break; |
351 | case 2: | ||
352 | breakinfo[i].len = X86_BREAKPOINT_LEN_2; | ||
353 | break; | ||
354 | case 4: | ||
355 | breakinfo[i].len = X86_BREAKPOINT_LEN_4; | ||
356 | break; | ||
357 | #ifdef CONFIG_X86_64 | ||
358 | case 8: | ||
359 | breakinfo[i].len = X86_BREAKPOINT_LEN_8; | ||
360 | break; | ||
361 | #endif | ||
362 | default: | ||
297 | return -1; | 363 | return -1; |
298 | 364 | } | |
299 | breakinfo[i].enabled = 1; | ||
300 | breakinfo[i].addr = addr; | 365 | breakinfo[i].addr = addr; |
301 | breakinfo[i].type = type; | 366 | if (hw_break_reserve_slot(i)) { |
367 | breakinfo[i].addr = 0; | ||
368 | return -1; | ||
369 | } | ||
370 | breakinfo[i].enabled = 1; | ||
302 | 371 | ||
303 | return 0; | 372 | return 0; |
304 | } | 373 | } |
@@ -313,8 +382,21 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) | |||
313 | */ | 382 | */ |
314 | void kgdb_disable_hw_debug(struct pt_regs *regs) | 383 | void kgdb_disable_hw_debug(struct pt_regs *regs) |
315 | { | 384 | { |
385 | int i; | ||
386 | int cpu = raw_smp_processor_id(); | ||
387 | struct perf_event *bp; | ||
388 | |||
316 | /* Disable hardware debugging while we are in kgdb: */ | 389 | /* Disable hardware debugging while we are in kgdb: */ |
317 | set_debugreg(0UL, 7); | 390 | set_debugreg(0UL, 7); |
391 | for (i = 0; i < 4; i++) { | ||
392 | if (!breakinfo[i].enabled) | ||
393 | continue; | ||
394 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); | ||
395 | if (bp->attr.disabled == 1) | ||
396 | continue; | ||
397 | arch_uninstall_hw_breakpoint(bp); | ||
398 | bp->attr.disabled = 1; | ||
399 | } | ||
318 | } | 400 | } |
319 | 401 | ||
320 | /** | 402 | /** |
@@ -378,7 +460,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, | |||
378 | struct pt_regs *linux_regs) | 460 | struct pt_regs *linux_regs) |
379 | { | 461 | { |
380 | unsigned long addr; | 462 | unsigned long addr; |
381 | unsigned long dr6; | ||
382 | char *ptr; | 463 | char *ptr; |
383 | int newPC; | 464 | int newPC; |
384 | 465 | ||
@@ -404,20 +485,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, | |||
404 | raw_smp_processor_id()); | 485 | raw_smp_processor_id()); |
405 | } | 486 | } |
406 | 487 | ||
407 | get_debugreg(dr6, 6); | ||
408 | if (!(dr6 & 0x4000)) { | ||
409 | int breakno; | ||
410 | |||
411 | for (breakno = 0; breakno < 4; breakno++) { | ||
412 | if (dr6 & (1 << breakno) && | ||
413 | breakinfo[breakno].type == 0) { | ||
414 | /* Set restore flag: */ | ||
415 | linux_regs->flags |= X86_EFLAGS_RF; | ||
416 | break; | ||
417 | } | ||
418 | } | ||
419 | } | ||
420 | set_debugreg(0UL, 6); | ||
421 | kgdb_correct_hw_break(); | 488 | kgdb_correct_hw_break(); |
422 | 489 | ||
423 | return 0; | 490 | return 0; |
@@ -485,8 +552,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) | |||
485 | break; | 552 | break; |
486 | 553 | ||
487 | case DIE_DEBUG: | 554 | case DIE_DEBUG: |
488 | if (atomic_read(&kgdb_cpu_doing_single_step) == | 555 | if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { |
489 | raw_smp_processor_id()) { | ||
490 | if (user_mode(regs)) | 556 | if (user_mode(regs)) |
491 | return single_step_cont(regs, args); | 557 | return single_step_cont(regs, args); |
492 | break; | 558 | break; |
@@ -539,7 +605,42 @@ static struct notifier_block kgdb_notifier = { | |||
539 | */ | 605 | */ |
540 | int kgdb_arch_init(void) | 606 | int kgdb_arch_init(void) |
541 | { | 607 | { |
542 | return register_die_notifier(&kgdb_notifier); | 608 | int i, cpu; |
609 | int ret; | ||
610 | struct perf_event_attr attr; | ||
611 | struct perf_event **pevent; | ||
612 | |||
613 | ret = register_die_notifier(&kgdb_notifier); | ||
614 | if (ret != 0) | ||
615 | return ret; | ||
616 | /* | ||
617 | * Pre-allocate the hw breakpoint structures in the non-atomic | ||
618 | * portion of kgdb because this operation requires mutexes to | ||
619 | * complete. | ||
620 | */ | ||
621 | hw_breakpoint_init(&attr); | ||
622 | attr.bp_addr = (unsigned long)kgdb_arch_init; | ||
623 | attr.bp_len = HW_BREAKPOINT_LEN_1; | ||
624 | attr.bp_type = HW_BREAKPOINT_W; | ||
625 | attr.disabled = 1; | ||
626 | for (i = 0; i < 4; i++) { | ||
627 | breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); | ||
628 | if (IS_ERR(breakinfo[i].pev)) { | ||
629 | printk(KERN_ERR "kgdb: Could not allocate hw breakpoints\n"); | ||
630 | breakinfo[i].pev = NULL; | ||
631 | kgdb_arch_exit(); | ||
632 | return -1; | ||
633 | } | ||
634 | for_each_online_cpu(cpu) { | ||
635 | pevent = per_cpu_ptr(breakinfo[i].pev, cpu); | ||
636 | pevent[0]->hw.sample_period = 1; | ||
637 | if (pevent[0]->destroy != NULL) { | ||
638 | pevent[0]->destroy = NULL; | ||
639 | release_bp_slot(*pevent); | ||
640 | } | ||
641 | } | ||
642 | } | ||
643 | return ret; | ||
543 | } | 644 | } |
544 | 645 | ||
545 | /** | 646 | /** |
@@ -550,6 +651,13 @@ int kgdb_arch_init(void) | |||
550 | */ | 651 | */ |
551 | void kgdb_arch_exit(void) | 652 | void kgdb_arch_exit(void) |
552 | { | 653 | { |
654 | int i; | ||
655 | for (i = 0; i < 4; i++) { | ||
656 | if (breakinfo[i].pev) { | ||
657 | unregister_wide_hw_breakpoint(breakinfo[i].pev); | ||
658 | breakinfo[i].pev = NULL; | ||
659 | } | ||
660 | } | ||
553 | unregister_die_notifier(&kgdb_notifier); | 661 | unregister_die_notifier(&kgdb_notifier); |
554 | } | 662 | } |
555 | 663 | ||
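The allocation pattern in kgdb_arch_init() above generalizes: any user can pre-allocate an initially disarmed hardware breakpoint on every CPU and flip attr.disabled later. A minimal sketch under those assumptions (some_watched_var is a placeholder symbol; error handling trimmed):

    struct perf_event_attr attr;
    struct perf_event **pev;

    hw_breakpoint_init(&attr);
    attr.bp_addr  = (unsigned long)&some_watched_var;  /* placeholder */
    attr.bp_len   = HW_BREAKPOINT_LEN_1;
    attr.bp_type  = HW_BREAKPOINT_W;
    attr.disabled = 1;                  /* arm only when actually needed */

    pev = register_wide_hw_breakpoint(&attr, NULL);
    if (IS_ERR(pev))
            return PTR_ERR(pev);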
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 5b8c7505b3bc..b43bbaebe2c0 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <linux/module.h> | 49 | #include <linux/module.h> |
50 | #include <linux/kdebug.h> | 50 | #include <linux/kdebug.h> |
51 | #include <linux/kallsyms.h> | 51 | #include <linux/kallsyms.h> |
52 | #include <linux/ftrace.h> | ||
52 | 53 | ||
53 | #include <asm/cacheflush.h> | 54 | #include <asm/cacheflush.h> |
54 | #include <asm/desc.h> | 55 | #include <asm/desc.h> |
@@ -106,16 +107,22 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { | |||
106 | }; | 107 | }; |
107 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); | 108 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); |
108 | 109 | ||
109 | /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ | 110 | static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) |
110 | static void __kprobes set_jmp_op(void *from, void *to) | ||
111 | { | 111 | { |
112 | struct __arch_jmp_op { | 112 | struct __arch_relative_insn { |
113 | char op; | 113 | u8 op; |
114 | s32 raddr; | 114 | s32 raddr; |
115 | } __attribute__((packed)) * jop; | 115 | } __attribute__((packed)) *insn; |
116 | jop = (struct __arch_jmp_op *)from; | 116 | |
117 | jop->raddr = (s32)((long)(to) - ((long)(from) + 5)); | 117 | insn = (struct __arch_relative_insn *)from; |
118 | jop->op = RELATIVEJUMP_INSTRUCTION; | 118 | insn->raddr = (s32)((long)(to) - ((long)(from) + 5)); |
119 | insn->op = op; | ||
120 | } | ||
121 | |||
122 | /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ | ||
123 | static void __kprobes synthesize_reljump(void *from, void *to) | ||
124 | { | ||
125 | __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); | ||
119 | } | 126 | } |
120 | 127 | ||
121 | /* | 128 | /* |
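For reference, the byte-level effect of synthesize_reljump(): it emits the five-byte near jump e9 rel32, with the displacement taken relative to the end of the jump itself. A worked example with made-up addresses:

    /* from == 0xffffffff81000000, to == 0xffffffff81000010:
     *   raddr = to - (from + 5) = 0x0b
     *   bytes at from: e9 0b 00 00 00    jmp 0xffffffff81000010 */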
@@ -202,7 +209,7 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | |||
202 | /* | 209 | /* |
203 | * Basically, kp->ainsn.insn has an original instruction. | 210 | * Basically, kp->ainsn.insn has an original instruction. |
204 | * However, a RIP-relative instruction cannot be single-stepped | 211 |
205 | * at a different place; fix_riprel() tweaks the displacement of | 212 | * at a different place; __copy_instruction() tweaks the displacement of |
206 | * that instruction. In that case, we can't recover the instruction | 213 | * that instruction. In that case, we can't recover the instruction |
207 | * from the kp->ainsn.insn. | 214 | * from the kp->ainsn.insn. |
208 | * | 215 | * |
@@ -284,21 +291,37 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | |||
284 | } | 291 | } |
285 | 292 | ||
286 | /* | 293 | /* |
287 | * Adjust the displacement if the instruction uses the %rip-relative | 294 | * Copy an instruction and adjust the displacement if the instruction |
288 | * addressing mode. | 295 | * uses the %rip-relative addressing mode. |
289 | * If it does, return the address of the 32-bit displacement word. | 296 | * If it does, return the address of the 32-bit displacement word. |
290 | * If not, return null. | 297 | * If not, return null. |
291 | * Only applicable to 64-bit x86. | 298 | * Only applicable to 64-bit x86. |
292 | */ | 299 | */ |
293 | static void __kprobes fix_riprel(struct kprobe *p) | 300 | static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) |
294 | { | 301 | { |
295 | #ifdef CONFIG_X86_64 | ||
296 | struct insn insn; | 302 | struct insn insn; |
297 | kernel_insn_init(&insn, p->ainsn.insn); | 303 | int ret; |
304 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
298 | 305 | ||
306 | kernel_insn_init(&insn, src); | ||
307 | if (recover) { | ||
308 | insn_get_opcode(&insn); | ||
309 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
310 | ret = recover_probed_instruction(buf, | ||
311 | (unsigned long)src); | ||
312 | if (ret) | ||
313 | return 0; | ||
314 | kernel_insn_init(&insn, buf); | ||
315 | } | ||
316 | } | ||
317 | insn_get_length(&insn); | ||
318 | memcpy(dest, insn.kaddr, insn.length); | ||
319 | |||
320 | #ifdef CONFIG_X86_64 | ||
299 | if (insn_rip_relative(&insn)) { | 321 | if (insn_rip_relative(&insn)) { |
300 | s64 newdisp; | 322 | s64 newdisp; |
301 | u8 *disp; | 323 | u8 *disp; |
324 | kernel_insn_init(&insn, dest); | ||
302 | insn_get_displacement(&insn); | 325 | insn_get_displacement(&insn); |
303 | /* | 326 | /* |
304 | * The copied instruction uses the %rip-relative addressing | 327 | * The copied instruction uses the %rip-relative addressing |
@@ -312,20 +335,23 @@ static void __kprobes fix_riprel(struct kprobe *p) | |||
312 | * extension of the original signed 32-bit displacement would | 335 | * extension of the original signed 32-bit displacement would |
313 | * have given. | 336 | * have given. |
314 | */ | 337 | */ |
315 | newdisp = (u8 *) p->addr + (s64) insn.displacement.value - | 338 | newdisp = (u8 *) src + (s64) insn.displacement.value - |
316 | (u8 *) p->ainsn.insn; | 339 | (u8 *) dest; |
317 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ | 340 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ |
318 | disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn); | 341 | disp = (u8 *) dest + insn_offset_displacement(&insn); |
319 | *(s32 *) disp = (s32) newdisp; | 342 | *(s32 *) disp = (s32) newdisp; |
320 | } | 343 | } |
321 | #endif | 344 | #endif |
345 | return insn.length; | ||
322 | } | 346 | } |
323 | 347 | ||
324 | static void __kprobes arch_copy_kprobe(struct kprobe *p) | 348 | static void __kprobes arch_copy_kprobe(struct kprobe *p) |
325 | { | 349 | { |
326 | memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | 350 | /* |
327 | 351 | * Copy an instruction without recovering an int3, because it will |
328 | fix_riprel(p); | 352 | * have been put there by another subsystem. |
353 | */ | ||
354 | __copy_instruction(p->ainsn.insn, p->addr, 0); | ||
329 | 355 | ||
330 | if (can_boost(p->addr)) | 356 | if (can_boost(p->addr)) |
331 | p->ainsn.boostable = 0; | 357 | p->ainsn.boostable = 0; |
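Why the fixup in __copy_instruction() is just an src - dest adjustment: the original instruction at src addresses target = (src + len) + disp, and the relocated copy has the same length, so requiring the copy to hit the same target gives

    (src + len) + disp = (dest + len) + newdisp
                     =>  newdisp = disp + (src - dest)

which is exactly the "newdisp = src + displacement - dest" computed above.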
@@ -337,6 +363,9 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) | |||
337 | 363 | ||
338 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | 364 | int __kprobes arch_prepare_kprobe(struct kprobe *p) |
339 | { | 365 | { |
366 | if (alternatives_text_reserved(p->addr, p->addr)) | ||
367 | return -EINVAL; | ||
368 | |||
340 | if (!can_probe((unsigned long)p->addr)) | 369 | if (!can_probe((unsigned long)p->addr)) |
341 | return -EILSEQ; | 370 | return -EILSEQ; |
342 | /* insn: must be on special executable page on x86. */ | 371 | /* insn: must be on special executable page on x86. */ |
@@ -403,18 +432,6 @@ static void __kprobes restore_btf(void) | |||
403 | update_debugctlmsr(current->thread.debugctlmsr); | 432 | update_debugctlmsr(current->thread.debugctlmsr); |
404 | } | 433 | } |
405 | 434 | ||
406 | static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) | ||
407 | { | ||
408 | clear_btf(); | ||
409 | regs->flags |= X86_EFLAGS_TF; | ||
410 | regs->flags &= ~X86_EFLAGS_IF; | ||
411 | /* single step inline if the instruction is an int3 */ | ||
412 | if (p->opcode == BREAKPOINT_INSTRUCTION) | ||
413 | regs->ip = (unsigned long)p->addr; | ||
414 | else | ||
415 | regs->ip = (unsigned long)p->ainsn.insn; | ||
416 | } | ||
417 | |||
418 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | 435 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, |
419 | struct pt_regs *regs) | 436 | struct pt_regs *regs) |
420 | { | 437 | { |
@@ -426,20 +443,50 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | |||
426 | *sara = (unsigned long) &kretprobe_trampoline; | 443 | *sara = (unsigned long) &kretprobe_trampoline; |
427 | } | 444 | } |
428 | 445 | ||
446 | #ifdef CONFIG_OPTPROBES | ||
447 | static int __kprobes setup_detour_execution(struct kprobe *p, | ||
448 | struct pt_regs *regs, | ||
449 | int reenter); | ||
450 | #else | ||
451 | #define setup_detour_execution(p, regs, reenter) (0) | ||
452 | #endif | ||
453 | |||
429 | static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, | 454 | static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, |
430 | struct kprobe_ctlblk *kcb) | 455 | struct kprobe_ctlblk *kcb, int reenter) |
431 | { | 456 | { |
432 | #if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) | 457 | if (setup_detour_execution(p, regs, reenter)) |
458 | return; | ||
459 | |||
460 | #if !defined(CONFIG_PREEMPT) | ||
433 | if (p->ainsn.boostable == 1 && !p->post_handler) { | 461 | if (p->ainsn.boostable == 1 && !p->post_handler) { |
434 | /* Boost up -- we can execute copied instructions directly */ | 462 | /* Boost up -- we can execute copied instructions directly */ |
435 | reset_current_kprobe(); | 463 | if (!reenter) |
464 | reset_current_kprobe(); | ||
465 | /* | ||
466 | * Reentering boosted probe doesn't reset current_kprobe, | ||
467 | * nor set current_kprobe, because it doesn't use single | ||
468 | * stepping. | ||
469 | */ | ||
436 | regs->ip = (unsigned long)p->ainsn.insn; | 470 | regs->ip = (unsigned long)p->ainsn.insn; |
437 | preempt_enable_no_resched(); | 471 | preempt_enable_no_resched(); |
438 | return; | 472 | return; |
439 | } | 473 | } |
440 | #endif | 474 | #endif |
441 | prepare_singlestep(p, regs); | 475 | if (reenter) { |
442 | kcb->kprobe_status = KPROBE_HIT_SS; | 476 | save_previous_kprobe(kcb); |
477 | set_current_kprobe(p, regs, kcb); | ||
478 | kcb->kprobe_status = KPROBE_REENTER; | ||
479 | } else | ||
480 | kcb->kprobe_status = KPROBE_HIT_SS; | ||
481 | /* Prepare real single stepping */ | ||
482 | clear_btf(); | ||
483 | regs->flags |= X86_EFLAGS_TF; | ||
484 | regs->flags &= ~X86_EFLAGS_IF; | ||
485 | /* single step inline if the instruction is an int3 */ | ||
486 | if (p->opcode == BREAKPOINT_INSTRUCTION) | ||
487 | regs->ip = (unsigned long)p->addr; | ||
488 | else | ||
489 | regs->ip = (unsigned long)p->ainsn.insn; | ||
443 | } | 490 | } |
444 | 491 | ||
445 | /* | 492 | /* |
@@ -453,11 +500,8 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
453 | switch (kcb->kprobe_status) { | 500 | switch (kcb->kprobe_status) { |
454 | case KPROBE_HIT_SSDONE: | 501 | case KPROBE_HIT_SSDONE: |
455 | case KPROBE_HIT_ACTIVE: | 502 | case KPROBE_HIT_ACTIVE: |
456 | save_previous_kprobe(kcb); | ||
457 | set_current_kprobe(p, regs, kcb); | ||
458 | kprobes_inc_nmissed_count(p); | 503 | kprobes_inc_nmissed_count(p); |
459 | prepare_singlestep(p, regs); | 504 | setup_singlestep(p, regs, kcb, 1); |
460 | kcb->kprobe_status = KPROBE_REENTER; | ||
461 | break; | 505 | break; |
462 | case KPROBE_HIT_SS: | 506 | case KPROBE_HIT_SS: |
463 | /* A probe has been hit in the codepath leading up to, or just | 507 | /* A probe has been hit in the codepath leading up to, or just |
@@ -532,13 +576,13 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
532 | * more here. | 576 | * more here. |
533 | */ | 577 | */ |
534 | if (!p->pre_handler || !p->pre_handler(p, regs)) | 578 | if (!p->pre_handler || !p->pre_handler(p, regs)) |
535 | setup_singlestep(p, regs, kcb); | 579 | setup_singlestep(p, regs, kcb, 0); |
536 | return 1; | 580 | return 1; |
537 | } | 581 | } |
538 | } else if (kprobe_running()) { | 582 | } else if (kprobe_running()) { |
539 | p = __get_cpu_var(current_kprobe); | 583 | p = __get_cpu_var(current_kprobe); |
540 | if (p->break_handler && p->break_handler(p, regs)) { | 584 | if (p->break_handler && p->break_handler(p, regs)) { |
541 | setup_singlestep(p, regs, kcb); | 585 | setup_singlestep(p, regs, kcb, 0); |
542 | return 1; | 586 | return 1; |
543 | } | 587 | } |
544 | } /* else: not a kprobe fault; let the kernel handle it */ | 588 | } /* else: not a kprobe fault; let the kernel handle it */ |
@@ -547,6 +591,69 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
547 | return 0; | 591 | return 0; |
548 | } | 592 | } |
549 | 593 | ||
594 | #ifdef CONFIG_X86_64 | ||
595 | #define SAVE_REGS_STRING \ | ||
596 | /* Skip cs, ip, orig_ax. */ \ | ||
597 | " subq $24, %rsp\n" \ | ||
598 | " pushq %rdi\n" \ | ||
599 | " pushq %rsi\n" \ | ||
600 | " pushq %rdx\n" \ | ||
601 | " pushq %rcx\n" \ | ||
602 | " pushq %rax\n" \ | ||
603 | " pushq %r8\n" \ | ||
604 | " pushq %r9\n" \ | ||
605 | " pushq %r10\n" \ | ||
606 | " pushq %r11\n" \ | ||
607 | " pushq %rbx\n" \ | ||
608 | " pushq %rbp\n" \ | ||
609 | " pushq %r12\n" \ | ||
610 | " pushq %r13\n" \ | ||
611 | " pushq %r14\n" \ | ||
612 | " pushq %r15\n" | ||
613 | #define RESTORE_REGS_STRING \ | ||
614 | " popq %r15\n" \ | ||
615 | " popq %r14\n" \ | ||
616 | " popq %r13\n" \ | ||
617 | " popq %r12\n" \ | ||
618 | " popq %rbp\n" \ | ||
619 | " popq %rbx\n" \ | ||
620 | " popq %r11\n" \ | ||
621 | " popq %r10\n" \ | ||
622 | " popq %r9\n" \ | ||
623 | " popq %r8\n" \ | ||
624 | " popq %rax\n" \ | ||
625 | " popq %rcx\n" \ | ||
626 | " popq %rdx\n" \ | ||
627 | " popq %rsi\n" \ | ||
628 | " popq %rdi\n" \ | ||
629 | /* Skip orig_ax, ip, cs */ \ | ||
630 | " addq $24, %rsp\n" | ||
631 | #else | ||
632 | #define SAVE_REGS_STRING \ | ||
633 | /* Skip cs, ip, orig_ax and gs. */ \ | ||
634 | " subl $16, %esp\n" \ | ||
635 | " pushl %fs\n" \ | ||
636 | " pushl %ds\n" \ | ||
637 | " pushl %es\n" \ | ||
638 | " pushl %eax\n" \ | ||
639 | " pushl %ebp\n" \ | ||
640 | " pushl %edi\n" \ | ||
641 | " pushl %esi\n" \ | ||
642 | " pushl %edx\n" \ | ||
643 | " pushl %ecx\n" \ | ||
644 | " pushl %ebx\n" | ||
645 | #define RESTORE_REGS_STRING \ | ||
646 | " popl %ebx\n" \ | ||
647 | " popl %ecx\n" \ | ||
648 | " popl %edx\n" \ | ||
649 | " popl %esi\n" \ | ||
650 | " popl %edi\n" \ | ||
651 | " popl %ebp\n" \ | ||
652 | " popl %eax\n" \ | ||
653 | /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ | ||
654 | " addl $24, %esp\n" | ||
655 | #endif | ||
656 | |||
550 | /* | 657 | /* |
551 | * When a retprobed function returns, this code saves registers and | 658 | * When a retprobed function returns, this code saves registers and |
552 | * calls trampoline_handler(), which calls the kretprobe's handler. | 659 |
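For readers checking the magic offsets used with these macros (152 on 64-bit; 52 and 56 on 32-bit): after "pushq %rsp; pushfq; SAVE_REGS_STRING" the stack mirrors struct pt_regs. The 64-bit layout, offsets from the resulting %rsp:

    /*   0: r15  ...  112: rdi         15 GPRs, 8 bytes each
     * 120: orig_ax  128: ip  136: cs  the hole left by "subq $24"
     * 144: flags    152: sp           from pushfq and pushq %rsp */

so "movq %rax, 152(%rsp)" in the trampoline below patches the saved stack slot with the real return address; on 32-bit, cs sits at 52(%esp) and flags at 56(%esp), matching the movl pair there.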
@@ -560,65 +667,16 @@ static void __used __kprobes kretprobe_trampoline_holder(void) | |||
560 | /* We don't bother saving the ss register */ | 667 | /* We don't bother saving the ss register */ |
561 | " pushq %rsp\n" | 668 | " pushq %rsp\n" |
562 | " pushfq\n" | 669 | " pushfq\n" |
563 | /* | 670 | SAVE_REGS_STRING |
564 | * Skip cs, ip, orig_ax. | ||
565 | * trampoline_handler() will plug in these values | ||
566 | */ | ||
567 | " subq $24, %rsp\n" | ||
568 | " pushq %rdi\n" | ||
569 | " pushq %rsi\n" | ||
570 | " pushq %rdx\n" | ||
571 | " pushq %rcx\n" | ||
572 | " pushq %rax\n" | ||
573 | " pushq %r8\n" | ||
574 | " pushq %r9\n" | ||
575 | " pushq %r10\n" | ||
576 | " pushq %r11\n" | ||
577 | " pushq %rbx\n" | ||
578 | " pushq %rbp\n" | ||
579 | " pushq %r12\n" | ||
580 | " pushq %r13\n" | ||
581 | " pushq %r14\n" | ||
582 | " pushq %r15\n" | ||
583 | " movq %rsp, %rdi\n" | 671 | " movq %rsp, %rdi\n" |
584 | " call trampoline_handler\n" | 672 | " call trampoline_handler\n" |
585 | /* Replace saved sp with true return address. */ | 673 | /* Replace saved sp with true return address. */ |
586 | " movq %rax, 152(%rsp)\n" | 674 | " movq %rax, 152(%rsp)\n" |
587 | " popq %r15\n" | 675 | RESTORE_REGS_STRING |
588 | " popq %r14\n" | ||
589 | " popq %r13\n" | ||
590 | " popq %r12\n" | ||
591 | " popq %rbp\n" | ||
592 | " popq %rbx\n" | ||
593 | " popq %r11\n" | ||
594 | " popq %r10\n" | ||
595 | " popq %r9\n" | ||
596 | " popq %r8\n" | ||
597 | " popq %rax\n" | ||
598 | " popq %rcx\n" | ||
599 | " popq %rdx\n" | ||
600 | " popq %rsi\n" | ||
601 | " popq %rdi\n" | ||
602 | /* Skip orig_ax, ip, cs */ | ||
603 | " addq $24, %rsp\n" | ||
604 | " popfq\n" | 676 | " popfq\n" |
605 | #else | 677 | #else |
606 | " pushf\n" | 678 | " pushf\n" |
607 | /* | 679 | SAVE_REGS_STRING |
608 | * Skip cs, ip, orig_ax and gs. | ||
609 | * trampoline_handler() will plug in these values | ||
610 | */ | ||
611 | " subl $16, %esp\n" | ||
612 | " pushl %fs\n" | ||
613 | " pushl %es\n" | ||
614 | " pushl %ds\n" | ||
615 | " pushl %eax\n" | ||
616 | " pushl %ebp\n" | ||
617 | " pushl %edi\n" | ||
618 | " pushl %esi\n" | ||
619 | " pushl %edx\n" | ||
620 | " pushl %ecx\n" | ||
621 | " pushl %ebx\n" | ||
622 | " movl %esp, %eax\n" | 680 | " movl %esp, %eax\n" |
623 | " call trampoline_handler\n" | 681 | " call trampoline_handler\n" |
624 | /* Move flags to cs */ | 682 | /* Move flags to cs */ |
@@ -626,15 +684,7 @@ static void __used __kprobes kretprobe_trampoline_holder(void) | |||
626 | " movl %edx, 52(%esp)\n" | 684 | " movl %edx, 52(%esp)\n" |
627 | /* Replace saved flags with true return address. */ | 685 | /* Replace saved flags with true return address. */ |
628 | " movl %eax, 56(%esp)\n" | 686 | " movl %eax, 56(%esp)\n" |
629 | " popl %ebx\n" | 687 | RESTORE_REGS_STRING |
630 | " popl %ecx\n" | ||
631 | " popl %edx\n" | ||
632 | " popl %esi\n" | ||
633 | " popl %edi\n" | ||
634 | " popl %ebp\n" | ||
635 | " popl %eax\n" | ||
636 | /* Skip ds, es, fs, gs, orig_ax and ip */ | ||
637 | " addl $24, %esp\n" | ||
638 | " popf\n" | 688 | " popf\n" |
639 | #endif | 689 | #endif |
640 | " ret\n"); | 690 | " ret\n"); |
@@ -802,8 +852,8 @@ static void __kprobes resume_execution(struct kprobe *p, | |||
802 | * These instructions can be executed directly if they | 852 | * These instructions can be executed directly if they |
803 | * jump back to the correct address. | 853 | * jump back to the correct address. |
804 | */ | 854 | */ |
805 | set_jmp_op((void *)regs->ip, | 855 | synthesize_reljump((void *)regs->ip, |
806 | (void *)orig_ip + (regs->ip - copy_ip)); | 856 | (void *)orig_ip + (regs->ip - copy_ip)); |
807 | p->ainsn.boostable = 1; | 857 | p->ainsn.boostable = 1; |
808 | } else { | 858 | } else { |
809 | p->ainsn.boostable = -1; | 859 | p->ainsn.boostable = -1; |
@@ -1030,6 +1080,358 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | |||
1030 | return 0; | 1080 | return 0; |
1031 | } | 1081 | } |
1032 | 1082 | ||
1083 | |||
1084 | #ifdef CONFIG_OPTPROBES | ||
1085 | |||
1086 | /* Insert a call instruction at address 'from', which calls address 'to'.*/ | ||
1087 | static void __kprobes synthesize_relcall(void *from, void *to) | ||
1088 | { | ||
1089 | __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); | ||
1090 | } | ||
1091 | |||
1092 | /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ | ||
1093 | static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, | ||
1094 | unsigned long val) | ||
1095 | { | ||
1096 | #ifdef CONFIG_X86_64 | ||
1097 | *addr++ = 0x48; | ||
1098 | *addr++ = 0xbf; | ||
1099 | #else | ||
1100 | *addr++ = 0xb8; | ||
1101 | #endif | ||
1102 | *(unsigned long *)addr = val; | ||
1103 | } | ||
1104 | |||
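The hard-coded opcodes in synthesize_set_arg1() decode as a load of the first-argument register under each ABI (%rdi on x86-64; %eax for the 32-bit kernel's regparm(3) convention):

    /* x86-64: 48 bf <imm64>   movabs $val, %rdi
     * x86-32: b8 <imm32>      mov    $val, %eax */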
1105 | void __kprobes kprobes_optinsn_template_holder(void) | ||
1106 | { | ||
1107 | asm volatile ( | ||
1108 | ".global optprobe_template_entry\n" | ||
1109 | "optprobe_template_entry: \n" | ||
1110 | #ifdef CONFIG_X86_64 | ||
1111 | /* We don't bother saving the ss register */ | ||
1112 | " pushq %rsp\n" | ||
1113 | " pushfq\n" | ||
1114 | SAVE_REGS_STRING | ||
1115 | " movq %rsp, %rsi\n" | ||
1116 | ".global optprobe_template_val\n" | ||
1117 | "optprobe_template_val: \n" | ||
1118 | ASM_NOP5 | ||
1119 | ASM_NOP5 | ||
1120 | ".global optprobe_template_call\n" | ||
1121 | "optprobe_template_call: \n" | ||
1122 | ASM_NOP5 | ||
1123 | /* Move flags to rsp */ | ||
1124 | " movq 144(%rsp), %rdx\n" | ||
1125 | " movq %rdx, 152(%rsp)\n" | ||
1126 | RESTORE_REGS_STRING | ||
1127 | /* Skip flags entry */ | ||
1128 | " addq $8, %rsp\n" | ||
1129 | " popfq\n" | ||
1130 | #else /* CONFIG_X86_32 */ | ||
1131 | " pushf\n" | ||
1132 | SAVE_REGS_STRING | ||
1133 | " movl %esp, %edx\n" | ||
1134 | ".global optprobe_template_val\n" | ||
1135 | "optprobe_template_val: \n" | ||
1136 | ASM_NOP5 | ||
1137 | ".global optprobe_template_call\n" | ||
1138 | "optprobe_template_call: \n" | ||
1139 | ASM_NOP5 | ||
1140 | RESTORE_REGS_STRING | ||
1141 | " addl $4, %esp\n" /* skip cs */ | ||
1142 | " popf\n" | ||
1143 | #endif | ||
1144 | ".global optprobe_template_end\n" | ||
1145 | "optprobe_template_end: \n"); | ||
1146 | } | ||
1147 | |||
1148 | #define TMPL_MOVE_IDX \ | ||
1149 | ((long)&optprobe_template_val - (long)&optprobe_template_entry) | ||
1150 | #define TMPL_CALL_IDX \ | ||
1151 | ((long)&optprobe_template_call - (long)&optprobe_template_entry) | ||
1152 | #define TMPL_END_IDX \ | ||
1153 | ((long)&optprobe_template_end - (long)&optprobe_template_entry) | ||
1154 | |||
1155 | #define INT3_SIZE sizeof(kprobe_opcode_t) | ||
1156 | |||
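Putting the template and the TMPL_*_IDX offsets together, each optimized probe's out-of-line buffer ends up laid out as follows (a sketch; the pieces are assembled below in arch_prepare_optimized_kprobe()):

    /* [0, TMPL_END_IDX)        template copy: save regs, set arg1 = &op,
     *                          call optimized_callback(), restore regs
     * [TMPL_END_IDX, +size)    relocated copy of the original instructions
     * [TMPL_END_IDX + size)    5-byte jmp back to op->kp.addr + size */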
1157 | /* Optimized kprobe callback function: called from optinsn */ | ||
1158 | static void __kprobes optimized_callback(struct optimized_kprobe *op, | ||
1159 | struct pt_regs *regs) | ||
1160 | { | ||
1161 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
1162 | |||
1163 | preempt_disable(); | ||
1164 | if (kprobe_running()) { | ||
1165 | kprobes_inc_nmissed_count(&op->kp); | ||
1166 | } else { | ||
1167 | /* Save skipped registers */ | ||
1168 | #ifdef CONFIG_X86_64 | ||
1169 | regs->cs = __KERNEL_CS; | ||
1170 | #else | ||
1171 | regs->cs = __KERNEL_CS | get_kernel_rpl(); | ||
1172 | regs->gs = 0; | ||
1173 | #endif | ||
1174 | regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; | ||
1175 | regs->orig_ax = ~0UL; | ||
1176 | |||
1177 | __get_cpu_var(current_kprobe) = &op->kp; | ||
1178 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
1179 | opt_pre_handler(&op->kp, regs); | ||
1180 | __get_cpu_var(current_kprobe) = NULL; | ||
1181 | } | ||
1182 | preempt_enable_no_resched(); | ||
1183 | } | ||
1184 | |||
1185 | static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) | ||
1186 | { | ||
1187 | int len = 0, ret; | ||
1188 | |||
1189 | while (len < RELATIVEJUMP_SIZE) { | ||
1190 | ret = __copy_instruction(dest + len, src + len, 1); | ||
1191 | if (!ret || !can_boost(dest + len)) | ||
1192 | return -EINVAL; | ||
1193 | len += ret; | ||
1194 | } | ||
1195 | /* Check whether the address range is reserved */ | ||
1196 | if (ftrace_text_reserved(src, src + len - 1) || | ||
1197 | alternatives_text_reserved(src, src + len - 1)) | ||
1198 | return -EBUSY; | ||
1199 | |||
1200 | return len; | ||
1201 | } | ||
1202 | |||
1203 | /* Check whether insn is indirect jump */ | ||
1204 | static int __kprobes insn_is_indirect_jump(struct insn *insn) | ||
1205 | { | ||
1206 | return ((insn->opcode.bytes[0] == 0xff && | ||
1207 | (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ | ||
1208 | insn->opcode.bytes[0] == 0xea); /* Segment based jump */ | ||
1209 | } | ||
1210 | |||
1211 | /* Check whether insn jumps into specified address range */ | ||
1212 | static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) | ||
1213 | { | ||
1214 | unsigned long target = 0; | ||
1215 | |||
1216 | switch (insn->opcode.bytes[0]) { | ||
1217 | case 0xe0: /* loopne */ | ||
1218 | case 0xe1: /* loope */ | ||
1219 | case 0xe2: /* loop */ | ||
1220 | case 0xe3: /* jcxz */ | ||
1221 | case 0xe9: /* near relative jump */ | ||
1222 | case 0xeb: /* short relative jump */ | ||
1223 | break; | ||
1224 | case 0x0f: | ||
1225 | if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ | ||
1226 | break; | ||
1227 | return 0; | ||
1228 | default: | ||
1229 | if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ | ||
1230 | break; | ||
1231 | return 0; | ||
1232 | } | ||
1233 | target = (unsigned long)insn->next_byte + insn->immediate.value; | ||
1234 | |||
1235 | return (start <= target && target <= start + len); | ||
1236 | } | ||
1237 | |||
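Two concrete encodings that reach the target computation above:

    /* 74 xx         je rel8    via the 0x70..0x7f (jcc short) default case
     * 0f 84 xx..xx  je rel32   via the 0x0f (jcc near) case
     * target = address of the next instruction + sign-extended immediate */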
1238 | /* Decode the whole function to ensure no instruction jumps into the target */ | ||
1239 | static int __kprobes can_optimize(unsigned long paddr) | ||
1240 | { | ||
1241 | int ret; | ||
1242 | unsigned long addr, size = 0, offset = 0; | ||
1243 | struct insn insn; | ||
1244 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
1245 | /* Dummy buffers for lookup_symbol_attrs */ | ||
1246 | static char __dummy_buf[KSYM_NAME_LEN]; | ||
1247 | |||
1248 | /* Lookup symbol including addr */ | ||
1249 | if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf)) | ||
1250 | return 0; | ||
1251 | |||
1252 | /* Check there is enough space for a relative jump. */ | ||
1253 | if (size - offset < RELATIVEJUMP_SIZE) | ||
1254 | return 0; | ||
1255 | |||
1256 | /* Decode instructions */ | ||
1257 | addr = paddr - offset; | ||
1258 | while (addr < paddr - offset + size) { /* Decode until function end */ | ||
1259 | if (search_exception_tables(addr)) | ||
1260 | /* | ||
1261 | * Since some fixup code will jump into this function, | ||
1262 | * we can't optimize kprobes in this function. | ||
1263 | */ | ||
1264 | return 0; | ||
1265 | kernel_insn_init(&insn, (void *)addr); | ||
1266 | insn_get_opcode(&insn); | ||
1267 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
1268 | ret = recover_probed_instruction(buf, addr); | ||
1269 | if (ret) | ||
1270 | return 0; | ||
1271 | kernel_insn_init(&insn, buf); | ||
1272 | } | ||
1273 | insn_get_length(&insn); | ||
1274 | /* Recover address */ | ||
1275 | insn.kaddr = (void *)addr; | ||
1276 | insn.next_byte = (void *)(addr + insn.length); | ||
1277 | /* Check that this instruction doesn't jump into the target */ | ||
1278 | if (insn_is_indirect_jump(&insn) || | ||
1279 | insn_jump_into_range(&insn, paddr + INT3_SIZE, | ||
1280 | RELATIVE_ADDR_SIZE)) | ||
1281 | return 0; | ||
1282 | addr += insn.length; | ||
1283 | } | ||
1284 | |||
1285 | return 1; | ||
1286 | } | ||
1287 | |||
1288 | /* Check optimized_kprobe can actually be optimized. */ | ||
1289 | int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) | ||
1290 | { | ||
1291 | int i; | ||
1292 | struct kprobe *p; | ||
1293 | |||
1294 | for (i = 1; i < op->optinsn.size; i++) { | ||
1295 | p = get_kprobe(op->kp.addr + i); | ||
1296 | if (p && !kprobe_disabled(p)) | ||
1297 | return -EEXIST; | ||
1298 | } | ||
1299 | |||
1300 | return 0; | ||
1301 | } | ||
1302 | |||
1303 | /* Check the addr is within the optimized instructions. */ | ||
1304 | int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op, | ||
1305 | unsigned long addr) | ||
1306 | { | ||
1307 | return ((unsigned long)op->kp.addr <= addr && | ||
1308 | (unsigned long)op->kp.addr + op->optinsn.size > addr); | ||
1309 | } | ||
1310 | |||
1311 | /* Free optimized instruction slot */ | ||
1312 | static __kprobes | ||
1313 | void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) | ||
1314 | { | ||
1315 | if (op->optinsn.insn) { | ||
1316 | free_optinsn_slot(op->optinsn.insn, dirty); | ||
1317 | op->optinsn.insn = NULL; | ||
1318 | op->optinsn.size = 0; | ||
1319 | } | ||
1320 | } | ||
1321 | |||
1322 | void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) | ||
1323 | { | ||
1324 | __arch_remove_optimized_kprobe(op, 1); | ||
1325 | } | ||
1326 | |||
1327 | /* | ||
1328 | * Copy the target instructions that the jump will replace. | ||
1329 | * Target instructions MUST be relocatable (checked inside). | ||
1330 | */ | ||
1331 | int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) | ||
1332 | { | ||
1333 | u8 *buf; | ||
1334 | int ret; | ||
1335 | long rel; | ||
1336 | |||
1337 | if (!can_optimize((unsigned long)op->kp.addr)) | ||
1338 | return -EILSEQ; | ||
1339 | |||
1340 | op->optinsn.insn = get_optinsn_slot(); | ||
1341 | if (!op->optinsn.insn) | ||
1342 | return -ENOMEM; | ||
1343 | |||
1344 | /* | ||
1345 | * Verify that the address gap is within the +/-2GB range, because | ||
1346 | * this uses a relative jump. | ||
1347 | */ | ||
1348 | rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; | ||
1349 | if (abs(rel) > 0x7fffffff) | ||
1350 | return -ERANGE; | ||
1351 | |||
1352 | buf = (u8 *)op->optinsn.insn; | ||
1353 | |||
1354 | /* Copy instructions into the out-of-line buffer */ | ||
1355 | ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); | ||
1356 | if (ret < 0) { | ||
1357 | __arch_remove_optimized_kprobe(op, 0); | ||
1358 | return ret; | ||
1359 | } | ||
1360 | op->optinsn.size = ret; | ||
1361 | |||
1362 | /* Copy arch-dep-instance from template */ | ||
1363 | memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); | ||
1364 | |||
1365 | /* Set probe information */ | ||
1366 | synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); | ||
1367 | |||
1368 | /* Set probe function call */ | ||
1369 | synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); | ||
1370 | |||
1371 | /* Set returning jmp instruction at the tail of out-of-line buffer */ | ||
1372 | synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, | ||
1373 | (u8 *)op->kp.addr + op->optinsn.size); | ||
1374 | |||
1375 | flush_icache_range((unsigned long) buf, | ||
1376 | (unsigned long) buf + TMPL_END_IDX + | ||
1377 | op->optinsn.size + RELATIVEJUMP_SIZE); | ||
1378 | return 0; | ||
1379 | } | ||
1380 | |||
1381 | /* Replace a breakpoint (int3) with a relative jump. */ | ||
1382 | int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) | ||
1383 | { | ||
1384 | unsigned char jmp_code[RELATIVEJUMP_SIZE]; | ||
1385 | s32 rel = (s32)((long)op->optinsn.insn - | ||
1386 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); | ||
1387 | |||
1388 | /* Backup instructions which will be replaced by jump address */ | ||
1389 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, | ||
1390 | RELATIVE_ADDR_SIZE); | ||
1391 | |||
1392 | jmp_code[0] = RELATIVEJUMP_OPCODE; | ||
1393 | *(s32 *)(&jmp_code[1]) = rel; | ||
1394 | |||
1395 | /* | ||
1396 | * text_poke_smp doesn't support NMI/MCE code modifying. | ||
1397 | * However, since kprobes itself also doesn't support NMI/MCE | ||
1398 | * code probing, it's not a problem. | ||
1399 | */ | ||
1400 | text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE); | ||
1401 | return 0; | ||
1402 | } | ||
1403 | |||
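The displacement here is the same e9 rel32 arithmetic as synthesize_reljump(), written out by hand so that all five bytes can be poked at once. With kp.addr == A and the detour buffer at B:

    /* rel32 = B - (A + 5); the five patched bytes at A become
     *   e9 <rel32, little-endian>    jmp B
     * The four bytes following the old int3 were saved to copied_insn
     * first, so arch_unoptimize_kprobe() can restore them below. */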
1404 | /* Replace a relative jump with a breakpoint (int3). */ | ||
1405 | void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) | ||
1406 | { | ||
1407 | u8 buf[RELATIVEJUMP_SIZE]; | ||
1408 | |||
1409 | /* Set int3 to first byte for kprobes */ | ||
1410 | buf[0] = BREAKPOINT_INSTRUCTION; | ||
1411 | memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
1412 | text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); | ||
1413 | } | ||
1414 | |||
1415 | static int __kprobes setup_detour_execution(struct kprobe *p, | ||
1416 | struct pt_regs *regs, | ||
1417 | int reenter) | ||
1418 | { | ||
1419 | struct optimized_kprobe *op; | ||
1420 | |||
1421 | if (p->flags & KPROBE_FLAG_OPTIMIZED) { | ||
1422 | /* This kprobe is really able to run optimized path. */ | ||
1423 | op = container_of(p, struct optimized_kprobe, kp); | ||
1424 | /* Detour through copied instructions */ | ||
1425 | regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; | ||
1426 | if (!reenter) | ||
1427 | reset_current_kprobe(); | ||
1428 | preempt_enable_no_resched(); | ||
1429 | return 1; | ||
1430 | } | ||
1431 | return 0; | ||
1432 | } | ||
1433 | #endif | ||
1434 | |||
1033 | int __init arch_init_kprobes(void) | 1435 | int __init arch_init_kprobes(void) |
1034 | { | 1436 | { |
1035 | return 0; | 1437 | return 0; |
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index ec6ef60cbd17..ea697263b373 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c | |||
@@ -7,6 +7,7 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/errno.h> | 9 | #include <linux/errno.h> |
10 | #include <linux/gfp.h> | ||
10 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
11 | #include <linux/string.h> | 12 | #include <linux/string.h> |
12 | #include <linux/mm.h> | 13 | #include <linux/mm.h> |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 4a8bb82248ae..035c8c529181 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/kexec.h> | 10 | #include <linux/kexec.h> |
11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
12 | #include <linux/gfp.h> | ||
12 | #include <linux/reboot.h> | 13 | #include <linux/reboot.h> |
13 | #include <linux/numa.h> | 14 | #include <linux/numa.h> |
14 | #include <linux/ftrace.h> | 15 | #include <linux/ftrace.h> |
diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c index 845d80ce1ef1..63eaf6596233 100644 --- a/arch/x86/kernel/mca_32.c +++ b/arch/x86/kernel/mca_32.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <linux/kernel.h> | 42 | #include <linux/kernel.h> |
43 | #include <linux/mca.h> | 43 | #include <linux/mca.h> |
44 | #include <linux/kprobes.h> | 44 | #include <linux/kprobes.h> |
45 | #include <linux/slab.h> | ||
45 | #include <asm/system.h> | 46 | #include <asm/system.h> |
46 | #include <asm/io.h> | 47 | #include <asm/io.h> |
47 | #include <linux/proc_fs.h> | 48 | #include <linux/proc_fs.h> |
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 37542b67c57e..e1af7c055c7d 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -36,9 +36,6 @@ MODULE_LICENSE("GPL v2"); | |||
36 | #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 | 36 | #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 |
37 | #define UCODE_UCODE_TYPE 0x00000001 | 37 | #define UCODE_UCODE_TYPE 0x00000001 |
38 | 38 | ||
39 | const struct firmware *firmware; | ||
40 | static int supported_cpu; | ||
41 | |||
42 | struct equiv_cpu_entry { | 39 | struct equiv_cpu_entry { |
43 | u32 installed_cpu; | 40 | u32 installed_cpu; |
44 | u32 fixed_errata_mask; | 41 | u32 fixed_errata_mask; |
@@ -77,12 +74,15 @@ static struct equiv_cpu_entry *equiv_cpu_table; | |||
77 | 74 | ||
78 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | 75 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) |
79 | { | 76 | { |
77 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
80 | u32 dummy; | 78 | u32 dummy; |
81 | 79 | ||
82 | if (!supported_cpu) | ||
83 | return -1; | ||
84 | |||
85 | memset(csig, 0, sizeof(*csig)); | 80 | memset(csig, 0, sizeof(*csig)); |
81 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { | ||
82 | pr_warning("microcode: CPU%d: AMD CPU family 0x%x not " | ||
83 | "supported\n", cpu, c->x86); | ||
84 | return -1; | ||
85 | } | ||
86 | rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); | 86 | rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); |
87 | pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev); | 87 | pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev); |
88 | return 0; | 88 | return 0; |
@@ -294,10 +294,14 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) | |||
294 | 294 | ||
295 | static enum ucode_state request_microcode_fw(int cpu, struct device *device) | 295 | static enum ucode_state request_microcode_fw(int cpu, struct device *device) |
296 | { | 296 | { |
297 | const char *fw_name = "amd-ucode/microcode_amd.bin"; | ||
298 | const struct firmware *firmware; | ||
297 | enum ucode_state ret; | 299 | enum ucode_state ret; |
298 | 300 | ||
299 | if (firmware == NULL) | 301 | if (request_firmware(&firmware, fw_name, device)) { |
302 | printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); | ||
300 | return UCODE_NFOUND; | 303 | return UCODE_NFOUND; |
304 | } | ||
301 | 305 | ||
302 | if (*(u32 *)firmware->data != UCODE_MAGIC) { | 306 | if (*(u32 *)firmware->data != UCODE_MAGIC) { |
303 | pr_err("invalid UCODE_MAGIC (0x%08x)\n", | 307 | pr_err("invalid UCODE_MAGIC (0x%08x)\n", |
@@ -307,6 +311,8 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) | |||
307 | 311 | ||
308 | ret = generic_load_microcode(cpu, firmware->data, firmware->size); | 312 | ret = generic_load_microcode(cpu, firmware->data, firmware->size); |
309 | 313 | ||
314 | release_firmware(firmware); | ||
315 | |||
310 | return ret; | 316 | return ret; |
311 | } | 317 | } |
312 | 318 | ||
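The firmware object is now request-scoped rather than a module-lifetime global. The resulting pattern, sketched (dev stands in for the device argument):

    const struct firmware *fw;

    if (request_firmware(&fw, "amd-ucode/microcode_amd.bin", dev))
            return UCODE_NFOUND;    /* nothing to release on this path */
    /* ... validate fw->data and load fw->size bytes ... */
    release_firmware(fw);           /* paired with every successful request */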
@@ -325,31 +331,7 @@ static void microcode_fini_cpu_amd(int cpu) | |||
325 | uci->mc = NULL; | 331 | uci->mc = NULL; |
326 | } | 332 | } |
327 | 333 | ||
328 | void init_microcode_amd(struct device *device) | ||
329 | { | ||
330 | const char *fw_name = "amd-ucode/microcode_amd.bin"; | ||
331 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
332 | |||
333 | WARN_ON(c->x86_vendor != X86_VENDOR_AMD); | ||
334 | |||
335 | if (c->x86 < 0x10) { | ||
336 | pr_warning("AMD CPU family 0x%x not supported\n", c->x86); | ||
337 | return; | ||
338 | } | ||
339 | supported_cpu = 1; | ||
340 | |||
341 | if (request_firmware(&firmware, fw_name, device)) | ||
342 | pr_err("failed to load file %s\n", fw_name); | ||
343 | } | ||
344 | |||
345 | void fini_microcode_amd(void) | ||
346 | { | ||
347 | release_firmware(firmware); | ||
348 | } | ||
349 | |||
350 | static struct microcode_ops microcode_amd_ops = { | 334 | static struct microcode_ops microcode_amd_ops = { |
351 | .init = init_microcode_amd, | ||
352 | .fini = fini_microcode_amd, | ||
353 | .request_microcode_user = request_microcode_user, | 335 | .request_microcode_user = request_microcode_user, |
354 | .request_microcode_fw = request_microcode_fw, | 336 | .request_microcode_fw = request_microcode_fw, |
355 | .collect_cpu_info = collect_cpu_info_amd, | 337 | .collect_cpu_info = collect_cpu_info_amd, |
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 0c8632433090..cceb5bc3c3c2 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -521,9 +521,6 @@ static int __init microcode_init(void) | |||
521 | return PTR_ERR(microcode_pdev); | 521 | return PTR_ERR(microcode_pdev); |
522 | } | 522 | } |
523 | 523 | ||
524 | if (microcode_ops->init) | ||
525 | microcode_ops->init(µcode_pdev->dev); | ||
526 | |||
527 | get_online_cpus(); | 524 | get_online_cpus(); |
528 | mutex_lock(µcode_mutex); | 525 | mutex_lock(µcode_mutex); |
529 | 526 | ||
@@ -566,9 +563,6 @@ static void __exit microcode_exit(void) | |||
566 | 563 | ||
567 | platform_device_unregister(microcode_pdev); | 564 | platform_device_unregister(microcode_pdev); |
568 | 565 | ||
569 | if (microcode_ops->fini) | ||
570 | microcode_ops->fini(); | ||
571 | |||
572 | microcode_ops = NULL; | 566 | microcode_ops = NULL; |
573 | 567 | ||
574 | pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); | 568 | pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); |
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index ebd193e476ca..85a343e28937 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
@@ -328,7 +328,7 @@ static int apply_microcode(int cpu) | |||
328 | cpu_num, mc_intel->hdr.rev); | 328 | cpu_num, mc_intel->hdr.rev); |
329 | return -1; | 329 | return -1; |
330 | } | 330 | } |
331 | pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x \n", | 331 | pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n", |
332 | cpu_num, val[1], | 332 | cpu_num, val[1], |
333 | mc_intel->hdr.date & 0xffff, | 333 | mc_intel->hdr.date & 0xffff, |
334 | mc_intel->hdr.date >> 24, | 334 | mc_intel->hdr.date >> 24, |
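Beyond the whitespace fix, the format string above documents how the Intel microcode header packs its date: judging from the %x conversions and the shifts, the 32-bit field holds a BCD-encoded month/day/year (an assumption drawn from the code, not a spec quote). A decoding sketch under that assumption:

#include <linux/kernel.h>

static void print_ucode_date(unsigned int date)
{
	unsigned int year  = date & 0xffff;		/* low word:  YYYY (BCD) */
	unsigned int day   = (date >> 16) & 0xff;	/* byte 2:    DD         */
	unsigned int month = date >> 24;		/* high byte: MM         */

	printk(KERN_INFO "date = %04x-%02x-%02x\n", year, month, day);
}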
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index 712d15fdc416..71825806cd44 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c | |||
@@ -7,6 +7,8 @@ | |||
7 | #include <linux/string.h> | 7 | #include <linux/string.h> |
8 | #include <linux/pci.h> | 8 | #include <linux/pci.h> |
9 | #include <linux/dmi.h> | 9 | #include <linux/dmi.h> |
10 | #include <linux/range.h> | ||
11 | |||
10 | #include <asm/pci-direct.h> | 12 | #include <asm/pci-direct.h> |
11 | #include <linux/sort.h> | 13 | #include <linux/sort.h> |
12 | #include <asm/io.h> | 14 | #include <asm/io.h> |
@@ -30,11 +32,6 @@ static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { | |||
30 | { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, | 32 | { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, |
31 | }; | 33 | }; |
32 | 34 | ||
33 | struct range { | ||
34 | u64 start; | ||
35 | u64 end; | ||
36 | }; | ||
37 | |||
38 | static int __cpuinit cmp_range(const void *x1, const void *x2) | 35 | static int __cpuinit cmp_range(const void *x1, const void *x2) |
39 | { | 36 | { |
40 | const struct range *r1 = x1; | 37 | const struct range *r1 = x1; |
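struct range moving into <linux/range.h> means comparators like cmp_range() above can now be shared by every user of the generic sort(). A self-contained sketch; note the fields are u64, so compare rather than subtract to avoid truncation in the int return value:

#include <linux/range.h>
#include <linux/sort.h>

static int cmp_range_sketch(const void *a, const void *b)
{
	const struct range *r1 = a, *r2 = b;

	if (r1->start < r2->start)
		return -1;
	return r1->start > r2->start;
}

/* usage: sort(ranges, nr_ranges, sizeof(*ranges), cmp_range_sketch, NULL); */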
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 89f386f044e4..e0bc186d7501 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/kernel.h> | 23 | #include <linux/kernel.h> |
24 | #include <linux/bug.h> | 24 | #include <linux/bug.h> |
25 | #include <linux/mm.h> | 25 | #include <linux/mm.h> |
26 | #include <linux/gfp.h> | ||
26 | 27 | ||
27 | #include <asm/system.h> | 28 | #include <asm/system.h> |
28 | #include <asm/page.h> | 29 | #include <asm/page.h> |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 40b54ceb68b5..e81030f71a8f 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -359,13 +359,6 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) | |||
359 | x86_init.mpparse.mpc_record(1); | 359 | x86_init.mpparse.mpc_record(1); |
360 | } | 360 | } |
361 | 361 | ||
362 | #ifdef CONFIG_X86_BIGSMP | ||
363 | generic_bigsmp_probe(); | ||
364 | #endif | ||
365 | |||
366 | if (apic->setup_apic_routing) | ||
367 | apic->setup_apic_routing(); | ||
368 | |||
369 | if (!num_processors) | 362 | if (!num_processors) |
370 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); | 363 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); |
371 | return num_processors; | 364 | return num_processors; |
@@ -671,7 +664,7 @@ static void __init smp_reserve_memory(struct mpf_intel *mpf) | |||
671 | { | 664 | { |
672 | unsigned long size = get_mpc_size(mpf->physptr); | 665 | unsigned long size = get_mpc_size(mpf->physptr); |
673 | 666 | ||
674 | reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc"); | 667 | reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc"); |
675 | } | 668 | } |
676 | 669 | ||
677 | static int __init smp_scan_config(unsigned long base, unsigned long length) | 670 | static int __init smp_scan_config(unsigned long base, unsigned long length) |
@@ -700,7 +693,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) | |||
700 | mpf, (u64)virt_to_phys(mpf)); | 693 | mpf, (u64)virt_to_phys(mpf)); |
701 | 694 | ||
702 | mem = virt_to_phys(mpf); | 695 | mem = virt_to_phys(mpf); |
703 | reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf"); | 696 | reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf"); |
704 | if (mpf->physptr) | 697 | if (mpf->physptr) |
705 | smp_reserve_memory(mpf); | 698 | smp_reserve_memory(mpf); |
706 | 699 | ||
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c index 3b7078abc871..0aad8670858e 100644 --- a/arch/x86/kernel/mrst.c +++ b/arch/x86/kernel/mrst.c | |||
@@ -10,8 +10,211 @@ | |||
10 | * of the License. | 10 | * of the License. |
11 | */ | 11 | */ |
12 | #include <linux/init.h> | 12 | #include <linux/init.h> |
13 | #include <linux/kernel.h> | ||
14 | #include <linux/sfi.h> | ||
15 | #include <linux/irq.h> | ||
16 | #include <linux/module.h> | ||
13 | 17 | ||
14 | #include <asm/setup.h> | 18 | #include <asm/setup.h> |
19 | #include <asm/mpspec_def.h> | ||
20 | #include <asm/hw_irq.h> | ||
21 | #include <asm/apic.h> | ||
22 | #include <asm/io_apic.h> | ||
23 | #include <asm/mrst.h> | ||
24 | #include <asm/io.h> | ||
25 | #include <asm/i8259.h> | ||
26 | #include <asm/apb_timer.h> | ||
27 | |||
28 | static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM]; | ||
29 | static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM]; | ||
30 | int sfi_mtimer_num; | ||
31 | |||
32 | struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX]; | ||
33 | EXPORT_SYMBOL_GPL(sfi_mrtc_array); | ||
34 | int sfi_mrtc_num; | ||
35 | |||
36 | static inline void assign_to_mp_irq(struct mpc_intsrc *m, | ||
37 | struct mpc_intsrc *mp_irq) | ||
38 | { | ||
39 | memcpy(mp_irq, m, sizeof(struct mpc_intsrc)); | ||
40 | } | ||
41 | |||
42 | static inline int mp_irq_cmp(struct mpc_intsrc *mp_irq, | ||
43 | struct mpc_intsrc *m) | ||
44 | { | ||
45 | return memcmp(mp_irq, m, sizeof(struct mpc_intsrc)); | ||
46 | } | ||
47 | |||
48 | static void save_mp_irq(struct mpc_intsrc *m) | ||
49 | { | ||
50 | int i; | ||
51 | |||
52 | for (i = 0; i < mp_irq_entries; i++) { | ||
53 | if (!mp_irq_cmp(&mp_irqs[i], m)) | ||
54 | return; | ||
55 | } | ||
56 | |||
57 | assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]); | ||
58 | if (++mp_irq_entries == MAX_IRQ_SOURCES) | ||
59 | panic("Max # of irq sources exceeded!!\n"); | ||
60 | } | ||
61 | |||
62 | /* parse all the mtimer info into a static mtimer array */ | ||
63 | static int __init sfi_parse_mtmr(struct sfi_table_header *table) | ||
64 | { | ||
65 | struct sfi_table_simple *sb; | ||
66 | struct sfi_timer_table_entry *pentry; | ||
67 | struct mpc_intsrc mp_irq; | ||
68 | int totallen; | ||
69 | |||
70 | sb = (struct sfi_table_simple *)table; | ||
71 | if (!sfi_mtimer_num) { | ||
72 | sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb, | ||
73 | struct sfi_timer_table_entry); | ||
74 | pentry = (struct sfi_timer_table_entry *) sb->pentry; | ||
75 | totallen = sfi_mtimer_num * sizeof(*pentry); | ||
76 | memcpy(sfi_mtimer_array, pentry, totallen); | ||
77 | } | ||
78 | |||
79 | printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num); | ||
80 | pentry = sfi_mtimer_array; | ||
81 | for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) { | ||
82 | printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz," | ||
83 | " irq = %d\n", totallen, (u32)pentry->phys_addr, | ||
84 | pentry->freq_hz, pentry->irq); | ||
85 | if (!pentry->irq) | ||
86 | continue; | ||
87 | mp_irq.type = MP_IOAPIC; | ||
88 | mp_irq.irqtype = mp_INT; | ||
89 | /* triggering mode edge bit 2-3, active high polarity bit 0-1 */ | ||
90 | mp_irq.irqflag = 5; | ||
91 | mp_irq.srcbus = 0; | ||
92 | mp_irq.srcbusirq = pentry->irq; /* IRQ */ | ||
93 | mp_irq.dstapic = MP_APIC_ALL; | ||
94 | mp_irq.dstirq = pentry->irq; | ||
95 | save_mp_irq(&mp_irq); | ||
96 | } | ||
97 | |||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | struct sfi_timer_table_entry *sfi_get_mtmr(int hint) | ||
102 | { | ||
103 | int i; | ||
104 | if (hint < sfi_mtimer_num) { | ||
105 | if (!sfi_mtimer_usage[hint]) { | ||
106 | pr_debug("hint taken for timer %d irq %d\n",\ | ||
107 | hint, sfi_mtimer_array[hint].irq); | ||
108 | sfi_mtimer_usage[hint] = 1; | ||
109 | return &sfi_mtimer_array[hint]; | ||
110 | } | ||
111 | } | ||
112 | /* take the first timer available */ | ||
113 | for (i = 0; i < sfi_mtimer_num;) { | ||
114 | if (!sfi_mtimer_usage[i]) { | ||
115 | sfi_mtimer_usage[i] = 1; | ||
116 | return &sfi_mtimer_array[i]; | ||
117 | } | ||
118 | i++; | ||
119 | } | ||
120 | return NULL; | ||
121 | } | ||
122 | |||
123 | void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr) | ||
124 | { | ||
125 | int i; | ||
126 | for (i = 0; i < sfi_mtimer_num;) { | ||
127 | if (mtmr->irq == sfi_mtimer_array[i].irq) { | ||
128 | sfi_mtimer_usage[i] = 0; | ||
129 | return; | ||
130 | } | ||
131 | i++; | ||
132 | } | ||
133 | } | ||
134 | |||
135 | /* parse all the mrtc info into a global mrtc array */ | ||
136 | int __init sfi_parse_mrtc(struct sfi_table_header *table) | ||
137 | { | ||
138 | struct sfi_table_simple *sb; | ||
139 | struct sfi_rtc_table_entry *pentry; | ||
140 | struct mpc_intsrc mp_irq; | ||
141 | |||
142 | int totallen; | ||
143 | |||
144 | sb = (struct sfi_table_simple *)table; | ||
145 | if (!sfi_mrtc_num) { | ||
146 | sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb, | ||
147 | struct sfi_rtc_table_entry); | ||
148 | pentry = (struct sfi_rtc_table_entry *)sb->pentry; | ||
149 | totallen = sfi_mrtc_num * sizeof(*pentry); | ||
150 | memcpy(sfi_mrtc_array, pentry, totallen); | ||
151 | } | ||
152 | |||
153 | printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num); | ||
154 | pentry = sfi_mrtc_array; | ||
155 | for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { | ||
156 | printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n", | ||
157 | totallen, (u32)pentry->phys_addr, pentry->irq); | ||
158 | mp_irq.type = MP_IOAPIC; | ||
159 | mp_irq.irqtype = mp_INT; | ||
160 | mp_irq.irqflag = 0; | ||
161 | mp_irq.srcbus = 0; | ||
162 | mp_irq.srcbusirq = pentry->irq; /* IRQ */ | ||
163 | mp_irq.dstapic = MP_APIC_ALL; | ||
164 | mp_irq.dstirq = pentry->irq; | ||
165 | save_mp_irq(&mp_irq); | ||
166 | } | ||
167 | return 0; | ||
168 | } | ||
169 | |||
170 | /* | ||
171 | * The secondary clock in Moorestown can be the APBT or the LAPIC clock; it | ||
172 | * defaults to the APBT, but a cmdline option can override that. | ||
173 | */ | ||
174 | static void __cpuinit mrst_setup_secondary_clock(void) | ||
175 | { | ||
176 | /* restore default lapic clock if disabled by cmdline */ | ||
177 | if (disable_apbt_percpu) | ||
178 | return setup_secondary_APIC_clock(); | ||
179 | apbt_setup_secondary_clock(); | ||
180 | } | ||
181 | |||
182 | static unsigned long __init mrst_calibrate_tsc(void) | ||
183 | { | ||
184 | unsigned long flags, fast_calibrate; | ||
185 | |||
186 | local_irq_save(flags); | ||
187 | fast_calibrate = apbt_quick_calibrate(); | ||
188 | local_irq_restore(flags); | ||
189 | |||
190 | if (fast_calibrate) | ||
191 | return fast_calibrate; | ||
192 | |||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | void __init mrst_time_init(void) | ||
197 | { | ||
198 | sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); | ||
199 | pre_init_apic_IRQ0(); | ||
200 | apbt_time_init(); | ||
201 | } | ||
202 | |||
203 | void __init mrst_rtc_init(void) | ||
204 | { | ||
205 | sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); | ||
206 | } | ||
207 | |||
208 | /* | ||
209 | * If we use the per-CPU APB timer, the boot clock is already set up. If we | ||
210 | * use the LAPIC timer and one APBT timer for broadcast, set up the LAPIC boot clock. | ||
211 | */ | ||
212 | static void __init mrst_setup_boot_clock(void) | ||
213 | { | ||
214 | pr_info("%s: per cpu apbt flag %d \n", __func__, disable_apbt_percpu); | ||
215 | if (disable_apbt_percpu) | ||
216 | setup_boot_APIC_clock(); | ||
217 | } | ||
15 | 218 | ||
16 | /* | 219 | /* |
17 | * Moorestown specific x86_init function overrides and early setup | 220 | * Moorestown specific x86_init function overrides and early setup |
@@ -21,4 +224,17 @@ void __init x86_mrst_early_setup(void) | |||
21 | { | 224 | { |
22 | x86_init.resources.probe_roms = x86_init_noop; | 225 | x86_init.resources.probe_roms = x86_init_noop; |
23 | x86_init.resources.reserve_resources = x86_init_noop; | 226 | x86_init.resources.reserve_resources = x86_init_noop; |
227 | |||
228 | x86_init.timers.timer_init = mrst_time_init; | ||
229 | x86_init.timers.setup_percpu_clockev = mrst_setup_boot_clock; | ||
230 | |||
231 | x86_init.irqs.pre_vector_init = x86_init_noop; | ||
232 | |||
233 | x86_cpuinit.setup_percpu_clockev = mrst_setup_secondary_clock; | ||
234 | |||
235 | x86_platform.calibrate_tsc = mrst_calibrate_tsc; | ||
236 | x86_init.pci.init = pci_mrst_init; | ||
237 | x86_init.pci.fixup_irqs = x86_init_noop; | ||
238 | |||
239 | legacy_pic = &null_legacy_pic; | ||
24 | } | 240 | } |
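A note on the mp_irq.irqflag = 5 assignment in the SFI timer parsing above: per the MP specification, bits 0-1 of irqflag carry polarity and bits 2-3 carry trigger mode, so "edge triggered, active high" is (01b << 2) | 01b = 5. A sketch with illustrative macro names (they are not defined by this patch):

#define SKETCH_MP_POL_ACTIVE_HIGH	0x1		/* bits 0-1 = 01b */
#define SKETCH_MP_TRIG_EDGE		(0x1 << 2)	/* bits 2-3 = 01b */

static unsigned short mp_irqflag_edge_high(void)
{
	return SKETCH_MP_TRIG_EDGE | SKETCH_MP_POL_ACTIVE_HIGH;	/* == 5 */
}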
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 4bd93c9b2b27..4d4468e9f47c 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/cpu.h> | 37 | #include <linux/cpu.h> |
38 | #include <linux/notifier.h> | 38 | #include <linux/notifier.h> |
39 | #include <linux/uaccess.h> | 39 | #include <linux/uaccess.h> |
40 | #include <linux/gfp.h> | ||
40 | 41 | ||
41 | #include <asm/processor.h> | 42 | #include <asm/processor.h> |
42 | #include <asm/msr.h> | 43 | #include <asm/msr.h> |
@@ -285,7 +286,7 @@ static void __exit msr_exit(void) | |||
285 | for_each_online_cpu(cpu) | 286 | for_each_online_cpu(cpu) |
286 | msr_device_destroy(cpu); | 287 | msr_device_destroy(cpu); |
287 | class_destroy(msr_class); | 288 | class_destroy(msr_class); |
288 | unregister_chrdev(MSR_MAJOR, "cpu/msr"); | 289 | __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); |
289 | unregister_hotcpu_notifier(&msr_class_cpu_notifier); | 290 | unregister_hotcpu_notifier(&msr_class_cpu_notifier); |
290 | } | 291 | } |
291 | 292 | ||
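The msr_exit() change above pairs teardown with a registration that covers only minors 0..NR_CPUS-1 instead of the whole major. A sketch of the symmetric pair, assuming __register_chrdev() is used on the init side with the same base and count:

#include <linux/fs.h>
#include <linux/major.h>

static int __init msr_region_init_sketch(const struct file_operations *fops)
{
	/* claim exactly NR_CPUS minors under MSR_MAJOR */
	return __register_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr", fops);
}

static void msr_region_exit_sketch(void)
{
	/* release the same minor range, and only that range */
	__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
}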
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 9d1d263f786f..8297160c41b3 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c | |||
@@ -17,7 +17,9 @@ | |||
17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
18 | #include <linux/io.h> | 18 | #include <linux/io.h> |
19 | #include <linux/string.h> | 19 | #include <linux/string.h> |
20 | |||
20 | #include <asm/geode.h> | 21 | #include <asm/geode.h> |
22 | #include <asm/setup.h> | ||
21 | #include <asm/olpc.h> | 23 | #include <asm/olpc.h> |
22 | 24 | ||
23 | #ifdef CONFIG_OPEN_FIRMWARE | 25 | #ifdef CONFIG_OPEN_FIRMWARE |
@@ -243,9 +245,11 @@ static int __init olpc_init(void) | |||
243 | olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, | 245 | olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, |
244 | (unsigned char *) &olpc_platform_info.ecver, 1); | 246 | (unsigned char *) &olpc_platform_info.ecver, 1); |
245 | 247 | ||
246 | /* check to see if the VSA exists */ | 248 | #ifdef CONFIG_PCI_OLPC |
247 | if (cs5535_has_vsa2()) | 249 | /* If the VSA exists let it emulate PCI, if not emulate in kernel */ |
248 | olpc_platform_info.flags |= OLPC_F_VSA; | 250 | if (!cs5535_has_vsa2()) |
251 | x86_init.pci.arch_init = pci_olpc_init; | ||
252 | #endif | ||
249 | 253 | ||
250 | printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", | 254 | printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", |
251 | ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", | 255 | ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1b1739d16310..1db183ed7c01 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -428,10 +428,6 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
428 | .ptep_modify_prot_start = __ptep_modify_prot_start, | 428 | .ptep_modify_prot_start = __ptep_modify_prot_start, |
429 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, | 429 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, |
430 | 430 | ||
431 | #ifdef CONFIG_HIGHPTE | ||
432 | .kmap_atomic_pte = kmap_atomic, | ||
433 | #endif | ||
434 | |||
435 | #if PAGETABLE_LEVELS >= 3 | 431 | #if PAGETABLE_LEVELS >= 3 |
436 | #ifdef CONFIG_X86_PAE | 432 | #ifdef CONFIG_X86_PAE |
437 | .set_pte_atomic = native_set_pte_atomic, | 433 | .set_pte_atomic = native_set_pte_atomic, |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 2bbde6078143..fb99f7edb341 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -1309,7 +1309,7 @@ static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) | |||
1309 | /* | 1309 | /* |
1310 | * get_tce_space_from_tar(): | 1310 | * get_tce_space_from_tar(): |
1311 | * Function for kdump case. Get the tce tables from first kernel | 1311 | * Function for kdump case. Get the tce tables from first kernel |
1312 | * by reading the contents of the base adress register of calgary iommu | 1312 | * by reading the contents of the base address register of calgary iommu |
1313 | */ | 1313 | */ |
1314 | static void __init get_tce_space_from_tar(void) | 1314 | static void __init get_tce_space_from_tar(void) |
1315 | { | 1315 | { |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 75e14e21f61a..4b7e3d8b01dd 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -2,6 +2,7 @@ | |||
2 | #include <linux/dma-debug.h> | 2 | #include <linux/dma-debug.h> |
3 | #include <linux/dmar.h> | 3 | #include <linux/dmar.h> |
4 | #include <linux/bootmem.h> | 4 | #include <linux/bootmem.h> |
5 | #include <linux/gfp.h> | ||
5 | #include <linux/pci.h> | 6 | #include <linux/pci.h> |
6 | #include <linux/kmemleak.h> | 7 | #include <linux/kmemleak.h> |
7 | 8 | ||
@@ -38,7 +39,7 @@ int iommu_detected __read_mostly = 0; | |||
38 | * This variable becomes 1 if iommu=pt is passed on the kernel command line. | 39 | * This variable becomes 1 if iommu=pt is passed on the kernel command line. |
39 | * If this variable is 1, IOMMU implementations do no DMA translation for | 40 | * If this variable is 1, IOMMU implementations do no DMA translation for |
40 | * devices and allow every device to access to whole physical memory. This is | 41 | * devices and allow every device to access to whole physical memory. This is |
41 | * useful if a user want to use an IOMMU only for KVM device assignment to | 42 | * useful if a user wants to use an IOMMU only for KVM device assignment to |
42 | * guests and not for driver dma translation. | 43 | * guests and not for driver dma translation. |
43 | */ | 44 | */ |
44 | int iommu_pass_through __read_mostly; | 45 | int iommu_pass_through __read_mostly; |
@@ -65,7 +66,7 @@ int dma_set_mask(struct device *dev, u64 mask) | |||
65 | } | 66 | } |
66 | EXPORT_SYMBOL(dma_set_mask); | 67 | EXPORT_SYMBOL(dma_set_mask); |
67 | 68 | ||
68 | #ifdef CONFIG_X86_64 | 69 | #if defined(CONFIG_X86_64) && !defined(CONFIG_NUMA) |
69 | static __initdata void *dma32_bootmem_ptr; | 70 | static __initdata void *dma32_bootmem_ptr; |
70 | static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); | 71 | static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); |
71 | 72 | ||
@@ -116,14 +117,21 @@ static void __init dma32_free_bootmem(void) | |||
116 | dma32_bootmem_ptr = NULL; | 117 | dma32_bootmem_ptr = NULL; |
117 | dma32_bootmem_size = 0; | 118 | dma32_bootmem_size = 0; |
118 | } | 119 | } |
120 | #else | ||
121 | void __init dma32_reserve_bootmem(void) | ||
122 | { | ||
123 | } | ||
124 | static void __init dma32_free_bootmem(void) | ||
125 | { | ||
126 | } | ||
127 | |||
119 | #endif | 128 | #endif |
120 | 129 | ||
121 | void __init pci_iommu_alloc(void) | 130 | void __init pci_iommu_alloc(void) |
122 | { | 131 | { |
123 | #ifdef CONFIG_X86_64 | ||
124 | /* free the range so the IOMMU can get some range below 4G */ | 132 | /* free the range so the IOMMU can get some range below 4G */ |
125 | dma32_free_bootmem(); | 133 | dma32_free_bootmem(); |
126 | #endif | 134 | |
127 | if (pci_swiotlb_detect()) | 135 | if (pci_swiotlb_detect()) |
128 | goto out; | 136 | goto out; |
129 | 137 | ||
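The new #else branch above is the empty-stub idiom: define no-op versions of dma32_reserve_bootmem()/dma32_free_bootmem() for configurations that do not need them, so pci_iommu_alloc() can call them unconditionally and shed its own #ifdef. The generic shape, with CONFIG_FOO as a placeholder:

#include <linux/init.h>

#ifdef CONFIG_FOO
void __init foo_reserve_bootmem(void)
{
	/* real reservation work here */
}
#else
void __init foo_reserve_bootmem(void)
{
	/* stub: compiles to nothing, callers need no #ifdef */
}
#endif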
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 34de53b46f87..0f7f130caa67 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/iommu-helper.h> | 29 | #include <linux/iommu-helper.h> |
30 | #include <linux/sysdev.h> | 30 | #include <linux/sysdev.h> |
31 | #include <linux/io.h> | 31 | #include <linux/io.h> |
32 | #include <linux/gfp.h> | ||
32 | #include <asm/atomic.h> | 33 | #include <asm/atomic.h> |
33 | #include <asm/mtrr.h> | 34 | #include <asm/mtrr.h> |
34 | #include <asm/pgtable.h> | 35 | #include <asm/pgtable.h> |
@@ -564,6 +565,9 @@ static void enable_gart_translations(void) | |||
564 | 565 | ||
565 | enable_gart_translation(dev, __pa(agp_gatt_table)); | 566 | enable_gart_translation(dev, __pa(agp_gatt_table)); |
566 | } | 567 | } |
568 | |||
569 | /* Flush the GART-TLB to remove stale entries */ | ||
570 | k8_flush_garts(); | ||
567 | } | 571 | } |
568 | 572 | ||
569 | /* | 573 | /* |
@@ -735,7 +739,7 @@ int __init gart_iommu_init(void) | |||
735 | unsigned long scratch; | 739 | unsigned long scratch; |
736 | long i; | 740 | long i; |
737 | 741 | ||
738 | if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) | 742 | if (num_k8_northbridges == 0) |
739 | return 0; | 743 | return 0; |
740 | 744 | ||
741 | #ifndef CONFIG_AGP_AMD64 | 745 | #ifndef CONFIG_AGP_AMD64 |
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 22be12b60a8f..3af4af810c07 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/scatterlist.h> | 4 | #include <linux/scatterlist.h> |
5 | #include <linux/string.h> | 5 | #include <linux/string.h> |
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/gfp.h> | ||
7 | #include <linux/pci.h> | 8 | #include <linux/pci.h> |
8 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
9 | 10 | ||
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index cf1e04b2ad65..28ad9f4d8b94 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -110,8 +110,8 @@ void show_regs_common(void) | |||
110 | if (!product) | 110 | if (!product) |
111 | product = ""; | 111 | product = ""; |
112 | 112 | ||
113 | printk("\n"); | 113 | printk(KERN_CONT "\n"); |
114 | printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n", | 114 | printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n", |
115 | current->pid, current->comm, print_tainted(), | 115 | current->pid, current->comm, print_tainted(), |
116 | init_utsname()->release, | 116 | init_utsname()->release, |
117 | (int)strcspn(init_utsname()->version, " "), | 117 | (int)strcspn(init_utsname()->version, " "), |
@@ -122,18 +122,6 @@ void flush_thread(void) | |||
122 | { | 122 | { |
123 | struct task_struct *tsk = current; | 123 | struct task_struct *tsk = current; |
124 | 124 | ||
125 | #ifdef CONFIG_X86_64 | ||
126 | if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { | ||
127 | clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); | ||
128 | if (test_tsk_thread_flag(tsk, TIF_IA32)) { | ||
129 | clear_tsk_thread_flag(tsk, TIF_IA32); | ||
130 | } else { | ||
131 | set_tsk_thread_flag(tsk, TIF_IA32); | ||
132 | current_thread_info()->status |= TS_COMPAT; | ||
133 | } | ||
134 | } | ||
135 | #endif | ||
136 | |||
137 | flush_ptrace_hw_breakpoint(tsk); | 125 | flush_ptrace_hw_breakpoint(tsk); |
138 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | 126 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); |
139 | /* | 127 | /* |
@@ -295,6 +283,8 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) | |||
295 | regs.es = __USER_DS; | 283 | regs.es = __USER_DS; |
296 | regs.fs = __KERNEL_PERCPU; | 284 | regs.fs = __KERNEL_PERCPU; |
297 | regs.gs = __KERNEL_STACK_CANARY; | 285 | regs.gs = __KERNEL_STACK_CANARY; |
286 | #else | ||
287 | regs.ss = __KERNEL_DS; | ||
298 | #endif | 288 | #endif |
299 | 289 | ||
300 | regs.orig_ax = -1; | 290 | regs.orig_ax = -1; |
@@ -536,21 +526,37 @@ static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) | |||
536 | } | 526 | } |
537 | 527 | ||
538 | /* | 528 | /* |
539 | * Check for AMD CPUs, which have potentially C1E support | 529 | * Check for AMD CPUs, where the APIC timer interrupt does not wake the CPU from C1E. |
530 | * For more information see | ||
531 | * - Erratum #400 for NPT family 0xf and family 0x10 CPUs | ||
532 | * - Erratum #365 for family 0x11 (not affected because C1e not in use) | ||
540 | */ | 533 | */ |
541 | static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | 534 | static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) |
542 | { | 535 | { |
536 | u64 val; | ||
543 | if (c->x86_vendor != X86_VENDOR_AMD) | 537 | if (c->x86_vendor != X86_VENDOR_AMD) |
544 | return 0; | 538 | goto no_c1e_idle; |
545 | |||
546 | if (c->x86 < 0x0F) | ||
547 | return 0; | ||
548 | 539 | ||
549 | /* Family 0x0f models < rev F do not have C1E */ | 540 | /* Family 0x0f models < rev F do not have C1E */ |
550 | if (c->x86 == 0x0f && c->x86_model < 0x40) | 541 | if (c->x86 == 0x0F && c->x86_model >= 0x40) |
551 | return 0; | 542 | return 1; |
552 | 543 | ||
553 | return 1; | 544 | if (c->x86 == 0x10) { |
545 | /* | ||
546 | * check OSVW bit for CPUs that are not affected | ||
547 | * by erratum #400 | ||
548 | */ | ||
549 | rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); | ||
550 | if (val >= 2) { | ||
551 | rdmsrl(MSR_AMD64_OSVW_STATUS, val); | ||
552 | if (!(val & BIT(1))) | ||
553 | goto no_c1e_idle; | ||
554 | } | ||
555 | return 1; | ||
556 | } | ||
557 | |||
558 | no_c1e_idle: | ||
559 | return 0; | ||
554 | } | 560 | } |
555 | 561 | ||
556 | static cpumask_var_t c1e_mask; | 562 | static cpumask_var_t c1e_mask; |
@@ -617,7 +623,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | |||
617 | { | 623 | { |
618 | #ifdef CONFIG_SMP | 624 | #ifdef CONFIG_SMP |
619 | if (pm_idle == poll_idle && smp_num_siblings > 1) { | 625 | if (pm_idle == poll_idle && smp_num_siblings > 1) { |
620 | printk(KERN_WARNING "WARNING: polling idle and HT enabled," | 626 | printk_once(KERN_WARNING "WARNING: polling idle and HT enabled," |
621 | " performance may degrade.\n"); | 627 | " performance may degrade.\n"); |
622 | } | 628 | } |
623 | #endif | 629 | #endif |
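The rewritten check_c1e_idle() above consults the OSVW (OS Visible Workaround) MSRs so family 0x10 parts can report whether erratum #400 actually applies to them. A generalized sketch of that query; it assumes the erratum ID is below 64, since only the first status MSR is read, and treats IDs beyond the BIOS-reported length as "affected", matching the conservative fallback above:

#include <linux/bitops.h>
#include <asm/msr.h>

static int osvw_erratum_applies(unsigned int osvw_id)
{
	u64 len, status;

	rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, len);
	if (osvw_id >= len)
		return 1;	/* BIOS gives no answer: assume affected */

	rdmsrl(MSR_AMD64_OSVW_STATUS, status);
	return !!(status & BIT(osvw_id));	/* bit set => erratum applies */
}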
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index fe6a34e42bde..f6c62667e30c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -139,16 +139,16 @@ void __show_regs(struct pt_regs *regs, int all) | |||
139 | 139 | ||
140 | show_regs_common(); | 140 | show_regs_common(); |
141 | 141 | ||
142 | printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", | 142 | printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", |
143 | (u16)regs->cs, regs->ip, regs->flags, | 143 | (u16)regs->cs, regs->ip, regs->flags, |
144 | smp_processor_id()); | 144 | smp_processor_id()); |
145 | print_symbol("EIP is at %s\n", regs->ip); | 145 | print_symbol("EIP is at %s\n", regs->ip); |
146 | 146 | ||
147 | printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", | 147 | printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", |
148 | regs->ax, regs->bx, regs->cx, regs->dx); | 148 | regs->ax, regs->bx, regs->cx, regs->dx); |
149 | printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", | 149 | printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", |
150 | regs->si, regs->di, regs->bp, sp); | 150 | regs->si, regs->di, regs->bp, sp); |
151 | printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", | 151 | printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", |
152 | (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss); | 152 | (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss); |
153 | 153 | ||
154 | if (!all) | 154 | if (!all) |
@@ -158,19 +158,19 @@ void __show_regs(struct pt_regs *regs, int all) | |||
158 | cr2 = read_cr2(); | 158 | cr2 = read_cr2(); |
159 | cr3 = read_cr3(); | 159 | cr3 = read_cr3(); |
160 | cr4 = read_cr4_safe(); | 160 | cr4 = read_cr4_safe(); |
161 | printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", | 161 | printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", |
162 | cr0, cr2, cr3, cr4); | 162 | cr0, cr2, cr3, cr4); |
163 | 163 | ||
164 | get_debugreg(d0, 0); | 164 | get_debugreg(d0, 0); |
165 | get_debugreg(d1, 1); | 165 | get_debugreg(d1, 1); |
166 | get_debugreg(d2, 2); | 166 | get_debugreg(d2, 2); |
167 | get_debugreg(d3, 3); | 167 | get_debugreg(d3, 3); |
168 | printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", | 168 | printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", |
169 | d0, d1, d2, d3); | 169 | d0, d1, d2, d3); |
170 | 170 | ||
171 | get_debugreg(d6, 6); | 171 | get_debugreg(d6, 6); |
172 | get_debugreg(d7, 7); | 172 | get_debugreg(d7, 7); |
173 | printk("DR6: %08lx DR7: %08lx\n", | 173 | printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n", |
174 | d6, d7); | 174 | d6, d7); |
175 | } | 175 | } |
176 | 176 | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 418f860880a2..dc9690b4c4cc 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -161,19 +161,19 @@ void __show_regs(struct pt_regs *regs, int all) | |||
161 | unsigned int ds, cs, es; | 161 | unsigned int ds, cs, es; |
162 | 162 | ||
163 | show_regs_common(); | 163 | show_regs_common(); |
164 | printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); | 164 | printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); |
165 | printk_address(regs->ip, 1); | 165 | printk_address(regs->ip, 1); |
166 | printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, | 166 | printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, |
167 | regs->sp, regs->flags); | 167 | regs->sp, regs->flags); |
168 | printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", | 168 | printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n", |
169 | regs->ax, regs->bx, regs->cx); | 169 | regs->ax, regs->bx, regs->cx); |
170 | printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", | 170 | printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n", |
171 | regs->dx, regs->si, regs->di); | 171 | regs->dx, regs->si, regs->di); |
172 | printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", | 172 | printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n", |
173 | regs->bp, regs->r8, regs->r9); | 173 | regs->bp, regs->r8, regs->r9); |
174 | printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", | 174 | printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n", |
175 | regs->r10, regs->r11, regs->r12); | 175 | regs->r10, regs->r11, regs->r12); |
176 | printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", | 176 | printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", |
177 | regs->r13, regs->r14, regs->r15); | 177 | regs->r13, regs->r14, regs->r15); |
178 | 178 | ||
179 | asm("movl %%ds,%0" : "=r" (ds)); | 179 | asm("movl %%ds,%0" : "=r" (ds)); |
@@ -194,21 +194,21 @@ void __show_regs(struct pt_regs *regs, int all) | |||
194 | cr3 = read_cr3(); | 194 | cr3 = read_cr3(); |
195 | cr4 = read_cr4(); | 195 | cr4 = read_cr4(); |
196 | 196 | ||
197 | printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", | 197 | printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", |
198 | fs, fsindex, gs, gsindex, shadowgs); | 198 | fs, fsindex, gs, gsindex, shadowgs); |
199 | printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, | 199 | printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, |
200 | es, cr0); | 200 | es, cr0); |
201 | printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, | 201 | printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, |
202 | cr4); | 202 | cr4); |
203 | 203 | ||
204 | get_debugreg(d0, 0); | 204 | get_debugreg(d0, 0); |
205 | get_debugreg(d1, 1); | 205 | get_debugreg(d1, 1); |
206 | get_debugreg(d2, 2); | 206 | get_debugreg(d2, 2); |
207 | printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); | 207 | printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); |
208 | get_debugreg(d3, 3); | 208 | get_debugreg(d3, 3); |
209 | get_debugreg(d6, 6); | 209 | get_debugreg(d6, 6); |
210 | get_debugreg(d7, 7); | 210 | get_debugreg(d7, 7); |
211 | printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); | 211 | printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); |
212 | } | 212 | } |
213 | 213 | ||
214 | void release_thread(struct task_struct *dead_task) | 214 | void release_thread(struct task_struct *dead_task) |
@@ -515,6 +515,18 @@ void set_personality_64bit(void) | |||
515 | current->personality &= ~READ_IMPLIES_EXEC; | 515 | current->personality &= ~READ_IMPLIES_EXEC; |
516 | } | 516 | } |
517 | 517 | ||
518 | void set_personality_ia32(void) | ||
519 | { | ||
520 | /* inherit personality from parent */ | ||
521 | |||
522 | /* Make sure to be in 32bit mode */ | ||
523 | set_thread_flag(TIF_IA32); | ||
524 | current->personality |= force_personality32; | ||
525 | |||
526 | /* Prepare the first "return" to user space */ | ||
527 | current_thread_info()->status |= TS_COMPAT; | ||
528 | } | ||
529 | |||
518 | unsigned long get_wchan(struct task_struct *p) | 530 | unsigned long get_wchan(struct task_struct *p) |
519 | { | 531 | { |
520 | unsigned long stack; | 532 | unsigned long stack; |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 017d937639fe..2e9b55027b7e 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
13 | #include <linux/smp.h> | 13 | #include <linux/smp.h> |
14 | #include <linux/errno.h> | 14 | #include <linux/errno.h> |
15 | #include <linux/slab.h> | ||
15 | #include <linux/ptrace.h> | 16 | #include <linux/ptrace.h> |
16 | #include <linux/regset.h> | 17 | #include <linux/regset.h> |
17 | #include <linux/tracehook.h> | 18 | #include <linux/tracehook.h> |
@@ -48,6 +49,7 @@ enum x86_regset { | |||
48 | REGSET_FP, | 49 | REGSET_FP, |
49 | REGSET_XFP, | 50 | REGSET_XFP, |
50 | REGSET_IOPERM64 = REGSET_XFP, | 51 | REGSET_IOPERM64 = REGSET_XFP, |
52 | REGSET_XSTATE, | ||
51 | REGSET_TLS, | 53 | REGSET_TLS, |
52 | REGSET_IOPERM32, | 54 | REGSET_IOPERM32, |
53 | }; | 55 | }; |
@@ -140,30 +142,6 @@ static const int arg_offs_table[] = { | |||
140 | #endif | 142 | #endif |
141 | }; | 143 | }; |
142 | 144 | ||
143 | /** | ||
144 | * regs_get_argument_nth() - get Nth argument at function call | ||
145 | * @regs: pt_regs which contains registers at function entry. | ||
146 | * @n: argument number. | ||
147 | * | ||
148 | * regs_get_argument_nth() returns @n th argument of a function call. | ||
149 | * Since usually the kernel stack will be changed right after function entry, | ||
150 | * you must use this at function entry. If the @n th entry is NOT in the | ||
151 | * kernel stack or pt_regs, this returns 0. | ||
152 | */ | ||
153 | unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) | ||
154 | { | ||
155 | if (n < ARRAY_SIZE(arg_offs_table)) | ||
156 | return *(unsigned long *)((char *)regs + arg_offs_table[n]); | ||
157 | else { | ||
158 | /* | ||
159 | * The typical case: arg n is on the stack. | ||
160 | * (Note: stack[0] = return address, so skip it) | ||
161 | */ | ||
162 | n -= ARRAY_SIZE(arg_offs_table); | ||
163 | return regs_get_kernel_stack_nth(regs, 1 + n); | ||
164 | } | ||
165 | } | ||
166 | |||
167 | /* | 145 | /* |
168 | * does not yet catch signals sent when the child dies. | 146 | * does not yet catch signals sent when the child dies. |
169 | * in exit.c or in signal.c. | 147 | * in exit.c or in signal.c. |
@@ -604,7 +582,7 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, | |||
604 | struct perf_event_attr attr; | 582 | struct perf_event_attr attr; |
605 | 583 | ||
606 | /* | 584 | /* |
607 | * We shoud have at least an inactive breakpoint at this | 585 | * We should have at least an inactive breakpoint at this |
608 | * slot. It means the user is writing dr7 without having | 586 | * slot. It means the user is writing dr7 without having |
609 | * written the address register first | 587 | * written the address register first |
610 | */ | 588 | */ |
@@ -702,7 +680,7 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) | |||
702 | } else if (n == 6) { | 680 | } else if (n == 6) { |
703 | val = thread->debugreg6; | 681 | val = thread->debugreg6; |
704 | } else if (n == 7) { | 682 | } else if (n == 7) { |
705 | val = ptrace_get_dr7(thread->ptrace_bps); | 683 | val = thread->ptrace_dr7; |
706 | } | 684 | } |
707 | return val; | 685 | return val; |
708 | } | 686 | } |
@@ -778,8 +756,11 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) | |||
778 | return rc; | 756 | return rc; |
779 | } | 757 | } |
780 | /* All that's left is DR7 */ | 758 | /* All that's left is DR7 */ |
781 | if (n == 7) | 759 | if (n == 7) { |
782 | rc = ptrace_write_dr7(tsk, val); | 760 | rc = ptrace_write_dr7(tsk, val); |
761 | if (!rc) | ||
762 | thread->ptrace_dr7 = val; | ||
763 | } | ||
783 | 764 | ||
784 | ret_path: | 765 | ret_path: |
785 | return rc; | 766 | return rc; |
@@ -1584,7 +1565,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, | |||
1584 | 1565 | ||
1585 | #ifdef CONFIG_X86_64 | 1566 | #ifdef CONFIG_X86_64 |
1586 | 1567 | ||
1587 | static const struct user_regset x86_64_regsets[] = { | 1568 | static struct user_regset x86_64_regsets[] __read_mostly = { |
1588 | [REGSET_GENERAL] = { | 1569 | [REGSET_GENERAL] = { |
1589 | .core_note_type = NT_PRSTATUS, | 1570 | .core_note_type = NT_PRSTATUS, |
1590 | .n = sizeof(struct user_regs_struct) / sizeof(long), | 1571 | .n = sizeof(struct user_regs_struct) / sizeof(long), |
@@ -1597,6 +1578,12 @@ static const struct user_regset x86_64_regsets[] = { | |||
1597 | .size = sizeof(long), .align = sizeof(long), | 1578 | .size = sizeof(long), .align = sizeof(long), |
1598 | .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set | 1579 | .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set |
1599 | }, | 1580 | }, |
1581 | [REGSET_XSTATE] = { | ||
1582 | .core_note_type = NT_X86_XSTATE, | ||
1583 | .size = sizeof(u64), .align = sizeof(u64), | ||
1584 | .active = xstateregs_active, .get = xstateregs_get, | ||
1585 | .set = xstateregs_set | ||
1586 | }, | ||
1600 | [REGSET_IOPERM64] = { | 1587 | [REGSET_IOPERM64] = { |
1601 | .core_note_type = NT_386_IOPERM, | 1588 | .core_note_type = NT_386_IOPERM, |
1602 | .n = IO_BITMAP_LONGS, | 1589 | .n = IO_BITMAP_LONGS, |
@@ -1622,7 +1609,7 @@ static const struct user_regset_view user_x86_64_view = { | |||
1622 | #endif /* CONFIG_X86_64 */ | 1609 | #endif /* CONFIG_X86_64 */ |
1623 | 1610 | ||
1624 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 1611 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
1625 | static const struct user_regset x86_32_regsets[] = { | 1612 | static struct user_regset x86_32_regsets[] __read_mostly = { |
1626 | [REGSET_GENERAL] = { | 1613 | [REGSET_GENERAL] = { |
1627 | .core_note_type = NT_PRSTATUS, | 1614 | .core_note_type = NT_PRSTATUS, |
1628 | .n = sizeof(struct user_regs_struct32) / sizeof(u32), | 1615 | .n = sizeof(struct user_regs_struct32) / sizeof(u32), |
@@ -1641,6 +1628,12 @@ static const struct user_regset x86_32_regsets[] = { | |||
1641 | .size = sizeof(u32), .align = sizeof(u32), | 1628 | .size = sizeof(u32), .align = sizeof(u32), |
1642 | .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set | 1629 | .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set |
1643 | }, | 1630 | }, |
1631 | [REGSET_XSTATE] = { | ||
1632 | .core_note_type = NT_X86_XSTATE, | ||
1633 | .size = sizeof(u64), .align = sizeof(u64), | ||
1634 | .active = xstateregs_active, .get = xstateregs_get, | ||
1635 | .set = xstateregs_set | ||
1636 | }, | ||
1644 | [REGSET_TLS] = { | 1637 | [REGSET_TLS] = { |
1645 | .core_note_type = NT_386_TLS, | 1638 | .core_note_type = NT_386_TLS, |
1646 | .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN, | 1639 | .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN, |
@@ -1663,6 +1656,23 @@ static const struct user_regset_view user_x86_32_view = { | |||
1663 | }; | 1656 | }; |
1664 | #endif | 1657 | #endif |
1665 | 1658 | ||
1659 | /* | ||
1660 | * This represents bytes 464..511 in the memory layout exported through | ||
1661 | * the REGSET_XSTATE interface. | ||
1662 | */ | ||
1663 | u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; | ||
1664 | |||
1665 | void update_regset_xstate_info(unsigned int size, u64 xstate_mask) | ||
1666 | { | ||
1667 | #ifdef CONFIG_X86_64 | ||
1668 | x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64); | ||
1669 | #endif | ||
1670 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | ||
1671 | x86_32_regsets[REGSET_XSTATE].n = size / sizeof(u64); | ||
1672 | #endif | ||
1673 | xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask; | ||
1674 | } | ||
1675 | |||
1666 | const struct user_regset_view *task_user_regset_view(struct task_struct *task) | 1676 | const struct user_regset_view *task_user_regset_view(struct task_struct *task) |
1667 | { | 1677 | { |
1668 | #ifdef CONFIG_IA32_EMULATION | 1678 | #ifdef CONFIG_IA32_EMULATION |
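The ptrace hunks above drop the const from the regset arrays because the new XSTATE regset cannot be sized at compile time; update_regset_xstate_info() patches its length once the CPU's xstate area size is known. A sketch of that one-time sizing:

#include <linux/regset.h>

static void size_xstate_regset(struct user_regset *rs, unsigned int xstate_size)
{
	rs->n = xstate_size / sizeof(u64);	/* regset length in u64 words */
}

/* called once at boot, for both the 64-bit and 32-bit regset arrays */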
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 18093d7498f0..12e9feaa2f7a 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -491,6 +491,19 @@ void force_hpet_resume(void) | |||
491 | break; | 491 | break; |
492 | } | 492 | } |
493 | } | 493 | } |
494 | |||
495 | /* | ||
496 | * HPET MSI on some boards (ATI SB700/SB800) has side effect on | ||
497 | * floppy DMA. Disable HPET MSI on such platforms. | ||
498 | */ | ||
499 | static void force_disable_hpet_msi(struct pci_dev *unused) | ||
500 | { | ||
501 | hpet_msi_disable = 1; | ||
502 | } | ||
503 | |||
504 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, | ||
505 | force_disable_hpet_msi); | ||
506 | |||
494 | #endif | 507 | #endif |
495 | 508 | ||
496 | #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) | 509 | #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) |
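The HPET quirk above uses the standard PCI fixup mechanism: DECLARE_PCI_FIXUP_HEADER registers a hook that runs early, for every device matching the vendor/device pair, before ordinary drivers bind. An illustrative sketch with placeholder IDs (not a real quirk):

#include <linux/pci.h>

static void example_quirk(struct pci_dev *dev)
{
	dev_info(&dev->dev, "example quirk applied\n");
}
DECLARE_PCI_FIXUP_HEADER(0x1234, 0x5678, example_quirk);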
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 1545bc0c9845..8e1aac86b50c 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -203,6 +203,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
203 | DMI_MATCH(DMI_BOARD_NAME, "0T656F"), | 203 | DMI_MATCH(DMI_BOARD_NAME, "0T656F"), |
204 | }, | 204 | }, |
205 | }, | 205 | }, |
206 | { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G */ | ||
207 | .callback = set_bios_reboot, | ||
208 | .ident = "Dell OptiPlex 760", | ||
209 | .matches = { | ||
210 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), | ||
211 | DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 760"), | ||
212 | DMI_MATCH(DMI_BOARD_NAME, "0G919G"), | ||
213 | }, | ||
214 | }, | ||
206 | { /* Handle problems with rebooting on Dell 2400's */ | 215 | { /* Handle problems with rebooting on Dell 2400's */ |
207 | .callback = set_bios_reboot, | 216 | .callback = set_bios_reboot, |
208 | .ident = "Dell PowerEdge 2400", | 217 | .ident = "Dell PowerEdge 2400", |
@@ -452,6 +461,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { | |||
452 | DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"), | 461 | DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"), |
453 | }, | 462 | }, |
454 | }, | 463 | }, |
464 | { /* Handle problems with rebooting on the iMac9,1. */ | ||
465 | .callback = set_pci_reboot, | ||
466 | .ident = "Apple iMac9,1", | ||
467 | .matches = { | ||
468 | DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), | ||
469 | DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"), | ||
470 | }, | ||
471 | }, | ||
455 | { } | 472 | { } |
456 | }; | 473 | }; |
457 | 474 | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f7b8b9894b22..c4851eff57b3 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -55,7 +55,6 @@ | |||
55 | #include <linux/stddef.h> | 55 | #include <linux/stddef.h> |
56 | #include <linux/unistd.h> | 56 | #include <linux/unistd.h> |
57 | #include <linux/ptrace.h> | 57 | #include <linux/ptrace.h> |
58 | #include <linux/slab.h> | ||
59 | #include <linux/user.h> | 58 | #include <linux/user.h> |
60 | #include <linux/delay.h> | 59 | #include <linux/delay.h> |
61 | 60 | ||
@@ -121,7 +120,9 @@ | |||
121 | unsigned long max_low_pfn_mapped; | 120 | unsigned long max_low_pfn_mapped; |
122 | unsigned long max_pfn_mapped; | 121 | unsigned long max_pfn_mapped; |
123 | 122 | ||
123 | #ifdef CONFIG_DMI | ||
124 | RESERVE_BRK(dmi_alloc, 65536); | 124 | RESERVE_BRK(dmi_alloc, 65536); |
125 | #endif | ||
125 | 126 | ||
126 | unsigned int boot_cpu_id __read_mostly; | 127 | unsigned int boot_cpu_id __read_mostly; |
127 | 128 | ||
@@ -312,16 +313,17 @@ static void __init reserve_brk(void) | |||
312 | #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) | 313 | #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) |
313 | static void __init relocate_initrd(void) | 314 | static void __init relocate_initrd(void) |
314 | { | 315 | { |
315 | 316 | /* Assume only end is not page aligned */ | |
316 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 317 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
317 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 318 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
319 | u64 area_size = PAGE_ALIGN(ramdisk_size); | ||
318 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; | 320 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; |
319 | u64 ramdisk_here; | 321 | u64 ramdisk_here; |
320 | unsigned long slop, clen, mapaddr; | 322 | unsigned long slop, clen, mapaddr; |
321 | char *p, *q; | 323 | char *p, *q; |
322 | 324 | ||
323 | /* We need to move the initrd down into lowmem */ | 325 | /* We need to move the initrd down into lowmem */ |
324 | ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size, | 326 | ramdisk_here = find_e820_area(0, end_of_lowmem, area_size, |
325 | PAGE_SIZE); | 327 | PAGE_SIZE); |
326 | 328 | ||
327 | if (ramdisk_here == -1ULL) | 329 | if (ramdisk_here == -1ULL) |
@@ -330,7 +332,7 @@ static void __init relocate_initrd(void) | |||
330 | 332 | ||
331 | /* Note: this includes all the lowmem currently occupied by | 333 | /* Note: this includes all the lowmem currently occupied by |
332 | the initrd; we rely on that fact to keep the data intact. */ | 334 | the initrd; we rely on that fact to keep the data intact. */ |
333 | reserve_early(ramdisk_here, ramdisk_here + ramdisk_size, | 335 | reserve_early(ramdisk_here, ramdisk_here + area_size, |
334 | "NEW RAMDISK"); | 336 | "NEW RAMDISK"); |
335 | initrd_start = ramdisk_here + PAGE_OFFSET; | 337 | initrd_start = ramdisk_here + PAGE_OFFSET; |
336 | initrd_end = initrd_start + ramdisk_size; | 338 | initrd_end = initrd_start + ramdisk_size; |
@@ -374,9 +376,10 @@ static void __init relocate_initrd(void) | |||
374 | 376 | ||
375 | static void __init reserve_initrd(void) | 377 | static void __init reserve_initrd(void) |
376 | { | 378 | { |
379 | /* Assume only end is not page aligned */ | ||
377 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 380 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
378 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 381 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
379 | u64 ramdisk_end = ramdisk_image + ramdisk_size; | 382 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
380 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; | 383 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; |
381 | 384 | ||
382 | if (!boot_params.hdr.type_of_loader || | 385 | if (!boot_params.hdr.type_of_loader || |
@@ -604,6 +607,16 @@ static int __init setup_elfcorehdr(char *arg) | |||
604 | early_param("elfcorehdr", setup_elfcorehdr); | 607 | early_param("elfcorehdr", setup_elfcorehdr); |
605 | #endif | 608 | #endif |
606 | 609 | ||
610 | static __init void reserve_ibft_region(void) | ||
611 | { | ||
612 | unsigned long addr, size = 0; | ||
613 | |||
614 | addr = find_ibft_region(&size); | ||
615 | |||
616 | if (size) | ||
617 | reserve_early_overlap_ok(addr, addr + size, "ibft"); | ||
618 | } | ||
619 | |||
607 | #ifdef CONFIG_X86_RESERVE_LOW_64K | 620 | #ifdef CONFIG_X86_RESERVE_LOW_64K |
608 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) | 621 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) |
609 | { | 622 | { |
@@ -642,23 +655,48 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | |||
642 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"), | 655 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"), |
643 | }, | 656 | }, |
644 | }, | 657 | }, |
645 | { | ||
646 | /* | 658 | /* |
647 | * AMI BIOS with low memory corruption was found on Intel DG45ID board. | 659 | * AMI BIOS with low memory corruption was found on Intel DG45ID and |
648 | * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will | 660 | * DG45FC boards. |
661 | * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will | ||
649 | * match only DMI_BOARD_NAME and see if there are more bad products | 662 | * match only DMI_BOARD_NAME and see if there are more bad products |
650 | * with this vendor. | 663 | * with this vendor. |
651 | */ | 664 | */ |
665 | { | ||
652 | .callback = dmi_low_memory_corruption, | 666 | .callback = dmi_low_memory_corruption, |
653 | .ident = "AMI BIOS", | 667 | .ident = "AMI BIOS", |
654 | .matches = { | 668 | .matches = { |
655 | DMI_MATCH(DMI_BOARD_NAME, "DG45ID"), | 669 | DMI_MATCH(DMI_BOARD_NAME, "DG45ID"), |
656 | }, | 670 | }, |
657 | }, | 671 | }, |
672 | { | ||
673 | .callback = dmi_low_memory_corruption, | ||
674 | .ident = "AMI BIOS", | ||
675 | .matches = { | ||
676 | DMI_MATCH(DMI_BOARD_NAME, "DG45FC"), | ||
677 | }, | ||
678 | }, | ||
658 | #endif | 679 | #endif |
659 | {} | 680 | {} |
660 | }; | 681 | }; |
661 | 682 | ||
683 | static void __init trim_bios_range(void) | ||
684 | { | ||
685 | /* | ||
686 | * A special case is the first 4KB of memory; | ||
687 | * this is a BIOS-owned area, not kernel RAM, but generally | ||
688 | * not listed as such in the E820 table. | ||
689 | */ | ||
690 | e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED); | ||
691 | /* | ||
692 | * Special case: some BIOSes report the PC BIOS | ||
693 | * area (640KB->1MB) as RAM even though it is not. | ||
694 | * Take those entries out. | ||
695 | */ | ||
696 | e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1); | ||
697 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | ||
698 | } | ||
699 | |||
662 | /* | 700 | /* |
663 | * Determine if we were loaded by an EFI loader. If so, then we have also been | 701 | * Determine if we were loaded by an EFI loader. If so, then we have also been |
664 | * passed the efi memmap, systab, etc., so we should use these data structures | 702 | * passed the efi memmap, systab, etc., so we should use these data structures |
@@ -822,7 +860,7 @@ void __init setup_arch(char **cmdline_p) | |||
822 | insert_resource(&iomem_resource, &data_resource); | 860 | insert_resource(&iomem_resource, &data_resource); |
823 | insert_resource(&iomem_resource, &bss_resource); | 861 | insert_resource(&iomem_resource, &bss_resource); |
824 | 862 | ||
825 | 863 | trim_bios_range(); | |
826 | #ifdef CONFIG_X86_32 | 864 | #ifdef CONFIG_X86_32 |
827 | if (ppro_with_ram_bug()) { | 865 | if (ppro_with_ram_bug()) { |
828 | e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM, | 866 | e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM, |
@@ -881,6 +919,8 @@ void __init setup_arch(char **cmdline_p) | |||
881 | */ | 919 | */ |
882 | find_smp_config(); | 920 | find_smp_config(); |
883 | 921 | ||
922 | reserve_ibft_region(); | ||
923 | |||
884 | reserve_trampoline_memory(); | 924 | reserve_trampoline_memory(); |
885 | 925 | ||
886 | #ifdef CONFIG_ACPI_SLEEP | 926 | #ifdef CONFIG_ACPI_SLEEP |
@@ -942,17 +982,11 @@ void __init setup_arch(char **cmdline_p) | |||
942 | #endif | 982 | #endif |
943 | 983 | ||
944 | initmem_init(0, max_pfn, acpi, k8); | 984 | initmem_init(0, max_pfn, acpi, k8); |
945 | 985 | #ifndef CONFIG_NO_BOOTMEM | |
946 | #ifdef CONFIG_X86_64 | 986 | early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); |
947 | /* | ||
948 | * dma32_reserve_bootmem() allocates bootmem which may conflict | ||
949 | * with the crashkernel command line, so do that after | ||
950 | * reserve_crashkernel() | ||
951 | */ | ||
952 | dma32_reserve_bootmem(); | ||
953 | #endif | 987 | #endif |
954 | 988 | ||
955 | reserve_ibft_region(); | 989 | dma32_reserve_bootmem(); |
956 | 990 | ||
957 | #ifdef CONFIG_KVM_CLOCK | 991 | #ifdef CONFIG_KVM_CLOCK |
958 | kvmclock_init(); | 992 | kvmclock_init(); |
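trim_bios_range() above makes two targeted e820 edits and then re-normalizes the map. A sketch of the same sequence with the effects spelled out (BIOS_BEGIN/BIOS_END are 0xa0000/0x100000 in <asm/e820.h>, the legacy 640KB-1MB hole; the final argument of e820_remove_range() restricts removal to entries of the given type):

#include <linux/init.h>
#include <linux/kernel.h>
#include <asm/e820.h>

static void __init trim_bios_range_sketch(void)
{
	/* first 4KB: retype RAM -> RESERVED, it is BIOS-owned */
	e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);

	/* 640KB-1MB: delete any RAM entries outright (checktype = 1) */
	e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);

	/* re-sort and merge the modified map */
	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}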
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 35abcb8b00e9..ef6370b00e70 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -137,7 +137,13 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) | |||
137 | 137 | ||
138 | static void __init pcpu_fc_free(void *ptr, size_t size) | 138 | static void __init pcpu_fc_free(void *ptr, size_t size) |
139 | { | 139 | { |
140 | #ifdef CONFIG_NO_BOOTMEM | ||
141 | u64 start = __pa(ptr); | ||
142 | u64 end = start + size; | ||
143 | free_early_partial(start, end); | ||
144 | #else | ||
140 | free_bootmem(__pa(ptr), size); | 145 | free_bootmem(__pa(ptr), size); |
146 | #endif | ||
141 | } | 147 | } |
142 | 148 | ||
143 | static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) | 149 | static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) |
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index ec1de97600e7..d801210945d6 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/cache.h> | 21 | #include <linux/cache.h> |
22 | #include <linux/interrupt.h> | 22 | #include <linux/interrupt.h> |
23 | #include <linux/cpu.h> | 23 | #include <linux/cpu.h> |
24 | #include <linux/gfp.h> | ||
24 | 25 | ||
25 | #include <asm/mtrr.h> | 26 | #include <asm/mtrr.h> |
26 | #include <asm/tlbflush.h> | 27 | #include <asm/tlbflush.h> |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 678d0b8c26f3..763d815e27a0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -48,6 +48,8 @@ | |||
48 | #include <linux/err.h> | 48 | #include <linux/err.h> |
49 | #include <linux/nmi.h> | 49 | #include <linux/nmi.h> |
50 | #include <linux/tboot.h> | 50 | #include <linux/tboot.h> |
51 | #include <linux/stackprotector.h> | ||
52 | #include <linux/gfp.h> | ||
51 | 53 | ||
52 | #include <asm/acpi.h> | 54 | #include <asm/acpi.h> |
53 | #include <asm/desc.h> | 55 | #include <asm/desc.h> |
@@ -67,6 +69,7 @@ | |||
67 | #include <linux/mc146818rtc.h> | 69 | #include <linux/mc146818rtc.h> |
68 | 70 | ||
69 | #include <asm/smpboot_hooks.h> | 71 | #include <asm/smpboot_hooks.h> |
72 | #include <asm/i8259.h> | ||
70 | 73 | ||
71 | #ifdef CONFIG_X86_32 | 74 | #ifdef CONFIG_X86_32 |
72 | u8 apicid_2_node[MAX_APICID]; | 75 | u8 apicid_2_node[MAX_APICID]; |
@@ -240,7 +243,10 @@ static void __cpuinit smp_callin(void) | |||
240 | end_local_APIC_setup(); | 243 | end_local_APIC_setup(); |
241 | map_cpu_to_logical_apicid(); | 244 | map_cpu_to_logical_apicid(); |
242 | 245 | ||
243 | notify_cpu_starting(cpuid); | 246 | /* |
247 | * Need to set up vector mappings before we enable interrupts. | ||
248 | */ | ||
249 | setup_vector_irq(smp_processor_id()); | ||
244 | /* | 250 | /* |
245 | * Get our bogomips. | 251 | * Get our bogomips. |
246 | * | 252 | * |
@@ -257,6 +263,8 @@ static void __cpuinit smp_callin(void) | |||
257 | */ | 263 | */ |
258 | smp_store_cpu_info(cpuid); | 264 | smp_store_cpu_info(cpuid); |
259 | 265 | ||
266 | notify_cpu_starting(cpuid); | ||
267 | |||
260 | /* | 268 | /* |
261 | * Allow the master to continue. | 269 | * Allow the master to continue. |
262 | */ | 270 | */ |
@@ -286,9 +294,9 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
286 | check_tsc_sync_target(); | 294 | check_tsc_sync_target(); |
287 | 295 | ||
288 | if (nmi_watchdog == NMI_IO_APIC) { | 296 | if (nmi_watchdog == NMI_IO_APIC) { |
289 | disable_8259A_irq(0); | 297 | legacy_pic->chip->mask(0); |
290 | enable_NMI_through_LVT0(); | 298 | enable_NMI_through_LVT0(); |
291 | enable_8259A_irq(0); | 299 | legacy_pic->chip->unmask(0); |
292 | } | 300 | } |
293 | 301 | ||
294 | #ifdef CONFIG_X86_32 | 302 | #ifdef CONFIG_X86_32 |
@@ -315,15 +323,18 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
315 | */ | 323 | */ |
316 | ipi_call_lock(); | 324 | ipi_call_lock(); |
317 | lock_vector_lock(); | 325 | lock_vector_lock(); |
318 | __setup_vector_irq(smp_processor_id()); | ||
319 | set_cpu_online(smp_processor_id(), true); | 326 | set_cpu_online(smp_processor_id(), true); |
320 | unlock_vector_lock(); | 327 | unlock_vector_lock(); |
321 | ipi_call_unlock(); | 328 | ipi_call_unlock(); |
322 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | 329 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; |
330 | x86_platform.nmi_init(); | ||
323 | 331 | ||
324 | /* enable local interrupts */ | 332 | /* enable local interrupts */ |
325 | local_irq_enable(); | 333 | local_irq_enable(); |
326 | 334 | ||
335 | /* to prevent fake stack check failure in clock setup */ | ||
336 | boot_init_stack_canary(); | ||
337 | |||
327 | x86_cpuinit.setup_percpu_clockev(); | 338 | x86_cpuinit.setup_percpu_clockev(); |
328 | 339 | ||
329 | wmb(); | 340 | wmb(); |
@@ -1083,9 +1094,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1083 | set_cpu_sibling_map(0); | 1094 | set_cpu_sibling_map(0); |
1084 | 1095 | ||
1085 | enable_IR_x2apic(); | 1096 | enable_IR_x2apic(); |
1086 | #ifdef CONFIG_X86_64 | ||
1087 | default_setup_apic_routing(); | 1097 | default_setup_apic_routing(); |
1088 | #endif | ||
1089 | 1098 | ||
1090 | if (smp_sanity_check(max_cpus) < 0) { | 1099 | if (smp_sanity_check(max_cpus) < 0) { |
1091 | printk(KERN_INFO "SMP disabled\n"); | 1100 | printk(KERN_INFO "SMP disabled\n"); |
@@ -1213,11 +1222,12 @@ __init void prefill_possible_map(void) | |||
1213 | 1222 | ||
1214 | total_cpus = max_t(int, possible, num_processors + disabled_cpus); | 1223 | total_cpus = max_t(int, possible, num_processors + disabled_cpus); |
1215 | 1224 | ||
1216 | if (possible > CONFIG_NR_CPUS) { | 1225 | /* nr_cpu_ids could be reduced via nr_cpus= */ |
1226 | if (possible > nr_cpu_ids) { | ||
1217 | printk(KERN_WARNING | 1227 | printk(KERN_WARNING |
1218 | "%d Processors exceeds NR_CPUS limit of %d\n", | 1228 | "%d Processors exceeds NR_CPUS limit of %d\n", |
1219 | possible, CONFIG_NR_CPUS); | 1229 | possible, nr_cpu_ids); |
1220 | possible = CONFIG_NR_CPUS; | 1230 | possible = nr_cpu_ids; |
1221 | } | 1231 | } |
1222 | 1232 | ||
1223 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", | 1233 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", |
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index dee1ff7cba58..196552bb412c 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c | |||
@@ -25,191 +25,6 @@ | |||
25 | #include <asm/syscalls.h> | 25 | #include <asm/syscalls.h> |
26 | 26 | ||
27 | /* | 27 | /* |
28 | * Perform the select(nd, in, out, ex, tv) and mmap() system | ||
29 | * calls. Linux/i386 didn't use to be able to handle more than | ||
30 | * 4 system call parameters, so these system calls used a memory | ||
31 | * block for parameter passing.. | ||
32 | */ | ||
33 | |||
34 | struct mmap_arg_struct { | ||
35 | unsigned long addr; | ||
36 | unsigned long len; | ||
37 | unsigned long prot; | ||
38 | unsigned long flags; | ||
39 | unsigned long fd; | ||
40 | unsigned long offset; | ||
41 | }; | ||
42 | |||
43 | asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) | ||
44 | { | ||
45 | struct mmap_arg_struct a; | ||
46 | int err = -EFAULT; | ||
47 | |||
48 | if (copy_from_user(&a, arg, sizeof(a))) | ||
49 | goto out; | ||
50 | |||
51 | err = -EINVAL; | ||
52 | if (a.offset & ~PAGE_MASK) | ||
53 | goto out; | ||
54 | |||
55 | err = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, | ||
56 | a.fd, a.offset >> PAGE_SHIFT); | ||
57 | out: | ||
58 | return err; | ||
59 | } | ||
60 | |||
61 | |||
62 | struct sel_arg_struct { | ||
63 | unsigned long n; | ||
64 | fd_set __user *inp, *outp, *exp; | ||
65 | struct timeval __user *tvp; | ||
66 | }; | ||
67 | |||
68 | asmlinkage int old_select(struct sel_arg_struct __user *arg) | ||
69 | { | ||
70 | struct sel_arg_struct a; | ||
71 | |||
72 | if (copy_from_user(&a, arg, sizeof(a))) | ||
73 | return -EFAULT; | ||
74 | /* sys_select() does the appropriate kernel locking */ | ||
75 | return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); | ||
76 | } | ||
77 | |||
78 | /* | ||
79 | * sys_ipc() is the de-multiplexer for the SysV IPC calls.. | ||
80 | * | ||
81 | * This is really horribly ugly. | ||
82 | */ | ||
83 | asmlinkage int sys_ipc(uint call, int first, int second, | ||
84 | int third, void __user *ptr, long fifth) | ||
85 | { | ||
86 | int version, ret; | ||
87 | |||
88 | version = call >> 16; /* hack for backward compatibility */ | ||
89 | call &= 0xffff; | ||
90 | |||
91 | switch (call) { | ||
92 | case SEMOP: | ||
93 | return sys_semtimedop(first, (struct sembuf __user *)ptr, second, NULL); | ||
94 | case SEMTIMEDOP: | ||
95 | return sys_semtimedop(first, (struct sembuf __user *)ptr, second, | ||
96 | (const struct timespec __user *)fifth); | ||
97 | |||
98 | case SEMGET: | ||
99 | return sys_semget(first, second, third); | ||
100 | case SEMCTL: { | ||
101 | union semun fourth; | ||
102 | if (!ptr) | ||
103 | return -EINVAL; | ||
104 | if (get_user(fourth.__pad, (void __user * __user *) ptr)) | ||
105 | return -EFAULT; | ||
106 | return sys_semctl(first, second, third, fourth); | ||
107 | } | ||
108 | |||
109 | case MSGSND: | ||
110 | return sys_msgsnd(first, (struct msgbuf __user *) ptr, | ||
111 | second, third); | ||
112 | case MSGRCV: | ||
113 | switch (version) { | ||
114 | case 0: { | ||
115 | struct ipc_kludge tmp; | ||
116 | if (!ptr) | ||
117 | return -EINVAL; | ||
118 | |||
119 | if (copy_from_user(&tmp, | ||
120 | (struct ipc_kludge __user *) ptr, | ||
121 | sizeof(tmp))) | ||
122 | return -EFAULT; | ||
123 | return sys_msgrcv(first, tmp.msgp, second, | ||
124 | tmp.msgtyp, third); | ||
125 | } | ||
126 | default: | ||
127 | return sys_msgrcv(first, | ||
128 | (struct msgbuf __user *) ptr, | ||
129 | second, fifth, third); | ||
130 | } | ||
131 | case MSGGET: | ||
132 | return sys_msgget((key_t) first, second); | ||
133 | case MSGCTL: | ||
134 | return sys_msgctl(first, second, (struct msqid_ds __user *) ptr); | ||
135 | |||
136 | case SHMAT: | ||
137 | switch (version) { | ||
138 | default: { | ||
139 | ulong raddr; | ||
140 | ret = do_shmat(first, (char __user *) ptr, second, &raddr); | ||
141 | if (ret) | ||
142 | return ret; | ||
143 | return put_user(raddr, (ulong __user *) third); | ||
144 | } | ||
145 | case 1: /* iBCS2 emulator entry point */ | ||
146 | if (!segment_eq(get_fs(), get_ds())) | ||
147 | return -EINVAL; | ||
148 | /* The "(ulong *) third" is valid _only_ because of the kernel segment thing */ | ||
149 | return do_shmat(first, (char __user *) ptr, second, (ulong *) third); | ||
150 | } | ||
151 | case SHMDT: | ||
152 | return sys_shmdt((char __user *)ptr); | ||
153 | case SHMGET: | ||
154 | return sys_shmget(first, second, third); | ||
155 | case SHMCTL: | ||
156 | return sys_shmctl(first, second, | ||
157 | (struct shmid_ds __user *) ptr); | ||
158 | default: | ||
159 | return -ENOSYS; | ||
160 | } | ||
161 | } | ||
162 | |||
163 | /* | ||
164 | * Old cruft | ||
165 | */ | ||
166 | asmlinkage int sys_uname(struct old_utsname __user *name) | ||
167 | { | ||
168 | int err; | ||
169 | if (!name) | ||
170 | return -EFAULT; | ||
171 | down_read(&uts_sem); | ||
172 | err = copy_to_user(name, utsname(), sizeof(*name)); | ||
173 | up_read(&uts_sem); | ||
174 | return err? -EFAULT:0; | ||
175 | } | ||
176 | |||
177 | asmlinkage int sys_olduname(struct oldold_utsname __user *name) | ||
178 | { | ||
179 | int error; | ||
180 | |||
181 | if (!name) | ||
182 | return -EFAULT; | ||
183 | if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) | ||
184 | return -EFAULT; | ||
185 | |||
186 | down_read(&uts_sem); | ||
187 | |||
188 | error = __copy_to_user(&name->sysname, &utsname()->sysname, | ||
189 | __OLD_UTS_LEN); | ||
190 | error |= __put_user(0, name->sysname + __OLD_UTS_LEN); | ||
191 | error |= __copy_to_user(&name->nodename, &utsname()->nodename, | ||
192 | __OLD_UTS_LEN); | ||
193 | error |= __put_user(0, name->nodename + __OLD_UTS_LEN); | ||
194 | error |= __copy_to_user(&name->release, &utsname()->release, | ||
195 | __OLD_UTS_LEN); | ||
196 | error |= __put_user(0, name->release + __OLD_UTS_LEN); | ||
197 | error |= __copy_to_user(&name->version, &utsname()->version, | ||
198 | __OLD_UTS_LEN); | ||
199 | error |= __put_user(0, name->version + __OLD_UTS_LEN); | ||
200 | error |= __copy_to_user(&name->machine, &utsname()->machine, | ||
201 | __OLD_UTS_LEN); | ||
202 | error |= __put_user(0, name->machine + __OLD_UTS_LEN); | ||
203 | |||
204 | up_read(&uts_sem); | ||
205 | |||
206 | error = error ? -EFAULT : 0; | ||
207 | |||
208 | return error; | ||
209 | } | ||
210 | |||
211 | |||
212 | /* | ||
213 | * Do a system call from kernel instead of calling sys_execve so we | 28 | * Do a system call from kernel instead of calling sys_execve so we |
214 | * end up with proper pt_regs. | 29 | * end up with proper pt_regs. |
215 | */ | 30 | */ |
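The deleted wrappers all implement the same historical convention: early i386 syscalls could not pass enough register arguments, so old_mmap() and old_select() took a single pointer to a user-space block holding the real arguments (the renamed generic versions appear in the syscall-table hunk below). A self-contained sketch of that convention, with memcpy standing in for copy_from_user():

    #include <stdio.h>
    #include <string.h>

    struct mmap_arg_struct {
        unsigned long addr, len, prot, flags, fd, offset;
    };

    static long old_mmap_sketch(const void *user_arg)
    {
        struct mmap_arg_struct a;

        memcpy(&a, user_arg, sizeof(a));  /* stands in for copy_from_user() */
        if (a.offset & 0xfff)             /* offset must be page-aligned */
            return -22;                   /* -EINVAL */
        /* the real code forwards to sys_mmap_pgoff(..., offset >> PAGE_SHIFT) */
        return 0;
    }

    int main(void)
    {
        struct mmap_arg_struct a = { .len = 4096, .offset = 8192 };
        printf("%ld\n", old_mmap_sketch(&a));  /* 0: aligned offset */
        return 0;
    }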
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 8aa2057efd12..ff14a5044ce6 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
@@ -209,15 +209,3 @@ bottomup: | |||
209 | 209 | ||
210 | return addr; | 210 | return addr; |
211 | } | 211 | } |
212 | |||
213 | |||
214 | SYSCALL_DEFINE1(uname, struct new_utsname __user *, name) | ||
215 | { | ||
216 | int err; | ||
217 | down_read(&uts_sem); | ||
218 | err = copy_to_user(name, utsname(), sizeof(*name)); | ||
219 | up_read(&uts_sem); | ||
220 | if (personality(current->personality) == PER_LINUX32) | ||
221 | err |= copy_to_user(&name->machine, "i686", 5); | ||
222 | return err ? -EFAULT : 0; | ||
223 | } | ||
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 15228b5d3eb7..8b3729341216 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -81,7 +81,7 @@ ENTRY(sys_call_table) | |||
81 | .long sys_settimeofday | 81 | .long sys_settimeofday |
82 | .long sys_getgroups16 /* 80 */ | 82 | .long sys_getgroups16 /* 80 */ |
83 | .long sys_setgroups16 | 83 | .long sys_setgroups16 |
84 | .long old_select | 84 | .long sys_old_select |
85 | .long sys_symlink | 85 | .long sys_symlink |
86 | .long sys_lstat | 86 | .long sys_lstat |
87 | .long sys_readlink /* 85 */ | 87 | .long sys_readlink /* 85 */ |
@@ -89,7 +89,7 @@ ENTRY(sys_call_table) | |||
89 | .long sys_swapon | 89 | .long sys_swapon |
90 | .long sys_reboot | 90 | .long sys_reboot |
91 | .long sys_old_readdir | 91 | .long sys_old_readdir |
92 | .long old_mmap /* 90 */ | 92 | .long sys_old_mmap /* 90 */ |
93 | .long sys_munmap | 93 | .long sys_munmap |
94 | .long sys_truncate | 94 | .long sys_truncate |
95 | .long sys_ftruncate | 95 | .long sys_ftruncate |
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index be2573448ed9..fb5cc5e14cfa 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c | |||
@@ -70,11 +70,11 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
70 | * manually to deassert NMI lines for the watchdog if run | 70 | * manually to deassert NMI lines for the watchdog if run |
71 | * on an 82489DX-based system. | 71 | * on an 82489DX-based system. |
72 | */ | 72 | */ |
73 | spin_lock(&i8259A_lock); | 73 | raw_spin_lock(&i8259A_lock); |
74 | outb(0x0c, PIC_MASTER_OCW3); | 74 | outb(0x0c, PIC_MASTER_OCW3); |
75 | /* Ack the IRQ; AEOI will end it automatically. */ | 75 | /* Ack the IRQ; AEOI will end it automatically. */ |
76 | inb(PIC_MASTER_POLL); | 76 | inb(PIC_MASTER_POLL); |
77 | spin_unlock(&i8259A_lock); | 77 | raw_spin_unlock(&i8259A_lock); |
78 | } | 78 | } |
79 | 79 | ||
80 | global_clock_event->event_handler(global_clock_event); | 80 | global_clock_event->event_handler(global_clock_event); |
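Converting i8259A_lock to a raw spinlock matters on PREEMPT_RT, where ordinary spinlocks become sleeping locks; the PIC is poked from hard-interrupt context (as in this timer handler), so the lock must keep spinning. A kernel-flavored sketch of the pattern, with the critical section reduced to a comment:

    #include <linux/spinlock.h>

    static DEFINE_RAW_SPINLOCK(i8259A_lock);

    static void poke_pic(void)
    {
        unsigned long flags;

        raw_spin_lock_irqsave(&i8259A_lock, flags);
        /* outb()/inb() accesses to the PIC ports would go here */
        raw_spin_unlock_irqrestore(&i8259A_lock, flags);
    }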
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 364d015efebc..17b03dd3a6b5 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/seq_file.h> | 9 | #include <linux/seq_file.h> |
10 | #include <linux/proc_fs.h> | 10 | #include <linux/proc_fs.h> |
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/slab.h> | ||
12 | 13 | ||
13 | #include <asm/mmu_context.h> | 14 | #include <asm/mmu_context.h> |
14 | #include <asm/uv/uv.h> | 15 | #include <asm/uv/uv.h> |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 33399176512a..1168e4454188 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -534,6 +534,9 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
534 | 534 | ||
535 | get_debugreg(dr6, 6); | 535 | get_debugreg(dr6, 6); |
536 | 536 | ||
537 | /* Filter out all the reserved bits which are preset to 1 */ | ||
538 | dr6 &= ~DR6_RESERVED; | ||
539 | |||
537 | /* Catch kmemcheck conditions first of all! */ | 540 | /* Catch kmemcheck conditions first of all! */ |
538 | if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) | 541 | if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) |
539 | return; | 542 | return; |
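DR6 has reserved bits that read back as 1, so masking them out before the kmemcheck and single-step tests keeps later bit checks from matching on noise. A runnable sketch of the filtering step; the 0xFFFF0FF0 mask and the 0x4000 single-step bit match the x86 header definitions of this era, while the raw readout value is invented:

    #include <stdio.h>

    #define DR6_RESERVED 0xFFFF0FF0ul  /* bits preset to 1 by the CPU */
    #define DR_STEP      0x4000ul      /* BS: single-step trap */

    int main(void)
    {
        unsigned long dr6 = 0xFFFF4FF4ul;  /* hypothetical raw DR6 readout */

        dr6 &= ~DR6_RESERVED;              /* keep only meaningful status bits */
        printf("dr6=%#lx single-step=%d\n", dr6, (dr6 & DR_STEP) ? 1 : 0);
        return 0;
    }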
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 597683aa5ba0..9faf91ae1841 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -50,7 +50,7 @@ u64 native_sched_clock(void) | |||
50 | * unstable. We do this because unlike Time Of Day, | 50 | * unstable. We do this because unlike Time Of Day, |
51 | * the scheduler clock tolerates small errors and it's | 51 | * the scheduler clock tolerates small errors and it's |
52 | * very important for it to be as fast as the platform | 52 | * very important for it to be as fast as the platform |
53 | * can achive it. ) | 53 | * can achieve it. ) |
54 | */ | 54 | */ |
55 | if (unlikely(tsc_disabled)) { | 55 | if (unlikely(tsc_disabled)) { |
56 | /* No locking but a rare wrong value is not a big deal: */ | 56 | /* No locking but a rare wrong value is not a big deal: */ |
@@ -740,7 +740,7 @@ static cycle_t __vsyscall_fn vread_tsc(void) | |||
740 | } | 740 | } |
741 | #endif | 741 | #endif |
742 | 742 | ||
743 | static void resume_tsc(void) | 743 | static void resume_tsc(struct clocksource *cs) |
744 | { | 744 | { |
745 | clocksource_tsc.cycle_last = 0; | 745 | clocksource_tsc.cycle_last = 0; |
746 | } | 746 | } |
@@ -806,7 +806,7 @@ static void __init check_system_tsc_reliable(void) | |||
806 | unsigned long res_low, res_high; | 806 | unsigned long res_low, res_high; |
807 | 807 | ||
808 | rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); | 808 | rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); |
809 | /* Geode_LX - the OLPC CPU has a possibly a very reliable TSC */ | 809 | /* Geode_LX - the OLPC CPU has a very reliable TSC */ |
810 | if (res_low & RTSC_SUSP) | 810 | if (res_low & RTSC_SUSP) |
811 | tsc_clocksource_reliable = 1; | 811 | tsc_clocksource_reliable = 1; |
812 | #endif | 812 | #endif |
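The resume_tsc() change tracks an interface update: clocksource resume callbacks now receive the clocksource they belong to. A kernel-flavored sketch of a clocksource using the new signature; only the fields relevant to the hunk are shown, and the name and rating values are placeholders:

    #include <linux/clocksource.h>

    static void sketch_resume(struct clocksource *cs)
    {
        cs->cycle_last = 0;  /* same reset the TSC clocksource performs */
    }

    static struct clocksource sketch_cs = {
        .name   = "sketch",
        .rating = 100,
        .resume = sketch_resume,
    };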
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index ece73d8e3240..1d40336b030a 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c | |||
@@ -10,6 +10,7 @@ | |||
10 | 10 | ||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/rbtree.h> | 12 | #include <linux/rbtree.h> |
13 | #include <linux/slab.h> | ||
13 | #include <linux/irq.h> | 14 | #include <linux/irq.h> |
14 | 15 | ||
15 | #include <asm/apic.h> | 16 | #include <asm/apic.h> |
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c index 36afb98675a4..309c70fb7759 100644 --- a/arch/x86/kernel/uv_sysfs.c +++ b/arch/x86/kernel/uv_sysfs.c | |||
@@ -54,19 +54,19 @@ static int __init sgi_uv_sysfs_init(void) | |||
54 | if (!sgi_uv_kobj) | 54 | if (!sgi_uv_kobj) |
55 | sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); | 55 | sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); |
56 | if (!sgi_uv_kobj) { | 56 | if (!sgi_uv_kobj) { |
57 | printk(KERN_WARNING "kobject_create_and_add sgi_uv failed \n"); | 57 | printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n"); |
58 | return -EINVAL; | 58 | return -EINVAL; |
59 | } | 59 | } |
60 | 60 | ||
61 | ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr); | 61 | ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr); |
62 | if (ret) { | 62 | if (ret) { |
63 | printk(KERN_WARNING "sysfs_create_file partition_id failed \n"); | 63 | printk(KERN_WARNING "sysfs_create_file partition_id failed\n"); |
64 | return ret; | 64 | return ret; |
65 | } | 65 | } |
66 | 66 | ||
67 | ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr); | 67 | ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr); |
68 | if (ret) { | 68 | if (ret) { |
69 | printk(KERN_WARNING "sysfs_create_file coherence_id failed \n"); | 69 | printk(KERN_WARNING "sysfs_create_file coherence_id failed\n"); |
70 | return ret; | 70 | return ret; |
71 | } | 71 | } |
72 | 72 | ||
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c index 3c84aa001c11..56e421bc379b 100644 --- a/arch/x86/kernel/uv_time.c +++ b/arch/x86/kernel/uv_time.c | |||
@@ -19,6 +19,7 @@ | |||
19 | * Copyright (c) Dimitri Sivanich | 19 | * Copyright (c) Dimitri Sivanich |
20 | */ | 20 | */ |
21 | #include <linux/clockchips.h> | 21 | #include <linux/clockchips.h> |
22 | #include <linux/slab.h> | ||
22 | 23 | ||
23 | #include <asm/uv/uv_mmrs.h> | 24 | #include <asm/uv/uv_mmrs.h> |
24 | #include <asm/uv/uv_hub.h> | 25 | #include <asm/uv/uv_hub.h> |
@@ -282,10 +283,21 @@ static int uv_rtc_unset_timer(int cpu, int force) | |||
282 | 283 | ||
283 | /* | 284 | /* |
284 | * Read the RTC. | 285 | * Read the RTC. |
286 | * | ||
287 | * Starting with HUB rev 2.0, the UV RTC register is replicated across all | ||
288 | * cachelines of its own page. This allows faster simultaneous reads | ||
289 | * from a given socket. | ||
285 | */ | 290 | */ |
286 | static cycle_t uv_read_rtc(struct clocksource *cs) | 291 | static cycle_t uv_read_rtc(struct clocksource *cs) |
287 | { | 292 | { |
288 | return (cycle_t)uv_read_local_mmr(UVH_RTC); | 293 | unsigned long offset; |
294 | |||
295 | if (uv_get_min_hub_revision_id() == 1) | ||
296 | offset = 0; | ||
297 | else | ||
298 | offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; | ||
299 | |||
300 | return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); | ||
289 | } | 301 | } |
290 | 302 | ||
291 | /* | 303 | /* |
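The new uv_read_rtc() spreads concurrent readers across the RTC register's per-cacheline mirrors: on hub rev 2.0 and later, each CPU reads at an offset derived from its blade-local id, so simultaneous reads on a socket do not contend on one cacheline. A runnable sketch of the offset computation, assuming typical x86 constants:

    #include <stdio.h>

    #define L1_CACHE_BYTES 64    /* typical x86 value */
    #define PAGE_SIZE      4096

    int main(void)
    {
        int cpu;

        for (cpu = 0; cpu < 4; cpu++) {
            unsigned long offset = (cpu * L1_CACHE_BYTES) % PAGE_SIZE;
            printf("cpu %d reads RTC mirror at +%#lx\n", cpu, offset);
        }
        return 0;
    }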
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 34a279a7471d..e680ea52db9b 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
@@ -49,11 +49,6 @@ extern int no_broadcast; | |||
49 | char visws_board_type = -1; | 49 | char visws_board_type = -1; |
50 | char visws_board_rev = -1; | 50 | char visws_board_rev = -1; |
51 | 51 | ||
52 | int is_visws_box(void) | ||
53 | { | ||
54 | return visws_board_type >= 0; | ||
55 | } | ||
56 | |||
57 | static void __init visws_time_init(void) | 52 | static void __init visws_time_init(void) |
58 | { | 53 | { |
59 | printk(KERN_INFO "Starting Cobalt Timer system clock\n"); | 54 | printk(KERN_INFO "Starting Cobalt Timer system clock\n"); |
@@ -242,6 +237,8 @@ void __init visws_early_detect(void) | |||
242 | x86_init.irqs.pre_vector_init = visws_pre_intr_init; | 237 | x86_init.irqs.pre_vector_init = visws_pre_intr_init; |
243 | x86_init.irqs.trap_init = visws_trap_init; | 238 | x86_init.irqs.trap_init = visws_trap_init; |
244 | x86_init.timers.timer_init = visws_time_init; | 239 | x86_init.timers.timer_init = visws_time_init; |
240 | x86_init.pci.init = pci_visws_init; | ||
241 | x86_init.pci.init_irq = x86_init_noop; | ||
245 | 242 | ||
246 | /* | 243 | /* |
247 | * Install reboot quirks: | 244 | * Install reboot quirks: |
@@ -508,7 +505,7 @@ static struct irq_chip cobalt_irq_type = { | |||
508 | */ | 505 | */ |
509 | static unsigned int startup_piix4_master_irq(unsigned int irq) | 506 | static unsigned int startup_piix4_master_irq(unsigned int irq) |
510 | { | 507 | { |
511 | init_8259A(0); | 508 | legacy_pic->init(0); |
512 | 509 | ||
513 | return startup_cobalt_irq(irq); | 510 | return startup_cobalt_irq(irq); |
514 | } | 511 | } |
@@ -532,9 +529,6 @@ static struct irq_chip piix4_master_irq_type = { | |||
532 | 529 | ||
533 | static struct irq_chip piix4_virtual_irq_type = { | 530 | static struct irq_chip piix4_virtual_irq_type = { |
534 | .name = "PIIX4-virtual", | 531 | .name = "PIIX4-virtual", |
535 | .shutdown = disable_8259A_irq, | ||
536 | .enable = enable_8259A_irq, | ||
537 | .disable = disable_8259A_irq, | ||
538 | }; | 532 | }; |
539 | 533 | ||
540 | 534 | ||
@@ -559,7 +553,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) | |||
559 | struct irq_desc *desc; | 553 | struct irq_desc *desc; |
560 | unsigned long flags; | 554 | unsigned long flags; |
561 | 555 | ||
562 | spin_lock_irqsave(&i8259A_lock, flags); | 556 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
563 | 557 | ||
564 | /* Find out what's interrupting in the PIIX4 master 8259 */ | 558 | /* Find out what's interrupting in the PIIX4 master 8259 */ |
565 | outb(0x0c, 0x20); /* OCW3 Poll command */ | 559 | outb(0x0c, 0x20); /* OCW3 Poll command */ |
@@ -596,7 +590,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) | |||
596 | outb(0x60 + realirq, 0x20); | 590 | outb(0x60 + realirq, 0x20); |
597 | } | 591 | } |
598 | 592 | ||
599 | spin_unlock_irqrestore(&i8259A_lock, flags); | 593 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
600 | 594 | ||
601 | desc = irq_to_desc(realirq); | 595 | desc = irq_to_desc(realirq); |
602 | 596 | ||
@@ -609,12 +603,12 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) | |||
609 | handle_IRQ_event(realirq, desc->action); | 603 | handle_IRQ_event(realirq, desc->action); |
610 | 604 | ||
611 | if (!(desc->status & IRQ_DISABLED)) | 605 | if (!(desc->status & IRQ_DISABLED)) |
612 | enable_8259A_irq(realirq); | 606 | legacy_pic->chip->unmask(realirq); |
613 | 607 | ||
614 | return IRQ_HANDLED; | 608 | return IRQ_HANDLED; |
615 | 609 | ||
616 | out_unlock: | 610 | out_unlock: |
617 | spin_unlock_irqrestore(&i8259A_lock, flags); | 611 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
618 | return IRQ_NONE; | 612 | return IRQ_NONE; |
619 | } | 613 | } |
620 | 614 | ||
@@ -628,6 +622,12 @@ static struct irqaction cascade_action = { | |||
628 | .name = "cascade", | 622 | .name = "cascade", |
629 | }; | 623 | }; |
630 | 624 | ||
625 | static inline void set_piix4_virtual_irq_type(void) | ||
626 | { | ||
627 | piix4_virtual_irq_type.shutdown = i8259A_chip.mask; | ||
628 | piix4_virtual_irq_type.enable = i8259A_chip.unmask; | ||
629 | piix4_virtual_irq_type.disable = i8259A_chip.mask; | ||
630 | } | ||
631 | 631 | ||
632 | void init_VISWS_APIC_irqs(void) | 632 | void init_VISWS_APIC_irqs(void) |
633 | { | 633 | { |
@@ -653,6 +653,7 @@ void init_VISWS_APIC_irqs(void) | |||
653 | desc->chip = &piix4_master_irq_type; | 653 | desc->chip = &piix4_master_irq_type; |
654 | } | 654 | } |
655 | else if (i < CO_IRQ_APIC0) { | 655 | else if (i < CO_IRQ_APIC0) { |
656 | set_piix4_virtual_irq_type(); | ||
656 | desc->chip = &piix4_virtual_irq_type; | 657 | desc->chip = &piix4_virtual_irq_type; |
657 | } | 658 | } |
658 | else if (IS_CO_APIC(i)) { | 659 | else if (IS_CO_APIC(i)) { |
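As in the smpboot hunk earlier, the VISWS quirks now reach the 8259A through the legacy_pic indirection, which lets PIC-less platforms substitute a null implementation. A simplified sketch of the indirection; the real arch/x86 structures carry more fields, this keeps only what the hunks exercise:

    /* trimmed stand-ins for struct irq_chip and struct legacy_pic */
    struct irq_chip_sketch {
        void (*mask)(unsigned int irq);
        void (*unmask)(unsigned int irq);
    };

    struct legacy_pic_sketch {
        void (*init)(int auto_eoi);
        struct irq_chip_sketch *chip;
    };

    /* callers write legacy_pic->init(0) or legacy_pic->chip->unmask(irq)
     * instead of init_8259A(0) / enable_8259A_irq(irq) */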
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index d430e4c30193..ce9fbacb7526 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -28,11 +28,13 @@ | |||
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
30 | #include <linux/sched.h> | 30 | #include <linux/sched.h> |
31 | #include <linux/gfp.h> | ||
31 | #include <asm/vmi.h> | 32 | #include <asm/vmi.h> |
32 | #include <asm/io.h> | 33 | #include <asm/io.h> |
33 | #include <asm/fixmap.h> | 34 | #include <asm/fixmap.h> |
34 | #include <asm/apicdef.h> | 35 | #include <asm/apicdef.h> |
35 | #include <asm/apic.h> | 36 | #include <asm/apic.h> |
37 | #include <asm/pgalloc.h> | ||
36 | #include <asm/processor.h> | 38 | #include <asm/processor.h> |
37 | #include <asm/timer.h> | 39 | #include <asm/timer.h> |
38 | #include <asm/vmi_time.h> | 40 | #include <asm/vmi_time.h> |
@@ -266,30 +268,6 @@ static void vmi_nop(void) | |||
266 | { | 268 | { |
267 | } | 269 | } |
268 | 270 | ||
269 | #ifdef CONFIG_HIGHPTE | ||
270 | static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) | ||
271 | { | ||
272 | void *va = kmap_atomic(page, type); | ||
273 | |||
274 | /* | ||
275 | * Internally, the VMI ROM must map virtual addresses to physical | ||
276 | * addresses for processing MMU updates. By the time MMU updates | ||
277 | * are issued, this information is typically already lost. | ||
278 | * Fortunately, the VMI provides a cache of mapping slots for active | ||
279 | * page tables. | ||
280 | * | ||
281 | * We use slot zero for the linear mapping of physical memory, and | ||
282 | * in HIGHPTE kernels, slot 1 and 2 for KM_PTE0 and KM_PTE1. | ||
283 | * | ||
284 | * args: SLOT VA COUNT PFN | ||
285 | */ | ||
286 | BUG_ON(type != KM_PTE0 && type != KM_PTE1); | ||
287 | vmi_ops.set_linear_mapping((type - KM_PTE0)+1, va, 1, page_to_pfn(page)); | ||
288 | |||
289 | return va; | ||
290 | } | ||
291 | #endif | ||
292 | |||
293 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) | 271 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) |
294 | { | 272 | { |
295 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | 273 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); |
@@ -640,6 +618,12 @@ static inline int __init activate_vmi(void) | |||
640 | u64 reloc; | 618 | u64 reloc; |
641 | const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; | 619 | const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; |
642 | 620 | ||
621 | /* | ||
622 | * Prevent page tables from being allocated in highmem, even if | ||
623 | * CONFIG_HIGHPTE is enabled. | ||
624 | */ | ||
625 | __userpte_alloc_gfp &= ~__GFP_HIGHMEM; | ||
626 | |||
643 | if (call_vrom_func(vmi_rom, vmi_init) != 0) { | 627 | if (call_vrom_func(vmi_rom, vmi_init) != 0) { |
644 | printk(KERN_ERR "VMI ROM failed to initialize!"); | 628 | printk(KERN_ERR "VMI ROM failed to initialize!"); |
645 | return 0; | 629 | return 0; |
@@ -778,10 +762,6 @@ static inline int __init activate_vmi(void) | |||
778 | 762 | ||
779 | /* Set linear is needed in all cases */ | 763 | /* Set linear is needed in all cases */ |
780 | vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); | 764 | vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); |
781 | #ifdef CONFIG_HIGHPTE | ||
782 | if (vmi_ops.set_linear_mapping) | ||
783 | pv_mmu_ops.kmap_atomic_pte = vmi_kmap_atomic_pte; | ||
784 | #endif | ||
785 | 765 | ||
786 | /* | 766 | /* |
787 | * These MUST always be patched. Don't support indirect jumps | 767 | * These MUST always be patched. Don't support indirect jumps |
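Instead of supplying a kmap_atomic_pte hook for highmem page tables, VMI now sidesteps the problem: clearing __GFP_HIGHMEM from the user-PTE allocation mask forces all page tables into lowmem even on CONFIG_HIGHPTE kernels. A minimal sketch of the mask manipulation; the variable and default flags here are local stand-ins:

    #include <stdio.h>

    #define __GFP_HIGHMEM 0x02u  /* may-use-highmem bit */

    int main(void)
    {
        unsigned int userpte_gfp = 0x10u | __GFP_HIGHMEM;  /* hypothetical default */

        userpte_gfp &= ~__GFP_HIGHMEM;  /* PTE pages must stay in lowmem */
        printf("gfp=%#x\n", userpte_gfp);
        return 0;
    }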
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 74c92bb194df..5e1ff66ecd73 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c | |||
@@ -79,11 +79,7 @@ unsigned long vmi_tsc_khz(void) | |||
79 | 79 | ||
80 | static inline unsigned int vmi_get_timer_vector(void) | 80 | static inline unsigned int vmi_get_timer_vector(void) |
81 | { | 81 | { |
82 | #ifdef CONFIG_X86_IO_APIC | 82 | return IRQ0_VECTOR; |
83 | return FIRST_DEVICE_VECTOR; | ||
84 | #else | ||
85 | return FIRST_EXTERNAL_VECTOR; | ||
86 | #endif | ||
87 | } | 83 | } |
88 | 84 | ||
89 | /** vmi clockchip */ | 85 | /** vmi clockchip */ |
@@ -171,7 +167,7 @@ static int vmi_timer_next_event(unsigned long delta, | |||
171 | { | 167 | { |
172 | /* Unfortunately, set_next_event interface only passes relative | 168 | /* Unfortunately, set_next_event interface only passes relative |
173 | * expiry, but we want absolute expiry. It'd be better if were | 169 | * expiry, but we want absolute expiry. It'd be better if were |
174 | * were passed an aboslute expiry, since a bunch of time may | 170 | * were passed an absolute expiry, since a bunch of time may |
175 | * have been stolen between the time the delta is computed and | 171 | * have been stolen between the time the delta is computed and |
176 | * when we set the alarm below. */ | 172 | * when we set the alarm below. */ |
177 | cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT)); | 173 | cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT)); |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index f92a0da608cb..2cc249718c46 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -291,8 +291,8 @@ SECTIONS | |||
291 | .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { | 291 | .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { |
292 | __smp_locks = .; | 292 | __smp_locks = .; |
293 | *(.smp_locks) | 293 | *(.smp_locks) |
294 | __smp_locks_end = .; | ||
295 | . = ALIGN(PAGE_SIZE); | 294 | . = ALIGN(PAGE_SIZE); |
295 | __smp_locks_end = .; | ||
296 | } | 296 | } |
297 | 297 | ||
298 | #ifdef CONFIG_X86_64 | 298 | #ifdef CONFIG_X86_64 |
@@ -341,7 +341,7 @@ SECTIONS | |||
341 | * Per-cpu symbols which need to be offset from __per_cpu_load | 341 | * Per-cpu symbols which need to be offset from __per_cpu_load |
342 | * for the boot processor. | 342 | * for the boot processor. |
343 | */ | 343 | */ |
344 | #define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load | 344 | #define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load |
345 | INIT_PER_CPU(gdt_page); | 345 | INIT_PER_CPU(gdt_page); |
346 | INIT_PER_CPU(irq_stack_union); | 346 | INIT_PER_CPU(irq_stack_union); |
347 | 347 | ||
@@ -352,7 +352,7 @@ INIT_PER_CPU(irq_stack_union); | |||
352 | "kernel image bigger than KERNEL_IMAGE_SIZE"); | 352 | "kernel image bigger than KERNEL_IMAGE_SIZE"); |
353 | 353 | ||
354 | #ifdef CONFIG_SMP | 354 | #ifdef CONFIG_SMP |
355 | . = ASSERT((per_cpu__irq_stack_union == 0), | 355 | . = ASSERT((irq_stack_union == 0), |
356 | "irq_stack_union is not at start of per-cpu area"); | 356 | "irq_stack_union is not at start of per-cpu area"); |
357 | #endif | 357 | #endif |
358 | 358 | ||
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 9055e5872ff0..1c0c6ab9c60f 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -301,7 +301,8 @@ static int __init vsyscall_init(void) | |||
301 | register_sysctl_table(kernel_root_table2); | 301 | register_sysctl_table(kernel_root_table2); |
302 | #endif | 302 | #endif |
303 | on_each_cpu(cpu_vsyscall_init, NULL, 1); | 303 | on_each_cpu(cpu_vsyscall_init, NULL, 1); |
304 | hotcpu_notifier(cpu_vsyscall_notifier, 0); | 304 | /* notifier priority > KVM */ |
305 | hotcpu_notifier(cpu_vsyscall_notifier, 30); | ||
305 | return 0; | 306 | return 0; |
306 | } | 307 | } |
307 | 308 | ||
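Hotplug notifiers run in descending priority order, so registering the vsyscall notifier at 30 ensures it sees a CPU before KVM's lower-priority callback does. A kernel-flavored sketch of registering at an explicit priority; the callback body is a placeholder:

    #include <linux/cpu.h>
    #include <linux/init.h>
    #include <linux/notifier.h>

    static int sketch_cpu_notifier(struct notifier_block *nb,
                                   unsigned long action, void *hcpu)
    {
        /* react to CPU_ONLINE etc.; runs before lower-priority notifiers */
        return NOTIFY_OK;
    }

    static int __init sketch_init(void)
    {
        hotcpu_notifier(sketch_cpu_notifier, 30);  /* 30 outranks KVM here */
        return 0;
    }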
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ccd179dec36e..61a1e8c7e19f 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -4,9 +4,11 @@ | |||
4 | * For licencing details see kernel-base/COPYING | 4 | * For licencing details see kernel-base/COPYING |
5 | */ | 5 | */ |
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/ioport.h> | ||
7 | 8 | ||
8 | #include <asm/bios_ebda.h> | 9 | #include <asm/bios_ebda.h> |
9 | #include <asm/paravirt.h> | 10 | #include <asm/paravirt.h> |
11 | #include <asm/pci_x86.h> | ||
10 | #include <asm/mpspec.h> | 12 | #include <asm/mpspec.h> |
11 | #include <asm/setup.h> | 13 | #include <asm/setup.h> |
12 | #include <asm/apic.h> | 14 | #include <asm/apic.h> |
@@ -70,16 +72,25 @@ struct x86_init_ops x86_init __initdata = { | |||
70 | .iommu = { | 72 | .iommu = { |
71 | .iommu_init = iommu_init_noop, | 73 | .iommu_init = iommu_init_noop, |
72 | }, | 74 | }, |
75 | |||
76 | .pci = { | ||
77 | .init = x86_default_pci_init, | ||
78 | .init_irq = x86_default_pci_init_irq, | ||
79 | .fixup_irqs = x86_default_pci_fixup_irqs, | ||
80 | }, | ||
73 | }; | 81 | }; |
74 | 82 | ||
75 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { | 83 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { |
76 | .setup_percpu_clockev = setup_secondary_APIC_clock, | 84 | .setup_percpu_clockev = setup_secondary_APIC_clock, |
77 | }; | 85 | }; |
78 | 86 | ||
87 | static void default_nmi_init(void) { }; | ||
88 | |||
79 | struct x86_platform_ops x86_platform = { | 89 | struct x86_platform_ops x86_platform = { |
80 | .calibrate_tsc = native_calibrate_tsc, | 90 | .calibrate_tsc = native_calibrate_tsc, |
81 | .get_wallclock = mach_get_cmos_time, | 91 | .get_wallclock = mach_get_cmos_time, |
82 | .set_wallclock = mach_set_rtc_mmss, | 92 | .set_wallclock = mach_set_rtc_mmss, |
83 | .iommu_shutdown = iommu_shutdown_noop, | 93 | .iommu_shutdown = iommu_shutdown_noop, |
84 | .is_untracked_pat_range = is_ISA_range, | 94 | .is_untracked_pat_range = is_ISA_range, |
95 | .nmi_init = default_nmi_init | ||
85 | }; | 96 | }; |
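The x86_init/x86_platform tables follow one pattern: every platform hook is a function pointer with a sane default (often a no-op), and quirky platforms overwrite the slots during early setup instead of adding #ifdefs at each call site; this hunk extends the tables with PCI and NMI-init slots. A runnable sketch of the pattern with invented names:

    #include <stdio.h>

    struct platform_ops {
        void (*pci_init)(void);
        void (*nmi_init)(void);
    };

    static void default_pci_init(void) { puts("default pci init"); }
    static void noop(void)             { /* platforms without the feature */ }

    static struct platform_ops ops = {
        .pci_init = default_pci_init,
        .nmi_init = noop,
    };

    int main(void)
    {
        /* a quirk like VISWS would overwrite ops.pci_init before this runs */
        ops.pci_init();
        ops.nmi_init();
        return 0;
    }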
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index c5ee17e8c6d9..782c3a362ec6 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -337,6 +337,7 @@ void __ref xsave_cntxt_init(void) | |||
337 | cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); | 337 | cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); |
338 | xstate_size = ebx; | 338 | xstate_size = ebx; |
339 | 339 | ||
340 | update_regset_xstate_info(xstate_size, pcntxt_mask); | ||
340 | prepare_fx_sw_frame(); | 341 | prepare_fx_sw_frame(); |
341 | 342 | ||
342 | setup_xstate_init(); | 343 | setup_xstate_init(); |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 4cd498332466..970bbd479516 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -29,6 +29,7 @@ config KVM | |||
29 | select HAVE_KVM_EVENTFD | 29 | select HAVE_KVM_EVENTFD |
30 | select KVM_APIC_ARCHITECTURE | 30 | select KVM_APIC_ARCHITECTURE |
31 | select USER_RETURN_NOTIFIER | 31 | select USER_RETURN_NOTIFIER |
32 | select KVM_MMIO | ||
32 | ---help--- | 33 | ---help--- |
33 | Support hosting fully virtualized guest machines using hardware | 34 | Support hosting fully virtualized guest machines using hardware |
34 | virtualization extensions. You will need a fairly recent | 35 | virtualization extensions. You will need a fairly recent |
@@ -65,6 +66,7 @@ config KVM_AMD | |||
65 | 66 | ||
66 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under | 67 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under |
67 | # the virtualization menu. | 68 | # the virtualization menu. |
69 | source drivers/vhost/Kconfig | ||
68 | source drivers/lguest/Kconfig | 70 | source drivers/lguest/Kconfig |
69 | source drivers/virtio/Kconfig | 71 | source drivers/virtio/Kconfig |
70 | 72 | ||
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 7e8faea4651e..4dade6ac0827 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -32,7 +32,7 @@ | |||
32 | #include <linux/module.h> | 32 | #include <linux/module.h> |
33 | #include <asm/kvm_emulate.h> | 33 | #include <asm/kvm_emulate.h> |
34 | 34 | ||
35 | #include "mmu.h" /* for is_long_mode() */ | 35 | #include "x86.h" |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * Opcode effective-address decode tables. | 38 | * Opcode effective-address decode tables. |
@@ -76,6 +76,8 @@ | |||
76 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ | 76 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ |
77 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ | 77 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ |
78 | /* Misc flags */ | 78 | /* Misc flags */ |
79 | #define Lock (1<<26) /* lock prefix is allowed for the instruction */ | ||
80 | #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ | ||
79 | #define No64 (1<<28) | 81 | #define No64 (1<<28) |
80 | /* Source 2 operand type */ | 82 | /* Source 2 operand type */ |
81 | #define Src2None (0<<29) | 83 | #define Src2None (0<<29) |
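The two new decode flags let the emulator enforce prefix and privilege rules from the tables instead of per-opcode code: Lock marks instructions where a LOCK prefix is architecturally legal, Priv marks those that fault with #GP unless CPL is 0. A runnable sketch of the checks such flags enable; the decoder state around them is hypothetical:

    #include <stdio.h>

    #define Lock (1u << 26)  /* lock prefix is allowed for the instruction */
    #define Priv (1u << 27)  /* instruction faults if current CPL != 0 */

    static int check_insn(unsigned int d, int has_lock_prefix, int cpl)
    {
        if (has_lock_prefix && !(d & Lock))
            return -1;  /* emulator would inject #UD */
        if ((d & Priv) && cpl != 0)
            return -2;  /* emulator would inject #GP(0) */
        return 0;
    }

    int main(void)
    {
        printf("%d\n", check_insn(Priv, 0, 3));  /* -2: privileged op at CPL 3 */
        printf("%d\n", check_insn(Lock, 1, 0));  /*  0: lockable op */
        return 0;
    }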
@@ -88,39 +90,40 @@ | |||
88 | enum { | 90 | enum { |
89 | Group1_80, Group1_81, Group1_82, Group1_83, | 91 | Group1_80, Group1_81, Group1_82, Group1_83, |
90 | Group1A, Group3_Byte, Group3, Group4, Group5, Group7, | 92 | Group1A, Group3_Byte, Group3, Group4, Group5, Group7, |
93 | Group8, Group9, | ||
91 | }; | 94 | }; |
92 | 95 | ||
93 | static u32 opcode_table[256] = { | 96 | static u32 opcode_table[256] = { |
94 | /* 0x00 - 0x07 */ | 97 | /* 0x00 - 0x07 */ |
95 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 98 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
96 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 99 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
97 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, | 100 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
98 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | 101 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, |
99 | /* 0x08 - 0x0F */ | 102 | /* 0x08 - 0x0F */ |
100 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 103 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
101 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 104 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
102 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, | 105 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
103 | ImplicitOps | Stack | No64, 0, | 106 | ImplicitOps | Stack | No64, 0, |
104 | /* 0x10 - 0x17 */ | 107 | /* 0x10 - 0x17 */ |
105 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 108 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
106 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 109 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
107 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, | 110 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
108 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | 111 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, |
109 | /* 0x18 - 0x1F */ | 112 | /* 0x18 - 0x1F */ |
110 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 113 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
111 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 114 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
112 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, | 115 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
113 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | 116 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, |
114 | /* 0x20 - 0x27 */ | 117 | /* 0x20 - 0x27 */ |
115 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 118 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
116 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 119 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
117 | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, | 120 | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, |
118 | /* 0x28 - 0x2F */ | 121 | /* 0x28 - 0x2F */ |
119 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 122 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
120 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 123 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
121 | 0, 0, 0, 0, | 124 | 0, 0, 0, 0, |
122 | /* 0x30 - 0x37 */ | 125 | /* 0x30 - 0x37 */ |
123 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 126 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
124 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 127 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
125 | 0, 0, 0, 0, | 128 | 0, 0, 0, 0, |
126 | /* 0x38 - 0x3F */ | 129 | /* 0x38 - 0x3F */ |
@@ -156,7 +159,7 @@ static u32 opcode_table[256] = { | |||
156 | Group | Group1_80, Group | Group1_81, | 159 | Group | Group1_80, Group | Group1_81, |
157 | Group | Group1_82, Group | Group1_83, | 160 | Group | Group1_82, Group | Group1_83, |
158 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 161 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
159 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 162 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
160 | /* 0x88 - 0x8F */ | 163 | /* 0x88 - 0x8F */ |
161 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, | 164 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, |
162 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, | 165 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, |
@@ -210,7 +213,7 @@ static u32 opcode_table[256] = { | |||
210 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 213 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, |
211 | /* 0xF0 - 0xF7 */ | 214 | /* 0xF0 - 0xF7 */ |
212 | 0, 0, 0, 0, | 215 | 0, 0, 0, 0, |
213 | ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3, | 216 | ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, |
214 | /* 0xF8 - 0xFF */ | 217 | /* 0xF8 - 0xFF */ |
215 | ImplicitOps, 0, ImplicitOps, ImplicitOps, | 218 | ImplicitOps, 0, ImplicitOps, ImplicitOps, |
216 | ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, | 219 | ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, |
@@ -218,16 +221,20 @@ static u32 opcode_table[256] = { | |||
218 | 221 | ||
219 | static u32 twobyte_table[256] = { | 222 | static u32 twobyte_table[256] = { |
220 | /* 0x00 - 0x0F */ | 223 | /* 0x00 - 0x0F */ |
221 | 0, Group | GroupDual | Group7, 0, 0, 0, ImplicitOps, ImplicitOps, 0, | 224 | 0, Group | GroupDual | Group7, 0, 0, |
222 | ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, | 225 | 0, ImplicitOps, ImplicitOps | Priv, 0, |
226 | ImplicitOps | Priv, ImplicitOps | Priv, 0, 0, | ||
227 | 0, ImplicitOps | ModRM, 0, 0, | ||
223 | /* 0x10 - 0x1F */ | 228 | /* 0x10 - 0x1F */ |
224 | 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, | 229 | 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, |
225 | /* 0x20 - 0x2F */ | 230 | /* 0x20 - 0x2F */ |
226 | ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0, | 231 | ModRM | ImplicitOps | Priv, ModRM | Priv, |
232 | ModRM | ImplicitOps | Priv, ModRM | Priv, | ||
233 | 0, 0, 0, 0, | ||
227 | 0, 0, 0, 0, 0, 0, 0, 0, | 234 | 0, 0, 0, 0, 0, 0, 0, 0, |
228 | /* 0x30 - 0x3F */ | 235 | /* 0x30 - 0x3F */ |
229 | ImplicitOps, 0, ImplicitOps, 0, | 236 | ImplicitOps | Priv, 0, ImplicitOps | Priv, 0, |
230 | ImplicitOps, ImplicitOps, 0, 0, | 237 | ImplicitOps, ImplicitOps | Priv, 0, 0, |
231 | 0, 0, 0, 0, 0, 0, 0, 0, | 238 | 0, 0, 0, 0, 0, 0, 0, 0, |
232 | /* 0x40 - 0x47 */ | 239 | /* 0x40 - 0x47 */ |
233 | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, | 240 | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, |
@@ -257,21 +264,23 @@ static u32 twobyte_table[256] = { | |||
257 | DstMem | SrcReg | Src2CL | ModRM, 0, 0, | 264 | DstMem | SrcReg | Src2CL | ModRM, 0, 0, |
258 | /* 0xA8 - 0xAF */ | 265 | /* 0xA8 - 0xAF */ |
259 | ImplicitOps | Stack, ImplicitOps | Stack, | 266 | ImplicitOps | Stack, ImplicitOps | Stack, |
260 | 0, DstMem | SrcReg | ModRM | BitOp, | 267 | 0, DstMem | SrcReg | ModRM | BitOp | Lock, |
261 | DstMem | SrcReg | Src2ImmByte | ModRM, | 268 | DstMem | SrcReg | Src2ImmByte | ModRM, |
262 | DstMem | SrcReg | Src2CL | ModRM, | 269 | DstMem | SrcReg | Src2CL | ModRM, |
263 | ModRM, 0, | 270 | ModRM, 0, |
264 | /* 0xB0 - 0xB7 */ | 271 | /* 0xB0 - 0xB7 */ |
265 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, | 272 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
266 | DstMem | SrcReg | ModRM | BitOp, | 273 | 0, DstMem | SrcReg | ModRM | BitOp | Lock, |
267 | 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, | 274 | 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, |
268 | DstReg | SrcMem16 | ModRM | Mov, | 275 | DstReg | SrcMem16 | ModRM | Mov, |
269 | /* 0xB8 - 0xBF */ | 276 | /* 0xB8 - 0xBF */ |
270 | 0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp, | 277 | 0, 0, |
278 | Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock, | ||
271 | 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, | 279 | 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, |
272 | DstReg | SrcMem16 | ModRM | Mov, | 280 | DstReg | SrcMem16 | ModRM | Mov, |
273 | /* 0xC0 - 0xCF */ | 281 | /* 0xC0 - 0xCF */ |
274 | 0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM, | 282 | 0, 0, 0, DstMem | SrcReg | ModRM | Mov, |
283 | 0, 0, 0, Group | GroupDual | Group9, | ||
275 | 0, 0, 0, 0, 0, 0, 0, 0, | 284 | 0, 0, 0, 0, 0, 0, 0, 0, |
276 | /* 0xD0 - 0xDF */ | 285 | /* 0xD0 - 0xDF */ |
277 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 286 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
@@ -283,25 +292,41 @@ static u32 twobyte_table[256] = { | |||
283 | 292 | ||
284 | static u32 group_table[] = { | 293 | static u32 group_table[] = { |
285 | [Group1_80*8] = | 294 | [Group1_80*8] = |
286 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 295 | ByteOp | DstMem | SrcImm | ModRM | Lock, |
287 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 296 | ByteOp | DstMem | SrcImm | ModRM | Lock, |
288 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 297 | ByteOp | DstMem | SrcImm | ModRM | Lock, |
289 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 298 | ByteOp | DstMem | SrcImm | ModRM | Lock, |
299 | ByteOp | DstMem | SrcImm | ModRM | Lock, | ||
300 | ByteOp | DstMem | SrcImm | ModRM | Lock, | ||
301 | ByteOp | DstMem | SrcImm | ModRM | Lock, | ||
302 | ByteOp | DstMem | SrcImm | ModRM, | ||
290 | [Group1_81*8] = | 303 | [Group1_81*8] = |
291 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | 304 | DstMem | SrcImm | ModRM | Lock, |
292 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | 305 | DstMem | SrcImm | ModRM | Lock, |
293 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | 306 | DstMem | SrcImm | ModRM | Lock, |
294 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | 307 | DstMem | SrcImm | ModRM | Lock, |
308 | DstMem | SrcImm | ModRM | Lock, | ||
309 | DstMem | SrcImm | ModRM | Lock, | ||
310 | DstMem | SrcImm | ModRM | Lock, | ||
311 | DstMem | SrcImm | ModRM, | ||
295 | [Group1_82*8] = | 312 | [Group1_82*8] = |
296 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 313 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, |
297 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 314 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, |
298 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 315 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, |
299 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 316 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, |
317 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, | ||
318 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, | ||
319 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, | ||
320 | ByteOp | DstMem | SrcImm | ModRM | No64, | ||
300 | [Group1_83*8] = | 321 | [Group1_83*8] = |
301 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, | 322 | DstMem | SrcImmByte | ModRM | Lock, |
302 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, | 323 | DstMem | SrcImmByte | ModRM | Lock, |
303 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, | 324 | DstMem | SrcImmByte | ModRM | Lock, |
304 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, | 325 | DstMem | SrcImmByte | ModRM | Lock, |
326 | DstMem | SrcImmByte | ModRM | Lock, | ||
327 | DstMem | SrcImmByte | ModRM | Lock, | ||
328 | DstMem | SrcImmByte | ModRM | Lock, | ||
329 | DstMem | SrcImmByte | ModRM, | ||
305 | [Group1A*8] = | 330 | [Group1A*8] = |
306 | DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, | 331 | DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, |
307 | [Group3_Byte*8] = | 332 | [Group3_Byte*8] = |
@@ -320,24 +345,39 @@ static u32 group_table[] = { | |||
320 | SrcMem | ModRM | Stack, 0, | 345 | SrcMem | ModRM | Stack, 0, |
321 | SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, | 346 | SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, |
322 | [Group7*8] = | 347 | [Group7*8] = |
323 | 0, 0, ModRM | SrcMem, ModRM | SrcMem, | 348 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, |
324 | SrcNone | ModRM | DstMem | Mov, 0, | 349 | SrcNone | ModRM | DstMem | Mov, 0, |
325 | SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, | 350 | SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv, |
351 | [Group8*8] = | ||
352 | 0, 0, 0, 0, | ||
353 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, | ||
354 | DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, | ||
355 | [Group9*8] = | ||
356 | 0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0, | ||
326 | }; | 357 | }; |
327 | 358 | ||
328 | static u32 group2_table[] = { | 359 | static u32 group2_table[] = { |
329 | [Group7*8] = | 360 | [Group7*8] = |
330 | SrcNone | ModRM, 0, 0, SrcNone | ModRM, | 361 | SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM, |
331 | SrcNone | ModRM | DstMem | Mov, 0, | 362 | SrcNone | ModRM | DstMem | Mov, 0, |
332 | SrcMem16 | ModRM | Mov, 0, | 363 | SrcMem16 | ModRM | Mov, 0, |
364 | [Group9*8] = | ||
365 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
333 | }; | 366 | }; |
334 | 367 | ||
335 | /* EFLAGS bit definitions. */ | 368 | /* EFLAGS bit definitions. */ |
369 | #define EFLG_ID (1<<21) | ||
370 | #define EFLG_VIP (1<<20) | ||
371 | #define EFLG_VIF (1<<19) | ||
372 | #define EFLG_AC (1<<18) | ||
336 | #define EFLG_VM (1<<17) | 373 | #define EFLG_VM (1<<17) |
337 | #define EFLG_RF (1<<16) | 374 | #define EFLG_RF (1<<16) |
375 | #define EFLG_IOPL (3<<12) | ||
376 | #define EFLG_NT (1<<14) | ||
338 | #define EFLG_OF (1<<11) | 377 | #define EFLG_OF (1<<11) |
339 | #define EFLG_DF (1<<10) | 378 | #define EFLG_DF (1<<10) |
340 | #define EFLG_IF (1<<9) | 379 | #define EFLG_IF (1<<9) |
380 | #define EFLG_TF (1<<8) | ||
341 | #define EFLG_SF (1<<7) | 381 | #define EFLG_SF (1<<7) |
342 | #define EFLG_ZF (1<<6) | 382 | #define EFLG_ZF (1<<6) |
343 | #define EFLG_AF (1<<4) | 383 | #define EFLG_AF (1<<4) |
@@ -606,7 +646,7 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | |||
606 | 646 | ||
607 | if (linear < fc->start || linear >= fc->end) { | 647 | if (linear < fc->start || linear >= fc->end) { |
608 | size = min(15UL, PAGE_SIZE - offset_in_page(linear)); | 648 | size = min(15UL, PAGE_SIZE - offset_in_page(linear)); |
609 | rc = ops->read_std(linear, fc->data, size, ctxt->vcpu); | 649 | rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL); |
610 | if (rc) | 650 | if (rc) |
611 | return rc; | 651 | return rc; |
612 | fc->start = linear; | 652 | fc->start = linear; |
@@ -661,11 +701,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, | |||
661 | op_bytes = 3; | 701 | op_bytes = 3; |
662 | *address = 0; | 702 | *address = 0; |
663 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, | 703 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, |
664 | ctxt->vcpu); | 704 | ctxt->vcpu, NULL); |
665 | if (rc) | 705 | if (rc) |
666 | return rc; | 706 | return rc; |
667 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, | 707 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, |
668 | ctxt->vcpu); | 708 | ctxt->vcpu, NULL); |
669 | return rc; | 709 | return rc; |
670 | } | 710 | } |
671 | 711 | ||
@@ -889,6 +929,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
889 | 929 | ||
890 | switch (mode) { | 930 | switch (mode) { |
891 | case X86EMUL_MODE_REAL: | 931 | case X86EMUL_MODE_REAL: |
932 | case X86EMUL_MODE_VM86: | ||
892 | case X86EMUL_MODE_PROT16: | 933 | case X86EMUL_MODE_PROT16: |
893 | def_op_bytes = def_ad_bytes = 2; | 934 | def_op_bytes = def_ad_bytes = 2; |
894 | break; | 935 | break; |
@@ -975,7 +1016,7 @@ done_prefixes: | |||
975 | } | 1016 | } |
976 | 1017 | ||
977 | if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | 1018 | if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { |
978 | kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");; | 1019 | kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction"); |
979 | return -1; | 1020 | return -1; |
980 | } | 1021 | } |
981 | 1022 | ||
@@ -1196,13 +1237,56 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, | |||
1196 | rc = ops->read_emulated(register_address(c, ss_base(ctxt), | 1237 | rc = ops->read_emulated(register_address(c, ss_base(ctxt), |
1197 | c->regs[VCPU_REGS_RSP]), | 1238 | c->regs[VCPU_REGS_RSP]), |
1198 | dest, len, ctxt->vcpu); | 1239 | dest, len, ctxt->vcpu); |
1199 | if (rc != 0) | 1240 | if (rc != X86EMUL_CONTINUE) |
1200 | return rc; | 1241 | return rc; |
1201 | 1242 | ||
1202 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], len); | 1243 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], len); |
1203 | return rc; | 1244 | return rc; |
1204 | } | 1245 | } |
1205 | 1246 | ||
1247 | static int emulate_popf(struct x86_emulate_ctxt *ctxt, | ||
1248 | struct x86_emulate_ops *ops, | ||
1249 | void *dest, int len) | ||
1250 | { | ||
1251 | int rc; | ||
1252 | unsigned long val, change_mask; | ||
1253 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | ||
1254 | int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu); | ||
1255 | |||
1256 | rc = emulate_pop(ctxt, ops, &val, len); | ||
1257 | if (rc != X86EMUL_CONTINUE) | ||
1258 | return rc; | ||
1259 | |||
1260 | change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF | ||
1261 | | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID; | ||
1262 | |||
1263 | switch(ctxt->mode) { | ||
1264 | case X86EMUL_MODE_PROT64: | ||
1265 | case X86EMUL_MODE_PROT32: | ||
1266 | case X86EMUL_MODE_PROT16: | ||
1267 | if (cpl == 0) | ||
1268 | change_mask |= EFLG_IOPL; | ||
1269 | if (cpl <= iopl) | ||
1270 | change_mask |= EFLG_IF; | ||
1271 | break; | ||
1272 | case X86EMUL_MODE_VM86: | ||
1273 | if (iopl < 3) { | ||
1274 | kvm_inject_gp(ctxt->vcpu, 0); | ||
1275 | return X86EMUL_PROPAGATE_FAULT; | ||
1276 | } | ||
1277 | change_mask |= EFLG_IF; | ||
1278 | break; | ||
1279 | default: /* real mode */ | ||
1280 | change_mask |= (EFLG_IOPL | EFLG_IF); | ||
1281 | break; | ||
1282 | } | ||
1283 | |||
1284 | *(unsigned long *)dest = | ||
1285 | (ctxt->eflags & ~change_mask) | (val & change_mask); | ||
1286 | |||
1287 | return rc; | ||
1288 | } | ||
1289 | |||
1206 | static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) | 1290 | static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) |
1207 | { | 1291 | { |
1208 | struct decode_cache *c = &ctxt->decode; | 1292 | struct decode_cache *c = &ctxt->decode; |
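The new emulate_popf() encodes the architectural POPF rules: the arithmetic and trace flags are always writable, IOPL changes only at CPL 0, IF changes only when CPL <= IOPL, VM86 needs IOPL 3 or takes #GP, and real mode may write everything in the mask. A runnable sketch of the final merge step with one invented protected-mode case:

    #include <stdio.h>

    #define EFLG_IF   (1ul << 9)
    #define EFLG_IOPL (3ul << 12)

    int main(void)
    {
        unsigned long eflags = EFLG_IOPL;     /* current flags, IOPL == 3 */
        unsigned long val    = EFLG_IF;       /* value popped off the stack */
        unsigned long mask   = EFLG_IF;       /* protected mode, CPL <= IOPL */

        eflags = (eflags & ~mask) | (val & mask);
        printf("eflags=%#lx\n", eflags);      /* IF taken, IOPL preserved */
        return 0;
    }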
@@ -1225,7 +1309,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | |||
1225 | if (rc != 0) | 1309 | if (rc != 0) |
1226 | return rc; | 1310 | return rc; |
1227 | 1311 | ||
1228 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg); | 1312 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg); |
1229 | return rc; | 1313 | return rc; |
1230 | } | 1314 | } |
1231 | 1315 | ||
@@ -1370,7 +1454,7 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | |||
1370 | int rc; | 1454 | int rc; |
1371 | 1455 | ||
1372 | rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); | 1456 | rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); |
1373 | if (rc != 0) | 1457 | if (rc != X86EMUL_CONTINUE) |
1374 | return rc; | 1458 | return rc; |
1375 | 1459 | ||
1376 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || | 1460 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || |
@@ -1385,7 +1469,7 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | |||
1385 | (u32) c->regs[VCPU_REGS_RBX]; | 1469 | (u32) c->regs[VCPU_REGS_RBX]; |
1386 | 1470 | ||
1387 | rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); | 1471 | rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); |
1388 | if (rc != 0) | 1472 | if (rc != X86EMUL_CONTINUE) |
1389 | return rc; | 1473 | return rc; |
1390 | ctxt->eflags |= EFLG_ZF; | 1474 | ctxt->eflags |= EFLG_ZF; |
1391 | } | 1475 | } |
@@ -1407,7 +1491,7 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | |||
1407 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); | 1491 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); |
1408 | if (rc) | 1492 | if (rc) |
1409 | return rc; | 1493 | return rc; |
1410 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS); | 1494 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS); |
1411 | return rc; | 1495 | return rc; |
1412 | } | 1496 | } |
1413 | 1497 | ||
@@ -1451,7 +1535,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
1451 | &c->dst.val, | 1535 | &c->dst.val, |
1452 | c->dst.bytes, | 1536 | c->dst.bytes, |
1453 | ctxt->vcpu); | 1537 | ctxt->vcpu); |
1454 | if (rc != 0) | 1538 | if (rc != X86EMUL_CONTINUE) |
1455 | return rc; | 1539 | return rc; |
1456 | break; | 1540 | break; |
1457 | case OP_NONE: | 1541 | case OP_NONE: |
@@ -1514,9 +1598,8 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt) | |||
1514 | u64 msr_data; | 1598 | u64 msr_data; |
1515 | 1599 | ||
1516 | /* syscall is not available in real mode */ | 1600 | /* syscall is not available in real mode */ |
1517 | if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL | 1601 | if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) |
1518 | || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) | 1602 | return X86EMUL_UNHANDLEABLE; |
1519 | return -1; | ||
1520 | 1603 | ||
1521 | setup_syscalls_segments(ctxt, &cs, &ss); | 1604 | setup_syscalls_segments(ctxt, &cs, &ss); |
1522 | 1605 | ||
@@ -1553,7 +1636,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt) | |||
1553 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); | 1636 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); |
1554 | } | 1637 | } |
1555 | 1638 | ||
1556 | return 0; | 1639 | return X86EMUL_CONTINUE; |
1557 | } | 1640 | } |
1558 | 1641 | ||
1559 | static int | 1642 | static int |
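These hunks replace ad-hoc -1/0 returns with the emulator's symbolic result codes, so callers can tell "a fault was already queued for the guest" apart from "cannot emulate at all". A sketch of the convention as it reads here — the enum values are stand-ins, not the kernel's definitions:

```c
#include <stdio.h>

/* Symbolic emulator results modeled on the X86EMUL_* convention. */
enum emul_result {
    EMUL_CONTINUE,        /* instruction fully emulated, keep going  */
    EMUL_UNHANDLEABLE,    /* cannot emulate; caller must bail out    */
    EMUL_PROPAGATE_FAULT, /* a guest exception (e.g. #GP) was queued */
};

static enum emul_result emulate_step(int can_handle, int faulted)
{
    if (!can_handle)
        return EMUL_UNHANDLEABLE;
    if (faulted)
        return EMUL_PROPAGATE_FAULT; /* exception already injected */
    return EMUL_CONTINUE;
}

int main(void)
{
    /* Callers compare against CONTINUE instead of testing for -1/0,
     * so fault injection and hard failures stay distinguishable. */
    enum emul_result rc = emulate_step(1, 0);
    if (rc != EMUL_CONTINUE)
        printf("stop: %d\n", rc);
    else
        printf("continue\n");
    return 0;
}
```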
@@ -1563,22 +1646,17 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt) | |||
1563 | struct kvm_segment cs, ss; | 1646 | struct kvm_segment cs, ss; |
1564 | u64 msr_data; | 1647 | u64 msr_data; |
1565 | 1648 | ||
1566 | /* inject #UD if LOCK prefix is used */ | 1649 | /* inject #GP if in real mode */ |
1567 | if (c->lock_prefix) | 1650 | if (ctxt->mode == X86EMUL_MODE_REAL) { |
1568 | return -1; | ||
1569 | |||
1570 | /* inject #GP if in real mode or paging is disabled */ | ||
1571 | if (ctxt->mode == X86EMUL_MODE_REAL || | ||
1572 | !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) { | ||
1573 | kvm_inject_gp(ctxt->vcpu, 0); | 1651 | kvm_inject_gp(ctxt->vcpu, 0); |
1574 | return -1; | 1652 | return X86EMUL_UNHANDLEABLE; |
1575 | } | 1653 | } |
1576 | 1654 | ||
1577 | /* XXX sysenter/sysexit have not been tested in 64bit mode. | 1655 | /* XXX sysenter/sysexit have not been tested in 64bit mode. |
1578 | * Therefore, we inject an #UD. | 1656 | * Therefore, we inject an #UD. |
1579 | */ | 1657 | */ |
1580 | if (ctxt->mode == X86EMUL_MODE_PROT64) | 1658 | if (ctxt->mode == X86EMUL_MODE_PROT64) |
1581 | return -1; | 1659 | return X86EMUL_UNHANDLEABLE; |
1582 | 1660 | ||
1583 | setup_syscalls_segments(ctxt, &cs, &ss); | 1661 | setup_syscalls_segments(ctxt, &cs, &ss); |
1584 | 1662 | ||
@@ -1587,13 +1665,13 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt) | |||
1587 | case X86EMUL_MODE_PROT32: | 1665 | case X86EMUL_MODE_PROT32: |
1588 | if ((msr_data & 0xfffc) == 0x0) { | 1666 | if ((msr_data & 0xfffc) == 0x0) { |
1589 | kvm_inject_gp(ctxt->vcpu, 0); | 1667 | kvm_inject_gp(ctxt->vcpu, 0); |
1590 | return -1; | 1668 | return X86EMUL_PROPAGATE_FAULT; |
1591 | } | 1669 | } |
1592 | break; | 1670 | break; |
1593 | case X86EMUL_MODE_PROT64: | 1671 | case X86EMUL_MODE_PROT64: |
1594 | if (msr_data == 0x0) { | 1672 | if (msr_data == 0x0) { |
1595 | kvm_inject_gp(ctxt->vcpu, 0); | 1673 | kvm_inject_gp(ctxt->vcpu, 0); |
1596 | return -1; | 1674 | return X86EMUL_PROPAGATE_FAULT; |
1597 | } | 1675 | } |
1598 | break; | 1676 | break; |
1599 | } | 1677 | } |
@@ -1618,7 +1696,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt) | |||
1618 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); | 1696 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); |
1619 | c->regs[VCPU_REGS_RSP] = msr_data; | 1697 | c->regs[VCPU_REGS_RSP] = msr_data; |
1620 | 1698 | ||
1621 | return 0; | 1699 | return X86EMUL_CONTINUE; |
1622 | } | 1700 | } |
1623 | 1701 | ||
1624 | static int | 1702 | static int |
@@ -1629,21 +1707,11 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1629 | u64 msr_data; | 1707 | u64 msr_data; |
1630 | int usermode; | 1708 | int usermode; |
1631 | 1709 | ||
1632 | /* inject #UD if LOCK prefix is used */ | 1710 | /* inject #GP if in real mode or Virtual 8086 mode */ |
1633 | if (c->lock_prefix) | 1711 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1634 | return -1; | 1712 | ctxt->mode == X86EMUL_MODE_VM86) { |
1635 | |||
1636 | /* inject #GP if in real mode or paging is disabled */ | ||
1637 | if (ctxt->mode == X86EMUL_MODE_REAL | ||
1638 | || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) { | ||
1639 | kvm_inject_gp(ctxt->vcpu, 0); | ||
1640 | return -1; | ||
1641 | } | ||
1642 | |||
1643 | /* sysexit must be called from CPL 0 */ | ||
1644 | if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) { | ||
1645 | kvm_inject_gp(ctxt->vcpu, 0); | 1713 | kvm_inject_gp(ctxt->vcpu, 0); |
1646 | return -1; | 1714 | return X86EMUL_UNHANDLEABLE; |
1647 | } | 1715 | } |
1648 | 1716 | ||
1649 | setup_syscalls_segments(ctxt, &cs, &ss); | 1717 | setup_syscalls_segments(ctxt, &cs, &ss); |
@@ -1661,7 +1729,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1661 | cs.selector = (u16)(msr_data + 16); | 1729 | cs.selector = (u16)(msr_data + 16); |
1662 | if ((msr_data & 0xfffc) == 0x0) { | 1730 | if ((msr_data & 0xfffc) == 0x0) { |
1663 | kvm_inject_gp(ctxt->vcpu, 0); | 1731 | kvm_inject_gp(ctxt->vcpu, 0); |
1664 | return -1; | 1732 | return X86EMUL_PROPAGATE_FAULT; |
1665 | } | 1733 | } |
1666 | ss.selector = (u16)(msr_data + 24); | 1734 | ss.selector = (u16)(msr_data + 24); |
1667 | break; | 1735 | break; |
@@ -1669,7 +1737,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1669 | cs.selector = (u16)(msr_data + 32); | 1737 | cs.selector = (u16)(msr_data + 32); |
1670 | if (msr_data == 0x0) { | 1738 | if (msr_data == 0x0) { |
1671 | kvm_inject_gp(ctxt->vcpu, 0); | 1739 | kvm_inject_gp(ctxt->vcpu, 0); |
1672 | return -1; | 1740 | return X86EMUL_PROPAGATE_FAULT; |
1673 | } | 1741 | } |
1674 | ss.selector = cs.selector + 8; | 1742 | ss.selector = cs.selector + 8; |
1675 | cs.db = 0; | 1743 | cs.db = 0; |
@@ -1685,7 +1753,58 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1685 | c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX]; | 1753 | c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX]; |
1686 | c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX]; | 1754 | c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX]; |
1687 | 1755 | ||
1688 | return 0; | 1756 | return X86EMUL_CONTINUE; |
1757 | } | ||
1758 | |||
1759 | static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | ||
1760 | { | ||
1761 | int iopl; | ||
1762 | if (ctxt->mode == X86EMUL_MODE_REAL) | ||
1763 | return false; | ||
1764 | if (ctxt->mode == X86EMUL_MODE_VM86) | ||
1765 | return true; | ||
1766 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | ||
1767 | return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl; | ||
1768 | } | ||
1769 | |||
1770 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | ||
1771 | struct x86_emulate_ops *ops, | ||
1772 | u16 port, u16 len) | ||
1773 | { | ||
1774 | struct kvm_segment tr_seg; | ||
1775 | int r; | ||
1776 | u16 io_bitmap_ptr; | ||
1777 | u8 perm, bit_idx = port & 0x7; | ||
1778 | unsigned mask = (1 << len) - 1; | ||
1779 | |||
1780 | kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR); | ||
1781 | if (tr_seg.unusable) | ||
1782 | return false; | ||
1783 | if (tr_seg.limit < 103) | ||
1784 | return false; | ||
1785 | r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, | ||
1786 | NULL); | ||
1787 | if (r != X86EMUL_CONTINUE) | ||
1788 | return false; | ||
1789 | if (io_bitmap_ptr + port/8 > tr_seg.limit) | ||
1790 | return false; | ||
1791 | r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1, | ||
1792 | ctxt->vcpu, NULL); | ||
1793 | if (r != X86EMUL_CONTINUE) | ||
1794 | return false; | ||
1795 | if ((perm >> bit_idx) & mask) | ||
1796 | return false; | ||
1797 | return true; | ||
1798 | } | ||
1799 | |||
1800 | static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, | ||
1801 | struct x86_emulate_ops *ops, | ||
1802 | u16 port, u16 len) | ||
1803 | { | ||
1804 | if (emulator_bad_iopl(ctxt)) | ||
1805 | if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) | ||
1806 | return false; | ||
1807 | return true; | ||
1689 | } | 1808 | } |
1690 | 1809 | ||
1691 | int | 1810 | int |
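emulator_io_port_access_allowed above consults the TSS I/O permission bitmap: a 16-bit offset at TSS byte 102 locates the bitmap, port N maps to bit (N & 7) of byte (N / 8), and an access of len bytes is allowed only if all covered bits are clear. A self-contained model of the bit test; like the patch, it reads a single bitmap byte and so does not handle a range that crosses a byte boundary:

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Returns 1 if an access of 'len' bytes (1, 2 or 4) starting at 'port'
 * is permitted by the I/O bitmap: all covered bits must be 0. */
static int io_bitmap_allows(const uint8_t *bitmap, size_t bitmap_len,
                            uint16_t port, unsigned len)
{
    unsigned byte = port / 8;
    unsigned bit  = port & 7;
    unsigned mask = (1u << len) - 1;

    if (byte >= bitmap_len)
        return 0;                        /* beyond the limit: deny */
    return ((bitmap[byte] >> bit) & mask) == 0;
}

int main(void)
{
    uint8_t bitmap[8192];

    memset(bitmap, 0xff, sizeof(bitmap));    /* deny everything...    */
    bitmap[0x60 / 8] &= ~(1u << (0x60 & 7)); /* ...except port 0x60   */

    printf("port 0x60: %d\n",
           io_bitmap_allows(bitmap, sizeof(bitmap), 0x60, 1));
    printf("port 0x61: %d\n",
           io_bitmap_allows(bitmap, sizeof(bitmap), 0x61, 1));
    return 0;
}
```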
@@ -1709,6 +1828,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1709 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 1828 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
1710 | saved_eip = c->eip; | 1829 | saved_eip = c->eip; |
1711 | 1830 | ||
1831 | /* LOCK prefix is allowed only with some instructions */ | ||
1832 | if (c->lock_prefix && !(c->d & Lock)) { | ||
1833 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
1834 | goto done; | ||
1835 | } | ||
1836 | |||
1837 | /* Privileged instructions can be executed only at CPL 0 */ | ||
1838 | if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) { | ||
1839 | kvm_inject_gp(ctxt->vcpu, 0); | ||
1840 | goto done; | ||
1841 | } | ||
1842 | |||
1712 | if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) | 1843 | if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) |
1713 | memop = c->modrm_ea; | 1844 | memop = c->modrm_ea; |
1714 | 1845 | ||
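The two new guards gate on per-opcode decode flags: a LOCK prefix is legal only for opcodes whose table entry carries the Lock bit (#UD otherwise), and Priv opcodes require CPL 0 (#GP otherwise). A hedged sketch of this table-driven check — the flag bits and example opcodes are invented for illustration:

```c
#include <stdio.h>

#define F_LOCK (1u << 0)   /* LOCK prefix permitted */
#define F_PRIV (1u << 1)   /* requires CPL 0        */

/* Toy decode table: flags per opcode byte. */
static const unsigned opcode_flags[256] = {
    [0x01] = F_LOCK,       /* e.g. add r/m, r may be locked */
    [0xf4] = F_PRIV,       /* e.g. hlt is privileged        */
};

enum { OK, FAULT_UD, FAULT_GP };

/* Returns OK, or the exception to raise before any emulation runs. */
static int check_insn(unsigned char opcode, int lock_prefix, int cpl)
{
    unsigned d = opcode_flags[opcode];

    if (lock_prefix && !(d & F_LOCK))
        return FAULT_UD;            /* invalid lock -> #UD        */
    if ((d & F_PRIV) && cpl != 0)
        return FAULT_GP;            /* privileged at CPL>0 -> #GP */
    return OK;
}

int main(void)
{
    printf("lock hlt:  %d\n", check_insn(0xf4, 1, 0)); /* #UD */
    printf("hlt @cpl3: %d\n", check_insn(0xf4, 0, 3)); /* #GP */
    return 0;
}
```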
@@ -1749,7 +1880,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1749 | &c->src.val, | 1880 | &c->src.val, |
1750 | c->src.bytes, | 1881 | c->src.bytes, |
1751 | ctxt->vcpu); | 1882 | ctxt->vcpu); |
1752 | if (rc != 0) | 1883 | if (rc != X86EMUL_CONTINUE) |
1753 | goto done; | 1884 | goto done; |
1754 | c->src.orig_val = c->src.val; | 1885 | c->src.orig_val = c->src.val; |
1755 | } | 1886 | } |
@@ -1768,12 +1899,15 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1768 | c->dst.ptr = (void *)c->dst.ptr + | 1899 | c->dst.ptr = (void *)c->dst.ptr + |
1769 | (c->src.val & mask) / 8; | 1900 | (c->src.val & mask) / 8; |
1770 | } | 1901 | } |
1771 | if (!(c->d & Mov) && | 1902 | if (!(c->d & Mov)) { |
1772 | /* optimisation - avoid slow emulated read */ | 1903 | /* optimisation - avoid slow emulated read */ |
1773 | ((rc = ops->read_emulated((unsigned long)c->dst.ptr, | 1904 | rc = ops->read_emulated((unsigned long)c->dst.ptr, |
1774 | &c->dst.val, | 1905 | &c->dst.val, |
1775 | c->dst.bytes, ctxt->vcpu)) != 0)) | 1906 | c->dst.bytes, |
1776 | goto done; | 1907 | ctxt->vcpu); |
1908 | if (rc != X86EMUL_CONTINUE) | ||
1909 | goto done; | ||
1910 | } | ||
1777 | } | 1911 | } |
1778 | c->dst.orig_val = c->dst.val; | 1912 | c->dst.orig_val = c->dst.val; |
1779 | 1913 | ||
@@ -1876,7 +2010,12 @@ special_insn: | |||
1876 | break; | 2010 | break; |
1877 | case 0x6c: /* insb */ | 2011 | case 0x6c: /* insb */ |
1878 | case 0x6d: /* insw/insd */ | 2012 | case 0x6d: /* insw/insd */ |
1879 | if (kvm_emulate_pio_string(ctxt->vcpu, | 2013 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
2014 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | ||
2015 | kvm_inject_gp(ctxt->vcpu, 0); | ||
2016 | goto done; | ||
2017 | } | ||
2018 | if (kvm_emulate_pio_string(ctxt->vcpu, | ||
1880 | 1, | 2019 | 1, |
1881 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2020 | (c->d & ByteOp) ? 1 : c->op_bytes, |
1882 | c->rep_prefix ? | 2021 | c->rep_prefix ? |
@@ -1892,6 +2031,11 @@ special_insn: | |||
1892 | return 0; | 2031 | return 0; |
1893 | case 0x6e: /* outsb */ | 2032 | case 0x6e: /* outsb */ |
1894 | case 0x6f: /* outsw/outsd */ | 2033 | case 0x6f: /* outsw/outsd */ |
2034 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | ||
2035 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | ||
2036 | kvm_inject_gp(ctxt->vcpu, 0); | ||
2037 | goto done; | ||
2038 | } | ||
1895 | if (kvm_emulate_pio_string(ctxt->vcpu, | 2039 | if (kvm_emulate_pio_string(ctxt->vcpu, |
1896 | 0, | 2040 | 0, |
1897 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2041 | (c->d & ByteOp) ? 1 : c->op_bytes, |
@@ -1978,25 +2122,19 @@ special_insn: | |||
1978 | break; | 2122 | break; |
1979 | case 0x8e: { /* mov seg, r/m16 */ | 2123 | case 0x8e: { /* mov seg, r/m16 */ |
1980 | uint16_t sel; | 2124 | uint16_t sel; |
1981 | int type_bits; | ||
1982 | int err; | ||
1983 | 2125 | ||
1984 | sel = c->src.val; | 2126 | sel = c->src.val; |
1985 | if (c->modrm_reg == VCPU_SREG_SS) | ||
1986 | toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); | ||
1987 | 2127 | ||
1988 | if (c->modrm_reg <= 5) { | 2128 | if (c->modrm_reg == VCPU_SREG_CS || |
1989 | type_bits = (c->modrm_reg == 1) ? 9 : 1; | 2129 | c->modrm_reg > VCPU_SREG_GS) { |
1990 | err = kvm_load_segment_descriptor(ctxt->vcpu, sel, | 2130 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
1991 | type_bits, c->modrm_reg); | 2131 | goto done; |
1992 | } else { | ||
1993 | printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n", | ||
1994 | c->modrm); | ||
1995 | goto cannot_emulate; | ||
1996 | } | 2132 | } |
1997 | 2133 | ||
1998 | if (err < 0) | 2134 | if (c->modrm_reg == VCPU_SREG_SS) |
1999 | goto cannot_emulate; | 2135 | toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); |
2136 | |||
2137 | rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg); | ||
2000 | 2138 | ||
2001 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2139 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2002 | break; | 2140 | break; |
@@ -2025,7 +2163,10 @@ special_insn: | |||
2025 | c->dst.type = OP_REG; | 2163 | c->dst.type = OP_REG; |
2026 | c->dst.ptr = (unsigned long *) &ctxt->eflags; | 2164 | c->dst.ptr = (unsigned long *) &ctxt->eflags; |
2027 | c->dst.bytes = c->op_bytes; | 2165 | c->dst.bytes = c->op_bytes; |
2028 | goto pop_instruction; | 2166 | rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes); |
2167 | if (rc != X86EMUL_CONTINUE) | ||
2168 | goto done; | ||
2169 | break; | ||
2029 | case 0xa0 ... 0xa1: /* mov */ | 2170 | case 0xa0 ... 0xa1: /* mov */ |
2030 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | 2171 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; |
2031 | c->dst.val = c->src.val; | 2172 | c->dst.val = c->src.val; |
@@ -2039,11 +2180,12 @@ special_insn: | |||
2039 | c->dst.ptr = (unsigned long *)register_address(c, | 2180 | c->dst.ptr = (unsigned long *)register_address(c, |
2040 | es_base(ctxt), | 2181 | es_base(ctxt), |
2041 | c->regs[VCPU_REGS_RDI]); | 2182 | c->regs[VCPU_REGS_RDI]); |
2042 | if ((rc = ops->read_emulated(register_address(c, | 2183 | rc = ops->read_emulated(register_address(c, |
2043 | seg_override_base(ctxt, c), | 2184 | seg_override_base(ctxt, c), |
2044 | c->regs[VCPU_REGS_RSI]), | 2185 | c->regs[VCPU_REGS_RSI]), |
2045 | &c->dst.val, | 2186 | &c->dst.val, |
2046 | c->dst.bytes, ctxt->vcpu)) != 0) | 2187 | c->dst.bytes, ctxt->vcpu); |
2188 | if (rc != X86EMUL_CONTINUE) | ||
2047 | goto done; | 2189 | goto done; |
2048 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | 2190 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], |
2049 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | 2191 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes |
@@ -2058,10 +2200,11 @@ special_insn: | |||
2058 | c->src.ptr = (unsigned long *)register_address(c, | 2200 | c->src.ptr = (unsigned long *)register_address(c, |
2059 | seg_override_base(ctxt, c), | 2201 | seg_override_base(ctxt, c), |
2060 | c->regs[VCPU_REGS_RSI]); | 2202 | c->regs[VCPU_REGS_RSI]); |
2061 | if ((rc = ops->read_emulated((unsigned long)c->src.ptr, | 2203 | rc = ops->read_emulated((unsigned long)c->src.ptr, |
2062 | &c->src.val, | 2204 | &c->src.val, |
2063 | c->src.bytes, | 2205 | c->src.bytes, |
2064 | ctxt->vcpu)) != 0) | 2206 | ctxt->vcpu); |
2207 | if (rc != X86EMUL_CONTINUE) | ||
2065 | goto done; | 2208 | goto done; |
2066 | 2209 | ||
2067 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2210 | c->dst.type = OP_NONE; /* Disable writeback. */ |
@@ -2069,10 +2212,11 @@ special_insn: | |||
2069 | c->dst.ptr = (unsigned long *)register_address(c, | 2212 | c->dst.ptr = (unsigned long *)register_address(c, |
2070 | es_base(ctxt), | 2213 | es_base(ctxt), |
2071 | c->regs[VCPU_REGS_RDI]); | 2214 | c->regs[VCPU_REGS_RDI]); |
2072 | if ((rc = ops->read_emulated((unsigned long)c->dst.ptr, | 2215 | rc = ops->read_emulated((unsigned long)c->dst.ptr, |
2073 | &c->dst.val, | 2216 | &c->dst.val, |
2074 | c->dst.bytes, | 2217 | c->dst.bytes, |
2075 | ctxt->vcpu)) != 0) | 2218 | ctxt->vcpu); |
2219 | if (rc != X86EMUL_CONTINUE) | ||
2076 | goto done; | 2220 | goto done; |
2077 | 2221 | ||
2078 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); | 2222 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); |
@@ -2102,12 +2246,13 @@ special_insn: | |||
2102 | c->dst.type = OP_REG; | 2246 | c->dst.type = OP_REG; |
2103 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 2247 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
2104 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | 2248 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; |
2105 | if ((rc = ops->read_emulated(register_address(c, | 2249 | rc = ops->read_emulated(register_address(c, |
2106 | seg_override_base(ctxt, c), | 2250 | seg_override_base(ctxt, c), |
2107 | c->regs[VCPU_REGS_RSI]), | 2251 | c->regs[VCPU_REGS_RSI]), |
2108 | &c->dst.val, | 2252 | &c->dst.val, |
2109 | c->dst.bytes, | 2253 | c->dst.bytes, |
2110 | ctxt->vcpu)) != 0) | 2254 | ctxt->vcpu); |
2255 | if (rc != X86EMUL_CONTINUE) | ||
2111 | goto done; | 2256 | goto done; |
2112 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | 2257 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], |
2113 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | 2258 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes |
@@ -2163,11 +2308,9 @@ special_insn: | |||
2163 | case 0xe9: /* jmp rel */ | 2308 | case 0xe9: /* jmp rel */ |
2164 | goto jmp; | 2309 | goto jmp; |
2165 | case 0xea: /* jmp far */ | 2310 | case 0xea: /* jmp far */ |
2166 | if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9, | 2311 | if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, |
2167 | VCPU_SREG_CS) < 0) { | 2312 | VCPU_SREG_CS)) |
2168 | DPRINTF("jmp far: Failed to load CS descriptor\n"); | 2313 | goto done; |
2169 | goto cannot_emulate; | ||
2170 | } | ||
2171 | 2314 | ||
2172 | c->eip = c->src.val; | 2315 | c->eip = c->src.val; |
2173 | break; | 2316 | break; |
@@ -2185,7 +2328,13 @@ special_insn: | |||
2185 | case 0xef: /* out (e/r)ax,dx */ | 2328 | case 0xef: /* out (e/r)ax,dx */ |
2186 | port = c->regs[VCPU_REGS_RDX]; | 2329 | port = c->regs[VCPU_REGS_RDX]; |
2187 | io_dir_in = 0; | 2330 | io_dir_in = 0; |
2188 | do_io: if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, | 2331 | do_io: |
2332 | if (!emulator_io_permited(ctxt, ops, port, | ||
2333 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | ||
2334 | kvm_inject_gp(ctxt->vcpu, 0); | ||
2335 | goto done; | ||
2336 | } | ||
2337 | if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, | ||
2189 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2338 | (c->d & ByteOp) ? 1 : c->op_bytes, |
2190 | port) != 0) { | 2339 | port) != 0) { |
2191 | c->eip = saved_eip; | 2340 | c->eip = saved_eip; |
@@ -2210,13 +2359,21 @@ special_insn: | |||
2210 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2359 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2211 | break; | 2360 | break; |
2212 | case 0xfa: /* cli */ | 2361 | case 0xfa: /* cli */ |
2213 | ctxt->eflags &= ~X86_EFLAGS_IF; | 2362 | if (emulator_bad_iopl(ctxt)) |
2214 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2363 | kvm_inject_gp(ctxt->vcpu, 0); |
2364 | else { | ||
2365 | ctxt->eflags &= ~X86_EFLAGS_IF; | ||
2366 | c->dst.type = OP_NONE; /* Disable writeback. */ | ||
2367 | } | ||
2215 | break; | 2368 | break; |
2216 | case 0xfb: /* sti */ | 2369 | case 0xfb: /* sti */ |
2217 | toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); | 2370 | if (emulator_bad_iopl(ctxt)) |
2218 | ctxt->eflags |= X86_EFLAGS_IF; | 2371 | kvm_inject_gp(ctxt->vcpu, 0); |
2219 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2372 | else { |
2373 | toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); | ||
2374 | ctxt->eflags |= X86_EFLAGS_IF; | ||
2375 | c->dst.type = OP_NONE; /* Disable writeback. */ | ||
2376 | } | ||
2220 | break; | 2377 | break; |
2221 | case 0xfc: /* cld */ | 2378 | case 0xfc: /* cld */ |
2222 | ctxt->eflags &= ~EFLG_DF; | 2379 | ctxt->eflags &= ~EFLG_DF; |
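cli and sti are now gated through emulator_bad_iopl: outside real mode, toggling IF requires CPL <= IOPL, and the VM86 path is treated as always insufficient here (port I/O can still be rescued by the TSS bitmap, but cli/sti cannot). A stand-alone decision function modeling the same rule:

```c
#include <stdio.h>

enum mode { REAL, VM86, PROTECTED };

/* Mirrors the emulator_bad_iopl test: returns 1 if CLI/STI must #GP. */
static int bad_iopl(enum mode m, int cpl, int iopl)
{
    if (m == REAL)
        return 0;          /* real mode: always allowed            */
    if (m == VM86)
        return 1;          /* treated as insufficient in this path */
    return cpl > iopl;     /* protected mode: need CPL <= IOPL     */
}

int main(void)
{
    printf("%d\n", bad_iopl(PROTECTED, 3, 0)); /* 1: user code faults */
    printf("%d\n", bad_iopl(PROTECTED, 0, 0)); /* 0: kernel may cli   */
    return 0;
}
```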
@@ -2319,8 +2476,9 @@ twobyte_insn: | |||
2319 | } | 2476 | } |
2320 | break; | 2477 | break; |
2321 | case 0x05: /* syscall */ | 2478 | case 0x05: /* syscall */ |
2322 | if (emulate_syscall(ctxt) == -1) | 2479 | rc = emulate_syscall(ctxt); |
2323 | goto cannot_emulate; | 2480 | if (rc != X86EMUL_CONTINUE) |
2481 | goto done; | ||
2324 | else | 2482 | else |
2325 | goto writeback; | 2483 | goto writeback; |
2326 | break; | 2484 | break; |
@@ -2391,14 +2549,16 @@ twobyte_insn: | |||
2391 | c->dst.type = OP_NONE; | 2549 | c->dst.type = OP_NONE; |
2392 | break; | 2550 | break; |
2393 | case 0x34: /* sysenter */ | 2551 | case 0x34: /* sysenter */ |
2394 | if (emulate_sysenter(ctxt) == -1) | 2552 | rc = emulate_sysenter(ctxt); |
2395 | goto cannot_emulate; | 2553 | if (rc != X86EMUL_CONTINUE) |
2554 | goto done; | ||
2396 | else | 2555 | else |
2397 | goto writeback; | 2556 | goto writeback; |
2398 | break; | 2557 | break; |
2399 | case 0x35: /* sysexit */ | 2558 | case 0x35: /* sysexit */ |
2400 | if (emulate_sysexit(ctxt) == -1) | 2559 | rc = emulate_sysexit(ctxt); |
2401 | goto cannot_emulate; | 2560 | if (rc != X86EMUL_CONTINUE) |
2561 | goto done; | ||
2402 | else | 2562 | else |
2403 | goto writeback; | 2563 | goto writeback; |
2404 | break; | 2564 | break; |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 296aba49472a..0150affad25d 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #define pr_fmt(fmt) "pit: " fmt | 32 | #define pr_fmt(fmt) "pit: " fmt |
33 | 33 | ||
34 | #include <linux/kvm_host.h> | 34 | #include <linux/kvm_host.h> |
35 | #include <linux/slab.h> | ||
35 | 36 | ||
36 | #include "irq.h" | 37 | #include "irq.h" |
37 | #include "i8254.h" | 38 | #include "i8254.h" |
@@ -242,11 +243,11 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
242 | { | 243 | { |
243 | struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, | 244 | struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, |
244 | irq_ack_notifier); | 245 | irq_ack_notifier); |
245 | spin_lock(&ps->inject_lock); | 246 | raw_spin_lock(&ps->inject_lock); |
246 | if (atomic_dec_return(&ps->pit_timer.pending) < 0) | 247 | if (atomic_dec_return(&ps->pit_timer.pending) < 0) |
247 | atomic_inc(&ps->pit_timer.pending); | 248 | atomic_inc(&ps->pit_timer.pending); |
248 | ps->irq_ack = 1; | 249 | ps->irq_ack = 1; |
249 | spin_unlock(&ps->inject_lock); | 250 | raw_spin_unlock(&ps->inject_lock); |
250 | } | 251 | } |
251 | 252 | ||
252 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | 253 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) |
@@ -467,6 +468,9 @@ static int pit_ioport_read(struct kvm_io_device *this, | |||
467 | return -EOPNOTSUPP; | 468 | return -EOPNOTSUPP; |
468 | 469 | ||
469 | addr &= KVM_PIT_CHANNEL_MASK; | 470 | addr &= KVM_PIT_CHANNEL_MASK; |
471 | if (addr == 3) | ||
472 | return 0; | ||
473 | |||
470 | s = &pit_state->channels[addr]; | 474 | s = &pit_state->channels[addr]; |
471 | 475 | ||
472 | mutex_lock(&pit_state->lock); | 476 | mutex_lock(&pit_state->lock); |
@@ -602,7 +606,7 @@ static const struct kvm_io_device_ops speaker_dev_ops = { | |||
602 | .write = speaker_ioport_write, | 606 | .write = speaker_ioport_write, |
603 | }; | 607 | }; |
604 | 608 | ||
605 | /* Caller must have writers lock on slots_lock */ | 609 | /* Caller must hold slots_lock */ |
606 | struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | 610 | struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) |
607 | { | 611 | { |
608 | struct kvm_pit *pit; | 612 | struct kvm_pit *pit; |
@@ -621,7 +625,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
621 | 625 | ||
622 | mutex_init(&pit->pit_state.lock); | 626 | mutex_init(&pit->pit_state.lock); |
623 | mutex_lock(&pit->pit_state.lock); | 627 | mutex_lock(&pit->pit_state.lock); |
624 | spin_lock_init(&pit->pit_state.inject_lock); | 628 | raw_spin_lock_init(&pit->pit_state.inject_lock); |
625 | 629 | ||
626 | kvm->arch.vpit = pit; | 630 | kvm->arch.vpit = pit; |
627 | pit->kvm = kvm; | 631 | pit->kvm = kvm; |
@@ -642,13 +646,13 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
642 | kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); | 646 | kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); |
643 | 647 | ||
644 | kvm_iodevice_init(&pit->dev, &pit_dev_ops); | 648 | kvm_iodevice_init(&pit->dev, &pit_dev_ops); |
645 | ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev); | 649 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev); |
646 | if (ret < 0) | 650 | if (ret < 0) |
647 | goto fail; | 651 | goto fail; |
648 | 652 | ||
649 | if (flags & KVM_PIT_SPEAKER_DUMMY) { | 653 | if (flags & KVM_PIT_SPEAKER_DUMMY) { |
650 | kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); | 654 | kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); |
651 | ret = __kvm_io_bus_register_dev(&kvm->pio_bus, | 655 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, |
652 | &pit->speaker_dev); | 656 | &pit->speaker_dev); |
653 | if (ret < 0) | 657 | if (ret < 0) |
654 | goto fail_unregister; | 658 | goto fail_unregister; |
@@ -657,11 +661,12 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
657 | return pit; | 661 | return pit; |
658 | 662 | ||
659 | fail_unregister: | 663 | fail_unregister: |
660 | __kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev); | 664 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev); |
661 | 665 | ||
662 | fail: | 666 | fail: |
663 | if (pit->irq_source_id >= 0) | 667 | kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier); |
664 | kvm_free_irq_source_id(kvm, pit->irq_source_id); | 668 | kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); |
669 | kvm_free_irq_source_id(kvm, pit->irq_source_id); | ||
665 | 670 | ||
666 | kfree(pit); | 671 | kfree(pit); |
667 | return NULL; | 672 | return NULL; |
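The fixed failure path now unwinds everything kvm_create_pit acquired — both notifiers and the IRQ source id — before freeing the pit, instead of conditionally skipping cleanup. A generic sketch of the goto-based unwind idiom this follows; the resource names are invented:

```c
#include <stdio.h>
#include <stdlib.h>

/* Stubs standing in for real acquire/release pairs. */
static int acquire_a(void) { return 0; }
static int acquire_b(void) { return -1; }  /* force a failure */
static void release_a(void) { puts("release a"); }

struct dev { int dummy; };

static struct dev *create_dev(void)
{
    struct dev *d = calloc(1, sizeof(*d));
    if (!d)
        return NULL;

    if (acquire_a() < 0)
        goto fail;
    if (acquire_b() < 0)
        goto fail_release_a;    /* unwind in reverse order */

    return d;

fail_release_a:
    release_a();
fail:
    free(d);
    return NULL;
}

int main(void)
{
    printf("create_dev: %p\n", (void *)create_dev());
    return 0;
}
```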
@@ -720,12 +725,12 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) | |||
720 | /* Try to inject pending interrupts when | 725 | /* Try to inject pending interrupts when
721 | * the last one has been acked. | 726 | * the last one has been acked.
722 | */ | 727 | */ |
723 | spin_lock(&ps->inject_lock); | 728 | raw_spin_lock(&ps->inject_lock); |
724 | if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { | 729 | if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { |
725 | ps->irq_ack = 0; | 730 | ps->irq_ack = 0; |
726 | inject = 1; | 731 | inject = 1; |
727 | } | 732 | } |
728 | spin_unlock(&ps->inject_lock); | 733 | raw_spin_unlock(&ps->inject_lock); |
729 | if (inject) | 734 | if (inject) |
730 | __inject_pit_timer_intr(kvm); | 735 | __inject_pit_timer_intr(kvm); |
731 | } | 736 | } |
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index d4c1c7ffdc09..900d6b0ba7c2 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h | |||
@@ -27,7 +27,7 @@ struct kvm_kpit_state { | |||
27 | u32 speaker_data_on; | 27 | u32 speaker_data_on; |
28 | struct mutex lock; | 28 | struct mutex lock; |
29 | struct kvm_pit *pit; | 29 | struct kvm_pit *pit; |
30 | spinlock_t inject_lock; | 30 | raw_spinlock_t inject_lock; |
31 | unsigned long irq_ack; | 31 | unsigned long irq_ack; |
32 | struct kvm_irq_ack_notifier irq_ack_notifier; | 32 | struct kvm_irq_ack_notifier irq_ack_notifier; |
33 | }; | 33 | }; |
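inject_lock, and the PIC lock in the next file, become raw_spinlock_t: they are taken in interrupt-injection paths that must never sleep, and on PREEMPT_RT an ordinary spinlock_t can turn into a sleeping lock. As a rough user-space analogy only, raw semantics amount to a pure busy-wait lock that never yields to the scheduler:

```c
#include <stdatomic.h>
#include <stdio.h>

/* A pure spinlock: waiters burn CPU instead of blocking, the holder
 * never sleeps (the raw_spinlock_t contract, modeled in user space). */
typedef struct { atomic_flag locked; } raw_spin;

static void raw_lock(raw_spin *l)
{
    while (atomic_flag_test_and_set_explicit(&l->locked,
                                             memory_order_acquire))
        ;                               /* busy-wait, never sleep */
}

static void raw_unlock(raw_spin *l)
{
    atomic_flag_clear_explicit(&l->locked, memory_order_release);
}

int main(void)
{
    raw_spin l = { ATOMIC_FLAG_INIT };

    raw_lock(&l);
    puts("in critical section");
    raw_unlock(&l);
    return 0;
}
```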
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index d057c0cbd245..a790fa128a9f 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -26,6 +26,7 @@ | |||
26 | * Port from Qemu. | 26 | * Port from Qemu. |
27 | */ | 27 | */ |
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/slab.h> | ||
29 | #include <linux/bitops.h> | 30 | #include <linux/bitops.h> |
30 | #include "irq.h" | 31 | #include "irq.h" |
31 | 32 | ||
@@ -44,18 +45,19 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | |||
44 | * Other interrupts may be delivered to the PIC while the lock is dropped, | 45 | * Other interrupts may be delivered to the PIC while the lock is dropped,
45 | * but it should be safe since the PIC state is already updated at this stage. | 46 | * but it should be safe since the PIC state is already updated at this stage.
46 | */ | 47 | */ |
47 | spin_unlock(&s->pics_state->lock); | 48 | raw_spin_unlock(&s->pics_state->lock); |
48 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); | 49 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); |
49 | spin_lock(&s->pics_state->lock); | 50 | raw_spin_lock(&s->pics_state->lock); |
50 | } | 51 | } |
51 | 52 | ||
52 | void kvm_pic_clear_isr_ack(struct kvm *kvm) | 53 | void kvm_pic_clear_isr_ack(struct kvm *kvm) |
53 | { | 54 | { |
54 | struct kvm_pic *s = pic_irqchip(kvm); | 55 | struct kvm_pic *s = pic_irqchip(kvm); |
55 | spin_lock(&s->lock); | 56 | |
57 | raw_spin_lock(&s->lock); | ||
56 | s->pics[0].isr_ack = 0xff; | 58 | s->pics[0].isr_ack = 0xff; |
57 | s->pics[1].isr_ack = 0xff; | 59 | s->pics[1].isr_ack = 0xff; |
58 | spin_unlock(&s->lock); | 60 | raw_spin_unlock(&s->lock); |
59 | } | 61 | } |
60 | 62 | ||
61 | /* | 63 | /* |
@@ -156,9 +158,9 @@ static void pic_update_irq(struct kvm_pic *s) | |||
156 | 158 | ||
157 | void kvm_pic_update_irq(struct kvm_pic *s) | 159 | void kvm_pic_update_irq(struct kvm_pic *s) |
158 | { | 160 | { |
159 | spin_lock(&s->lock); | 161 | raw_spin_lock(&s->lock); |
160 | pic_update_irq(s); | 162 | pic_update_irq(s); |
161 | spin_unlock(&s->lock); | 163 | raw_spin_unlock(&s->lock); |
162 | } | 164 | } |
163 | 165 | ||
164 | int kvm_pic_set_irq(void *opaque, int irq, int level) | 166 | int kvm_pic_set_irq(void *opaque, int irq, int level) |
@@ -166,14 +168,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) | |||
166 | struct kvm_pic *s = opaque; | 168 | struct kvm_pic *s = opaque; |
167 | int ret = -1; | 169 | int ret = -1; |
168 | 170 | ||
169 | spin_lock(&s->lock); | 171 | raw_spin_lock(&s->lock); |
170 | if (irq >= 0 && irq < PIC_NUM_PINS) { | 172 | if (irq >= 0 && irq < PIC_NUM_PINS) { |
171 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); | 173 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); |
172 | pic_update_irq(s); | 174 | pic_update_irq(s); |
173 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, | 175 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, |
174 | s->pics[irq >> 3].imr, ret == 0); | 176 | s->pics[irq >> 3].imr, ret == 0); |
175 | } | 177 | } |
176 | spin_unlock(&s->lock); | 178 | raw_spin_unlock(&s->lock); |
177 | 179 | ||
178 | return ret; | 180 | return ret; |
179 | } | 181 | } |
@@ -203,7 +205,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
203 | int irq, irq2, intno; | 205 | int irq, irq2, intno; |
204 | struct kvm_pic *s = pic_irqchip(kvm); | 206 | struct kvm_pic *s = pic_irqchip(kvm); |
205 | 207 | ||
206 | spin_lock(&s->lock); | 208 | raw_spin_lock(&s->lock); |
207 | irq = pic_get_irq(&s->pics[0]); | 209 | irq = pic_get_irq(&s->pics[0]); |
208 | if (irq >= 0) { | 210 | if (irq >= 0) { |
209 | pic_intack(&s->pics[0], irq); | 211 | pic_intack(&s->pics[0], irq); |
@@ -228,7 +230,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
228 | intno = s->pics[0].irq_base + irq; | 230 | intno = s->pics[0].irq_base + irq; |
229 | } | 231 | } |
230 | pic_update_irq(s); | 232 | pic_update_irq(s); |
231 | spin_unlock(&s->lock); | 233 | raw_spin_unlock(&s->lock); |
232 | 234 | ||
233 | return intno; | 235 | return intno; |
234 | } | 236 | } |
@@ -442,7 +444,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
442 | printk(KERN_ERR "PIC: non byte write\n"); | 444 | printk(KERN_ERR "PIC: non byte write\n"); |
443 | return 0; | 445 | return 0; |
444 | } | 446 | } |
445 | spin_lock(&s->lock); | 447 | raw_spin_lock(&s->lock); |
446 | switch (addr) { | 448 | switch (addr) { |
447 | case 0x20: | 449 | case 0x20: |
448 | case 0x21: | 450 | case 0x21: |
@@ -455,7 +457,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
455 | elcr_ioport_write(&s->pics[addr & 1], addr, data); | 457 | elcr_ioport_write(&s->pics[addr & 1], addr, data); |
456 | break; | 458 | break; |
457 | } | 459 | } |
458 | spin_unlock(&s->lock); | 460 | raw_spin_unlock(&s->lock); |
459 | return 0; | 461 | return 0; |
460 | } | 462 | } |
461 | 463 | ||
@@ -472,7 +474,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
472 | printk(KERN_ERR "PIC: non byte read\n"); | 474 | printk(KERN_ERR "PIC: non byte read\n"); |
473 | return 0; | 475 | return 0; |
474 | } | 476 | } |
475 | spin_lock(&s->lock); | 477 | raw_spin_lock(&s->lock); |
476 | switch (addr) { | 478 | switch (addr) { |
477 | case 0x20: | 479 | case 0x20: |
478 | case 0x21: | 480 | case 0x21: |
@@ -486,7 +488,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
486 | break; | 488 | break; |
487 | } | 489 | } |
488 | *(unsigned char *)val = data; | 490 | *(unsigned char *)val = data; |
489 | spin_unlock(&s->lock); | 491 | raw_spin_unlock(&s->lock); |
490 | return 0; | 492 | return 0; |
491 | } | 493 | } |
492 | 494 | ||
@@ -520,7 +522,7 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
520 | s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); | 522 | s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); |
521 | if (!s) | 523 | if (!s) |
522 | return NULL; | 524 | return NULL; |
523 | spin_lock_init(&s->lock); | 525 | raw_spin_lock_init(&s->lock); |
524 | s->kvm = kvm; | 526 | s->kvm = kvm; |
525 | s->pics[0].elcr_mask = 0xf8; | 527 | s->pics[0].elcr_mask = 0xf8; |
526 | s->pics[1].elcr_mask = 0xde; | 528 | s->pics[1].elcr_mask = 0xde; |
@@ -533,7 +535,9 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
533 | * Initialize PIO device | 535 | * Initialize PIO device |
534 | */ | 536 | */ |
535 | kvm_iodevice_init(&s->dev, &picdev_ops); | 537 | kvm_iodevice_init(&s->dev, &picdev_ops); |
536 | ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev); | 538 | mutex_lock(&kvm->slots_lock); |
539 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev); | ||
540 | mutex_unlock(&kvm->slots_lock); | ||
537 | if (ret < 0) { | 541 | if (ret < 0) { |
538 | kfree(s); | 542 | kfree(s); |
539 | return NULL; | 543 | return NULL; |
@@ -541,3 +545,14 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
541 | 545 | ||
542 | return s; | 546 | return s; |
543 | } | 547 | } |
548 | |||
549 | void kvm_destroy_pic(struct kvm *kvm) | ||
550 | { | ||
551 | struct kvm_pic *vpic = kvm->arch.vpic; | ||
552 | |||
553 | if (vpic) { | ||
554 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev); | ||
555 | kvm->arch.vpic = NULL; | ||
556 | kfree(vpic); | ||
557 | } | ||
558 | } | ||
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index be399e207d57..34b15915754d 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -62,7 +62,7 @@ struct kvm_kpic_state { | |||
62 | }; | 62 | }; |
63 | 63 | ||
64 | struct kvm_pic { | 64 | struct kvm_pic { |
65 | spinlock_t lock; | 65 | raw_spinlock_t lock; |
66 | unsigned pending_acks; | 66 | unsigned pending_acks; |
67 | struct kvm *kvm; | 67 | struct kvm *kvm; |
68 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | 68 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ |
@@ -75,6 +75,7 @@ struct kvm_pic { | |||
75 | }; | 75 | }; |
76 | 76 | ||
77 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); | 77 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); |
78 | void kvm_destroy_pic(struct kvm *kvm); | ||
78 | int kvm_pic_read_irq(struct kvm *kvm); | 79 | int kvm_pic_read_irq(struct kvm *kvm); |
79 | void kvm_pic_update_irq(struct kvm_pic *s); | 80 | void kvm_pic_update_irq(struct kvm_pic *s); |
80 | void kvm_pic_clear_isr_ack(struct kvm *kvm); | 81 | void kvm_pic_clear_isr_ack(struct kvm *kvm); |
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 7bcc5b6a4403..cff851cf5322 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h | |||
@@ -1,6 +1,11 @@ | |||
1 | #ifndef ASM_KVM_CACHE_REGS_H | 1 | #ifndef ASM_KVM_CACHE_REGS_H |
2 | #define ASM_KVM_CACHE_REGS_H | 2 | #define ASM_KVM_CACHE_REGS_H |
3 | 3 | ||
4 | #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS | ||
5 | #define KVM_POSSIBLE_CR4_GUEST_BITS \ | ||
6 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | ||
7 | | X86_CR4_OSXMMEXCPT | X86_CR4_PGE) | ||
8 | |||
4 | static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, | 9 | static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, |
5 | enum kvm_reg reg) | 10 | enum kvm_reg reg) |
6 | { | 11 | { |
@@ -38,4 +43,30 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) | |||
38 | return vcpu->arch.pdptrs[index]; | 43 | return vcpu->arch.pdptrs[index]; |
39 | } | 44 | } |
40 | 45 | ||
46 | static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) | ||
47 | { | ||
48 | ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS; | ||
49 | if (tmask & vcpu->arch.cr0_guest_owned_bits) | ||
50 | kvm_x86_ops->decache_cr0_guest_bits(vcpu); | ||
51 | return vcpu->arch.cr0 & mask; | ||
52 | } | ||
53 | |||
54 | static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu) | ||
55 | { | ||
56 | return kvm_read_cr0_bits(vcpu, ~0UL); | ||
57 | } | ||
58 | |||
59 | static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) | ||
60 | { | ||
61 | ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS; | ||
62 | if (tmask & vcpu->arch.cr4_guest_owned_bits) | ||
63 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | ||
64 | return vcpu->arch.cr4 & mask; | ||
65 | } | ||
66 | |||
67 | static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) | ||
68 | { | ||
69 | return kvm_read_cr4_bits(vcpu, ~0UL); | ||
70 | } | ||
71 | |||
41 | #endif | 72 | #endif |
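The new kvm_read_cr0/cr4 helpers implement lazy decaching: bits the guest is allowed to own in hardware may be stale in the cached copy, so they are refreshed only when a caller actually asks for one of them. A stand-alone model of the pattern — the callback and bit values here are illustrative:

```c
#include <stdio.h>

#define GUEST_OWNED_BITS 0x8 /* e.g. CR0.TS may be guest-owned */

struct vcpu {
    unsigned long cr0;              /* cached value, may be stale */
    unsigned long cr0_guest_owned;  /* bits currently guest-owned */
};

/* Stand-in for the decache_cr0_guest_bits() backend callback. */
static void decache_cr0(struct vcpu *v)
{
    puts("refreshing guest-owned CR0 bits from hardware");
    /* v->cr0 would be updated from the VMCS here. */
}

static unsigned long read_cr0_bits(struct vcpu *v, unsigned long mask)
{
    unsigned long tmask = mask & GUEST_OWNED_BITS;

    if (tmask & v->cr0_guest_owned)     /* only pay when needed */
        decache_cr0(v);
    return v->cr0 & mask;
}

int main(void)
{
    struct vcpu v = { .cr0 = 0x80000011, .cr0_guest_owned = 0x8 };

    read_cr0_bits(&v, 0x1);   /* PE: cached copy is authoritative */
    read_cr0_bits(&v, 0x8);   /* TS: forces a decache             */
    return 0;
}
```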
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cd60c0bd1b32..1eb7a4ae0c9c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/io.h> | 26 | #include <linux/io.h> |
27 | #include <linux/module.h> | 27 | #include <linux/module.h> |
28 | #include <linux/math64.h> | 28 | #include <linux/math64.h> |
29 | #include <linux/slab.h> | ||
29 | #include <asm/processor.h> | 30 | #include <asm/processor.h> |
30 | #include <asm/msr.h> | 31 | #include <asm/msr.h> |
31 | #include <asm/page.h> | 32 | #include <asm/page.h> |
@@ -373,6 +374,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
373 | if (unlikely(!apic_enabled(apic))) | 374 | if (unlikely(!apic_enabled(apic))) |
374 | break; | 375 | break; |
375 | 376 | ||
377 | if (trig_mode) { | ||
378 | apic_debug("level trig mode for vector %d", vector); | ||
379 | apic_set_vector(vector, apic->regs + APIC_TMR); | ||
380 | } else | ||
381 | apic_clear_vector(vector, apic->regs + APIC_TMR); | ||
382 | |||
376 | result = !apic_test_and_set_irr(vector, apic); | 383 | result = !apic_test_and_set_irr(vector, apic); |
377 | trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, | 384 | trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, |
378 | trig_mode, vector, !result); | 385 | trig_mode, vector, !result); |
@@ -383,11 +390,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
383 | break; | 390 | break; |
384 | } | 391 | } |
385 | 392 | ||
386 | if (trig_mode) { | ||
387 | apic_debug("level trig mode for vector %d", vector); | ||
388 | apic_set_vector(vector, apic->regs + APIC_TMR); | ||
389 | } else | ||
390 | apic_clear_vector(vector, apic->regs + APIC_TMR); | ||
391 | kvm_vcpu_kick(vcpu); | 393 | kvm_vcpu_kick(vcpu); |
392 | break; | 394 | break; |
393 | 395 | ||
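The two lapic.c hunks above move the TMR (trigger-mode register) update ahead of the IRR set-and-test: once the vector is visible in IRR, the vCPU may accept and EOI the interrupt at any moment, so the trigger mode must already be recorded by then. A simplified sketch of that ordering constraint:

```c
#include <stdio.h>

struct apic_model {
    int tmr;    /* trigger mode: 1 = level                 */
    int irr;    /* interrupt request: visible to the vCPU  */
};

static void accept_irq(struct apic_model *a, int level_triggered)
{
    /* Record the trigger mode first: after irr is set the vCPU may
     * consume the interrupt immediately and consult tmr at EOI time. */
    a->tmr = level_triggered;
    a->irr = 1;     /* publication point */
}

int main(void)
{
    struct apic_model a = { 0, 0 };

    accept_irq(&a, 1);
    printf("tmr=%d irr=%d\n", a.tmr, a.irr);
    return 0;
}
```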
@@ -1150,6 +1152,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) | |||
1150 | hrtimer_cancel(&apic->lapic_timer.timer); | 1152 | hrtimer_cancel(&apic->lapic_timer.timer); |
1151 | update_divide_count(apic); | 1153 | update_divide_count(apic); |
1152 | start_apic_timer(apic); | 1154 | start_apic_timer(apic); |
1155 | apic->irr_pending = true; | ||
1153 | } | 1156 | } |
1154 | 1157 | ||
1155 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) | 1158 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) |
@@ -1244,3 +1247,34 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) | |||
1244 | 1247 | ||
1245 | return 0; | 1248 | return 0; |
1246 | } | 1249 | } |
1250 | |||
1251 | int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) | ||
1252 | { | ||
1253 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
1254 | |||
1255 | if (!irqchip_in_kernel(vcpu->kvm)) | ||
1256 | return 1; | ||
1257 | |||
1258 | /* if this is ICR write vector before command */ | ||
1259 | if (reg == APIC_ICR) | ||
1260 | apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); | ||
1261 | return apic_reg_write(apic, reg, (u32)data); | ||
1262 | } | ||
1263 | |||
1264 | int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) | ||
1265 | { | ||
1266 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
1267 | u32 low, high = 0; | ||
1268 | |||
1269 | if (!irqchip_in_kernel(vcpu->kvm)) | ||
1270 | return 1; | ||
1271 | |||
1272 | if (apic_reg_read(apic, reg, 4, &low)) | ||
1273 | return 1; | ||
1274 | if (reg == APIC_ICR) | ||
1275 | apic_reg_read(apic, APIC_ICR2, 4, &high); | ||
1276 | |||
1277 | *data = (((u64)high) << 32) | low; | ||
1278 | |||
1279 | return 0; | ||
1280 | } | ||
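kvm_hv_vapic_msr_write/read expose the 64-bit ICR through two 32-bit APIC registers: ICR2 carries the destination half and must be written first, since writing the low half is what issues the command. A small stand-alone demonstration of the split, using toy variables rather than the APIC register file:

```c
#include <stdint.h>
#include <stdio.h>

static uint32_t icr_lo, icr_hi;   /* toy APIC_ICR / APIC_ICR2 */

static void icr_write64(uint64_t data)
{
    icr_hi = (uint32_t)(data >> 32); /* destination first          */
    icr_lo = (uint32_t)data;         /* writing low fires the IPI  */
}

static uint64_t icr_read64(void)
{
    return ((uint64_t)icr_hi << 32) | icr_lo;
}

int main(void)
{
    icr_write64(0x0000000100004031ull); /* dest 1, vector 0x31 */
    printf("icr=%#llx\n", (unsigned long long)icr_read64());
    return 0;
}
```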
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 40010b09c4aa..f5fe32c5edad 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -48,4 +48,12 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); | |||
48 | 48 | ||
49 | int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); | 49 | int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); |
50 | int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); | 50 | int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); |
51 | |||
52 | int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); | ||
53 | int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); | ||
54 | |||
55 | static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) | ||
56 | { | ||
57 | return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; | ||
58 | } | ||
51 | #endif | 59 | #endif |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 4c3e5b2314cb..19a8906bcaa2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -18,6 +18,7 @@ | |||
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include "mmu.h" | 20 | #include "mmu.h" |
21 | #include "x86.h" | ||
21 | #include "kvm_cache_regs.h" | 22 | #include "kvm_cache_regs.h" |
22 | 23 | ||
23 | #include <linux/kvm_host.h> | 24 | #include <linux/kvm_host.h> |
@@ -29,6 +30,8 @@ | |||
29 | #include <linux/swap.h> | 30 | #include <linux/swap.h> |
30 | #include <linux/hugetlb.h> | 31 | #include <linux/hugetlb.h> |
31 | #include <linux/compiler.h> | 32 | #include <linux/compiler.h> |
33 | #include <linux/srcu.h> | ||
34 | #include <linux/slab.h> | ||
32 | 35 | ||
33 | #include <asm/page.h> | 36 | #include <asm/page.h> |
34 | #include <asm/cmpxchg.h> | 37 | #include <asm/cmpxchg.h> |
@@ -136,16 +139,6 @@ module_param(oos_shadow, bool, 0644); | |||
136 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ | 139 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ |
137 | | PT64_NX_MASK) | 140 | | PT64_NX_MASK) |
138 | 141 | ||
139 | #define PFERR_PRESENT_MASK (1U << 0) | ||
140 | #define PFERR_WRITE_MASK (1U << 1) | ||
141 | #define PFERR_USER_MASK (1U << 2) | ||
142 | #define PFERR_RSVD_MASK (1U << 3) | ||
143 | #define PFERR_FETCH_MASK (1U << 4) | ||
144 | |||
145 | #define PT_PDPE_LEVEL 3 | ||
146 | #define PT_DIRECTORY_LEVEL 2 | ||
147 | #define PT_PAGE_TABLE_LEVEL 1 | ||
148 | |||
149 | #define RMAP_EXT 4 | 142 | #define RMAP_EXT 4 |
150 | 143 | ||
151 | #define ACC_EXEC_MASK 1 | 144 | #define ACC_EXEC_MASK 1 |
@@ -153,6 +146,9 @@ module_param(oos_shadow, bool, 0644); | |||
153 | #define ACC_USER_MASK PT_USER_MASK | 146 | #define ACC_USER_MASK PT_USER_MASK |
154 | #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) | 147 | #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) |
155 | 148 | ||
149 | #include <trace/events/kvm.h> | ||
150 | |||
151 | #undef TRACE_INCLUDE_FILE | ||
156 | #define CREATE_TRACE_POINTS | 152 | #define CREATE_TRACE_POINTS |
157 | #include "mmutrace.h" | 153 | #include "mmutrace.h" |
158 | 154 | ||
@@ -229,7 +225,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | |||
229 | 225 | ||
230 | static int is_write_protection(struct kvm_vcpu *vcpu) | 226 | static int is_write_protection(struct kvm_vcpu *vcpu) |
231 | { | 227 | { |
232 | return vcpu->arch.cr0 & X86_CR0_WP; | 228 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); |
233 | } | 229 | } |
234 | 230 | ||
235 | static int is_cpuid_PSE36(void) | 231 | static int is_cpuid_PSE36(void) |
@@ -239,7 +235,7 @@ static int is_cpuid_PSE36(void) | |||
239 | 235 | ||
240 | static int is_nx(struct kvm_vcpu *vcpu) | 236 | static int is_nx(struct kvm_vcpu *vcpu) |
241 | { | 237 | { |
242 | return vcpu->arch.shadow_efer & EFER_NX; | 238 | return vcpu->arch.efer & EFER_NX; |
243 | } | 239 | } |
244 | 240 | ||
245 | static int is_shadow_present_pte(u64 pte) | 241 | static int is_shadow_present_pte(u64 pte) |
@@ -253,7 +249,7 @@ static int is_large_pte(u64 pte) | |||
253 | return pte & PT_PAGE_SIZE_MASK; | 249 | return pte & PT_PAGE_SIZE_MASK; |
254 | } | 250 | } |
255 | 251 | ||
256 | static int is_writeble_pte(unsigned long pte) | 252 | static int is_writable_pte(unsigned long pte) |
257 | { | 253 | { |
258 | return pte & PT_WRITABLE_MASK; | 254 | return pte & PT_WRITABLE_MASK; |
259 | } | 255 | } |
@@ -470,24 +466,10 @@ static int has_wrprotected_page(struct kvm *kvm, | |||
470 | 466 | ||
471 | static int host_mapping_level(struct kvm *kvm, gfn_t gfn) | 467 | static int host_mapping_level(struct kvm *kvm, gfn_t gfn) |
472 | { | 468 | { |
473 | unsigned long page_size = PAGE_SIZE; | 469 | unsigned long page_size; |
474 | struct vm_area_struct *vma; | ||
475 | unsigned long addr; | ||
476 | int i, ret = 0; | 470 | int i, ret = 0; |
477 | 471 | ||
478 | addr = gfn_to_hva(kvm, gfn); | 472 | page_size = kvm_host_page_size(kvm, gfn); |
479 | if (kvm_is_error_hva(addr)) | ||
480 | return page_size; | ||
481 | |||
482 | down_read(¤t->mm->mmap_sem); | ||
483 | vma = find_vma(current->mm, addr); | ||
484 | if (!vma) | ||
485 | goto out; | ||
486 | |||
487 | page_size = vma_kernel_pagesize(vma); | ||
488 | |||
489 | out: | ||
490 | up_read(¤t->mm->mmap_sem); | ||
491 | 473 | ||
492 | for (i = PT_PAGE_TABLE_LEVEL; | 474 | for (i = PT_PAGE_TABLE_LEVEL; |
493 | i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) { | 475 | i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) { |
@@ -503,8 +485,7 @@ out: | |||
503 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | 485 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) |
504 | { | 486 | { |
505 | struct kvm_memory_slot *slot; | 487 | struct kvm_memory_slot *slot; |
506 | int host_level; | 488 | int host_level, level, max_level; |
507 | int level = PT_PAGE_TABLE_LEVEL; | ||
508 | 489 | ||
509 | slot = gfn_to_memslot(vcpu->kvm, large_gfn); | 490 | slot = gfn_to_memslot(vcpu->kvm, large_gfn); |
510 | if (slot && slot->dirty_bitmap) | 491 | if (slot && slot->dirty_bitmap) |
@@ -515,11 +496,12 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | |||
515 | if (host_level == PT_PAGE_TABLE_LEVEL) | 496 | if (host_level == PT_PAGE_TABLE_LEVEL) |
516 | return host_level; | 497 | return host_level; |
517 | 498 | ||
518 | for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) { | 499 | max_level = kvm_x86_ops->get_lpage_level() < host_level ? |
500 | kvm_x86_ops->get_lpage_level() : host_level; | ||
519 | 501 | ||
502 | for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) | ||
520 | if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) | 503 | if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) |
521 | break; | 504 | break; |
522 | } | ||
523 | 505 | ||
524 | return level - 1; | 506 | return level - 1; |
525 | } | 507 | } |
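mapping_level now clamps the host-derived level with the backend's get_lpage_level() cap (the hardware may not support every hugepage size) and then backs off below the first level containing a write-protected page. A sketch of the selection, using the usual 1 = 4K / 2 = 2M / 3 = 1G level convention and a stand-in write-protection check:

```c
#include <stdio.h>

#define PT_PAGE_TABLE_LEVEL 1  /* 4K */
#define PT_DIRECTORY_LEVEL  2  /* 2M */
#define PT_PDPE_LEVEL       3  /* 1G */

/* Stand-in: pretend the 1G range around gfn holds a shadowed page. */
static int has_wrprotected_page(unsigned long gfn, int level)
{
    (void)gfn;
    return level == PT_PDPE_LEVEL;
}

static int mapping_level(unsigned long gfn, int host_level, int lpage_cap)
{
    int level, max_level;

    if (host_level == PT_PAGE_TABLE_LEVEL)
        return host_level;

    max_level = lpage_cap < host_level ? lpage_cap : host_level;

    for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
        if (has_wrprotected_page(gfn, level))
            break;

    return level - 1;   /* largest level below the first conflict */
}

int main(void)
{
    /* Host backs the gfn with 1G pages, but a shadowed page in the
     * 1G range limits the guest mapping to 2M. */
    printf("level=%d\n",
           mapping_level(0x1234, PT_PDPE_LEVEL, PT_PDPE_LEVEL));
    return 0;
}
```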
@@ -635,7 +617,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
635 | pfn = spte_to_pfn(*spte); | 617 | pfn = spte_to_pfn(*spte); |
636 | if (*spte & shadow_accessed_mask) | 618 | if (*spte & shadow_accessed_mask) |
637 | kvm_set_pfn_accessed(pfn); | 619 | kvm_set_pfn_accessed(pfn); |
638 | if (is_writeble_pte(*spte)) | 620 | if (is_writable_pte(*spte)) |
639 | kvm_set_pfn_dirty(pfn); | 621 | kvm_set_pfn_dirty(pfn); |
640 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); | 622 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); |
641 | if (!*rmapp) { | 623 | if (!*rmapp) { |
@@ -664,6 +646,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
664 | prev_desc = desc; | 646 | prev_desc = desc; |
665 | desc = desc->more; | 647 | desc = desc->more; |
666 | } | 648 | } |
649 | pr_err("rmap_remove: %p %llx many->many\n", spte, *spte); | ||
667 | BUG(); | 650 | BUG(); |
668 | } | 651 | } |
669 | } | 652 | } |
@@ -710,7 +693,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
710 | BUG_ON(!spte); | 693 | BUG_ON(!spte); |
711 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 694 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
712 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); | 695 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); |
713 | if (is_writeble_pte(*spte)) { | 696 | if (is_writable_pte(*spte)) { |
714 | __set_spte(spte, *spte & ~PT_WRITABLE_MASK); | 697 | __set_spte(spte, *spte & ~PT_WRITABLE_MASK); |
715 | write_protected = 1; | 698 | write_protected = 1; |
716 | } | 699 | } |
@@ -734,7 +717,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
734 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 717 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
735 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); | 718 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); |
736 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); | 719 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); |
737 | if (is_writeble_pte(*spte)) { | 720 | if (is_writable_pte(*spte)) { |
738 | rmap_remove(kvm, spte); | 721 | rmap_remove(kvm, spte); |
739 | --kvm->stat.lpages; | 722 | --kvm->stat.lpages; |
740 | __set_spte(spte, shadow_trap_nonpresent_pte); | 723 | __set_spte(spte, shadow_trap_nonpresent_pte); |
@@ -789,7 +772,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
789 | 772 | ||
790 | new_spte &= ~PT_WRITABLE_MASK; | 773 | new_spte &= ~PT_WRITABLE_MASK; |
791 | new_spte &= ~SPTE_HOST_WRITEABLE; | 774 | new_spte &= ~SPTE_HOST_WRITEABLE; |
792 | if (is_writeble_pte(*spte)) | 775 | if (is_writable_pte(*spte)) |
793 | kvm_set_pfn_dirty(spte_to_pfn(*spte)); | 776 | kvm_set_pfn_dirty(spte_to_pfn(*spte)); |
794 | __set_spte(spte, new_spte); | 777 | __set_spte(spte, new_spte); |
795 | spte = rmap_next(kvm, rmapp, spte); | 778 | spte = rmap_next(kvm, rmapp, spte); |
@@ -807,35 +790,32 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
807 | unsigned long data)) | 790 | unsigned long data)) |
808 | { | 791 | { |
809 | int i, j; | 792 | int i, j; |
793 | int ret; | ||
810 | int retval = 0; | 794 | int retval = 0; |
795 | struct kvm_memslots *slots; | ||
811 | 796 | ||
812 | /* | 797 | slots = rcu_dereference(kvm->memslots); |
813 | * If mmap_sem isn't taken, we can look the memslots with only | 798 | |
814 | * the mmu_lock by skipping over the slots with userspace_addr == 0. | 799 | for (i = 0; i < slots->nmemslots; i++) { |
815 | */ | 800 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
816 | for (i = 0; i < kvm->nmemslots; i++) { | ||
817 | struct kvm_memory_slot *memslot = &kvm->memslots[i]; | ||
818 | unsigned long start = memslot->userspace_addr; | 801 | unsigned long start = memslot->userspace_addr; |
819 | unsigned long end; | 802 | unsigned long end; |
820 | 803 | ||
821 | /* mmu_lock protects userspace_addr */ | ||
822 | if (!start) | ||
823 | continue; | ||
824 | |||
825 | end = start + (memslot->npages << PAGE_SHIFT); | 804 | end = start + (memslot->npages << PAGE_SHIFT); |
826 | if (hva >= start && hva < end) { | 805 | if (hva >= start && hva < end) { |
827 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; | 806 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; |
828 | 807 | ||
829 | retval |= handler(kvm, &memslot->rmap[gfn_offset], | 808 | ret = handler(kvm, &memslot->rmap[gfn_offset], data); |
830 | data); | ||
831 | 809 | ||
832 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { | 810 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { |
833 | int idx = gfn_offset; | 811 | int idx = gfn_offset; |
834 | idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); | 812 | idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); |
835 | retval |= handler(kvm, | 813 | ret |= handler(kvm, |
836 | &memslot->lpage_info[j][idx].rmap_pde, | 814 | &memslot->lpage_info[j][idx].rmap_pde, |
837 | data); | 815 | data); |
838 | } | 816 | } |
817 | trace_kvm_age_page(hva, memslot, ret); | ||
818 | retval |= ret; | ||
839 | } | 819 | } |
840 | } | 820 | } |
841 | 821 | ||
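kvm_handle_hva now walks the RCU-protected memslots and maps the host virtual address back to per-slot rmap indices: the 4K index is (hva - start) >> PAGE_SHIFT, and each larger page size divides that index by its pages-per-hugepage factor. A minimal model of the index math, assuming the usual x86 4K/2M geometry:

```c
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
    unsigned long slot_start = 0x7f0000000000UL;  /* slot's host VA   */
    unsigned long hva        = 0x7f0000304000UL;  /* address of note  */

    unsigned long gfn_offset = (hva - slot_start) >> PAGE_SHIFT;

    /* 2M rmap_pde index: 512 4K pages per 2M page on x86. */
    unsigned long idx_2m = gfn_offset / 512;

    printf("4K index %lu, 2M index %lu\n", gfn_offset, idx_2m);
    return 0;
}
```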
@@ -858,9 +838,15 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
858 | u64 *spte; | 838 | u64 *spte; |
859 | int young = 0; | 839 | int young = 0; |
860 | 840 | ||
861 | /* always return old for EPT */ | 841 | /* |
842 | * Emulate the accessed bit for EPT, by checking if this page has | ||
843 | * an EPT mapping, and clearing it if it does. On the next access, | ||
844 | * a new EPT mapping will be established. | ||
845 | * This has some overhead, but not as much as the cost of swapping | ||
846 | * out actively used pages or breaking up actively used hugepages. | ||
847 | */ | ||
862 | if (!shadow_accessed_mask) | 848 | if (!shadow_accessed_mask) |
863 | return 0; | 849 | return kvm_unmap_rmapp(kvm, rmapp, data); |
864 | 850 | ||
865 | spte = rmap_next(kvm, rmapp, NULL); | 851 | spte = rmap_next(kvm, rmapp, NULL); |
866 | while (spte) { | 852 | while (spte) { |
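The new comment spells out the trick: with no hardware accessed bit (shadow_accessed_mask == 0, the EPT case), "was this page touched?" is answered by tearing the mapping down and letting the next access refault. A toy model contrasting aging with and without a hardware A bit; the PTE bits are illustrative:

```c
#include <stdio.h>

#define PTE_PRESENT  (1u << 0)
#define PTE_ACCESSED (1u << 5)

/* Returns nonzero if the page was young (used since the last aging). */
static int age_pte(unsigned *pte, int have_accessed_bit)
{
    if (have_accessed_bit) {
        int young = (*pte & PTE_ACCESSED) != 0;
        *pte &= ~PTE_ACCESSED;          /* clear for the next round */
        return young;
    }
    /* No A bit (EPT case): drop the mapping; a refault will both
     * rebuild it and prove the page is in active use. */
    int young = (*pte & PTE_PRESENT) != 0;
    *pte = 0;
    return young;
}

int main(void)
{
    unsigned pte = PTE_PRESENT | PTE_ACCESSED;
    printf("hw A-bit: young=%d, pte=%#x\n", age_pte(&pte, 1), pte);

    pte = PTE_PRESENT;
    printf("EPT:      young=%d, pte=%#x\n", age_pte(&pte, 0), pte);
    return 0;
}
```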
@@ -1504,8 +1490,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm, | |||
1504 | for_each_sp(pages, sp, parents, i) { | 1490 | for_each_sp(pages, sp, parents, i) { |
1505 | kvm_mmu_zap_page(kvm, sp); | 1491 | kvm_mmu_zap_page(kvm, sp); |
1506 | mmu_pages_clear_parents(&parents); | 1492 | mmu_pages_clear_parents(&parents); |
1493 | zapped++; | ||
1507 | } | 1494 | } |
1508 | zapped += pages.nr; | ||
1509 | kvm_mmu_pages_init(parent, &parents, &pages); | 1495 | kvm_mmu_pages_init(parent, &parents, &pages); |
1510 | } | 1496 | } |
1511 | 1497 | ||
@@ -1556,14 +1542,16 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) | |||
1556 | */ | 1542 | */ |
1557 | 1543 | ||
1558 | if (used_pages > kvm_nr_mmu_pages) { | 1544 | if (used_pages > kvm_nr_mmu_pages) { |
1559 | while (used_pages > kvm_nr_mmu_pages) { | 1545 | while (used_pages > kvm_nr_mmu_pages && |
1546 | !list_empty(&kvm->arch.active_mmu_pages)) { | ||
1560 | struct kvm_mmu_page *page; | 1547 | struct kvm_mmu_page *page; |
1561 | 1548 | ||
1562 | page = container_of(kvm->arch.active_mmu_pages.prev, | 1549 | page = container_of(kvm->arch.active_mmu_pages.prev, |
1563 | struct kvm_mmu_page, link); | 1550 | struct kvm_mmu_page, link); |
1564 | kvm_mmu_zap_page(kvm, page); | 1551 | used_pages -= kvm_mmu_zap_page(kvm, page); |
1565 | used_pages--; | 1552 | used_pages--; |
1566 | } | 1553 | } |
1554 | kvm_nr_mmu_pages = used_pages; | ||
1567 | kvm->arch.n_free_mmu_pages = 0; | 1555 | kvm->arch.n_free_mmu_pages = 0; |
1568 | } | 1556 | } |
1569 | else | 1557 | else |
@@ -1610,14 +1598,15 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | |||
1610 | && !sp->role.invalid) { | 1598 | && !sp->role.invalid) { |
1611 | pgprintk("%s: zap %lx %x\n", | 1599 | pgprintk("%s: zap %lx %x\n", |
1612 | __func__, gfn, sp->role.word); | 1600 | __func__, gfn, sp->role.word); |
1613 | kvm_mmu_zap_page(kvm, sp); | 1601 | if (kvm_mmu_zap_page(kvm, sp)) |
1602 | nn = bucket->first; | ||
1614 | } | 1603 | } |
1615 | } | 1604 | } |
1616 | } | 1605 | } |
1617 | 1606 | ||
1618 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | 1607 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) |
1619 | { | 1608 | { |
1620 | int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); | 1609 | int slot = memslot_id(kvm, gfn); |
1621 | struct kvm_mmu_page *sp = page_header(__pa(pte)); | 1610 | struct kvm_mmu_page *sp = page_header(__pa(pte)); |
1622 | 1611 | ||
1623 | __set_bit(slot, sp->slot_bitmap); | 1612 | __set_bit(slot, sp->slot_bitmap); |
@@ -1641,7 +1630,7 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | |||
1641 | { | 1630 | { |
1642 | struct page *page; | 1631 | struct page *page; |
1643 | 1632 | ||
1644 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); | 1633 | gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); |
1645 | 1634 | ||
1646 | if (gpa == UNMAPPED_GVA) | 1635 | if (gpa == UNMAPPED_GVA) |
1647 | return NULL; | 1636 | return NULL; |
@@ -1854,7 +1843,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1854 | * is the responsibility of mmu_get_page / kvm_sync_page. | 1843 | * is the responsibility of mmu_get_page / kvm_sync_page. |
1855 | * Same reasoning can be applied to dirty page accounting. | 1844 | * Same reasoning can be applied to dirty page accounting. |
1856 | */ | 1845 | */ |
1857 | if (!can_unsync && is_writeble_pte(*sptep)) | 1846 | if (!can_unsync && is_writable_pte(*sptep)) |
1858 | goto set_pte; | 1847 | goto set_pte; |
1859 | 1848 | ||
1860 | if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { | 1849 | if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { |
@@ -1862,7 +1851,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1862 | __func__, gfn); | 1851 | __func__, gfn); |
1863 | ret = 1; | 1852 | ret = 1; |
1864 | pte_access &= ~ACC_WRITE_MASK; | 1853 | pte_access &= ~ACC_WRITE_MASK; |
1865 | if (is_writeble_pte(spte)) | 1854 | if (is_writable_pte(spte)) |
1866 | spte &= ~PT_WRITABLE_MASK; | 1855 | spte &= ~PT_WRITABLE_MASK; |
1867 | } | 1856 | } |
1868 | } | 1857 | } |
@@ -1883,7 +1872,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1883 | bool reset_host_protection) | 1872 | bool reset_host_protection) |
1884 | { | 1873 | { |
1885 | int was_rmapped = 0; | 1874 | int was_rmapped = 0; |
1886 | int was_writeble = is_writeble_pte(*sptep); | 1875 | int was_writable = is_writable_pte(*sptep); |
1887 | int rmap_count; | 1876 | int rmap_count; |
1888 | 1877 | ||
1889 | pgprintk("%s: spte %llx access %x write_fault %d" | 1878 | pgprintk("%s: spte %llx access %x write_fault %d" |
@@ -1934,7 +1923,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1934 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) | 1923 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) |
1935 | rmap_recycle(vcpu, sptep, gfn); | 1924 | rmap_recycle(vcpu, sptep, gfn); |
1936 | } else { | 1925 | } else { |
1937 | if (was_writeble) | 1926 | if (was_writable) |
1938 | kvm_release_pfn_dirty(pfn); | 1927 | kvm_release_pfn_dirty(pfn); |
1939 | else | 1928 | else |
1940 | kvm_release_pfn_clean(pfn); | 1929 | kvm_release_pfn_clean(pfn); |
@@ -2164,8 +2153,11 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
2164 | spin_unlock(&vcpu->kvm->mmu_lock); | 2153 | spin_unlock(&vcpu->kvm->mmu_lock); |
2165 | } | 2154 | } |
2166 | 2155 | ||
2167 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) | 2156 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, |
2157 | u32 access, u32 *error) | ||
2168 | { | 2158 | { |
2159 | if (error) | ||
2160 | *error = 0; | ||
2169 | return vaddr; | 2161 | return vaddr; |
2170 | } | 2162 | } |
2171 | 2163 | ||
@@ -2749,7 +2741,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
2749 | if (tdp_enabled) | 2741 | if (tdp_enabled) |
2750 | return 0; | 2742 | return 0; |
2751 | 2743 | ||
2752 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); | 2744 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); |
2753 | 2745 | ||
2754 | spin_lock(&vcpu->kvm->mmu_lock); | 2746 | spin_lock(&vcpu->kvm->mmu_lock); |
2755 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 2747 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
@@ -2849,16 +2841,13 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) | |||
2849 | */ | 2841 | */ |
2850 | page = alloc_page(GFP_KERNEL | __GFP_DMA32); | 2842 | page = alloc_page(GFP_KERNEL | __GFP_DMA32); |
2851 | if (!page) | 2843 | if (!page) |
2852 | goto error_1; | 2844 | return -ENOMEM; |
2845 | |||
2853 | vcpu->arch.mmu.pae_root = page_address(page); | 2846 | vcpu->arch.mmu.pae_root = page_address(page); |
2854 | for (i = 0; i < 4; ++i) | 2847 | for (i = 0; i < 4; ++i) |
2855 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; | 2848 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; |
2856 | 2849 | ||
2857 | return 0; | 2850 | return 0; |
2858 | |||
2859 | error_1: | ||
2860 | free_mmu_pages(vcpu); | ||
2861 | return -ENOMEM; | ||
2862 | } | 2851 | } |
2863 | 2852 | ||
2864 | int kvm_mmu_create(struct kvm_vcpu *vcpu) | 2853 | int kvm_mmu_create(struct kvm_vcpu *vcpu) |
@@ -2938,10 +2927,9 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
2938 | spin_lock(&kvm_lock); | 2927 | spin_lock(&kvm_lock); |
2939 | 2928 | ||
2940 | list_for_each_entry(kvm, &vm_list, vm_list) { | 2929 | list_for_each_entry(kvm, &vm_list, vm_list) { |
2941 | int npages; | 2930 | int npages, idx; |
2942 | 2931 | ||
2943 | if (!down_read_trylock(&kvm->slots_lock)) | 2932 | idx = srcu_read_lock(&kvm->srcu); |
2944 | continue; | ||
2945 | spin_lock(&kvm->mmu_lock); | 2933 | spin_lock(&kvm->mmu_lock); |
2946 | npages = kvm->arch.n_alloc_mmu_pages - | 2934 | npages = kvm->arch.n_alloc_mmu_pages - |
2947 | kvm->arch.n_free_mmu_pages; | 2935 | kvm->arch.n_free_mmu_pages; |
@@ -2954,7 +2942,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
2954 | nr_to_scan--; | 2942 | nr_to_scan--; |
2955 | 2943 | ||
2956 | spin_unlock(&kvm->mmu_lock); | 2944 | spin_unlock(&kvm->mmu_lock); |
2957 | up_read(&kvm->slots_lock); | 2945 | srcu_read_unlock(&kvm->srcu, idx); |
2958 | } | 2946 | } |
2959 | if (kvm_freed) | 2947 | if (kvm_freed) |
2960 | list_move_tail(&kvm_freed->vm_list, &vm_list); | 2948 | list_move_tail(&kvm_freed->vm_list, &vm_list); |
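
The shrinker hunk above replaces the sleepable slots_lock rwsem with an SRCU read-side critical section, so memslot readers may still sleep but no longer block an updater. A module-context sketch of the general SRCU pattern, with demo_srcu and both functions invented for illustration:

    #include <linux/srcu.h>

    static struct srcu_struct demo_srcu;   /* set up once with init_srcu_struct() */

    static void demo_reader(void *shared)
    {
            int idx;

            idx = srcu_read_lock(&demo_srcu);  /* returns a grace-period index */
            /* ... use 'shared'; SRCU readers may sleep, unlike plain RCU ...  */
            srcu_read_unlock(&demo_srcu, idx); /* must pass the same index back */
    }

    static void demo_updater(void)
    {
            /* publish the new version of the shared data first, then: */
            synchronize_srcu(&demo_srcu);      /* waits for all earlier readers */
            /* now the old version can be freed */
    }

Unlike down_read_trylock(), the read lock here cannot fail, which is why the "continue" path disappears from the loop.
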
@@ -3021,9 +3009,11 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | |||
3021 | int i; | 3009 | int i; |
3022 | unsigned int nr_mmu_pages; | 3010 | unsigned int nr_mmu_pages; |
3023 | unsigned int nr_pages = 0; | 3011 | unsigned int nr_pages = 0; |
3012 | struct kvm_memslots *slots; | ||
3024 | 3013 | ||
3025 | for (i = 0; i < kvm->nmemslots; i++) | 3014 | slots = rcu_dereference(kvm->memslots); |
3026 | nr_pages += kvm->memslots[i].npages; | 3015 | for (i = 0; i < slots->nmemslots; i++) |
3016 | nr_pages += slots->memslots[i].npages; | ||
3027 | 3017 | ||
3028 | nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; | 3018 | nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; |
3029 | nr_mmu_pages = max(nr_mmu_pages, | 3019 | nr_mmu_pages = max(nr_mmu_pages, |
@@ -3248,7 +3238,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, | |||
3248 | if (is_shadow_present_pte(ent) && !is_last_spte(ent, level)) | 3238 | if (is_shadow_present_pte(ent) && !is_last_spte(ent, level)) |
3249 | audit_mappings_page(vcpu, ent, va, level - 1); | 3239 | audit_mappings_page(vcpu, ent, va, level - 1); |
3250 | else { | 3240 | else { |
3251 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); | 3241 | gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, va, NULL); |
3252 | gfn_t gfn = gpa >> PAGE_SHIFT; | 3242 | gfn_t gfn = gpa >> PAGE_SHIFT; |
3253 | pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn); | 3243 | pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn); |
3254 | hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT; | 3244 | hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT; |
@@ -3293,10 +3283,12 @@ static void audit_mappings(struct kvm_vcpu *vcpu) | |||
3293 | static int count_rmaps(struct kvm_vcpu *vcpu) | 3283 | static int count_rmaps(struct kvm_vcpu *vcpu) |
3294 | { | 3284 | { |
3295 | int nmaps = 0; | 3285 | int nmaps = 0; |
3296 | int i, j, k; | 3286 | int i, j, k, idx; |
3297 | 3287 | ||
3288 | idx = srcu_read_lock(&kvm->srcu); | ||
3289 | slots = rcu_dereference(kvm->memslots); | ||
3298 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 3290 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
3299 | struct kvm_memory_slot *m = &vcpu->kvm->memslots[i]; | 3291 | struct kvm_memory_slot *m = &slots->memslots[i]; |
3300 | struct kvm_rmap_desc *d; | 3292 | struct kvm_rmap_desc *d; |
3301 | 3293 | ||
3302 | for (j = 0; j < m->npages; ++j) { | 3294 | for (j = 0; j < m->npages; ++j) { |
@@ -3319,6 +3311,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu) | |||
3319 | } | 3311 | } |
3320 | } | 3312 | } |
3321 | } | 3313 | } |
3314 | srcu_read_unlock(&kvm->srcu, idx); | ||
3322 | return nmaps; | 3315 | return nmaps; |
3323 | } | 3316 | } |
3324 | 3317 | ||
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 61a1b3884b49..be66759321a5 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define __KVM_X86_MMU_H | 2 | #define __KVM_X86_MMU_H |
3 | 3 | ||
4 | #include <linux/kvm_host.h> | 4 | #include <linux/kvm_host.h> |
5 | #include "kvm_cache_regs.h" | ||
5 | 6 | ||
6 | #define PT64_PT_BITS 9 | 7 | #define PT64_PT_BITS 9 |
7 | #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) | 8 | #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) |
@@ -37,6 +38,16 @@ | |||
37 | #define PT32_ROOT_LEVEL 2 | 38 | #define PT32_ROOT_LEVEL 2 |
38 | #define PT32E_ROOT_LEVEL 3 | 39 | #define PT32E_ROOT_LEVEL 3 |
39 | 40 | ||
41 | #define PT_PDPE_LEVEL 3 | ||
42 | #define PT_DIRECTORY_LEVEL 2 | ||
43 | #define PT_PAGE_TABLE_LEVEL 1 | ||
44 | |||
45 | #define PFERR_PRESENT_MASK (1U << 0) | ||
46 | #define PFERR_WRITE_MASK (1U << 1) | ||
47 | #define PFERR_USER_MASK (1U << 2) | ||
48 | #define PFERR_RSVD_MASK (1U << 3) | ||
49 | #define PFERR_FETCH_MASK (1U << 4) | ||
50 | |||
40 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); | 51 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); |
41 | 52 | ||
42 | static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 53 | static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) |
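
The PFERR_* masks that moved into mmu.h are the architectural x86 page-fault error-code bits. A stand-alone decoder shows what each bit answers (the sample value is arbitrary):

    #include <stdint.h>
    #include <stdio.h>

    #define PFERR_PRESENT_MASK (1U << 0)
    #define PFERR_WRITE_MASK   (1U << 1)
    #define PFERR_USER_MASK    (1U << 2)
    #define PFERR_RSVD_MASK    (1U << 3)
    #define PFERR_FETCH_MASK   (1U << 4)

    int main(void)
    {
            uint32_t err = PFERR_PRESENT_MASK | PFERR_WRITE_MASK | PFERR_USER_MASK;

            printf("present=%d write=%d user=%d rsvd=%d fetch=%d\n",
                   !!(err & PFERR_PRESENT_MASK), /* 1: protection, 0: not-present */
                   !!(err & PFERR_WRITE_MASK),   /* write (1) vs. read (0)        */
                   !!(err & PFERR_USER_MASK),    /* access came from CPL 3        */
                   !!(err & PFERR_RSVD_MASK),    /* reserved bit set in a PTE     */
                   !!(err & PFERR_FETCH_MASK));  /* instruction fetch             */
            return 0;
    }
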
@@ -53,30 +64,6 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) | |||
53 | return kvm_mmu_load(vcpu); | 64 | return kvm_mmu_load(vcpu); |
54 | } | 65 | } |
55 | 66 | ||
56 | static inline int is_long_mode(struct kvm_vcpu *vcpu) | ||
57 | { | ||
58 | #ifdef CONFIG_X86_64 | ||
59 | return vcpu->arch.shadow_efer & EFER_LMA; | ||
60 | #else | ||
61 | return 0; | ||
62 | #endif | ||
63 | } | ||
64 | |||
65 | static inline int is_pae(struct kvm_vcpu *vcpu) | ||
66 | { | ||
67 | return vcpu->arch.cr4 & X86_CR4_PAE; | ||
68 | } | ||
69 | |||
70 | static inline int is_pse(struct kvm_vcpu *vcpu) | ||
71 | { | ||
72 | return vcpu->arch.cr4 & X86_CR4_PSE; | ||
73 | } | ||
74 | |||
75 | static inline int is_paging(struct kvm_vcpu *vcpu) | ||
76 | { | ||
77 | return vcpu->arch.cr0 & X86_CR0_PG; | ||
78 | } | ||
79 | |||
80 | static inline int is_present_gpte(unsigned long pte) | 67 | static inline int is_present_gpte(unsigned long pte) |
81 | { | 68 | { |
82 | return pte & PT_PRESENT_MASK; | 69 | return pte & PT_PRESENT_MASK; |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a6017132fba8..81eab9a50e6a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -150,7 +150,9 @@ walk: | |||
150 | walker->table_gfn[walker->level - 1] = table_gfn; | 150 | walker->table_gfn[walker->level - 1] = table_gfn; |
151 | walker->pte_gpa[walker->level - 1] = pte_gpa; | 151 | walker->pte_gpa[walker->level - 1] = pte_gpa; |
152 | 152 | ||
153 | kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)); | 153 | if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) |
154 | goto not_present; | ||
155 | |||
154 | trace_kvm_mmu_paging_element(pte, walker->level); | 156 | trace_kvm_mmu_paging_element(pte, walker->level); |
155 | 157 | ||
156 | if (!is_present_gpte(pte)) | 158 | if (!is_present_gpte(pte)) |
@@ -160,7 +162,7 @@ walk: | |||
160 | if (rsvd_fault) | 162 | if (rsvd_fault) |
161 | goto access_error; | 163 | goto access_error; |
162 | 164 | ||
163 | if (write_fault && !is_writeble_pte(pte)) | 165 | if (write_fault && !is_writable_pte(pte)) |
164 | if (user_fault || is_write_protection(vcpu)) | 166 | if (user_fault || is_write_protection(vcpu)) |
165 | goto access_error; | 167 | goto access_error; |
166 | 168 | ||
@@ -455,8 +457,6 @@ out_unlock: | |||
455 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 457 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
456 | { | 458 | { |
457 | struct kvm_shadow_walk_iterator iterator; | 459 | struct kvm_shadow_walk_iterator iterator; |
458 | pt_element_t gpte; | ||
459 | gpa_t pte_gpa = -1; | ||
460 | int level; | 460 | int level; |
461 | u64 *sptep; | 461 | u64 *sptep; |
462 | int need_flush = 0; | 462 | int need_flush = 0; |
@@ -470,10 +470,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
470 | if (level == PT_PAGE_TABLE_LEVEL || | 470 | if (level == PT_PAGE_TABLE_LEVEL || |
471 | ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || | 471 | ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || |
472 | ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { | 472 | ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { |
473 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | ||
474 | |||
475 | pte_gpa = (sp->gfn << PAGE_SHIFT); | ||
476 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | ||
477 | 473 | ||
478 | if (is_shadow_present_pte(*sptep)) { | 474 | if (is_shadow_present_pte(*sptep)) { |
479 | rmap_remove(vcpu->kvm, sptep); | 475 | rmap_remove(vcpu->kvm, sptep); |
@@ -492,32 +488,25 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
492 | if (need_flush) | 488 | if (need_flush) |
493 | kvm_flush_remote_tlbs(vcpu->kvm); | 489 | kvm_flush_remote_tlbs(vcpu->kvm); |
494 | spin_unlock(&vcpu->kvm->mmu_lock); | 490 | spin_unlock(&vcpu->kvm->mmu_lock); |
495 | |||
496 | if (pte_gpa == -1) | ||
497 | return; | ||
498 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, | ||
499 | sizeof(pt_element_t))) | ||
500 | return; | ||
501 | if (is_present_gpte(gpte) && (gpte & PT_ACCESSED_MASK)) { | ||
502 | if (mmu_topup_memory_caches(vcpu)) | ||
503 | return; | ||
504 | kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte, | ||
505 | sizeof(pt_element_t), 0); | ||
506 | } | ||
507 | } | 491 | } |
508 | 492 | ||
509 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) | 493 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, |
494 | u32 *error) | ||
510 | { | 495 | { |
511 | struct guest_walker walker; | 496 | struct guest_walker walker; |
512 | gpa_t gpa = UNMAPPED_GVA; | 497 | gpa_t gpa = UNMAPPED_GVA; |
513 | int r; | 498 | int r; |
514 | 499 | ||
515 | r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0); | 500 | r = FNAME(walk_addr)(&walker, vcpu, vaddr, |
501 | !!(access & PFERR_WRITE_MASK), | ||
502 | !!(access & PFERR_USER_MASK), | ||
503 | !!(access & PFERR_FETCH_MASK)); | ||
516 | 504 | ||
517 | if (r) { | 505 | if (r) { |
518 | gpa = gfn_to_gpa(walker.gfn); | 506 | gpa = gfn_to_gpa(walker.gfn); |
519 | gpa |= vaddr & ~PAGE_MASK; | 507 | gpa |= vaddr & ~PAGE_MASK; |
520 | } | 508 | } else if (error) |
509 | *error = walker.error_code; | ||
521 | 510 | ||
522 | return gpa; | 511 | return gpa; |
523 | } | 512 | } |
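
gva_to_gpa now receives a PFERR-style access mask and forwards each bit to the page-table walker as a 0/1 flag via the !! idiom. A toy version of that conversion, with walk_addr_demo invented as a stand-in for FNAME(walk_addr):

    #include <stdint.h>
    #include <stdio.h>

    #define PFERR_WRITE_MASK (1U << 1)
    #define PFERR_USER_MASK  (1U << 2)
    #define PFERR_FETCH_MASK (1U << 4)

    /* Stand-in for the real walker: just reports what it was asked to check. */
    static int walk_addr_demo(int write, int user, int fetch)
    {
            printf("walk: write=%d user=%d fetch=%d\n", write, user, fetch);
            return 1; /* pretend the walk succeeded */
    }

    static void translate(uint32_t access)
    {
            /* !! collapses any set bit to exactly 0 or 1 before the call */
            walk_addr_demo(!!(access & PFERR_WRITE_MASK),
                           !!(access & PFERR_USER_MASK),
                           !!(access & PFERR_FETCH_MASK));
    }

    int main(void)
    {
            translate(PFERR_WRITE_MASK | PFERR_USER_MASK);
            return 0;
    }
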
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1d9b33843c80..2ba58206812a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/highmem.h> | 26 | #include <linux/highmem.h> |
27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
28 | #include <linux/ftrace_event.h> | 28 | #include <linux/ftrace_event.h> |
29 | #include <linux/slab.h> | ||
29 | 30 | ||
30 | #include <asm/desc.h> | 31 | #include <asm/desc.h> |
31 | 32 | ||
@@ -231,7 +232,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
231 | efer &= ~EFER_LME; | 232 | efer &= ~EFER_LME; |
232 | 233 | ||
233 | to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; | 234 | to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; |
234 | vcpu->arch.shadow_efer = efer; | 235 | vcpu->arch.efer = efer; |
235 | } | 236 | } |
236 | 237 | ||
237 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | 238 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, |
@@ -540,6 +541,8 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
540 | struct vmcb_control_area *control = &svm->vmcb->control; | 541 | struct vmcb_control_area *control = &svm->vmcb->control; |
541 | struct vmcb_save_area *save = &svm->vmcb->save; | 542 | struct vmcb_save_area *save = &svm->vmcb->save; |
542 | 543 | ||
544 | svm->vcpu.fpu_active = 1; | ||
545 | |||
543 | control->intercept_cr_read = INTERCEPT_CR0_MASK | | 546 | control->intercept_cr_read = INTERCEPT_CR0_MASK | |
544 | INTERCEPT_CR3_MASK | | 547 | INTERCEPT_CR3_MASK | |
545 | INTERCEPT_CR4_MASK; | 548 | INTERCEPT_CR4_MASK; |
@@ -552,13 +555,19 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
552 | control->intercept_dr_read = INTERCEPT_DR0_MASK | | 555 | control->intercept_dr_read = INTERCEPT_DR0_MASK | |
553 | INTERCEPT_DR1_MASK | | 556 | INTERCEPT_DR1_MASK | |
554 | INTERCEPT_DR2_MASK | | 557 | INTERCEPT_DR2_MASK | |
555 | INTERCEPT_DR3_MASK; | 558 | INTERCEPT_DR3_MASK | |
559 | INTERCEPT_DR4_MASK | | ||
560 | INTERCEPT_DR5_MASK | | ||
561 | INTERCEPT_DR6_MASK | | ||
562 | INTERCEPT_DR7_MASK; | ||
556 | 563 | ||
557 | control->intercept_dr_write = INTERCEPT_DR0_MASK | | 564 | control->intercept_dr_write = INTERCEPT_DR0_MASK | |
558 | INTERCEPT_DR1_MASK | | 565 | INTERCEPT_DR1_MASK | |
559 | INTERCEPT_DR2_MASK | | 566 | INTERCEPT_DR2_MASK | |
560 | INTERCEPT_DR3_MASK | | 567 | INTERCEPT_DR3_MASK | |
568 | INTERCEPT_DR4_MASK | | ||
561 | INTERCEPT_DR5_MASK | | 569 | INTERCEPT_DR5_MASK | |
570 | INTERCEPT_DR6_MASK | | ||
562 | INTERCEPT_DR7_MASK; | 571 | INTERCEPT_DR7_MASK; |
563 | 572 | ||
564 | control->intercept_exceptions = (1 << PF_VECTOR) | | 573 | control->intercept_exceptions = (1 << PF_VECTOR) | |
@@ -569,6 +578,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
569 | control->intercept = (1ULL << INTERCEPT_INTR) | | 578 | control->intercept = (1ULL << INTERCEPT_INTR) | |
570 | (1ULL << INTERCEPT_NMI) | | 579 | (1ULL << INTERCEPT_NMI) | |
571 | (1ULL << INTERCEPT_SMI) | | 580 | (1ULL << INTERCEPT_SMI) | |
581 | (1ULL << INTERCEPT_SELECTIVE_CR0) | | ||
572 | (1ULL << INTERCEPT_CPUID) | | 582 | (1ULL << INTERCEPT_CPUID) | |
573 | (1ULL << INTERCEPT_INVD) | | 583 | (1ULL << INTERCEPT_INVD) | |
574 | (1ULL << INTERCEPT_HLT) | | 584 | (1ULL << INTERCEPT_HLT) | |
@@ -641,10 +651,8 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
641 | control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) | | 651 | control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) | |
642 | (1ULL << INTERCEPT_INVLPG)); | 652 | (1ULL << INTERCEPT_INVLPG)); |
643 | control->intercept_exceptions &= ~(1 << PF_VECTOR); | 653 | control->intercept_exceptions &= ~(1 << PF_VECTOR); |
644 | control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK| | 654 | control->intercept_cr_read &= ~INTERCEPT_CR3_MASK; |
645 | INTERCEPT_CR3_MASK); | 655 | control->intercept_cr_write &= ~INTERCEPT_CR3_MASK; |
646 | control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK| | ||
647 | INTERCEPT_CR3_MASK); | ||
648 | save->g_pat = 0x0007040600070406ULL; | 656 | save->g_pat = 0x0007040600070406ULL; |
649 | save->cr3 = 0; | 657 | save->cr3 = 0; |
650 | save->cr4 = 0; | 658 | save->cr4 = 0; |
@@ -698,29 +706,28 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
698 | if (err) | 706 | if (err) |
699 | goto free_svm; | 707 | goto free_svm; |
700 | 708 | ||
709 | err = -ENOMEM; | ||
701 | page = alloc_page(GFP_KERNEL); | 710 | page = alloc_page(GFP_KERNEL); |
702 | if (!page) { | 711 | if (!page) |
703 | err = -ENOMEM; | ||
704 | goto uninit; | 712 | goto uninit; |
705 | } | ||
706 | 713 | ||
707 | err = -ENOMEM; | ||
708 | msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); | 714 | msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); |
709 | if (!msrpm_pages) | 715 | if (!msrpm_pages) |
710 | goto uninit; | 716 | goto free_page1; |
711 | 717 | ||
712 | nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); | 718 | nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); |
713 | if (!nested_msrpm_pages) | 719 | if (!nested_msrpm_pages) |
714 | goto uninit; | 720 | goto free_page2; |
715 | |||
716 | svm->msrpm = page_address(msrpm_pages); | ||
717 | svm_vcpu_init_msrpm(svm->msrpm); | ||
718 | 721 | ||
719 | hsave_page = alloc_page(GFP_KERNEL); | 722 | hsave_page = alloc_page(GFP_KERNEL); |
720 | if (!hsave_page) | 723 | if (!hsave_page) |
721 | goto uninit; | 724 | goto free_page3; |
725 | |||
722 | svm->nested.hsave = page_address(hsave_page); | 726 | svm->nested.hsave = page_address(hsave_page); |
723 | 727 | ||
728 | svm->msrpm = page_address(msrpm_pages); | ||
729 | svm_vcpu_init_msrpm(svm->msrpm); | ||
730 | |||
724 | svm->nested.msrpm = page_address(nested_msrpm_pages); | 731 | svm->nested.msrpm = page_address(nested_msrpm_pages); |
725 | 732 | ||
726 | svm->vmcb = page_address(page); | 733 | svm->vmcb = page_address(page); |
@@ -730,13 +737,18 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
730 | init_vmcb(svm); | 737 | init_vmcb(svm); |
731 | 738 | ||
732 | fx_init(&svm->vcpu); | 739 | fx_init(&svm->vcpu); |
733 | svm->vcpu.fpu_active = 1; | ||
734 | svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | 740 | svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; |
735 | if (kvm_vcpu_is_bsp(&svm->vcpu)) | 741 | if (kvm_vcpu_is_bsp(&svm->vcpu)) |
736 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; | 742 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; |
737 | 743 | ||
738 | return &svm->vcpu; | 744 | return &svm->vcpu; |
739 | 745 | ||
746 | free_page3: | ||
747 | __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); | ||
748 | free_page2: | ||
749 | __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); | ||
750 | free_page1: | ||
751 | __free_page(page); | ||
740 | uninit: | 752 | uninit: |
741 | kvm_vcpu_uninit(&svm->vcpu); | 753 | kvm_vcpu_uninit(&svm->vcpu); |
742 | free_svm: | 754 | free_svm: |
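
The reshuffled error paths in svm_create_vcpu follow the standard kernel goto-unwind idiom: allocate in order, and on failure jump to a label that releases only what already exists, in reverse order. A self-contained user-space rendition (all names invented):

    #include <stdio.h>
    #include <stdlib.h>

    static int setup(void)
    {
            char *a, *b, *c;
            int err = -1;             /* set the error code once, up front */

            a = malloc(16);
            if (!a)
                    goto out;
            b = malloc(16);
            if (!b)
                    goto free_a;      /* only 'a' exists at this point   */
            c = malloc(16);
            if (!c)
                    goto free_b;      /* 'a' and 'b' exist; unwind both  */

            printf("all allocations succeeded\n");
            free(c);
            free(b);
            free(a);
            return 0;

    free_b:
            free(b);
    free_a:
            free(a);
    out:
            return err;
    }

    int main(void)
    {
            return setup() ? 1 : 0;
    }

Hoisting "err = -ENOMEM" above the first allocation, as the diff does, avoids repeating the assignment in every failure branch.
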
@@ -765,14 +777,16 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
765 | if (unlikely(cpu != vcpu->cpu)) { | 777 | if (unlikely(cpu != vcpu->cpu)) { |
766 | u64 delta; | 778 | u64 delta; |
767 | 779 | ||
768 | /* | 780 | if (check_tsc_unstable()) { |
769 | * Make sure that the guest sees a monotonically | 781 | /* |
770 | * increasing TSC. | 782 | * Make sure that the guest sees a monotonically |
771 | */ | 783 | * increasing TSC. |
772 | delta = vcpu->arch.host_tsc - native_read_tsc(); | 784 | */ |
773 | svm->vmcb->control.tsc_offset += delta; | 785 | delta = vcpu->arch.host_tsc - native_read_tsc(); |
774 | if (is_nested(svm)) | 786 | svm->vmcb->control.tsc_offset += delta; |
775 | svm->nested.hsave->control.tsc_offset += delta; | 787 | if (is_nested(svm)) |
788 | svm->nested.hsave->control.tsc_offset += delta; | ||
789 | } | ||
776 | vcpu->cpu = cpu; | 790 | vcpu->cpu = cpu; |
777 | kvm_migrate_timers(vcpu); | 791 | kvm_migrate_timers(vcpu); |
778 | svm->asid_generation = 0; | 792 | svm->asid_generation = 0; |
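
Gating the adjustment on check_tsc_unstable() means hosts with a stable TSC skip the correction entirely; on unstable hosts, the delta between the TSC recorded when the vCPU last ran and the new CPU's current TSC is folded into the VMCB offset, keeping guest_tsc = host_tsc + offset monotonic. A sketch of that bookkeeping with invented values:

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
            /* TSC recorded at the last vcpu_put, and the new CPU's TSC now */
            uint64_t host_tsc_at_put = 1000000;
            uint64_t host_tsc_now    =  400000; /* new CPU counts from behind   */
            int64_t  tsc_offset      =   50000; /* guest_tsc = host_tsc + offset */

            /* Uncorrected, the guest TSC would step backwards by 600000. */
            int64_t delta = (int64_t)(host_tsc_at_put - host_tsc_now);
            tsc_offset += delta;

            printf("guest tsc resumes at %" PRId64 "\n",
                   (int64_t)host_tsc_now + tsc_offset); /* 1050000: monotonic */
            return 0;
    }
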
@@ -954,42 +968,59 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | |||
954 | svm->vmcb->save.gdtr.base = dt->base; | 968 | svm->vmcb->save.gdtr.base = dt->base; |
955 | } | 969 | } |
956 | 970 | ||
971 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | ||
972 | { | ||
973 | } | ||
974 | |||
957 | static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | 975 | static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) |
958 | { | 976 | { |
959 | } | 977 | } |
960 | 978 | ||
979 | static void update_cr0_intercept(struct vcpu_svm *svm) | ||
980 | { | ||
981 | ulong gcr0 = svm->vcpu.arch.cr0; | ||
982 | u64 *hcr0 = &svm->vmcb->save.cr0; | ||
983 | |||
984 | if (!svm->vcpu.fpu_active) | ||
985 | *hcr0 |= SVM_CR0_SELECTIVE_MASK; | ||
986 | else | ||
987 | *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) | ||
988 | | (gcr0 & SVM_CR0_SELECTIVE_MASK); | ||
989 | |||
990 | |||
991 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { | ||
992 | svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | ||
993 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | ||
994 | } else { | ||
995 | svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | ||
996 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | ||
997 | } | ||
998 | } | ||
999 | |||
961 | static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 1000 | static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
962 | { | 1001 | { |
963 | struct vcpu_svm *svm = to_svm(vcpu); | 1002 | struct vcpu_svm *svm = to_svm(vcpu); |
964 | 1003 | ||
965 | #ifdef CONFIG_X86_64 | 1004 | #ifdef CONFIG_X86_64 |
966 | if (vcpu->arch.shadow_efer & EFER_LME) { | 1005 | if (vcpu->arch.efer & EFER_LME) { |
967 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 1006 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
968 | vcpu->arch.shadow_efer |= EFER_LMA; | 1007 | vcpu->arch.efer |= EFER_LMA; |
969 | svm->vmcb->save.efer |= EFER_LMA | EFER_LME; | 1008 | svm->vmcb->save.efer |= EFER_LMA | EFER_LME; |
970 | } | 1009 | } |
971 | 1010 | ||
972 | if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { | 1011 | if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { |
973 | vcpu->arch.shadow_efer &= ~EFER_LMA; | 1012 | vcpu->arch.efer &= ~EFER_LMA; |
974 | svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); | 1013 | svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); |
975 | } | 1014 | } |
976 | } | 1015 | } |
977 | #endif | 1016 | #endif |
978 | if (npt_enabled) | 1017 | vcpu->arch.cr0 = cr0; |
979 | goto set; | ||
980 | 1018 | ||
981 | if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { | 1019 | if (!npt_enabled) |
982 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 1020 | cr0 |= X86_CR0_PG | X86_CR0_WP; |
983 | vcpu->fpu_active = 1; | ||
984 | } | ||
985 | 1021 | ||
986 | vcpu->arch.cr0 = cr0; | 1022 | if (!vcpu->fpu_active) |
987 | cr0 |= X86_CR0_PG | X86_CR0_WP; | ||
988 | if (!vcpu->fpu_active) { | ||
989 | svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); | ||
990 | cr0 |= X86_CR0_TS; | 1023 | cr0 |= X86_CR0_TS; |
991 | } | ||
992 | set: | ||
993 | /* | 1024 | /* |
994 | * re-enable caching here because the QEMU bios | 1025 | * re-enable caching here because the QEMU bios |
995 | * does not do it - this results in some delay at | 1026 | * does not do it - this results in some delay at |
@@ -997,6 +1028,7 @@ set: | |||
997 | */ | 1028 | */ |
998 | cr0 &= ~(X86_CR0_CD | X86_CR0_NW); | 1029 | cr0 &= ~(X86_CR0_CD | X86_CR0_NW); |
999 | svm->vmcb->save.cr0 = cr0; | 1030 | svm->vmcb->save.cr0 = cr0; |
1031 | update_cr0_intercept(svm); | ||
1000 | } | 1032 | } |
1001 | 1033 | ||
1002 | static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 1034 | static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
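
update_cr0_intercept above drops the CR0 read/write intercepts whenever the CR0 the guest believes in and the CR0 the hardware runs with agree while the FPU is active, since then there is nothing left to hide. A user-space model of that decision; the selective mask is assumed here to be the TS and MP bits, which is how SVM's selective CR0 intercept is commonly described, not something this diff itself states:

    #include <stdbool.h>
    #include <stdio.h>

    #define X86_CR0_MP (1UL << 1)
    #define X86_CR0_TS (1UL << 3)
    #define CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) /* assumption, see above */

    static bool need_cr0_intercept(unsigned long gcr0, unsigned long *hcr0,
                                   bool fpu_active)
    {
            if (!fpu_active)
                    *hcr0 |= CR0_SELECTIVE_MASK;  /* force TS/MP on in hardware */
            else
                    *hcr0 = (*hcr0 & ~CR0_SELECTIVE_MASK)
                            | (gcr0 & CR0_SELECTIVE_MASK);

            /* Intercept only while the guest and hardware views disagree. */
            return !(gcr0 == *hcr0 && fpu_active);
    }

    int main(void)
    {
            unsigned long hcr0 = 0;
            bool icpt;

            icpt = need_cr0_intercept(0, &hcr0, false);
            printf("fpu off: intercept=%d hcr0=%#lx\n", icpt, hcr0); /* 1, TS|MP */
            icpt = need_cr0_intercept(0, &hcr0, true);
            printf("fpu on:  intercept=%d hcr0=%#lx\n", icpt, hcr0); /* 0, match */
            return 0;
    }
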
@@ -1102,76 +1134,70 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) | |||
1102 | svm->vmcb->control.asid = sd->next_asid++; | 1134 | svm->vmcb->control.asid = sd->next_asid++; |
1103 | } | 1135 | } |
1104 | 1136 | ||
1105 | static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) | 1137 | static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest) |
1106 | { | 1138 | { |
1107 | struct vcpu_svm *svm = to_svm(vcpu); | 1139 | struct vcpu_svm *svm = to_svm(vcpu); |
1108 | unsigned long val; | ||
1109 | 1140 | ||
1110 | switch (dr) { | 1141 | switch (dr) { |
1111 | case 0 ... 3: | 1142 | case 0 ... 3: |
1112 | val = vcpu->arch.db[dr]; | 1143 | *dest = vcpu->arch.db[dr]; |
1113 | break; | 1144 | break; |
1145 | case 4: | ||
1146 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1147 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1148 | /* fall through */ | ||
1114 | case 6: | 1149 | case 6: |
1115 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | 1150 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
1116 | val = vcpu->arch.dr6; | 1151 | *dest = vcpu->arch.dr6; |
1117 | else | 1152 | else |
1118 | val = svm->vmcb->save.dr6; | 1153 | *dest = svm->vmcb->save.dr6; |
1119 | break; | 1154 | break; |
1155 | case 5: | ||
1156 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1157 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1158 | /* fall through */ | ||
1120 | case 7: | 1159 | case 7: |
1121 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | 1160 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
1122 | val = vcpu->arch.dr7; | 1161 | *dest = vcpu->arch.dr7; |
1123 | else | 1162 | else |
1124 | val = svm->vmcb->save.dr7; | 1163 | *dest = svm->vmcb->save.dr7; |
1125 | break; | 1164 | break; |
1126 | default: | ||
1127 | val = 0; | ||
1128 | } | 1165 | } |
1129 | 1166 | ||
1130 | return val; | 1167 | return EMULATE_DONE; |
1131 | } | 1168 | } |
1132 | 1169 | ||
1133 | static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, | 1170 | static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value) |
1134 | int *exception) | ||
1135 | { | 1171 | { |
1136 | struct vcpu_svm *svm = to_svm(vcpu); | 1172 | struct vcpu_svm *svm = to_svm(vcpu); |
1137 | 1173 | ||
1138 | *exception = 0; | ||
1139 | |||
1140 | switch (dr) { | 1174 | switch (dr) { |
1141 | case 0 ... 3: | 1175 | case 0 ... 3: |
1142 | vcpu->arch.db[dr] = value; | 1176 | vcpu->arch.db[dr] = value; |
1143 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | 1177 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) |
1144 | vcpu->arch.eff_db[dr] = value; | 1178 | vcpu->arch.eff_db[dr] = value; |
1145 | return; | 1179 | break; |
1146 | case 4 ... 5: | 1180 | case 4: |
1147 | if (vcpu->arch.cr4 & X86_CR4_DE) | 1181 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
1148 | *exception = UD_VECTOR; | 1182 | return EMULATE_FAIL; /* will re-inject UD */ |
1149 | return; | 1183 | /* fall through */ |
1150 | case 6: | 1184 | case 6: |
1151 | if (value & 0xffffffff00000000ULL) { | ||
1152 | *exception = GP_VECTOR; | ||
1153 | return; | ||
1154 | } | ||
1155 | vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; | 1185 | vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; |
1156 | return; | 1186 | break; |
1187 | case 5: | ||
1188 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1189 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1190 | /* fall through */ | ||
1157 | case 7: | 1191 | case 7: |
1158 | if (value & 0xffffffff00000000ULL) { | ||
1159 | *exception = GP_VECTOR; | ||
1160 | return; | ||
1161 | } | ||
1162 | vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; | 1192 | vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; |
1163 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | 1193 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { |
1164 | svm->vmcb->save.dr7 = vcpu->arch.dr7; | 1194 | svm->vmcb->save.dr7 = vcpu->arch.dr7; |
1165 | vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); | 1195 | vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); |
1166 | } | 1196 | } |
1167 | return; | 1197 | break; |
1168 | default: | ||
1169 | /* FIXME: Possible case? */ | ||
1170 | printk(KERN_DEBUG "%s: unexpected dr %u\n", | ||
1171 | __func__, dr); | ||
1172 | *exception = UD_VECTOR; | ||
1173 | return; | ||
1174 | } | 1198 | } |
1199 | |||
1200 | return EMULATE_DONE; | ||
1175 | } | 1201 | } |
1176 | 1202 | ||
1177 | static int pf_interception(struct vcpu_svm *svm) | 1203 | static int pf_interception(struct vcpu_svm *svm) |
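
The new DR4/DR5 cases encode the architectural aliasing rule: with CR4.DE clear, DR4 and DR5 behave as DR6 and DR7; with CR4.DE set, the access is undefined and the handler returns EMULATE_FAIL so a #UD can be re-injected. A compact model of the aliasing (resolve_dr is an invented name):

    #include <stdio.h>

    /* Returns the register actually accessed, or -1 to signal #UD. */
    static int resolve_dr(int dr, int cr4_de)
    {
            switch (dr) {
            case 4:
                    if (cr4_de)
                            return -1;   /* CR4.DE set: DR4 access is #UD */
                    return 6;            /* DE clear: DR4 aliases DR6     */
            case 5:
                    if (cr4_de)
                            return -1;
                    return 7;            /* DE clear: DR5 aliases DR7     */
            default:
                    return dr;
            }
    }

    int main(void)
    {
            printf("DR4, CR4.DE=0 -> DR%d\n", resolve_dr(4, 0)); /* DR6 */
            printf("DR5, CR4.DE=1 -> %d (#UD)\n", resolve_dr(5, 1));
            return 0;
    }
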
@@ -1239,13 +1265,17 @@ static int ud_interception(struct vcpu_svm *svm) | |||
1239 | return 1; | 1265 | return 1; |
1240 | } | 1266 | } |
1241 | 1267 | ||
1242 | static int nm_interception(struct vcpu_svm *svm) | 1268 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) |
1243 | { | 1269 | { |
1270 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1244 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 1271 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); |
1245 | if (!(svm->vcpu.arch.cr0 & X86_CR0_TS)) | ||
1246 | svm->vmcb->save.cr0 &= ~X86_CR0_TS; | ||
1247 | svm->vcpu.fpu_active = 1; | 1272 | svm->vcpu.fpu_active = 1; |
1273 | update_cr0_intercept(svm); | ||
1274 | } | ||
1248 | 1275 | ||
1276 | static int nm_interception(struct vcpu_svm *svm) | ||
1277 | { | ||
1278 | svm_fpu_activate(&svm->vcpu); | ||
1249 | return 1; | 1279 | return 1; |
1250 | } | 1280 | } |
1251 | 1281 | ||
@@ -1337,7 +1367,7 @@ static int vmmcall_interception(struct vcpu_svm *svm) | |||
1337 | 1367 | ||
1338 | static int nested_svm_check_permissions(struct vcpu_svm *svm) | 1368 | static int nested_svm_check_permissions(struct vcpu_svm *svm) |
1339 | { | 1369 | { |
1340 | if (!(svm->vcpu.arch.shadow_efer & EFER_SVME) | 1370 | if (!(svm->vcpu.arch.efer & EFER_SVME) |
1341 | || !is_paging(&svm->vcpu)) { | 1371 | || !is_paging(&svm->vcpu)) { |
1342 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 1372 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
1343 | return 1; | 1373 | return 1; |
@@ -1740,8 +1770,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1740 | hsave->save.ds = vmcb->save.ds; | 1770 | hsave->save.ds = vmcb->save.ds; |
1741 | hsave->save.gdtr = vmcb->save.gdtr; | 1771 | hsave->save.gdtr = vmcb->save.gdtr; |
1742 | hsave->save.idtr = vmcb->save.idtr; | 1772 | hsave->save.idtr = vmcb->save.idtr; |
1743 | hsave->save.efer = svm->vcpu.arch.shadow_efer; | 1773 | hsave->save.efer = svm->vcpu.arch.efer; |
1744 | hsave->save.cr0 = svm->vcpu.arch.cr0; | 1774 | hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); |
1745 | hsave->save.cr4 = svm->vcpu.arch.cr4; | 1775 | hsave->save.cr4 = svm->vcpu.arch.cr4; |
1746 | hsave->save.rflags = vmcb->save.rflags; | 1776 | hsave->save.rflags = vmcb->save.rflags; |
1747 | hsave->save.rip = svm->next_rip; | 1777 | hsave->save.rip = svm->next_rip; |
@@ -2153,9 +2183,10 @@ static int rdmsr_interception(struct vcpu_svm *svm) | |||
2153 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 2183 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
2154 | u64 data; | 2184 | u64 data; |
2155 | 2185 | ||
2156 | if (svm_get_msr(&svm->vcpu, ecx, &data)) | 2186 | if (svm_get_msr(&svm->vcpu, ecx, &data)) { |
2187 | trace_kvm_msr_read_ex(ecx); | ||
2157 | kvm_inject_gp(&svm->vcpu, 0); | 2188 | kvm_inject_gp(&svm->vcpu, 0); |
2158 | else { | 2189 | } else { |
2159 | trace_kvm_msr_read(ecx, data); | 2190 | trace_kvm_msr_read(ecx, data); |
2160 | 2191 | ||
2161 | svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff; | 2192 | svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff; |
@@ -2247,13 +2278,15 @@ static int wrmsr_interception(struct vcpu_svm *svm) | |||
2247 | u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) | 2278 | u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) |
2248 | | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); | 2279 | | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); |
2249 | 2280 | ||
2250 | trace_kvm_msr_write(ecx, data); | ||
2251 | 2281 | ||
2252 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; | 2282 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; |
2253 | if (svm_set_msr(&svm->vcpu, ecx, data)) | 2283 | if (svm_set_msr(&svm->vcpu, ecx, data)) { |
2284 | trace_kvm_msr_write_ex(ecx, data); | ||
2254 | kvm_inject_gp(&svm->vcpu, 0); | 2285 | kvm_inject_gp(&svm->vcpu, 0); |
2255 | else | 2286 | } else { |
2287 | trace_kvm_msr_write(ecx, data); | ||
2256 | skip_emulated_instruction(&svm->vcpu); | 2288 | skip_emulated_instruction(&svm->vcpu); |
2289 | } | ||
2257 | return 1; | 2290 | return 1; |
2258 | } | 2291 | } |
2259 | 2292 | ||
@@ -2297,7 +2330,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
2297 | [SVM_EXIT_READ_CR3] = emulate_on_interception, | 2330 | [SVM_EXIT_READ_CR3] = emulate_on_interception, |
2298 | [SVM_EXIT_READ_CR4] = emulate_on_interception, | 2331 | [SVM_EXIT_READ_CR4] = emulate_on_interception, |
2299 | [SVM_EXIT_READ_CR8] = emulate_on_interception, | 2332 | [SVM_EXIT_READ_CR8] = emulate_on_interception, |
2300 | /* for now: */ | 2333 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, |
2301 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, | 2334 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, |
2302 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, | 2335 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, |
2303 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, | 2336 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, |
@@ -2306,11 +2339,17 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
2306 | [SVM_EXIT_READ_DR1] = emulate_on_interception, | 2339 | [SVM_EXIT_READ_DR1] = emulate_on_interception, |
2307 | [SVM_EXIT_READ_DR2] = emulate_on_interception, | 2340 | [SVM_EXIT_READ_DR2] = emulate_on_interception, |
2308 | [SVM_EXIT_READ_DR3] = emulate_on_interception, | 2341 | [SVM_EXIT_READ_DR3] = emulate_on_interception, |
2342 | [SVM_EXIT_READ_DR4] = emulate_on_interception, | ||
2343 | [SVM_EXIT_READ_DR5] = emulate_on_interception, | ||
2344 | [SVM_EXIT_READ_DR6] = emulate_on_interception, | ||
2345 | [SVM_EXIT_READ_DR7] = emulate_on_interception, | ||
2309 | [SVM_EXIT_WRITE_DR0] = emulate_on_interception, | 2346 | [SVM_EXIT_WRITE_DR0] = emulate_on_interception, |
2310 | [SVM_EXIT_WRITE_DR1] = emulate_on_interception, | 2347 | [SVM_EXIT_WRITE_DR1] = emulate_on_interception, |
2311 | [SVM_EXIT_WRITE_DR2] = emulate_on_interception, | 2348 | [SVM_EXIT_WRITE_DR2] = emulate_on_interception, |
2312 | [SVM_EXIT_WRITE_DR3] = emulate_on_interception, | 2349 | [SVM_EXIT_WRITE_DR3] = emulate_on_interception, |
2350 | [SVM_EXIT_WRITE_DR4] = emulate_on_interception, | ||
2313 | [SVM_EXIT_WRITE_DR5] = emulate_on_interception, | 2351 | [SVM_EXIT_WRITE_DR5] = emulate_on_interception, |
2352 | [SVM_EXIT_WRITE_DR6] = emulate_on_interception, | ||
2314 | [SVM_EXIT_WRITE_DR7] = emulate_on_interception, | 2353 | [SVM_EXIT_WRITE_DR7] = emulate_on_interception, |
2315 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, | 2354 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, |
2316 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, | 2355 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, |
@@ -2383,20 +2422,10 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
2383 | 2422 | ||
2384 | svm_complete_interrupts(svm); | 2423 | svm_complete_interrupts(svm); |
2385 | 2424 | ||
2386 | if (npt_enabled) { | 2425 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) |
2387 | int mmu_reload = 0; | ||
2388 | if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { | ||
2389 | svm_set_cr0(vcpu, svm->vmcb->save.cr0); | ||
2390 | mmu_reload = 1; | ||
2391 | } | ||
2392 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | 2426 | vcpu->arch.cr0 = svm->vmcb->save.cr0; |
2427 | if (npt_enabled) | ||
2393 | vcpu->arch.cr3 = svm->vmcb->save.cr3; | 2428 | vcpu->arch.cr3 = svm->vmcb->save.cr3; |
2394 | if (mmu_reload) { | ||
2395 | kvm_mmu_reset_context(vcpu); | ||
2396 | kvm_mmu_load(vcpu); | ||
2397 | } | ||
2398 | } | ||
2399 | |||
2400 | 2429 | ||
2401 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { | 2430 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { |
2402 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 2431 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
@@ -2798,12 +2827,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) | |||
2798 | 2827 | ||
2799 | svm->vmcb->save.cr3 = root; | 2828 | svm->vmcb->save.cr3 = root; |
2800 | force_new_asid(vcpu); | 2829 | force_new_asid(vcpu); |
2801 | |||
2802 | if (vcpu->fpu_active) { | ||
2803 | svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); | ||
2804 | svm->vmcb->save.cr0 |= X86_CR0_TS; | ||
2805 | vcpu->fpu_active = 0; | ||
2806 | } | ||
2807 | } | 2830 | } |
2808 | 2831 | ||
2809 | static int is_disabled(void) | 2832 | static int is_disabled(void) |
@@ -2852,6 +2875,10 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
2852 | return 0; | 2875 | return 0; |
2853 | } | 2876 | } |
2854 | 2877 | ||
2878 | static void svm_cpuid_update(struct kvm_vcpu *vcpu) | ||
2879 | { | ||
2880 | } | ||
2881 | |||
2855 | static const struct trace_print_flags svm_exit_reasons_str[] = { | 2882 | static const struct trace_print_flags svm_exit_reasons_str[] = { |
2856 | { SVM_EXIT_READ_CR0, "read_cr0" }, | 2883 | { SVM_EXIT_READ_CR0, "read_cr0" }, |
2857 | { SVM_EXIT_READ_CR3, "read_cr3" }, | 2884 | { SVM_EXIT_READ_CR3, "read_cr3" }, |
@@ -2905,9 +2932,22 @@ static const struct trace_print_flags svm_exit_reasons_str[] = { | |||
2905 | { -1, NULL } | 2932 | { -1, NULL } |
2906 | }; | 2933 | }; |
2907 | 2934 | ||
2908 | static bool svm_gb_page_enable(void) | 2935 | static int svm_get_lpage_level(void) |
2909 | { | 2936 | { |
2910 | return true; | 2937 | return PT_PDPE_LEVEL; |
2938 | } | ||
2939 | |||
2940 | static bool svm_rdtscp_supported(void) | ||
2941 | { | ||
2942 | return false; | ||
2943 | } | ||
2944 | |||
2945 | static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | ||
2946 | { | ||
2947 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2948 | |||
2949 | update_cr0_intercept(svm); | ||
2950 | svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; | ||
2911 | } | 2951 | } |
2912 | 2952 | ||
2913 | static struct kvm_x86_ops svm_x86_ops = { | 2953 | static struct kvm_x86_ops svm_x86_ops = { |
@@ -2936,6 +2976,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
2936 | .set_segment = svm_set_segment, | 2976 | .set_segment = svm_set_segment, |
2937 | .get_cpl = svm_get_cpl, | 2977 | .get_cpl = svm_get_cpl, |
2938 | .get_cs_db_l_bits = kvm_get_cs_db_l_bits, | 2978 | .get_cs_db_l_bits = kvm_get_cs_db_l_bits, |
2979 | .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, | ||
2939 | .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, | 2980 | .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, |
2940 | .set_cr0 = svm_set_cr0, | 2981 | .set_cr0 = svm_set_cr0, |
2941 | .set_cr3 = svm_set_cr3, | 2982 | .set_cr3 = svm_set_cr3, |
@@ -2950,6 +2991,8 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
2950 | .cache_reg = svm_cache_reg, | 2991 | .cache_reg = svm_cache_reg, |
2951 | .get_rflags = svm_get_rflags, | 2992 | .get_rflags = svm_get_rflags, |
2952 | .set_rflags = svm_set_rflags, | 2993 | .set_rflags = svm_set_rflags, |
2994 | .fpu_activate = svm_fpu_activate, | ||
2995 | .fpu_deactivate = svm_fpu_deactivate, | ||
2953 | 2996 | ||
2954 | .tlb_flush = svm_flush_tlb, | 2997 | .tlb_flush = svm_flush_tlb, |
2955 | 2998 | ||
@@ -2975,7 +3018,11 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
2975 | .get_mt_mask = svm_get_mt_mask, | 3018 | .get_mt_mask = svm_get_mt_mask, |
2976 | 3019 | ||
2977 | .exit_reasons_str = svm_exit_reasons_str, | 3020 | .exit_reasons_str = svm_exit_reasons_str, |
2978 | .gb_page_enable = svm_gb_page_enable, | 3021 | .get_lpage_level = svm_get_lpage_level, |
3022 | |||
3023 | .cpuid_update = svm_cpuid_update, | ||
3024 | |||
3025 | .rdtscp_supported = svm_rdtscp_supported, | ||
2979 | }; | 3026 | }; |
2980 | 3027 | ||
2981 | static int __init svm_init(void) | 3028 | static int __init svm_init(void) |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 816e0449db0b..6ad30a29f044 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -56,6 +56,38 @@ TRACE_EVENT(kvm_hypercall, | |||
56 | ); | 56 | ); |
57 | 57 | ||
58 | /* | 58 | /* |
59 | * Tracepoint for Hyper-V hypercall. | ||
60 | */ | ||
61 | TRACE_EVENT(kvm_hv_hypercall, | ||
62 | TP_PROTO(__u16 code, bool fast, __u16 rep_cnt, __u16 rep_idx, | ||
63 | __u64 ingpa, __u64 outgpa), | ||
64 | TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa), | ||
65 | |||
66 | TP_STRUCT__entry( | ||
67 | __field( __u16, code ) | ||
68 | __field( bool, fast ) | ||
69 | __field( __u16, rep_cnt ) | ||
70 | __field( __u16, rep_idx ) | ||
71 | __field( __u64, ingpa ) | ||
72 | __field( __u64, outgpa ) | ||
73 | ), | ||
74 | |||
75 | TP_fast_assign( | ||
76 | __entry->code = code; | ||
77 | __entry->fast = fast; | ||
78 | __entry->rep_cnt = rep_cnt; | ||
79 | __entry->rep_idx = rep_idx; | ||
80 | __entry->ingpa = ingpa; | ||
81 | __entry->outgpa = outgpa; | ||
82 | ), | ||
83 | |||
84 | TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx", | ||
85 | __entry->code, __entry->fast ? "fast" : "slow", | ||
86 | __entry->rep_cnt, __entry->rep_idx, __entry->ingpa, | ||
87 | __entry->outgpa) | ||
88 | ); | ||
89 | |||
90 | /* | ||
59 | * Tracepoint for PIO. | 91 | * Tracepoint for PIO. |
60 | */ | 92 | */ |
61 | TRACE_EVENT(kvm_pio, | 93 | TRACE_EVENT(kvm_pio, |
@@ -214,28 +246,33 @@ TRACE_EVENT(kvm_page_fault, | |||
214 | * Tracepoint for guest MSR access. | 246 | * Tracepoint for guest MSR access. |
215 | */ | 247 | */ |
216 | TRACE_EVENT(kvm_msr, | 248 | TRACE_EVENT(kvm_msr, |
217 | TP_PROTO(unsigned int rw, unsigned int ecx, unsigned long data), | 249 | TP_PROTO(unsigned write, u32 ecx, u64 data, bool exception), |
218 | TP_ARGS(rw, ecx, data), | 250 | TP_ARGS(write, ecx, data, exception), |
219 | 251 | ||
220 | TP_STRUCT__entry( | 252 | TP_STRUCT__entry( |
221 | __field( unsigned int, rw ) | 253 | __field( unsigned, write ) |
222 | __field( unsigned int, ecx ) | 254 | __field( u32, ecx ) |
223 | __field( unsigned long, data ) | 255 | __field( u64, data ) |
256 | __field( u8, exception ) | ||
224 | ), | 257 | ), |
225 | 258 | ||
226 | TP_fast_assign( | 259 | TP_fast_assign( |
227 | __entry->rw = rw; | 260 | __entry->write = write; |
228 | __entry->ecx = ecx; | 261 | __entry->ecx = ecx; |
229 | __entry->data = data; | 262 | __entry->data = data; |
263 | __entry->exception = exception; | ||
230 | ), | 264 | ), |
231 | 265 | ||
232 | TP_printk("msr_%s %x = 0x%lx", | 266 | TP_printk("msr_%s %x = 0x%llx%s", |
233 | __entry->rw ? "write" : "read", | 267 | __entry->write ? "write" : "read", |
234 | __entry->ecx, __entry->data) | 268 | __entry->ecx, __entry->data, |
269 | __entry->exception ? " (#GP)" : "") | ||
235 | ); | 270 | ); |
236 | 271 | ||
237 | #define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data) | 272 | #define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data, false) |
238 | #define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data) | 273 | #define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data, false) |
274 | #define trace_kvm_msr_read_ex(ecx) trace_kvm_msr(0, ecx, 0, true) | ||
275 | #define trace_kvm_msr_write_ex(ecx, data) trace_kvm_msr(1, ecx, data, true) | ||
239 | 276 | ||
240 | /* | 277 | /* |
241 | * Tracepoint for guest CR access. | 278 | * Tracepoint for guest CR access. |
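
Moving the trace call after the success check lets one tracepoint record both outcomes: the new _ex macros flag accesses that ended in #GP instead of logging a stale value for a failed read. The resulting handler shape, modeled in user space with printf standing in for the tracepoint:

    #include <stdint.h>
    #include <stdio.h>

    #define trace_kvm_msr(w, ecx, data, ex) \
            printf("msr_%s %x = 0x%llx%s\n", (w) ? "write" : "read", \
                   (unsigned)(ecx), (unsigned long long)(data), \
                   (ex) ? " (#GP)" : "")

    #define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data, 0)
    #define trace_kvm_msr_read_ex(ecx)    trace_kvm_msr(0, ecx, 0, 1)

    /* Stand-in for svm_get_msr(): fails for one MSR to show both paths. */
    static int get_msr(uint32_t ecx, uint64_t *data)
    {
            if (ecx == 0xdead)
                    return 1;        /* unknown MSR: caller injects #GP */
            *data = 0x1234;
            return 0;
    }

    int main(void)
    {
            uint64_t data;

            if (get_msr(0x10, &data))
                    trace_kvm_msr_read_ex(0x10);
            else
                    trace_kvm_msr_read(0x10, data);   /* normal trace     */

            if (get_msr(0xdead, &data))
                    trace_kvm_msr_read_ex(0xdead);    /* no stale 'data'  */
            else
                    trace_kvm_msr_read(0xdead, data);
            return 0;
    }
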
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d4918d6fc924..bc933cfb4e66 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/sched.h> | 26 | #include <linux/sched.h> |
27 | #include <linux/moduleparam.h> | 27 | #include <linux/moduleparam.h> |
28 | #include <linux/ftrace_event.h> | 28 | #include <linux/ftrace_event.h> |
29 | #include <linux/slab.h> | ||
29 | #include "kvm_cache_regs.h" | 30 | #include "kvm_cache_regs.h" |
30 | #include "x86.h" | 31 | #include "x86.h" |
31 | 32 | ||
@@ -61,6 +62,23 @@ module_param_named(unrestricted_guest, | |||
61 | static int __read_mostly emulate_invalid_guest_state = 0; | 62 | static int __read_mostly emulate_invalid_guest_state = 0; |
62 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); | 63 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
63 | 64 | ||
65 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ | ||
66 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) | ||
67 | #define KVM_GUEST_CR0_MASK \ | ||
68 | (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | ||
69 | #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \ | ||
70 | (X86_CR0_WP | X86_CR0_NE) | ||
71 | #define KVM_VM_CR0_ALWAYS_ON \ | ||
72 | (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | ||
73 | #define KVM_CR4_GUEST_OWNED_BITS \ | ||
74 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | ||
75 | | X86_CR4_OSXMMEXCPT) | ||
76 | |||
77 | #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) | ||
78 | #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) | ||
79 | |||
80 | #define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) | ||
81 | |||
64 | /* | 82 | /* |
65 | * These 2 parameters are used to configure the controls for Pause-Loop Exiting: | 83 | * These 2 parameters are used to configure the controls for Pause-Loop Exiting: |
66 | * ple_gap: upper bound on the amount of time between two successive | 84 | * ple_gap: upper bound on the amount of time between two successive |
@@ -115,7 +133,7 @@ struct vcpu_vmx { | |||
115 | } host_state; | 133 | } host_state; |
116 | struct { | 134 | struct { |
117 | int vm86_active; | 135 | int vm86_active; |
118 | u8 save_iopl; | 136 | ulong save_rflags; |
119 | struct kvm_save_segment { | 137 | struct kvm_save_segment { |
120 | u16 selector; | 138 | u16 selector; |
121 | unsigned long base; | 139 | unsigned long base; |
@@ -136,6 +154,8 @@ struct vcpu_vmx { | |||
136 | ktime_t entry_time; | 154 | ktime_t entry_time; |
137 | s64 vnmi_blocked_time; | 155 | s64 vnmi_blocked_time; |
138 | u32 exit_reason; | 156 | u32 exit_reason; |
157 | |||
158 | bool rdtscp_enabled; | ||
139 | }; | 159 | }; |
140 | 160 | ||
141 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | 161 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) |
@@ -210,7 +230,7 @@ static const u32 vmx_msr_index[] = { | |||
210 | #ifdef CONFIG_X86_64 | 230 | #ifdef CONFIG_X86_64 |
211 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, | 231 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, |
212 | #endif | 232 | #endif |
213 | MSR_EFER, MSR_K6_STAR, | 233 | MSR_EFER, MSR_TSC_AUX, MSR_K6_STAR, |
214 | }; | 234 | }; |
215 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | 235 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) |
216 | 236 | ||
@@ -301,6 +321,11 @@ static inline bool cpu_has_vmx_ept_2m_page(void) | |||
301 | return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); | 321 | return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); |
302 | } | 322 | } |
303 | 323 | ||
324 | static inline bool cpu_has_vmx_ept_1g_page(void) | ||
325 | { | ||
326 | return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT); | ||
327 | } | ||
328 | |||
304 | static inline int cpu_has_vmx_invept_individual_addr(void) | 329 | static inline int cpu_has_vmx_invept_individual_addr(void) |
305 | { | 330 | { |
306 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); | 331 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); |
@@ -336,9 +361,7 @@ static inline int cpu_has_vmx_ple(void) | |||
336 | 361 | ||
337 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 362 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) |
338 | { | 363 | { |
339 | return flexpriority_enabled && | 364 | return flexpriority_enabled && irqchip_in_kernel(kvm); |
340 | (cpu_has_vmx_virtualize_apic_accesses()) && | ||
341 | (irqchip_in_kernel(kvm)); | ||
342 | } | 365 | } |
343 | 366 | ||
344 | static inline int cpu_has_vmx_vpid(void) | 367 | static inline int cpu_has_vmx_vpid(void) |
@@ -347,6 +370,12 @@ static inline int cpu_has_vmx_vpid(void) | |||
347 | SECONDARY_EXEC_ENABLE_VPID; | 370 | SECONDARY_EXEC_ENABLE_VPID; |
348 | } | 371 | } |
349 | 372 | ||
373 | static inline int cpu_has_vmx_rdtscp(void) | ||
374 | { | ||
375 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
376 | SECONDARY_EXEC_RDTSCP; | ||
377 | } | ||
378 | |||
350 | static inline int cpu_has_virtual_nmis(void) | 379 | static inline int cpu_has_virtual_nmis(void) |
351 | { | 380 | { |
352 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | 381 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; |
@@ -551,22 +580,18 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
551 | { | 580 | { |
552 | u32 eb; | 581 | u32 eb; |
553 | 582 | ||
554 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR); | 583 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | |
555 | if (!vcpu->fpu_active) | 584 | (1u << NM_VECTOR) | (1u << DB_VECTOR); |
556 | eb |= 1u << NM_VECTOR; | 585 | if ((vcpu->guest_debug & |
557 | /* | 586 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == |
558 | * Unconditionally intercept #DB so we can maintain dr6 without | 587 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) |
559 | * reading it every exit. | 588 | eb |= 1u << BP_VECTOR; |
560 | */ | ||
561 | eb |= 1u << DB_VECTOR; | ||
562 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { | ||
563 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | ||
564 | eb |= 1u << BP_VECTOR; | ||
565 | } | ||
566 | if (to_vmx(vcpu)->rmode.vm86_active) | 589 | if (to_vmx(vcpu)->rmode.vm86_active) |
567 | eb = ~0; | 590 | eb = ~0; |
568 | if (enable_ept) | 591 | if (enable_ept) |
569 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ | 592 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ |
593 | if (vcpu->fpu_active) | ||
594 | eb &= ~(1u << NM_VECTOR); | ||
570 | vmcs_write32(EXCEPTION_BITMAP, eb); | 595 | vmcs_write32(EXCEPTION_BITMAP, eb); |
571 | } | 596 | } |
572 | 597 | ||
@@ -589,7 +614,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) | |||
589 | u64 guest_efer; | 614 | u64 guest_efer; |
590 | u64 ignore_bits; | 615 | u64 ignore_bits; |
591 | 616 | ||
592 | guest_efer = vmx->vcpu.arch.shadow_efer; | 617 | guest_efer = vmx->vcpu.arch.efer; |
593 | 618 | ||
594 | /* | 619 | /* |
595 | * NX is emulated; LMA and LME handled by hardware; SCE meaningless | 620 | * NX is emulated; LMA and LME handled by hardware; SCE meaningless |
@@ -767,38 +792,51 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) | |||
767 | 792 | ||
768 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) | 793 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) |
769 | { | 794 | { |
795 | ulong cr0; | ||
796 | |||
770 | if (vcpu->fpu_active) | 797 | if (vcpu->fpu_active) |
771 | return; | 798 | return; |
772 | vcpu->fpu_active = 1; | 799 | vcpu->fpu_active = 1; |
773 | vmcs_clear_bits(GUEST_CR0, X86_CR0_TS); | 800 | cr0 = vmcs_readl(GUEST_CR0); |
774 | if (vcpu->arch.cr0 & X86_CR0_TS) | 801 | cr0 &= ~(X86_CR0_TS | X86_CR0_MP); |
775 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS); | 802 | cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP); |
803 | vmcs_writel(GUEST_CR0, cr0); | ||
776 | update_exception_bitmap(vcpu); | 804 | update_exception_bitmap(vcpu); |
805 | vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; | ||
806 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | ||
777 | } | 807 | } |
778 | 808 | ||
809 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); | ||
810 | |||
779 | static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) | 811 | static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) |
780 | { | 812 | { |
781 | if (!vcpu->fpu_active) | 813 | vmx_decache_cr0_guest_bits(vcpu); |
782 | return; | 814 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP); |
783 | vcpu->fpu_active = 0; | ||
784 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS); | ||
785 | update_exception_bitmap(vcpu); | 815 | update_exception_bitmap(vcpu); |
816 | vcpu->arch.cr0_guest_owned_bits = 0; | ||
817 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | ||
818 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); | ||
786 | } | 819 | } |
787 | 820 | ||
788 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | 821 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) |
789 | { | 822 | { |
790 | unsigned long rflags; | 823 | unsigned long rflags, save_rflags; |
791 | 824 | ||
792 | rflags = vmcs_readl(GUEST_RFLAGS); | 825 | rflags = vmcs_readl(GUEST_RFLAGS); |
793 | if (to_vmx(vcpu)->rmode.vm86_active) | 826 | if (to_vmx(vcpu)->rmode.vm86_active) { |
794 | rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM); | 827 | rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; |
828 | save_rflags = to_vmx(vcpu)->rmode.save_rflags; | ||
829 | rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; | ||
830 | } | ||
795 | return rflags; | 831 | return rflags; |
796 | } | 832 | } |
797 | 833 | ||
798 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 834 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
799 | { | 835 | { |
800 | if (to_vmx(vcpu)->rmode.vm86_active) | 836 | if (to_vmx(vcpu)->rmode.vm86_active) { |
837 | to_vmx(vcpu)->rmode.save_rflags = rflags; | ||
801 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; | 838 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; |
839 | } | ||
802 | vmcs_writel(GUEST_RFLAGS, rflags); | 840 | vmcs_writel(GUEST_RFLAGS, rflags); |
803 | } | 841 | } |
804 | 842 | ||
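vmx_get_rflags()/vmx_set_rflags() now preserve the guest's complete RFLAGS rather than only a shifted IOPL value: while real mode is emulated through vm86, IOPL and VM are forced on in the hardware RFLAGS and the guest's own bits live in rmode.save_rflags. A small round-trip model, assuming RMODE_GUEST_OWNED_EFLAGS_BITS is defined as ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM) (the definition is not shown in this hunk):

    #include <stdint.h>
    #include <assert.h>

    #define X86_EFLAGS_IOPL (3ull << 12)
    #define X86_EFLAGS_VM   (1ull << 17)
    #define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))

    struct rmode_state { uint64_t save_rflags; };

    static uint64_t rmode_set(struct rmode_state *rm, uint64_t rflags)
    {
        rm->save_rflags = rflags;     /* remember the guest's own view */
        return rflags | X86_EFLAGS_IOPL | X86_EFLAGS_VM; /* hardware view */
    }

    static uint64_t rmode_get(const struct rmode_state *rm, uint64_t hw)
    {
        return (hw & RMODE_GUEST_OWNED_EFLAGS_BITS) |
               (rm->save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS);
    }

    int main(void)
    {
        struct rmode_state rm;
        uint64_t hw = rmode_set(&rm, 0x2ull | X86_EFLAGS_IOPL);
        assert(rmode_get(&rm, hw) == (0x2ull | X86_EFLAGS_IOPL));
        return 0;
    }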
@@ -878,6 +916,11 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
878 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); | 916 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); |
879 | } | 917 | } |
880 | 918 | ||
919 | static bool vmx_rdtscp_supported(void) | ||
920 | { | ||
921 | return cpu_has_vmx_rdtscp(); | ||
922 | } | ||
923 | |||
881 | /* | 924 | /* |
882 | * Swap MSR entry in host/guest MSR entry array. | 925 | * Swap MSR entry in host/guest MSR entry array. |
883 | */ | 926 | */ |
@@ -913,12 +956,15 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
913 | index = __find_msr_index(vmx, MSR_CSTAR); | 956 | index = __find_msr_index(vmx, MSR_CSTAR); |
914 | if (index >= 0) | 957 | if (index >= 0) |
915 | move_msr_up(vmx, index, save_nmsrs++); | 958 | move_msr_up(vmx, index, save_nmsrs++); |
959 | index = __find_msr_index(vmx, MSR_TSC_AUX); | ||
960 | if (index >= 0 && vmx->rdtscp_enabled) | ||
961 | move_msr_up(vmx, index, save_nmsrs++); | ||
916 | /* | 962 | /* |
917 | * MSR_K6_STAR is only needed on long mode guests, and only | 963 | * MSR_K6_STAR is only needed on long mode guests, and only |
918 | * if efer.sce is enabled. | 964 | * if efer.sce is enabled. |
919 | */ | 965 | */ |
920 | index = __find_msr_index(vmx, MSR_K6_STAR); | 966 | index = __find_msr_index(vmx, MSR_K6_STAR); |
921 | if ((index >= 0) && (vmx->vcpu.arch.shadow_efer & EFER_SCE)) | 967 | if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE)) |
922 | move_msr_up(vmx, index, save_nmsrs++); | 968 | move_msr_up(vmx, index, save_nmsrs++); |
923 | } | 969 | } |
924 | #endif | 970 | #endif |
@@ -1002,6 +1048,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
1002 | case MSR_IA32_SYSENTER_ESP: | 1048 | case MSR_IA32_SYSENTER_ESP: |
1003 | data = vmcs_readl(GUEST_SYSENTER_ESP); | 1049 | data = vmcs_readl(GUEST_SYSENTER_ESP); |
1004 | break; | 1050 | break; |
1051 | case MSR_TSC_AUX: | ||
1052 | if (!to_vmx(vcpu)->rdtscp_enabled) | ||
1053 | return 1; | ||
1054 | /* Otherwise falls through */ | ||
1005 | default: | 1055 | default: |
1006 | vmx_load_host_state(to_vmx(vcpu)); | 1056 | vmx_load_host_state(to_vmx(vcpu)); |
1007 | msr = find_msr_entry(to_vmx(vcpu), msr_index); | 1057 | msr = find_msr_entry(to_vmx(vcpu), msr_index); |
@@ -1065,7 +1115,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
1065 | vcpu->arch.pat = data; | 1115 | vcpu->arch.pat = data; |
1066 | break; | 1116 | break; |
1067 | } | 1117 | } |
1068 | /* Otherwise falls through to kvm_set_msr_common */ | 1118 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
1119 | break; | ||
1120 | case MSR_TSC_AUX: | ||
1121 | if (!vmx->rdtscp_enabled) | ||
1122 | return 1; | ||
1123 | /* Check reserved bits: the upper 32 bits must be zero */ ||
1124 | if ((data >> 32) != 0) | ||
1125 | return 1; | ||
1126 | /* Otherwise falls through */ | ||
1069 | default: | 1127 | default: |
1070 | msr = find_msr_entry(vmx, msr_index); | 1128 | msr = find_msr_entry(vmx, msr_index); |
1071 | if (msr) { | 1129 | if (msr) { |
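The new MSR_TSC_AUX cases gate the MSR on the guest actually having RDTSCP enabled and reject any write with reserved upper bits set. The decision logic, modeled as a standalone check (returning 1 mirrors vmx_set_msr's "inject #GP" convention; a sketch, not the kernel function):

    #include <stdint.h>

    int check_tsc_aux_write(int rdtscp_enabled, uint64_t data)
    {
        if (!rdtscp_enabled)
            return 1;        /* guest has no RDTSCP: inject #GP */
        if (data >> 32)
            return 1;        /* reserved upper 32 bits set: inject #GP */
        return 0;            /* accept the write */
    }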
@@ -1224,6 +1282,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1224 | CPU_BASED_USE_IO_BITMAPS | | 1282 | CPU_BASED_USE_IO_BITMAPS | |
1225 | CPU_BASED_MOV_DR_EXITING | | 1283 | CPU_BASED_MOV_DR_EXITING | |
1226 | CPU_BASED_USE_TSC_OFFSETING | | 1284 | CPU_BASED_USE_TSC_OFFSETING | |
1285 | CPU_BASED_MWAIT_EXITING | | ||
1286 | CPU_BASED_MONITOR_EXITING | | ||
1227 | CPU_BASED_INVLPG_EXITING; | 1287 | CPU_BASED_INVLPG_EXITING; |
1228 | opt = CPU_BASED_TPR_SHADOW | | 1288 | opt = CPU_BASED_TPR_SHADOW | |
1229 | CPU_BASED_USE_MSR_BITMAPS | | 1289 | CPU_BASED_USE_MSR_BITMAPS | |
@@ -1243,7 +1303,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1243 | SECONDARY_EXEC_ENABLE_VPID | | 1303 | SECONDARY_EXEC_ENABLE_VPID | |
1244 | SECONDARY_EXEC_ENABLE_EPT | | 1304 | SECONDARY_EXEC_ENABLE_EPT | |
1245 | SECONDARY_EXEC_UNRESTRICTED_GUEST | | 1305 | SECONDARY_EXEC_UNRESTRICTED_GUEST | |
1246 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 1306 | SECONDARY_EXEC_PAUSE_LOOP_EXITING | |
1307 | SECONDARY_EXEC_RDTSCP; | ||
1247 | if (adjust_vmx_controls(min2, opt2, | 1308 | if (adjust_vmx_controls(min2, opt2, |
1248 | MSR_IA32_VMX_PROCBASED_CTLS2, | 1309 | MSR_IA32_VMX_PROCBASED_CTLS2, |
1249 | &_cpu_based_2nd_exec_control) < 0) | 1310 | &_cpu_based_2nd_exec_control) < 0) |
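MWAIT/MONITOR exiting joins the required ("min") control set handed to adjust_vmx_controls(), while SECONDARY_EXEC_RDTSCP joins the optional ("opt") set. The negotiation against the VMX capability MSRs works roughly as below; this is a model of the scheme, where the MSR's low dword gives the controls that must be 1 and the high dword the controls allowed to be 1:

    #include <stdint.h>

    static int adjust_controls(uint32_t min, uint32_t opt,
                               uint64_t ctl_msr, uint32_t *result)
    {
        uint32_t msr_low  = (uint32_t)ctl_msr;
        uint32_t msr_high = (uint32_t)(ctl_msr >> 32);
        uint32_t ctl = min | opt;

        ctl &= msr_high;   /* bit == 0 in the high word ==> must be zero */
        ctl |= msr_low;    /* bit == 1 in the low word  ==> must be one  */

        if (min & ~ctl)
            return -1;     /* a required control is not supported */
        *result = ctl;
        return 0;
    }

Optional controls silently drop out on older CPUs; a missing required control fails setup instead.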
@@ -1429,8 +1490,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
1429 | vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); | 1490 | vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); |
1430 | 1491 | ||
1431 | flags = vmcs_readl(GUEST_RFLAGS); | 1492 | flags = vmcs_readl(GUEST_RFLAGS); |
1432 | flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM); | 1493 | flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; |
1433 | flags |= (vmx->rmode.save_iopl << IOPL_SHIFT); | 1494 | flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; |
1434 | vmcs_writel(GUEST_RFLAGS, flags); | 1495 | vmcs_writel(GUEST_RFLAGS, flags); |
1435 | 1496 | ||
1436 | vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | | 1497 | vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | |
@@ -1457,8 +1518,12 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
1457 | static gva_t rmode_tss_base(struct kvm *kvm) | 1518 | static gva_t rmode_tss_base(struct kvm *kvm) |
1458 | { | 1519 | { |
1459 | if (!kvm->arch.tss_addr) { | 1520 | if (!kvm->arch.tss_addr) { |
1460 | gfn_t base_gfn = kvm->memslots[0].base_gfn + | 1521 | struct kvm_memslots *slots; |
1461 | kvm->memslots[0].npages - 3; | 1522 | gfn_t base_gfn; |
1523 | |||
1524 | slots = rcu_dereference(kvm->memslots); | ||
1525 | base_gfn = slots->memslots[0].base_gfn + ||
1526 | slots->memslots[0].npages - 3; ||
1462 | return base_gfn << PAGE_SHIFT; | 1527 | return base_gfn << PAGE_SHIFT; |
1463 | } | 1528 | } |
1464 | return kvm->arch.tss_addr; | 1529 | return kvm->arch.tss_addr; |
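rmode_tss_base() is one of several readers converted from slots_lock to an RCU-published memslots pointer: the updater swaps kvm->memslots with rcu_assign_pointer() and readers pick it up with rcu_dereference(), as above. A user-space model of the reader side, with an acquire load standing in for rcu_dereference() (illustrative types, not the kernel's):

    #include <stdatomic.h>
    #include <stdint.h>

    struct memslot  { uint64_t base_gfn, npages; };
    struct memslots { struct memslot slot[8]; };

    /* published pointer; updaters swap it, readers just load it */
    static _Atomic(struct memslots *) memslots;

    static uint64_t rmode_tss_base_gfn(void)
    {
        /* stands in for rcu_dereference(kvm->memslots) */
        struct memslots *s =
            atomic_load_explicit(&memslots, memory_order_acquire);

        return s->slot[0].base_gfn + s->slot[0].npages - 3;
    }

    int main(void)
    {
        static struct memslots s0 = {
            .slot = { { .base_gfn = 0, .npages = 256 } } };
        atomic_store(&memslots, &s0);
        return (int)rmode_tss_base_gfn();   /* 0 + 256 - 3 */
    }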
@@ -1499,8 +1564,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
1499 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); | 1564 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); |
1500 | 1565 | ||
1501 | flags = vmcs_readl(GUEST_RFLAGS); | 1566 | flags = vmcs_readl(GUEST_RFLAGS); |
1502 | vmx->rmode.save_iopl | 1567 | vmx->rmode.save_rflags = flags; |
1503 | = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | ||
1504 | 1568 | ||
1505 | flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; | 1569 | flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; |
1506 | 1570 | ||
@@ -1544,9 +1608,7 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
1544 | * of this msr depends on is_long_mode(). | 1608 | * of this msr depends on is_long_mode(). |
1545 | */ | 1609 | */ |
1546 | vmx_load_host_state(to_vmx(vcpu)); | 1610 | vmx_load_host_state(to_vmx(vcpu)); |
1547 | vcpu->arch.shadow_efer = efer; | 1611 | vcpu->arch.efer = efer; |
1548 | if (!msr) | ||
1549 | return; | ||
1550 | if (efer & EFER_LMA) { | 1612 | if (efer & EFER_LMA) { |
1551 | vmcs_write32(VM_ENTRY_CONTROLS, | 1613 | vmcs_write32(VM_ENTRY_CONTROLS, |
1552 | vmcs_read32(VM_ENTRY_CONTROLS) | | 1614 | vmcs_read32(VM_ENTRY_CONTROLS) | |
@@ -1576,13 +1638,13 @@ static void enter_lmode(struct kvm_vcpu *vcpu) | |||
1576 | (guest_tr_ar & ~AR_TYPE_MASK) | 1638 | (guest_tr_ar & ~AR_TYPE_MASK) |
1577 | | AR_TYPE_BUSY_64_TSS); | 1639 | | AR_TYPE_BUSY_64_TSS); |
1578 | } | 1640 | } |
1579 | vcpu->arch.shadow_efer |= EFER_LMA; | 1641 | vcpu->arch.efer |= EFER_LMA; |
1580 | vmx_set_efer(vcpu, vcpu->arch.shadow_efer); | 1642 | vmx_set_efer(vcpu, vcpu->arch.efer); |
1581 | } | 1643 | } |
1582 | 1644 | ||
1583 | static void exit_lmode(struct kvm_vcpu *vcpu) | 1645 | static void exit_lmode(struct kvm_vcpu *vcpu) |
1584 | { | 1646 | { |
1585 | vcpu->arch.shadow_efer &= ~EFER_LMA; | 1647 | vcpu->arch.efer &= ~EFER_LMA; |
1586 | 1648 | ||
1587 | vmcs_write32(VM_ENTRY_CONTROLS, | 1649 | vmcs_write32(VM_ENTRY_CONTROLS, |
1588 | vmcs_read32(VM_ENTRY_CONTROLS) | 1650 | vmcs_read32(VM_ENTRY_CONTROLS) |
@@ -1598,10 +1660,20 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | |||
1598 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); | 1660 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); |
1599 | } | 1661 | } |
1600 | 1662 | ||
1663 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | ||
1664 | { | ||
1665 | ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; | ||
1666 | |||
1667 | vcpu->arch.cr0 &= ~cr0_guest_owned_bits; | ||
1668 | vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; | ||
1669 | } | ||
1670 | |||
1601 | static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | 1671 | static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) |
1602 | { | 1672 | { |
1603 | vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK; | 1673 | ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; |
1604 | vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; | 1674 | |
1675 | vcpu->arch.cr4 &= ~cr4_guest_owned_bits; | ||
1676 | vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits; | ||
1605 | } | 1677 | } |
1606 | 1678 | ||
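The new vmx_decache_cr0_guest_bits() and the reworked CR4 variant share one idea: only bits the guest owns can change behind KVM's back, so only those bits of the cached register are refreshed from the VMCS. The merge, lifted into a standalone helper for clarity (a sketch, not kernel API):

    #include <stdint.h>

    uint64_t decache(uint64_t cached, uint64_t hw_value,
                     uint64_t guest_owned_bits)
    {
        cached &= ~guest_owned_bits;            /* drop stale guest bits */
        cached |= hw_value & guest_owned_bits;  /* re-read them from VMCS */
        return cached;
    }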
1607 | static void ept_load_pdptrs(struct kvm_vcpu *vcpu) | 1679 | static void ept_load_pdptrs(struct kvm_vcpu *vcpu) |
@@ -1646,7 +1718,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | |||
1646 | (CPU_BASED_CR3_LOAD_EXITING | | 1718 | (CPU_BASED_CR3_LOAD_EXITING | |
1647 | CPU_BASED_CR3_STORE_EXITING)); | 1719 | CPU_BASED_CR3_STORE_EXITING)); |
1648 | vcpu->arch.cr0 = cr0; | 1720 | vcpu->arch.cr0 = cr0; |
1649 | vmx_set_cr4(vcpu, vcpu->arch.cr4); | 1721 | vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); |
1650 | } else if (!is_paging(vcpu)) { | 1722 | } else if (!is_paging(vcpu)) { |
1651 | /* From nonpaging to paging */ | 1723 | /* From nonpaging to paging */ |
1652 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, | 1724 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, |
@@ -1654,23 +1726,13 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | |||
1654 | ~(CPU_BASED_CR3_LOAD_EXITING | | 1726 | ~(CPU_BASED_CR3_LOAD_EXITING | |
1655 | CPU_BASED_CR3_STORE_EXITING)); | 1727 | CPU_BASED_CR3_STORE_EXITING)); |
1656 | vcpu->arch.cr0 = cr0; | 1728 | vcpu->arch.cr0 = cr0; |
1657 | vmx_set_cr4(vcpu, vcpu->arch.cr4); | 1729 | vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); |
1658 | } | 1730 | } |
1659 | 1731 | ||
1660 | if (!(cr0 & X86_CR0_WP)) | 1732 | if (!(cr0 & X86_CR0_WP)) |
1661 | *hw_cr0 &= ~X86_CR0_WP; | 1733 | *hw_cr0 &= ~X86_CR0_WP; |
1662 | } | 1734 | } |
1663 | 1735 | ||
1664 | static void ept_update_paging_mode_cr4(unsigned long *hw_cr4, | ||
1665 | struct kvm_vcpu *vcpu) | ||
1666 | { | ||
1667 | if (!is_paging(vcpu)) { | ||
1668 | *hw_cr4 &= ~X86_CR4_PAE; | ||
1669 | *hw_cr4 |= X86_CR4_PSE; | ||
1670 | } else if (!(vcpu->arch.cr4 & X86_CR4_PAE)) | ||
1671 | *hw_cr4 &= ~X86_CR4_PAE; | ||
1672 | } | ||
1673 | |||
1674 | static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 1736 | static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
1675 | { | 1737 | { |
1676 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1738 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -1682,8 +1744,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1682 | else | 1744 | else |
1683 | hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; | 1745 | hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; |
1684 | 1746 | ||
1685 | vmx_fpu_deactivate(vcpu); | ||
1686 | |||
1687 | if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) | 1747 | if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) |
1688 | enter_pmode(vcpu); | 1748 | enter_pmode(vcpu); |
1689 | 1749 | ||
@@ -1691,7 +1751,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1691 | enter_rmode(vcpu); | 1751 | enter_rmode(vcpu); |
1692 | 1752 | ||
1693 | #ifdef CONFIG_X86_64 | 1753 | #ifdef CONFIG_X86_64 |
1694 | if (vcpu->arch.shadow_efer & EFER_LME) { | 1754 | if (vcpu->arch.efer & EFER_LME) { |
1695 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) | 1755 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) |
1696 | enter_lmode(vcpu); | 1756 | enter_lmode(vcpu); |
1697 | if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) | 1757 | if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) |
@@ -1702,12 +1762,12 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1702 | if (enable_ept) | 1762 | if (enable_ept) |
1703 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); | 1763 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); |
1704 | 1764 | ||
1765 | if (!vcpu->fpu_active) | ||
1766 | hw_cr0 |= X86_CR0_TS | X86_CR0_MP; | ||
1767 | |||
1705 | vmcs_writel(CR0_READ_SHADOW, cr0); | 1768 | vmcs_writel(CR0_READ_SHADOW, cr0); |
1706 | vmcs_writel(GUEST_CR0, hw_cr0); | 1769 | vmcs_writel(GUEST_CR0, hw_cr0); |
1707 | vcpu->arch.cr0 = cr0; | 1770 | vcpu->arch.cr0 = cr0; |
1708 | |||
1709 | if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) | ||
1710 | vmx_fpu_activate(vcpu); | ||
1711 | } | 1771 | } |
1712 | 1772 | ||
1713 | static u64 construct_eptp(unsigned long root_hpa) | 1773 | static u64 construct_eptp(unsigned long root_hpa) |
@@ -1738,8 +1798,6 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
1738 | 1798 | ||
1739 | vmx_flush_tlb(vcpu); | 1799 | vmx_flush_tlb(vcpu); |
1740 | vmcs_writel(GUEST_CR3, guest_cr3); | 1800 | vmcs_writel(GUEST_CR3, guest_cr3); |
1741 | if (vcpu->arch.cr0 & X86_CR0_PE) | ||
1742 | vmx_fpu_deactivate(vcpu); | ||
1743 | } | 1801 | } |
1744 | 1802 | ||
1745 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 1803 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
@@ -1748,8 +1806,14 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
1748 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); | 1806 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); |
1749 | 1807 | ||
1750 | vcpu->arch.cr4 = cr4; | 1808 | vcpu->arch.cr4 = cr4; |
1751 | if (enable_ept) | 1809 | if (enable_ept) { |
1752 | ept_update_paging_mode_cr4(&hw_cr4, vcpu); | 1810 | if (!is_paging(vcpu)) { |
1811 | hw_cr4 &= ~X86_CR4_PAE; | ||
1812 | hw_cr4 |= X86_CR4_PSE; | ||
1813 | } else if (!(cr4 & X86_CR4_PAE)) { | ||
1814 | hw_cr4 &= ~X86_CR4_PAE; | ||
1815 | } | ||
1816 | } | ||
1753 | 1817 | ||
1754 | vmcs_writel(CR4_READ_SHADOW, cr4); | 1818 | vmcs_writel(CR4_READ_SHADOW, cr4); |
1755 | vmcs_writel(GUEST_CR4, hw_cr4); | 1819 | vmcs_writel(GUEST_CR4, hw_cr4); |
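ept_update_paging_mode_cr4() is folded into vmx_set_cr4(): with EPT, an unpaged guest still runs with hardware paging enabled, so the hardware CR4 must drop PAE and force PSE; a paged guest without PAE must not see PAE set in hardware either. A compile-only model of that fixup:

    #include <stdint.h>

    #define X86_CR4_PSE (1u << 4)
    #define X86_CR4_PAE (1u << 5)

    uint32_t ept_fixup_cr4(uint32_t hw_cr4, uint32_t guest_cr4,
                           int guest_paging)
    {
        if (!guest_paging) {
            hw_cr4 &= ~X86_CR4_PAE;   /* identity map uses 32-bit paging */
            hw_cr4 |= X86_CR4_PSE;
        } else if (!(guest_cr4 & X86_CR4_PAE)) {
            hw_cr4 &= ~X86_CR4_PAE;
        }
        return hw_cr4;
    }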
@@ -1787,7 +1851,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
1787 | 1851 | ||
1788 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) | 1852 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) |
1789 | { | 1853 | { |
1790 | if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */ | 1854 | if (!is_protmode(vcpu)) |
1791 | return 0; | 1855 | return 0; |
1792 | 1856 | ||
1793 | if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ | 1857 | if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ |
@@ -2042,7 +2106,7 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) | |||
2042 | static bool guest_state_valid(struct kvm_vcpu *vcpu) | 2106 | static bool guest_state_valid(struct kvm_vcpu *vcpu) |
2043 | { | 2107 | { |
2044 | /* real mode guest state checks */ | 2108 | /* real mode guest state checks */ |
2045 | if (!(vcpu->arch.cr0 & X86_CR0_PE)) { | 2109 | if (!is_protmode(vcpu)) { |
2046 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) | 2110 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) |
2047 | return false; | 2111 | return false; |
2048 | if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) | 2112 | if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) |
@@ -2175,7 +2239,7 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
2175 | struct kvm_userspace_memory_region kvm_userspace_mem; | 2239 | struct kvm_userspace_memory_region kvm_userspace_mem; |
2176 | int r = 0; | 2240 | int r = 0; |
2177 | 2241 | ||
2178 | down_write(&kvm->slots_lock); | 2242 | mutex_lock(&kvm->slots_lock); |
2179 | if (kvm->arch.apic_access_page) | 2243 | if (kvm->arch.apic_access_page) |
2180 | goto out; | 2244 | goto out; |
2181 | kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; | 2245 | kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; |
@@ -2188,7 +2252,7 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
2188 | 2252 | ||
2189 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); | 2253 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); |
2190 | out: | 2254 | out: |
2191 | up_write(&kvm->slots_lock); | 2255 | mutex_unlock(&kvm->slots_lock); |
2192 | return r; | 2256 | return r; |
2193 | } | 2257 | } |
2194 | 2258 | ||
@@ -2197,7 +2261,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) | |||
2197 | struct kvm_userspace_memory_region kvm_userspace_mem; | 2261 | struct kvm_userspace_memory_region kvm_userspace_mem; |
2198 | int r = 0; | 2262 | int r = 0; |
2199 | 2263 | ||
2200 | down_write(&kvm->slots_lock); | 2264 | mutex_lock(&kvm->slots_lock); |
2201 | if (kvm->arch.ept_identity_pagetable) | 2265 | if (kvm->arch.ept_identity_pagetable) |
2202 | goto out; | 2266 | goto out; |
2203 | kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; | 2267 | kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; |
@@ -2212,7 +2276,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) | |||
2212 | kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, | 2276 | kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, |
2213 | kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); | 2277 | kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); |
2214 | out: | 2278 | out: |
2215 | up_write(&kvm->slots_lock); | 2279 | mutex_unlock(&kvm->slots_lock); |
2216 | return r; | 2280 | return r; |
2217 | } | 2281 | } |
2218 | 2282 | ||
@@ -2384,14 +2448,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2384 | for (i = 0; i < NR_VMX_MSR; ++i) { | 2448 | for (i = 0; i < NR_VMX_MSR; ++i) { |
2385 | u32 index = vmx_msr_index[i]; | 2449 | u32 index = vmx_msr_index[i]; |
2386 | u32 data_low, data_high; | 2450 | u32 data_low, data_high; |
2387 | u64 data; | ||
2388 | int j = vmx->nmsrs; | 2451 | int j = vmx->nmsrs; |
2389 | 2452 | ||
2390 | if (rdmsr_safe(index, &data_low, &data_high) < 0) | 2453 | if (rdmsr_safe(index, &data_low, &data_high) < 0) |
2391 | continue; | 2454 | continue; |
2392 | if (wrmsr_safe(index, data_low, data_high) < 0) | 2455 | if (wrmsr_safe(index, data_low, data_high) < 0) |
2393 | continue; | 2456 | continue; |
2394 | data = data_low | ((u64)data_high << 32); | ||
2395 | vmx->guest_msrs[j].index = i; | 2457 | vmx->guest_msrs[j].index = i; |
2396 | vmx->guest_msrs[j].data = 0; | 2458 | vmx->guest_msrs[j].data = 0; |
2397 | vmx->guest_msrs[j].mask = -1ull; | 2459 | vmx->guest_msrs[j].mask = -1ull; |
@@ -2404,7 +2466,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2404 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); | 2466 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); |
2405 | 2467 | ||
2406 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); | 2468 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); |
2407 | vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); | 2469 | vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; |
2470 | if (enable_ept) | ||
2471 | vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; | ||
2472 | vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); | ||
2408 | 2473 | ||
2409 | tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; | 2474 | tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; |
2410 | rdtscll(tsc_this); | 2475 | rdtscll(tsc_this); |
@@ -2429,10 +2494,10 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2429 | { | 2494 | { |
2430 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2495 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2431 | u64 msr; | 2496 | u64 msr; |
2432 | int ret; | 2497 | int ret, idx; |
2433 | 2498 | ||
2434 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); | 2499 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); |
2435 | down_read(&vcpu->kvm->slots_lock); | 2500 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
2436 | if (!init_rmode(vmx->vcpu.kvm)) { | 2501 | if (!init_rmode(vmx->vcpu.kvm)) { |
2437 | ret = -ENOMEM; | 2502 | ret = -ENOMEM; |
2438 | goto out; | 2503 | goto out; |
@@ -2526,7 +2591,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2526 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | 2591 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); |
2527 | 2592 | ||
2528 | vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; | 2593 | vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
2529 | vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ | 2594 | vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ |
2530 | vmx_set_cr4(&vmx->vcpu, 0); | 2595 | vmx_set_cr4(&vmx->vcpu, 0); |
2531 | vmx_set_efer(&vmx->vcpu, 0); | 2596 | vmx_set_efer(&vmx->vcpu, 0); |
2532 | vmx_fpu_activate(&vmx->vcpu); | 2597 | vmx_fpu_activate(&vmx->vcpu); |
@@ -2540,7 +2605,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2540 | vmx->emulation_required = 0; | 2605 | vmx->emulation_required = 0; |
2541 | 2606 | ||
2542 | out: | 2607 | out: |
2543 | up_read(&vcpu->kvm->slots_lock); | 2608 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
2544 | return ret; | 2609 | return ret; |
2545 | } | 2610 | } |
2546 | 2611 | ||
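vmx_vcpu_reset() shows the read-side half of the slots_lock-to-SRCU conversion: srcu_read_lock() returns an index that must be handed back to srcu_read_unlock(), which lets the updater wait out only the readers that started before a given grace period. A deliberately tiny user-space toy (two epoch counters; not the real SRCU algorithm) to illustrate why the index round-trips:

    #include <stdatomic.h>

    static _Atomic int epoch;        /* flipped by the updater */
    static _Atomic int readers[2];   /* per-epoch reader counts */

    static int toy_srcu_read_lock(void)
    {
        int idx = atomic_load(&epoch) & 1;
        atomic_fetch_add(&readers[idx], 1);
        return idx;                  /* caller passes this back to unlock */
    }

    static void toy_srcu_read_unlock(int idx)
    {
        atomic_fetch_sub(&readers[idx], 1);
    }

    static void toy_synchronize(void)
    {
        int old = atomic_fetch_xor(&epoch, 1) & 1;   /* flip the epoch */
        while (atomic_load(&readers[old]))
            ;                        /* wait for old-epoch readers */
    }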
@@ -2717,6 +2782,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
2717 | kvm_queue_exception(vcpu, vec); | 2782 | kvm_queue_exception(vcpu, vec); |
2718 | return 1; | 2783 | return 1; |
2719 | case BP_VECTOR: | 2784 | case BP_VECTOR: |
2785 | /* | ||
2786 | * Update instruction length as we may reinject the exception | ||
2787 | * from user space while in guest debugging mode. | ||
2788 | */ | ||
2789 | to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = | ||
2790 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | ||
2720 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | 2791 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) |
2721 | return 0; | 2792 | return 0; |
2722 | /* fall through */ | 2793 | /* fall through */ |
@@ -2839,6 +2910,13 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
2839 | kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); | 2910 | kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); |
2840 | /* fall through */ | 2911 | /* fall through */ |
2841 | case BP_VECTOR: | 2912 | case BP_VECTOR: |
2913 | /* | ||
2914 | * Update instruction length as we may reinject #BP from | ||
2915 | * user space while in guest debugging mode. Reading it for | ||
2916 | * #DB as well causes no harm, it is not used in that case. | ||
2917 | */ | ||
2918 | vmx->vcpu.arch.event_exit_inst_len = | ||
2919 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | ||
2842 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 2920 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
2843 | kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; | 2921 | kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; |
2844 | kvm_run->debug.arch.exception = ex_no; | 2922 | kvm_run->debug.arch.exception = ex_no; |
@@ -2940,11 +3018,10 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
2940 | }; | 3018 | }; |
2941 | break; | 3019 | break; |
2942 | case 2: /* clts */ | 3020 | case 2: /* clts */ |
2943 | vmx_fpu_deactivate(vcpu); | 3021 | vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); |
2944 | vcpu->arch.cr0 &= ~X86_CR0_TS; | 3022 | trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); |
2945 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); | ||
2946 | vmx_fpu_activate(vcpu); | ||
2947 | skip_emulated_instruction(vcpu); | 3023 | skip_emulated_instruction(vcpu); |
3024 | vmx_fpu_activate(vcpu); | ||
2948 | return 1; | 3025 | return 1; |
2949 | case 1: /*mov from cr*/ | 3026 | case 1: /*mov from cr*/ |
2950 | switch (cr) { | 3027 | switch (cr) { |
@@ -2962,7 +3039,9 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
2962 | } | 3039 | } |
2963 | break; | 3040 | break; |
2964 | case 3: /* lmsw */ | 3041 | case 3: /* lmsw */ |
2965 | kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); | 3042 | val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; |
3043 | trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val); | ||
3044 | kvm_lmsw(vcpu, val); | ||
2966 | 3045 | ||
2967 | skip_emulated_instruction(vcpu); | 3046 | skip_emulated_instruction(vcpu); |
2968 | return 1; | 3047 | return 1; |
@@ -2975,12 +3054,22 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
2975 | return 0; | 3054 | return 0; |
2976 | } | 3055 | } |
2977 | 3056 | ||
3057 | static int check_dr_alias(struct kvm_vcpu *vcpu) | ||
3058 | { | ||
3059 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
3060 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3061 | return -1; | ||
3062 | } | ||
3063 | return 0; | ||
3064 | } | ||
3065 | |||
2978 | static int handle_dr(struct kvm_vcpu *vcpu) | 3066 | static int handle_dr(struct kvm_vcpu *vcpu) |
2979 | { | 3067 | { |
2980 | unsigned long exit_qualification; | 3068 | unsigned long exit_qualification; |
2981 | unsigned long val; | 3069 | unsigned long val; |
2982 | int dr, reg; | 3070 | int dr, reg; |
2983 | 3071 | ||
3072 | /* Do not handle if CPL > 0; kvm_require_cpl() injects a #GP instead */ ||
2984 | if (!kvm_require_cpl(vcpu, 0)) | 3073 | if (!kvm_require_cpl(vcpu, 0)) |
2985 | return 1; | 3074 | return 1; |
2986 | dr = vmcs_readl(GUEST_DR7); | 3075 | dr = vmcs_readl(GUEST_DR7); |
@@ -3016,14 +3105,20 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
3016 | case 0 ... 3: | 3105 | case 0 ... 3: |
3017 | val = vcpu->arch.db[dr]; | 3106 | val = vcpu->arch.db[dr]; |
3018 | break; | 3107 | break; |
3108 | case 4: | ||
3109 | if (check_dr_alias(vcpu) < 0) | ||
3110 | return 1; | ||
3111 | /* fall through */ | ||
3019 | case 6: | 3112 | case 6: |
3020 | val = vcpu->arch.dr6; | 3113 | val = vcpu->arch.dr6; |
3021 | break; | 3114 | break; |
3022 | case 7: | 3115 | case 5: |
3116 | if (check_dr_alias(vcpu) < 0) | ||
3117 | return 1; | ||
3118 | /* fall through */ | ||
3119 | default: /* 7 */ | ||
3023 | val = vcpu->arch.dr7; | 3120 | val = vcpu->arch.dr7; |
3024 | break; | 3121 | break; |
3025 | default: | ||
3026 | val = 0; | ||
3027 | } | 3122 | } |
3028 | kvm_register_write(vcpu, reg, val); | 3123 | kvm_register_write(vcpu, reg, val); |
3029 | } else { | 3124 | } else { |
@@ -3034,21 +3129,25 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
3034 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | 3129 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) |
3035 | vcpu->arch.eff_db[dr] = val; | 3130 | vcpu->arch.eff_db[dr] = val; |
3036 | break; | 3131 | break; |
3037 | case 4 ... 5: | 3132 | case 4: |
3038 | if (vcpu->arch.cr4 & X86_CR4_DE) | 3133 | if (check_dr_alias(vcpu) < 0) |
3039 | kvm_queue_exception(vcpu, UD_VECTOR); | 3134 | return 1; |
3040 | break; | 3135 | /* fall through */ |
3041 | case 6: | 3136 | case 6: |
3042 | if (val & 0xffffffff00000000ULL) { | 3137 | if (val & 0xffffffff00000000ULL) { |
3043 | kvm_queue_exception(vcpu, GP_VECTOR); | 3138 | kvm_inject_gp(vcpu, 0); |
3044 | break; | 3139 | return 1; |
3045 | } | 3140 | } |
3046 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | 3141 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; |
3047 | break; | 3142 | break; |
3048 | case 7: | 3143 | case 5: |
3144 | if (check_dr_alias(vcpu) < 0) | ||
3145 | return 1; | ||
3146 | /* fall through */ | ||
3147 | default: /* 7 */ | ||
3049 | if (val & 0xffffffff00000000ULL) { | 3148 | if (val & 0xffffffff00000000ULL) { |
3050 | kvm_queue_exception(vcpu, GP_VECTOR); | 3149 | kvm_inject_gp(vcpu, 0); |
3051 | break; | 3150 | return 1; |
3052 | } | 3151 | } |
3053 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | 3152 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; |
3054 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | 3153 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { |
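The DR4/DR5 cases implement the architectural alias rule: with CR4.DE clear they alias to DR6/DR7, and with CR4.DE set any access raises #UD, which is what check_dr_alias() reports. A standalone model of the resolution:

    enum { DR_OK = 0, DR_INJECT_UD = -1 };

    /* DR4/DR5 alias to DR6/DR7 unless CR4.DE enables debug extensions */
    int resolve_dr(int dr, int cr4_de, int *out)
    {
        if (dr == 4 || dr == 5) {
            if (cr4_de)
                return DR_INJECT_UD;   /* alias is illegal: inject #UD */
            dr += 2;                   /* DR4 -> DR6, DR5 -> DR7 */
        }
        *out = dr;
        return DR_OK;
    }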
@@ -3075,6 +3174,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu) | |||
3075 | u64 data; | 3174 | u64 data; |
3076 | 3175 | ||
3077 | if (vmx_get_msr(vcpu, ecx, &data)) { | 3176 | if (vmx_get_msr(vcpu, ecx, &data)) { |
3177 | trace_kvm_msr_read_ex(ecx); | ||
3078 | kvm_inject_gp(vcpu, 0); | 3178 | kvm_inject_gp(vcpu, 0); |
3079 | return 1; | 3179 | return 1; |
3080 | } | 3180 | } |
@@ -3094,13 +3194,13 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu) | |||
3094 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) | 3194 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) |
3095 | | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); | 3195 | | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); |
3096 | 3196 | ||
3097 | trace_kvm_msr_write(ecx, data); | ||
3098 | |||
3099 | if (vmx_set_msr(vcpu, ecx, data) != 0) { | 3197 | if (vmx_set_msr(vcpu, ecx, data) != 0) { |
3198 | trace_kvm_msr_write_ex(ecx, data); | ||
3100 | kvm_inject_gp(vcpu, 0); | 3199 | kvm_inject_gp(vcpu, 0); |
3101 | return 1; | 3200 | return 1; |
3102 | } | 3201 | } |
3103 | 3202 | ||
3203 | trace_kvm_msr_write(ecx, data); | ||
3104 | skip_emulated_instruction(vcpu); | 3204 | skip_emulated_instruction(vcpu); |
3105 | return 1; | 3205 | return 1; |
3106 | } | 3206 | } |
@@ -3385,7 +3485,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
3385 | } | 3485 | } |
3386 | 3486 | ||
3387 | if (err != EMULATE_DONE) { | 3487 | if (err != EMULATE_DONE) { |
3388 | kvm_report_emulation_failure(vcpu, "emulation failure"); | ||
3389 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 3488 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
3390 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | 3489 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
3391 | vcpu->run->internal.ndata = 0; | 3490 | vcpu->run->internal.ndata = 0; |
@@ -3416,6 +3515,12 @@ static int handle_pause(struct kvm_vcpu *vcpu) | |||
3416 | return 1; | 3515 | return 1; |
3417 | } | 3516 | } |
3418 | 3517 | ||
3518 | static int handle_invalid_op(struct kvm_vcpu *vcpu) | ||
3519 | { | ||
3520 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3521 | return 1; | ||
3522 | } | ||
3523 | |||
3419 | /* | 3524 | /* |
3420 | * The exit handlers return 1 if the exit was handled fully and guest execution | 3525 | * The exit handlers return 1 if the exit was handled fully and guest execution |
3421 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 3526 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
@@ -3453,6 +3558,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
3453 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, | 3558 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, |
3454 | [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, | 3559 | [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, |
3455 | [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, | 3560 | [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, |
3561 | [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, | ||
3562 | [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, | ||
3456 | }; | 3563 | }; |
3457 | 3564 | ||
3458 | static const int kvm_vmx_max_exit_handlers = | 3565 | static const int kvm_vmx_max_exit_handlers = |
@@ -3686,9 +3793,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
3686 | */ | 3793 | */ |
3687 | vmcs_writel(HOST_CR0, read_cr0()); | 3794 | vmcs_writel(HOST_CR0, read_cr0()); |
3688 | 3795 | ||
3689 | if (vcpu->arch.switch_db_regs) | ||
3690 | set_debugreg(vcpu->arch.dr6, 6); | ||
3691 | |||
3692 | asm( | 3796 | asm( |
3693 | /* Store host registers */ | 3797 | /* Store host registers */ |
3694 | "push %%"R"dx; push %%"R"bp;" | 3798 | "push %%"R"dx; push %%"R"bp;" |
@@ -3789,9 +3893,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
3789 | | (1 << VCPU_EXREG_PDPTR)); | 3893 | | (1 << VCPU_EXREG_PDPTR)); |
3790 | vcpu->arch.regs_dirty = 0; | 3894 | vcpu->arch.regs_dirty = 0; |
3791 | 3895 | ||
3792 | if (vcpu->arch.switch_db_regs) | ||
3793 | get_debugreg(vcpu->arch.dr6, 6); | ||
3794 | |||
3795 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); | 3896 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
3796 | if (vmx->rmode.irq.pending) | 3897 | if (vmx->rmode.irq.pending) |
3797 | fixup_rmode_irq(vmx); | 3898 | fixup_rmode_irq(vmx); |
@@ -3920,7 +4021,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
3920 | * b. VT-d with snooping control feature: snooping control feature of | 4021 | * b. VT-d with snooping control feature: snooping control feature of |
3921 | * VT-d engine can guarantee the cache correctness. Just set it | 4022 | * VT-d engine can guarantee the cache correctness. Just set it |
3922 | * to WB to keep consistent with host. So the same as item 3. | 4023 | * to WB to keep consistent with host. So the same as item 3. |
3923 | * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep | 4024 | * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep |
3924 | * consistent with host MTRR | 4025 | * consistent with host MTRR |
3925 | */ | 4026 | */ |
3926 | if (is_mmio) | 4027 | if (is_mmio) |
@@ -3931,37 +4032,88 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
3931 | VMX_EPT_MT_EPTE_SHIFT; | 4032 | VMX_EPT_MT_EPTE_SHIFT; |
3932 | else | 4033 | else |
3933 | ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | 4034 | ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) |
3934 | | VMX_EPT_IGMT_BIT; | 4035 | | VMX_EPT_IPAT_BIT; |
3935 | 4036 | ||
3936 | return ret; | 4037 | return ret; |
3937 | } | 4038 | } |
3938 | 4039 | ||
4040 | #define _ER(x) { EXIT_REASON_##x, #x } | ||
4041 | |||
3939 | static const struct trace_print_flags vmx_exit_reasons_str[] = { | 4042 | static const struct trace_print_flags vmx_exit_reasons_str[] = { |
3940 | { EXIT_REASON_EXCEPTION_NMI, "exception" }, | 4043 | _ER(EXCEPTION_NMI), |
3941 | { EXIT_REASON_EXTERNAL_INTERRUPT, "ext_irq" }, | 4044 | _ER(EXTERNAL_INTERRUPT), |
3942 | { EXIT_REASON_TRIPLE_FAULT, "triple_fault" }, | 4045 | _ER(TRIPLE_FAULT), |
3943 | { EXIT_REASON_NMI_WINDOW, "nmi_window" }, | 4046 | _ER(PENDING_INTERRUPT), |
3944 | { EXIT_REASON_IO_INSTRUCTION, "io_instruction" }, | 4047 | _ER(NMI_WINDOW), |
3945 | { EXIT_REASON_CR_ACCESS, "cr_access" }, | 4048 | _ER(TASK_SWITCH), |
3946 | { EXIT_REASON_DR_ACCESS, "dr_access" }, | 4049 | _ER(CPUID), |
3947 | { EXIT_REASON_CPUID, "cpuid" }, | 4050 | _ER(HLT), |
3948 | { EXIT_REASON_MSR_READ, "rdmsr" }, | 4051 | _ER(INVLPG), |
3949 | { EXIT_REASON_MSR_WRITE, "wrmsr" }, | 4052 | _ER(RDPMC), |
3950 | { EXIT_REASON_PENDING_INTERRUPT, "interrupt_window" }, | 4053 | _ER(RDTSC), |
3951 | { EXIT_REASON_HLT, "halt" }, | 4054 | _ER(VMCALL), |
3952 | { EXIT_REASON_INVLPG, "invlpg" }, | 4055 | _ER(VMCLEAR), |
3953 | { EXIT_REASON_VMCALL, "hypercall" }, | 4056 | _ER(VMLAUNCH), |
3954 | { EXIT_REASON_TPR_BELOW_THRESHOLD, "tpr_below_thres" }, | 4057 | _ER(VMPTRLD), |
3955 | { EXIT_REASON_APIC_ACCESS, "apic_access" }, | 4058 | _ER(VMPTRST), |
3956 | { EXIT_REASON_WBINVD, "wbinvd" }, | 4059 | _ER(VMREAD), |
3957 | { EXIT_REASON_TASK_SWITCH, "task_switch" }, | 4060 | _ER(VMRESUME), |
3958 | { EXIT_REASON_EPT_VIOLATION, "ept_violation" }, | 4061 | _ER(VMWRITE), |
4062 | _ER(VMOFF), | ||
4063 | _ER(VMON), | ||
4064 | _ER(CR_ACCESS), | ||
4065 | _ER(DR_ACCESS), | ||
4066 | _ER(IO_INSTRUCTION), | ||
4067 | _ER(MSR_READ), | ||
4068 | _ER(MSR_WRITE), | ||
4069 | _ER(MWAIT_INSTRUCTION), | ||
4070 | _ER(MONITOR_INSTRUCTION), | ||
4071 | _ER(PAUSE_INSTRUCTION), | ||
4072 | _ER(MCE_DURING_VMENTRY), | ||
4073 | _ER(TPR_BELOW_THRESHOLD), | ||
4074 | _ER(APIC_ACCESS), | ||
4075 | _ER(EPT_VIOLATION), | ||
4076 | _ER(EPT_MISCONFIG), | ||
4077 | _ER(WBINVD), | ||
3959 | { -1, NULL } | 4078 | { -1, NULL } |
3960 | }; | 4079 | }; |
3961 | 4080 | ||
3962 | static bool vmx_gb_page_enable(void) | 4081 | #undef _ER |
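The exit-reason table rewrite replaces hand-written { value, "string" } pairs with a token-paste/stringize macro, so an entry's enum value and printable name can never drift apart. A compile-and-run model of the pattern (the two exit-reason values used are the architectural VMX encodings for CPUID and HLT):

    #include <stdio.h>

    enum { EXIT_REASON_CPUID = 10, EXIT_REASON_HLT = 12 };

    struct flag_str { long val; const char *name; };

    #define _ER(x) { EXIT_REASON_##x, #x }
    static const struct flag_str exit_reasons[] = {
        _ER(CPUID),
        _ER(HLT),
        { -1, NULL }
    };
    #undef _ER

    int main(void)
    {
        for (const struct flag_str *p = exit_reasons; p->name; p++)
            printf("%ld -> %s\n", p->val, p->name);
        return 0;
    }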
4082 | |||
4083 | static int vmx_get_lpage_level(void) | ||
3963 | { | 4084 | { |
3964 | return false; | 4085 | if (enable_ept && !cpu_has_vmx_ept_1g_page()) |
4086 | return PT_DIRECTORY_LEVEL; | ||
4087 | else | ||
4088 | /* shadow paging, and EPT with 1GB page support */ ||
4089 | return PT_PDPE_LEVEL; | ||
4090 | } | ||
4091 | |||
4092 | static inline u32 bit(int bitno) | ||
4093 | { | ||
4094 | return 1 << (bitno & 31); | ||
4095 | } | ||
4096 | |||
4097 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | ||
4098 | { | ||
4099 | struct kvm_cpuid_entry2 *best; | ||
4100 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
4101 | u32 exec_control; | ||
4102 | |||
4103 | vmx->rdtscp_enabled = false; | ||
4104 | if (vmx_rdtscp_supported()) { | ||
4105 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
4106 | if (exec_control & SECONDARY_EXEC_RDTSCP) { | ||
4107 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | ||
4108 | if (best && (best->edx & bit(X86_FEATURE_RDTSCP))) | ||
4109 | vmx->rdtscp_enabled = true; | ||
4110 | else { | ||
4111 | exec_control &= ~SECONDARY_EXEC_RDTSCP; | ||
4112 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | ||
4113 | exec_control); | ||
4114 | } | ||
4115 | } | ||
4116 | } | ||
3965 | } | 4117 | } |
3966 | 4118 | ||
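vmx_cpuid_update() keeps the RDTSCP execution control only if userspace actually exposed RDTSCP (CPUID.80000001H:EDX bit 27) to the guest, and clears it again otherwise so the guest faults as it should. The gating logic as a pure function (constants per the SDM; a model, not the kernel code):

    #include <stdint.h>
    #include <stdbool.h>

    #define SECONDARY_EXEC_RDTSCP   (1u << 3)
    #define X86_FEATURE_RDTSCP_BIT  27

    uint32_t update_rdtscp(uint32_t exec_control,
                           uint32_t cpuid_80000001_edx,
                           bool *rdtscp_enabled)
    {
        *rdtscp_enabled = false;
        if (exec_control & SECONDARY_EXEC_RDTSCP) {
            if (cpuid_80000001_edx & (1u << X86_FEATURE_RDTSCP_BIT))
                *rdtscp_enabled = true;         /* guest may use RDTSCP */
            else
                exec_control &= ~SECONDARY_EXEC_RDTSCP;  /* hide it */
        }
        return exec_control;
    }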
3967 | static struct kvm_x86_ops vmx_x86_ops = { | 4119 | static struct kvm_x86_ops vmx_x86_ops = { |
@@ -3990,6 +4142,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3990 | .set_segment = vmx_set_segment, | 4142 | .set_segment = vmx_set_segment, |
3991 | .get_cpl = vmx_get_cpl, | 4143 | .get_cpl = vmx_get_cpl, |
3992 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, | 4144 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, |
4145 | .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, | ||
3993 | .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, | 4146 | .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, |
3994 | .set_cr0 = vmx_set_cr0, | 4147 | .set_cr0 = vmx_set_cr0, |
3995 | .set_cr3 = vmx_set_cr3, | 4148 | .set_cr3 = vmx_set_cr3, |
@@ -4002,6 +4155,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4002 | .cache_reg = vmx_cache_reg, | 4155 | .cache_reg = vmx_cache_reg, |
4003 | .get_rflags = vmx_get_rflags, | 4156 | .get_rflags = vmx_get_rflags, |
4004 | .set_rflags = vmx_set_rflags, | 4157 | .set_rflags = vmx_set_rflags, |
4158 | .fpu_activate = vmx_fpu_activate, | ||
4159 | .fpu_deactivate = vmx_fpu_deactivate, | ||
4005 | 4160 | ||
4006 | .tlb_flush = vmx_flush_tlb, | 4161 | .tlb_flush = vmx_flush_tlb, |
4007 | 4162 | ||
@@ -4027,7 +4182,11 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4027 | .get_mt_mask = vmx_get_mt_mask, | 4182 | .get_mt_mask = vmx_get_mt_mask, |
4028 | 4183 | ||
4029 | .exit_reasons_str = vmx_exit_reasons_str, | 4184 | .exit_reasons_str = vmx_exit_reasons_str, |
4030 | .gb_page_enable = vmx_gb_page_enable, | 4185 | .get_lpage_level = vmx_get_lpage_level, |
4186 | |||
4187 | .cpuid_update = vmx_cpuid_update, | ||
4188 | |||
4189 | .rdtscp_supported = vmx_rdtscp_supported, | ||
4031 | }; | 4190 | }; |
4032 | 4191 | ||
4033 | static int __init vmx_init(void) | 4192 | static int __init vmx_init(void) |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9d068966fb2a..3c4ca98ad27f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -38,6 +38,8 @@ | |||
38 | #include <linux/intel-iommu.h> | 38 | #include <linux/intel-iommu.h> |
39 | #include <linux/cpufreq.h> | 39 | #include <linux/cpufreq.h> |
40 | #include <linux/user-return-notifier.h> | 40 | #include <linux/user-return-notifier.h> |
41 | #include <linux/srcu.h> | ||
42 | #include <linux/slab.h> | ||
41 | #include <trace/events/kvm.h> | 43 | #include <trace/events/kvm.h> |
42 | #undef TRACE_INCLUDE_FILE | 44 | #undef TRACE_INCLUDE_FILE |
43 | #define CREATE_TRACE_POINTS | 45 | #define CREATE_TRACE_POINTS |
@@ -93,16 +95,16 @@ module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); | |||
93 | 95 | ||
94 | struct kvm_shared_msrs_global { | 96 | struct kvm_shared_msrs_global { |
95 | int nr; | 97 | int nr; |
96 | struct kvm_shared_msr { | 98 | u32 msrs[KVM_NR_SHARED_MSRS]; |
97 | u32 msr; | ||
98 | u64 value; | ||
99 | } msrs[KVM_NR_SHARED_MSRS]; | ||
100 | }; | 99 | }; |
101 | 100 | ||
102 | struct kvm_shared_msrs { | 101 | struct kvm_shared_msrs { |
103 | struct user_return_notifier urn; | 102 | struct user_return_notifier urn; |
104 | bool registered; | 103 | bool registered; |
105 | u64 current_value[KVM_NR_SHARED_MSRS]; | 104 | struct kvm_shared_msr_values { |
105 | u64 host; | ||
106 | u64 curr; | ||
107 | } values[KVM_NR_SHARED_MSRS]; | ||
106 | }; | 108 | }; |
107 | 109 | ||
108 | static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; | 110 | static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; |
@@ -147,53 +149,64 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
147 | static void kvm_on_user_return(struct user_return_notifier *urn) | 149 | static void kvm_on_user_return(struct user_return_notifier *urn) |
148 | { | 150 | { |
149 | unsigned slot; | 151 | unsigned slot; |
150 | struct kvm_shared_msr *global; | ||
151 | struct kvm_shared_msrs *locals | 152 | struct kvm_shared_msrs *locals |
152 | = container_of(urn, struct kvm_shared_msrs, urn); | 153 | = container_of(urn, struct kvm_shared_msrs, urn); |
154 | struct kvm_shared_msr_values *values; | ||
153 | 155 | ||
154 | for (slot = 0; slot < shared_msrs_global.nr; ++slot) { | 156 | for (slot = 0; slot < shared_msrs_global.nr; ++slot) { |
155 | global = &shared_msrs_global.msrs[slot]; | 157 | values = &locals->values[slot]; |
156 | if (global->value != locals->current_value[slot]) { | 158 | if (values->host != values->curr) { |
157 | wrmsrl(global->msr, global->value); | 159 | wrmsrl(shared_msrs_global.msrs[slot], values->host); |
158 | locals->current_value[slot] = global->value; | 160 | values->curr = values->host; |
159 | } | 161 | } |
160 | } | 162 | } |
161 | locals->registered = false; | 163 | locals->registered = false; |
162 | user_return_notifier_unregister(urn); | 164 | user_return_notifier_unregister(urn); |
163 | } | 165 | } |
164 | 166 | ||
165 | void kvm_define_shared_msr(unsigned slot, u32 msr) | 167 | static void shared_msr_update(unsigned slot, u32 msr) |
166 | { | 168 | { |
167 | int cpu; | 169 | struct kvm_shared_msrs *smsr; |
168 | u64 value; | 170 | u64 value; |
169 | 171 | ||
172 | smsr = &__get_cpu_var(shared_msrs); | ||
173 | /* shared_msrs_global is only read here, and nobody should ||
174 | * be modifying it at this time, so no lock is needed */ ||
175 | if (slot >= shared_msrs_global.nr) { | ||
176 | printk(KERN_ERR "kvm: invalid MSR slot!"); | ||
177 | return; | ||
178 | } | ||
179 | rdmsrl_safe(msr, &value); | ||
180 | smsr->values[slot].host = value; | ||
181 | smsr->values[slot].curr = value; | ||
182 | } | ||
183 | |||
184 | void kvm_define_shared_msr(unsigned slot, u32 msr) | ||
185 | { | ||
170 | if (slot >= shared_msrs_global.nr) | 186 | if (slot >= shared_msrs_global.nr) |
171 | shared_msrs_global.nr = slot + 1; | 187 | shared_msrs_global.nr = slot + 1; |
172 | shared_msrs_global.msrs[slot].msr = msr; | 188 | shared_msrs_global.msrs[slot] = msr; |
173 | rdmsrl_safe(msr, &value); | 189 | /* ensure shared_msrs_global has been updated before it is used */ |
174 | shared_msrs_global.msrs[slot].value = value; | 190 | smp_wmb(); |
175 | for_each_online_cpu(cpu) | ||
176 | per_cpu(shared_msrs, cpu).current_value[slot] = value; | ||
177 | } | 191 | } |
178 | EXPORT_SYMBOL_GPL(kvm_define_shared_msr); | 192 | EXPORT_SYMBOL_GPL(kvm_define_shared_msr); |
179 | 193 | ||
180 | static void kvm_shared_msr_cpu_online(void) | 194 | static void kvm_shared_msr_cpu_online(void) |
181 | { | 195 | { |
182 | unsigned i; | 196 | unsigned i; |
183 | struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs); | ||
184 | 197 | ||
185 | for (i = 0; i < shared_msrs_global.nr; ++i) | 198 | for (i = 0; i < shared_msrs_global.nr; ++i) |
186 | locals->current_value[i] = shared_msrs_global.msrs[i].value; | 199 | shared_msr_update(i, shared_msrs_global.msrs[i]); |
187 | } | 200 | } |
188 | 201 | ||
189 | void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) | 202 | void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) |
190 | { | 203 | { |
191 | struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); | 204 | struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); |
192 | 205 | ||
193 | if (((value ^ smsr->current_value[slot]) & mask) == 0) | 206 | if (((value ^ smsr->values[slot].curr) & mask) == 0) |
194 | return; | 207 | return; |
195 | smsr->current_value[slot] = value; | 208 | smsr->values[slot].curr = value; |
196 | wrmsrl(shared_msrs_global.msrs[slot].msr, value); | 209 | wrmsrl(shared_msrs_global.msrs[slot], value); |
197 | if (!smsr->registered) { | 210 | if (!smsr->registered) { |
198 | smsr->urn.on_user_return = kvm_on_user_return; | 211 | smsr->urn.on_user_return = kvm_on_user_return; |
199 | user_return_notifier_register(&smsr->urn); | 212 | user_return_notifier_register(&smsr->urn); |
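The restructured shared-MSR code pairs a per-CPU {host, curr} snapshot with a user-return notifier: guest values are written lazily only when the masked bits actually change, and the host value is restored on return to userspace only if the MSR was dirtied. A user-space model of both halves, with wrmsrl() replaced by a stub:

    #include <stdint.h>

    struct msr_slot { uint64_t host, curr; };

    static uint64_t fake_msr;                     /* one emulated MSR */
    static void wrmsrl_model(uint64_t v) { fake_msr = v; }

    void set_shared_msr(struct msr_slot *s, uint64_t value, uint64_t mask)
    {
        if (((value ^ s->curr) & mask) == 0)
            return;                /* nothing in the masked bits changed */
        s->curr = value;
        wrmsrl_model(value);
    }

    void on_user_return(struct msr_slot *s)
    {
        if (s->host != s->curr) {  /* restore only if the guest dirtied it */
            wrmsrl_model(s->host);
            s->curr = s->host;
        }
    }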
@@ -257,12 +270,68 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) | |||
257 | } | 270 | } |
258 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); | 271 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); |
259 | 272 | ||
273 | #define EXCPT_BENIGN 0 | ||
274 | #define EXCPT_CONTRIBUTORY 1 | ||
275 | #define EXCPT_PF 2 | ||
276 | |||
277 | static int exception_class(int vector) | ||
278 | { | ||
279 | switch (vector) { | ||
280 | case PF_VECTOR: | ||
281 | return EXCPT_PF; | ||
282 | case DE_VECTOR: | ||
283 | case TS_VECTOR: | ||
284 | case NP_VECTOR: | ||
285 | case SS_VECTOR: | ||
286 | case GP_VECTOR: | ||
287 | return EXCPT_CONTRIBUTORY; | ||
288 | default: | ||
289 | break; | ||
290 | } | ||
291 | return EXCPT_BENIGN; | ||
292 | } | ||
293 | |||
294 | static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | ||
295 | unsigned nr, bool has_error, u32 error_code) | ||
296 | { | ||
297 | u32 prev_nr; | ||
298 | int class1, class2; | ||
299 | |||
300 | if (!vcpu->arch.exception.pending) { | ||
301 | queue: | ||
302 | vcpu->arch.exception.pending = true; | ||
303 | vcpu->arch.exception.has_error_code = has_error; | ||
304 | vcpu->arch.exception.nr = nr; | ||
305 | vcpu->arch.exception.error_code = error_code; | ||
306 | return; | ||
307 | } | ||
308 | |||
309 | /* a previous exception is pending: decide how the two combine */ ||
310 | prev_nr = vcpu->arch.exception.nr; | ||
311 | if (prev_nr == DF_VECTOR) { | ||
312 | /* triple fault -> shutdown */ | ||
313 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | ||
314 | return; | ||
315 | } | ||
316 | class1 = exception_class(prev_nr); | ||
317 | class2 = exception_class(nr); | ||
318 | if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) | ||
319 | || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { | ||
320 | /* generate double fault per SDM Table 5-5 */ | ||
321 | vcpu->arch.exception.pending = true; | ||
322 | vcpu->arch.exception.has_error_code = true; | ||
323 | vcpu->arch.exception.nr = DF_VECTOR; | ||
324 | vcpu->arch.exception.error_code = 0; | ||
325 | } else | ||
326 | /* replace the previous exception with the new one in the ||
327 | hope that instruction re-execution will regenerate the ||
328 | lost exception */ ||
329 | goto queue; | ||
330 | } | ||
331 | |||
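kvm_multiple_exception() centralizes the merge rules of SDM Table 5-5 that kvm_inject_page_fault() previously open-coded for page faults only: two contributory exceptions, or a page fault followed by anything non-benign, escalate to #DF, and a pending #DF escalates to a triple fault. The decision table as a pure function:

    #include <stdbool.h>

    enum excpt_class { EXCPT_BENIGN, EXCPT_CONTRIBUTORY, EXCPT_PF };
    enum outcome { QUEUE_NEW, QUEUE_DF, TRIPLE_FAULT };

    enum outcome merge(bool pending, bool prev_is_df,
                       enum excpt_class prev, enum excpt_class next)
    {
        if (!pending)
            return QUEUE_NEW;          /* nothing queued: just queue it */
        if (prev_is_df)
            return TRIPLE_FAULT;       /* #DF plus anything: shut down */
        if ((prev == EXCPT_CONTRIBUTORY && next == EXCPT_CONTRIBUTORY) ||
            (prev == EXCPT_PF && next != EXCPT_BENIGN))
            return QUEUE_DF;           /* escalate per SDM Table 5-5 */
        return QUEUE_NEW;              /* replace and hope re-execution
                                          regenerates the lost exception */
    }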
260 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) | 332 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) |
261 | { | 333 | { |
262 | WARN_ON(vcpu->arch.exception.pending); | 334 | kvm_multiple_exception(vcpu, nr, false, 0); |
263 | vcpu->arch.exception.pending = true; | ||
264 | vcpu->arch.exception.has_error_code = false; | ||
265 | vcpu->arch.exception.nr = nr; | ||
266 | } | 335 | } |
267 | EXPORT_SYMBOL_GPL(kvm_queue_exception); | 336 | EXPORT_SYMBOL_GPL(kvm_queue_exception); |
268 | 337 | ||
@@ -270,25 +339,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, | |||
270 | u32 error_code) | 339 | u32 error_code) |
271 | { | 340 | { |
272 | ++vcpu->stat.pf_guest; | 341 | ++vcpu->stat.pf_guest; |
273 | |||
274 | if (vcpu->arch.exception.pending) { | ||
275 | switch(vcpu->arch.exception.nr) { | ||
276 | case DF_VECTOR: | ||
277 | /* triple fault -> shutdown */ | ||
278 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | ||
279 | return; | ||
280 | case PF_VECTOR: | ||
281 | vcpu->arch.exception.nr = DF_VECTOR; | ||
282 | vcpu->arch.exception.error_code = 0; | ||
283 | return; | ||
284 | default: | ||
285 | /* replace previous exception with a new one in a hope | ||
286 | that instruction re-execution will regenerate lost | ||
287 | exception */ | ||
288 | vcpu->arch.exception.pending = false; | ||
289 | break; | ||
290 | } | ||
291 | } | ||
292 | vcpu->arch.cr2 = addr; | 342 | vcpu->arch.cr2 = addr; |
293 | kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); | 343 | kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); |
294 | } | 344 | } |
@@ -301,11 +351,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi); | |||
301 | 351 | ||
302 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) | 352 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) |
303 | { | 353 | { |
304 | WARN_ON(vcpu->arch.exception.pending); | 354 | kvm_multiple_exception(vcpu, nr, true, error_code); |
305 | vcpu->arch.exception.pending = true; | ||
306 | vcpu->arch.exception.has_error_code = true; | ||
307 | vcpu->arch.exception.nr = nr; | ||
308 | vcpu->arch.exception.error_code = error_code; | ||
309 | } | 355 | } |
310 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); | 356 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); |
311 | 357 | ||
@@ -383,41 +429,38 @@ out: | |||
383 | 429 | ||
384 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 430 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
385 | { | 431 | { |
386 | if (cr0 & CR0_RESERVED_BITS) { | 432 | cr0 |= X86_CR0_ET; |
387 | printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", | 433 | |
388 | cr0, vcpu->arch.cr0); | 434 | #ifdef CONFIG_X86_64 |
435 | if (cr0 & 0xffffffff00000000UL) { | ||
389 | kvm_inject_gp(vcpu, 0); | 436 | kvm_inject_gp(vcpu, 0); |
390 | return; | 437 | return; |
391 | } | 438 | } |
439 | #endif | ||
440 | |||
441 | cr0 &= ~CR0_RESERVED_BITS; | ||
392 | 442 | ||
393 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { | 443 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { |
394 | printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); | ||
395 | kvm_inject_gp(vcpu, 0); | 444 | kvm_inject_gp(vcpu, 0); |
396 | return; | 445 | return; |
397 | } | 446 | } |
398 | 447 | ||
399 | if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { | 448 | if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { |
400 | printk(KERN_DEBUG "set_cr0: #GP, set PG flag " | ||
401 | "and a clear PE flag\n"); | ||
402 | kvm_inject_gp(vcpu, 0); | 449 | kvm_inject_gp(vcpu, 0); |
403 | return; | 450 | return; |
404 | } | 451 | } |
405 | 452 | ||
406 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 453 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
407 | #ifdef CONFIG_X86_64 | 454 | #ifdef CONFIG_X86_64 |
408 | if ((vcpu->arch.shadow_efer & EFER_LME)) { | 455 | if ((vcpu->arch.efer & EFER_LME)) { |
409 | int cs_db, cs_l; | 456 | int cs_db, cs_l; |
410 | 457 | ||
411 | if (!is_pae(vcpu)) { | 458 | if (!is_pae(vcpu)) { |
412 | printk(KERN_DEBUG "set_cr0: #GP, start paging " | ||
413 | "in long mode while PAE is disabled\n"); | ||
414 | kvm_inject_gp(vcpu, 0); | 459 | kvm_inject_gp(vcpu, 0); |
415 | return; | 460 | return; |
416 | } | 461 | } |
417 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 462 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
418 | if (cs_l) { | 463 | if (cs_l) { |
419 | printk(KERN_DEBUG "set_cr0: #GP, start paging " | ||
420 | "in long mode while CS.L == 1\n"); | ||
421 | kvm_inject_gp(vcpu, 0); | 464 | kvm_inject_gp(vcpu, 0); |
422 | return; | 465 | return; |
423 | 466 | ||
@@ -425,8 +468,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
425 | } else | 468 | } else |
426 | #endif | 469 | #endif |
427 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { | 470 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { |
428 | printk(KERN_DEBUG "set_cr0: #GP, pdptrs " | ||
429 | "reserved bits\n"); | ||
430 | kvm_inject_gp(vcpu, 0); | 471 | kvm_inject_gp(vcpu, 0); |
431 | return; | 472 | return; |
432 | } | 473 | } |
@@ -443,38 +484,33 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0); | |||
443 | 484 | ||
444 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) | 485 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) |
445 | { | 486 | { |
446 | kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); | 487 | kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f)); |
447 | } | 488 | } |
448 | EXPORT_SYMBOL_GPL(kvm_lmsw); | 489 | EXPORT_SYMBOL_GPL(kvm_lmsw); |
449 | 490 | ||
450 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 491 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
451 | { | 492 | { |
452 | unsigned long old_cr4 = vcpu->arch.cr4; | 493 | unsigned long old_cr4 = kvm_read_cr4(vcpu); |
453 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; | 494 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; |
454 | 495 | ||
455 | if (cr4 & CR4_RESERVED_BITS) { | 496 | if (cr4 & CR4_RESERVED_BITS) { |
456 | printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); | ||
457 | kvm_inject_gp(vcpu, 0); | 497 | kvm_inject_gp(vcpu, 0); |
458 | return; | 498 | return; |
459 | } | 499 | } |
460 | 500 | ||
461 | if (is_long_mode(vcpu)) { | 501 | if (is_long_mode(vcpu)) { |
462 | if (!(cr4 & X86_CR4_PAE)) { | 502 | if (!(cr4 & X86_CR4_PAE)) { |
463 | printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while " | ||
464 | "in long mode\n"); | ||
465 | kvm_inject_gp(vcpu, 0); | 503 | kvm_inject_gp(vcpu, 0); |
466 | return; | 504 | return; |
467 | } | 505 | } |
468 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) | 506 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) |
469 | && ((cr4 ^ old_cr4) & pdptr_bits) | 507 | && ((cr4 ^ old_cr4) & pdptr_bits) |
470 | && !load_pdptrs(vcpu, vcpu->arch.cr3)) { | 508 | && !load_pdptrs(vcpu, vcpu->arch.cr3)) { |
471 | printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); | ||
472 | kvm_inject_gp(vcpu, 0); | 509 | kvm_inject_gp(vcpu, 0); |
473 | return; | 510 | return; |
474 | } | 511 | } |
475 | 512 | ||
476 | if (cr4 & X86_CR4_VMXE) { | 513 | if (cr4 & X86_CR4_VMXE) { |
477 | printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n"); | ||
478 | kvm_inject_gp(vcpu, 0); | 514 | kvm_inject_gp(vcpu, 0); |
479 | return; | 515 | return; |
480 | } | 516 | } |
@@ -495,21 +531,16 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
495 | 531 | ||
496 | if (is_long_mode(vcpu)) { | 532 | if (is_long_mode(vcpu)) { |
497 | if (cr3 & CR3_L_MODE_RESERVED_BITS) { | 533 | if (cr3 & CR3_L_MODE_RESERVED_BITS) { |
498 | printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); | ||
499 | kvm_inject_gp(vcpu, 0); | 534 | kvm_inject_gp(vcpu, 0); |
500 | return; | 535 | return; |
501 | } | 536 | } |
502 | } else { | 537 | } else { |
503 | if (is_pae(vcpu)) { | 538 | if (is_pae(vcpu)) { |
504 | if (cr3 & CR3_PAE_RESERVED_BITS) { | 539 | if (cr3 & CR3_PAE_RESERVED_BITS) { |
505 | printk(KERN_DEBUG | ||
506 | "set_cr3: #GP, reserved bits\n"); | ||
507 | kvm_inject_gp(vcpu, 0); | 540 | kvm_inject_gp(vcpu, 0); |
508 | return; | 541 | return; |
509 | } | 542 | } |
510 | if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { | 543 | if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { |
511 | printk(KERN_DEBUG "set_cr3: #GP, pdptrs " | ||
512 | "reserved bits\n"); | ||
513 | kvm_inject_gp(vcpu, 0); | 544 | kvm_inject_gp(vcpu, 0); |
514 | return; | 545 | return; |
515 | } | 546 | } |
@@ -541,7 +572,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3); | |||
541 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | 572 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) |
542 | { | 573 | { |
543 | if (cr8 & CR8_RESERVED_BITS) { | 574 | if (cr8 & CR8_RESERVED_BITS) { |
544 | printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); | ||
545 | kvm_inject_gp(vcpu, 0); | 575 | kvm_inject_gp(vcpu, 0); |
546 | return; | 576 | return; |
547 | } | 577 | } |
@@ -575,9 +605,11 @@ static inline u32 bit(int bitno) | |||
575 | * kvm-specific. Those are put in the beginning of the list. | 605 | * kvm-specific. Those are put in the beginning of the list. |
576 | */ | 606 | */ |
577 | 607 | ||
578 | #define KVM_SAVE_MSRS_BEGIN 2 | 608 | #define KVM_SAVE_MSRS_BEGIN 5 |
579 | static u32 msrs_to_save[] = { | 609 | static u32 msrs_to_save[] = { |
580 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 610 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
611 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | ||
612 | HV_X64_MSR_APIC_ASSIST_PAGE, | ||
581 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 613 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
582 | MSR_K6_STAR, | 614 | MSR_K6_STAR, |
583 | #ifdef CONFIG_X86_64 | 615 | #ifdef CONFIG_X86_64 |
@@ -595,15 +627,12 @@ static u32 emulated_msrs[] = { | |||
595 | static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | 627 | static void set_efer(struct kvm_vcpu *vcpu, u64 efer) |
596 | { | 628 | { |
597 | if (efer & efer_reserved_bits) { | 629 | if (efer & efer_reserved_bits) { |
598 | printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n", | ||
599 | efer); | ||
600 | kvm_inject_gp(vcpu, 0); | 630 | kvm_inject_gp(vcpu, 0); |
601 | return; | 631 | return; |
602 | } | 632 | } |
603 | 633 | ||
604 | if (is_paging(vcpu) | 634 | if (is_paging(vcpu) |
605 | && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) { | 635 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { |
606 | printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n"); | ||
607 | kvm_inject_gp(vcpu, 0); | 636 | kvm_inject_gp(vcpu, 0); |
608 | return; | 637 | return; |
609 | } | 638 | } |
@@ -613,7 +642,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
613 | 642 | ||
614 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 643 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
615 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { | 644 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { |
616 | printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n"); | ||
617 | kvm_inject_gp(vcpu, 0); | 645 | kvm_inject_gp(vcpu, 0); |
618 | return; | 646 | return; |
619 | } | 647 | } |
@@ -624,7 +652,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
624 | 652 | ||
625 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 653 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
626 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { | 654 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { |
627 | printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n"); | ||
628 | kvm_inject_gp(vcpu, 0); | 655 | kvm_inject_gp(vcpu, 0); |
629 | return; | 656 | return; |
630 | } | 657 | } |
@@ -633,9 +660,9 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
633 | kvm_x86_ops->set_efer(vcpu, efer); | 660 | kvm_x86_ops->set_efer(vcpu, efer); |
634 | 661 | ||
635 | efer &= ~EFER_LMA; | 662 | efer &= ~EFER_LMA; |
636 | efer |= vcpu->arch.shadow_efer & EFER_LMA; | 663 | efer |= vcpu->arch.efer & EFER_LMA; |
637 | 664 | ||
638 | vcpu->arch.shadow_efer = efer; | 665 | vcpu->arch.efer = efer; |
639 | 666 | ||
640 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | 667 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; |
641 | kvm_mmu_reset_context(vcpu); | 668 | kvm_mmu_reset_context(vcpu); |
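The masking above keeps the hardware-managed long-mode-active bit stable across guest EFER writes. A minimal sketch of that merge, with the EFER_LMA value written out (bit 10 per the AMD64 architecture manual; the helper name is illustrative, not from the patch):

    #include <stdint.h>

    #define EFER_LMA (1ULL << 10)   /* long mode active, set by hardware */

    /* Guest writes may not toggle LMA: take every bit from the new value
     * except LMA, which is carried over from the current EFER. */
    static uint64_t merge_efer(uint64_t cur, uint64_t newval)
    {
            return (newval & ~EFER_LMA) | (cur & EFER_LMA);
    }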
@@ -670,7 +697,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | |||
670 | { | 697 | { |
671 | static int version; | 698 | static int version; |
672 | struct pvclock_wall_clock wc; | 699 | struct pvclock_wall_clock wc; |
673 | struct timespec now, sys, boot; | 700 | struct timespec boot; |
674 | 701 | ||
675 | if (!wall_clock) | 702 | if (!wall_clock) |
676 | return; | 703 | return; |
@@ -685,9 +712,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | |||
685 | * wall clock specified here. guest system time equals host | 712 | * wall clock specified here. guest system time equals host |
686 | * system time for us, thus we must fill in host boot time here. | 713 | * system time for us, thus we must fill in host boot time here. |
687 | */ | 714 | */ |
688 | now = current_kernel_time(); | 715 | getboottime(&boot); |
689 | ktime_get_ts(&sys); | ||
690 | boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys)); | ||
691 | 716 | ||
692 | wc.sec = boot.tv_sec; | 717 | wc.sec = boot.tv_sec; |
693 | wc.nsec = boot.tv_nsec; | 718 | wc.nsec = boot.tv_nsec; |
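getboottime() returns what the deleted lines derived by hand: boot time is wall-clock time minus the monotonic time elapsed since boot. A user-space POSIX sketch of the same arithmetic (approximate, since the two clock reads are not atomic and CLOCK_MONOTONIC excludes suspend):

    #include <time.h>

    static struct timespec boottime_estimate(void)
    {
            struct timespec now, sys, boot;

            clock_gettime(CLOCK_REALTIME, &now);   /* wall clock */
            clock_gettime(CLOCK_MONOTONIC, &sys);  /* time since boot */
            boot.tv_sec  = now.tv_sec - sys.tv_sec;
            boot.tv_nsec = now.tv_nsec - sys.tv_nsec;
            if (boot.tv_nsec < 0) {
                    boot.tv_sec -= 1;
                    boot.tv_nsec += 1000000000L;
            }
            return boot;
    }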
@@ -762,6 +787,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
762 | local_irq_save(flags); | 787 | local_irq_save(flags); |
763 | kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp); | 788 | kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp); |
764 | ktime_get_ts(&ts); | 789 | ktime_get_ts(&ts); |
790 | monotonic_to_bootbased(&ts); | ||
765 | local_irq_restore(flags); | 791 | local_irq_restore(flags); |
766 | 792 | ||
767 | /* With all the info we got, fill in the values */ | 793 | /* With all the info we got, fill in the values */ |
@@ -914,9 +940,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
914 | if (msr >= MSR_IA32_MC0_CTL && | 940 | if (msr >= MSR_IA32_MC0_CTL && |
915 | msr < MSR_IA32_MC0_CTL + 4 * bank_num) { | 941 | msr < MSR_IA32_MC0_CTL + 4 * bank_num) { |
916 | u32 offset = msr - MSR_IA32_MC0_CTL; | 942 | u32 offset = msr - MSR_IA32_MC0_CTL; |
917 | /* only 0 or all 1s can be written to IA32_MCi_CTL */ | 943 | /* only 0 or all 1s can be written to IA32_MCi_CTL |
944 | * some Linux kernels, though, clear bit 10 in bank 4 to | ||
945 | * work around a BIOS/GART TBL issue on AMD K8s; ignore | ||
946 | * this to avoid an uncaught #GP in the guest | ||
947 | */ | ||
918 | if ((offset & 0x3) == 0 && | 948 | if ((offset & 0x3) == 0 && |
919 | data != 0 && data != ~(u64)0) | 949 | data != 0 && (data | (1 << 10)) != ~(u64)0) |
920 | return -1; | 950 | return -1; |
921 | vcpu->arch.mce_banks[offset] = data; | 951 | vcpu->arch.mce_banks[offset] = data; |
922 | break; | 952 | break; |
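Stated as a predicate, the relaxed check accepts three values for an IA32_MCi_CTL write: all zeros, all ones, and all ones with bit 10 clear (the K8 workaround). A standalone sketch (function name is illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    static bool mci_ctl_write_ok(uint32_t offset, uint64_t data)
    {
            if ((offset & 0x3) != 0)          /* not a MCi_CTL register */
                    return true;
            /* All zeros, all ones, and all ones minus bit 10 all pass. */
            return data == 0 || (data | (1ULL << 10)) == ~(uint64_t)0;
    }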
@@ -958,6 +988,100 @@ out: | |||
958 | return r; | 988 | return r; |
959 | } | 989 | } |
960 | 990 | ||
991 | static bool kvm_hv_hypercall_enabled(struct kvm *kvm) | ||
992 | { | ||
993 | return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; | ||
994 | } | ||
995 | |||
996 | static bool kvm_hv_msr_partition_wide(u32 msr) | ||
997 | { | ||
998 | bool r = false; | ||
999 | switch (msr) { | ||
1000 | case HV_X64_MSR_GUEST_OS_ID: | ||
1001 | case HV_X64_MSR_HYPERCALL: | ||
1002 | r = true; | ||
1003 | break; | ||
1004 | } | ||
1005 | |||
1006 | return r; | ||
1007 | } | ||
1008 | |||
1009 | static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) | ||
1010 | { | ||
1011 | struct kvm *kvm = vcpu->kvm; | ||
1012 | |||
1013 | switch (msr) { | ||
1014 | case HV_X64_MSR_GUEST_OS_ID: | ||
1015 | kvm->arch.hv_guest_os_id = data; | ||
1016 | /* setting guest os id to zero disables hypercall page */ | ||
1017 | if (!kvm->arch.hv_guest_os_id) | ||
1018 | kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE; | ||
1019 | break; | ||
1020 | case HV_X64_MSR_HYPERCALL: { | ||
1021 | u64 gfn; | ||
1022 | unsigned long addr; | ||
1023 | u8 instructions[4]; | ||
1024 | |||
1025 | /* if guest os id is not set, hypercall should remain disabled */ | ||
1026 | if (!kvm->arch.hv_guest_os_id) | ||
1027 | break; | ||
1028 | if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) { | ||
1029 | kvm->arch.hv_hypercall = data; | ||
1030 | break; | ||
1031 | } | ||
1032 | gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; | ||
1033 | addr = gfn_to_hva(kvm, gfn); | ||
1034 | if (kvm_is_error_hva(addr)) | ||
1035 | return 1; | ||
1036 | kvm_x86_ops->patch_hypercall(vcpu, instructions); | ||
1037 | ((unsigned char *)instructions)[3] = 0xc3; /* ret */ | ||
1038 | if (copy_to_user((void __user *)addr, instructions, 4)) | ||
1039 | return 1; | ||
1040 | kvm->arch.hv_hypercall = data; | ||
1041 | break; | ||
1042 | } | ||
1043 | default: | ||
1044 | pr_unimpl(vcpu, "Hyper-V unimplemented wrmsr: 0x%x " | ||
1045 | "data 0x%llx\n", msr, data); | ||
1046 | return 1; | ||
1047 | } | ||
1048 | return 0; | ||
1049 | } | ||
1050 | |||
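From the guest side, the sequence set_msr_hyperv_pw() emulates looks roughly like this (MSR numbers per the Hyper-V TLFS; wrmsr() here is a stub standing in for the WRMSR instruction):

    #include <stdint.h>

    #define HV_X64_MSR_GUEST_OS_ID      0x40000000
    #define HV_X64_MSR_HYPERCALL        0x40000001
    #define HV_X64_MSR_HYPERCALL_ENABLE 0x1ULL

    /* Stub for illustration; real code executes the WRMSR instruction. */
    static void wrmsr(uint32_t msr, uint64_t val) { (void)msr; (void)val; }

    static void hv_enable_hypercall_page(uint64_t page_gpa, uint64_t os_id)
    {
            /* A zero OS ID keeps (or makes) the hypercall page disabled,
             * so it must be written first. */
            wrmsr(HV_X64_MSR_GUEST_OS_ID, os_id);
            /* Upper bits carry the guest-physical page number; bit 0
             * asks the hypervisor to patch in the hypercall stub. */
            wrmsr(HV_X64_MSR_HYPERCALL,
                  (page_gpa & ~0xfffULL) | HV_X64_MSR_HYPERCALL_ENABLE);
    }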
1051 | static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) | ||
1052 | { | ||
1053 | switch (msr) { | ||
1054 | case HV_X64_MSR_APIC_ASSIST_PAGE: { | ||
1055 | unsigned long addr; | ||
1056 | |||
1057 | if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { | ||
1058 | vcpu->arch.hv_vapic = data; | ||
1059 | break; | ||
1060 | } | ||
1061 | addr = gfn_to_hva(vcpu->kvm, data >> | ||
1062 | HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT); | ||
1063 | if (kvm_is_error_hva(addr)) | ||
1064 | return 1; | ||
1065 | if (clear_user((void __user *)addr, PAGE_SIZE)) | ||
1066 | return 1; | ||
1067 | vcpu->arch.hv_vapic = data; | ||
1068 | break; | ||
1069 | } | ||
1070 | case HV_X64_MSR_EOI: | ||
1071 | return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data); | ||
1072 | case HV_X64_MSR_ICR: | ||
1073 | return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); | ||
1074 | case HV_X64_MSR_TPR: | ||
1075 | return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); | ||
1076 | default: | ||
1077 | pr_unimpl(vcpu, "Hyper-V unimplemented wrmsr: 0x%x " | ||
1078 | "data 0x%llx\n", msr, data); | ||
1079 | return 1; | ||
1080 | } | ||
1081 | |||
1082 | return 0; | ||
1083 | } | ||
1084 | |||
961 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 1085 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
962 | { | 1086 | { |
963 | switch (msr) { | 1087 | switch (msr) { |
@@ -1072,6 +1196,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1072 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " | 1196 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " |
1073 | "0x%x data 0x%llx\n", msr, data); | 1197 | "0x%x data 0x%llx\n", msr, data); |
1074 | break; | 1198 | break; |
1199 | case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: | ||
1200 | if (kvm_hv_msr_partition_wide(msr)) { | ||
1201 | int r; | ||
1202 | mutex_lock(&vcpu->kvm->lock); | ||
1203 | r = set_msr_hyperv_pw(vcpu, msr, data); | ||
1204 | mutex_unlock(&vcpu->kvm->lock); | ||
1205 | return r; | ||
1206 | } else | ||
1207 | return set_msr_hyperv(vcpu, msr, data); | ||
1208 | break; | ||
1075 | default: | 1209 | default: |
1076 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | 1210 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) |
1077 | return xen_hvm_config(vcpu, data); | 1211 | return xen_hvm_config(vcpu, data); |
@@ -1171,6 +1305,54 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1171 | return 0; | 1305 | return 0; |
1172 | } | 1306 | } |
1173 | 1307 | ||
1308 | static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | ||
1309 | { | ||
1310 | u64 data = 0; | ||
1311 | struct kvm *kvm = vcpu->kvm; | ||
1312 | |||
1313 | switch (msr) { | ||
1314 | case HV_X64_MSR_GUEST_OS_ID: | ||
1315 | data = kvm->arch.hv_guest_os_id; | ||
1316 | break; | ||
1317 | case HV_X64_MSR_HYPERCALL: | ||
1318 | data = kvm->arch.hv_hypercall; | ||
1319 | break; | ||
1320 | default: | ||
1321 | pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | ||
1322 | return 1; | ||
1323 | } | ||
1324 | |||
1325 | *pdata = data; | ||
1326 | return 0; | ||
1327 | } | ||
1328 | |||
1329 | static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | ||
1330 | { | ||
1331 | u64 data = 0; | ||
1332 | |||
1333 | switch (msr) { | ||
1334 | case HV_X64_MSR_VP_INDEX: { | ||
1335 | int r; | ||
1336 | struct kvm_vcpu *v; | ||
1337 | kvm_for_each_vcpu(r, v, vcpu->kvm) | ||
1338 | if (v == vcpu) | ||
1339 | data = r; | ||
1340 | break; | ||
1341 | } | ||
1342 | case HV_X64_MSR_EOI: | ||
1343 | return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); | ||
1344 | case HV_X64_MSR_ICR: | ||
1345 | return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); | ||
1346 | case HV_X64_MSR_TPR: | ||
1347 | return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); | ||
1348 | default: | ||
1349 | pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | ||
1350 | return 1; | ||
1351 | } | ||
1352 | *pdata = data; | ||
1353 | return 0; | ||
1354 | } | ||
1355 | |||
1174 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | 1356 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) |
1175 | { | 1357 | { |
1176 | u64 data; | 1358 | u64 data; |
@@ -1222,7 +1404,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1222 | data |= (((uint64_t)4ULL) << 40); | 1404 | data |= (((uint64_t)4ULL) << 40); |
1223 | break; | 1405 | break; |
1224 | case MSR_EFER: | 1406 | case MSR_EFER: |
1225 | data = vcpu->arch.shadow_efer; | 1407 | data = vcpu->arch.efer; |
1226 | break; | 1408 | break; |
1227 | case MSR_KVM_WALL_CLOCK: | 1409 | case MSR_KVM_WALL_CLOCK: |
1228 | data = vcpu->kvm->arch.wall_clock; | 1410 | data = vcpu->kvm->arch.wall_clock; |
@@ -1237,6 +1419,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1237 | case MSR_IA32_MCG_STATUS: | 1419 | case MSR_IA32_MCG_STATUS: |
1238 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: | 1420 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: |
1239 | return get_msr_mce(vcpu, msr, pdata); | 1421 | return get_msr_mce(vcpu, msr, pdata); |
1422 | case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: | ||
1423 | if (kvm_hv_msr_partition_wide(msr)) { | ||
1424 | int r; | ||
1425 | mutex_lock(&vcpu->kvm->lock); | ||
1426 | r = get_msr_hyperv_pw(vcpu, msr, pdata); | ||
1427 | mutex_unlock(&vcpu->kvm->lock); | ||
1428 | return r; | ||
1429 | } else | ||
1430 | return get_msr_hyperv(vcpu, msr, pdata); | ||
1431 | break; | ||
1240 | default: | 1432 | default: |
1241 | if (!ignore_msrs) { | 1433 | if (!ignore_msrs) { |
1242 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); | 1434 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
@@ -1262,15 +1454,15 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, | |||
1262 | int (*do_msr)(struct kvm_vcpu *vcpu, | 1454 | int (*do_msr)(struct kvm_vcpu *vcpu, |
1263 | unsigned index, u64 *data)) | 1455 | unsigned index, u64 *data)) |
1264 | { | 1456 | { |
1265 | int i; | 1457 | int i, idx; |
1266 | 1458 | ||
1267 | vcpu_load(vcpu); | 1459 | vcpu_load(vcpu); |
1268 | 1460 | ||
1269 | down_read(&vcpu->kvm->slots_lock); | 1461 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
1270 | for (i = 0; i < msrs->nmsrs; ++i) | 1462 | for (i = 0; i < msrs->nmsrs; ++i) |
1271 | if (do_msr(vcpu, entries[i].index, &entries[i].data)) | 1463 | if (do_msr(vcpu, entries[i].index, &entries[i].data)) |
1272 | break; | 1464 | break; |
1273 | up_read(&vcpu->kvm->slots_lock); | 1465 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
1274 | 1466 | ||
1275 | vcpu_put(vcpu); | 1467 | vcpu_put(vcpu); |
1276 | 1468 | ||
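The slots_lock reader/writer semaphore becomes SRCU here: MSR accesses only need a stable view of the memslots, so they take a sleepable read-side critical section instead of contending on a semaphore. The reader pattern in isolation (a kernel-context fragment, not a standalone program):

    int idx;

    idx = srcu_read_lock(&vcpu->kvm->srcu);
    /* ... any dereference of kvm->memslots is safe in this window ... */
    srcu_read_unlock(&vcpu->kvm->srcu, idx);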
@@ -1352,6 +1544,11 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1352 | case KVM_CAP_XEN_HVM: | 1544 | case KVM_CAP_XEN_HVM: |
1353 | case KVM_CAP_ADJUST_CLOCK: | 1545 | case KVM_CAP_ADJUST_CLOCK: |
1354 | case KVM_CAP_VCPU_EVENTS: | 1546 | case KVM_CAP_VCPU_EVENTS: |
1547 | case KVM_CAP_HYPERV: | ||
1548 | case KVM_CAP_HYPERV_VAPIC: | ||
1549 | case KVM_CAP_HYPERV_SPIN: | ||
1550 | case KVM_CAP_PCI_SEGMENT: | ||
1551 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | ||
1355 | r = 1; | 1552 | r = 1; |
1356 | break; | 1553 | break; |
1357 | case KVM_CAP_COALESCED_MMIO: | 1554 | case KVM_CAP_COALESCED_MMIO: |
@@ -1465,8 +1662,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
1465 | 1662 | ||
1466 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | 1663 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) |
1467 | { | 1664 | { |
1468 | kvm_x86_ops->vcpu_put(vcpu); | ||
1469 | kvm_put_guest_fpu(vcpu); | 1665 | kvm_put_guest_fpu(vcpu); |
1666 | kvm_x86_ops->vcpu_put(vcpu); | ||
1470 | } | 1667 | } |
1471 | 1668 | ||
1472 | static int is_efer_nx(void) | 1669 | static int is_efer_nx(void) |
@@ -1531,6 +1728,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
1531 | cpuid_fix_nx_cap(vcpu); | 1728 | cpuid_fix_nx_cap(vcpu); |
1532 | r = 0; | 1729 | r = 0; |
1533 | kvm_apic_set_version(vcpu); | 1730 | kvm_apic_set_version(vcpu); |
1731 | kvm_x86_ops->cpuid_update(vcpu); | ||
1534 | 1732 | ||
1535 | out_free: | 1733 | out_free: |
1536 | vfree(cpuid_entries); | 1734 | vfree(cpuid_entries); |
@@ -1553,6 +1751,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | |||
1553 | goto out; | 1751 | goto out; |
1554 | vcpu->arch.cpuid_nent = cpuid->nent; | 1752 | vcpu->arch.cpuid_nent = cpuid->nent; |
1555 | kvm_apic_set_version(vcpu); | 1753 | kvm_apic_set_version(vcpu); |
1754 | kvm_x86_ops->cpuid_update(vcpu); | ||
1556 | return 0; | 1755 | return 0; |
1557 | 1756 | ||
1558 | out: | 1757 | out: |
@@ -1595,12 +1794,15 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1595 | u32 index, int *nent, int maxnent) | 1794 | u32 index, int *nent, int maxnent) |
1596 | { | 1795 | { |
1597 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; | 1796 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; |
1598 | unsigned f_gbpages = kvm_x86_ops->gb_page_enable() ? F(GBPAGES) : 0; | ||
1599 | #ifdef CONFIG_X86_64 | 1797 | #ifdef CONFIG_X86_64 |
1798 | unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL) | ||
1799 | ? F(GBPAGES) : 0; | ||
1600 | unsigned f_lm = F(LM); | 1800 | unsigned f_lm = F(LM); |
1601 | #else | 1801 | #else |
1802 | unsigned f_gbpages = 0; | ||
1602 | unsigned f_lm = 0; | 1803 | unsigned f_lm = 0; |
1603 | #endif | 1804 | #endif |
1805 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; | ||
1604 | 1806 | ||
1605 | /* cpuid 1.edx */ | 1807 | /* cpuid 1.edx */ |
1606 | const u32 kvm_supported_word0_x86_features = | 1808 | const u32 kvm_supported_word0_x86_features = |
@@ -1620,7 +1822,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1620 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | 1822 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | |
1621 | F(PAT) | F(PSE36) | 0 /* Reserved */ | | 1823 | F(PAT) | F(PSE36) | 0 /* Reserved */ | |
1622 | f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | | 1824 | f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | |
1623 | F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ | | 1825 | F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp | |
1624 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); | 1826 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); |
1625 | /* cpuid 1.ecx */ | 1827 | /* cpuid 1.ecx */ |
1626 | const u32 kvm_supported_word4_x86_features = | 1828 | const u32 kvm_supported_word4_x86_features = |
@@ -1867,7 +2069,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
1867 | return 0; | 2069 | return 0; |
1868 | if (mce->status & MCI_STATUS_UC) { | 2070 | if (mce->status & MCI_STATUS_UC) { |
1869 | if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || | 2071 | if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || |
1870 | !(vcpu->arch.cr4 & X86_CR4_MCE)) { | 2072 | !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) { |
1871 | printk(KERN_DEBUG "kvm: set_mce: " | 2073 | printk(KERN_DEBUG "kvm: set_mce: " |
1872 | "injects mce exception while " | 2074 | "injects mce exception while " |
1873 | "previous one is in progress!\n"); | 2075 | "previous one is in progress!\n"); |
@@ -1913,7 +2115,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
1913 | 2115 | ||
1914 | events->sipi_vector = vcpu->arch.sipi_vector; | 2116 | events->sipi_vector = vcpu->arch.sipi_vector; |
1915 | 2117 | ||
1916 | events->flags = 0; | 2118 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING |
2119 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR); | ||
1917 | 2120 | ||
1918 | vcpu_put(vcpu); | 2121 | vcpu_put(vcpu); |
1919 | } | 2122 | } |
@@ -1921,7 +2124,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
1921 | static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | 2124 | static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, |
1922 | struct kvm_vcpu_events *events) | 2125 | struct kvm_vcpu_events *events) |
1923 | { | 2126 | { |
1924 | if (events->flags) | 2127 | if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING |
2128 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) | ||
1925 | return -EINVAL; | 2129 | return -EINVAL; |
1926 | 2130 | ||
1927 | vcpu_load(vcpu); | 2131 | vcpu_load(vcpu); |
@@ -1938,10 +2142,12 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
1938 | kvm_pic_clear_isr_ack(vcpu->kvm); | 2142 | kvm_pic_clear_isr_ack(vcpu->kvm); |
1939 | 2143 | ||
1940 | vcpu->arch.nmi_injected = events->nmi.injected; | 2144 | vcpu->arch.nmi_injected = events->nmi.injected; |
1941 | vcpu->arch.nmi_pending = events->nmi.pending; | 2145 | if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) |
2146 | vcpu->arch.nmi_pending = events->nmi.pending; | ||
1942 | kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); | 2147 | kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); |
1943 | 2148 | ||
1944 | vcpu->arch.sipi_vector = events->sipi_vector; | 2149 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) |
2150 | vcpu->arch.sipi_vector = events->sipi_vector; | ||
1945 | 2151 | ||
1946 | vcpu_put(vcpu); | 2152 | vcpu_put(vcpu); |
1947 | 2153 | ||
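The flags word thus becomes a validity mask: the getter advertises which fields it fills in, and the setter rejects bits it does not recognize, then applies only the fields whose bit is set. A compact sketch of the validation half (constants renamed for illustration; the real values live in the KVM ABI headers):

    #define VALID_NMI_PENDING (1U << 0)
    #define VALID_SIPI_VECTOR (1U << 1)
    #define VALID_KNOWN       (VALID_NMI_PENDING | VALID_SIPI_VECTOR)

    /* Unknown bits mean the caller wants semantics this kernel lacks:
     * report failure rather than silently ignore them. */
    static int events_flags_ok(unsigned int flags)
    {
            return (flags & ~VALID_KNOWN) == 0;
    }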
@@ -2157,14 +2363,14 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, | |||
2157 | if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) | 2363 | if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) |
2158 | return -EINVAL; | 2364 | return -EINVAL; |
2159 | 2365 | ||
2160 | down_write(&kvm->slots_lock); | 2366 | mutex_lock(&kvm->slots_lock); |
2161 | spin_lock(&kvm->mmu_lock); | 2367 | spin_lock(&kvm->mmu_lock); |
2162 | 2368 | ||
2163 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); | 2369 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); |
2164 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; | 2370 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; |
2165 | 2371 | ||
2166 | spin_unlock(&kvm->mmu_lock); | 2372 | spin_unlock(&kvm->mmu_lock); |
2167 | up_write(&kvm->slots_lock); | 2373 | mutex_unlock(&kvm->slots_lock); |
2168 | return 0; | 2374 | return 0; |
2169 | } | 2375 | } |
2170 | 2376 | ||
@@ -2173,13 +2379,35 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) | |||
2173 | return kvm->arch.n_alloc_mmu_pages; | 2379 | return kvm->arch.n_alloc_mmu_pages; |
2174 | } | 2380 | } |
2175 | 2381 | ||
2382 | gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) | ||
2383 | { | ||
2384 | int i; | ||
2385 | struct kvm_mem_alias *alias; | ||
2386 | struct kvm_mem_aliases *aliases; | ||
2387 | |||
2388 | aliases = rcu_dereference(kvm->arch.aliases); | ||
2389 | |||
2390 | for (i = 0; i < aliases->naliases; ++i) { | ||
2391 | alias = &aliases->aliases[i]; | ||
2392 | if (alias->flags & KVM_ALIAS_INVALID) | ||
2393 | continue; | ||
2394 | if (gfn >= alias->base_gfn | ||
2395 | && gfn < alias->base_gfn + alias->npages) | ||
2396 | return alias->target_gfn + gfn - alias->base_gfn; | ||
2397 | } | ||
2398 | return gfn; | ||
2399 | } | ||
2400 | |||
2176 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | 2401 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) |
2177 | { | 2402 | { |
2178 | int i; | 2403 | int i; |
2179 | struct kvm_mem_alias *alias; | 2404 | struct kvm_mem_alias *alias; |
2405 | struct kvm_mem_aliases *aliases; | ||
2180 | 2406 | ||
2181 | for (i = 0; i < kvm->arch.naliases; ++i) { | 2407 | aliases = rcu_dereference(kvm->arch.aliases); |
2182 | alias = &kvm->arch.aliases[i]; | 2408 | |
2409 | for (i = 0; i < aliases->naliases; ++i) { | ||
2410 | alias = &aliases->aliases[i]; | ||
2183 | if (gfn >= alias->base_gfn | 2411 | if (gfn >= alias->base_gfn |
2184 | && gfn < alias->base_gfn + alias->npages) | 2412 | && gfn < alias->base_gfn + alias->npages) |
2185 | return alias->target_gfn + gfn - alias->base_gfn; | 2413 | return alias->target_gfn + gfn - alias->base_gfn; |
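The aliases array now follows the classic read-copy-update shape: readers dereference a pointer to an immutable snapshot, while the writer builds a modified copy, publishes it, waits out the readers, and frees the old one. A single-file C11 analogy of the publish step (user space; assumes cur_aliases was initialized with a first snapshot, and the SRCU grace-period wait is reduced to a comment):

    #include <stdatomic.h>
    #include <stdlib.h>
    #include <string.h>

    struct aliases { int naliases; /* ... alias slots ... */ };

    static _Atomic(struct aliases *) cur_aliases;

    static int aliases_update(void (*edit)(struct aliases *))
    {
            struct aliases *old, *copy = malloc(sizeof(*copy));

            if (!copy)
                    return -1;
            memcpy(copy, atomic_load(&cur_aliases), sizeof(*copy));
            edit(copy);                       /* mutate the private copy */
            old = atomic_exchange(&cur_aliases, copy);
            /* Kernel code calls synchronize_srcu_expedited() here so no
             * reader can still hold the old pointer. */
            free(old);
            return 0;
    }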
@@ -2197,6 +2425,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
2197 | { | 2425 | { |
2198 | int r, n; | 2426 | int r, n; |
2199 | struct kvm_mem_alias *p; | 2427 | struct kvm_mem_alias *p; |
2428 | struct kvm_mem_aliases *aliases, *old_aliases; | ||
2200 | 2429 | ||
2201 | r = -EINVAL; | 2430 | r = -EINVAL; |
2202 | /* General sanity checks */ | 2431 | /* General sanity checks */ |
@@ -2213,26 +2442,48 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
2213 | < alias->target_phys_addr) | 2442 | < alias->target_phys_addr) |
2214 | goto out; | 2443 | goto out; |
2215 | 2444 | ||
2216 | down_write(&kvm->slots_lock); | 2445 | r = -ENOMEM; |
2217 | spin_lock(&kvm->mmu_lock); | 2446 | aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); |
2447 | if (!aliases) | ||
2448 | goto out; | ||
2449 | |||
2450 | mutex_lock(&kvm->slots_lock); | ||
2218 | 2451 | ||
2219 | p = &kvm->arch.aliases[alias->slot]; | 2452 | /* invalidate any gfn reference in case of deletion/shrinking */ |
2453 | memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); | ||
2454 | aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID; | ||
2455 | old_aliases = kvm->arch.aliases; | ||
2456 | rcu_assign_pointer(kvm->arch.aliases, aliases); | ||
2457 | synchronize_srcu_expedited(&kvm->srcu); | ||
2458 | kvm_mmu_zap_all(kvm); | ||
2459 | kfree(old_aliases); | ||
2460 | |||
2461 | r = -ENOMEM; | ||
2462 | aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); | ||
2463 | if (!aliases) | ||
2464 | goto out_unlock; | ||
2465 | |||
2466 | memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); | ||
2467 | |||
2468 | p = &aliases->aliases[alias->slot]; | ||
2220 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | 2469 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; |
2221 | p->npages = alias->memory_size >> PAGE_SHIFT; | 2470 | p->npages = alias->memory_size >> PAGE_SHIFT; |
2222 | p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; | 2471 | p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; |
2472 | p->flags &= ~(KVM_ALIAS_INVALID); | ||
2223 | 2473 | ||
2224 | for (n = KVM_ALIAS_SLOTS; n > 0; --n) | 2474 | for (n = KVM_ALIAS_SLOTS; n > 0; --n) |
2225 | if (kvm->arch.aliases[n - 1].npages) | 2475 | if (aliases->aliases[n - 1].npages) |
2226 | break; | 2476 | break; |
2227 | kvm->arch.naliases = n; | 2477 | aliases->naliases = n; |
2228 | 2478 | ||
2229 | spin_unlock(&kvm->mmu_lock); | 2479 | old_aliases = kvm->arch.aliases; |
2230 | kvm_mmu_zap_all(kvm); | 2480 | rcu_assign_pointer(kvm->arch.aliases, aliases); |
2231 | 2481 | synchronize_srcu_expedited(&kvm->srcu); | |
2232 | up_write(&kvm->slots_lock); | 2482 | kfree(old_aliases); |
2233 | 2483 | r = 0; | |
2234 | return 0; | ||
2235 | 2484 | ||
2485 | out_unlock: | ||
2486 | mutex_unlock(&kvm->slots_lock); | ||
2236 | out: | 2487 | out: |
2237 | return r; | 2488 | return r; |
2238 | } | 2489 | } |
@@ -2270,18 +2521,18 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
2270 | r = 0; | 2521 | r = 0; |
2271 | switch (chip->chip_id) { | 2522 | switch (chip->chip_id) { |
2272 | case KVM_IRQCHIP_PIC_MASTER: | 2523 | case KVM_IRQCHIP_PIC_MASTER: |
2273 | spin_lock(&pic_irqchip(kvm)->lock); | 2524 | raw_spin_lock(&pic_irqchip(kvm)->lock); |
2274 | memcpy(&pic_irqchip(kvm)->pics[0], | 2525 | memcpy(&pic_irqchip(kvm)->pics[0], |
2275 | &chip->chip.pic, | 2526 | &chip->chip.pic, |
2276 | sizeof(struct kvm_pic_state)); | 2527 | sizeof(struct kvm_pic_state)); |
2277 | spin_unlock(&pic_irqchip(kvm)->lock); | 2528 | raw_spin_unlock(&pic_irqchip(kvm)->lock); |
2278 | break; | 2529 | break; |
2279 | case KVM_IRQCHIP_PIC_SLAVE: | 2530 | case KVM_IRQCHIP_PIC_SLAVE: |
2280 | spin_lock(&pic_irqchip(kvm)->lock); | 2531 | raw_spin_lock(&pic_irqchip(kvm)->lock); |
2281 | memcpy(&pic_irqchip(kvm)->pics[1], | 2532 | memcpy(&pic_irqchip(kvm)->pics[1], |
2282 | &chip->chip.pic, | 2533 | &chip->chip.pic, |
2283 | sizeof(struct kvm_pic_state)); | 2534 | sizeof(struct kvm_pic_state)); |
2284 | spin_unlock(&pic_irqchip(kvm)->lock); | 2535 | raw_spin_unlock(&pic_irqchip(kvm)->lock); |
2285 | break; | 2536 | break; |
2286 | case KVM_IRQCHIP_IOAPIC: | 2537 | case KVM_IRQCHIP_IOAPIC: |
2287 | r = kvm_set_ioapic(kvm, &chip->chip.ioapic); | 2538 | r = kvm_set_ioapic(kvm, &chip->chip.ioapic); |
@@ -2361,29 +2612,63 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, | |||
2361 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | 2612 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, |
2362 | struct kvm_dirty_log *log) | 2613 | struct kvm_dirty_log *log) |
2363 | { | 2614 | { |
2364 | int r; | 2615 | int r, i; |
2365 | int n; | ||
2366 | struct kvm_memory_slot *memslot; | 2616 | struct kvm_memory_slot *memslot; |
2367 | int is_dirty = 0; | 2617 | unsigned long n; |
2618 | unsigned long is_dirty = 0; | ||
2619 | unsigned long *dirty_bitmap = NULL; | ||
2368 | 2620 | ||
2369 | down_write(&kvm->slots_lock); | 2621 | mutex_lock(&kvm->slots_lock); |
2370 | 2622 | ||
2371 | r = kvm_get_dirty_log(kvm, log, &is_dirty); | 2623 | r = -EINVAL; |
2372 | if (r) | 2624 | if (log->slot >= KVM_MEMORY_SLOTS) |
2625 | goto out; | ||
2626 | |||
2627 | memslot = &kvm->memslots->memslots[log->slot]; | ||
2628 | r = -ENOENT; | ||
2629 | if (!memslot->dirty_bitmap) | ||
2630 | goto out; | ||
2631 | |||
2632 | n = kvm_dirty_bitmap_bytes(memslot); | ||
2633 | |||
2634 | r = -ENOMEM; | ||
2635 | dirty_bitmap = vmalloc(n); | ||
2636 | if (!dirty_bitmap) | ||
2373 | goto out; | 2637 | goto out; |
2638 | memset(dirty_bitmap, 0, n); | ||
2639 | |||
2640 | for (i = 0; !is_dirty && i < n/sizeof(long); i++) | ||
2641 | is_dirty = memslot->dirty_bitmap[i]; | ||
2374 | 2642 | ||
2375 | /* If nothing is dirty, don't bother messing with page tables. */ | 2643 | /* If nothing is dirty, don't bother messing with page tables. */ |
2376 | if (is_dirty) { | 2644 | if (is_dirty) { |
2645 | struct kvm_memslots *slots, *old_slots; | ||
2646 | |||
2377 | spin_lock(&kvm->mmu_lock); | 2647 | spin_lock(&kvm->mmu_lock); |
2378 | kvm_mmu_slot_remove_write_access(kvm, log->slot); | 2648 | kvm_mmu_slot_remove_write_access(kvm, log->slot); |
2379 | spin_unlock(&kvm->mmu_lock); | 2649 | spin_unlock(&kvm->mmu_lock); |
2380 | memslot = &kvm->memslots[log->slot]; | 2650 | |
2381 | n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; | 2651 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
2382 | memset(memslot->dirty_bitmap, 0, n); | 2652 | if (!slots) |
2653 | goto out_free; | ||
2654 | |||
2655 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | ||
2656 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; | ||
2657 | |||
2658 | old_slots = kvm->memslots; | ||
2659 | rcu_assign_pointer(kvm->memslots, slots); | ||
2660 | synchronize_srcu_expedited(&kvm->srcu); | ||
2661 | dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; | ||
2662 | kfree(old_slots); | ||
2383 | } | 2663 | } |
2664 | |||
2384 | r = 0; | 2665 | r = 0; |
2666 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) | ||
2667 | r = -EFAULT; | ||
2668 | out_free: | ||
2669 | vfree(dirty_bitmap); | ||
2385 | out: | 2670 | out: |
2386 | up_write(&kvm->slots_lock); | 2671 | mutex_unlock(&kvm->slots_lock); |
2387 | return r; | 2672 | return r; |
2388 | } | 2673 | } |
2389 | 2674 | ||
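The rewritten dirty-log path swaps in a zeroed bitmap and hands the retired one to user space, so the guest never dirties a bitmap that is being copied out. The core move is a pointer exchange; a user-space sketch with a single bitmap and illustrative names:

    #include <stdatomic.h>
    #include <stdlib.h>

    static _Atomic(unsigned long *) live_bitmap;

    /* Returns the retired bitmap for the caller to report and free;
     * the guest-facing pointer now refers to a fresh, zeroed bitmap. */
    static unsigned long *dirty_log_snapshot(size_t words)
    {
            unsigned long *fresh = calloc(words, sizeof(*fresh));

            if (!fresh)
                    return NULL;
            return atomic_exchange(&live_bitmap, fresh);
    }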
@@ -2466,6 +2751,8 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2466 | if (vpic) { | 2751 | if (vpic) { |
2467 | r = kvm_ioapic_init(kvm); | 2752 | r = kvm_ioapic_init(kvm); |
2468 | if (r) { | 2753 | if (r) { |
2754 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | ||
2755 | &vpic->dev); | ||
2469 | kfree(vpic); | 2756 | kfree(vpic); |
2470 | goto create_irqchip_unlock; | 2757 | goto create_irqchip_unlock; |
2471 | } | 2758 | } |
@@ -2477,10 +2764,8 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2477 | r = kvm_setup_default_irq_routing(kvm); | 2764 | r = kvm_setup_default_irq_routing(kvm); |
2478 | if (r) { | 2765 | if (r) { |
2479 | mutex_lock(&kvm->irq_lock); | 2766 | mutex_lock(&kvm->irq_lock); |
2480 | kfree(kvm->arch.vpic); | 2767 | kvm_ioapic_destroy(kvm); |
2481 | kfree(kvm->arch.vioapic); | 2768 | kvm_destroy_pic(kvm); |
2482 | kvm->arch.vpic = NULL; | ||
2483 | kvm->arch.vioapic = NULL; | ||
2484 | mutex_unlock(&kvm->irq_lock); | 2769 | mutex_unlock(&kvm->irq_lock); |
2485 | } | 2770 | } |
2486 | create_irqchip_unlock: | 2771 | create_irqchip_unlock: |
@@ -2496,7 +2781,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2496 | sizeof(struct kvm_pit_config))) | 2781 | sizeof(struct kvm_pit_config))) |
2497 | goto out; | 2782 | goto out; |
2498 | create_pit: | 2783 | create_pit: |
2499 | down_write(&kvm->slots_lock); | 2784 | mutex_lock(&kvm->slots_lock); |
2500 | r = -EEXIST; | 2785 | r = -EEXIST; |
2501 | if (kvm->arch.vpit) | 2786 | if (kvm->arch.vpit) |
2502 | goto create_pit_unlock; | 2787 | goto create_pit_unlock; |
@@ -2505,7 +2790,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2505 | if (kvm->arch.vpit) | 2790 | if (kvm->arch.vpit) |
2506 | r = 0; | 2791 | r = 0; |
2507 | create_pit_unlock: | 2792 | create_pit_unlock: |
2508 | up_write(&kvm->slots_lock); | 2793 | mutex_unlock(&kvm->slots_lock); |
2509 | break; | 2794 | break; |
2510 | case KVM_IRQ_LINE_STATUS: | 2795 | case KVM_IRQ_LINE_STATUS: |
2511 | case KVM_IRQ_LINE: { | 2796 | case KVM_IRQ_LINE: { |
@@ -2722,7 +3007,7 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, | |||
2722 | !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) | 3007 | !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) |
2723 | return 0; | 3008 | return 0; |
2724 | 3009 | ||
2725 | return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v); | 3010 | return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); |
2726 | } | 3011 | } |
2727 | 3012 | ||
2728 | static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | 3013 | static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) |
@@ -2731,17 +3016,44 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | |||
2731 | !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) | 3016 | !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) |
2732 | return 0; | 3017 | return 0; |
2733 | 3018 | ||
2734 | return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v); | 3019 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); |
2735 | } | 3020 | } |
2736 | 3021 | ||
2737 | static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3022 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) |
2738 | struct kvm_vcpu *vcpu) | 3023 | { |
3024 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3025 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); | ||
3026 | } | ||
3027 | |||
3028 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | ||
3029 | { | ||
3030 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3031 | access |= PFERR_FETCH_MASK; | ||
3032 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); | ||
3033 | } | ||
3034 | |||
3035 | gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | ||
3036 | { | ||
3037 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3038 | access |= PFERR_WRITE_MASK; | ||
3039 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); | ||
3040 | } | ||
3041 | |||
3042 | /* used to access any guest's mapped memory without checking CPL */ | ||
3043 | gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | ||
3044 | { | ||
3045 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error); | ||
3046 | } | ||
3047 | |||
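All four wrappers differ only in the access mask they feed to gva_to_gpa(), built from the x86 page-fault error-code bits. Spelled out (bit positions as in the architectural #PF error code; the helper is illustrative):

    #define PFERR_WRITE_MASK (1U << 1)   /* access was a write */
    #define PFERR_USER_MASK  (1U << 2)   /* access from CPL 3 */
    #define PFERR_FETCH_MASK (1U << 4)   /* instruction fetch */

    static unsigned int make_access(int cpl, int is_write, int is_fetch)
    {
            unsigned int access = (cpl == 3) ? PFERR_USER_MASK : 0;

            if (is_write)
                    access |= PFERR_WRITE_MASK;
            if (is_fetch)
                    access |= PFERR_FETCH_MASK;
            return access;   /* the _system variant simply passes 0 */
    }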
3048 | static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, | ||
3049 | struct kvm_vcpu *vcpu, u32 access, | ||
3050 | u32 *error) | ||
2739 | { | 3051 | { |
2740 | void *data = val; | 3052 | void *data = val; |
2741 | int r = X86EMUL_CONTINUE; | 3053 | int r = X86EMUL_CONTINUE; |
2742 | 3054 | ||
2743 | while (bytes) { | 3055 | while (bytes) { |
2744 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3056 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error); |
2745 | unsigned offset = addr & (PAGE_SIZE-1); | 3057 | unsigned offset = addr & (PAGE_SIZE-1); |
2746 | unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); | 3058 | unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); |
2747 | int ret; | 3059 | int ret; |
@@ -2764,14 +3076,37 @@ out: | |||
2764 | return r; | 3076 | return r; |
2765 | } | 3077 | } |
2766 | 3078 | ||
3079 | /* used for instruction fetching */ | ||
3080 | static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, | ||
3081 | struct kvm_vcpu *vcpu, u32 *error) | ||
3082 | { | ||
3083 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3084 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, | ||
3085 | access | PFERR_FETCH_MASK, error); | ||
3086 | } | ||
3087 | |||
3088 | static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, | ||
3089 | struct kvm_vcpu *vcpu, u32 *error) | ||
3090 | { | ||
3091 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3092 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, | ||
3093 | error); | ||
3094 | } | ||
3095 | |||
3096 | static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, | ||
3097 | struct kvm_vcpu *vcpu, u32 *error) | ||
3098 | { | ||
3099 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); | ||
3100 | } | ||
3101 | |||
2767 | static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3102 | static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, |
2768 | struct kvm_vcpu *vcpu) | 3103 | struct kvm_vcpu *vcpu, u32 *error) |
2769 | { | 3104 | { |
2770 | void *data = val; | 3105 | void *data = val; |
2771 | int r = X86EMUL_CONTINUE; | 3106 | int r = X86EMUL_CONTINUE; |
2772 | 3107 | ||
2773 | while (bytes) { | 3108 | while (bytes) { |
2774 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3109 | gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); |
2775 | unsigned offset = addr & (PAGE_SIZE-1); | 3110 | unsigned offset = addr & (PAGE_SIZE-1); |
2776 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); | 3111 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); |
2777 | int ret; | 3112 | int ret; |
@@ -2801,6 +3136,7 @@ static int emulator_read_emulated(unsigned long addr, | |||
2801 | struct kvm_vcpu *vcpu) | 3136 | struct kvm_vcpu *vcpu) |
2802 | { | 3137 | { |
2803 | gpa_t gpa; | 3138 | gpa_t gpa; |
3139 | u32 error_code; | ||
2804 | 3140 | ||
2805 | if (vcpu->mmio_read_completed) { | 3141 | if (vcpu->mmio_read_completed) { |
2806 | memcpy(val, vcpu->mmio_data, bytes); | 3142 | memcpy(val, vcpu->mmio_data, bytes); |
@@ -2810,17 +3146,20 @@ static int emulator_read_emulated(unsigned long addr, | |||
2810 | return X86EMUL_CONTINUE; | 3146 | return X86EMUL_CONTINUE; |
2811 | } | 3147 | } |
2812 | 3148 | ||
2813 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3149 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code); |
3150 | |||
3151 | if (gpa == UNMAPPED_GVA) { | ||
3152 | kvm_inject_page_fault(vcpu, addr, error_code); | ||
3153 | return X86EMUL_PROPAGATE_FAULT; | ||
3154 | } | ||
2814 | 3155 | ||
2815 | /* For APIC access vmexit */ | 3156 | /* For APIC access vmexit */ |
2816 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3157 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
2817 | goto mmio; | 3158 | goto mmio; |
2818 | 3159 | ||
2819 | if (kvm_read_guest_virt(addr, val, bytes, vcpu) | 3160 | if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL) |
2820 | == X86EMUL_CONTINUE) | 3161 | == X86EMUL_CONTINUE) |
2821 | return X86EMUL_CONTINUE; | 3162 | return X86EMUL_CONTINUE; |
2822 | if (gpa == UNMAPPED_GVA) | ||
2823 | return X86EMUL_PROPAGATE_FAULT; | ||
2824 | 3163 | ||
2825 | mmio: | 3164 | mmio: |
2826 | /* | 3165 | /* |
@@ -2859,11 +3198,12 @@ static int emulator_write_emulated_onepage(unsigned long addr, | |||
2859 | struct kvm_vcpu *vcpu) | 3198 | struct kvm_vcpu *vcpu) |
2860 | { | 3199 | { |
2861 | gpa_t gpa; | 3200 | gpa_t gpa; |
3201 | u32 error_code; | ||
2862 | 3202 | ||
2863 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3203 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code); |
2864 | 3204 | ||
2865 | if (gpa == UNMAPPED_GVA) { | 3205 | if (gpa == UNMAPPED_GVA) { |
2866 | kvm_inject_page_fault(vcpu, addr, 2); | 3206 | kvm_inject_page_fault(vcpu, addr, error_code); |
2867 | return X86EMUL_PROPAGATE_FAULT; | 3207 | return X86EMUL_PROPAGATE_FAULT; |
2868 | } | 3208 | } |
2869 | 3209 | ||
@@ -2927,7 +3267,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
2927 | char *kaddr; | 3267 | char *kaddr; |
2928 | u64 val; | 3268 | u64 val; |
2929 | 3269 | ||
2930 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3270 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); |
2931 | 3271 | ||
2932 | if (gpa == UNMAPPED_GVA || | 3272 | if (gpa == UNMAPPED_GVA || |
2933 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3273 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
@@ -2964,35 +3304,21 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) | |||
2964 | 3304 | ||
2965 | int emulate_clts(struct kvm_vcpu *vcpu) | 3305 | int emulate_clts(struct kvm_vcpu *vcpu) |
2966 | { | 3306 | { |
2967 | kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); | 3307 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); |
3308 | kvm_x86_ops->fpu_activate(vcpu); | ||
2968 | return X86EMUL_CONTINUE; | 3309 | return X86EMUL_CONTINUE; |
2969 | } | 3310 | } |
2970 | 3311 | ||
2971 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) | 3312 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) |
2972 | { | 3313 | { |
2973 | struct kvm_vcpu *vcpu = ctxt->vcpu; | 3314 | return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest); |
2974 | |||
2975 | switch (dr) { | ||
2976 | case 0 ... 3: | ||
2977 | *dest = kvm_x86_ops->get_dr(vcpu, dr); | ||
2978 | return X86EMUL_CONTINUE; | ||
2979 | default: | ||
2980 | pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr); | ||
2981 | return X86EMUL_UNHANDLEABLE; | ||
2982 | } | ||
2983 | } | 3315 | } |
2984 | 3316 | ||
2985 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | 3317 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) |
2986 | { | 3318 | { |
2987 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; | 3319 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; |
2988 | int exception; | ||
2989 | 3320 | ||
2990 | kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception); | 3321 | return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask); |
2991 | if (exception) { | ||
2992 | /* FIXME: better handling */ | ||
2993 | return X86EMUL_UNHANDLEABLE; | ||
2994 | } | ||
2995 | return X86EMUL_CONTINUE; | ||
2996 | } | 3322 | } |
2997 | 3323 | ||
2998 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | 3324 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) |
@@ -3006,7 +3332,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | |||
3006 | 3332 | ||
3007 | rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); | 3333 | rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); |
3008 | 3334 | ||
3009 | kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu); | 3335 | kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL); |
3010 | 3336 | ||
3011 | printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", | 3337 | printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", |
3012 | context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); | 3338 | context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); |
@@ -3014,7 +3340,8 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | |||
3014 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); | 3340 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); |
3015 | 3341 | ||
3016 | static struct x86_emulate_ops emulate_ops = { | 3342 | static struct x86_emulate_ops emulate_ops = { |
3017 | .read_std = kvm_read_guest_virt, | 3343 | .read_std = kvm_read_guest_virt_system, |
3344 | .fetch = kvm_fetch_guest_virt, | ||
3018 | .read_emulated = emulator_read_emulated, | 3345 | .read_emulated = emulator_read_emulated, |
3019 | .write_emulated = emulator_write_emulated, | 3346 | .write_emulated = emulator_write_emulated, |
3020 | .cmpxchg_emulated = emulator_cmpxchg_emulated, | 3347 | .cmpxchg_emulated = emulator_cmpxchg_emulated, |
@@ -3057,8 +3384,9 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3057 | vcpu->arch.emulate_ctxt.vcpu = vcpu; | 3384 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
3058 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); | 3385 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); |
3059 | vcpu->arch.emulate_ctxt.mode = | 3386 | vcpu->arch.emulate_ctxt.mode = |
3387 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | ||
3060 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | 3388 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) |
3061 | ? X86EMUL_MODE_REAL : cs_l | 3389 | ? X86EMUL_MODE_VM86 : cs_l |
3062 | ? X86EMUL_MODE_PROT64 : cs_db | 3390 | ? X86EMUL_MODE_PROT64 : cs_db |
3063 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 3391 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
3064 | 3392 | ||
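The nested conditional expression above is easier to read as a chain of branches: real mode first, then VM86 (the case this patch adds), then 64-bit, then 32- vs 16-bit by the CS default-size bit. As plain branches, returning names instead of the X86EMUL_MODE_* constants:

    static const char *emul_mode(int protmode, int vm86, int cs_l, int cs_db)
    {
            if (!protmode)
                    return "REAL";
            if (vm86)               /* EFLAGS.VM inside protected mode */
                    return "VM86";
            if (cs_l)               /* 64-bit code segment */
                    return "PROT64";
            return cs_db ? "PROT32" : "PROT16";
    }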
@@ -3150,12 +3478,17 @@ static int pio_copy_data(struct kvm_vcpu *vcpu) | |||
3150 | gva_t q = vcpu->arch.pio.guest_gva; | 3478 | gva_t q = vcpu->arch.pio.guest_gva; |
3151 | unsigned bytes; | 3479 | unsigned bytes; |
3152 | int ret; | 3480 | int ret; |
3481 | u32 error_code; | ||
3153 | 3482 | ||
3154 | bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; | 3483 | bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; |
3155 | if (vcpu->arch.pio.in) | 3484 | if (vcpu->arch.pio.in) |
3156 | ret = kvm_write_guest_virt(q, p, bytes, vcpu); | 3485 | ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code); |
3157 | else | 3486 | else |
3158 | ret = kvm_read_guest_virt(q, p, bytes, vcpu); | 3487 | ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code); |
3488 | |||
3489 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
3490 | kvm_inject_page_fault(vcpu, q, error_code); | ||
3491 | |||
3159 | return ret; | 3492 | return ret; |
3160 | } | 3493 | } |
3161 | 3494 | ||
@@ -3176,7 +3509,7 @@ int complete_pio(struct kvm_vcpu *vcpu) | |||
3176 | if (io->in) { | 3509 | if (io->in) { |
3177 | r = pio_copy_data(vcpu); | 3510 | r = pio_copy_data(vcpu); |
3178 | if (r) | 3511 | if (r) |
3179 | return r; | 3512 | goto out; |
3180 | } | 3513 | } |
3181 | 3514 | ||
3182 | delta = 1; | 3515 | delta = 1; |
@@ -3203,7 +3536,7 @@ int complete_pio(struct kvm_vcpu *vcpu) | |||
3203 | kvm_register_write(vcpu, VCPU_REGS_RSI, val); | 3536 | kvm_register_write(vcpu, VCPU_REGS_RSI, val); |
3204 | } | 3537 | } |
3205 | } | 3538 | } |
3206 | 3539 | out: | |
3207 | io->count -= io->cur_count; | 3540 | io->count -= io->cur_count; |
3208 | io->cur_count = 0; | 3541 | io->cur_count = 0; |
3209 | 3542 | ||
@@ -3216,11 +3549,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | |||
3216 | int r; | 3549 | int r; |
3217 | 3550 | ||
3218 | if (vcpu->arch.pio.in) | 3551 | if (vcpu->arch.pio.in) |
3219 | r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, | 3552 | r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, |
3220 | vcpu->arch.pio.size, pd); | 3553 | vcpu->arch.pio.size, pd); |
3221 | else | 3554 | else |
3222 | r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, | 3555 | r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, |
3223 | vcpu->arch.pio.size, pd); | 3556 | vcpu->arch.pio.port, vcpu->arch.pio.size, |
3557 | pd); | ||
3224 | return r; | 3558 | return r; |
3225 | } | 3559 | } |
3226 | 3560 | ||
@@ -3231,7 +3565,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu) | |||
3231 | int i, r = 0; | 3565 | int i, r = 0; |
3232 | 3566 | ||
3233 | for (i = 0; i < io->cur_count; i++) { | 3567 | for (i = 0; i < io->cur_count; i++) { |
3234 | if (kvm_io_bus_write(&vcpu->kvm->pio_bus, | 3568 | if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, |
3235 | io->port, io->size, pd)) { | 3569 | io->port, io->size, pd)) { |
3236 | r = -EOPNOTSUPP; | 3570 | r = -EOPNOTSUPP; |
3237 | break; | 3571 | break; |
@@ -3245,6 +3579,8 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) | |||
3245 | { | 3579 | { |
3246 | unsigned long val; | 3580 | unsigned long val; |
3247 | 3581 | ||
3582 | trace_kvm_pio(!in, port, size, 1); | ||
3583 | |||
3248 | vcpu->run->exit_reason = KVM_EXIT_IO; | 3584 | vcpu->run->exit_reason = KVM_EXIT_IO; |
3249 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | 3585 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; |
3250 | vcpu->run->io.size = vcpu->arch.pio.size = size; | 3586 | vcpu->run->io.size = vcpu->arch.pio.size = size; |
@@ -3256,11 +3592,10 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) | |||
3256 | vcpu->arch.pio.down = 0; | 3592 | vcpu->arch.pio.down = 0; |
3257 | vcpu->arch.pio.rep = 0; | 3593 | vcpu->arch.pio.rep = 0; |
3258 | 3594 | ||
3259 | trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, | 3595 | if (!vcpu->arch.pio.in) { |
3260 | size, 1); | 3596 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
3261 | 3597 | memcpy(vcpu->arch.pio_data, &val, 4); | |
3262 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | 3598 | } |
3263 | memcpy(vcpu->arch.pio_data, &val, 4); | ||
3264 | 3599 | ||
3265 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | 3600 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { |
3266 | complete_pio(vcpu); | 3601 | complete_pio(vcpu); |
@@ -3277,6 +3612,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | |||
3277 | unsigned now, in_page; | 3612 | unsigned now, in_page; |
3278 | int ret = 0; | 3613 | int ret = 0; |
3279 | 3614 | ||
3615 | trace_kvm_pio(!in, port, size, count); | ||
3616 | |||
3280 | vcpu->run->exit_reason = KVM_EXIT_IO; | 3617 | vcpu->run->exit_reason = KVM_EXIT_IO; |
3281 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | 3618 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; |
3282 | vcpu->run->io.size = vcpu->arch.pio.size = size; | 3619 | vcpu->run->io.size = vcpu->arch.pio.size = size; |
@@ -3288,9 +3625,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | |||
3288 | vcpu->arch.pio.down = down; | 3625 | vcpu->arch.pio.down = down; |
3289 | vcpu->arch.pio.rep = rep; | 3626 | vcpu->arch.pio.rep = rep; |
3290 | 3627 | ||
3291 | trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, | ||
3292 | size, count); | ||
3293 | |||
3294 | if (!count) { | 3628 | if (!count) { |
3295 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 3629 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
3296 | return 1; | 3630 | return 1; |
@@ -3322,10 +3656,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | |||
3322 | if (!vcpu->arch.pio.in) { | 3656 | if (!vcpu->arch.pio.in) { |
3323 | /* string PIO write */ | 3657 | /* string PIO write */ |
3324 | ret = pio_copy_data(vcpu); | 3658 | ret = pio_copy_data(vcpu); |
3325 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 3659 | if (ret == X86EMUL_PROPAGATE_FAULT) |
3326 | kvm_inject_gp(vcpu, 0); | ||
3327 | return 1; | 3660 | return 1; |
3328 | } | ||
3329 | if (ret == 0 && !pio_string_write(vcpu)) { | 3661 | if (ret == 0 && !pio_string_write(vcpu)) { |
3330 | complete_pio(vcpu); | 3662 | complete_pio(vcpu); |
3331 | if (vcpu->arch.pio.count == 0) | 3663 | if (vcpu->arch.pio.count == 0) |
@@ -3484,11 +3816,76 @@ static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0, | |||
3484 | return a0 | ((gpa_t)a1 << 32); | 3816 | return a0 | ((gpa_t)a1 << 32); |
3485 | } | 3817 | } |
3486 | 3818 | ||
3819 | int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | ||
3820 | { | ||
3821 | u64 param, ingpa, outgpa, ret; | ||
3822 | uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; | ||
3823 | bool fast, longmode; | ||
3824 | int cs_db, cs_l; | ||
3825 | |||
3826 | /* | ||
3827 | * a hypercall generates #UD from non-zero CPL and real mode | ||
3828 | * per the Hyper-V spec | ||
3829 | */ | ||
3830 | if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { | ||
3831 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3832 | return 0; | ||
3833 | } | ||
3834 | |||
3835 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | ||
3836 | longmode = is_long_mode(vcpu) && cs_l == 1; | ||
3837 | |||
3838 | if (!longmode) { | ||
3839 | param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | | ||
3840 | (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff); | ||
3841 | ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) | | ||
3842 | (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff); | ||
3843 | outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) | | ||
3844 | (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff); | ||
3845 | } | ||
3846 | #ifdef CONFIG_X86_64 | ||
3847 | else { | ||
3848 | param = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
3849 | ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX); | ||
3850 | outgpa = kvm_register_read(vcpu, VCPU_REGS_R8); | ||
3851 | } | ||
3852 | #endif | ||
3853 | |||
3854 | code = param & 0xffff; | ||
3855 | fast = (param >> 16) & 0x1; | ||
3856 | rep_cnt = (param >> 32) & 0xfff; | ||
3857 | rep_idx = (param >> 48) & 0xfff; | ||
3858 | |||
3859 | trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); | ||
3860 | |||
3861 | switch (code) { | ||
3862 | case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: | ||
3863 | kvm_vcpu_on_spin(vcpu); | ||
3864 | break; | ||
3865 | default: | ||
3866 | res = HV_STATUS_INVALID_HYPERCALL_CODE; | ||
3867 | break; | ||
3868 | } | ||
3869 | |||
3870 | ret = res | (((u64)rep_done & 0xfff) << 32); | ||
3871 | if (longmode) { | ||
3872 | kvm_register_write(vcpu, VCPU_REGS_RAX, ret); | ||
3873 | } else { | ||
3874 | kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32); | ||
3875 | kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); | ||
3876 | } | ||
3877 | |||
3878 | return 1; | ||
3879 | } | ||
3880 | |||
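The bit-slicing of the hypercall input value follows the Hyper-V TLFS layout. Isolated into a decoder (struct and helper names are illustrative, not from the patch):

    #include <stdbool.h>
    #include <stdint.h>

    struct hv_call {
            uint16_t code;      /* bits 15:0   call code */
            bool     fast;      /* bit 16      register-based arguments */
            uint16_t rep_cnt;   /* bits 43:32  rep count */
            uint16_t rep_idx;   /* bits 59:48  rep start index */
    };

    static struct hv_call hv_decode(uint64_t param)
    {
            struct hv_call c = {
                    .code    = param & 0xffff,
                    .fast    = (param >> 16) & 0x1,
                    .rep_cnt = (param >> 32) & 0xfff,
                    .rep_idx = (param >> 48) & 0xfff,
            };
            return c;
    }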
3487 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | 3881 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) |
3488 | { | 3882 | { |
3489 | unsigned long nr, a0, a1, a2, a3, ret; | 3883 | unsigned long nr, a0, a1, a2, a3, ret; |
3490 | int r = 1; | 3884 | int r = 1; |
3491 | 3885 | ||
3886 | if (kvm_hv_hypercall_enabled(vcpu->kvm)) | ||
3887 | return kvm_hv_hypercall(vcpu); | ||
3888 | |||
3492 | nr = kvm_register_read(vcpu, VCPU_REGS_RAX); | 3889 | nr = kvm_register_read(vcpu, VCPU_REGS_RAX); |
3493 | a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); | 3890 | a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); |
3494 | a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); | 3891 | a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); |
@@ -3531,10 +3928,8 @@ EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); | |||
3531 | int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | 3928 | int kvm_fix_hypercall(struct kvm_vcpu *vcpu) |
3532 | { | 3929 | { |
3533 | char instruction[3]; | 3930 | char instruction[3]; |
3534 | int ret = 0; | ||
3535 | unsigned long rip = kvm_rip_read(vcpu); | 3931 | unsigned long rip = kvm_rip_read(vcpu); |
3536 | 3932 | ||
3537 | |||
3538 | /* | 3933 | /* |
3539 | * Blow out the MMU to ensure that no other VCPU has an active mapping | 3934 | * Blow out the MMU to ensure that no other VCPU has an active mapping |
3540 | * to ensure that the updated hypercall appears atomically across all | 3935 | * to ensure that the updated hypercall appears atomically across all |
@@ -3543,11 +3938,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
3543 | kvm_mmu_zap_all(vcpu->kvm); | 3938 | kvm_mmu_zap_all(vcpu->kvm); |
3544 | 3939 | ||
3545 | kvm_x86_ops->patch_hypercall(vcpu, instruction); | 3940 | kvm_x86_ops->patch_hypercall(vcpu, instruction); |
3546 | if (emulator_write_emulated(rip, instruction, 3, vcpu) | ||
3547 | != X86EMUL_CONTINUE) | ||
3548 | ret = -EFAULT; | ||
3549 | 3941 | ||
3550 | return ret; | 3942 | return emulator_write_emulated(rip, instruction, 3, vcpu); |
3551 | } | 3943 | } |
3552 | 3944 | ||
3553 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | 3945 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) |
@@ -3580,10 +3972,9 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | |||
3580 | { | 3972 | { |
3581 | unsigned long value; | 3973 | unsigned long value; |
3582 | 3974 | ||
3583 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | ||
3584 | switch (cr) { | 3975 | switch (cr) { |
3585 | case 0: | 3976 | case 0: |
3586 | value = vcpu->arch.cr0; | 3977 | value = kvm_read_cr0(vcpu); |
3587 | break; | 3978 | break; |
3588 | case 2: | 3979 | case 2: |
3589 | value = vcpu->arch.cr2; | 3980 | value = vcpu->arch.cr2; |
@@ -3592,7 +3983,7 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | |||
3592 | value = vcpu->arch.cr3; | 3983 | value = vcpu->arch.cr3; |
3593 | break; | 3984 | break; |
3594 | case 4: | 3985 | case 4: |
3595 | value = vcpu->arch.cr4; | 3986 | value = kvm_read_cr4(vcpu); |
3596 | break; | 3987 | break; |
3597 | case 8: | 3988 | case 8: |
3598 | value = kvm_get_cr8(vcpu); | 3989 | value = kvm_get_cr8(vcpu); |
@@ -3610,7 +4001,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | |||
3610 | { | 4001 | { |
3611 | switch (cr) { | 4002 | switch (cr) { |
3612 | case 0: | 4003 | case 0: |
3613 | kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); | 4004 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); |
3614 | *rflags = kvm_get_rflags(vcpu); | 4005 | *rflags = kvm_get_rflags(vcpu); |
3615 | break; | 4006 | break; |
3616 | case 2: | 4007 | case 2: |
@@ -3620,7 +4011,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | |||
3620 | kvm_set_cr3(vcpu, val); | 4011 | kvm_set_cr3(vcpu, val); |
3621 | break; | 4012 | break; |
3622 | case 4: | 4013 | case 4: |
3623 | kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val)); | 4014 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); |
3624 | break; | 4015 | break; |
3625 | case 8: | 4016 | case 8: |
3626 | kvm_set_cr8(vcpu, val & 0xfUL); | 4017 | kvm_set_cr8(vcpu, val & 0xfUL); |
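The kvm_read_cr0()/kvm_read_cr4() accessors replacing the direct vcpu->arch.cr0/cr4 reads (and the explicit decache_cr4_guest_bits() call) come from kvm_cache_regs.h: they decache lazily, and only when the caller asks for bits the guest is allowed to own. A sketch of the CR0 variant, as this series defines it:

    static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
    {
            ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS;

            /* refresh from hardware only if a requested bit may be stale */
            if (tmask & vcpu->arch.cr0_guest_owned_bits)
                    kvm_x86_ops->decache_cr0_guest_bits(vcpu);
            return vcpu->arch.cr0 & mask;
    }

    static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu)
    {
            return kvm_read_cr0_bits(vcpu, ~0UL);
    }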
@@ -3687,6 +4078,7 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | |||
3687 | } | 4078 | } |
3688 | return best; | 4079 | return best; |
3689 | } | 4080 | } |
4081 | EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); | ||
3690 | 4082 | ||
3691 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | 4083 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) |
3692 | { | 4084 | { |
@@ -3770,14 +4162,15 @@ static void vapic_enter(struct kvm_vcpu *vcpu) | |||
3770 | static void vapic_exit(struct kvm_vcpu *vcpu) | 4162 | static void vapic_exit(struct kvm_vcpu *vcpu) |
3771 | { | 4163 | { |
3772 | struct kvm_lapic *apic = vcpu->arch.apic; | 4164 | struct kvm_lapic *apic = vcpu->arch.apic; |
4165 | int idx; | ||
3773 | 4166 | ||
3774 | if (!apic || !apic->vapic_addr) | 4167 | if (!apic || !apic->vapic_addr) |
3775 | return; | 4168 | return; |
3776 | 4169 | ||
3777 | down_read(&vcpu->kvm->slots_lock); | 4170 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
3778 | kvm_release_page_dirty(apic->vapic_page); | 4171 | kvm_release_page_dirty(apic->vapic_page); |
3779 | mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); | 4172 | mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); |
3780 | up_read(&vcpu->kvm->slots_lock); | 4173 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
3781 | } | 4174 | } |
3782 | 4175 | ||
3783 | static void update_cr8_intercept(struct kvm_vcpu *vcpu) | 4176 | static void update_cr8_intercept(struct kvm_vcpu *vcpu) |
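vapic_exit() is the first of many hunks in this file converting the memslot read side from the slots_lock rwsem to SRCU: readers bracket the access and keep the returned index, while writers publish a new memslots array and then synchronize. The read-side idiom used throughout:

    int idx;

    idx = srcu_read_lock(&vcpu->kvm->srcu);
    /* ... look up memslots / gfn mappings here ... */
    srcu_read_unlock(&vcpu->kvm->srcu, idx);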
@@ -3873,12 +4266,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
3873 | r = 0; | 4266 | r = 0; |
3874 | goto out; | 4267 | goto out; |
3875 | } | 4268 | } |
4269 | if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) { | ||
4270 | vcpu->fpu_active = 0; | ||
4271 | kvm_x86_ops->fpu_deactivate(vcpu); | ||
4272 | } | ||
3876 | } | 4273 | } |
3877 | 4274 | ||
3878 | preempt_disable(); | 4275 | preempt_disable(); |
3879 | 4276 | ||
3880 | kvm_x86_ops->prepare_guest_switch(vcpu); | 4277 | kvm_x86_ops->prepare_guest_switch(vcpu); |
3881 | kvm_load_guest_fpu(vcpu); | 4278 | if (vcpu->fpu_active) |
4279 | kvm_load_guest_fpu(vcpu); | ||
3882 | 4280 | ||
3883 | local_irq_disable(); | 4281 | local_irq_disable(); |
3884 | 4282 | ||
@@ -3906,7 +4304,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
3906 | kvm_lapic_sync_to_vapic(vcpu); | 4304 | kvm_lapic_sync_to_vapic(vcpu); |
3907 | } | 4305 | } |
3908 | 4306 | ||
3909 | up_read(&vcpu->kvm->slots_lock); | 4307 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
3910 | 4308 | ||
3911 | kvm_guest_enter(); | 4309 | kvm_guest_enter(); |
3912 | 4310 | ||
@@ -3948,7 +4346,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
3948 | 4346 | ||
3949 | preempt_enable(); | 4347 | preempt_enable(); |
3950 | 4348 | ||
3951 | down_read(&vcpu->kvm->slots_lock); | 4349 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
3952 | 4350 | ||
3953 | /* | 4351 | /* |
3954 | * Profile KVM exit RIPs: | 4352 | * Profile KVM exit RIPs: |
@@ -3970,6 +4368,7 @@ out: | |||
3970 | static int __vcpu_run(struct kvm_vcpu *vcpu) | 4368 | static int __vcpu_run(struct kvm_vcpu *vcpu) |
3971 | { | 4369 | { |
3972 | int r; | 4370 | int r; |
4371 | struct kvm *kvm = vcpu->kvm; | ||
3973 | 4372 | ||
3974 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { | 4373 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { |
3975 | pr_debug("vcpu %d received sipi with vector # %x\n", | 4374 | pr_debug("vcpu %d received sipi with vector # %x\n", |
@@ -3981,7 +4380,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
3981 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 4380 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
3982 | } | 4381 | } |
3983 | 4382 | ||
3984 | down_read(&vcpu->kvm->slots_lock); | 4383 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
3985 | vapic_enter(vcpu); | 4384 | vapic_enter(vcpu); |
3986 | 4385 | ||
3987 | r = 1; | 4386 | r = 1; |
@@ -3989,9 +4388,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
3989 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | 4388 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) |
3990 | r = vcpu_enter_guest(vcpu); | 4389 | r = vcpu_enter_guest(vcpu); |
3991 | else { | 4390 | else { |
3992 | up_read(&vcpu->kvm->slots_lock); | 4391 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
3993 | kvm_vcpu_block(vcpu); | 4392 | kvm_vcpu_block(vcpu); |
3994 | down_read(&vcpu->kvm->slots_lock); | 4393 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
3995 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) | 4394 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) |
3996 | { | 4395 | { |
3997 | switch(vcpu->arch.mp_state) { | 4396 | switch(vcpu->arch.mp_state) { |
@@ -4026,13 +4425,13 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
4026 | ++vcpu->stat.signal_exits; | 4425 | ++vcpu->stat.signal_exits; |
4027 | } | 4426 | } |
4028 | if (need_resched()) { | 4427 | if (need_resched()) { |
4029 | up_read(&vcpu->kvm->slots_lock); | 4428 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
4030 | kvm_resched(vcpu); | 4429 | kvm_resched(vcpu); |
4031 | down_read(&vcpu->kvm->slots_lock); | 4430 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
4032 | } | 4431 | } |
4033 | } | 4432 | } |
4034 | 4433 | ||
4035 | up_read(&vcpu->kvm->slots_lock); | 4434 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
4036 | post_kvm_run_save(vcpu); | 4435 | post_kvm_run_save(vcpu); |
4037 | 4436 | ||
4038 | vapic_exit(vcpu); | 4437 | vapic_exit(vcpu); |
@@ -4062,7 +4461,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
4062 | kvm_set_cr8(vcpu, kvm_run->cr8); | 4461 | kvm_set_cr8(vcpu, kvm_run->cr8); |
4063 | 4462 | ||
4064 | if (vcpu->arch.pio.cur_count) { | 4463 | if (vcpu->arch.pio.cur_count) { |
4464 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
4065 | r = complete_pio(vcpu); | 4465 | r = complete_pio(vcpu); |
4466 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
4066 | if (r) | 4467 | if (r) |
4067 | goto out; | 4468 | goto out; |
4068 | } | 4469 | } |
@@ -4071,10 +4472,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
4071 | vcpu->mmio_read_completed = 1; | 4472 | vcpu->mmio_read_completed = 1; |
4072 | vcpu->mmio_needed = 0; | 4473 | vcpu->mmio_needed = 0; |
4073 | 4474 | ||
4074 | down_read(&vcpu->kvm->slots_lock); | 4475 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
4075 | r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, | 4476 | r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, |
4076 | EMULTYPE_NO_DECODE); | 4477 | EMULTYPE_NO_DECODE); |
4077 | up_read(&vcpu->kvm->slots_lock); | 4478 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
4078 | if (r == EMULATE_DO_MMIO) { | 4479 | if (r == EMULATE_DO_MMIO) { |
4079 | /* | 4480 | /* |
4080 | * Read-modify-write. Back to userspace. | 4481 | * Read-modify-write. Back to userspace. |
@@ -4201,13 +4602,12 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
4201 | sregs->gdt.limit = dt.limit; | 4602 | sregs->gdt.limit = dt.limit; |
4202 | sregs->gdt.base = dt.base; | 4603 | sregs->gdt.base = dt.base; |
4203 | 4604 | ||
4204 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | 4605 | sregs->cr0 = kvm_read_cr0(vcpu); |
4205 | sregs->cr0 = vcpu->arch.cr0; | ||
4206 | sregs->cr2 = vcpu->arch.cr2; | 4606 | sregs->cr2 = vcpu->arch.cr2; |
4207 | sregs->cr3 = vcpu->arch.cr3; | 4607 | sregs->cr3 = vcpu->arch.cr3; |
4208 | sregs->cr4 = vcpu->arch.cr4; | 4608 | sregs->cr4 = kvm_read_cr4(vcpu); |
4209 | sregs->cr8 = kvm_get_cr8(vcpu); | 4609 | sregs->cr8 = kvm_get_cr8(vcpu); |
4210 | sregs->efer = vcpu->arch.shadow_efer; | 4610 | sregs->efer = vcpu->arch.efer; |
4211 | sregs->apic_base = kvm_get_apic_base(vcpu); | 4611 | sregs->apic_base = kvm_get_apic_base(vcpu); |
4212 | 4612 | ||
4213 | memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); | 4613 | memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); |
@@ -4295,14 +4695,23 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
4295 | { | 4695 | { |
4296 | struct descriptor_table dtable; | 4696 | struct descriptor_table dtable; |
4297 | u16 index = selector >> 3; | 4697 | u16 index = selector >> 3; |
4698 | int ret; | ||
4699 | u32 err; | ||
4700 | gva_t addr; | ||
4298 | 4701 | ||
4299 | get_segment_descriptor_dtable(vcpu, selector, &dtable); | 4702 | get_segment_descriptor_dtable(vcpu, selector, &dtable); |
4300 | 4703 | ||
4301 | if (dtable.limit < index * 8 + 7) { | 4704 | if (dtable.limit < index * 8 + 7) { |
4302 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); | 4705 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); |
4303 | return 1; | 4706 | return X86EMUL_PROPAGATE_FAULT; |
4304 | } | 4707 | } |
4305 | return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); | 4708 | addr = dtable.base + index * 8; |
4709 | ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc), | ||
4710 | vcpu, &err); | ||
4711 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
4712 | kvm_inject_page_fault(vcpu, addr, err); | ||
4713 | |||
4714 | return ret; | ||
4306 | } | 4715 | } |
4307 | 4716 | ||
4308 | /* allowed just for 8 bytes segments */ | 4717 | /* allowed just for 8 bytes segments */ |
@@ -4316,15 +4725,23 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
4316 | 4725 | ||
4317 | if (dtable.limit < index * 8 + 7) | 4726 | if (dtable.limit < index * 8 + 7) |
4318 | return 1; | 4727 | return 1; |
4319 | return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); | 4728 | return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL); |
4729 | } | ||
4730 | |||
4731 | static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu, | ||
4732 | struct desc_struct *seg_desc) | ||
4733 | { | ||
4734 | u32 base_addr = get_desc_base(seg_desc); | ||
4735 | |||
4736 | return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL); | ||
4320 | } | 4737 | } |
4321 | 4738 | ||
4322 | static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu, | 4739 | static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu, |
4323 | struct desc_struct *seg_desc) | 4740 | struct desc_struct *seg_desc) |
4324 | { | 4741 | { |
4325 | u32 base_addr = get_desc_base(seg_desc); | 4742 | u32 base_addr = get_desc_base(seg_desc); |
4326 | 4743 | ||
4327 | return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr); | 4744 | return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL); |
4328 | } | 4745 | } |
4329 | 4746 | ||
4330 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | 4747 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) |
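Splitting get_tss_base_addr() into _read and _write variants threads the access intent through the guest page-table walk, so the MMU applies the right permission checks and sets dirty bits only for writes. The translation helpers this series introduces have the shape:

    gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
    gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
    gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);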
@@ -4335,18 +4752,6 @@ static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | |||
4335 | return kvm_seg.selector; | 4752 | return kvm_seg.selector; |
4336 | } | 4753 | } |
4337 | 4754 | ||
4338 | static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu, | ||
4339 | u16 selector, | ||
4340 | struct kvm_segment *kvm_seg) | ||
4341 | { | ||
4342 | struct desc_struct seg_desc; | ||
4343 | |||
4344 | if (load_guest_segment_descriptor(vcpu, selector, &seg_desc)) | ||
4345 | return 1; | ||
4346 | seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg); | ||
4347 | return 0; | ||
4348 | } | ||
4349 | |||
4350 | static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) | 4755 | static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) |
4351 | { | 4756 | { |
4352 | struct kvm_segment segvar = { | 4757 | struct kvm_segment segvar = { |
@@ -4364,7 +4769,7 @@ static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int se | |||
4364 | .unusable = 0, | 4769 | .unusable = 0, |
4365 | }; | 4770 | }; |
4366 | kvm_x86_ops->set_segment(vcpu, &segvar, seg); | 4771 | kvm_x86_ops->set_segment(vcpu, &segvar, seg); |
4367 | return 0; | 4772 | return X86EMUL_CONTINUE; |
4368 | } | 4773 | } |
4369 | 4774 | ||
4370 | static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) | 4775 | static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) |
@@ -4374,24 +4779,112 @@ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) | |||
4374 | (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); | 4779 | (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); |
4375 | } | 4780 | } |
4376 | 4781 | ||
4377 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 4782 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg) |
4378 | int type_bits, int seg) | ||
4379 | { | 4783 | { |
4380 | struct kvm_segment kvm_seg; | 4784 | struct kvm_segment kvm_seg; |
4785 | struct desc_struct seg_desc; | ||
4786 | u8 dpl, rpl, cpl; | ||
4787 | unsigned err_vec = GP_VECTOR; | ||
4788 | u32 err_code = 0; | ||
4789 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | ||
4790 | int ret; | ||
4381 | 4791 | ||
4382 | if (is_vm86_segment(vcpu, seg) || !(vcpu->arch.cr0 & X86_CR0_PE)) | 4792 | if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu)) |
4383 | return kvm_load_realmode_segment(vcpu, selector, seg); | 4793 | return kvm_load_realmode_segment(vcpu, selector, seg); |
4384 | if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg)) | ||
4385 | return 1; | ||
4386 | kvm_seg.type |= type_bits; | ||
4387 | 4794 | ||
4388 | if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS && | 4795 | /* NULL selector is not valid for TR, CS and SS */ |
4389 | seg != VCPU_SREG_LDTR) | 4796 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) |
4390 | if (!kvm_seg.s) | 4797 | && null_selector) |
4391 | kvm_seg.unusable = 1; | 4798 | goto exception; |
4799 | |||
4800 | /* TR should be in GDT only */ | ||
4801 | if (seg == VCPU_SREG_TR && (selector & (1 << 2))) | ||
4802 | goto exception; | ||
4803 | |||
4804 | ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
4805 | if (ret) | ||
4806 | return ret; | ||
4807 | |||
4808 | seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg); | ||
4809 | |||
4810 | if (null_selector) { /* for NULL selector skip all following checks */ | ||
4811 | kvm_seg.unusable = 1; | ||
4812 | goto load; | ||
4813 | } | ||
4814 | |||
4815 | err_code = selector & 0xfffc; | ||
4816 | err_vec = GP_VECTOR; | ||
4817 | |||
4818 | /* can't load system descriptor into segment selector */ | ||
4819 | if (seg <= VCPU_SREG_GS && !kvm_seg.s) | ||
4820 | goto exception; | ||
4392 | 4821 | ||
4822 | if (!kvm_seg.present) { | ||
4823 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; | ||
4824 | goto exception; | ||
4825 | } | ||
4826 | |||
4827 | rpl = selector & 3; | ||
4828 | dpl = kvm_seg.dpl; | ||
4829 | cpl = kvm_x86_ops->get_cpl(vcpu); | ||
4830 | |||
4831 | switch (seg) { | ||
4832 | case VCPU_SREG_SS: | ||
4833 | /* | ||
4834 | * segment is not a writable data segment, or the segment | ||
4835 | * selector's RPL != CPL, or the descriptor's DPL != CPL | ||
4836 | */ | ||
4837 | if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl) | ||
4838 | goto exception; | ||
4839 | break; | ||
4840 | case VCPU_SREG_CS: | ||
4841 | if (!(kvm_seg.type & 8)) | ||
4842 | goto exception; | ||
4843 | |||
4844 | if (kvm_seg.type & 4) { | ||
4845 | /* conforming */ | ||
4846 | if (dpl > cpl) | ||
4847 | goto exception; | ||
4848 | } else { | ||
4849 | /* nonconforming */ | ||
4850 | if (rpl > cpl || dpl != cpl) | ||
4851 | goto exception; | ||
4852 | } | ||
4853 | /* CS(RPL) <- CPL */ | ||
4854 | selector = (selector & 0xfffc) | cpl; | ||
4855 | break; | ||
4856 | case VCPU_SREG_TR: | ||
4857 | if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9)) | ||
4858 | goto exception; | ||
4859 | break; | ||
4860 | case VCPU_SREG_LDTR: | ||
4861 | if (kvm_seg.s || kvm_seg.type != 2) | ||
4862 | goto exception; | ||
4863 | break; | ||
4864 | default: /* DS, ES, FS, or GS */ | ||
4865 | /* | ||
4866 | * segment is not a data or readable code segment or | ||
4867 | * ((segment is a data or nonconforming code segment) | ||
4868 | * and (both RPL and CPL > DPL)) | ||
4869 | */ | ||
4870 | if ((kvm_seg.type & 0xa) == 0x8 || | ||
4871 | (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl))) | ||
4872 | goto exception; | ||
4873 | break; | ||
4874 | } | ||
4875 | |||
4876 | if (!kvm_seg.unusable && kvm_seg.s) { | ||
4877 | /* mark segment as accessed */ | ||
4878 | kvm_seg.type |= 1; | ||
4879 | seg_desc.type |= 1; | ||
4880 | save_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
4881 | } | ||
4882 | load: | ||
4393 | kvm_set_segment(vcpu, &kvm_seg, seg); | 4883 | kvm_set_segment(vcpu, &kvm_seg, seg); |
4394 | return 0; | 4884 | return X86EMUL_CONTINUE; |
4885 | exception: | ||
4886 | kvm_queue_exception_e(vcpu, err_vec, err_code); | ||
4887 | return X86EMUL_PROPAGATE_FAULT; | ||
4395 | } | 4888 | } |
4396 | 4889 | ||
4397 | static void save_state_to_tss32(struct kvm_vcpu *vcpu, | 4890 | static void save_state_to_tss32(struct kvm_vcpu *vcpu, |
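The bit tests on kvm_seg.type in the rewritten loader follow the SDM encoding of the 4-bit type field for code/data segments; spelled out (comment-only sketch):

    /* type field for code/data segments (kvm_seg.s == 1):
     *   bit 3 (0x8): 1 = code segment, 0 = data segment
     *   bit 2 (0x4): data: expand-down    code: conforming
     *   bit 1 (0x2): data: writable       code: readable
     *   bit 0 (0x1): accessed
     *
     * So (type & 0xa) == 0x2 is "writable data segment" (the SS check),
     * (type & 0xa) == 0x8 is "execute-only code segment" (rejected for
     * DS/ES/FS/GS), and (type & 0xc) != 0xc excludes conforming code
     * segments from the RPL/DPL privilege test. */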
@@ -4417,6 +4910,14 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, | |||
4417 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); | 4910 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); |
4418 | } | 4911 | } |
4419 | 4912 | ||
4913 | static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg) | ||
4914 | { | ||
4915 | struct kvm_segment kvm_seg; | ||
4916 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
4917 | kvm_seg.selector = sel; | ||
4918 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
4919 | } | ||
4920 | |||
4420 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, | 4921 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, |
4421 | struct tss_segment_32 *tss) | 4922 | struct tss_segment_32 *tss) |
4422 | { | 4923 | { |
@@ -4434,25 +4935,41 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu, | |||
4434 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); | 4935 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); |
4435 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); | 4936 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); |
4436 | 4937 | ||
4437 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) | 4938 | /* |
4939 | * SDM says that segment selectors are loaded before segment | ||
4940 | * descriptors | ||
4941 | */ | ||
4942 | kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR); | ||
4943 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
4944 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
4945 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
4946 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
4947 | kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS); | ||
4948 | kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS); | ||
4949 | |||
4950 | /* | ||
4951 | * Now load the segment descriptors. If a fault happens at this stage | ||
4952 | * it is handled in the context of the new task | ||
4953 | */ | ||
4954 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR)) | ||
4438 | return 1; | 4955 | return 1; |
4439 | 4956 | ||
4440 | if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) | 4957 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) |
4441 | return 1; | 4958 | return 1; |
4442 | 4959 | ||
4443 | if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) | 4960 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) |
4444 | return 1; | 4961 | return 1; |
4445 | 4962 | ||
4446 | if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) | 4963 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) |
4447 | return 1; | 4964 | return 1; |
4448 | 4965 | ||
4449 | if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) | 4966 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) |
4450 | return 1; | 4967 | return 1; |
4451 | 4968 | ||
4452 | if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) | 4969 | if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS)) |
4453 | return 1; | 4970 | return 1; |
4454 | 4971 | ||
4455 | if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) | 4972 | if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS)) |
4456 | return 1; | 4973 | return 1; |
4457 | return 0; | 4974 | return 0; |
4458 | } | 4975 | } |
@@ -4492,19 +5009,33 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, | |||
4492 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); | 5009 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); |
4493 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); | 5010 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); |
4494 | 5011 | ||
4495 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) | 5012 | /* |
5013 | * SDM says that segment selectors are loaded before segment | ||
5014 | * descriptors | ||
5015 | */ | ||
5016 | kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR); | ||
5017 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
5018 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
5019 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
5020 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
5021 | |||
5022 | /* | ||
5023 | * Now load the segment descriptors. If a fault happens at this stage | ||
5024 | * it is handled in the context of the new task | ||
5025 | */ | ||
5026 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR)) | ||
4496 | return 1; | 5027 | return 1; |
4497 | 5028 | ||
4498 | if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) | 5029 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) |
4499 | return 1; | 5030 | return 1; |
4500 | 5031 | ||
4501 | if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) | 5032 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) |
4502 | return 1; | 5033 | return 1; |
4503 | 5034 | ||
4504 | if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) | 5035 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) |
4505 | return 1; | 5036 | return 1; |
4506 | 5037 | ||
4507 | if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) | 5038 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) |
4508 | return 1; | 5039 | return 1; |
4509 | return 0; | 5040 | return 0; |
4510 | } | 5041 | } |
@@ -4526,7 +5057,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
4526 | sizeof tss_segment_16)) | 5057 | sizeof tss_segment_16)) |
4527 | goto out; | 5058 | goto out; |
4528 | 5059 | ||
4529 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), | 5060 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), |
4530 | &tss_segment_16, sizeof tss_segment_16)) | 5061 | &tss_segment_16, sizeof tss_segment_16)) |
4531 | goto out; | 5062 | goto out; |
4532 | 5063 | ||
@@ -4534,7 +5065,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
4534 | tss_segment_16.prev_task_link = old_tss_sel; | 5065 | tss_segment_16.prev_task_link = old_tss_sel; |
4535 | 5066 | ||
4536 | if (kvm_write_guest(vcpu->kvm, | 5067 | if (kvm_write_guest(vcpu->kvm, |
4537 | get_tss_base_addr(vcpu, nseg_desc), | 5068 | get_tss_base_addr_write(vcpu, nseg_desc), |
4538 | &tss_segment_16.prev_task_link, | 5069 | &tss_segment_16.prev_task_link, |
4539 | sizeof tss_segment_16.prev_task_link)) | 5070 | sizeof tss_segment_16.prev_task_link)) |
4540 | goto out; | 5071 | goto out; |
@@ -4565,7 +5096,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
4565 | sizeof tss_segment_32)) | 5096 | sizeof tss_segment_32)) |
4566 | goto out; | 5097 | goto out; |
4567 | 5098 | ||
4568 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), | 5099 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), |
4569 | &tss_segment_32, sizeof tss_segment_32)) | 5100 | &tss_segment_32, sizeof tss_segment_32)) |
4570 | goto out; | 5101 | goto out; |
4571 | 5102 | ||
@@ -4573,7 +5104,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
4573 | tss_segment_32.prev_task_link = old_tss_sel; | 5104 | tss_segment_32.prev_task_link = old_tss_sel; |
4574 | 5105 | ||
4575 | if (kvm_write_guest(vcpu->kvm, | 5106 | if (kvm_write_guest(vcpu->kvm, |
4576 | get_tss_base_addr(vcpu, nseg_desc), | 5107 | get_tss_base_addr_write(vcpu, nseg_desc), |
4577 | &tss_segment_32.prev_task_link, | 5108 | &tss_segment_32.prev_task_link, |
4578 | sizeof tss_segment_32.prev_task_link)) | 5109 | sizeof tss_segment_32.prev_task_link)) |
4579 | goto out; | 5110 | goto out; |
@@ -4595,8 +5126,9 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4595 | int ret = 0; | 5126 | int ret = 0; |
4596 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); | 5127 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); |
4597 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); | 5128 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); |
5129 | u32 desc_limit; | ||
4598 | 5130 | ||
4599 | old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base); | 5131 | old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); |
4600 | 5132 | ||
4601 | /* FIXME: Handle errors. Failure to read either TSS or its | 5133 | /* FIXME: Handle errors. Failure to read either TSS or its |
4602 | * descriptor should generate a page fault. | 5134 | * descriptor should generate a page fault. |
@@ -4617,7 +5149,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4617 | } | 5149 | } |
4618 | } | 5150 | } |
4619 | 5151 | ||
4620 | if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) { | 5152 | desc_limit = get_desc_limit(&nseg_desc); |
5153 | if (!nseg_desc.p || | ||
5154 | ((desc_limit < 0x67 && (nseg_desc.type & 8)) || | ||
5155 | desc_limit < 0x2b)) { | ||
4621 | kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); | 5156 | kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); |
4622 | return 1; | 5157 | return 1; |
4623 | } | 5158 | } |
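The widened limit check encodes the two minimum TSS sizes: a 32-bit TSS (descriptor type bit 3 set) occupies 104 bytes, so its limit must be at least 104 - 1 = 0x67, while a 16-bit TSS occupies 44 bytes, requiring 44 - 1 = 0x2b. In other words:

    /* derived from the architected TSS layouts:
     *   sizeof(struct tss_segment_32) == 104  =>  limit >= 0x67
     *   sizeof(struct tss_segment_16) ==  44  =>  limit >= 0x2b  */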
@@ -4655,7 +5190,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4655 | &nseg_desc); | 5190 | &nseg_desc); |
4656 | } | 5191 | } |
4657 | 5192 | ||
4658 | kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS); | 5193 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS); |
4659 | seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); | 5194 | seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); |
4660 | tr_seg.type = 11; | 5195 | tr_seg.type = 11; |
4661 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); | 5196 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); |
@@ -4686,17 +5221,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
4686 | 5221 | ||
4687 | kvm_set_cr8(vcpu, sregs->cr8); | 5222 | kvm_set_cr8(vcpu, sregs->cr8); |
4688 | 5223 | ||
4689 | mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer; | 5224 | mmu_reset_needed |= vcpu->arch.efer != sregs->efer; |
4690 | kvm_x86_ops->set_efer(vcpu, sregs->efer); | 5225 | kvm_x86_ops->set_efer(vcpu, sregs->efer); |
4691 | kvm_set_apic_base(vcpu, sregs->apic_base); | 5226 | kvm_set_apic_base(vcpu, sregs->apic_base); |
4692 | 5227 | ||
4693 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | 5228 | mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; |
4694 | |||
4695 | mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0; | ||
4696 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); | 5229 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); |
4697 | vcpu->arch.cr0 = sregs->cr0; | 5230 | vcpu->arch.cr0 = sregs->cr0; |
4698 | 5231 | ||
4699 | mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; | 5232 | mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; |
4700 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); | 5233 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
4701 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { | 5234 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { |
4702 | load_pdptrs(vcpu, vcpu->arch.cr3); | 5235 | load_pdptrs(vcpu, vcpu->arch.cr3); |
@@ -4731,7 +5264,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
4731 | /* Older userspace won't unhalt the vcpu on reset. */ | 5264 | /* Older userspace won't unhalt the vcpu on reset. */ |
4732 | if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && | 5265 | if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && |
4733 | sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && | 5266 | sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && |
4734 | !(vcpu->arch.cr0 & X86_CR0_PE)) | 5267 | !is_protmode(vcpu)) |
4735 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 5268 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
4736 | 5269 | ||
4737 | vcpu_put(vcpu); | 5270 | vcpu_put(vcpu); |
@@ -4829,11 +5362,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
4829 | { | 5362 | { |
4830 | unsigned long vaddr = tr->linear_address; | 5363 | unsigned long vaddr = tr->linear_address; |
4831 | gpa_t gpa; | 5364 | gpa_t gpa; |
5365 | int idx; | ||
4832 | 5366 | ||
4833 | vcpu_load(vcpu); | 5367 | vcpu_load(vcpu); |
4834 | down_read(&vcpu->kvm->slots_lock); | 5368 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
4835 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); | 5369 | gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); |
4836 | up_read(&vcpu->kvm->slots_lock); | 5370 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
4837 | tr->physical_address = gpa; | 5371 | tr->physical_address = gpa; |
4838 | tr->valid = gpa != UNMAPPED_GVA; | 5372 | tr->valid = gpa != UNMAPPED_GVA; |
4839 | tr->writeable = 1; | 5373 | tr->writeable = 1; |
@@ -4914,14 +5448,14 @@ EXPORT_SYMBOL_GPL(fx_init); | |||
4914 | 5448 | ||
4915 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) | 5449 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) |
4916 | { | 5450 | { |
4917 | if (!vcpu->fpu_active || vcpu->guest_fpu_loaded) | 5451 | if (vcpu->guest_fpu_loaded) |
4918 | return; | 5452 | return; |
4919 | 5453 | ||
4920 | vcpu->guest_fpu_loaded = 1; | 5454 | vcpu->guest_fpu_loaded = 1; |
4921 | kvm_fx_save(&vcpu->arch.host_fx_image); | 5455 | kvm_fx_save(&vcpu->arch.host_fx_image); |
4922 | kvm_fx_restore(&vcpu->arch.guest_fx_image); | 5456 | kvm_fx_restore(&vcpu->arch.guest_fx_image); |
5457 | trace_kvm_fpu(1); | ||
4923 | } | 5458 | } |
4924 | EXPORT_SYMBOL_GPL(kvm_load_guest_fpu); | ||
4925 | 5459 | ||
4926 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | 5460 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) |
4927 | { | 5461 | { |
@@ -4932,8 +5466,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | |||
4932 | kvm_fx_save(&vcpu->arch.guest_fx_image); | 5466 | kvm_fx_save(&vcpu->arch.guest_fx_image); |
4933 | kvm_fx_restore(&vcpu->arch.host_fx_image); | 5467 | kvm_fx_restore(&vcpu->arch.host_fx_image); |
4934 | ++vcpu->stat.fpu_reload; | 5468 | ++vcpu->stat.fpu_reload; |
5469 | set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests); | ||
5470 | trace_kvm_fpu(0); | ||
4935 | } | 5471 | } |
4936 | EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); | ||
4937 | 5472 | ||
4938 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | 5473 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) |
4939 | { | 5474 | { |
@@ -5068,12 +5603,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
5068 | GFP_KERNEL); | 5603 | GFP_KERNEL); |
5069 | if (!vcpu->arch.mce_banks) { | 5604 | if (!vcpu->arch.mce_banks) { |
5070 | r = -ENOMEM; | 5605 | r = -ENOMEM; |
5071 | goto fail_mmu_destroy; | 5606 | goto fail_free_lapic; |
5072 | } | 5607 | } |
5073 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; | 5608 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; |
5074 | 5609 | ||
5075 | return 0; | 5610 | return 0; |
5076 | 5611 | fail_free_lapic: | |
5612 | kvm_free_lapic(vcpu); | ||
5077 | fail_mmu_destroy: | 5613 | fail_mmu_destroy: |
5078 | kvm_mmu_destroy(vcpu); | 5614 | kvm_mmu_destroy(vcpu); |
5079 | fail_free_pio_data: | 5615 | fail_free_pio_data: |
@@ -5084,10 +5620,13 @@ fail: | |||
5084 | 5620 | ||
5085 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | 5621 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) |
5086 | { | 5622 | { |
5623 | int idx; | ||
5624 | |||
5625 | kfree(vcpu->arch.mce_banks); | ||
5087 | kvm_free_lapic(vcpu); | 5626 | kvm_free_lapic(vcpu); |
5088 | down_read(&vcpu->kvm->slots_lock); | 5627 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
5089 | kvm_mmu_destroy(vcpu); | 5628 | kvm_mmu_destroy(vcpu); |
5090 | up_read(&vcpu->kvm->slots_lock); | 5629 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
5091 | free_page((unsigned long)vcpu->arch.pio_data); | 5630 | free_page((unsigned long)vcpu->arch.pio_data); |
5092 | } | 5631 | } |
5093 | 5632 | ||
@@ -5098,6 +5637,12 @@ struct kvm *kvm_arch_create_vm(void) | |||
5098 | if (!kvm) | 5637 | if (!kvm) |
5099 | return ERR_PTR(-ENOMEM); | 5638 | return ERR_PTR(-ENOMEM); |
5100 | 5639 | ||
5640 | kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); | ||
5641 | if (!kvm->arch.aliases) { | ||
5642 | kfree(kvm); | ||
5643 | return ERR_PTR(-ENOMEM); | ||
5644 | } | ||
5645 | |||
5101 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 5646 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
5102 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 5647 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
5103 | 5648 | ||
@@ -5154,16 +5699,18 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
5154 | put_page(kvm->arch.apic_access_page); | 5699 | put_page(kvm->arch.apic_access_page); |
5155 | if (kvm->arch.ept_identity_pagetable) | 5700 | if (kvm->arch.ept_identity_pagetable) |
5156 | put_page(kvm->arch.ept_identity_pagetable); | 5701 | put_page(kvm->arch.ept_identity_pagetable); |
5702 | cleanup_srcu_struct(&kvm->srcu); | ||
5703 | kfree(kvm->arch.aliases); | ||
5157 | kfree(kvm); | 5704 | kfree(kvm); |
5158 | } | 5705 | } |
5159 | 5706 | ||
5160 | int kvm_arch_set_memory_region(struct kvm *kvm, | 5707 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
5161 | struct kvm_userspace_memory_region *mem, | 5708 | struct kvm_memory_slot *memslot, |
5162 | struct kvm_memory_slot old, | 5709 | struct kvm_memory_slot old, |
5710 | struct kvm_userspace_memory_region *mem, | ||
5163 | int user_alloc) | 5711 | int user_alloc) |
5164 | { | 5712 | { |
5165 | int npages = mem->memory_size >> PAGE_SHIFT; | 5713 | int npages = memslot->npages; |
5166 | struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; | ||
5167 | 5714 | ||
5168 | /* To keep backward compatibility with older userspace, | 5715 | /* To keep backward compatibility with older userspace, |
5169 | * x86 needs to handle the !user_alloc case. | 5716 | * x86 needs to handle the !user_alloc case. |
@@ -5183,26 +5730,35 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
5183 | if (IS_ERR((void *)userspace_addr)) | 5730 | if (IS_ERR((void *)userspace_addr)) |
5184 | return PTR_ERR((void *)userspace_addr); | 5731 | return PTR_ERR((void *)userspace_addr); |
5185 | 5732 | ||
5186 | /* set userspace_addr atomically for kvm_hva_to_rmapp */ | ||
5187 | spin_lock(&kvm->mmu_lock); | ||
5188 | memslot->userspace_addr = userspace_addr; | 5733 | memslot->userspace_addr = userspace_addr; |
5189 | spin_unlock(&kvm->mmu_lock); | ||
5190 | } else { | ||
5191 | if (!old.user_alloc && old.rmap) { | ||
5192 | int ret; | ||
5193 | |||
5194 | down_write(¤t->mm->mmap_sem); | ||
5195 | ret = do_munmap(current->mm, old.userspace_addr, | ||
5196 | old.npages * PAGE_SIZE); | ||
5197 | up_write(¤t->mm->mmap_sem); | ||
5198 | if (ret < 0) | ||
5199 | printk(KERN_WARNING | ||
5200 | "kvm_vm_ioctl_set_memory_region: " | ||
5201 | "failed to munmap memory\n"); | ||
5202 | } | ||
5203 | } | 5734 | } |
5204 | } | 5735 | } |
5205 | 5736 | ||
5737 | |||
5738 | return 0; | ||
5739 | } | ||
5740 | |||
5741 | void kvm_arch_commit_memory_region(struct kvm *kvm, | ||
5742 | struct kvm_userspace_memory_region *mem, | ||
5743 | struct kvm_memory_slot old, | ||
5744 | int user_alloc) | ||
5745 | { | ||
5746 | |||
5747 | int npages = mem->memory_size >> PAGE_SHIFT; | ||
5748 | |||
5749 | if (!user_alloc && !old.user_alloc && old.rmap && !npages) { | ||
5750 | int ret; | ||
5751 | |||
5752 | down_write(¤t->mm->mmap_sem); | ||
5753 | ret = do_munmap(current->mm, old.userspace_addr, | ||
5754 | old.npages * PAGE_SIZE); | ||
5755 | up_write(¤t->mm->mmap_sem); | ||
5756 | if (ret < 0) | ||
5757 | printk(KERN_WARNING | ||
5758 | "kvm_vm_ioctl_set_memory_region: " | ||
5759 | "failed to munmap memory\n"); | ||
5760 | } | ||
5761 | |||
5206 | spin_lock(&kvm->mmu_lock); | 5762 | spin_lock(&kvm->mmu_lock); |
5207 | if (!kvm->arch.n_requested_mmu_pages) { | 5763 | if (!kvm->arch.n_requested_mmu_pages) { |
5208 | unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); | 5764 | unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); |
@@ -5211,8 +5767,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
5211 | 5767 | ||
5212 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 5768 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
5213 | spin_unlock(&kvm->mmu_lock); | 5769 | spin_unlock(&kvm->mmu_lock); |
5214 | |||
5215 | return 0; | ||
5216 | } | 5770 | } |
5217 | 5771 | ||
5218 | void kvm_arch_flush_shadow(struct kvm *kvm) | 5772 | void kvm_arch_flush_shadow(struct kvm *kvm) |
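Splitting kvm_arch_set_memory_region() into prepare/commit halves matches the SRCU-based memslot update in generic code: the prepare step runs before the new slot array is published and may still fail, while the commit step runs after publication and must not. The caller's sequence in virt/kvm/kvm_main.c is roughly (a sketch, not the verbatim code):

    r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
    if (r)
            goto out_free;

    /* publish the new array, then wait out all SRCU readers of the old one */
    rcu_assign_pointer(kvm->memslots, slots);
    synchronize_srcu_expedited(&kvm->srcu);

    kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);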
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 5eadea585d2a..2d101639bd8d 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define ARCH_X86_KVM_X86_H | 2 | #define ARCH_X86_KVM_X86_H |
3 | 3 | ||
4 | #include <linux/kvm_host.h> | 4 | #include <linux/kvm_host.h> |
5 | #include "kvm_cache_regs.h" | ||
5 | 6 | ||
6 | static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) | 7 | static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) |
7 | { | 8 | { |
@@ -35,4 +36,33 @@ static inline bool kvm_exception_is_soft(unsigned int nr) | |||
35 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | 36 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, |
36 | u32 function, u32 index); | 37 | u32 function, u32 index); |
37 | 38 | ||
39 | static inline bool is_protmode(struct kvm_vcpu *vcpu) | ||
40 | { | ||
41 | return kvm_read_cr0_bits(vcpu, X86_CR0_PE); | ||
42 | } | ||
43 | |||
44 | static inline int is_long_mode(struct kvm_vcpu *vcpu) | ||
45 | { | ||
46 | #ifdef CONFIG_X86_64 | ||
47 | return vcpu->arch.efer & EFER_LMA; | ||
48 | #else | ||
49 | return 0; | ||
50 | #endif | ||
51 | } | ||
52 | |||
53 | static inline int is_pae(struct kvm_vcpu *vcpu) | ||
54 | { | ||
55 | return kvm_read_cr4_bits(vcpu, X86_CR4_PAE); | ||
56 | } | ||
57 | |||
58 | static inline int is_pse(struct kvm_vcpu *vcpu) | ||
59 | { | ||
60 | return kvm_read_cr4_bits(vcpu, X86_CR4_PSE); | ||
61 | } | ||
62 | |||
63 | static inline int is_paging(struct kvm_vcpu *vcpu) | ||
64 | { | ||
65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); | ||
66 | } | ||
67 | |||
38 | #endif | 68 | #endif |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 7e59dc1d3fc2..2bdf628066bd 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -115,7 +115,7 @@ static void async_hcall(unsigned long call, unsigned long arg1, | |||
115 | local_irq_save(flags); | 115 | local_irq_save(flags); |
116 | if (lguest_data.hcall_status[next_call] != 0xFF) { | 116 | if (lguest_data.hcall_status[next_call] != 0xFF) { |
117 | /* Table full, so do normal hcall which will flush table. */ | 117 | /* Table full, so do normal hcall which will flush table. */ |
118 | kvm_hypercall4(call, arg1, arg2, arg3, arg4); | 118 | hcall(call, arg1, arg2, arg3, arg4); |
119 | } else { | 119 | } else { |
120 | lguest_data.hcalls[next_call].arg0 = call; | 120 | lguest_data.hcalls[next_call].arg0 = call; |
121 | lguest_data.hcalls[next_call].arg1 = arg1; | 121 | lguest_data.hcalls[next_call].arg1 = arg1; |
@@ -145,46 +145,45 @@ static void async_hcall(unsigned long call, unsigned long arg1, | |||
145 | * So, when we're in lazy mode, we call async_hcall() to store the call for | 145 | * So, when we're in lazy mode, we call async_hcall() to store the call for |
146 | * future processing: | 146 | * future processing: |
147 | */ | 147 | */ |
148 | static void lazy_hcall1(unsigned long call, | 148 | static void lazy_hcall1(unsigned long call, unsigned long arg1) |
149 | unsigned long arg1) | ||
150 | { | 149 | { |
151 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) | 150 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) |
152 | kvm_hypercall1(call, arg1); | 151 | hcall(call, arg1, 0, 0, 0); |
153 | else | 152 | else |
154 | async_hcall(call, arg1, 0, 0, 0); | 153 | async_hcall(call, arg1, 0, 0, 0); |
155 | } | 154 | } |
156 | 155 | ||
157 | /* You can imagine what lazy_hcall2, 3 and 4 look like. :*/ | 156 | /* You can imagine what lazy_hcall2, 3 and 4 look like. :*/ |
158 | static void lazy_hcall2(unsigned long call, | 157 | static void lazy_hcall2(unsigned long call, |
159 | unsigned long arg1, | 158 | unsigned long arg1, |
160 | unsigned long arg2) | 159 | unsigned long arg2) |
161 | { | 160 | { |
162 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) | 161 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) |
163 | kvm_hypercall2(call, arg1, arg2); | 162 | hcall(call, arg1, arg2, 0, 0); |
164 | else | 163 | else |
165 | async_hcall(call, arg1, arg2, 0, 0); | 164 | async_hcall(call, arg1, arg2, 0, 0); |
166 | } | 165 | } |
167 | 166 | ||
168 | static void lazy_hcall3(unsigned long call, | 167 | static void lazy_hcall3(unsigned long call, |
169 | unsigned long arg1, | 168 | unsigned long arg1, |
170 | unsigned long arg2, | 169 | unsigned long arg2, |
171 | unsigned long arg3) | 170 | unsigned long arg3) |
172 | { | 171 | { |
173 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) | 172 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) |
174 | kvm_hypercall3(call, arg1, arg2, arg3); | 173 | hcall(call, arg1, arg2, arg3, 0); |
175 | else | 174 | else |
176 | async_hcall(call, arg1, arg2, arg3, 0); | 175 | async_hcall(call, arg1, arg2, arg3, 0); |
177 | } | 176 | } |
178 | 177 | ||
179 | #ifdef CONFIG_X86_PAE | 178 | #ifdef CONFIG_X86_PAE |
180 | static void lazy_hcall4(unsigned long call, | 179 | static void lazy_hcall4(unsigned long call, |
181 | unsigned long arg1, | 180 | unsigned long arg1, |
182 | unsigned long arg2, | 181 | unsigned long arg2, |
183 | unsigned long arg3, | 182 | unsigned long arg3, |
184 | unsigned long arg4) | 183 | unsigned long arg4) |
185 | { | 184 | { |
186 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) | 185 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) |
187 | kvm_hypercall4(call, arg1, arg2, arg3, arg4); | 186 | hcall(call, arg1, arg2, arg3, arg4); |
188 | else | 187 | else |
189 | async_hcall(call, arg1, arg2, arg3, arg4); | 188 | async_hcall(call, arg1, arg2, arg3, arg4); |
190 | } | 189 | } |
@@ -196,13 +195,13 @@ static void lazy_hcall4(unsigned long call, | |||
196 | :*/ | 195 | :*/ |
197 | static void lguest_leave_lazy_mmu_mode(void) | 196 | static void lguest_leave_lazy_mmu_mode(void) |
198 | { | 197 | { |
199 | kvm_hypercall0(LHCALL_FLUSH_ASYNC); | 198 | hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0); |
200 | paravirt_leave_lazy_mmu(); | 199 | paravirt_leave_lazy_mmu(); |
201 | } | 200 | } |
202 | 201 | ||
203 | static void lguest_end_context_switch(struct task_struct *next) | 202 | static void lguest_end_context_switch(struct task_struct *next) |
204 | { | 203 | { |
205 | kvm_hypercall0(LHCALL_FLUSH_ASYNC); | 204 | hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0); |
206 | paravirt_end_context_switch(next); | 205 | paravirt_end_context_switch(next); |
207 | } | 206 | } |
208 | 207 | ||
@@ -286,7 +285,7 @@ static void lguest_write_idt_entry(gate_desc *dt, | |||
286 | /* Keep the local copy up to date. */ | 285 | /* Keep the local copy up to date. */ |
287 | native_write_idt_entry(dt, entrynum, g); | 286 | native_write_idt_entry(dt, entrynum, g); |
288 | /* Tell Host about this new entry. */ | 287 | /* Tell Host about this new entry. */ |
289 | kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]); | 288 | hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1], 0); |
290 | } | 289 | } |
291 | 290 | ||
292 | /* | 291 | /* |
@@ -300,7 +299,7 @@ static void lguest_load_idt(const struct desc_ptr *desc) | |||
300 | struct desc_struct *idt = (void *)desc->address; | 299 | struct desc_struct *idt = (void *)desc->address; |
301 | 300 | ||
302 | for (i = 0; i < (desc->size+1)/8; i++) | 301 | for (i = 0; i < (desc->size+1)/8; i++) |
303 | kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b); | 302 | hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b, 0); |
304 | } | 303 | } |
305 | 304 | ||
306 | /* | 305 | /* |
@@ -321,7 +320,7 @@ static void lguest_load_gdt(const struct desc_ptr *desc) | |||
321 | struct desc_struct *gdt = (void *)desc->address; | 320 | struct desc_struct *gdt = (void *)desc->address; |
322 | 321 | ||
323 | for (i = 0; i < (desc->size+1)/8; i++) | 322 | for (i = 0; i < (desc->size+1)/8; i++) |
324 | kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b); | 323 | hcall(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b, 0); |
325 | } | 324 | } |
326 | 325 | ||
327 | /* | 326 | /* |
@@ -334,8 +333,8 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, | |||
334 | { | 333 | { |
335 | native_write_gdt_entry(dt, entrynum, desc, type); | 334 | native_write_gdt_entry(dt, entrynum, desc, type); |
336 | /* Tell Host about this new entry. */ | 335 | /* Tell Host about this new entry. */ |
337 | kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, entrynum, | 336 | hcall(LHCALL_LOAD_GDT_ENTRY, entrynum, |
338 | dt[entrynum].a, dt[entrynum].b); | 337 | dt[entrynum].a, dt[entrynum].b, 0); |
339 | } | 338 | } |
340 | 339 | ||
341 | /* | 340 | /* |
@@ -931,7 +930,7 @@ static int lguest_clockevent_set_next_event(unsigned long delta, | |||
931 | } | 930 | } |
932 | 931 | ||
933 | /* Please wake us this far in the future. */ | 932 | /* Please wake us this far in the future. */ |
934 | kvm_hypercall1(LHCALL_SET_CLOCKEVENT, delta); | 933 | hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0, 0); |
935 | return 0; | 934 | return 0; |
936 | } | 935 | } |
937 | 936 | ||
@@ -942,7 +941,7 @@ static void lguest_clockevent_set_mode(enum clock_event_mode mode, | |||
942 | case CLOCK_EVT_MODE_UNUSED: | 941 | case CLOCK_EVT_MODE_UNUSED: |
943 | case CLOCK_EVT_MODE_SHUTDOWN: | 942 | case CLOCK_EVT_MODE_SHUTDOWN: |
944 | /* A 0 argument shuts the clock down. */ | 943 | /* A 0 argument shuts the clock down. */ |
945 | kvm_hypercall0(LHCALL_SET_CLOCKEVENT); | 944 | hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0, 0); |
946 | break; | 945 | break; |
947 | case CLOCK_EVT_MODE_ONESHOT: | 946 | case CLOCK_EVT_MODE_ONESHOT: |
948 | /* This is what we expect. */ | 947 | /* This is what we expect. */ |
@@ -1100,7 +1099,7 @@ static void set_lguest_basic_apic_ops(void) | |||
1100 | /* STOP! Until an interrupt comes in. */ | 1099 | /* STOP! Until an interrupt comes in. */ |
1101 | static void lguest_safe_halt(void) | 1100 | static void lguest_safe_halt(void) |
1102 | { | 1101 | { |
1103 | kvm_hypercall0(LHCALL_HALT); | 1102 | hcall(LHCALL_HALT, 0, 0, 0, 0); |
1104 | } | 1103 | } |
1105 | 1104 | ||
1106 | /* | 1105 | /* |
@@ -1112,8 +1111,8 @@ static void lguest_safe_halt(void) | |||
1112 | */ | 1111 | */ |
1113 | static void lguest_power_off(void) | 1112 | static void lguest_power_off(void) |
1114 | { | 1113 | { |
1115 | kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"), | 1114 | hcall(LHCALL_SHUTDOWN, __pa("Power down"), |
1116 | LGUEST_SHUTDOWN_POWEROFF); | 1115 | LGUEST_SHUTDOWN_POWEROFF, 0, 0); |
1117 | } | 1116 | } |
1118 | 1117 | ||
1119 | /* | 1118 | /* |
@@ -1123,7 +1122,7 @@ static void lguest_power_off(void) | |||
1123 | */ | 1122 | */ |
1124 | static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p) | 1123 | static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p) |
1125 | { | 1124 | { |
1126 | kvm_hypercall2(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF); | 1125 | hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0, 0); |
1127 | /* The hcall won't return, but to keep gcc happy, we're "done". */ | 1126 | /* The hcall won't return, but to keep gcc happy, we're "done". */ |
1128 | return NOTIFY_DONE; | 1127 | return NOTIFY_DONE; |
1129 | } | 1128 | } |
@@ -1162,7 +1161,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) | |||
1162 | len = sizeof(scratch) - 1; | 1161 | len = sizeof(scratch) - 1; |
1163 | scratch[len] = '\0'; | 1162 | scratch[len] = '\0'; |
1164 | memcpy(scratch, buf, len); | 1163 | memcpy(scratch, buf, len); |
1165 | kvm_hypercall1(LHCALL_NOTIFY, __pa(scratch)); | 1164 | hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0, 0); |
1166 | 1165 | ||
1167 | /* This routine returns the number of bytes actually written. */ | 1166 | /* This routine returns the number of bytes actually written. */ |
1168 | return len; | 1167 | return len; |
@@ -1174,7 +1173,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) | |||
1174 | */ | 1173 | */ |
1175 | static void lguest_restart(char *reason) | 1174 | static void lguest_restart(char *reason) |
1176 | { | 1175 | { |
1177 | kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART); | 1176 | hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0, 0); |
1178 | } | 1177 | } |
1179 | 1178 | ||
1180 | /*G:050 | 1179 | /*G:050 |
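All of the boot.c changes above are one conversion: lguest stops borrowing the KVM hypercall instruction and traps into the Host with a plain software interrupt instead, keeping the same register layout (call in %eax, arguments in %ebx, %ecx, %edx, %esi -- note i386_head.S below still loads %ebx unchanged). The replacement hcall() in asm/lguest_hcall.h looks roughly like:

    static inline unsigned long
    hcall(unsigned long call, unsigned long arg1, unsigned long arg2,
          unsigned long arg3, unsigned long arg4)
    {
            /* "int" is the Intel instruction to trigger a trap. */
            asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
                         : "=a"(call)    /* the Host returns status in %eax */
                         : "a"(call), "b"(arg1), "c"(arg2), "d"(arg3), "S"(arg4)
                         : "memory");    /* hypercalls may write guest memory */
            return call;
    }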
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index 27eac0faee48..4f420c2f2d55 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S | |||
@@ -32,7 +32,7 @@ ENTRY(lguest_entry) | |||
32 | */ | 32 | */ |
33 | movl $LHCALL_LGUEST_INIT, %eax | 33 | movl $LHCALL_LGUEST_INIT, %eax |
34 | movl $lguest_data - __PAGE_OFFSET, %ebx | 34 | movl $lguest_data - __PAGE_OFFSET, %ebx |
35 | .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ | 35 | int $LGUEST_TRAP_ENTRY |
36 | 36 | ||
37 | /* Set up the initial stack so we can run C code. */ | 37 | /* Set up the initial stack so we can run C code. */ |
38 | movl $(init_thread_union+THREAD_SIZE),%esp | 38 | movl $(init_thread_union+THREAD_SIZE),%esp |
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 05d686bbbe9f..3ac4a8ade627 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -14,7 +14,7 @@ $(obj)/inat.o: $(obj)/inat-tables.c | |||
14 | 14 | ||
15 | clean-files := inat-tables.c | 15 | clean-files := inat-tables.c |
16 | 16 | ||
17 | obj-$(CONFIG_SMP) += msr-smp.o | 17 | obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o |
18 | 18 | ||
19 | lib-y := delay.o | 19 | lib-y := delay.o |
20 | lib-y += thunk_$(BITS).o | 20 | lib-y += thunk_$(BITS).o |
@@ -35,9 +35,10 @@ ifneq ($(CONFIG_X86_CMPXCHG64),y) | |||
35 | endif | 35 | endif |
36 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o | 36 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o |
37 | else | 37 | else |
38 | obj-y += io_64.o iomap_copy_64.o | 38 | obj-y += iomap_copy_64.o |
39 | lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o | 39 | lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o |
40 | lib-y += thunk_64.o clear_page_64.o copy_page_64.o | 40 | lib-y += thunk_64.o clear_page_64.o copy_page_64.o |
41 | lib-y += memmove_64.o memset_64.o | 41 | lib-y += memmove_64.o memset_64.o |
42 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o | 42 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o |
43 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o | ||
43 | endif | 44 | endif |
diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c new file mode 100644 index 000000000000..a3c668875038 --- /dev/null +++ b/arch/x86/lib/cache-smp.c | |||
@@ -0,0 +1,19 @@ | |||
1 | #include <linux/smp.h> | ||
2 | #include <linux/module.h> | ||
3 | |||
4 | static void __wbinvd(void *dummy) | ||
5 | { | ||
6 | wbinvd(); | ||
7 | } | ||
8 | |||
9 | void wbinvd_on_cpu(int cpu) | ||
10 | { | ||
11 | smp_call_function_single(cpu, __wbinvd, NULL, 1); | ||
12 | } | ||
13 | EXPORT_SYMBOL(wbinvd_on_cpu); | ||
14 | |||
15 | int wbinvd_on_all_cpus(void) | ||
16 | { | ||
17 | return on_each_cpu(__wbinvd, NULL, 1); | ||
18 | } | ||
19 | EXPORT_SYMBOL(wbinvd_on_all_cpus); | ||
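wbinvd_on_cpu() runs the flush synchronously on one remote CPU (smp_call_function_single() with wait=1); wbinvd_on_all_cpus() runs it on every online CPU, including the caller. A typical (illustrative) call site is fencing memory handed to a non-snooping device:

    /* make every CPU's cached data visible before non-coherent DMA */
    wbinvd_on_all_cpus();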
diff --git a/arch/x86/lib/io_64.c b/arch/x86/lib/io_64.c deleted file mode 100644 index 3f1eb59b5f08..000000000000 --- a/arch/x86/lib/io_64.c +++ /dev/null | |||
@@ -1,25 +0,0 @@ | |||
1 | #include <linux/string.h> | ||
2 | #include <linux/module.h> | ||
3 | #include <asm/io.h> | ||
4 | |||
5 | void __memcpy_toio(unsigned long dst, const void *src, unsigned len) | ||
6 | { | ||
7 | __inline_memcpy((void *)dst, src, len); | ||
8 | } | ||
9 | EXPORT_SYMBOL(__memcpy_toio); | ||
10 | |||
11 | void __memcpy_fromio(void *dst, unsigned long src, unsigned len) | ||
12 | { | ||
13 | __inline_memcpy(dst, (const void *)src, len); | ||
14 | } | ||
15 | EXPORT_SYMBOL(__memcpy_fromio); | ||
16 | |||
17 | void memset_io(volatile void __iomem *a, int b, size_t c) | ||
18 | { | ||
19 | /* | ||
20 | * TODO: memset can mangle the IO patterns quite a bit. | ||
21 | * perhaps it would be better to use a dumb one: | ||
22 | */ | ||
23 | memset((void *)a, b, c); | ||
24 | } | ||
25 | EXPORT_SYMBOL(memset_io); | ||
diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S new file mode 100644 index 000000000000..15acecf0d7aa --- /dev/null +++ b/arch/x86/lib/rwsem_64.S | |||
@@ -0,0 +1,81 @@ | |||
1 | /* | ||
2 | * x86-64 rwsem wrappers | ||
3 | * | ||
4 | * This interfaces the inline asm code to the slow-path | ||
5 | * C routines. We need to save the call-clobbered regs | ||
6 | * that the asm does not mark as clobbered, and move the | ||
7 | * argument from %rax to %rdi. | ||
8 | * | ||
9 | * NOTE! We don't need to save %rax, because the functions | ||
10 | * will always return the semaphore pointer in %rax (which | ||
11 | * is also the input argument to these helpers) | ||
12 | * | ||
13 | * The following can clobber %rdx because the asm clobbers it: | ||
14 | * call_rwsem_down_write_failed | ||
15 | * call_rwsem_wake | ||
16 | * but %rdi, %rsi, %rcx, %r8-r11 always need saving. | ||
17 | */ | ||
18 | |||
19 | #include <linux/linkage.h> | ||
20 | #include <asm/rwlock.h> | ||
21 | #include <asm/alternative-asm.h> | ||
22 | #include <asm/frame.h> | ||
23 | #include <asm/dwarf2.h> | ||
24 | |||
25 | #define save_common_regs \ | ||
26 | pushq %rdi; \ | ||
27 | pushq %rsi; \ | ||
28 | pushq %rcx; \ | ||
29 | pushq %r8; \ | ||
30 | pushq %r9; \ | ||
31 | pushq %r10; \ | ||
32 | pushq %r11 | ||
33 | |||
34 | #define restore_common_regs \ | ||
35 | popq %r11; \ | ||
36 | popq %r10; \ | ||
37 | popq %r9; \ | ||
38 | popq %r8; \ | ||
39 | popq %rcx; \ | ||
40 | popq %rsi; \ | ||
41 | popq %rdi | ||
42 | |||
43 | /* Fix up special calling conventions */ | ||
44 | ENTRY(call_rwsem_down_read_failed) | ||
45 | save_common_regs | ||
46 | pushq %rdx | ||
47 | movq %rax,%rdi | ||
48 | call rwsem_down_read_failed | ||
49 | popq %rdx | ||
50 | restore_common_regs | ||
51 | ret | ||
52 | ENDPROC(call_rwsem_down_read_failed) | ||
53 | |||
54 | ENTRY(call_rwsem_down_write_failed) | ||
55 | save_common_regs | ||
56 | movq %rax,%rdi | ||
57 | call rwsem_down_write_failed | ||
58 | restore_common_regs | ||
59 | ret | ||
60 | ENDPROC(call_rwsem_down_write_failed) | ||
61 | |||
62 | ENTRY(call_rwsem_wake) | ||
63 | decw %dx /* do nothing if still outstanding active readers */ | ||
64 | jnz 1f | ||
65 | save_common_regs | ||
66 | movq %rax,%rdi | ||
67 | call rwsem_wake | ||
68 | restore_common_regs | ||
69 | 1: ret | ||
70 | ENDPROC(call_rwsem_wake) | ||
71 | |||
72 | /* Fix up special calling conventions */ | ||
73 | ENTRY(call_rwsem_downgrade_wake) | ||
74 | save_common_regs | ||
75 | pushq %rdx | ||
76 | movq %rax,%rdi | ||
77 | call rwsem_downgrade_wake | ||
78 | popq %rdx | ||
79 | restore_common_regs | ||
80 | ret | ||
81 | ENDPROC(call_rwsem_downgrade_wake) | ||
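For context, a hedged sketch of the caller side: the rwsem fast path is inline asm that pins the semaphore pointer to %rax, which is why these wrappers only have to copy %rax to %rdi before calling the C slow path. This is simplified from the asm/rwsem.h idiom, not taken verbatim from this patch:

    /* sketch; the real inline asm lives in arch/x86/include/asm/rwsem.h */
    static inline void down_read(struct rw_semaphore *sem)
    {
            asm volatile(LOCK_PREFIX "incq (%1)\n\t" /* count++ */
                         "jns 1f\n\t"   /* non-negative: uncontended, done */
                         "call call_rwsem_down_read_failed\n"
                         "1:"
                         : "+m" (sem->count)
                         : "a" (sem)    /* semaphore pointer pinned to %rax */
                         : "memory", "cc");
    }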
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 71da1bca13cb..738e6593799d 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c | |||
@@ -18,7 +18,7 @@ static inline pte_t gup_get_pte(pte_t *ptep) | |||
18 | #else | 18 | #else |
19 | /* | 19 | /* |
20 | * With get_user_pages_fast, we walk down the pagetables without taking | 20 | * With get_user_pages_fast, we walk down the pagetables without taking |
21 | * any locks. For this we would like to load the pointers atoimcally, | 21 | * any locks. For this we would like to load the pointers atomically, |
22 | * but that is not possible (without expensive cmpxchg8b) on PAE. What | 22 | * but that is not possible (without expensive cmpxchg8b) on PAE. What |
23 | * we do have is the guarantee that a pte will only either go from not | 23 | * we do have is the guarantee that a pte will only either go from not |
24 | * present to present, or present to not present or both -- it will not | 24 | * present to present, or present to not present or both -- it will not |
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index f46c340727b8..069ce7c37c01 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c | |||
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/hugetlb.h> | 10 | #include <linux/hugetlb.h> |
11 | #include <linux/pagemap.h> | 11 | #include <linux/pagemap.h> |
12 | #include <linux/slab.h> | ||
13 | #include <linux/err.h> | 12 | #include <linux/err.h> |
14 | #include <linux/sysctl.h> | 13 | #include <linux/sysctl.h> |
15 | #include <asm/mman.h> | 14 | #include <asm/mman.h> |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d406c5239019..b278535b14aa 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -1,3 +1,4 @@ | |||
1 | #include <linux/gfp.h> | ||
1 | #include <linux/initrd.h> | 2 | #include <linux/initrd.h> |
2 | #include <linux/ioport.h> | 3 | #include <linux/ioport.h> |
3 | #include <linux/swap.h> | 4 | #include <linux/swap.h> |
@@ -266,16 +267,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
266 | if (!after_bootmem) | 267 | if (!after_bootmem) |
267 | find_early_table_space(end, use_pse, use_gbpages); | 268 | find_early_table_space(end, use_pse, use_gbpages); |
268 | 269 | ||
269 | #ifdef CONFIG_X86_32 | ||
270 | for (i = 0; i < nr_range; i++) | ||
271 | kernel_physical_mapping_init(mr[i].start, mr[i].end, | ||
272 | mr[i].page_size_mask); | ||
273 | ret = end; | ||
274 | #else /* CONFIG_X86_64 */ | ||
275 | for (i = 0; i < nr_range; i++) | 270 | for (i = 0; i < nr_range; i++) |
276 | ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, | 271 | ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, |
277 | mr[i].page_size_mask); | 272 | mr[i].page_size_mask); |
278 | #endif | ||
279 | 273 | ||
280 | #ifdef CONFIG_X86_32 | 274 | #ifdef CONFIG_X86_32 |
281 | early_ioremap_page_table_range_init(); | 275 | early_ioremap_page_table_range_init(); |
@@ -338,11 +332,23 @@ int devmem_is_allowed(unsigned long pagenr) | |||
338 | 332 | ||
339 | void free_init_pages(char *what, unsigned long begin, unsigned long end) | 333 | void free_init_pages(char *what, unsigned long begin, unsigned long end) |
340 | { | 334 | { |
341 | unsigned long addr = begin; | 335 | unsigned long addr; |
336 | unsigned long begin_aligned, end_aligned; | ||
337 | |||
338 | /* Make sure boundaries are page aligned */ | ||
339 | begin_aligned = PAGE_ALIGN(begin); | ||
340 | end_aligned = end & PAGE_MASK; | ||
342 | 341 | ||
343 | if (addr >= end) | 342 | if (WARN_ON(begin_aligned != begin || end_aligned != end)) { |
343 | begin = begin_aligned; | ||
344 | end = end_aligned; | ||
345 | } | ||
346 | |||
347 | if (begin >= end) | ||
344 | return; | 348 | return; |
345 | 349 | ||
350 | addr = begin; | ||
351 | |||
346 | /* | 352 | /* |
347 | * If debugging page accesses then do not free this memory but | 353 | * If debugging page accesses then do not free this memory but |
348 | * mark them not present - any buggy init-section access will | 354 | * mark them not present - any buggy init-section access will |
@@ -350,7 +356,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) | |||
350 | */ | 356 | */ |
351 | #ifdef CONFIG_DEBUG_PAGEALLOC | 357 | #ifdef CONFIG_DEBUG_PAGEALLOC |
352 | printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", | 358 | printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", |
353 | begin, PAGE_ALIGN(end)); | 359 | begin, end); |
354 | set_memory_np(begin, (end - begin) >> PAGE_SHIFT); | 360 | set_memory_np(begin, (end - begin) >> PAGE_SHIFT); |
355 | #else | 361 | #else |
356 | /* | 362 | /* |
@@ -365,8 +371,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) | |||
365 | for (; addr < end; addr += PAGE_SIZE) { | 371 | for (; addr < end; addr += PAGE_SIZE) { |
366 | ClearPageReserved(virt_to_page(addr)); | 372 | ClearPageReserved(virt_to_page(addr)); |
367 | init_page_count(virt_to_page(addr)); | 373 | init_page_count(virt_to_page(addr)); |
368 | memset((void *)(addr & ~(PAGE_SIZE-1)), | 374 | memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); |
369 | POISON_FREE_INITMEM, PAGE_SIZE); | ||
370 | free_page(addr); | 375 | free_page(addr); |
371 | totalram_pages++; | 376 | totalram_pages++; |
372 | } | 377 | } |
@@ -383,6 +388,15 @@ void free_initmem(void) | |||
383 | #ifdef CONFIG_BLK_DEV_INITRD | 388 | #ifdef CONFIG_BLK_DEV_INITRD |
384 | void free_initrd_mem(unsigned long start, unsigned long end) | 389 | void free_initrd_mem(unsigned long start, unsigned long end) |
385 | { | 390 | { |
386 | free_init_pages("initrd memory", start, end); | 391 | /* |
392 | * end may not be page aligned, and we cannot align it here: | ||
393 | * the decompressor could be confused by an aligned initrd_end. | ||
394 | * We already reserved the trailing partial page earlier, in | ||
395 | * - i386_start_kernel() | ||
396 | * - x86_64_start_kernel() | ||
397 | * - relocate_initrd() | ||
398 | * So PAGE_ALIGN() is safe here and frees the trailing partial page too. | ||
399 | */ | ||
400 | free_init_pages("initrd memory", start, PAGE_ALIGN(end)); | ||
387 | } | 401 | } |
388 | #endif | 402 | #endif |
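Illustration of the PAGE_ALIGN() reasoning above (example values are mine, not from the patch): rounding end up to the next page boundary means the trailing partial page of the initrd is freed along with the rest.

    /* as in the kernel headers, for a 4 KiB page: */
    #define PAGE_SIZE   4096UL
    #define PAGE_MASK   (~(PAGE_SIZE - 1))
    #define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)

    /* e.g. end = 0x01234567 -> PAGE_ALIGN(end) = 0x01235000 */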
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c973f8e2a6cf..bca79091b9d6 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -25,11 +25,11 @@ | |||
25 | #include <linux/pfn.h> | 25 | #include <linux/pfn.h> |
26 | #include <linux/poison.h> | 26 | #include <linux/poison.h> |
27 | #include <linux/bootmem.h> | 27 | #include <linux/bootmem.h> |
28 | #include <linux/slab.h> | ||
29 | #include <linux/proc_fs.h> | 28 | #include <linux/proc_fs.h> |
30 | #include <linux/memory_hotplug.h> | 29 | #include <linux/memory_hotplug.h> |
31 | #include <linux/initrd.h> | 30 | #include <linux/initrd.h> |
32 | #include <linux/cpumask.h> | 31 | #include <linux/cpumask.h> |
32 | #include <linux/gfp.h> | ||
33 | 33 | ||
34 | #include <asm/asm.h> | 34 | #include <asm/asm.h> |
35 | #include <asm/bios_ebda.h> | 35 | #include <asm/bios_ebda.h> |
@@ -241,6 +241,7 @@ kernel_physical_mapping_init(unsigned long start, | |||
241 | unsigned long page_size_mask) | 241 | unsigned long page_size_mask) |
242 | { | 242 | { |
243 | int use_pse = page_size_mask == (1<<PG_LEVEL_2M); | 243 | int use_pse = page_size_mask == (1<<PG_LEVEL_2M); |
244 | unsigned long last_map_addr = end; | ||
244 | unsigned long start_pfn, end_pfn; | 245 | unsigned long start_pfn, end_pfn; |
245 | pgd_t *pgd_base = swapper_pg_dir; | 246 | pgd_t *pgd_base = swapper_pg_dir; |
246 | int pgd_idx, pmd_idx, pte_ofs; | 247 | int pgd_idx, pmd_idx, pte_ofs; |
@@ -341,9 +342,10 @@ repeat: | |||
341 | prot = PAGE_KERNEL_EXEC; | 342 | prot = PAGE_KERNEL_EXEC; |
342 | 343 | ||
343 | pages_4k++; | 344 | pages_4k++; |
344 | if (mapping_iter == 1) | 345 | if (mapping_iter == 1) { |
345 | set_pte(pte, pfn_pte(pfn, init_prot)); | 346 | set_pte(pte, pfn_pte(pfn, init_prot)); |
346 | else | 347 | last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE; |
348 | } else | ||
347 | set_pte(pte, pfn_pte(pfn, prot)); | 349 | set_pte(pte, pfn_pte(pfn, prot)); |
348 | } | 350 | } |
349 | } | 351 | } |
@@ -368,7 +370,7 @@ repeat: | |||
368 | mapping_iter = 2; | 370 | mapping_iter = 2; |
369 | goto repeat; | 371 | goto repeat; |
370 | } | 372 | } |
371 | return 0; | 373 | return last_map_addr; |
372 | } | 374 | } |
373 | 375 | ||
374 | pte_t *kmap_pte; | 376 | pte_t *kmap_pte; |
@@ -748,6 +750,7 @@ static void __init zone_sizes_init(void) | |||
748 | free_area_init_nodes(max_zone_pfns); | 750 | free_area_init_nodes(max_zone_pfns); |
749 | } | 751 | } |
750 | 752 | ||
753 | #ifndef CONFIG_NO_BOOTMEM | ||
751 | static unsigned long __init setup_node_bootmem(int nodeid, | 754 | static unsigned long __init setup_node_bootmem(int nodeid, |
752 | unsigned long start_pfn, | 755 | unsigned long start_pfn, |
753 | unsigned long end_pfn, | 756 | unsigned long end_pfn, |
@@ -764,13 +767,14 @@ static unsigned long __init setup_node_bootmem(int nodeid, | |||
764 | printk(KERN_INFO " node %d bootmap %08lx - %08lx\n", | 767 | printk(KERN_INFO " node %d bootmap %08lx - %08lx\n", |
765 | nodeid, bootmap, bootmap + bootmap_size); | 768 | nodeid, bootmap, bootmap + bootmap_size); |
766 | free_bootmem_with_active_regions(nodeid, end_pfn); | 769 | free_bootmem_with_active_regions(nodeid, end_pfn); |
767 | early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
768 | 770 | ||
769 | return bootmap + bootmap_size; | 771 | return bootmap + bootmap_size; |
770 | } | 772 | } |
773 | #endif | ||
771 | 774 | ||
772 | void __init setup_bootmem_allocator(void) | 775 | void __init setup_bootmem_allocator(void) |
773 | { | 776 | { |
777 | #ifndef CONFIG_NO_BOOTMEM | ||
774 | int nodeid; | 778 | int nodeid; |
775 | unsigned long bootmap_size, bootmap; | 779 | unsigned long bootmap_size, bootmap; |
776 | /* | 780 | /* |
@@ -782,11 +786,13 @@ void __init setup_bootmem_allocator(void) | |||
782 | if (bootmap == -1L) | 786 | if (bootmap == -1L) |
783 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); | 787 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); |
784 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); | 788 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); |
789 | #endif | ||
785 | 790 | ||
786 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", | 791 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", |
787 | max_pfn_mapped<<PAGE_SHIFT); | 792 | max_pfn_mapped<<PAGE_SHIFT); |
788 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); | 793 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); |
789 | 794 | ||
795 | #ifndef CONFIG_NO_BOOTMEM | ||
790 | for_each_online_node(nodeid) { | 796 | for_each_online_node(nodeid) { |
791 | unsigned long start_pfn, end_pfn; | 797 | unsigned long start_pfn, end_pfn; |
792 | 798 | ||
@@ -804,6 +810,7 @@ void __init setup_bootmem_allocator(void) | |||
804 | bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, | 810 | bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, |
805 | bootmap); | 811 | bootmap); |
806 | } | 812 | } |
813 | #endif | ||
807 | 814 | ||
808 | after_bootmem = 1; | 815 | after_bootmem = 1; |
809 | } | 816 | } |
@@ -892,8 +899,7 @@ void __init mem_init(void) | |||
892 | reservedpages << (PAGE_SHIFT-10), | 899 | reservedpages << (PAGE_SHIFT-10), |
893 | datasize >> 10, | 900 | datasize >> 10, |
894 | initsize >> 10, | 901 | initsize >> 10, |
895 | (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) | 902 | totalhigh_pages << (PAGE_SHIFT-10)); |
896 | ); | ||
897 | 903 | ||
898 | printk(KERN_INFO "virtual kernel memory layout:\n" | 904 | printk(KERN_INFO "virtual kernel memory layout:\n" |
899 | " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" | 905 | " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5198b9bb34ef..ee41bba315d1 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/module.h> | 29 | #include <linux/module.h> |
30 | #include <linux/memory_hotplug.h> | 30 | #include <linux/memory_hotplug.h> |
31 | #include <linux/nmi.h> | 31 | #include <linux/nmi.h> |
32 | #include <linux/gfp.h> | ||
32 | 33 | ||
33 | #include <asm/processor.h> | 34 | #include <asm/processor.h> |
34 | #include <asm/bios_ebda.h> | 35 | #include <asm/bios_ebda.h> |
@@ -49,6 +50,7 @@ | |||
49 | #include <asm/numa.h> | 50 | #include <asm/numa.h> |
50 | #include <asm/cacheflush.h> | 51 | #include <asm/cacheflush.h> |
51 | #include <asm/init.h> | 52 | #include <asm/init.h> |
53 | #include <linux/bootmem.h> | ||
52 | 54 | ||
53 | static unsigned long dma_reserve __initdata; | 55 | static unsigned long dma_reserve __initdata; |
54 | 56 | ||
@@ -571,6 +573,7 @@ kernel_physical_mapping_init(unsigned long start, | |||
571 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | 573 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
572 | int acpi, int k8) | 574 | int acpi, int k8) |
573 | { | 575 | { |
576 | #ifndef CONFIG_NO_BOOTMEM | ||
574 | unsigned long bootmap_size, bootmap; | 577 | unsigned long bootmap_size, bootmap; |
575 | 578 | ||
576 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; | 579 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; |
@@ -578,13 +581,15 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | |||
578 | PAGE_SIZE); | 581 | PAGE_SIZE); |
579 | if (bootmap == -1L) | 582 | if (bootmap == -1L) |
580 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); | 583 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); |
584 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); | ||
581 | /* don't touch min_low_pfn */ | 585 | /* don't touch min_low_pfn */ |
582 | bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, | 586 | bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, |
583 | 0, end_pfn); | 587 | 0, end_pfn); |
584 | e820_register_active_regions(0, start_pfn, end_pfn); | 588 | e820_register_active_regions(0, start_pfn, end_pfn); |
585 | free_bootmem_with_active_regions(0, end_pfn); | 589 | free_bootmem_with_active_regions(0, end_pfn); |
586 | early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); | 590 | #else |
587 | reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); | 591 | e820_register_active_regions(0, start_pfn, end_pfn); |
592 | #endif | ||
588 | } | 593 | } |
589 | #endif | 594 | #endif |
590 | 595 | ||
@@ -616,6 +621,21 @@ void __init paging_init(void) | |||
616 | */ | 621 | */ |
617 | #ifdef CONFIG_MEMORY_HOTPLUG | 622 | #ifdef CONFIG_MEMORY_HOTPLUG |
618 | /* | 623 | /* |
624 | * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need | ||
625 | * updating. | ||
626 | */ | ||
627 | static void update_end_of_memory_vars(u64 start, u64 size) | ||
628 | { | ||
629 | unsigned long end_pfn = PFN_UP(start + size); | ||
630 | |||
631 | if (end_pfn > max_pfn) { | ||
632 | max_pfn = end_pfn; | ||
633 | max_low_pfn = end_pfn; | ||
634 | high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; | ||
635 | } | ||
636 | } | ||
637 | |||
638 | /* | ||
619 | * Memory is added always to NORMAL zone. This means you will never get | 639 | * Memory is added always to NORMAL zone. This means you will never get |
620 | * additional DMA/DMA32 memory. | 640 | * additional DMA/DMA32 memory. |
621 | */ | 641 | */ |
@@ -634,6 +654,9 @@ int arch_add_memory(int nid, u64 start, u64 size) | |||
634 | ret = __add_pages(nid, zone, start_pfn, nr_pages); | 654 | ret = __add_pages(nid, zone, start_pfn, nr_pages); |
635 | WARN_ON_ONCE(ret); | 655 | WARN_ON_ONCE(ret); |
636 | 656 | ||
657 | /* update max_pfn, max_low_pfn and high_memory */ | ||
658 | update_end_of_memory_vars(start, size); | ||
659 | |||
637 | return ret; | 660 | return ret; |
638 | } | 661 | } |
639 | EXPORT_SYMBOL_GPL(arch_add_memory); | 662 | EXPORT_SYMBOL_GPL(arch_add_memory); |
@@ -955,7 +978,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) | |||
955 | if (pmd_none(*pmd)) { | 978 | if (pmd_none(*pmd)) { |
956 | pte_t entry; | 979 | pte_t entry; |
957 | 980 | ||
958 | p = vmemmap_alloc_block(PMD_SIZE, node); | 981 | p = vmemmap_alloc_block_buf(PMD_SIZE, node); |
959 | if (!p) | 982 | if (!p) |
960 | return -ENOMEM; | 983 | return -ENOMEM; |
961 | 984 | ||
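For reference, the PFN_UP() used by update_end_of_memory_vars() rounds a byte address up to a page-frame number; a sketch matching include/linux/pfn.h (example values are mine):

    #define PFN_UP(x)   (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)

    /* start = 0x40000000, size = 0x10000000, 4 KiB pages:
     * end_pfn = PFN_UP(0x50000000) = 0x50000 */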
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 03c75ffd5c2a..5eb1ba74a3a9 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -24,43 +24,6 @@ | |||
24 | 24 | ||
25 | #include "physaddr.h" | 25 | #include "physaddr.h" |
26 | 26 | ||
27 | int page_is_ram(unsigned long pagenr) | ||
28 | { | ||
29 | resource_size_t addr, end; | ||
30 | int i; | ||
31 | |||
32 | /* | ||
33 | * A special case is the first 4Kb of memory; | ||
34 | * This is a BIOS owned area, not kernel ram, but generally | ||
35 | * not listed as such in the E820 table. | ||
36 | */ | ||
37 | if (pagenr == 0) | ||
38 | return 0; | ||
39 | |||
40 | /* | ||
41 | * Second special case: Some BIOSen report the PC BIOS | ||
42 | * area (640->1Mb) as ram even though it is not. | ||
43 | */ | ||
44 | if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) && | ||
45 | pagenr < (BIOS_END >> PAGE_SHIFT)) | ||
46 | return 0; | ||
47 | |||
48 | for (i = 0; i < e820.nr_map; i++) { | ||
49 | /* | ||
50 | * Not usable memory: | ||
51 | */ | ||
52 | if (e820.map[i].type != E820_RAM) | ||
53 | continue; | ||
54 | addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT; | ||
55 | end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT; | ||
56 | |||
57 | |||
58 | if ((pagenr >= addr) && (pagenr < end)) | ||
59 | return 1; | ||
60 | } | ||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | /* | 27 | /* |
65 | * Fix up the linear direct mapping of the kernel to avoid cache attribute | 28 | * Fix up the linear direct mapping of the kernel to avoid cache attribute |
66 | * conflicts. | 29 | * conflicts. |
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c index 4901d0dafda6..af3b6c8a436f 100644 --- a/arch/x86/mm/kmemcheck/error.c +++ b/arch/x86/mm/kmemcheck/error.c | |||
@@ -106,26 +106,25 @@ void kmemcheck_error_recall(void) | |||
106 | 106 | ||
107 | switch (e->type) { | 107 | switch (e->type) { |
108 | case KMEMCHECK_ERROR_INVALID_ACCESS: | 108 | case KMEMCHECK_ERROR_INVALID_ACCESS: |
109 | printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read " | 109 | printk(KERN_WARNING "WARNING: kmemcheck: Caught %d-bit read from %s memory (%p)\n", |
110 | "from %s memory (%p)\n", | ||
111 | 8 * e->size, e->state < ARRAY_SIZE(desc) ? | 110 | 8 * e->size, e->state < ARRAY_SIZE(desc) ? |
112 | desc[e->state] : "(invalid shadow state)", | 111 | desc[e->state] : "(invalid shadow state)", |
113 | (void *) e->address); | 112 | (void *) e->address); |
114 | 113 | ||
115 | printk(KERN_INFO); | 114 | printk(KERN_WARNING); |
116 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) | 115 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) |
117 | printk("%02x", e->memory_copy[i]); | 116 | printk(KERN_CONT "%02x", e->memory_copy[i]); |
118 | printk("\n"); | 117 | printk(KERN_CONT "\n"); |
119 | 118 | ||
120 | printk(KERN_INFO); | 119 | printk(KERN_WARNING); |
121 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) { | 120 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) { |
122 | if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) | 121 | if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) |
123 | printk(" %c", short_desc[e->shadow_copy[i]]); | 122 | printk(KERN_CONT " %c", short_desc[e->shadow_copy[i]]); |
124 | else | 123 | else |
125 | printk(" ?"); | 124 | printk(KERN_CONT " ?"); |
126 | } | 125 | } |
127 | printk("\n"); | 126 | printk(KERN_CONT "\n"); |
128 | printk(KERN_INFO "%*c\n", 2 + 2 | 127 | printk(KERN_WARNING "%*c\n", 2 + 2 |
129 | * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); | 128 | * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); |
130 | break; | 129 | break; |
131 | case KMEMCHECK_ERROR_BUG: | 130 | case KMEMCHECK_ERROR_BUG: |
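The conversion above relies on the printk continuation idiom: a bare printk() with no loglevel prefix starts a new message at the default level, while KERN_CONT appends to the previous line. A minimal sketch (variable names are illustrative):

    printk(KERN_WARNING "shadow bytes:");
    for (i = 0; i < n; i++)
            printk(KERN_CONT " %02x", shadow[i]);
    printk(KERN_CONT "\n");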
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index 8cc183344140..b3b531a4f8e5 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c | |||
@@ -337,7 +337,7 @@ bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) | |||
337 | if (!shadow) | 337 | if (!shadow) |
338 | return true; | 338 | return true; |
339 | 339 | ||
340 | status = kmemcheck_shadow_test(shadow, size); | 340 | status = kmemcheck_shadow_test_all(shadow, size); |
341 | 341 | ||
342 | return status == KMEMCHECK_SHADOW_INITIALIZED; | 342 | return status == KMEMCHECK_SHADOW_INITIALIZED; |
343 | } | 343 | } |
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c index 3f66b82076a3..aec124214d97 100644 --- a/arch/x86/mm/kmemcheck/shadow.c +++ b/arch/x86/mm/kmemcheck/shadow.c | |||
@@ -125,12 +125,12 @@ void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n) | |||
125 | 125 | ||
126 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) | 126 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) |
127 | { | 127 | { |
128 | #ifdef CONFIG_KMEMCHECK_PARTIAL_OK | ||
128 | uint8_t *x; | 129 | uint8_t *x; |
129 | unsigned int i; | 130 | unsigned int i; |
130 | 131 | ||
131 | x = shadow; | 132 | x = shadow; |
132 | 133 | ||
133 | #ifdef CONFIG_KMEMCHECK_PARTIAL_OK | ||
134 | /* | 134 | /* |
135 | * Make sure _some_ bytes are initialized. Gcc frequently generates | 135 | * Make sure _some_ bytes are initialized. Gcc frequently generates |
136 | * code to access neighboring bytes. | 136 | * code to access neighboring bytes. |
@@ -139,13 +139,25 @@ enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) | |||
139 | if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) | 139 | if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) |
140 | return x[i]; | 140 | return x[i]; |
141 | } | 141 | } |
142 | |||
143 | return x[0]; | ||
142 | #else | 144 | #else |
145 | return kmemcheck_shadow_test_all(shadow, size); | ||
146 | #endif | ||
147 | } | ||
148 | |||
149 | enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, unsigned int size) | ||
150 | { | ||
151 | uint8_t *x; | ||
152 | unsigned int i; | ||
153 | |||
154 | x = shadow; | ||
155 | |||
143 | /* All bytes must be initialized. */ | 156 | /* All bytes must be initialized. */ |
144 | for (i = 0; i < size; ++i) { | 157 | for (i = 0; i < size; ++i) { |
145 | if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) | 158 | if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) |
146 | return x[i]; | 159 | return x[i]; |
147 | } | 160 | } |
148 | #endif | ||
149 | 161 | ||
150 | return x[0]; | 162 | return x[0]; |
151 | } | 163 | } |
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h index af46d9ab9d86..ff0b2f70fbcb 100644 --- a/arch/x86/mm/kmemcheck/shadow.h +++ b/arch/x86/mm/kmemcheck/shadow.h | |||
@@ -11,6 +11,8 @@ enum kmemcheck_shadow { | |||
11 | void *kmemcheck_shadow_lookup(unsigned long address); | 11 | void *kmemcheck_shadow_lookup(unsigned long address); |
12 | 12 | ||
13 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); | 13 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); |
14 | enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, | ||
15 | unsigned int size); | ||
14 | void kmemcheck_shadow_set(void *shadow, unsigned int size); | 16 | void kmemcheck_shadow_set(void *shadow, unsigned int size); |
15 | 17 | ||
16 | #endif | 18 | #endif |
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index c0f6198565eb..5d0e67fff1a6 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/kdebug.h> | 21 | #include <linux/kdebug.h> |
22 | #include <linux/mutex.h> | 22 | #include <linux/mutex.h> |
23 | #include <linux/io.h> | 23 | #include <linux/io.h> |
24 | #include <linux/slab.h> | ||
24 | #include <asm/cacheflush.h> | 25 | #include <asm/cacheflush.h> |
25 | #include <asm/tlbflush.h> | 26 | #include <asm/tlbflush.h> |
26 | #include <linux/errno.h> | 27 | #include <linux/errno.h> |
@@ -538,14 +539,15 @@ static int | |||
538 | kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) | 539 | kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) |
539 | { | 540 | { |
540 | struct die_args *arg = args; | 541 | struct die_args *arg = args; |
542 | unsigned long* dr6_p = (unsigned long *)ERR_PTR(arg->err); | ||
541 | 543 | ||
542 | if (val == DIE_DEBUG && (arg->err & DR_STEP)) | 544 | if (val == DIE_DEBUG && (*dr6_p & DR_STEP)) |
543 | if (post_kmmio_handler(arg->err, arg->regs) == 1) { | 545 | if (post_kmmio_handler(*dr6_p, arg->regs) == 1) { |
544 | /* | 546 | /* |
545 | * Reset the BS bit in dr6 (pointed by args->err) to | 547 | * Reset the BS bit in dr6 (pointed by args->err) to |
546 | * denote completion of processing | 548 | * denote completion of processing |
547 | */ | 549 | */ |
548 | (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP; | 550 | *dr6_p &= ~DR_STEP; |
549 | return NOTIFY_STOP; | 551 | return NOTIFY_STOP; |
550 | } | 552 | } |
551 | 553 | ||
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index c8191defc38a..1dab5194fd9d 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c | |||
@@ -71,7 +71,7 @@ static int mmap_is_legacy(void) | |||
71 | if (current->personality & ADDR_COMPAT_LAYOUT) | 71 | if (current->personality & ADDR_COMPAT_LAYOUT) |
72 | return 1; | 72 | return 1; |
73 | 73 | ||
74 | if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) | 74 | if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) |
75 | return 1; | 75 | return 1; |
76 | 76 | ||
77 | return sysctl_legacy_va_layout; | 77 | return sysctl_legacy_va_layout; |
@@ -96,7 +96,7 @@ static unsigned long mmap_rnd(void) | |||
96 | 96 | ||
97 | static unsigned long mmap_base(void) | 97 | static unsigned long mmap_base(void) |
98 | { | 98 | { |
99 | unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; | 99 | unsigned long gap = rlimit(RLIMIT_STACK); |
100 | 100 | ||
101 | if (gap < MIN_GAP) | 101 | if (gap < MIN_GAP) |
102 | gap = MIN_GAP; | 102 | gap = MIN_GAP; |
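For context, the rlimit() helper that replaces the open-coded rlim access is roughly the following (a simplified sketch of the include/linux/sched.h helper, not part of this diff):

    static inline unsigned long rlimit(unsigned int limit)
    {
            /* current task's soft limit for the given resource */
            return current->signal->rlim[limit].rlim_cur;
    }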
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 34a3291ca103..3adff7dcc148 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c | |||
@@ -26,6 +26,7 @@ | |||
26 | 26 | ||
27 | #include <linux/module.h> | 27 | #include <linux/module.h> |
28 | #include <linux/debugfs.h> | 28 | #include <linux/debugfs.h> |
29 | #include <linux/slab.h> | ||
29 | #include <linux/uaccess.h> | 30 | #include <linux/uaccess.h> |
30 | #include <linux/io.h> | 31 | #include <linux/io.h> |
31 | #include <linux/version.h> | 32 | #include <linux/version.h> |
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index b20760ca7244..809baaaf48b1 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -418,7 +418,10 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | |||
418 | 418 | ||
419 | for_each_online_node(nid) { | 419 | for_each_online_node(nid) { |
420 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); | 420 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); |
421 | NODE_DATA(nid)->node_id = nid; | ||
422 | #ifndef CONFIG_NO_BOOTMEM | ||
421 | NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; | 423 | NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; |
424 | #endif | ||
422 | } | 425 | } |
423 | 426 | ||
424 | setup_bootmem_allocator(); | 427 | setup_bootmem_allocator(); |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 83bbc70d11bb..8948f47fde05 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -163,30 +163,48 @@ static void * __init early_node_mem(int nodeid, unsigned long start, | |||
163 | unsigned long end, unsigned long size, | 163 | unsigned long end, unsigned long size, |
164 | unsigned long align) | 164 | unsigned long align) |
165 | { | 165 | { |
166 | unsigned long mem = find_e820_area(start, end, size, align); | 166 | unsigned long mem; |
167 | void *ptr; | ||
168 | 167 | ||
168 | /* | ||
169 | * Put the allocation as high as possible; | ||
170 | * it will end up alongside NODE_DATA. | ||
171 | */ | ||
172 | if (start < (MAX_DMA_PFN<<PAGE_SHIFT)) | ||
173 | start = MAX_DMA_PFN<<PAGE_SHIFT; | ||
174 | if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) && | ||
175 | end > (MAX_DMA32_PFN<<PAGE_SHIFT)) | ||
176 | start = MAX_DMA32_PFN<<PAGE_SHIFT; | ||
177 | mem = find_e820_area(start, end, size, align); | ||
178 | if (mem != -1L) | ||
179 | return __va(mem); | ||
180 | |||
181 | /* extend the search scope */ | ||
182 | end = max_pfn_mapped << PAGE_SHIFT; | ||
183 | if (end > (MAX_DMA32_PFN<<PAGE_SHIFT)) | ||
184 | start = MAX_DMA32_PFN<<PAGE_SHIFT; | ||
185 | else | ||
186 | start = MAX_DMA_PFN<<PAGE_SHIFT; | ||
187 | mem = find_e820_area(start, end, size, align); | ||
169 | if (mem != -1L) | 188 | if (mem != -1L) |
170 | return __va(mem); | 189 | return __va(mem); |
171 | 190 | ||
172 | ptr = __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS)); | 191 | printk(KERN_ERR "Cannot find %lu bytes in node %d\n", |
173 | if (ptr == NULL) { | ||
174 | printk(KERN_ERR "Cannot find %lu bytes in node %d\n", | ||
175 | size, nodeid); | 192 | size, nodeid); |
176 | return NULL; | 193 | |
177 | } | 194 | return NULL; |
178 | return ptr; | ||
179 | } | 195 | } |
180 | 196 | ||
181 | /* Initialize bootmem allocator for a node */ | 197 | /* Initialize bootmem allocator for a node */ |
182 | void __init | 198 | void __init |
183 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | 199 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) |
184 | { | 200 | { |
185 | unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; | 201 | unsigned long start_pfn, last_pfn, nodedata_phys; |
186 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | 202 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); |
187 | unsigned long bootmap_start, nodedata_phys; | ||
188 | void *bootmap; | ||
189 | int nid; | 203 | int nid; |
204 | #ifndef CONFIG_NO_BOOTMEM | ||
205 | unsigned long bootmap_start, bootmap_pages, bootmap_size; | ||
206 | void *bootmap; | ||
207 | #endif | ||
190 | 208 | ||
191 | if (!end) | 209 | if (!end) |
192 | return; | 210 | return; |
@@ -200,7 +218,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
200 | 218 | ||
201 | start = roundup(start, ZONE_ALIGN); | 219 | start = roundup(start, ZONE_ALIGN); |
202 | 220 | ||
203 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, | 221 | printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid, |
204 | start, end); | 222 | start, end); |
205 | 223 | ||
206 | start_pfn = start >> PAGE_SHIFT; | 224 | start_pfn = start >> PAGE_SHIFT; |
@@ -211,14 +229,21 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
211 | if (node_data[nodeid] == NULL) | 229 | if (node_data[nodeid] == NULL) |
212 | return; | 230 | return; |
213 | nodedata_phys = __pa(node_data[nodeid]); | 231 | nodedata_phys = __pa(node_data[nodeid]); |
232 | reserve_early(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA"); | ||
214 | printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, | 233 | printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, |
215 | nodedata_phys + pgdat_size - 1); | 234 | nodedata_phys + pgdat_size - 1); |
235 | nid = phys_to_nid(nodedata_phys); | ||
236 | if (nid != nodeid) | ||
237 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); | ||
216 | 238 | ||
217 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); | 239 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); |
218 | NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; | 240 | NODE_DATA(nodeid)->node_id = nodeid; |
219 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | 241 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; |
220 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; | 242 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; |
221 | 243 | ||
244 | #ifndef CONFIG_NO_BOOTMEM | ||
245 | NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; | ||
246 | |||
222 | /* | 247 | /* |
223 | * Find a place for the bootmem map | 248 | * Find a place for the bootmem map |
224 | * nodedata_phys could be on other nodes by alloc_bootmem, | 249 | * nodedata_phys could be on other nodes by alloc_bootmem, |
@@ -227,11 +252,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
227 | * of alloc_bootmem, that could clash with reserved range | 252 | * of alloc_bootmem, that could clash with reserved range |
228 | */ | 253 | */ |
229 | bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); | 254 | bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); |
230 | nid = phys_to_nid(nodedata_phys); | 255 | bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE); |
231 | if (nid == nodeid) | ||
232 | bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE); | ||
233 | else | ||
234 | bootmap_start = roundup(start, PAGE_SIZE); | ||
235 | /* | 256 | /* |
236 | * SMP_CACHE_BYTES could be enough, but init_bootmem_node like | 257 | * SMP_CACHE_BYTES could be enough, but init_bootmem_node like |
237 | * to use that to align to PAGE_SIZE | 258 | * to use that to align to PAGE_SIZE |
@@ -239,18 +260,13 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
239 | bootmap = early_node_mem(nodeid, bootmap_start, end, | 260 | bootmap = early_node_mem(nodeid, bootmap_start, end, |
240 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); | 261 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); |
241 | if (bootmap == NULL) { | 262 | if (bootmap == NULL) { |
242 | if (nodedata_phys < start || nodedata_phys >= end) { | 263 | free_early(nodedata_phys, nodedata_phys + pgdat_size); |
243 | /* | ||
244 | * only need to free it if it is from other node | ||
245 | * bootmem | ||
246 | */ | ||
247 | if (nid != nodeid) | ||
248 | free_bootmem(nodedata_phys, pgdat_size); | ||
249 | } | ||
250 | node_data[nodeid] = NULL; | 264 | node_data[nodeid] = NULL; |
251 | return; | 265 | return; |
252 | } | 266 | } |
253 | bootmap_start = __pa(bootmap); | 267 | bootmap_start = __pa(bootmap); |
268 | reserve_early(bootmap_start, bootmap_start+(bootmap_pages<<PAGE_SHIFT), | ||
269 | "BOOTMAP"); | ||
254 | 270 | ||
255 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), | 271 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), |
256 | bootmap_start >> PAGE_SHIFT, | 272 | bootmap_start >> PAGE_SHIFT, |
@@ -259,31 +275,12 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
259 | printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", | 275 | printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", |
260 | bootmap_start, bootmap_start + bootmap_size - 1, | 276 | bootmap_start, bootmap_start + bootmap_size - 1, |
261 | bootmap_pages); | 277 | bootmap_pages); |
262 | |||
263 | free_bootmem_with_active_regions(nodeid, end); | ||
264 | |||
265 | /* | ||
266 | * convert early reserve to bootmem reserve earlier | ||
267 | * otherwise early_node_mem could use early reserved mem | ||
268 | * on previous node | ||
269 | */ | ||
270 | early_res_to_bootmem(start, end); | ||
271 | |||
272 | /* | ||
273 | * in some case early_node_mem could use alloc_bootmem | ||
274 | * to get range on other node, don't reserve that again | ||
275 | */ | ||
276 | if (nid != nodeid) | ||
277 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); | ||
278 | else | ||
279 | reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, | ||
280 | pgdat_size, BOOTMEM_DEFAULT); | ||
281 | nid = phys_to_nid(bootmap_start); | 278 | nid = phys_to_nid(bootmap_start); |
282 | if (nid != nodeid) | 279 | if (nid != nodeid) |
283 | printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); | 280 | printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); |
284 | else | 281 | |
285 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, | 282 | free_bootmem_with_active_regions(nodeid, end); |
286 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); | 283 | #endif |
287 | 284 | ||
288 | node_set_online(nodeid); | 285 | node_set_online(nodeid); |
289 | } | 286 | } |
@@ -427,7 +424,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr, | |||
427 | * Calculate the number of big nodes that can be allocated as a result | 424 | * Calculate the number of big nodes that can be allocated as a result |
428 | * of consolidating the remainder. | 425 | * of consolidating the remainder. |
429 | */ | 426 | */ |
430 | big = ((size & ~FAKE_NODE_MIN_HASH_MASK) & nr_nodes) / | 427 | big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) / |
431 | FAKE_NODE_MIN_SIZE; | 428 | FAKE_NODE_MIN_SIZE; |
432 | 429 | ||
433 | size &= FAKE_NODE_MIN_HASH_MASK; | 430 | size &= FAKE_NODE_MIN_HASH_MASK; |
@@ -502,77 +499,99 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr, | |||
502 | } | 499 | } |
503 | 500 | ||
504 | /* | 501 | /* |
505 | * Splits num_nodes nodes up equally starting at node_start. The return value | 502 | * Returns the end address of a node such that it holds at least `size' of | ||
506 | * is the number of nodes split up and addr is adjusted to be at the end of the | 503 | * non-reserved memory, or `max_addr' if that limit is reached first. | ||
507 | * last node allocated. | ||
508 | */ | 504 | */ |
509 | static int __init split_nodes_equally(u64 *addr, u64 max_addr, int node_start, | 505 | static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size) |
510 | int num_nodes) | ||
511 | { | 506 | { |
512 | unsigned int big; | 507 | u64 end = start + size; |
513 | u64 size; | ||
514 | int i; | ||
515 | 508 | ||
516 | if (num_nodes <= 0) | 509 | while (end - start - e820_hole_size(start, end) < size) { |
517 | return -1; | 510 | end += FAKE_NODE_MIN_SIZE; |
518 | if (num_nodes > MAX_NUMNODES) | 511 | if (end > max_addr) { |
519 | num_nodes = MAX_NUMNODES; | ||
520 | size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) / | ||
521 | num_nodes; | ||
522 | /* | ||
523 | * Calculate the number of big nodes that can be allocated as a result | ||
524 | * of consolidating the leftovers. | ||
525 | */ | ||
526 | big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) / | ||
527 | FAKE_NODE_MIN_SIZE; | ||
528 | |||
529 | /* Round down to nearest FAKE_NODE_MIN_SIZE. */ | ||
530 | size &= FAKE_NODE_MIN_HASH_MASK; | ||
531 | if (!size) { | ||
532 | printk(KERN_ERR "Not enough memory for each node. " | ||
533 | "NUMA emulation disabled.\n"); | ||
534 | return -1; | ||
535 | } | ||
536 | |||
537 | for (i = node_start; i < num_nodes + node_start; i++) { | ||
538 | u64 end = *addr + size; | ||
539 | |||
540 | if (i < big) | ||
541 | end += FAKE_NODE_MIN_SIZE; | ||
542 | /* | ||
543 | * The final node can have the remaining system RAM. Other | ||
544 | * nodes receive roughly the same amount of available pages. | ||
545 | */ | ||
546 | if (i == num_nodes + node_start - 1) | ||
547 | end = max_addr; | 512 | end = max_addr; |
548 | else | ||
549 | while (end - *addr - e820_hole_size(*addr, end) < | ||
550 | size) { | ||
551 | end += FAKE_NODE_MIN_SIZE; | ||
552 | if (end > max_addr) { | ||
553 | end = max_addr; | ||
554 | break; | ||
555 | } | ||
556 | } | ||
557 | if (setup_node_range(i, addr, end - *addr, max_addr) < 0) | ||
558 | break; | 513 | break; |
514 | } | ||
559 | } | 515 | } |
560 | return i - node_start + 1; | 516 | return end; |
561 | } | 517 | } |
562 | 518 | ||
563 | /* | 519 | /* |
564 | * Splits the remaining system RAM into chunks of size. The remaining memory is | 520 | * Sets up fake nodes of `size' interleaved over physical nodes ranging from |
565 | * always assigned to a final node and can be asymmetric. Returns the number of | 521 | * `addr' to `max_addr'. The return value is the number of nodes allocated. |
566 | * nodes split. | ||
567 | */ | 522 | */ |
568 | static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start, | 523 | static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) |
569 | u64 size) | ||
570 | { | 524 | { |
571 | int i = node_start; | 525 | nodemask_t physnode_mask = NODE_MASK_NONE; |
572 | size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; | 526 | u64 min_size; |
573 | while (!setup_node_range(i++, addr, size, max_addr)) | 527 | int ret = 0; |
574 | ; | 528 | int i; |
575 | return i - node_start; | 529 | |
530 | if (!size) | ||
531 | return -1; | ||
532 | /* | ||
533 | * The limit on emulated nodes is MAX_NUMNODES, so the size per node is | ||
534 | * increased accordingly if the requested size is too small. This | ||
535 | * creates a uniform distribution of node sizes across the entire | ||
536 | * machine (but not necessarily over physical nodes). | ||
537 | */ | ||
538 | min_size = (max_addr - addr - e820_hole_size(addr, max_addr)) / | ||
539 | MAX_NUMNODES; | ||
540 | min_size = max(min_size, FAKE_NODE_MIN_SIZE); | ||
541 | if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size) | ||
542 | min_size = (min_size + FAKE_NODE_MIN_SIZE) & | ||
543 | FAKE_NODE_MIN_HASH_MASK; | ||
544 | if (size < min_size) { | ||
545 | pr_err("Fake node size %LuMB too small, increasing to %LuMB\n", | ||
546 | size >> 20, min_size >> 20); | ||
547 | size = min_size; | ||
548 | } | ||
549 | size &= FAKE_NODE_MIN_HASH_MASK; | ||
550 | |||
551 | for (i = 0; i < MAX_NUMNODES; i++) | ||
552 | if (physnodes[i].start != physnodes[i].end) | ||
553 | node_set(i, physnode_mask); | ||
554 | /* | ||
555 | * Fill physical nodes with fake nodes of size until there is no memory | ||
556 | * left on any of them. | ||
557 | */ | ||
558 | while (nodes_weight(physnode_mask)) { | ||
559 | for_each_node_mask(i, physnode_mask) { | ||
560 | u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT; | ||
561 | u64 end; | ||
562 | |||
563 | end = find_end_of_node(physnodes[i].start, | ||
564 | physnodes[i].end, size); | ||
565 | /* | ||
566 | * If there won't be at least FAKE_NODE_MIN_SIZE of | ||
567 | * non-reserved memory in ZONE_DMA32 for the next node, | ||
568 | * this one must extend to the boundary. | ||
569 | */ | ||
570 | if (end < dma32_end && dma32_end - end - | ||
571 | e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) | ||
572 | end = dma32_end; | ||
573 | |||
574 | /* | ||
575 | * If there won't be enough non-reserved memory for the | ||
576 | * next node, this one must extend to the end of the | ||
577 | * physical node. | ||
578 | */ | ||
579 | if (physnodes[i].end - end - | ||
580 | e820_hole_size(end, physnodes[i].end) < size) | ||
581 | end = physnodes[i].end; | ||
582 | |||
583 | /* | ||
584 | * Setup the fake node that will be allocated as bootmem | ||
585 | * later. If setup_node_range() returns non-zero, there | ||
586 | * is no more memory available on this physical node. | ||
587 | */ | ||
588 | if (setup_node_range(ret++, &physnodes[i].start, | ||
589 | end - physnodes[i].start, | ||
590 | physnodes[i].end) < 0) | ||
591 | node_clear(i, physnode_mask); | ||
592 | } | ||
593 | } | ||
594 | return ret; | ||
576 | } | 595 | } |
577 | 596 | ||
578 | /* | 597 | /* |
@@ -582,87 +601,32 @@ static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start, | |||
582 | static int __init numa_emulation(unsigned long start_pfn, | 601 | static int __init numa_emulation(unsigned long start_pfn, |
583 | unsigned long last_pfn, int acpi, int k8) | 602 | unsigned long last_pfn, int acpi, int k8) |
584 | { | 603 | { |
585 | u64 size, addr = start_pfn << PAGE_SHIFT; | 604 | u64 addr = start_pfn << PAGE_SHIFT; |
586 | u64 max_addr = last_pfn << PAGE_SHIFT; | 605 | u64 max_addr = last_pfn << PAGE_SHIFT; |
587 | int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; | ||
588 | int num_phys_nodes; | 606 | int num_phys_nodes; |
607 | int num_nodes; | ||
608 | int i; | ||
589 | 609 | ||
590 | num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); | 610 | num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); |
591 | /* | 611 | /* |
592 | * If the numa=fake command-line is just a single number N, split the | 612 | * If the numa=fake command-line contains a 'M' or 'G', it represents |
593 | * system RAM into N fake nodes. | 613 | * the fixed node size. Otherwise, if it is just a single number N, |
614 | * split the system RAM into N fake nodes. | ||
594 | */ | 615 | */ |
595 | if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { | 616 | if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) { |
596 | long n = simple_strtol(cmdline, NULL, 0); | 617 | u64 size; |
597 | |||
598 | num_nodes = split_nodes_interleave(addr, max_addr, | ||
599 | num_phys_nodes, n); | ||
600 | if (num_nodes < 0) | ||
601 | return num_nodes; | ||
602 | goto out; | ||
603 | } | ||
604 | 618 | ||
605 | /* Parse the command line. */ | 619 | size = memparse(cmdline, &cmdline); |
606 | for (coeff_flag = 0; ; cmdline++) { | 620 | num_nodes = split_nodes_size_interleave(addr, max_addr, size); |
607 | if (*cmdline && isdigit(*cmdline)) { | 621 | } else { |
608 | num = num * 10 + *cmdline - '0'; | 622 | unsigned long n; |
609 | continue; | 623 | |
610 | } | 624 | n = simple_strtoul(cmdline, NULL, 0); |
611 | if (*cmdline == '*') { | 625 | num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n); |
612 | if (num > 0) | ||
613 | coeff = num; | ||
614 | coeff_flag = 1; | ||
615 | } | ||
616 | if (!*cmdline || *cmdline == ',') { | ||
617 | if (!coeff_flag) | ||
618 | coeff = 1; | ||
619 | /* | ||
620 | * Round down to the nearest FAKE_NODE_MIN_SIZE. | ||
621 | * Command-line coefficients are in megabytes. | ||
622 | */ | ||
623 | size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; | ||
624 | if (size) | ||
625 | for (i = 0; i < coeff; i++, num_nodes++) | ||
626 | if (setup_node_range(num_nodes, &addr, | ||
627 | size, max_addr) < 0) | ||
628 | goto done; | ||
629 | if (!*cmdline) | ||
630 | break; | ||
631 | coeff_flag = 0; | ||
632 | coeff = -1; | ||
633 | } | ||
634 | num = 0; | ||
635 | } | ||
636 | done: | ||
637 | if (!num_nodes) | ||
638 | return -1; | ||
639 | /* Fill remainder of system RAM, if appropriate. */ | ||
640 | if (addr < max_addr) { | ||
641 | if (coeff_flag && coeff < 0) { | ||
642 | /* Split remaining nodes into num-sized chunks */ | ||
643 | num_nodes += split_nodes_by_size(&addr, max_addr, | ||
644 | num_nodes, num); | ||
645 | goto out; | ||
646 | } | ||
647 | switch (*(cmdline - 1)) { | ||
648 | case '*': | ||
649 | /* Split remaining nodes into coeff chunks */ | ||
650 | if (coeff <= 0) | ||
651 | break; | ||
652 | num_nodes += split_nodes_equally(&addr, max_addr, | ||
653 | num_nodes, coeff); | ||
654 | break; | ||
655 | case ',': | ||
656 | /* Do not allocate remaining system RAM */ | ||
657 | break; | ||
658 | default: | ||
659 | /* Give one final node */ | ||
660 | setup_node_range(num_nodes, &addr, max_addr - addr, | ||
661 | max_addr); | ||
662 | num_nodes++; | ||
663 | } | ||
664 | } | 626 | } |
665 | out: | 627 | |
628 | if (num_nodes < 0) | ||
629 | return num_nodes; | ||
666 | memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); | 630 | memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); |
667 | if (memnode_shift < 0) { | 631 | if (memnode_shift < 0) { |
668 | memnode_shift = 0; | 632 | memnode_shift = 0; |
@@ -742,6 +706,10 @@ unsigned long __init numa_free_all_bootmem(void) | |||
742 | for_each_online_node(i) | 706 | for_each_online_node(i) |
743 | pages += free_all_bootmem_node(NODE_DATA(i)); | 707 | pages += free_all_bootmem_node(NODE_DATA(i)); |
744 | 708 | ||
709 | #ifdef CONFIG_NO_BOOTMEM | ||
710 | pages += free_all_memory_core_early(MAX_NUMNODES); | ||
711 | #endif | ||
712 | |||
745 | return pages; | 713 | return pages; |
746 | } | 714 | } |
747 | 715 | ||
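Usage implied by the simplified numa=fake parsing above (examples are mine, inferred from the memparse()/simple_strtoul() branches, not stated in the patch):

    numa=fake=8        # split system RAM into 8 interleaved fake nodes
    numa=fake=512M     # fixed 512 MB fake nodes, interleaved across the
                       # physical nodes; memparse() handles the M/G suffix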
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1d4eb93d333c..28195c350b97 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -6,13 +6,13 @@ | |||
6 | #include <linux/bootmem.h> | 6 | #include <linux/bootmem.h> |
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
9 | #include <linux/slab.h> | ||
10 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
11 | #include <linux/interrupt.h> | 10 | #include <linux/interrupt.h> |
12 | #include <linux/seq_file.h> | 11 | #include <linux/seq_file.h> |
13 | #include <linux/debugfs.h> | 12 | #include <linux/debugfs.h> |
14 | #include <linux/pfn.h> | 13 | #include <linux/pfn.h> |
15 | #include <linux/percpu.h> | 14 | #include <linux/percpu.h> |
15 | #include <linux/gfp.h> | ||
16 | 16 | ||
17 | #include <asm/e820.h> | 17 | #include <asm/e820.h> |
18 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
@@ -291,8 +291,29 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
291 | */ | 291 | */ |
292 | if (kernel_set_to_readonly && | 292 | if (kernel_set_to_readonly && |
293 | within(address, (unsigned long)_text, | 293 | within(address, (unsigned long)_text, |
294 | (unsigned long)__end_rodata_hpage_align)) | 294 | (unsigned long)__end_rodata_hpage_align)) { |
295 | pgprot_val(forbidden) |= _PAGE_RW; | 295 | unsigned int level; |
296 | |||
297 | /* | ||
298 | * Don't enforce the !RW mapping for the kernel text mapping | ||
299 | * if the current mapping already uses small pages. | ||
300 | * No need to work hard to preserve large page mappings in this | ||
301 | * case. | ||
302 | * | ||
303 | * This also fixes the Linux Xen paravirt guest boot failure | ||
304 | * (because of unexpected read-only mappings for kernel identity | ||
305 | * mappings). In this paravirt guest case, the kernel text | ||
306 | * mapping and the kernel identity mapping share the same | ||
307 | * page-table pages. Thus we can't really use different | ||
308 | * protections for the kernel text and identity mappings. Also, | ||
309 | * these shared mappings are made of small page mappings. | ||
310 | * Thus not enforcing the !RW mapping for small-page kernel | ||
311 | * text mappings also lets Linux Xen paravirt guests boot | ||
312 | * correctly. | ||
313 | */ | ||
314 | if (lookup_address(address, &level) && (level != PG_LEVEL_4K)) | ||
315 | pgprot_val(forbidden) |= _PAGE_RW; | ||
316 | } | ||
296 | #endif | 317 | #endif |
297 | 318 | ||
298 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); | 319 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index ae9648eb1c7f..edc8b95afc1a 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -12,7 +12,7 @@ | |||
12 | #include <linux/debugfs.h> | 12 | #include <linux/debugfs.h> |
13 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/gfp.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
18 | #include <linux/rbtree.h> | 18 | #include <linux/rbtree.h> |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index ed34f5e35999..5c4ee422590e 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -1,4 +1,5 @@ | |||
1 | #include <linux/mm.h> | 1 | #include <linux/mm.h> |
2 | #include <linux/gfp.h> | ||
2 | #include <asm/pgalloc.h> | 3 | #include <asm/pgalloc.h> |
3 | #include <asm/pgtable.h> | 4 | #include <asm/pgtable.h> |
4 | #include <asm/tlb.h> | 5 | #include <asm/tlb.h> |
@@ -6,6 +7,14 @@ | |||
6 | 7 | ||
7 | #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO | 8 | #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO |
8 | 9 | ||
10 | #ifdef CONFIG_HIGHPTE | ||
11 | #define PGALLOC_USER_GFP __GFP_HIGHMEM | ||
12 | #else | ||
13 | #define PGALLOC_USER_GFP 0 | ||
14 | #endif | ||
15 | |||
16 | gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP; | ||
17 | |||
9 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | 18 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) |
10 | { | 19 | { |
11 | return (pte_t *)__get_free_page(PGALLOC_GFP); | 20 | return (pte_t *)__get_free_page(PGALLOC_GFP); |
@@ -15,16 +24,29 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) | |||
15 | { | 24 | { |
16 | struct page *pte; | 25 | struct page *pte; |
17 | 26 | ||
18 | #ifdef CONFIG_HIGHPTE | 27 | pte = alloc_pages(__userpte_alloc_gfp, 0); |
19 | pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0); | ||
20 | #else | ||
21 | pte = alloc_pages(PGALLOC_GFP, 0); | ||
22 | #endif | ||
23 | if (pte) | 28 | if (pte) |
24 | pgtable_page_ctor(pte); | 29 | pgtable_page_ctor(pte); |
25 | return pte; | 30 | return pte; |
26 | } | 31 | } |
27 | 32 | ||
33 | static int __init setup_userpte(char *arg) | ||
34 | { | ||
35 | if (!arg) | ||
36 | return -EINVAL; | ||
37 | |||
38 | /* | ||
39 | * "userpte=nohigh" disables allocation of user pagetables in | ||
40 | * high memory. | ||
41 | */ | ||
42 | if (strcmp(arg, "nohigh") == 0) | ||
43 | __userpte_alloc_gfp &= ~__GFP_HIGHMEM; | ||
44 | else | ||
45 | return -EINVAL; | ||
46 | return 0; | ||
47 | } | ||
48 | early_param("userpte", setup_userpte); | ||
49 | |||
28 | void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) | 50 | void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) |
29 | { | 51 | { |
30 | pgtable_page_dtor(pte); | 52 | pgtable_page_dtor(pte); |
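Based on the setup_userpte() parser above, the only accepted form of the new boot parameter is (example line is mine):

    userpte=nohigh     # allocate user page tables in lowmem only

Any other value returns -EINVAL; with no parameter at all, __userpte_alloc_gfp keeps __GFP_HIGHMEM whenever CONFIG_HIGHPTE is set.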
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 46c8834aedc0..1a8faf09afed 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c | |||
@@ -6,7 +6,6 @@ | |||
6 | #include <linux/swap.h> | 6 | #include <linux/swap.h> |
7 | #include <linux/smp.h> | 7 | #include <linux/smp.h> |
8 | #include <linux/highmem.h> | 8 | #include <linux/highmem.h> |
9 | #include <linux/slab.h> | ||
10 | #include <linux/pagemap.h> | 9 | #include <linux/pagemap.h> |
11 | #include <linux/spinlock.h> | 10 | #include <linux/spinlock.h> |
12 | #include <linux/module.h> | 11 | #include <linux/module.h> |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index a27124185fc1..28c68762648f 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -229,9 +229,11 @@ update_nodes_add(int node, unsigned long start, unsigned long end) | |||
229 | printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); | 229 | printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); |
230 | } | 230 | } |
231 | 231 | ||
232 | if (changed) | 232 | if (changed) { |
233 | node_set(node, cpu_nodes_parsed); | ||
233 | printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", | 234 | printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", |
234 | nd->start, nd->end); | 235 | nd->start, nd->end); |
236 | } | ||
235 | } | 237 | } |
236 | 238 | ||
237 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ | 239 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 65b58e4b0b8b..426f3a1a64d3 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -41,7 +41,7 @@ union smp_flush_state { | |||
41 | struct { | 41 | struct { |
42 | struct mm_struct *flush_mm; | 42 | struct mm_struct *flush_mm; |
43 | unsigned long flush_va; | 43 | unsigned long flush_va; |
44 | spinlock_t tlbstate_lock; | 44 | raw_spinlock_t tlbstate_lock; |
45 | DECLARE_BITMAP(flush_cpumask, NR_CPUS); | 45 | DECLARE_BITMAP(flush_cpumask, NR_CPUS); |
46 | }; | 46 | }; |
47 | char pad[INTERNODE_CACHE_BYTES]; | 47 | char pad[INTERNODE_CACHE_BYTES]; |
@@ -181,7 +181,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, | |||
181 | * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is | 181 | * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is |
182 | * probably not worth checking this for a cache-hot lock. | 182 | * probably not worth checking this for a cache-hot lock. |
183 | */ | 183 | */ |
184 | spin_lock(&f->tlbstate_lock); | 184 | raw_spin_lock(&f->tlbstate_lock); |
185 | 185 | ||
186 | f->flush_mm = mm; | 186 | f->flush_mm = mm; |
187 | f->flush_va = va; | 187 | f->flush_va = va; |
@@ -199,7 +199,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, | |||
199 | 199 | ||
200 | f->flush_mm = NULL; | 200 | f->flush_mm = NULL; |
201 | f->flush_va = 0; | 201 | f->flush_va = 0; |
202 | spin_unlock(&f->tlbstate_lock); | 202 | raw_spin_unlock(&f->tlbstate_lock); |
203 | } | 203 | } |
204 | 204 | ||
205 | void native_flush_tlb_others(const struct cpumask *cpumask, | 205 | void native_flush_tlb_others(const struct cpumask *cpumask, |
@@ -223,7 +223,7 @@ static int __cpuinit init_smp_flush(void) | |||
223 | int i; | 223 | int i; |
224 | 224 | ||
225 | for (i = 0; i < ARRAY_SIZE(flush_state); i++) | 225 | for (i = 0; i < ARRAY_SIZE(flush_state); i++) |
226 | spin_lock_init(&flush_state[i].tlbstate_lock); | 226 | raw_spin_lock_init(&flush_state[i].tlbstate_lock); |
227 | 227 | ||
228 | return 0; | 228 | return 0; |
229 | } | 229 | } |
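The tlb.c hunks above convert tlbstate_lock from spinlock_t to raw_spinlock_t, so the cross-CPU TLB-flush IPI path keeps a true busy-wait lock even on kernels where spinlock_t may sleep (e.g. with the realtime patches). A rough userspace model of the locking pattern, with a pthread spinlock standing in for raw_spinlock_t:

```c
#include <pthread.h>
#include <stdio.h>

/* Userspace stand-in for the kernel pattern: the flush state embeds its
 * own busy-wait lock, taken around publishing (mm, va) to other CPUs. */
struct smp_flush_state {
    const char *flush_mm;              /* stands in for struct mm_struct * */
    unsigned long flush_va;
    pthread_spinlock_t tlbstate_lock;  /* raw_spin_lock() analogue */
};

static struct smp_flush_state f;

static void flush_tlb_others_ipi(const char *mm, unsigned long va)
{
    pthread_spin_lock(&f.tlbstate_lock);
    f.flush_mm = mm;                   /* publish work for other CPUs */
    f.flush_va = va;
    /* ... send the IPI and wait for acknowledgements here ... */
    f.flush_mm = NULL;
    f.flush_va = 0;
    pthread_spin_unlock(&f.tlbstate_lock);
}

int main(void)
{
    pthread_spin_init(&f.tlbstate_lock, PTHREAD_PROCESS_PRIVATE);
    flush_tlb_others_ipi("init_mm", 0x1000);
    puts("flush published and cleared under the spinlock");
    return 0;
}
```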
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cb88b1a0bd5f..2c505ee71014 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -159,7 +159,7 @@ static int nmi_setup_mux(void) | |||
159 | 159 | ||
160 | for_each_possible_cpu(i) { | 160 | for_each_possible_cpu(i) { |
161 | per_cpu(cpu_msrs, i).multiplex = | 161 | per_cpu(cpu_msrs, i).multiplex = |
162 | kmalloc(multiplex_size, GFP_KERNEL); | 162 | kzalloc(multiplex_size, GFP_KERNEL); |
163 | if (!per_cpu(cpu_msrs, i).multiplex) | 163 | if (!per_cpu(cpu_msrs, i).multiplex) |
164 | return 0; | 164 | return 0; |
165 | } | 165 | } |
@@ -179,7 +179,6 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) | |||
179 | if (counter_config[i].enabled) { | 179 | if (counter_config[i].enabled) { |
180 | multiplex[i].saved = -(u64)counter_config[i].count; | 180 | multiplex[i].saved = -(u64)counter_config[i].count; |
181 | } else { | 181 | } else { |
182 | multiplex[i].addr = 0; | ||
183 | multiplex[i].saved = 0; | 182 | multiplex[i].saved = 0; |
184 | } | 183 | } |
185 | } | 184 | } |
@@ -189,25 +188,27 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) | |||
189 | 188 | ||
190 | static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) | 189 | static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) |
191 | { | 190 | { |
191 | struct op_msr *counters = msrs->counters; | ||
192 | struct op_msr *multiplex = msrs->multiplex; | 192 | struct op_msr *multiplex = msrs->multiplex; |
193 | int i; | 193 | int i; |
194 | 194 | ||
195 | for (i = 0; i < model->num_counters; ++i) { | 195 | for (i = 0; i < model->num_counters; ++i) { |
196 | int virt = op_x86_phys_to_virt(i); | 196 | int virt = op_x86_phys_to_virt(i); |
197 | if (multiplex[virt].addr) | 197 | if (counters[i].addr) |
198 | rdmsrl(multiplex[virt].addr, multiplex[virt].saved); | 198 | rdmsrl(counters[i].addr, multiplex[virt].saved); |
199 | } | 199 | } |
200 | } | 200 | } |
201 | 201 | ||
202 | static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) | 202 | static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) |
203 | { | 203 | { |
204 | struct op_msr *counters = msrs->counters; | ||
204 | struct op_msr *multiplex = msrs->multiplex; | 205 | struct op_msr *multiplex = msrs->multiplex; |
205 | int i; | 206 | int i; |
206 | 207 | ||
207 | for (i = 0; i < model->num_counters; ++i) { | 208 | for (i = 0; i < model->num_counters; ++i) { |
208 | int virt = op_x86_phys_to_virt(i); | 209 | int virt = op_x86_phys_to_virt(i); |
209 | if (multiplex[virt].addr) | 210 | if (counters[i].addr) |
210 | wrmsrl(multiplex[virt].addr, multiplex[virt].saved); | 211 | wrmsrl(counters[i].addr, multiplex[virt].saved); |
211 | } | 212 | } |
212 | } | 213 | } |
213 | 214 | ||
@@ -222,7 +223,7 @@ static void nmi_cpu_switch(void *dummy) | |||
222 | 223 | ||
223 | /* move to next set */ | 224 | /* move to next set */ |
224 | si += model->num_counters; | 225 | si += model->num_counters; |
225 | if ((si > model->num_virt_counters) || (counter_config[si].count == 0)) | 226 | if ((si >= model->num_virt_counters) || (counter_config[si].count == 0)) |
226 | per_cpu(switch_index, cpu) = 0; | 227 | per_cpu(switch_index, cpu) = 0; |
227 | else | 228 | else |
228 | per_cpu(switch_index, cpu) = si; | 229 | per_cpu(switch_index, cpu) = si; |
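The nmi_cpu_switch() hunk above fixes an off-by-one: with `si > model->num_virt_counters`, an index equal to the array size escaped the wraparound test and indexed one slot past the end of counter_config[]. A stripped-down sketch of the corrected advance (the counter counts are made up and the empty-slot check is simplified away):

```c
#include <stdio.h>

#define NUM_COUNTERS       2   /* physical counters per switch step */
#define NUM_VIRT_COUNTERS  6   /* size of the virtual counter array */

/* Advance the multiplexing index; wrap when the next window would start
 * at or beyond the array size.  Using '>' instead of '>=' here let
 * si == NUM_VIRT_COUNTERS through, reading one slot out of bounds. */
static int next_switch_index(int si)
{
    si += NUM_COUNTERS;
    return (si >= NUM_VIRT_COUNTERS) ? 0 : si;
}

int main(void)
{
    int si = 0;
    for (int step = 0; step < 5; step++) {
        printf("window starts at virtual counter %d\n", si);
        si = next_switch_index(si);
    }
    return 0;
}
```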
@@ -303,11 +304,11 @@ static int allocate_msrs(void) | |||
303 | 304 | ||
304 | int i; | 305 | int i; |
305 | for_each_possible_cpu(i) { | 306 | for_each_possible_cpu(i) { |
306 | per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, | 307 | per_cpu(cpu_msrs, i).counters = kzalloc(counters_size, |
307 | GFP_KERNEL); | 308 | GFP_KERNEL); |
308 | if (!per_cpu(cpu_msrs, i).counters) | 309 | if (!per_cpu(cpu_msrs, i).counters) |
309 | return 0; | 310 | return 0; |
310 | per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, | 311 | per_cpu(cpu_msrs, i).controls = kzalloc(controls_size, |
311 | GFP_KERNEL); | 312 | GFP_KERNEL); |
312 | if (!per_cpu(cpu_msrs, i).controls) | 313 | if (!per_cpu(cpu_msrs, i).controls) |
313 | return 0; | 314 | return 0; |
@@ -598,6 +599,7 @@ static int __init ppro_init(char **cpu_type) | |||
598 | case 15: case 23: | 599 | case 15: case 23: |
599 | *cpu_type = "i386/core_2"; | 600 | *cpu_type = "i386/core_2"; |
600 | break; | 601 | break; |
602 | case 0x2e: | ||
601 | case 26: | 603 | case 26: |
602 | spec = &op_arch_perfmon_spec; | 604 | spec = &op_arch_perfmon_spec; |
603 | *cpu_type = "i386/core_i7"; | 605 | *cpu_type = "i386/core_i7"; |
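The kmalloc()-to-kzalloc() switches above in nmi_setup_mux() and allocate_msrs() guarantee that the MSR arrays start zeroed, which is what allows dropping the explicit `multiplex[i].addr = 0` style initializers elsewhere in this series. A userspace sketch of the idiom, with calloc() standing in for kzalloc():

```c
#include <stdio.h>
#include <stdlib.h>

struct op_msr {
    unsigned long addr;        /* 0 means "not reserved / not in use" */
    unsigned long long saved;
};

int main(void)
{
    size_t n = 4;

    /* calloc() plays the role of kzalloc(): every op_msr starts zeroed,
     * so callers can test msrs[i].addr without any explicit init loop. */
    struct op_msr *msrs = calloc(n, sizeof(*msrs));
    if (!msrs)
        return 1;

    for (size_t i = 0; i < n; i++)
        if (!msrs[i].addr)
            printf("counter %zu: unreserved, skipped\n", i);

    free(msrs);
    return 0;
}
```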
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 39686c29f03a..090cbbec7dbd 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -22,6 +22,9 @@ | |||
22 | #include <asm/ptrace.h> | 22 | #include <asm/ptrace.h> |
23 | #include <asm/msr.h> | 23 | #include <asm/msr.h> |
24 | #include <asm/nmi.h> | 24 | #include <asm/nmi.h> |
25 | #include <asm/apic.h> | ||
26 | #include <asm/processor.h> | ||
27 | #include <asm/cpufeature.h> | ||
25 | 28 | ||
26 | #include "op_x86_model.h" | 29 | #include "op_x86_model.h" |
27 | #include "op_counter.h" | 30 | #include "op_counter.h" |
@@ -43,23 +46,10 @@ | |||
43 | 46 | ||
44 | static unsigned long reset_value[NUM_VIRT_COUNTERS]; | 47 | static unsigned long reset_value[NUM_VIRT_COUNTERS]; |
45 | 48 | ||
46 | #ifdef CONFIG_OPROFILE_IBS | ||
47 | |||
48 | /* IbsFetchCtl bits/masks */ | ||
49 | #define IBS_FETCH_RAND_EN (1ULL<<57) | ||
50 | #define IBS_FETCH_VAL (1ULL<<49) | ||
51 | #define IBS_FETCH_ENABLE (1ULL<<48) | ||
52 | #define IBS_FETCH_CNT_MASK 0xFFFF0000ULL | ||
53 | |||
54 | /*IbsOpCtl bits */ | ||
55 | #define IBS_OP_CNT_CTL (1ULL<<19) | ||
56 | #define IBS_OP_VAL (1ULL<<18) | ||
57 | #define IBS_OP_ENABLE (1ULL<<17) | ||
58 | |||
59 | #define IBS_FETCH_SIZE 6 | 49 | #define IBS_FETCH_SIZE 6 |
60 | #define IBS_OP_SIZE 12 | 50 | #define IBS_OP_SIZE 12 |
61 | 51 | ||
62 | static int has_ibs; /* AMD Family10h and later */ | 52 | static u32 ibs_caps; |
63 | 53 | ||
64 | struct op_ibs_config { | 54 | struct op_ibs_config { |
65 | unsigned long op_enabled; | 55 | unsigned long op_enabled; |
@@ -71,24 +61,52 @@ struct op_ibs_config { | |||
71 | }; | 61 | }; |
72 | 62 | ||
73 | static struct op_ibs_config ibs_config; | 63 | static struct op_ibs_config ibs_config; |
64 | static u64 ibs_op_ctl; | ||
74 | 65 | ||
75 | #endif | 66 | /* |
67 | * IBS cpuid feature detection | ||
68 | */ | ||
76 | 69 | ||
77 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | 70 | #define IBS_CPUID_FEATURES 0x8000001b |
71 | |||
72 | /* | ||
73 | * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but | ||
74 | * bit 0 is used to indicate the existence of IBS. | ||
75 | */ | ||
76 | #define IBS_CAPS_AVAIL (1LL<<0) | ||
77 | #define IBS_CAPS_RDWROPCNT (1LL<<3) | ||
78 | #define IBS_CAPS_OPCNT (1LL<<4) | ||
78 | 79 | ||
79 | static void op_mux_fill_in_addresses(struct op_msrs * const msrs) | 80 | /* |
81 | * IBS randomization macros | ||
82 | */ | ||
83 | #define IBS_RANDOM_BITS 12 | ||
84 | #define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1) | ||
85 | #define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5)) | ||
86 | |||
87 | static u32 get_ibs_caps(void) | ||
80 | { | 88 | { |
81 | int i; | 89 | u32 ibs_caps; |
90 | unsigned int max_level; | ||
82 | 91 | ||
83 | for (i = 0; i < NUM_VIRT_COUNTERS; i++) { | 92 | if (!boot_cpu_has(X86_FEATURE_IBS)) |
84 | int hw_counter = op_x86_virt_to_phys(i); | 93 | return 0; |
85 | if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | 94 | |
86 | msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; | 95 | /* check IBS cpuid feature flags */ |
87 | else | 96 | max_level = cpuid_eax(0x80000000); |
88 | msrs->multiplex[i].addr = 0; | 97 | if (max_level < IBS_CPUID_FEATURES) |
89 | } | 98 | return IBS_CAPS_AVAIL; |
99 | |||
100 | ibs_caps = cpuid_eax(IBS_CPUID_FEATURES); | ||
101 | if (!(ibs_caps & IBS_CAPS_AVAIL)) | ||
102 | /* cpuid flags not valid */ | ||
103 | return IBS_CAPS_AVAIL; | ||
104 | |||
105 | return ibs_caps; | ||
90 | } | 106 | } |
91 | 107 | ||
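get_ibs_caps() above probes CPUID leaf 0x8000001B (Fn8000_001B_EAX) after verifying the maximum extended leaf. Below is a hedged userspace sketch using the GCC/Clang <cpuid.h> helper; unlike the kernel version, it simply returns 0 instead of falling back to a bare IBS_CAPS_AVAIL when the leaf or its valid bit is missing.

```c
#include <stdio.h>
#include <cpuid.h>   /* GCC/Clang helper; x86 only */

#define IBS_CPUID_FEATURES 0x8000001b
#define IBS_CAPS_AVAIL     (1u << 0)

/* Sketch of get_ibs_caps(): probe the extended-leaf ceiling first,
 * then read the IBS feature word from EAX of leaf 0x8000001B. */
static unsigned int get_ibs_caps(void)
{
    unsigned int eax, ebx, ecx, edx;

    __cpuid(0x80000000, eax, ebx, ecx, edx);
    if (eax < IBS_CPUID_FEATURES)
        return 0;   /* leaf absent; the kernel falls back to IBS_CAPS_AVAIL */

    __cpuid(IBS_CPUID_FEATURES, eax, ebx, ecx, edx);
    if (!(eax & IBS_CAPS_AVAIL))
        return 0;   /* cpuid flags not valid */
    return eax;
}

int main(void)
{
    printf("ibs_caps = 0x%08x\n", get_ibs_caps());
    return 0;
}
```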
108 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
109 | |||
92 | static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | 110 | static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, |
93 | struct op_msrs const * const msrs) | 111 | struct op_msrs const * const msrs) |
94 | { | 112 | { |
@@ -98,7 +116,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | |||
98 | /* enable active counters */ | 116 | /* enable active counters */ |
99 | for (i = 0; i < NUM_COUNTERS; ++i) { | 117 | for (i = 0; i < NUM_COUNTERS; ++i) { |
100 | int virt = op_x86_phys_to_virt(i); | 118 | int virt = op_x86_phys_to_virt(i); |
101 | if (!counter_config[virt].enabled) | 119 | if (!reset_value[virt]) |
102 | continue; | 120 | continue; |
103 | rdmsrl(msrs->controls[i].addr, val); | 121 | rdmsrl(msrs->controls[i].addr, val); |
104 | val &= model->reserved; | 122 | val &= model->reserved; |
@@ -107,10 +125,6 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | |||
107 | } | 125 | } |
108 | } | 126 | } |
109 | 127 | ||
110 | #else | ||
111 | |||
112 | static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { } | ||
113 | |||
114 | #endif | 128 | #endif |
115 | 129 | ||
116 | /* functions for op_amd_spec */ | 130 | /* functions for op_amd_spec */ |
@@ -122,18 +136,12 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) | |||
122 | for (i = 0; i < NUM_COUNTERS; i++) { | 136 | for (i = 0; i < NUM_COUNTERS; i++) { |
123 | if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | 137 | if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) |
124 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; | 138 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; |
125 | else | ||
126 | msrs->counters[i].addr = 0; | ||
127 | } | 139 | } |
128 | 140 | ||
129 | for (i = 0; i < NUM_CONTROLS; i++) { | 141 | for (i = 0; i < NUM_CONTROLS; i++) { |
130 | if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) | 142 | if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) |
131 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; | 143 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; |
132 | else | ||
133 | msrs->controls[i].addr = 0; | ||
134 | } | 144 | } |
135 | |||
136 | op_mux_fill_in_addresses(msrs); | ||
137 | } | 145 | } |
138 | 146 | ||
139 | static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | 147 | static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, |
@@ -144,7 +152,8 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
144 | 152 | ||
145 | /* setup reset_value */ | 153 | /* setup reset_value */ |
146 | for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { | 154 | for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { |
147 | if (counter_config[i].enabled) | 155 | if (counter_config[i].enabled |
156 | && msrs->counters[op_x86_virt_to_phys(i)].addr) | ||
148 | reset_value[i] = counter_config[i].count; | 157 | reset_value[i] = counter_config[i].count; |
149 | else | 158 | else |
150 | reset_value[i] = 0; | 159 | reset_value[i] = 0; |
@@ -152,9 +161,18 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
152 | 161 | ||
153 | /* clear all counters */ | 162 | /* clear all counters */ |
154 | for (i = 0; i < NUM_CONTROLS; ++i) { | 163 | for (i = 0; i < NUM_CONTROLS; ++i) { |
155 | if (unlikely(!msrs->controls[i].addr)) | 164 | if (unlikely(!msrs->controls[i].addr)) { |
165 | if (counter_config[i].enabled && !smp_processor_id()) | ||
166 | /* | ||
167 | * the counter is reserved; this happens on all | ||
168 | * cpus, so report it only for cpu #0 | ||
169 | */ | ||
170 | op_x86_warn_reserved(i); | ||
156 | continue; | 171 | continue; |
172 | } | ||
157 | rdmsrl(msrs->controls[i].addr, val); | 173 | rdmsrl(msrs->controls[i].addr, val); |
174 | if (val & ARCH_PERFMON_EVENTSEL_ENABLE) | ||
175 | op_x86_warn_in_use(i); | ||
158 | val &= model->reserved; | 176 | val &= model->reserved; |
159 | wrmsrl(msrs->controls[i].addr, val); | 177 | wrmsrl(msrs->controls[i].addr, val); |
160 | } | 178 | } |
@@ -169,9 +187,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
169 | /* enable active counters */ | 187 | /* enable active counters */ |
170 | for (i = 0; i < NUM_COUNTERS; ++i) { | 188 | for (i = 0; i < NUM_COUNTERS; ++i) { |
171 | int virt = op_x86_phys_to_virt(i); | 189 | int virt = op_x86_phys_to_virt(i); |
172 | if (!counter_config[virt].enabled) | 190 | if (!reset_value[virt]) |
173 | continue; | ||
174 | if (!msrs->counters[i].addr) | ||
175 | continue; | 191 | continue; |
176 | 192 | ||
177 | /* setup counter registers */ | 193 | /* setup counter registers */ |
@@ -185,7 +201,60 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
185 | } | 201 | } |
186 | } | 202 | } |
187 | 203 | ||
188 | #ifdef CONFIG_OPROFILE_IBS | 204 | /* |
205 | * 16-bit Linear Feedback Shift Register (LFSR) | ||
206 | * | ||
207 | * 16 14 13 11 | ||
208 | * Feedback polynomial = X + X + X + X + 1 | ||
209 | */ | ||
210 | static unsigned int lfsr_random(void) | ||
211 | { | ||
212 | static unsigned int lfsr_value = 0xF00D; | ||
213 | unsigned int bit; | ||
214 | |||
215 | /* Compute next bit to shift in */ | ||
216 | bit = ((lfsr_value >> 0) ^ | ||
217 | (lfsr_value >> 2) ^ | ||
218 | (lfsr_value >> 3) ^ | ||
219 | (lfsr_value >> 5)) & 0x0001; | ||
220 | |||
221 | /* Advance to next register value */ | ||
222 | lfsr_value = (lfsr_value >> 1) | (bit << 15); | ||
223 | |||
224 | return lfsr_value; | ||
225 | } | ||
226 | |||
227 | /* | ||
228 | * IBS software randomization | ||
229 | * | ||
230 | * The IBS periodic op counter is randomized in software. The lower 12 | ||
231 | * bits of the 20 bit counter are randomized. IbsOpCurCnt is | ||
232 | * initialized with a 12 bit random value. | ||
233 | */ | ||
234 | static inline u64 op_amd_randomize_ibs_op(u64 val) | ||
235 | { | ||
236 | unsigned int random = lfsr_random(); | ||
237 | |||
238 | if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) | ||
239 | /* | ||
240 | * Workaround for hardware that cannot write to IbsOpCurCnt | ||
241 | * | ||
242 | * Randomize the lower 8 bits of the 16 bit | ||
243 | * IbsOpMaxCnt [15:0] value in the range of -128 to | ||
244 | * +127 by adding/subtracting an offset to the | ||
245 | * maximum count (IbsOpMaxCnt). | ||
246 | * | ||
247 | * To avoid overflows or underflows and to protect the | ||
248 | * upper bits starting at bit 16, the initial value for | ||
249 | * IbsOpMaxCnt must fit in the range from 0x0081 to | ||
250 | * 0xff80. | ||
251 | */ | ||
252 | val += (s8)(random >> 4); | ||
253 | else | ||
254 | val |= (u64)(random & IBS_RANDOM_MASK) << 32; | ||
255 | |||
256 | return val; | ||
257 | } | ||
189 | 258 | ||
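The LFSR above implements the feedback polynomial x^16 + x^14 + x^13 + x^11 + 1; taps 16, 14, 13 and 11 become bit offsets 0, 2, 3 and 5 of the right-shifting register. A standalone copy that prints the first few values from the same 0xF00D seed:

```c
#include <stdio.h>

/* 16-bit Fibonacci LFSR, polynomial x^16 + x^14 + x^13 + x^11 + 1.
 * The taps at 16, 14, 13, 11 become bit offsets 0, 2, 3, 5. */
static unsigned int lfsr_random(void)
{
    static unsigned int lfsr_value = 0xF00D;
    unsigned int bit;

    /* Compute the next bit to shift in */
    bit = ((lfsr_value >> 0) ^
           (lfsr_value >> 2) ^
           (lfsr_value >> 3) ^
           (lfsr_value >> 5)) & 0x0001;

    /* Advance to the next register value */
    lfsr_value = (lfsr_value >> 1) | (bit << 15);
    return lfsr_value;
}

int main(void)
{
    /* A maximal-length 16-bit LFSR cycles through all 65535 nonzero states. */
    for (int i = 0; i < 8; i++)
        printf("0x%04x\n", lfsr_random());
    return 0;
}
```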
190 | static inline void | 259 | static inline void |
191 | op_amd_handle_ibs(struct pt_regs * const regs, | 260 | op_amd_handle_ibs(struct pt_regs * const regs, |
@@ -194,7 +263,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, | |||
194 | u64 val, ctl; | 263 | u64 val, ctl; |
195 | struct op_entry entry; | 264 | struct op_entry entry; |
196 | 265 | ||
197 | if (!has_ibs) | 266 | if (!ibs_caps) |
198 | return; | 267 | return; |
199 | 268 | ||
200 | if (ibs_config.fetch_enabled) { | 269 | if (ibs_config.fetch_enabled) { |
@@ -210,7 +279,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, | |||
210 | oprofile_write_commit(&entry); | 279 | oprofile_write_commit(&entry); |
211 | 280 | ||
212 | /* reenable the IRQ */ | 281 | /* reenable the IRQ */ |
213 | ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT_MASK); | 282 | ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT); |
214 | ctl |= IBS_FETCH_ENABLE; | 283 | ctl |= IBS_FETCH_ENABLE; |
215 | wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl); | 284 | wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl); |
216 | } | 285 | } |
@@ -236,8 +305,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, | |||
236 | oprofile_write_commit(&entry); | 305 | oprofile_write_commit(&entry); |
237 | 306 | ||
238 | /* reenable the IRQ */ | 307 | /* reenable the IRQ */ |
239 | ctl &= ~IBS_OP_VAL & 0xFFFFFFFF; | 308 | ctl = op_amd_randomize_ibs_op(ibs_op_ctl); |
240 | ctl |= IBS_OP_ENABLE; | ||
241 | wrmsrl(MSR_AMD64_IBSOPCTL, ctl); | 309 | wrmsrl(MSR_AMD64_IBSOPCTL, ctl); |
242 | } | 310 | } |
243 | } | 311 | } |
@@ -246,41 +314,57 @@ op_amd_handle_ibs(struct pt_regs * const regs, | |||
246 | static inline void op_amd_start_ibs(void) | 314 | static inline void op_amd_start_ibs(void) |
247 | { | 315 | { |
248 | u64 val; | 316 | u64 val; |
249 | if (has_ibs && ibs_config.fetch_enabled) { | 317 | |
250 | val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; | 318 | if (!ibs_caps) |
319 | return; | ||
320 | |||
321 | if (ibs_config.fetch_enabled) { | ||
322 | val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT; | ||
251 | val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; | 323 | val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; |
252 | val |= IBS_FETCH_ENABLE; | 324 | val |= IBS_FETCH_ENABLE; |
253 | wrmsrl(MSR_AMD64_IBSFETCHCTL, val); | 325 | wrmsrl(MSR_AMD64_IBSFETCHCTL, val); |
254 | } | 326 | } |
255 | 327 | ||
256 | if (has_ibs && ibs_config.op_enabled) { | 328 | if (ibs_config.op_enabled) { |
257 | val = (ibs_config.max_cnt_op >> 4) & 0xFFFF; | 329 | ibs_op_ctl = ibs_config.max_cnt_op >> 4; |
258 | val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; | 330 | if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) { |
259 | val |= IBS_OP_ENABLE; | 331 | /* |
332 | * IbsOpCurCnt not supported. See | ||
333 | * op_amd_randomize_ibs_op() for details. | ||
334 | */ | ||
335 | ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL); | ||
336 | } else { | ||
337 | /* | ||
338 | * The start value is randomized with a | ||
339 | * positive offset; we need to compensate for it | ||
340 | * with half of the randomized range. Also | ||
341 | * avoid underflows. | ||
342 | */ | ||
343 | ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET, | ||
344 | IBS_OP_MAX_CNT); | ||
345 | } | ||
346 | if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops) | ||
347 | ibs_op_ctl |= IBS_OP_CNT_CTL; | ||
348 | ibs_op_ctl |= IBS_OP_ENABLE; | ||
349 | val = op_amd_randomize_ibs_op(ibs_op_ctl); | ||
260 | wrmsrl(MSR_AMD64_IBSOPCTL, val); | 350 | wrmsrl(MSR_AMD64_IBSOPCTL, val); |
261 | } | 351 | } |
262 | } | 352 | } |
263 | 353 | ||
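When IbsOpCurCnt is not writable, op_amd_randomize_ibs_op() adds a signed 8-bit offset (-128..+127) to IbsOpMaxCnt[15:0], so op_amd_start_ibs() clamps the programmed value to [0x0081, 0xff80]: the worst-case sums then stay within 0x0001..0xffff and never disturb bit 16. A small sketch of that arithmetic, with the kernel's clamp() macro written out as a plain helper:

```c
#include <stdio.h>
#include <stdint.h>

/* Portable stand-in for the kernel's clamp() macro. */
static uint64_t clamp_u64(uint64_t v, uint64_t lo, uint64_t hi)
{
    return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
    /* Clamping to [0x0081, 0xff80] keeps max_cnt plus any s8 offset
     * within 0x0001..0xffff, so bit 16 is never touched. */
    uint64_t samples[] = { 0x0000, 0x0050, 0x4000, 0xffff };

    for (int i = 0; i < 4; i++) {
        uint64_t ctl = clamp_u64(samples[i], 0x0081, 0xff80);
        printf("max_cnt %#06llx -> clamped %#06llx, worst cases %#06llx / %#06llx\n",
               (unsigned long long)samples[i], (unsigned long long)ctl,
               (unsigned long long)(ctl - 128), (unsigned long long)(ctl + 127));
    }
    return 0;
}
```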
264 | static void op_amd_stop_ibs(void) | 354 | static void op_amd_stop_ibs(void) |
265 | { | 355 | { |
266 | if (has_ibs && ibs_config.fetch_enabled) | 356 | if (!ibs_caps) |
357 | return; | ||
358 | |||
359 | if (ibs_config.fetch_enabled) | ||
267 | /* clear max count and enable */ | 360 | /* clear max count and enable */ |
268 | wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); | 361 | wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); |
269 | 362 | ||
270 | if (has_ibs && ibs_config.op_enabled) | 363 | if (ibs_config.op_enabled) |
271 | /* clear max count and enable */ | 364 | /* clear max count and enable */ |
272 | wrmsrl(MSR_AMD64_IBSOPCTL, 0); | 365 | wrmsrl(MSR_AMD64_IBSOPCTL, 0); |
273 | } | 366 | } |
274 | 367 | ||
275 | #else | ||
276 | |||
277 | static inline void op_amd_handle_ibs(struct pt_regs * const regs, | ||
278 | struct op_msrs const * const msrs) { } | ||
279 | static inline void op_amd_start_ibs(void) { } | ||
280 | static inline void op_amd_stop_ibs(void) { } | ||
281 | |||
282 | #endif | ||
283 | |||
284 | static int op_amd_check_ctrs(struct pt_regs * const regs, | 368 | static int op_amd_check_ctrs(struct pt_regs * const regs, |
285 | struct op_msrs const * const msrs) | 369 | struct op_msrs const * const msrs) |
286 | { | 370 | { |
@@ -314,7 +398,7 @@ static void op_amd_start(struct op_msrs const * const msrs) | |||
314 | if (!reset_value[op_x86_phys_to_virt(i)]) | 398 | if (!reset_value[op_x86_phys_to_virt(i)]) |
315 | continue; | 399 | continue; |
316 | rdmsrl(msrs->controls[i].addr, val); | 400 | rdmsrl(msrs->controls[i].addr, val); |
317 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 401 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; |
318 | wrmsrl(msrs->controls[i].addr, val); | 402 | wrmsrl(msrs->controls[i].addr, val); |
319 | } | 403 | } |
320 | 404 | ||
@@ -334,7 +418,7 @@ static void op_amd_stop(struct op_msrs const * const msrs) | |||
334 | if (!reset_value[op_x86_phys_to_virt(i)]) | 418 | if (!reset_value[op_x86_phys_to_virt(i)]) |
335 | continue; | 419 | continue; |
336 | rdmsrl(msrs->controls[i].addr, val); | 420 | rdmsrl(msrs->controls[i].addr, val); |
337 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | 421 | val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; |
338 | wrmsrl(msrs->controls[i].addr, val); | 422 | wrmsrl(msrs->controls[i].addr, val); |
339 | } | 423 | } |
340 | 424 | ||
@@ -355,8 +439,6 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) | |||
355 | } | 439 | } |
356 | } | 440 | } |
357 | 441 | ||
358 | #ifdef CONFIG_OPROFILE_IBS | ||
359 | |||
360 | static u8 ibs_eilvt_off; | 442 | static u8 ibs_eilvt_off; |
361 | 443 | ||
362 | static inline void apic_init_ibs_nmi_per_cpu(void *arg) | 444 | static inline void apic_init_ibs_nmi_per_cpu(void *arg) |
@@ -405,45 +487,36 @@ static int init_ibs_nmi(void) | |||
405 | return 1; | 487 | return 1; |
406 | } | 488 | } |
407 | 489 | ||
408 | #ifdef CONFIG_NUMA | ||
409 | /* Sanity check */ | ||
410 | /* Works only for 64bit with proper numa implementation. */ | ||
411 | if (nodes != num_possible_nodes()) { | ||
412 | printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, " | ||
413 | "found: %d, expected %d", | ||
414 | nodes, num_possible_nodes()); | ||
415 | return 1; | ||
416 | } | ||
417 | #endif | ||
418 | return 0; | 490 | return 0; |
419 | } | 491 | } |
420 | 492 | ||
421 | /* uninitialize the APIC for the IBS interrupts if needed */ | 493 | /* uninitialize the APIC for the IBS interrupts if needed */ |
422 | static void clear_ibs_nmi(void) | 494 | static void clear_ibs_nmi(void) |
423 | { | 495 | { |
424 | if (has_ibs) | 496 | if (ibs_caps) |
425 | on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); | 497 | on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); |
426 | } | 498 | } |
427 | 499 | ||
428 | /* initialize the APIC for the IBS interrupts if available */ | 500 | /* initialize the APIC for the IBS interrupts if available */ |
429 | static void ibs_init(void) | 501 | static void ibs_init(void) |
430 | { | 502 | { |
431 | has_ibs = boot_cpu_has(X86_FEATURE_IBS); | 503 | ibs_caps = get_ibs_caps(); |
432 | 504 | ||
433 | if (!has_ibs) | 505 | if (!ibs_caps) |
434 | return; | 506 | return; |
435 | 507 | ||
436 | if (init_ibs_nmi()) { | 508 | if (init_ibs_nmi()) { |
437 | has_ibs = 0; | 509 | ibs_caps = 0; |
438 | return; | 510 | return; |
439 | } | 511 | } |
440 | 512 | ||
441 | printk(KERN_INFO "oprofile: AMD IBS detected\n"); | 513 | printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", |
514 | (unsigned)ibs_caps); | ||
442 | } | 515 | } |
443 | 516 | ||
444 | static void ibs_exit(void) | 517 | static void ibs_exit(void) |
445 | { | 518 | { |
446 | if (!has_ibs) | 519 | if (!ibs_caps) |
447 | return; | 520 | return; |
448 | 521 | ||
449 | clear_ibs_nmi(); | 522 | clear_ibs_nmi(); |
@@ -463,7 +536,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
463 | if (ret) | 536 | if (ret) |
464 | return ret; | 537 | return ret; |
465 | 538 | ||
466 | if (!has_ibs) | 539 | if (!ibs_caps) |
467 | return ret; | 540 | return ret; |
468 | 541 | ||
469 | /* model specific files */ | 542 | /* model specific files */ |
@@ -473,7 +546,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
473 | ibs_config.fetch_enabled = 0; | 546 | ibs_config.fetch_enabled = 0; |
474 | ibs_config.max_cnt_op = 250000; | 547 | ibs_config.max_cnt_op = 250000; |
475 | ibs_config.op_enabled = 0; | 548 | ibs_config.op_enabled = 0; |
476 | ibs_config.dispatched_ops = 1; | 549 | ibs_config.dispatched_ops = 0; |
477 | 550 | ||
478 | dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); | 551 | dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); |
479 | oprofilefs_create_ulong(sb, dir, "enable", | 552 | oprofilefs_create_ulong(sb, dir, "enable", |
@@ -488,8 +561,9 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
488 | &ibs_config.op_enabled); | 561 | &ibs_config.op_enabled); |
489 | oprofilefs_create_ulong(sb, dir, "max_count", | 562 | oprofilefs_create_ulong(sb, dir, "max_count", |
490 | &ibs_config.max_cnt_op); | 563 | &ibs_config.max_cnt_op); |
491 | oprofilefs_create_ulong(sb, dir, "dispatched_ops", | 564 | if (ibs_caps & IBS_CAPS_OPCNT) |
492 | &ibs_config.dispatched_ops); | 565 | oprofilefs_create_ulong(sb, dir, "dispatched_ops", |
566 | &ibs_config.dispatched_ops); | ||
493 | 567 | ||
494 | return 0; | 568 | return 0; |
495 | } | 569 | } |
@@ -507,19 +581,6 @@ static void op_amd_exit(void) | |||
507 | ibs_exit(); | 581 | ibs_exit(); |
508 | } | 582 | } |
509 | 583 | ||
510 | #else | ||
511 | |||
512 | /* no IBS support */ | ||
513 | |||
514 | static int op_amd_init(struct oprofile_operations *ops) | ||
515 | { | ||
516 | return 0; | ||
517 | } | ||
518 | |||
519 | static void op_amd_exit(void) {} | ||
520 | |||
521 | #endif /* CONFIG_OPROFILE_IBS */ | ||
522 | |||
523 | struct op_x86_model_spec op_amd_spec = { | 584 | struct op_x86_model_spec op_amd_spec = { |
524 | .num_counters = NUM_COUNTERS, | 585 | .num_counters = NUM_COUNTERS, |
525 | .num_controls = NUM_CONTROLS, | 586 | .num_controls = NUM_CONTROLS, |
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index ac6b354becdf..e6a160a4684a 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c | |||
@@ -394,12 +394,6 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs) | |||
394 | setup_num_counters(); | 394 | setup_num_counters(); |
395 | stag = get_stagger(); | 395 | stag = get_stagger(); |
396 | 396 | ||
397 | /* initialize some registers */ | ||
398 | for (i = 0; i < num_counters; ++i) | ||
399 | msrs->counters[i].addr = 0; | ||
400 | for (i = 0; i < num_controls; ++i) | ||
401 | msrs->controls[i].addr = 0; | ||
402 | |||
403 | /* the counter & cccr registers we pay attention to */ | 397 | /* the counter & cccr registers we pay attention to */ |
404 | for (i = 0; i < num_counters; ++i) { | 398 | for (i = 0; i < num_counters; ++i) { |
405 | addr = p4_counters[VIRT_CTR(stag, i)].counter_address; | 399 | addr = p4_counters[VIRT_CTR(stag, i)].counter_address; |
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 8eb05878554c..2bf90fafa7b5 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
@@ -37,15 +37,11 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs) | |||
37 | for (i = 0; i < num_counters; i++) { | 37 | for (i = 0; i < num_counters; i++) { |
38 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) | 38 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) |
39 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; | 39 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; |
40 | else | ||
41 | msrs->counters[i].addr = 0; | ||
42 | } | 40 | } |
43 | 41 | ||
44 | for (i = 0; i < num_counters; i++) { | 42 | for (i = 0; i < num_counters; i++) { |
45 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) | 43 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) |
46 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; | 44 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; |
47 | else | ||
48 | msrs->controls[i].addr = 0; | ||
49 | } | 45 | } |
50 | } | 46 | } |
51 | 47 | ||
@@ -57,7 +53,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, | |||
57 | int i; | 53 | int i; |
58 | 54 | ||
59 | if (!reset_value) { | 55 | if (!reset_value) { |
60 | reset_value = kmalloc(sizeof(reset_value[0]) * num_counters, | 56 | reset_value = kzalloc(sizeof(reset_value[0]) * num_counters, |
61 | GFP_ATOMIC); | 57 | GFP_ATOMIC); |
62 | if (!reset_value) | 58 | if (!reset_value) |
63 | return; | 59 | return; |
@@ -82,9 +78,18 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, | |||
82 | 78 | ||
83 | /* clear all counters */ | 79 | /* clear all counters */ |
84 | for (i = 0; i < num_counters; ++i) { | 80 | for (i = 0; i < num_counters; ++i) { |
85 | if (unlikely(!msrs->controls[i].addr)) | 81 | if (unlikely(!msrs->controls[i].addr)) { |
82 | if (counter_config[i].enabled && !smp_processor_id()) | ||
83 | /* | ||
84 | * the counter is reserved; this happens on all | ||
85 | * cpus, so report it only for cpu #0 | ||
86 | */ | ||
87 | op_x86_warn_reserved(i); | ||
86 | continue; | 88 | continue; |
89 | } | ||
87 | rdmsrl(msrs->controls[i].addr, val); | 90 | rdmsrl(msrs->controls[i].addr, val); |
91 | if (val & ARCH_PERFMON_EVENTSEL_ENABLE) | ||
92 | op_x86_warn_in_use(i); | ||
88 | val &= model->reserved; | 93 | val &= model->reserved; |
89 | wrmsrl(msrs->controls[i].addr, val); | 94 | wrmsrl(msrs->controls[i].addr, val); |
90 | } | 95 | } |
@@ -161,7 +166,7 @@ static void ppro_start(struct op_msrs const * const msrs) | |||
161 | for (i = 0; i < num_counters; ++i) { | 166 | for (i = 0; i < num_counters; ++i) { |
162 | if (reset_value[i]) { | 167 | if (reset_value[i]) { |
163 | rdmsrl(msrs->controls[i].addr, val); | 168 | rdmsrl(msrs->controls[i].addr, val); |
164 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 169 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; |
165 | wrmsrl(msrs->controls[i].addr, val); | 170 | wrmsrl(msrs->controls[i].addr, val); |
166 | } | 171 | } |
167 | } | 172 | } |
@@ -179,7 +184,7 @@ static void ppro_stop(struct op_msrs const * const msrs) | |||
179 | if (!reset_value[i]) | 184 | if (!reset_value[i]) |
180 | continue; | 185 | continue; |
181 | rdmsrl(msrs->controls[i].addr, val); | 186 | rdmsrl(msrs->controls[i].addr, val); |
182 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | 187 | val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; |
183 | wrmsrl(msrs->controls[i].addr, val); | 188 | wrmsrl(msrs->controls[i].addr, val); |
184 | } | 189 | } |
185 | } | 190 | } |
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 7b8e75d16081..ff82a755edd4 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h | |||
@@ -57,6 +57,26 @@ struct op_x86_model_spec { | |||
57 | 57 | ||
58 | struct op_counter_config; | 58 | struct op_counter_config; |
59 | 59 | ||
60 | static inline void op_x86_warn_in_use(int counter) | ||
61 | { | ||
62 | /* | ||
63 | * The warning indicates an already running counter. If | ||
64 | * oprofile doesn't collect data, then try using a different | ||
65 | * performance counter on your platform to monitor the desired | ||
66 | * event. Delete counter #%d from the desired event by editing | ||
67 | * the /usr/share/oprofile/%s/<cpu>/events file. If the event | ||
68 | * cannot be monitored by any other counter, contact your | ||
69 | * hardware or BIOS vendor. | ||
70 | */ | ||
71 | pr_warning("oprofile: counter #%d on cpu #%d may already be used\n", | ||
72 | counter, smp_processor_id()); | ||
73 | } | ||
74 | |||
75 | static inline void op_x86_warn_reserved(int counter) | ||
76 | { | ||
77 | pr_warning("oprofile: counter #%d is already reserved\n", counter); | ||
78 | } | ||
79 | |||
60 | extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, | 80 | extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, |
61 | struct op_counter_config *counter_config); | 81 | struct op_counter_config *counter_config); |
62 | extern int op_x86_phys_to_virt(int phys); | 82 | extern int op_x86_phys_to_virt(int phys); |
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 564b008a51c7..b110d97fb925 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile | |||
@@ -13,9 +13,10 @@ obj-$(CONFIG_X86_VISWS) += visws.o | |||
13 | 13 | ||
14 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o | 14 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o |
15 | 15 | ||
16 | obj-$(CONFIG_X86_MRST) += mrst.o | ||
17 | |||
16 | obj-y += common.o early.o | 18 | obj-y += common.o early.o |
17 | obj-y += amd_bus.o | 19 | obj-y += amd_bus.o bus_numa.o |
18 | obj-$(CONFIG_X86_64) += bus_numa.o intel_bus.o | ||
19 | 20 | ||
20 | ifeq ($(CONFIG_PCI_DEBUG),y) | 21 | ifeq ($(CONFIG_PCI_DEBUG),y) |
21 | EXTRA_CFLAGS += -DDEBUG | 22 | EXTRA_CFLAGS += -DDEBUG |
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 959e548a7039..44f83ce02470 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/init.h> | 3 | #include <linux/init.h> |
4 | #include <linux/irq.h> | 4 | #include <linux/irq.h> |
5 | #include <linux/dmi.h> | 5 | #include <linux/dmi.h> |
6 | #include <linux/slab.h> | ||
6 | #include <asm/numa.h> | 7 | #include <asm/numa.h> |
7 | #include <asm/pci_x86.h> | 8 | #include <asm/pci_x86.h> |
8 | 9 | ||
@@ -15,19 +16,94 @@ struct pci_root_info { | |||
15 | int busnum; | 16 | int busnum; |
16 | }; | 17 | }; |
17 | 18 | ||
19 | static bool pci_use_crs = true; | ||
20 | |||
21 | static int __init set_use_crs(const struct dmi_system_id *id) | ||
22 | { | ||
23 | pci_use_crs = true; | ||
24 | return 0; | ||
25 | } | ||
26 | |||
27 | static const struct dmi_system_id pci_use_crs_table[] __initconst = { | ||
28 | /* http://bugzilla.kernel.org/show_bug.cgi?id=14183 */ | ||
29 | { | ||
30 | .callback = set_use_crs, | ||
31 | .ident = "IBM System x3800", | ||
32 | .matches = { | ||
33 | DMI_MATCH(DMI_SYS_VENDOR, "IBM"), | ||
34 | DMI_MATCH(DMI_PRODUCT_NAME, "x3800"), | ||
35 | }, | ||
36 | }, | ||
37 | {} | ||
38 | }; | ||
39 | |||
40 | void __init pci_acpi_crs_quirks(void) | ||
41 | { | ||
42 | int year; | ||
43 | |||
44 | if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year < 2008) | ||
45 | pci_use_crs = false; | ||
46 | |||
47 | dmi_check_system(pci_use_crs_table); | ||
48 | |||
49 | /* | ||
50 | * If the user specifies "pci=use_crs" or "pci=nocrs" explicitly, that | ||
51 | * takes precedence over anything we figured out above. | ||
52 | */ | ||
53 | if (pci_probe & PCI_ROOT_NO_CRS) | ||
54 | pci_use_crs = false; | ||
55 | else if (pci_probe & PCI_USE__CRS) | ||
56 | pci_use_crs = true; | ||
57 | |||
58 | printk(KERN_INFO "PCI: %s host bridge windows from ACPI; " | ||
59 | "if necessary, use \"pci=%s\" and report a bug\n", | ||
60 | pci_use_crs ? "Using" : "Ignoring", | ||
61 | pci_use_crs ? "nocrs" : "use_crs"); | ||
62 | } | ||
63 | |||
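pci_acpi_crs_quirks() above layers three decisions: distrust _CRS on pre-2008 BIOSes, re-enable it for machines on the DMI whitelist, and let an explicit pci=use_crs / pci=nocrs on the command line override both. A userspace sketch of that precedence; the flag values and the DMI strings here are simplified stand-ins:

```c
#include <stdio.h>
#include <string.h>
#include <stdbool.h>

#define PCI_ROOT_NO_CRS 0x1u
#define PCI_USE__CRS    0x2u

/* Simplified model of pci_acpi_crs_quirks(): whitelist by DMI identity
 * or BIOS year, then let the command line win outright. */
static bool decide_use_crs(const char *vendor, const char *product,
                           int bios_year, unsigned int pci_probe)
{
    bool use_crs = true;

    if (bios_year && bios_year < 2008)
        use_crs = false;                       /* old BIOS: distrust _CRS */

    if (!strcmp(vendor, "IBM") && !strcmp(product, "x3800"))
        use_crs = true;                        /* DMI whitelist entry */

    if (pci_probe & PCI_ROOT_NO_CRS)           /* "pci=nocrs" */
        use_crs = false;
    else if (pci_probe & PCI_USE__CRS)         /* "pci=use_crs" */
        use_crs = true;

    return use_crs;
}

int main(void)
{
    printf("2006 BIOS, no override: %d\n", decide_use_crs("Acme", "Z", 2006, 0));
    printf("2006 BIOS, pci=use_crs: %d\n",
           decide_use_crs("Acme", "Z", 2006, PCI_USE__CRS));
    printf("IBM x3800, old BIOS:    %d\n", decide_use_crs("IBM", "x3800", 2005, 0));
    return 0;
}
```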
18 | static acpi_status | 64 | static acpi_status |
19 | resource_to_addr(struct acpi_resource *resource, | 65 | resource_to_addr(struct acpi_resource *resource, |
20 | struct acpi_resource_address64 *addr) | 66 | struct acpi_resource_address64 *addr) |
21 | { | 67 | { |
22 | acpi_status status; | 68 | acpi_status status; |
23 | 69 | struct acpi_resource_memory24 *memory24; | |
24 | status = acpi_resource_to_address64(resource, addr); | 70 | struct acpi_resource_memory32 *memory32; |
25 | if (ACPI_SUCCESS(status) && | 71 | struct acpi_resource_fixed_memory32 *fixed_memory32; |
26 | (addr->resource_type == ACPI_MEMORY_RANGE || | 72 | |
27 | addr->resource_type == ACPI_IO_RANGE) && | 73 | memset(addr, 0, sizeof(*addr)); |
28 | addr->address_length > 0 && | 74 | switch (resource->type) { |
29 | addr->producer_consumer == ACPI_PRODUCER) { | 75 | case ACPI_RESOURCE_TYPE_MEMORY24: |
76 | memory24 = &resource->data.memory24; | ||
77 | addr->resource_type = ACPI_MEMORY_RANGE; | ||
78 | addr->minimum = memory24->minimum; | ||
79 | addr->address_length = memory24->address_length; | ||
80 | addr->maximum = addr->minimum + addr->address_length - 1; | ||
30 | return AE_OK; | 81 | return AE_OK; |
82 | case ACPI_RESOURCE_TYPE_MEMORY32: | ||
83 | memory32 = &resource->data.memory32; | ||
84 | addr->resource_type = ACPI_MEMORY_RANGE; | ||
85 | addr->minimum = memory32->minimum; | ||
86 | addr->address_length = memory32->address_length; | ||
87 | addr->maximum = addr->minimum + addr->address_length - 1; | ||
88 | return AE_OK; | ||
89 | case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: | ||
90 | fixed_memory32 = &resource->data.fixed_memory32; | ||
91 | addr->resource_type = ACPI_MEMORY_RANGE; | ||
92 | addr->minimum = fixed_memory32->address; | ||
93 | addr->address_length = fixed_memory32->address_length; | ||
94 | addr->maximum = addr->minimum + addr->address_length - 1; | ||
95 | return AE_OK; | ||
96 | case ACPI_RESOURCE_TYPE_ADDRESS16: | ||
97 | case ACPI_RESOURCE_TYPE_ADDRESS32: | ||
98 | case ACPI_RESOURCE_TYPE_ADDRESS64: | ||
99 | status = acpi_resource_to_address64(resource, addr); | ||
100 | if (ACPI_SUCCESS(status) && | ||
101 | (addr->resource_type == ACPI_MEMORY_RANGE || | ||
102 | addr->resource_type == ACPI_IO_RANGE) && | ||
103 | addr->address_length > 0) { | ||
104 | return AE_OK; | ||
105 | } | ||
106 | break; | ||
31 | } | 107 | } |
32 | return AE_ERROR; | 108 | return AE_ERROR; |
33 | } | 109 | } |
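The rewritten resource_to_addr() now also accepts the three fixed-memory descriptor types, normalizing each base+length pair into a (minimum, maximum, address_length) triple with maximum = minimum + length - 1. A tiny sketch of that normalization; the struct here is illustrative, not the actual ACPICA type:

```c
#include <stdio.h>
#include <stdint.h>

/* Illustrative normalized form, akin to acpi_resource_address64. */
struct addr_range {
    uint64_t minimum;
    uint64_t maximum;
    uint64_t address_length;
};

/* Normalize a base+length memory descriptor the way resource_to_addr()
 * now does for MEMORY24/MEMORY32/FIXED_MEMORY32 resources. */
static struct addr_range normalize(uint64_t base, uint64_t length)
{
    struct addr_range a;

    a.minimum = base;
    a.address_length = length;
    a.maximum = base + length - 1;   /* inclusive end */
    return a;
}

int main(void)
{
    struct addr_range a = normalize(0xfed00000, 0x400);
    printf("window [%#llx-%#llx], length %#llx\n",
           (unsigned long long)a.minimum,
           (unsigned long long)a.maximum,
           (unsigned long long)a.address_length);
    return 0;
}
```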
@@ -45,20 +121,6 @@ count_resource(struct acpi_resource *acpi_res, void *data) | |||
45 | return AE_OK; | 121 | return AE_OK; |
46 | } | 122 | } |
47 | 123 | ||
48 | static int | ||
49 | bus_has_transparent_bridge(struct pci_bus *bus) | ||
50 | { | ||
51 | struct pci_dev *dev; | ||
52 | |||
53 | list_for_each_entry(dev, &bus->devices, bus_list) { | ||
54 | u16 class = dev->class >> 8; | ||
55 | |||
56 | if (class == PCI_CLASS_BRIDGE_PCI && dev->transparent) | ||
57 | return true; | ||
58 | } | ||
59 | return false; | ||
60 | } | ||
61 | |||
62 | static void | 124 | static void |
63 | align_resource(struct acpi_device *bridge, struct resource *res) | 125 | align_resource(struct acpi_device *bridge, struct resource *res) |
64 | { | 126 | { |
@@ -91,12 +153,8 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
91 | struct acpi_resource_address64 addr; | 153 | struct acpi_resource_address64 addr; |
92 | acpi_status status; | 154 | acpi_status status; |
93 | unsigned long flags; | 155 | unsigned long flags; |
94 | struct resource *root; | 156 | struct resource *root, *conflict; |
95 | int max_root_bus_resources = PCI_BUS_NUM_RESOURCES; | 157 | u64 start, end, max_len; |
96 | u64 start, end; | ||
97 | |||
98 | if (bus_has_transparent_bridge(info->bus)) | ||
99 | max_root_bus_resources -= 3; | ||
100 | 158 | ||
101 | status = resource_to_addr(acpi_res, &addr); | 159 | status = resource_to_addr(acpi_res, &addr); |
102 | if (!ACPI_SUCCESS(status)) | 160 | if (!ACPI_SUCCESS(status)) |
@@ -113,17 +171,19 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
113 | } else | 171 | } else |
114 | return AE_OK; | 172 | return AE_OK; |
115 | 173 | ||
174 | max_len = addr.maximum - addr.minimum + 1; | ||
175 | if (addr.address_length > max_len) { | ||
176 | dev_printk(KERN_DEBUG, &info->bridge->dev, | ||
177 | "host bridge window length %#llx doesn't fit in " | ||
178 | "%#llx-%#llx, trimming\n", | ||
179 | (unsigned long long) addr.address_length, | ||
180 | (unsigned long long) addr.minimum, | ||
181 | (unsigned long long) addr.maximum); | ||
182 | addr.address_length = max_len; | ||
183 | } | ||
184 | |||
116 | start = addr.minimum + addr.translation_offset; | 185 | start = addr.minimum + addr.translation_offset; |
117 | end = start + addr.address_length - 1; | 186 | end = start + addr.address_length - 1; |
118 | if (info->res_num >= max_root_bus_resources) { | ||
119 | if (pci_probe & PCI_USE__CRS) | ||
120 | printk(KERN_WARNING "PCI: Failed to allocate " | ||
121 | "0x%lx-0x%lx from %s for %s due to _CRS " | ||
122 | "returning more than %d resource descriptors\n", | ||
123 | (unsigned long) start, (unsigned long) end, | ||
124 | root->name, info->name, max_root_bus_resources); | ||
125 | return AE_OK; | ||
126 | } | ||
127 | 187 | ||
128 | res = &info->res[info->res_num]; | 188 | res = &info->res[info->res_num]; |
129 | res->name = info->name; | 189 | res->name = info->name; |
@@ -133,17 +193,20 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
133 | res->child = NULL; | 193 | res->child = NULL; |
134 | align_resource(info->bridge, res); | 194 | align_resource(info->bridge, res); |
135 | 195 | ||
136 | if (!(pci_probe & PCI_USE__CRS)) { | 196 | if (!pci_use_crs) { |
137 | dev_printk(KERN_DEBUG, &info->bridge->dev, | 197 | dev_printk(KERN_DEBUG, &info->bridge->dev, |
138 | "host bridge window %pR (ignored)\n", res); | 198 | "host bridge window %pR (ignored)\n", res); |
139 | return AE_OK; | 199 | return AE_OK; |
140 | } | 200 | } |
141 | 201 | ||
142 | if (insert_resource(root, res)) { | 202 | conflict = insert_resource_conflict(root, res); |
203 | if (conflict) { | ||
143 | dev_err(&info->bridge->dev, | 204 | dev_err(&info->bridge->dev, |
144 | "can't allocate host bridge window %pR\n", res); | 205 | "address space collision: host bridge window %pR " |
206 | "conflicts with %s %pR\n", | ||
207 | res, conflict->name, conflict); | ||
145 | } else { | 208 | } else { |
146 | info->bus->resource[info->res_num] = res; | 209 | pci_bus_add_resource(info->bus, res, 0); |
147 | info->res_num++; | 210 | info->res_num++; |
148 | if (addr.translation_offset) | 211 | if (addr.translation_offset) |
149 | dev_info(&info->bridge->dev, "host bridge window %pR " | 212 | dev_info(&info->bridge->dev, "host bridge window %pR " |
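The setup_resource() hunk above switches from insert_resource(), which only reports success or failure, to insert_resource_conflict(), which returns the overlapping resource so the error message can name the conflicting window. A toy model of that idiom over a flat interval list (the real kernel walks a resource tree):

```c
#include <stdio.h>
#include <stdint.h>

struct res {
    const char *name;
    uint64_t start, end;   /* inclusive bounds */
};

/* Toy insert_resource_conflict(): return the overlapping entry rather
 * than a bare error code, so the caller can log its name. */
static const struct res *insert_conflict(const struct res *tree, int n,
                                         const struct res *new_res)
{
    for (int i = 0; i < n; i++)
        if (new_res->start <= tree[i].end && new_res->end >= tree[i].start)
            return &tree[i];
    /* no overlap: the real function would link new_res in here */
    return NULL;
}

int main(void)
{
    struct res tree[] = { { "PCI Bus 0000:00", 0xa0000, 0xbffff } };
    struct res window  = { "host bridge window", 0xb0000, 0xcffff };

    const struct res *conflict = insert_conflict(tree, 1, &window);
    if (conflict)
        printf("address space collision: %s conflicts with %s\n",
               window.name, conflict->name);
    return 0;
}
```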
@@ -164,10 +227,8 @@ get_current_resources(struct acpi_device *device, int busnum, | |||
164 | struct pci_root_info info; | 227 | struct pci_root_info info; |
165 | size_t size; | 228 | size_t size; |
166 | 229 | ||
167 | if (!(pci_probe & PCI_USE__CRS)) | 230 | if (pci_use_crs) |
168 | dev_info(&device->dev, | 231 | pci_bus_remove_resources(bus); |
169 | "ignoring host bridge windows from ACPI; " | ||
170 | "boot with \"pci=use_crs\" to use them\n"); | ||
171 | 232 | ||
172 | info.bridge = device; | 233 | info.bridge = device; |
173 | info.bus = bus; | 234 | info.bus = bus; |
@@ -282,17 +343,14 @@ int __init pci_acpi_init(void) | |||
282 | { | 343 | { |
283 | struct pci_dev *dev = NULL; | 344 | struct pci_dev *dev = NULL; |
284 | 345 | ||
285 | if (pcibios_scanned) | ||
286 | return 0; | ||
287 | |||
288 | if (acpi_noirq) | 346 | if (acpi_noirq) |
289 | return 0; | 347 | return -ENODEV; |
290 | 348 | ||
291 | printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); | 349 | printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); |
292 | acpi_irq_penalty_init(); | 350 | acpi_irq_penalty_init(); |
293 | pcibios_scanned++; | ||
294 | pcibios_enable_irq = acpi_pci_irq_enable; | 351 | pcibios_enable_irq = acpi_pci_irq_enable; |
295 | pcibios_disable_irq = acpi_pci_irq_disable; | 352 | pcibios_disable_irq = acpi_pci_irq_disable; |
353 | x86_init.pci.init_irq = x86_init_noop; | ||
296 | 354 | ||
297 | if (pci_routeirq) { | 355 | if (pci_routeirq) { |
298 | /* | 356 | /* |
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 95ecbd495955..fc1e8fe07e5c 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -2,11 +2,11 @@ | |||
2 | #include <linux/pci.h> | 2 | #include <linux/pci.h> |
3 | #include <linux/topology.h> | 3 | #include <linux/topology.h> |
4 | #include <linux/cpu.h> | 4 | #include <linux/cpu.h> |
5 | #include <linux/range.h> | ||
6 | |||
5 | #include <asm/pci_x86.h> | 7 | #include <asm/pci_x86.h> |
6 | 8 | ||
7 | #ifdef CONFIG_X86_64 | ||
8 | #include <asm/pci-direct.h> | 9 | #include <asm/pci-direct.h> |
9 | #endif | ||
10 | 10 | ||
11 | #include "bus_numa.h" | 11 | #include "bus_numa.h" |
12 | 12 | ||
@@ -15,60 +15,6 @@ | |||
15 | * also get peer root bus resources for io and mmio | 15 | * also get peer root bus resources for io and mmio |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #ifdef CONFIG_X86_64 | ||
19 | |||
20 | #define RANGE_NUM 16 | ||
21 | |||
22 | struct res_range { | ||
23 | size_t start; | ||
24 | size_t end; | ||
25 | }; | ||
26 | |||
27 | static void __init update_range(struct res_range *range, size_t start, | ||
28 | size_t end) | ||
29 | { | ||
30 | int i; | ||
31 | int j; | ||
32 | |||
33 | for (j = 0; j < RANGE_NUM; j++) { | ||
34 | if (!range[j].end) | ||
35 | continue; | ||
36 | |||
37 | if (start <= range[j].start && end >= range[j].end) { | ||
38 | range[j].start = 0; | ||
39 | range[j].end = 0; | ||
40 | continue; | ||
41 | } | ||
42 | |||
43 | if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) { | ||
44 | range[j].start = end + 1; | ||
45 | continue; | ||
46 | } | ||
47 | |||
48 | |||
49 | if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) { | ||
50 | range[j].end = start - 1; | ||
51 | continue; | ||
52 | } | ||
53 | |||
54 | if (start > range[j].start && end < range[j].end) { | ||
55 | /* find the new spare */ | ||
56 | for (i = 0; i < RANGE_NUM; i++) { | ||
57 | if (range[i].end == 0) | ||
58 | break; | ||
59 | } | ||
60 | if (i < RANGE_NUM) { | ||
61 | range[i].end = range[j].end; | ||
62 | range[i].start = end + 1; | ||
63 | } else { | ||
64 | printk(KERN_ERR "run of slot in ranges\n"); | ||
65 | } | ||
66 | range[j].end = start - 1; | ||
67 | continue; | ||
68 | } | ||
69 | } | ||
70 | } | ||
71 | |||
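amd_bus.c drops its private res_range/update_range() machinery in favor of the generic add_range()/subtract_range() helpers from <linux/range.h>; those helpers take half-open intervals with an exclusive end, which is why every converted call site below passes `end + 1`. A compact userspace model of the subtraction semantics under that half-open assumption:

```c
#include <stdio.h>
#include <stdint.h>

#define RANGE_NUM 16

struct range { uint64_t start, end; };   /* half-open: [start, end) */

/* Userspace model of subtract_range(): punch [start, end) out of every
 * stored interval, splitting an interval when the hole is strictly
 * inside it.  An interval with end == 0 is an empty slot. */
static void subtract_range(struct range *r, int az, uint64_t start, uint64_t end)
{
    for (int j = 0; j < az; j++) {
        if (!r[j].end)
            continue;
        if (start <= r[j].start && end >= r[j].end) {
            r[j].start = r[j].end = 0;            /* fully covered */
        } else if (start <= r[j].start && end > r[j].start) {
            r[j].start = end;                     /* trim the front */
        } else if (start < r[j].end && end >= r[j].end) {
            r[j].end = start;                     /* trim the back */
        } else if (start > r[j].start && end < r[j].end) {
            for (int i = 0; i < az; i++)          /* split in two */
                if (!r[i].end) {
                    r[i].start = end;
                    r[i].end = r[j].end;
                    break;
                }
            r[j].end = start;
        }
    }
}

int main(void)
{
    struct range r[RANGE_NUM] = { { 0, 0x10000 } };  /* the io-port space */

    subtract_range(r, RANGE_NUM, 0x1000, 0x2000);    /* one node's window */
    for (int i = 0; i < RANGE_NUM; i++)
        if (r[i].end)
            printf("left over: [%#llx, %#llx)\n",
                   (unsigned long long)r[i].start,
                   (unsigned long long)r[i].end);
    return 0;
}
```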
72 | struct pci_hostbridge_probe { | 18 | struct pci_hostbridge_probe { |
73 | u32 bus; | 19 | u32 bus; |
74 | u32 slot; | 20 | u32 slot; |
@@ -111,6 +57,8 @@ static void __init get_pci_mmcfg_amd_fam10h_range(void) | |||
111 | fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; | 57 | fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; |
112 | } | 58 | } |
113 | 59 | ||
60 | #define RANGE_NUM 16 | ||
61 | |||
114 | /** | 62 | /** |
115 | * early_fill_mp_bus_to_node() | 63 | * early_fill_mp_bus_to_node() |
116 | * called before pcibios_scan_root and pci_scan_bus | 64 | * called before pcibios_scan_root and pci_scan_bus |
@@ -130,16 +78,17 @@ static int __init early_fill_mp_bus_info(void) | |||
130 | struct pci_root_info *info; | 78 | struct pci_root_info *info; |
131 | u32 reg; | 79 | u32 reg; |
132 | struct resource *res; | 80 | struct resource *res; |
133 | size_t start; | 81 | u64 start; |
134 | size_t end; | 82 | u64 end; |
135 | struct res_range range[RANGE_NUM]; | 83 | struct range range[RANGE_NUM]; |
136 | u64 val; | 84 | u64 val; |
137 | u32 address; | 85 | u32 address; |
86 | bool found; | ||
138 | 87 | ||
139 | if (!early_pci_allowed()) | 88 | if (!early_pci_allowed()) |
140 | return -1; | 89 | return -1; |
141 | 90 | ||
142 | found_all_numa_early = 0; | 91 | found = false; |
143 | for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { | 92 | for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { |
144 | u32 id; | 93 | u32 id; |
145 | u16 device; | 94 | u16 device; |
@@ -153,12 +102,12 @@ static int __init early_fill_mp_bus_info(void) | |||
153 | device = (id>>16) & 0xffff; | 102 | device = (id>>16) & 0xffff; |
154 | if (pci_probes[i].vendor == vendor && | 103 | if (pci_probes[i].vendor == vendor && |
155 | pci_probes[i].device == device) { | 104 | pci_probes[i].device == device) { |
156 | found_all_numa_early = 1; | 105 | found = true; |
157 | break; | 106 | break; |
158 | } | 107 | } |
159 | } | 108 | } |
160 | 109 | ||
161 | if (!found_all_numa_early) | 110 | if (!found) |
162 | return 0; | 111 | return 0; |
163 | 112 | ||
164 | pci_root_num = 0; | 113 | pci_root_num = 0; |
@@ -196,7 +145,7 @@ static int __init early_fill_mp_bus_info(void) | |||
196 | def_link = (reg >> 8) & 0x03; | 145 | def_link = (reg >> 8) & 0x03; |
197 | 146 | ||
198 | memset(range, 0, sizeof(range)); | 147 | memset(range, 0, sizeof(range)); |
199 | range[0].end = 0xffff; | 148 | add_range(range, RANGE_NUM, 0, 0, 0xffff + 1); |
200 | /* io port resource */ | 149 | /* io port resource */ |
201 | for (i = 0; i < 4; i++) { | 150 | for (i = 0; i < 4; i++) { |
202 | reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3)); | 151 | reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3)); |
@@ -220,13 +169,13 @@ static int __init early_fill_mp_bus_info(void) | |||
220 | 169 | ||
221 | info = &pci_root_info[j]; | 170 | info = &pci_root_info[j]; |
222 | printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", | 171 | printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", |
223 | node, link, (u64)start, (u64)end); | 172 | node, link, start, end); |
224 | 173 | ||
225 | /* the kernel only handles 16-bit io ports */ | 174 | /* the kernel only handles 16-bit io ports */ |
226 | if (end > 0xffff) | 175 | if (end > 0xffff) |
227 | end = 0xffff; | 176 | end = 0xffff; |
228 | update_res(info, start, end, IORESOURCE_IO, 1); | 177 | update_res(info, start, end, IORESOURCE_IO, 1); |
229 | update_range(range, start, end); | 178 | subtract_range(range, RANGE_NUM, start, end + 1); |
230 | } | 179 | } |
231 | /* add left over io port range to def node/link, [0, 0xffff] */ | 180 | /* add left over io port range to def node/link, [0, 0xffff] */ |
232 | /* find the position */ | 181 | /* find the position */ |
@@ -241,29 +190,32 @@ static int __init early_fill_mp_bus_info(void) | |||
241 | if (!range[i].end) | 190 | if (!range[i].end) |
242 | continue; | 191 | continue; |
243 | 192 | ||
244 | update_res(info, range[i].start, range[i].end, | 193 | update_res(info, range[i].start, range[i].end - 1, |
245 | IORESOURCE_IO, 1); | 194 | IORESOURCE_IO, 1); |
246 | } | 195 | } |
247 | } | 196 | } |
248 | 197 | ||
249 | memset(range, 0, sizeof(range)); | 198 | memset(range, 0, sizeof(range)); |
250 | /* 0xfd00000000-0xffffffffff for HT */ | 199 | /* 0xfd00000000-0xffffffffff for HT */ |
251 | range[0].end = (0xfdULL<<32) - 1; | 200 | end = cap_resource((0xfdULL<<32) - 1); |
201 | end++; | ||
202 | add_range(range, RANGE_NUM, 0, 0, end); | ||
252 | 203 | ||
253 | /* need to take out [0, TOM) for RAM*/ | 204 | /* need to take out [0, TOM) for RAM*/ |
254 | address = MSR_K8_TOP_MEM1; | 205 | address = MSR_K8_TOP_MEM1; |
255 | rdmsrl(address, val); | 206 | rdmsrl(address, val); |
256 | end = (val & 0xffffff800000ULL); | 207 | end = (val & 0xffffff800000ULL); |
257 | printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); | 208 | printk(KERN_INFO "TOM: %016llx aka %lldM\n", end, end>>20); |
258 | if (end < (1ULL<<32)) | 209 | if (end < (1ULL<<32)) |
259 | update_range(range, 0, end - 1); | 210 | subtract_range(range, RANGE_NUM, 0, end); |
260 | 211 | ||
261 | /* get mmconfig */ | 212 | /* get mmconfig */ |
262 | get_pci_mmcfg_amd_fam10h_range(); | 213 | get_pci_mmcfg_amd_fam10h_range(); |
263 | /* need to take out mmconf range */ | 214 | /* need to take out mmconf range */ |
264 | if (fam10h_mmconf_end) { | 215 | if (fam10h_mmconf_end) { |
265 | printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); | 216 | printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); |
266 | update_range(range, fam10h_mmconf_start, fam10h_mmconf_end); | 217 | subtract_range(range, RANGE_NUM, fam10h_mmconf_start, |
218 | fam10h_mmconf_end + 1); | ||
267 | } | 219 | } |
268 | 220 | ||
269 | /* mmio resource */ | 221 | /* mmio resource */ |
@@ -293,7 +245,7 @@ static int __init early_fill_mp_bus_info(void) | |||
293 | info = &pci_root_info[j]; | 245 | info = &pci_root_info[j]; |
294 | 246 | ||
295 | printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]", | 247 | printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]", |
296 | node, link, (u64)start, (u64)end); | 248 | node, link, start, end); |
297 | /* | 249 | /* |
298 | * some sick allocations would overlap the fam10h | 250 | * some sick allocations would overlap the fam10h |
299 | * mmconf range, so we need to update start and end. | 251 | * mmconf range, so we need to update start and end. |
@@ -318,14 +270,15 @@ static int __init early_fill_mp_bus_info(void) | |||
318 | /* we got a hole */ | 270 | /* we got a hole */ |
319 | endx = fam10h_mmconf_start - 1; | 271 | endx = fam10h_mmconf_start - 1; |
320 | update_res(info, start, endx, IORESOURCE_MEM, 0); | 272 | update_res(info, start, endx, IORESOURCE_MEM, 0); |
321 | update_range(range, start, endx); | 273 | subtract_range(range, RANGE_NUM, start, |
322 | printk(KERN_CONT " ==> [%llx, %llx]", (u64)start, endx); | 274 | endx + 1); |
275 | printk(KERN_CONT " ==> [%llx, %llx]", start, endx); | ||
323 | start = fam10h_mmconf_end + 1; | 276 | start = fam10h_mmconf_end + 1; |
324 | changed = 1; | 277 | changed = 1; |
325 | } | 278 | } |
326 | if (changed) { | 279 | if (changed) { |
327 | if (start <= end) { | 280 | if (start <= end) { |
328 | printk(KERN_CONT " %s [%llx, %llx]", endx?"and":"==>", (u64)start, (u64)end); | 281 | printk(KERN_CONT " %s [%llx, %llx]", endx ? "and" : "==>", start, end); |
329 | } else { | 282 | } else { |
330 | printk(KERN_CONT "%s\n", endx?"":" ==> none"); | 283 | printk(KERN_CONT "%s\n", endx?"":" ==> none"); |
331 | continue; | 284 | continue; |
@@ -333,8 +286,9 @@ static int __init early_fill_mp_bus_info(void) | |||
333 | } | 286 | } |
334 | } | 287 | } |
335 | 288 | ||
336 | update_res(info, start, end, IORESOURCE_MEM, 1); | 289 | update_res(info, cap_resource(start), cap_resource(end), |
337 | update_range(range, start, end); | 290 | IORESOURCE_MEM, 1); |
291 | subtract_range(range, RANGE_NUM, start, end + 1); | ||
338 | printk(KERN_CONT "\n"); | 292 | printk(KERN_CONT "\n"); |
339 | } | 293 | } |
340 | 294 | ||
@@ -348,8 +302,8 @@ static int __init early_fill_mp_bus_info(void) | |||
348 | address = MSR_K8_TOP_MEM2; | 302 | address = MSR_K8_TOP_MEM2; |
349 | rdmsrl(address, val); | 303 | rdmsrl(address, val); |
350 | end = (val & 0xffffff800000ULL); | 304 | end = (val & 0xffffff800000ULL); |
351 | printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); | 305 | printk(KERN_INFO "TOM2: %016llx aka %lldM\n", end, end>>20); |
352 | update_range(range, 1ULL<<32, end - 1); | 306 | subtract_range(range, RANGE_NUM, 1ULL<<32, end); |
353 | } | 307 | } |
354 | 308 | ||
355 | /* | 309 | /* |
@@ -368,7 +322,8 @@ static int __init early_fill_mp_bus_info(void) | |||
368 | if (!range[i].end) | 322 | if (!range[i].end) |
369 | continue; | 323 | continue; |
370 | 324 | ||
371 | update_res(info, range[i].start, range[i].end, | 325 | update_res(info, cap_resource(range[i].start), |
326 | cap_resource(range[i].end - 1), | ||
372 | IORESOURCE_MEM, 1); | 327 | IORESOURCE_MEM, 1); |
373 | } | 328 | } |
374 | } | 329 | } |
@@ -384,24 +339,14 @@ static int __init early_fill_mp_bus_info(void) | |||
384 | info->bus_min, info->bus_max, info->node, info->link); | 339 | info->bus_min, info->bus_max, info->node, info->link); |
385 | for (j = 0; j < res_num; j++) { | 340 | for (j = 0; j < res_num; j++) { |
386 | res = &info->res[j]; | 341 | res = &info->res[j]; |
387 | printk(KERN_DEBUG "bus: %02x index %x %s: [%llx, %llx]\n", | 342 | printk(KERN_DEBUG "bus: %02x index %x %pR\n", |
388 | busnum, j, | 343 | busnum, j, res); |
389 | (res->flags & IORESOURCE_IO)?"io port":"mmio", | ||
390 | res->start, res->end); | ||
391 | } | 344 | } |
392 | } | 345 | } |
393 | 346 | ||
394 | return 0; | 347 | return 0; |
395 | } | 348 | } |
396 | 349 | ||
397 | #else /* !CONFIG_X86_64 */ | ||
398 | |||
399 | static int __init early_fill_mp_bus_info(void) { return 0; } | ||
400 | |||
401 | #endif /* !CONFIG_X86_64 */ | ||
402 | |||
403 | /* common 32/64 bit code */ | ||
404 | |||
405 | #define ENABLE_CF8_EXT_CFG (1ULL << 46) | 350 | #define ENABLE_CF8_EXT_CFG (1ULL << 46) |
406 | 351 | ||
407 | static void enable_pci_io_ecs(void *unused) | 352 | static void enable_pci_io_ecs(void *unused) |
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index 145df00e0387..64a122883896 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c | |||
@@ -1,11 +1,11 @@ | |||
1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
2 | #include <linux/pci.h> | 2 | #include <linux/pci.h> |
3 | #include <linux/range.h> | ||
3 | 4 | ||
4 | #include "bus_numa.h" | 5 | #include "bus_numa.h" |
5 | 6 | ||
6 | int pci_root_num; | 7 | int pci_root_num; |
7 | struct pci_root_info pci_root_info[PCI_ROOT_NR]; | 8 | struct pci_root_info pci_root_info[PCI_ROOT_NR]; |
8 | int found_all_numa_early; | ||
9 | 9 | ||
10 | void x86_pci_root_bus_res_quirks(struct pci_bus *b) | 10 | void x86_pci_root_bus_res_quirks(struct pci_bus *b) |
11 | { | 11 | { |
@@ -21,10 +21,6 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) | |||
21 | if (!pci_root_num) | 21 | if (!pci_root_num) |
22 | return; | 22 | return; |
23 | 23 | ||
24 | /* for amd, if only one root bus, don't need to do anything */ | ||
25 | if (pci_root_num < 2 && found_all_numa_early) | ||
26 | return; | ||
27 | |||
28 | for (i = 0; i < pci_root_num; i++) { | 24 | for (i = 0; i < pci_root_num; i++) { |
29 | if (pci_root_info[i].bus_min == b->number) | 25 | if (pci_root_info[i].bus_min == b->number) |
30 | break; | 26 | break; |
@@ -36,13 +32,14 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) | |||
36 | printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", | 32 | printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", |
37 | b->number); | 33 | b->number); |
38 | 34 | ||
35 | pci_bus_remove_resources(b); | ||
39 | info = &pci_root_info[i]; | 36 | info = &pci_root_info[i]; |
40 | for (j = 0; j < info->res_num; j++) { | 37 | for (j = 0; j < info->res_num; j++) { |
41 | struct resource *res; | 38 | struct resource *res; |
42 | struct resource *root; | 39 | struct resource *root; |
43 | 40 | ||
44 | res = &info->res[j]; | 41 | res = &info->res[j]; |
45 | b->resource[j] = res; | 42 | pci_bus_add_resource(b, res, 0); |
46 | if (res->flags & IORESOURCE_IO) | 43 | if (res->flags & IORESOURCE_IO) |
47 | root = &ioport_resource; | 44 | root = &ioport_resource; |
48 | else | 45 | else |
@@ -51,8 +48,8 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) | |||
51 | } | 48 | } |
52 | } | 49 | } |
53 | 50 | ||
54 | void __init update_res(struct pci_root_info *info, size_t start, | 51 | void __devinit update_res(struct pci_root_info *info, resource_size_t start, |
55 | size_t end, unsigned long flags, int merge) | 52 | resource_size_t end, unsigned long flags, int merge) |
56 | { | 53 | { |
57 | int i; | 54 | int i; |
58 | struct resource *res; | 55 | struct resource *res; |
@@ -60,25 +57,28 @@ void __init update_res(struct pci_root_info *info, size_t start, | |||
60 | if (start > end) | 57 | if (start > end) |
61 | return; | 58 | return; |
62 | 59 | ||
60 | if (start == MAX_RESOURCE) | ||
61 | return; | ||
62 | |||
63 | if (!merge) | 63 | if (!merge) |
64 | goto addit; | 64 | goto addit; |
65 | 65 | ||
66 | /* try to merge it with old one */ | 66 | /* try to merge it with old one */ |
67 | for (i = 0; i < info->res_num; i++) { | 67 | for (i = 0; i < info->res_num; i++) { |
68 | size_t final_start, final_end; | 68 | resource_size_t final_start, final_end; |
69 | size_t common_start, common_end; | 69 | resource_size_t common_start, common_end; |
70 | 70 | ||
71 | res = &info->res[i]; | 71 | res = &info->res[i]; |
72 | if (res->flags != flags) | 72 | if (res->flags != flags) |
73 | continue; | 73 | continue; |
74 | 74 | ||
75 | common_start = max((size_t)res->start, start); | 75 | common_start = max(res->start, start); |
76 | common_end = min((size_t)res->end, end); | 76 | common_end = min(res->end, end); |
77 | if (common_start > common_end + 1) | 77 | if (common_start > common_end + 1) |
78 | continue; | 78 | continue; |
79 | 79 | ||
80 | final_start = min((size_t)res->start, start); | 80 | final_start = min(res->start, start); |
81 | final_end = max((size_t)res->end, end); | 81 | final_end = max(res->end, end); |
82 | 82 | ||
83 | res->start = final_start; | 83 | res->start = final_start; |
84 | res->end = final_end; | 84 | res->end = final_end; |
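
update_res() now works in resource_size_t and, in merge mode, folds a new window into an existing one when the two overlap or are directly adjacent; the `common_start > common_end + 1` test above is the "keep looking" case. A toy restatement of that predicate (the `win` type and values are invented for illustration):

    #include <stdbool.h>
    #include <stdint.h>

    typedef uint64_t resource_size_t;   /* stand-in for the kernel typedef */

    struct win { resource_size_t start, end; };   /* inclusive bounds */

    static bool mergeable(struct win a, struct win b)
    {
        resource_size_t cs = a.start > b.start ? a.start : b.start;
        resource_size_t ce = a.end   < b.end   ? a.end   : b.end;
        /* true for overlap, and for adjacency: [0x1000,0x1fff] merges
         * with [0x2000,0x2fff] because 0x2000 <= 0x1fff + 1 */
        return cs <= ce + 1;
    }

The new MAX_RESOURCE early return appears to pair with cap_resource() on the caller side: windows clamped to the very top of the address space are dropped rather than merged.
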
diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h index adbc23fe82ac..804a4b40c31a 100644 --- a/arch/x86/pci/bus_numa.h +++ b/arch/x86/pci/bus_numa.h | |||
@@ -1,9 +1,8 @@ | |||
1 | #ifdef CONFIG_X86_64 | 1 | #ifndef __BUS_NUMA_H |
2 | 2 | #define __BUS_NUMA_H | |
3 | /* | 3 | /* |
4 | * sub bus (transparent) will use entries from 3 to store extra from | 4 | * sub bus (transparent) will use entries from 3 to store extra from |
5 | * root, so need to make sure we have enough slots there. Should we | 5 | * root, so need to make sure we have enough slots there. |
6 | * increase PCI_BUS_NUM_RESOURCES? | ||
7 | */ | 6 | */ |
8 | #define RES_NUM 16 | 7 | #define RES_NUM 16 |
9 | struct pci_root_info { | 8 | struct pci_root_info { |
@@ -20,8 +19,7 @@ struct pci_root_info { | |||
20 | #define PCI_ROOT_NR 4 | 19 | #define PCI_ROOT_NR 4 |
21 | extern int pci_root_num; | 20 | extern int pci_root_num; |
22 | extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; | 21 | extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; |
23 | extern int found_all_numa_early; | ||
24 | 22 | ||
25 | extern void update_res(struct pci_root_info *info, size_t start, | 23 | extern void update_res(struct pci_root_info *info, resource_size_t start, |
26 | size_t end, unsigned long flags, int merge); | 24 | resource_size_t end, unsigned long flags, int merge); |
27 | #endif | 25 | #endif |
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index d2552c68e94d..cf2e93869c48 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/ioport.h> | 9 | #include <linux/ioport.h> |
10 | #include <linux/init.h> | 10 | #include <linux/init.h> |
11 | #include <linux/dmi.h> | 11 | #include <linux/dmi.h> |
12 | #include <linux/slab.h> | ||
12 | 13 | ||
13 | #include <asm/acpi.h> | 14 | #include <asm/acpi.h> |
14 | #include <asm/segment.h> | 15 | #include <asm/segment.h> |
@@ -72,12 +73,6 @@ struct pci_ops pci_root_ops = { | |||
72 | }; | 73 | }; |
73 | 74 | ||
74 | /* | 75 | /* |
75 | * legacy, numa, and acpi all want to call pcibios_scan_root | ||
76 | * from their initcalls. This flag prevents that. | ||
77 | */ | ||
78 | int pcibios_scanned; | ||
79 | |||
80 | /* | ||
81 | * This interrupt-safe spinlock protects all accesses to PCI | 76 | * This interrupt-safe spinlock protects all accesses to PCI |
82 | * configuration space. | 77 | * configuration space. |
83 | */ | 78 | */ |
@@ -520,6 +515,9 @@ char * __devinit pcibios_setup(char *str) | |||
520 | } else if (!strcmp(str, "use_crs")) { | 515 | } else if (!strcmp(str, "use_crs")) { |
521 | pci_probe |= PCI_USE__CRS; | 516 | pci_probe |= PCI_USE__CRS; |
522 | return NULL; | 517 | return NULL; |
518 | } else if (!strcmp(str, "nocrs")) { | ||
519 | pci_probe |= PCI_ROOT_NO_CRS; | ||
520 | return NULL; | ||
523 | } else if (!strcmp(str, "earlydump")) { | 521 | } else if (!strcmp(str, "earlydump")) { |
524 | pci_early_dump_regs = 1; | 522 | pci_early_dump_regs = 1; |
525 | return NULL; | 523 | return NULL; |
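
The new "nocrs" branch mirrors the existing "use_crs" token, so it is reached by booting with pci=nocrs on the kernel command line; the PCI_ROOT_NO_CRS flag it sets is what the ACPI host-bridge code checks when deciding to ignore _CRS-reported resource windows.
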
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 5dc9e8c63fcd..46fd43f79103 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -60,22 +60,20 @@ skip_isa_ioresource_align(struct pci_dev *dev) { | |||
60 | * but we want to try to avoid allocating at 0x2900-0x2bff | 60 | * but we want to try to avoid allocating at 0x2900-0x2bff |
61 | * which might have been mirrored at 0x0100-0x03ff.. | 61 | * which might have been mirrored at 0x0100-0x03ff.. |
62 | */ | 62 | */ |
63 | void | 63 | resource_size_t |
64 | pcibios_align_resource(void *data, struct resource *res, | 64 | pcibios_align_resource(void *data, const struct resource *res, |
65 | resource_size_t size, resource_size_t align) | 65 | resource_size_t size, resource_size_t align) |
66 | { | 66 | { |
67 | struct pci_dev *dev = data; | 67 | struct pci_dev *dev = data; |
68 | resource_size_t start = res->start; | ||
68 | 69 | ||
69 | if (res->flags & IORESOURCE_IO) { | 70 | if (res->flags & IORESOURCE_IO) { |
70 | resource_size_t start = res->start; | ||
71 | |||
72 | if (skip_isa_ioresource_align(dev)) | 71 | if (skip_isa_ioresource_align(dev)) |
73 | return; | 72 | return start; |
74 | if (start & 0x300) { | 73 | if (start & 0x300) |
75 | start = (start + 0x3ff) & ~0x3ff; | 74 | start = (start + 0x3ff) & ~0x3ff; |
76 | res->start = start; | ||
77 | } | ||
78 | } | 75 | } |
76 | return start; | ||
79 | } | 77 | } |
80 | EXPORT_SYMBOL(pcibios_align_resource); | 78 | EXPORT_SYMBOL(pcibios_align_resource); |
81 | 79 | ||
@@ -129,9 +127,6 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) | |||
129 | continue; | 127 | continue; |
130 | if (!r->start || | 128 | if (!r->start || |
131 | pci_claim_resource(dev, idx) < 0) { | 129 | pci_claim_resource(dev, idx) < 0) { |
132 | dev_info(&dev->dev, | ||
133 | "can't reserve window %pR\n", | ||
134 | r); | ||
135 | /* | 130 | /* |
136 | * Something is wrong with the region. | 131 | * Something is wrong with the region. |
137 | * Invalidate the resource to prevent | 132 | * Invalidate the resource to prevent |
@@ -183,8 +178,6 @@ static void __init pcibios_allocate_resources(int pass) | |||
183 | "BAR %d: reserving %pr (d=%d, p=%d)\n", | 178 | "BAR %d: reserving %pr (d=%d, p=%d)\n", |
184 | idx, r, disabled, pass); | 179 | idx, r, disabled, pass); |
185 | if (pci_claim_resource(dev, idx) < 0) { | 180 | if (pci_claim_resource(dev, idx) < 0) { |
186 | dev_info(&dev->dev, | ||
187 | "can't reserve %pR\n", r); | ||
188 | /* We'll assign a new address later */ | 181 | /* We'll assign a new address later */ |
189 | r->end -= r->start; | 182 | r->end -= r->start; |
190 | r->start = 0; | 183 | r->start = 0; |
@@ -257,10 +250,6 @@ void __init pcibios_resource_survey(void) | |||
257 | */ | 250 | */ |
258 | fs_initcall(pcibios_assign_resources); | 251 | fs_initcall(pcibios_assign_resources); |
259 | 252 | ||
260 | void __weak x86_pci_root_bus_res_quirks(struct pci_bus *b) | ||
261 | { | ||
262 | } | ||
263 | |||
264 | /* | 253 | /* |
265 | * If we set up a device for bus mastering, we need to check the latency | 254 | * If we set up a device for bus mastering, we need to check the latency |
266 | * timer as certain crappy BIOSes forget to set it properly. | 255 | * timer as certain crappy BIOSes forget to set it properly. |
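
pcibios_align_resource() now returns the suggested start instead of poking it back into the resource, matching the new resource_size_t-returning alignf prototype. The ISA-alias dodge is a plain round-up: any I/O start whose 0x300 bits are set lands in the historically mirrored 0x100-0x3ff window and gets pushed to the next 1K boundary. A standalone check of the arithmetic:

    #include <stdio.h>
    #include <stdint.h>

    /* Round an I/O port start up past the mirrored 0x100-0x3ff window. */
    static uint64_t align_io_start(uint64_t start)
    {
        if (start & 0x300)                       /* inside an alias slot */
            start = (start + 0x3ff) & ~0x3ffULL; /* next 1K boundary */
        return start;
    }

    int main(void)
    {
        /* 0x2900 aliases 0x0100-0x03ff, so it moves up to 0x2c00 */
        printf("%#llx\n", (unsigned long long)align_io_start(0x2900));
        return 0;
    }
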
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index 25a1f8efed4a..adb62aaa7ecd 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c | |||
@@ -1,6 +1,7 @@ | |||
1 | #include <linux/pci.h> | 1 | #include <linux/pci.h> |
2 | #include <linux/init.h> | 2 | #include <linux/init.h> |
3 | #include <asm/pci_x86.h> | 3 | #include <asm/pci_x86.h> |
4 | #include <asm/x86_init.h> | ||
4 | 5 | ||
5 | /* arch_initcall ordering is too random, so call the initializers | 6 | /* arch_initcall ordering is too random, so call the initializers |
6 | in the right sequence from here. */ | 7 | in the right sequence from here. */ |
@@ -15,10 +16,9 @@ static __init int pci_arch_init(void) | |||
15 | if (!(pci_probe & PCI_PROBE_NOEARLY)) | 16 | if (!(pci_probe & PCI_PROBE_NOEARLY)) |
16 | pci_mmcfg_early_init(); | 17 | pci_mmcfg_early_init(); |
17 | 18 | ||
18 | #ifdef CONFIG_PCI_OLPC | 19 | if (x86_init.pci.arch_init && !x86_init.pci.arch_init()) |
19 | if (!pci_olpc_init()) | 20 | return 0; |
20 | return 0; /* skip additional checks if it's an XO */ | 21 | |
21 | #endif | ||
22 | #ifdef CONFIG_PCI_BIOS | 22 | #ifdef CONFIG_PCI_BIOS |
23 | pci_pcbios_init(); | 23 | pci_pcbios_init(); |
24 | #endif | 24 | #endif |
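
pci_arch_init() no longer hard-codes the OLPC probe; instead it consults x86_init.pci.arch_init, and a platform that returns 0 from that hook claims PCI setup and short-circuits the generic BIOS/direct probing. A hedged sketch of the opt-in (kernel context, not standalone; my_platform_pci_init is a hypothetical name):

    /* Early platform code installs its hook; returning 0 means
     * "handled, skip the remaining generic probes". */
    static int __init my_platform_pci_init(void)
    {
        /* install raw_pci_ops / pci_root_ops for this platform ... */
        return 0;
    }

    x86_init.pci.arch_init = my_platform_pci_init;

The same x86_init.pci struct carries the .init, .init_irq and .fixup_irqs hooks that the irq.c and legacy.c hunks below dispatch through.
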
diff --git a/arch/x86/pci/intel_bus.c b/arch/x86/pci/intel_bus.c deleted file mode 100644 index b7a55dc55d13..000000000000 --- a/arch/x86/pci/intel_bus.c +++ /dev/null | |||
@@ -1,90 +0,0 @@ | |||
1 | /* | ||
2 | * to read io range from IOH pci conf, need to do it after mmconfig is there | ||
3 | */ | ||
4 | |||
5 | #include <linux/delay.h> | ||
6 | #include <linux/dmi.h> | ||
7 | #include <linux/pci.h> | ||
8 | #include <linux/init.h> | ||
9 | #include <asm/pci_x86.h> | ||
10 | |||
11 | #include "bus_numa.h" | ||
12 | |||
13 | static inline void print_ioh_resources(struct pci_root_info *info) | ||
14 | { | ||
15 | int res_num; | ||
16 | int busnum; | ||
17 | int i; | ||
18 | |||
19 | printk(KERN_DEBUG "IOH bus: [%02x, %02x]\n", | ||
20 | info->bus_min, info->bus_max); | ||
21 | res_num = info->res_num; | ||
22 | busnum = info->bus_min; | ||
23 | for (i = 0; i < res_num; i++) { | ||
24 | struct resource *res; | ||
25 | |||
26 | res = &info->res[i]; | ||
27 | printk(KERN_DEBUG "IOH bus: %02x index %x %s: [%llx, %llx]\n", | ||
28 | busnum, i, | ||
29 | (res->flags & IORESOURCE_IO) ? "io port" : | ||
30 | "mmio", | ||
31 | res->start, res->end); | ||
32 | } | ||
33 | } | ||
34 | |||
35 | #define IOH_LIO 0x108 | ||
36 | #define IOH_LMMIOL 0x10c | ||
37 | #define IOH_LMMIOH 0x110 | ||
38 | #define IOH_LMMIOH_BASEU 0x114 | ||
39 | #define IOH_LMMIOH_LIMITU 0x118 | ||
40 | #define IOH_LCFGBUS 0x11c | ||
41 | |||
42 | static void __devinit pci_root_bus_res(struct pci_dev *dev) | ||
43 | { | ||
44 | u16 word; | ||
45 | u32 dword; | ||
46 | struct pci_root_info *info; | ||
47 | u16 io_base, io_end; | ||
48 | u32 mmiol_base, mmiol_end; | ||
49 | u64 mmioh_base, mmioh_end; | ||
50 | int bus_base, bus_end; | ||
51 | |||
52 | if (pci_root_num >= PCI_ROOT_NR) { | ||
53 | printk(KERN_DEBUG "intel_bus.c: PCI_ROOT_NR is too small\n"); | ||
54 | return; | ||
55 | } | ||
56 | |||
57 | info = &pci_root_info[pci_root_num]; | ||
58 | pci_root_num++; | ||
59 | |||
60 | pci_read_config_word(dev, IOH_LCFGBUS, &word); | ||
61 | bus_base = (word & 0xff); | ||
62 | bus_end = (word & 0xff00) >> 8; | ||
63 | sprintf(info->name, "PCI Bus #%02x", bus_base); | ||
64 | info->bus_min = bus_base; | ||
65 | info->bus_max = bus_end; | ||
66 | |||
67 | pci_read_config_word(dev, IOH_LIO, &word); | ||
68 | io_base = (word & 0xf0) << (12 - 4); | ||
69 | io_end = (word & 0xf000) | 0xfff; | ||
70 | update_res(info, io_base, io_end, IORESOURCE_IO, 0); | ||
71 | |||
72 | pci_read_config_dword(dev, IOH_LMMIOL, &dword); | ||
73 | mmiol_base = (dword & 0xff00) << (24 - 8); | ||
74 | mmiol_end = (dword & 0xff000000) | 0xffffff; | ||
75 | update_res(info, mmiol_base, mmiol_end, IORESOURCE_MEM, 0); | ||
76 | |||
77 | pci_read_config_dword(dev, IOH_LMMIOH, &dword); | ||
78 | mmioh_base = ((u64)(dword & 0xfc00)) << (26 - 10); | ||
79 | mmioh_end = ((u64)(dword & 0xfc000000) | 0x3ffffff); | ||
80 | pci_read_config_dword(dev, IOH_LMMIOH_BASEU, &dword); | ||
81 | mmioh_base |= ((u64)(dword & 0x7ffff)) << 32; | ||
82 | pci_read_config_dword(dev, IOH_LMMIOH_LIMITU, &dword); | ||
83 | mmioh_end |= ((u64)(dword & 0x7ffff)) << 32; | ||
84 | update_res(info, mmioh_base, mmioh_end, IORESOURCE_MEM, 0); | ||
85 | |||
86 | print_ioh_resources(info); | ||
87 | } | ||
88 | |||
89 | /* intel IOH */ | ||
90 | DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, pci_root_bus_res); | ||
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 0696d506c4ad..5d362b5ba06f 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
@@ -8,7 +8,6 @@ | |||
8 | #include <linux/kernel.h> | 8 | #include <linux/kernel.h> |
9 | #include <linux/pci.h> | 9 | #include <linux/pci.h> |
10 | #include <linux/init.h> | 10 | #include <linux/init.h> |
11 | #include <linux/slab.h> | ||
12 | #include <linux/interrupt.h> | 11 | #include <linux/interrupt.h> |
13 | #include <linux/dmi.h> | 12 | #include <linux/dmi.h> |
14 | #include <linux/io.h> | 13 | #include <linux/io.h> |
@@ -53,7 +52,7 @@ struct irq_router_handler { | |||
53 | int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device); | 52 | int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device); |
54 | }; | 53 | }; |
55 | 54 | ||
56 | int (*pcibios_enable_irq)(struct pci_dev *dev) = NULL; | 55 | int (*pcibios_enable_irq)(struct pci_dev *dev) = pirq_enable_irq; |
57 | void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL; | 56 | void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL; |
58 | 57 | ||
59 | /* | 58 | /* |
@@ -590,6 +589,8 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route | |||
590 | case PCI_DEVICE_ID_INTEL_ICH10_1: | 589 | case PCI_DEVICE_ID_INTEL_ICH10_1: |
591 | case PCI_DEVICE_ID_INTEL_ICH10_2: | 590 | case PCI_DEVICE_ID_INTEL_ICH10_2: |
592 | case PCI_DEVICE_ID_INTEL_ICH10_3: | 591 | case PCI_DEVICE_ID_INTEL_ICH10_3: |
592 | case PCI_DEVICE_ID_INTEL_CPT_LPC1: | ||
593 | case PCI_DEVICE_ID_INTEL_CPT_LPC2: | ||
593 | r->name = "PIIX/ICH"; | 594 | r->name = "PIIX/ICH"; |
594 | r->get = pirq_piix_get; | 595 | r->get = pirq_piix_get; |
595 | r->set = pirq_piix_set; | 596 | r->set = pirq_piix_set; |
@@ -1016,7 +1017,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
1016 | return 1; | 1017 | return 1; |
1017 | } | 1018 | } |
1018 | 1019 | ||
1019 | static void __init pcibios_fixup_irqs(void) | 1020 | void __init pcibios_fixup_irqs(void) |
1020 | { | 1021 | { |
1021 | struct pci_dev *dev = NULL; | 1022 | struct pci_dev *dev = NULL; |
1022 | u8 pin; | 1023 | u8 pin; |
@@ -1110,12 +1111,12 @@ static struct dmi_system_id __initdata pciirq_dmi_table[] = { | |||
1110 | { } | 1111 | { } |
1111 | }; | 1112 | }; |
1112 | 1113 | ||
1113 | int __init pcibios_irq_init(void) | 1114 | void __init pcibios_irq_init(void) |
1114 | { | 1115 | { |
1115 | DBG(KERN_DEBUG "PCI: IRQ init\n"); | 1116 | DBG(KERN_DEBUG "PCI: IRQ init\n"); |
1116 | 1117 | ||
1117 | if (pcibios_enable_irq || raw_pci_ops == NULL) | 1118 | if (raw_pci_ops == NULL) |
1118 | return 0; | 1119 | return; |
1119 | 1120 | ||
1120 | dmi_check_system(pciirq_dmi_table); | 1121 | dmi_check_system(pciirq_dmi_table); |
1121 | 1122 | ||
@@ -1142,9 +1143,7 @@ int __init pcibios_irq_init(void) | |||
1142 | pirq_table = NULL; | 1143 | pirq_table = NULL; |
1143 | } | 1144 | } |
1144 | 1145 | ||
1145 | pcibios_enable_irq = pirq_enable_irq; | 1146 | x86_init.pci.fixup_irqs(); |
1146 | |||
1147 | pcibios_fixup_irqs(); | ||
1148 | 1147 | ||
1149 | if (io_apic_assign_pci_irqs && pci_routeirq) { | 1148 | if (io_apic_assign_pci_irqs && pci_routeirq) { |
1150 | struct pci_dev *dev = NULL; | 1149 | struct pci_dev *dev = NULL; |
@@ -1157,8 +1156,6 @@ int __init pcibios_irq_init(void) | |||
1157 | for_each_pci_dev(dev) | 1156 | for_each_pci_dev(dev) |
1158 | pirq_enable_irq(dev); | 1157 | pirq_enable_irq(dev); |
1159 | } | 1158 | } |
1160 | |||
1161 | return 0; | ||
1162 | } | 1159 | } |
1163 | 1160 | ||
1164 | static void pirq_penalize_isa_irq(int irq, int active) | 1161 | static void pirq_penalize_isa_irq(int irq, int active) |
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 4061bb0f267d..0db5eaf54560 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c | |||
@@ -35,16 +35,13 @@ static void __devinit pcibios_fixup_peer_bridges(void) | |||
35 | } | 35 | } |
36 | } | 36 | } |
37 | 37 | ||
38 | static int __init pci_legacy_init(void) | 38 | int __init pci_legacy_init(void) |
39 | { | 39 | { |
40 | if (!raw_pci_ops) { | 40 | if (!raw_pci_ops) { |
41 | printk("PCI: System does not support PCI\n"); | 41 | printk("PCI: System does not support PCI\n"); |
42 | return 0; | 42 | return 0; |
43 | } | 43 | } |
44 | 44 | ||
45 | if (pcibios_scanned++) | ||
46 | return 0; | ||
47 | |||
48 | printk("PCI: Probing PCI hardware\n"); | 45 | printk("PCI: Probing PCI hardware\n"); |
49 | pci_root_bus = pcibios_scan_root(0); | 46 | pci_root_bus = pcibios_scan_root(0); |
50 | if (pci_root_bus) | 47 | if (pci_root_bus) |
@@ -55,18 +52,15 @@ static int __init pci_legacy_init(void) | |||
55 | 52 | ||
56 | int __init pci_subsys_init(void) | 53 | int __init pci_subsys_init(void) |
57 | { | 54 | { |
58 | #ifdef CONFIG_X86_NUMAQ | 55 | /* |
59 | pci_numaq_init(); | 56 | * The init function returns a non-zero value when |
60 | #endif | 57 | * pci_legacy_init should be invoked. |
61 | #ifdef CONFIG_ACPI | 58 | */ |
62 | pci_acpi_init(); | 59 | if (x86_init.pci.init()) |
63 | #endif | 60 | pci_legacy_init(); |
64 | #ifdef CONFIG_X86_VISWS | 61 | |
65 | pci_visws_init(); | ||
66 | #endif | ||
67 | pci_legacy_init(); | ||
68 | pcibios_fixup_peer_bridges(); | 62 | pcibios_fixup_peer_bridges(); |
69 | pcibios_irq_init(); | 63 | x86_init.pci.init_irq(); |
70 | pcibios_init(); | 64 | pcibios_init(); |
71 | 65 | ||
72 | return 0; | 66 | return 0; |
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index b19d1e54201e..39b9ebe8f886 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/sfi_acpi.h> | 16 | #include <linux/sfi_acpi.h> |
17 | #include <linux/bitmap.h> | 17 | #include <linux/bitmap.h> |
18 | #include <linux/dmi.h> | 18 | #include <linux/dmi.h> |
19 | #include <linux/slab.h> | ||
19 | #include <asm/e820.h> | 20 | #include <asm/e820.h> |
20 | #include <asm/pci_x86.h> | 21 | #include <asm/pci_x86.h> |
21 | #include <asm/acpi.h> | 22 | #include <asm/acpi.h> |
@@ -303,22 +304,17 @@ static void __init pci_mmcfg_check_end_bus_number(void) | |||
303 | { | 304 | { |
304 | struct pci_mmcfg_region *cfg, *cfgx; | 305 | struct pci_mmcfg_region *cfg, *cfgx; |
305 | 306 | ||
306 | /* last one*/ | 307 | /* Fixup overlaps */ |
307 | cfg = list_entry(pci_mmcfg_list.prev, typeof(*cfg), list); | ||
308 | if (cfg) | ||
309 | if (cfg->end_bus < cfg->start_bus) | ||
310 | cfg->end_bus = 255; | ||
311 | |||
312 | if (list_is_singular(&pci_mmcfg_list)) | ||
313 | return; | ||
314 | |||
315 | /* don't overlap please */ | ||
316 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { | 308 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { |
317 | if (cfg->end_bus < cfg->start_bus) | 309 | if (cfg->end_bus < cfg->start_bus) |
318 | cfg->end_bus = 255; | 310 | cfg->end_bus = 255; |
319 | 311 | ||
312 | /* Don't access the list head! */ | ||
313 | if (cfg->list.next == &pci_mmcfg_list) | ||
314 | break; | ||
315 | |||
320 | cfgx = list_entry(cfg->list.next, typeof(*cfg), list); | 316 | cfgx = list_entry(cfg->list.next, typeof(*cfg), list); |
321 | if (cfg != cfgx && cfg->end_bus >= cfgx->start_bus) | 317 | if (cfg->end_bus >= cfgx->start_bus) |
322 | cfg->end_bus = cfgx->start_bus - 1; | 318 | cfg->end_bus = cfgx->start_bus - 1; |
323 | } | 319 | } |
324 | } | 320 | } |
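
The rewritten end-bus check collapses the old "last entry" and "singular list" special cases into one pass over the sorted list: clamp a bogus end_bus to 255, then trim it to just below the next region's start_bus. An array-based restatement with invented regions:

    #include <stdio.h>

    struct mmcfg { int start_bus, end_bus; };

    int main(void)
    {
        /* hypothetical regions, sorted by start_bus */
        struct mmcfg cfg[] = { { 0x00, 0xff }, { 0x80, 0xff } };
        int n = 2, i;

        for (i = 0; i < n; i++) {
            if (cfg[i].end_bus < cfg[i].start_bus)
                cfg[i].end_bus = 255;          /* bogus end: assume max */
            if (i + 1 < n && cfg[i].end_bus >= cfg[i + 1].start_bus)
                cfg[i].end_bus = cfg[i + 1].start_bus - 1; /* trim overlap */
        }
        printf("[%02x-%02x] [%02x-%02x]\n",
               cfg[0].start_bus, cfg[0].end_bus,
               cfg[1].start_bus, cfg[1].end_bus);  /* [00-7f] [80-ff] */
        return 0;
    }
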
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c new file mode 100644 index 000000000000..8bf2fcb88d04 --- /dev/null +++ b/arch/x86/pci/mrst.c | |||
@@ -0,0 +1,262 @@ | |||
1 | /* | ||
2 | * Moorestown PCI support | ||
3 | * Copyright (c) 2008 Intel Corporation | ||
4 | * Jesse Barnes <jesse.barnes@intel.com> | ||
5 | * | ||
6 | * Moorestown has an interesting PCI implementation: | ||
7 | * - configuration space is memory mapped (as defined by MCFG) | ||
8 | * - Lincroft devices also have a real, type 1 configuration space | ||
9 | * - Early Lincroft silicon has a type 1 access bug that will cause | ||
10 | * a hang if non-existent devices are accessed | ||
11 | * - some devices have the "fixed BAR" capability, which means | ||
12 | * they can't be relocated or modified; check for that during | ||
13 | * BAR sizing | ||
14 | * | ||
15 | * So, we use the MCFG space for all reads and writes, but also send | ||
16 | * Lincroft writes to type 1 space. But only read/write if the device | ||
17 | * actually exists, otherwise return all 1s for reads and bit bucket | ||
18 | * the writes. | ||
19 | */ | ||
20 | |||
21 | #include <linux/sched.h> | ||
22 | #include <linux/pci.h> | ||
23 | #include <linux/ioport.h> | ||
24 | #include <linux/init.h> | ||
25 | #include <linux/dmi.h> | ||
26 | |||
27 | #include <asm/acpi.h> | ||
28 | #include <asm/segment.h> | ||
29 | #include <asm/io.h> | ||
30 | #include <asm/smp.h> | ||
31 | #include <asm/pci_x86.h> | ||
32 | #include <asm/hw_irq.h> | ||
33 | #include <asm/io_apic.h> | ||
34 | |||
35 | #define PCIE_CAP_OFFSET 0x100 | ||
36 | |||
37 | /* Fixed BAR fields */ | ||
38 | #define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00 /* Fixed BAR (TBD) */ | ||
39 | #define PCI_FIXED_BAR_0_SIZE 0x04 | ||
40 | #define PCI_FIXED_BAR_1_SIZE 0x08 | ||
41 | #define PCI_FIXED_BAR_2_SIZE 0x0c | ||
42 | #define PCI_FIXED_BAR_3_SIZE 0x10 | ||
43 | #define PCI_FIXED_BAR_4_SIZE 0x14 | ||
44 | #define PCI_FIXED_BAR_5_SIZE 0x1c | ||
45 | |||
46 | /** | ||
47 | * fixed_bar_cap - return the offset of the fixed BAR cap if found | ||
48 | * @bus: PCI bus | ||
49 | * @devfn: device in question | ||
50 | * | ||
51 | * Look for the fixed BAR cap on @bus and @devfn, returning its offset | ||
52 | * if found or 0 otherwise. | ||
53 | */ | ||
54 | static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn) | ||
55 | { | ||
56 | int pos; | ||
57 | u32 pcie_cap = 0, cap_data; | ||
58 | |||
59 | pos = PCIE_CAP_OFFSET; | ||
60 | |||
61 | if (!raw_pci_ext_ops) | ||
62 | return 0; | ||
63 | |||
64 | while (pos) { | ||
65 | if (raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, | ||
66 | devfn, pos, 4, &pcie_cap)) | ||
67 | return 0; | ||
68 | |||
69 | if (pcie_cap == 0xffffffff) | ||
70 | return 0; | ||
71 | |||
72 | if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) { | ||
73 | raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, | ||
74 | devfn, pos + 4, 4, &cap_data); | ||
75 | if ((cap_data & 0xffff) == PCIE_VNDR_CAP_ID_FIXED_BAR) | ||
76 | return pos; | ||
77 | } | ||
78 | |||
79 | pos = pcie_cap >> 20; | ||
80 | } | ||
81 | |||
82 | return 0; | ||
83 | } | ||
84 | |||
85 | static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn, | ||
86 | int reg, int len, u32 val, int offset) | ||
87 | { | ||
88 | u32 size; | ||
89 | unsigned int domain, busnum; | ||
90 | int bar = (reg - PCI_BASE_ADDRESS_0) >> 2; | ||
91 | |||
92 | domain = pci_domain_nr(bus); | ||
93 | busnum = bus->number; | ||
94 | |||
95 | if (val == ~0 && len == 4) { | ||
96 | unsigned long decode; | ||
97 | |||
98 | raw_pci_ext_ops->read(domain, busnum, devfn, | ||
99 | offset + 8 + (bar * 4), 4, &size); | ||
100 | |||
101 | /* Turn the size into a decode pattern for the sizing code */ | ||
102 | if (size) { | ||
103 | decode = size - 1; | ||
104 | decode |= decode >> 1; | ||
105 | decode |= decode >> 2; | ||
106 | decode |= decode >> 4; | ||
107 | decode |= decode >> 8; | ||
108 | decode |= decode >> 16; | ||
109 | decode++; | ||
110 | decode = ~(decode - 1); | ||
111 | } else { | ||
112 | decode = ~0; | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * If val is all ones, the core code is trying to size the reg, | ||
117 | * so update the mmconfig space with the real size. | ||
118 | * | ||
119 | * Note: this assumes the fixed size we got is a power of two. | ||
120 | */ | ||
121 | return raw_pci_ext_ops->write(domain, busnum, devfn, reg, 4, | ||
122 | decode); | ||
123 | } | ||
124 | |||
125 | /* This is some other kind of BAR write, so just do it. */ | ||
126 | return raw_pci_ext_ops->write(domain, busnum, devfn, reg, len, val); | ||
127 | } | ||
128 | |||
129 | /** | ||
130 | * type1_access_ok - check whether to use type 1 | ||
131 | * @bus: bus number | ||
132 | * @devfn: device & function in question | ||
133 | * | ||
134 | * If the bus is on a Lincroft chip and it exists, or is not on a Lincroft at | ||
135 | * all, then we can go ahead with any reads & writes. If it's on a Lincroft, | ||
136 | * but doesn't exist, avoid the access altogether to keep the chip from | ||
137 | * hanging. | ||
138 | */ | ||
139 | static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg) | ||
140 | { | ||
141 | /* This is a workaround for an A0 LNC bug where the PCI status register does | ||
142 | * not have the new CAP bit set and cannot be written by SW either. | ||
143 | * | ||
144 | * The PCI header type in real LNC indicates a single-function device, which | ||
145 | * would prevent probing the other functions of the device in the PCI | ||
146 | * shim. Therefore, use the header type in the shim instead. | ||
147 | */ | ||
148 | if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE) | ||
149 | return 0; | ||
150 | if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0))) | ||
151 | return 1; | ||
152 | return 0; /* langwell on others */ | ||
153 | } | ||
154 | |||
155 | static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, | ||
156 | int size, u32 *value) | ||
157 | { | ||
158 | if (type1_access_ok(bus->number, devfn, where)) | ||
159 | return pci_direct_conf1.read(pci_domain_nr(bus), bus->number, | ||
160 | devfn, where, size, value); | ||
161 | return raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, | ||
162 | devfn, where, size, value); | ||
163 | } | ||
164 | |||
165 | static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, | ||
166 | int size, u32 value) | ||
167 | { | ||
168 | int offset; | ||
169 | |||
170 | /* On MRST there is no PCI ROM BAR; dropping this write makes a subsequent | ||
171 | * read of the ROM BAR return 0, which is then ignored. | ||
172 | */ | ||
173 | if (where == PCI_ROM_ADDRESS) | ||
174 | return 0; | ||
175 | |||
176 | /* | ||
177 | * Devices with fixed BARs need special handling: | ||
178 | * - BAR sizing code will save, write ~0, read size, restore | ||
179 | * - so writes to fixed BARs need special handling | ||
180 | * - other writes to fixed BAR devices should go through mmconfig | ||
181 | */ | ||
182 | offset = fixed_bar_cap(bus, devfn); | ||
183 | if (offset && | ||
184 | (where >= PCI_BASE_ADDRESS_0 && where <= PCI_BASE_ADDRESS_5)) { | ||
185 | return pci_device_update_fixed(bus, devfn, where, size, value, | ||
186 | offset); | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * On Moorestown update both real & mmconfig space | ||
191 | * Note: early Lincroft silicon can't handle type 1 accesses to | ||
192 | * non-existent devices, so just eat the write in that case. | ||
193 | */ | ||
194 | if (type1_access_ok(bus->number, devfn, where)) | ||
195 | return pci_direct_conf1.write(pci_domain_nr(bus), bus->number, | ||
196 | devfn, where, size, value); | ||
197 | return raw_pci_ext_ops->write(pci_domain_nr(bus), bus->number, devfn, | ||
198 | where, size, value); | ||
199 | } | ||
200 | |||
201 | static int mrst_pci_irq_enable(struct pci_dev *dev) | ||
202 | { | ||
203 | u8 pin; | ||
204 | struct io_apic_irq_attr irq_attr; | ||
205 | |||
206 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); | ||
207 | |||
208 | /* MRST only has an IOAPIC; the PCI irq lines are 1:1 mapped to | ||
209 | * IOAPIC RTE entries, so we just enable the RTE for the device. | ||
210 | */ | ||
211 | irq_attr.ioapic = mp_find_ioapic(dev->irq); | ||
212 | irq_attr.ioapic_pin = dev->irq; | ||
213 | irq_attr.trigger = 1; /* level */ | ||
214 | irq_attr.polarity = 1; /* active low */ | ||
215 | io_apic_set_pci_routing(&dev->dev, dev->irq, &irq_attr); | ||
216 | |||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | struct pci_ops pci_mrst_ops = { | ||
221 | .read = pci_read, | ||
222 | .write = pci_write, | ||
223 | }; | ||
224 | |||
225 | /** | ||
226 | * pci_mrst_init - installs pci_mrst_ops | ||
227 | * | ||
228 | * Moorestown has an interesting PCI implementation (see above). | ||
229 | * Called when the early platform detection installs it. | ||
230 | */ | ||
231 | int __init pci_mrst_init(void) | ||
232 | { | ||
233 | printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n"); | ||
234 | pci_mmcfg_late_init(); | ||
235 | pcibios_enable_irq = mrst_pci_irq_enable; | ||
236 | pci_root_ops = pci_mrst_ops; | ||
237 | /* Continue with standard init */ | ||
238 | return 1; | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * Langwell devices reside at fixed offsets, don't try to move them. | ||
243 | */ | ||
244 | static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev) | ||
245 | { | ||
246 | unsigned long offset; | ||
247 | u32 size; | ||
248 | int i; | ||
249 | |||
250 | /* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */ | ||
251 | offset = fixed_bar_cap(dev->bus, dev->devfn); | ||
252 | if (!offset || PCI_DEVFN(2, 0) == dev->devfn || | ||
253 | PCI_DEVFN(2, 2) == dev->devfn) | ||
254 | return; | ||
255 | |||
256 | for (i = 0; i < PCI_ROM_RESOURCE; i++) { | ||
257 | pci_read_config_dword(dev, offset + 8 + (i * 4), &size); | ||
258 | dev->resource[i].end = dev->resource[i].start + size - 1; | ||
259 | dev->resource[i].flags |= IORESOURCE_PCI_FIXED; | ||
260 | } | ||
261 | } | ||
262 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixed_bar_fixup); | ||
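
The subtlest piece of the new mrst.c is pci_device_update_fixed(): when the core writes ~0 to size a BAR, the device's fixed-BAR size register has to be converted into the decode pattern the sizing code expects, i.e. all-ones above the (power-of-two-rounded) size and zeros below. A standalone check of that transform:

    #include <stdio.h>
    #include <stdint.h>

    /* Turn a fixed-BAR size into the decode pattern BAR sizing expects. */
    static uint32_t size_to_decode(uint32_t size)
    {
        uint32_t decode;

        if (!size)
            return ~0u;
        decode = size - 1;      /* smear the top set bit downwards ... */
        decode |= decode >> 1;
        decode |= decode >> 2;
        decode |= decode >> 4;
        decode |= decode >> 8;
        decode |= decode >> 16;
        decode++;               /* ... yielding the next power of two */
        return ~(decode - 1);   /* ones above the size, zeros below */
    }

    int main(void)
    {
        printf("%#x\n", size_to_decode(0x1000));  /* prints 0xfffff000 */
        return 0;
    }
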
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 8eb295e116f6..8223738ad806 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c | |||
@@ -8,9 +8,7 @@ | |||
8 | #include <asm/apic.h> | 8 | #include <asm/apic.h> |
9 | #include <asm/mpspec.h> | 9 | #include <asm/mpspec.h> |
10 | #include <asm/pci_x86.h> | 10 | #include <asm/pci_x86.h> |
11 | 11 | #include <asm/numaq.h> | |
12 | #define XQUAD_PORTIO_BASE 0xfe400000 | ||
13 | #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ | ||
14 | 12 | ||
15 | #define BUS2QUAD(global) (mp_bus_id_to_node[global]) | 13 | #define BUS2QUAD(global) (mp_bus_id_to_node[global]) |
16 | 14 | ||
@@ -18,8 +16,6 @@ | |||
18 | 16 | ||
19 | #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) | 17 | #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) |
20 | 18 | ||
21 | #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) | ||
22 | |||
23 | #define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \ | 19 | #define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \ |
24 | (0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3)) | 20 | (0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3)) |
25 | 21 | ||
@@ -152,14 +148,8 @@ int __init pci_numaq_init(void) | |||
152 | { | 148 | { |
153 | int quad; | 149 | int quad; |
154 | 150 | ||
155 | if (!found_numaq) | ||
156 | return 0; | ||
157 | |||
158 | raw_pci_ops = &pci_direct_conf1_mq; | 151 | raw_pci_ops = &pci_direct_conf1_mq; |
159 | 152 | ||
160 | if (pcibios_scanned++) | ||
161 | return 0; | ||
162 | |||
163 | pci_root_bus = pcibios_scan_root(0); | 153 | pci_root_bus = pcibios_scan_root(0); |
164 | if (pci_root_bus) | 154 | if (pci_root_bus) |
165 | pci_bus_add_devices(pci_root_bus); | 155 | pci_bus_add_devices(pci_root_bus); |
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c index b889d824f7c6..b34815408f58 100644 --- a/arch/x86/pci/olpc.c +++ b/arch/x86/pci/olpc.c | |||
@@ -304,9 +304,6 @@ static struct pci_raw_ops pci_olpc_conf = { | |||
304 | 304 | ||
305 | int __init pci_olpc_init(void) | 305 | int __init pci_olpc_init(void) |
306 | { | 306 | { |
307 | if (!machine_is_olpc() || olpc_has_vsa()) | ||
308 | return -ENODEV; | ||
309 | |||
310 | printk(KERN_INFO "PCI: Using configuration type OLPC\n"); | 307 | printk(KERN_INFO "PCI: Using configuration type OLPC\n"); |
311 | raw_pci_ops = &pci_olpc_conf; | 308 | raw_pci_ops = &pci_olpc_conf; |
312 | is_lx = is_geode_lx(); | 309 | is_lx = is_geode_lx(); |
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 1c975cc9839e..59a225c17b84 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c | |||
@@ -4,6 +4,7 @@ | |||
4 | 4 | ||
5 | #include <linux/pci.h> | 5 | #include <linux/pci.h> |
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/slab.h> | ||
7 | #include <linux/module.h> | 8 | #include <linux/module.h> |
8 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
9 | #include <asm/pci_x86.h> | 10 | #include <asm/pci_x86.h> |
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c index bcead7a46871..03008f72eb04 100644 --- a/arch/x86/pci/visws.c +++ b/arch/x86/pci/visws.c | |||
@@ -69,9 +69,6 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq) | |||
69 | 69 | ||
70 | int __init pci_visws_init(void) | 70 | int __init pci_visws_init(void) |
71 | { | 71 | { |
72 | if (!is_visws_box()) | ||
73 | return -1; | ||
74 | |||
75 | pcibios_enable_irq = &pci_visws_enable_irq; | 72 | pcibios_enable_irq = &pci_visws_enable_irq; |
76 | pcibios_disable_irq = &pci_visws_disable_irq; | 73 | pcibios_disable_irq = &pci_visws_disable_irq; |
77 | 74 | ||
@@ -90,5 +87,6 @@ int __init pci_visws_init(void) | |||
90 | pci_scan_bus_with_sysdata(pci_bus1); | 87 | pci_scan_bus_with_sysdata(pci_bus1); |
91 | pci_fixup_irqs(pci_common_swizzle, visws_map_irq); | 88 | pci_fixup_irqs(pci_common_swizzle, visws_map_irq); |
92 | pcibios_resource_survey(); | 89 | pcibios_resource_survey(); |
93 | return 0; | 90 | /* Request bus scan */ |
91 | return 1; | ||
94 | } | 92 | } |
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index 81197c62d5b3..3769079874d8 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c | |||
@@ -6,6 +6,7 @@ | |||
6 | * Copyright (c) 2006 Rafael J. Wysocki <rjw@sisk.pl> | 6 | * Copyright (c) 2006 Rafael J. Wysocki <rjw@sisk.pl> |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/gfp.h> | ||
9 | #include <linux/suspend.h> | 10 | #include <linux/suspend.h> |
10 | #include <linux/bootmem.h> | 11 | #include <linux/bootmem.h> |
11 | 12 | ||
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 65fdc86e923f..d24f983ba1e5 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c | |||
@@ -8,6 +8,7 @@ | |||
8 | * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> | 8 | * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/gfp.h> | ||
11 | #include <linux/smp.h> | 12 | #include <linux/smp.h> |
12 | #include <linux/suspend.h> | 13 | #include <linux/suspend.h> |
13 | #include <asm/proto.h> | 14 | #include <asm/proto.h> |
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S index b641388d8286..ad47daeafa4e 100644 --- a/arch/x86/power/hibernate_asm_32.S +++ b/arch/x86/power/hibernate_asm_32.S | |||
@@ -27,10 +27,17 @@ ENTRY(swsusp_arch_suspend) | |||
27 | ret | 27 | ret |
28 | 28 | ||
29 | ENTRY(restore_image) | 29 | ENTRY(restore_image) |
30 | movl mmu_cr4_features, %ecx | ||
30 | movl resume_pg_dir, %eax | 31 | movl resume_pg_dir, %eax |
31 | subl $__PAGE_OFFSET, %eax | 32 | subl $__PAGE_OFFSET, %eax |
32 | movl %eax, %cr3 | 33 | movl %eax, %cr3 |
33 | 34 | ||
35 | jecxz 1f # cr4 Pentium and higher, skip if zero | ||
36 | andl $~(X86_CR4_PGE), %ecx | ||
37 | movl %ecx, %cr4; # turn off PGE | ||
38 | movl %cr3, %eax; # flush TLB | ||
39 | movl %eax, %cr3 | ||
40 | 1: | ||
34 | movl restore_pblist, %edx | 41 | movl restore_pblist, %edx |
35 | .p2align 4,,7 | 42 | .p2align 4,,7 |
36 | 43 | ||
@@ -54,16 +61,8 @@ done: | |||
54 | movl $swapper_pg_dir, %eax | 61 | movl $swapper_pg_dir, %eax |
55 | subl $__PAGE_OFFSET, %eax | 62 | subl $__PAGE_OFFSET, %eax |
56 | movl %eax, %cr3 | 63 | movl %eax, %cr3 |
57 | /* Flush TLB, including "global" things (vmalloc) */ | ||
58 | movl mmu_cr4_features, %ecx | 64 | movl mmu_cr4_features, %ecx |
59 | jecxz 1f # cr4 Pentium and higher, skip if zero | 65 | jecxz 1f # cr4 Pentium and higher, skip if zero |
60 | movl %ecx, %edx | ||
61 | andl $~(X86_CR4_PGE), %edx | ||
62 | movl %edx, %cr4; # turn off PGE | ||
63 | 1: | ||
64 | movl %cr3, %eax; # flush TLB | ||
65 | movl %eax, %cr3 | ||
66 | jecxz 1f # cr4 Pentium and higher, skip if zero | ||
67 | movl %ecx, %cr4; # turn PGE back on | 66 | movl %ecx, %cr4; # turn PGE back on |
68 | 1: | 67 | 1: |
69 | 68 | ||
diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk index 5bbb5a33f220..fd1ab80be0de 100644 --- a/arch/x86/tools/chkobjdump.awk +++ b/arch/x86/tools/chkobjdump.awk | |||
@@ -8,14 +8,24 @@ BEGIN { | |||
8 | od_sver = 19; | 8 | od_sver = 19; |
9 | } | 9 | } |
10 | 10 | ||
11 | /^GNU/ { | 11 | /^GNU objdump/ { |
12 | split($3, ver, "."); | 12 | verstr = "" |
13 | for (i = 3; i <= NF; i++) | ||
14 | if (match($(i), "^[0-9]")) { | ||
15 | verstr = $(i); | ||
16 | break; | ||
17 | } | ||
18 | if (verstr == "") { | ||
19 | printf("Warning: Failed to find objdump version number.\n"); | ||
20 | exit 0; | ||
21 | } | ||
22 | split(verstr, ver, "."); | ||
13 | if (ver[1] > od_ver || | 23 | if (ver[1] > od_ver || |
14 | (ver[1] == od_ver && ver[2] >= od_sver)) { | 24 | (ver[1] == od_ver && ver[2] >= od_sver)) { |
15 | exit 1; | 25 | exit 1; |
16 | } else { | 26 | } else { |
17 | printf("Warning: objdump version %s is older than %d.%d\n", | 27 | printf("Warning: objdump version %s is older than %d.%d\n", |
18 | $4, od_ver, od_sver); | 28 | verstr, od_ver, od_sver); |
19 | print("Warning: Skipping posttest."); | 29 | print("Warning: Skipping posttest."); |
20 | # Logic is inverted, because we just skip test without error. | 30 | # Logic is inverted, because we just skip test without error. |
21 | exit 0; | 31 | exit 0; |
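
The awk fix stops assuming the version is always field 3 of objdump's banner; distribution builds insert extra words (for example "GNU objdump (GNU Binutils for Debian) 2.19.1", an invented banner), so the script now takes the first field that starts with a digit. The same scan expressed in C:

    #include <stdio.h>
    #include <string.h>
    #include <ctype.h>

    int main(void)
    {
        /* hypothetical first line of `objdump -v` output */
        char banner[] = "GNU objdump (GNU Binutils for Debian) 2.19.1";
        char *tok, *ver = NULL;

        for (tok = strtok(banner, " "); tok; tok = strtok(NULL, " "))
            if (isdigit((unsigned char)tok[0])) {  /* first numeric field */
                ver = tok;
                break;
            }
        printf("version: %s\n", ver ? ver : "not found");  /* 2.19.1 */
        return 0;
    }
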
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c index bee8d6ac2691..13403fc95a96 100644 --- a/arch/x86/tools/test_get_len.c +++ b/arch/x86/tools/test_get_len.c | |||
@@ -43,7 +43,7 @@ static int x86_64; | |||
43 | static void usage(void) | 43 | static void usage(void) |
44 | { | 44 | { |
45 | fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" | 45 | fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" |
46 | " %s [-y|-n] [-v] \n", prog); | 46 | " %s [-y|-n] [-v]\n", prog); |
47 | fprintf(stderr, "\t-y 64bit mode\n"); | 47 | fprintf(stderr, "\t-y 64bit mode\n"); |
48 | fprintf(stderr, "\t-n 32bit mode\n"); | 48 | fprintf(stderr, "\t-n 32bit mode\n"); |
49 | fprintf(stderr, "\t-v verbose mode\n"); | 49 | fprintf(stderr, "\t-v verbose mode\n"); |
@@ -69,7 +69,7 @@ static void dump_field(FILE *fp, const char *name, const char *indent, | |||
69 | 69 | ||
70 | static void dump_insn(FILE *fp, struct insn *insn) | 70 | static void dump_insn(FILE *fp, struct insn *insn) |
71 | { | 71 | { |
72 | fprintf(fp, "Instruction = { \n"); | 72 | fprintf(fp, "Instruction = {\n"); |
73 | dump_field(fp, "prefixes", "\t", &insn->prefixes); | 73 | dump_field(fp, "prefixes", "\t", &insn->prefixes); |
74 | dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); | 74 | dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); |
75 | dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); | 75 | dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); |
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 21e1aeb9f3ea..ac74869b8140 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/mm.h> | 6 | #include <linux/mm.h> |
7 | #include <linux/err.h> | 7 | #include <linux/err.h> |
8 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
9 | #include <linux/slab.h> | ||
9 | #include <linux/init.h> | 10 | #include <linux/init.h> |
10 | #include <linux/random.h> | 11 | #include <linux/random.h> |
11 | #include <linux/elf.h> | 12 | #include <linux/elf.h> |
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c index e133ce25e290..1304bcec8ee5 100644 --- a/arch/x86/xen/debugfs.c +++ b/arch/x86/xen/debugfs.c | |||
@@ -1,5 +1,6 @@ | |||
1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
2 | #include <linux/debugfs.h> | 2 | #include <linux/debugfs.h> |
3 | #include <linux/slab.h> | ||
3 | #include <linux/module.h> | 4 | #include <linux/module.h> |
4 | 5 | ||
5 | #include "debugfs.h" | 6 | #include "debugfs.h" |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 2b26dd5930c6..65d8d79b46a8 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
29 | #include <linux/console.h> | 29 | #include <linux/console.h> |
30 | #include <linux/pci.h> | 30 | #include <linux/pci.h> |
31 | #include <linux/gfp.h> | ||
31 | 32 | ||
32 | #include <xen/xen.h> | 33 | #include <xen/xen.h> |
33 | #include <xen/interface/xen.h> | 34 | #include <xen/interface/xen.h> |
@@ -50,6 +51,7 @@ | |||
50 | #include <asm/traps.h> | 51 | #include <asm/traps.h> |
51 | #include <asm/setup.h> | 52 | #include <asm/setup.h> |
52 | #include <asm/desc.h> | 53 | #include <asm/desc.h> |
54 | #include <asm/pgalloc.h> | ||
53 | #include <asm/pgtable.h> | 55 | #include <asm/pgtable.h> |
54 | #include <asm/tlbflush.h> | 56 | #include <asm/tlbflush.h> |
55 | #include <asm/reboot.h> | 57 | #include <asm/reboot.h> |
@@ -1094,6 +1096,12 @@ asmlinkage void __init xen_start_kernel(void) | |||
1094 | 1096 | ||
1095 | __supported_pte_mask |= _PAGE_IOMAP; | 1097 | __supported_pte_mask |= _PAGE_IOMAP; |
1096 | 1098 | ||
1099 | /* | ||
1100 | * Prevent page tables from being allocated in highmem, even | ||
1101 | * if CONFIG_HIGHPTE is enabled. | ||
1102 | */ | ||
1103 | __userpte_alloc_gfp &= ~__GFP_HIGHMEM; | ||
1104 | |||
1097 | /* Work out if we support NX */ | 1105 | /* Work out if we support NX */ |
1098 | x86_configure_nx(); | 1106 | x86_configure_nx(); |
1099 | 1107 | ||
@@ -1151,9 +1159,13 @@ asmlinkage void __init xen_start_kernel(void) | |||
1151 | 1159 | ||
1152 | /* keep using Xen gdt for now; no urgent need to change it */ | 1160 | /* keep using Xen gdt for now; no urgent need to change it */ |
1153 | 1161 | ||
1162 | #ifdef CONFIG_X86_32 | ||
1154 | pv_info.kernel_rpl = 1; | 1163 | pv_info.kernel_rpl = 1; |
1155 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) | 1164 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) |
1156 | pv_info.kernel_rpl = 0; | 1165 | pv_info.kernel_rpl = 0; |
1166 | #else | ||
1167 | pv_info.kernel_rpl = 0; | ||
1168 | #endif | ||
1157 | 1169 | ||
1158 | /* set the limit of our address space */ | 1170 | /* set the limit of our address space */ |
1159 | xen_reserve_top(); | 1171 | xen_reserve_top(); |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index bf4cd6bfe959..914f04695ce5 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/debugfs.h> | 43 | #include <linux/debugfs.h> |
44 | #include <linux/bug.h> | 44 | #include <linux/bug.h> |
45 | #include <linux/module.h> | 45 | #include <linux/module.h> |
46 | #include <linux/gfp.h> | ||
46 | 47 | ||
47 | #include <asm/pgtable.h> | 48 | #include <asm/pgtable.h> |
48 | #include <asm/tlbflush.h> | 49 | #include <asm/tlbflush.h> |
@@ -1427,23 +1428,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) | |||
1427 | #endif | 1428 | #endif |
1428 | } | 1429 | } |
1429 | 1430 | ||
1430 | #ifdef CONFIG_HIGHPTE | ||
1431 | static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) | ||
1432 | { | ||
1433 | pgprot_t prot = PAGE_KERNEL; | ||
1434 | |||
1435 | if (PagePinned(page)) | ||
1436 | prot = PAGE_KERNEL_RO; | ||
1437 | |||
1438 | if (0 && PageHighMem(page)) | ||
1439 | printk("mapping highpte %lx type %d prot %s\n", | ||
1440 | page_to_pfn(page), type, | ||
1441 | (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ"); | ||
1442 | |||
1443 | return kmap_atomic_prot(page, type, prot); | ||
1444 | } | ||
1445 | #endif | ||
1446 | |||
1447 | #ifdef CONFIG_X86_32 | 1431 | #ifdef CONFIG_X86_32 |
1448 | static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) | 1432 | static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) |
1449 | { | 1433 | { |
@@ -1902,10 +1886,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1902 | .alloc_pmd_clone = paravirt_nop, | 1886 | .alloc_pmd_clone = paravirt_nop, |
1903 | .release_pmd = xen_release_pmd_init, | 1887 | .release_pmd = xen_release_pmd_init, |
1904 | 1888 | ||
1905 | #ifdef CONFIG_HIGHPTE | ||
1906 | .kmap_atomic_pte = xen_kmap_atomic_pte, | ||
1907 | #endif | ||
1908 | |||
1909 | #ifdef CONFIG_X86_64 | 1889 | #ifdef CONFIG_X86_64 |
1910 | .set_pte = xen_set_pte, | 1890 | .set_pte = xen_set_pte, |
1911 | #else | 1891 | #else |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 563d20504988..a29693fd3138 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -14,6 +14,7 @@ | |||
14 | */ | 14 | */ |
15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
16 | #include <linux/err.h> | 16 | #include <linux/err.h> |
17 | #include <linux/slab.h> | ||
17 | #include <linux/smp.h> | 18 | #include <linux/smp.h> |
18 | 19 | ||
19 | #include <asm/paravirt.h> | 20 | #include <asm/paravirt.h> |
@@ -361,7 +362,7 @@ static void xen_cpu_die(unsigned int cpu) | |||
361 | alternatives_smp_switch(0); | 362 | alternatives_smp_switch(0); |
362 | } | 363 | } |
363 | 364 | ||
364 | static void __cpuinit xen_play_dead(void) /* used only with CPU_HOTPLUG */ | 365 | static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */ |
365 | { | 366 | { |
366 | play_dead_common(); | 367 | play_dead_common(); |
367 | HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); | 368 | HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); |
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 24ded31b5aec..e0500646585d 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/spinlock.h> | 6 | #include <linux/spinlock.h> |
7 | #include <linux/debugfs.h> | 7 | #include <linux/debugfs.h> |
8 | #include <linux/log2.h> | 8 | #include <linux/log2.h> |
9 | #include <linux/gfp.h> | ||
9 | 10 | ||
10 | #include <asm/paravirt.h> | 11 | #include <asm/paravirt.h> |
11 | 12 | ||
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 0d3f07cd1b5f..32764b8880b5 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/clockchips.h> | 13 | #include <linux/clockchips.h> |
14 | #include <linux/kernel_stat.h> | 14 | #include <linux/kernel_stat.h> |
15 | #include <linux/math64.h> | 15 | #include <linux/math64.h> |
16 | #include <linux/gfp.h> | ||
16 | 17 | ||
17 | #include <asm/pvclock.h> | 18 | #include <asm/pvclock.h> |
18 | #include <asm/xen/hypervisor.h> | 19 | #include <asm/xen/hypervisor.h> |
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 88e15deb8b82..22a2093b5862 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S | |||
@@ -90,9 +90,9 @@ ENTRY(xen_iret) | |||
90 | GET_THREAD_INFO(%eax) | 90 | GET_THREAD_INFO(%eax) |
91 | movl TI_cpu(%eax), %eax | 91 | movl TI_cpu(%eax), %eax |
92 | movl __per_cpu_offset(,%eax,4), %eax | 92 | movl __per_cpu_offset(,%eax,4), %eax |
93 | mov per_cpu__xen_vcpu(%eax), %eax | 93 | mov xen_vcpu(%eax), %eax |
94 | #else | 94 | #else |
95 | movl per_cpu__xen_vcpu, %eax | 95 | movl xen_vcpu, %eax |
96 | #endif | 96 | #endif |
97 | 97 | ||
98 | /* check IF state we're restoring */ | 98 | /* check IF state we're restoring */ |